|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 8.432772549922241, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.5992650985717773, |
|
"logits/rejected": -2.567516326904297, |
|
"logps/chosen": -272.1844482421875, |
|
"logps/rejected": -362.26898193359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.993362324491976, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.4130637645721436, |
|
"logits/rejected": -2.3763909339904785, |
|
"logps/chosen": -268.1092834472656, |
|
"logps/rejected": -252.8538360595703, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.00011327523679938167, |
|
"rewards/margins": 0.00018297109636478126, |
|
"rewards/rejected": -6.969591049710289e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 8.212783868686264, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.472649097442627, |
|
"logits/rejected": -2.4103596210479736, |
|
"logps/chosen": -283.3070068359375, |
|
"logps/rejected": -297.09979248046875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.001224780222401023, |
|
"rewards/margins": 0.0005858406075276434, |
|
"rewards/rejected": 0.0006389396148733795, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.684799704050697, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.508185386657715, |
|
"logits/rejected": -2.415645122528076, |
|
"logps/chosen": -301.5997619628906, |
|
"logps/rejected": -265.80426025390625, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0062199728563427925, |
|
"rewards/margins": 0.0035330094397068024, |
|
"rewards/rejected": 0.002686963649466634, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.49113948544429, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.404700756072998, |
|
"logits/rejected": -2.350811243057251, |
|
"logps/chosen": -268.32647705078125, |
|
"logps/rejected": -247.6831817626953, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0182647742331028, |
|
"rewards/margins": 0.020485591143369675, |
|
"rewards/rejected": -0.0022208169102668762, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 7.812080735900241, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.3245081901550293, |
|
"logits/rejected": -2.2887587547302246, |
|
"logps/chosen": -280.1948547363281, |
|
"logps/rejected": -293.09405517578125, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.022629689425230026, |
|
"rewards/margins": 0.031411103904247284, |
|
"rewards/rejected": -0.00878141075372696, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 8.653367610484782, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.3789138793945312, |
|
"logits/rejected": -2.3128437995910645, |
|
"logps/chosen": -271.4405822753906, |
|
"logps/rejected": -303.5579528808594, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.029351558536291122, |
|
"rewards/margins": 0.06681646406650543, |
|
"rewards/rejected": -0.03746490180492401, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.793859330498844, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.1905629634857178, |
|
"logits/rejected": -2.1994009017944336, |
|
"logps/chosen": -267.55340576171875, |
|
"logps/rejected": -273.4862976074219, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0027915718965232372, |
|
"rewards/margins": 0.1346043348312378, |
|
"rewards/rejected": -0.13739590346813202, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 13.837088401780129, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.3278985023498535, |
|
"logits/rejected": -2.243424892425537, |
|
"logps/chosen": -337.2379455566406, |
|
"logps/rejected": -292.844970703125, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.09643656760454178, |
|
"rewards/margins": 0.17360267043113708, |
|
"rewards/rejected": -0.27003923058509827, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 17.06156274259609, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.1236023902893066, |
|
"logits/rejected": -2.0597236156463623, |
|
"logps/chosen": -276.4493103027344, |
|
"logps/rejected": -299.7818908691406, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15940961241722107, |
|
"rewards/margins": 0.3067048490047455, |
|
"rewards/rejected": -0.46611452102661133, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 15.017538794455808, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.0527923107147217, |
|
"logits/rejected": -1.9835008382797241, |
|
"logps/chosen": -294.0035400390625, |
|
"logps/rejected": -321.8211669921875, |
|
"loss": 0.5965, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.27848348021507263, |
|
"rewards/margins": 0.3403889834880829, |
|
"rewards/rejected": -0.6188725233078003, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.1194002628326416, |
|
"eval_logits/rejected": -2.0640361309051514, |
|
"eval_logps/chosen": -308.44342041015625, |
|
"eval_logps/rejected": -344.93780517578125, |
|
"eval_loss": 0.6008175015449524, |
|
"eval_rewards/accuracies": 0.71484375, |
|
"eval_rewards/chosen": -0.43487486243247986, |
|
"eval_rewards/margins": 0.3607807159423828, |
|
"eval_rewards/rejected": -0.7956556081771851, |
|
"eval_runtime": 39.9329, |
|
"eval_samples_per_second": 50.084, |
|
"eval_steps_per_second": 0.801, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 13.512613063149377, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.1186444759368896, |
|
"logits/rejected": -1.960219383239746, |
|
"logps/chosen": -346.79327392578125, |
|
"logps/rejected": -330.68634033203125, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.373442143201828, |
|
"rewards/margins": 0.33256274461746216, |
|
"rewards/rejected": -0.7060048580169678, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 16.771221195438027, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -1.878488302230835, |
|
"logits/rejected": -1.82696533203125, |
|
"logps/chosen": -322.34173583984375, |
|
"logps/rejected": -339.63104248046875, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.3693729043006897, |
|
"rewards/margins": 0.3852415680885315, |
|
"rewards/rejected": -0.754614531993866, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 14.00034800920836, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -1.8512026071548462, |
|
"logits/rejected": -1.7661195993423462, |
|
"logps/chosen": -322.70599365234375, |
|
"logps/rejected": -353.8482360839844, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3978124260902405, |
|
"rewards/margins": 0.42583903670310974, |
|
"rewards/rejected": -0.8236514925956726, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 15.3625036150752, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -1.7981727123260498, |
|
"logits/rejected": -1.7398284673690796, |
|
"logps/chosen": -359.68994140625, |
|
"logps/rejected": -386.440185546875, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.40198105573654175, |
|
"rewards/margins": 0.43909168243408203, |
|
"rewards/rejected": -0.841072678565979, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 20.80601584306436, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -1.7028295993804932, |
|
"logits/rejected": -1.5830708742141724, |
|
"logps/chosen": -345.12286376953125, |
|
"logps/rejected": -376.55859375, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5939025282859802, |
|
"rewards/margins": 0.37951546907424927, |
|
"rewards/rejected": -0.9734179377555847, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 18.137407218927724, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -1.6537895202636719, |
|
"logits/rejected": -1.4418971538543701, |
|
"logps/chosen": -318.0480651855469, |
|
"logps/rejected": -349.61431884765625, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.43722066283226013, |
|
"rewards/margins": 0.5587003827095032, |
|
"rewards/rejected": -0.9959210157394409, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 16.981581647441832, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -1.5120352506637573, |
|
"logits/rejected": -1.4046074151992798, |
|
"logps/chosen": -306.04840087890625, |
|
"logps/rejected": -330.2176208496094, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.538715124130249, |
|
"rewards/margins": 0.42584919929504395, |
|
"rewards/rejected": -0.9645644426345825, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 14.617848943306955, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -1.3031604290008545, |
|
"logits/rejected": -1.1622366905212402, |
|
"logps/chosen": -317.8174743652344, |
|
"logps/rejected": -331.2264404296875, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6144936084747314, |
|
"rewards/margins": 0.4146398603916168, |
|
"rewards/rejected": -1.0291334390640259, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 18.83954708764831, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -1.3724639415740967, |
|
"logits/rejected": -1.2839093208312988, |
|
"logps/chosen": -320.2606506347656, |
|
"logps/rejected": -353.50677490234375, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.47087445855140686, |
|
"rewards/margins": 0.4510224461555481, |
|
"rewards/rejected": -0.9218968152999878, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 27.897186532435434, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -1.14837646484375, |
|
"logits/rejected": -1.072177767753601, |
|
"logps/chosen": -318.0636291503906, |
|
"logps/rejected": -362.46044921875, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5620325207710266, |
|
"rewards/margins": 0.5343230962753296, |
|
"rewards/rejected": -1.096355676651001, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.2653636932373047, |
|
"eval_logits/rejected": -1.1455148458480835, |
|
"eval_logps/chosen": -328.60369873046875, |
|
"eval_logps/rejected": -382.0739440917969, |
|
"eval_loss": 0.558937132358551, |
|
"eval_rewards/accuracies": 0.73828125, |
|
"eval_rewards/chosen": -0.6364771723747253, |
|
"eval_rewards/margins": 0.5305400490760803, |
|
"eval_rewards/rejected": -1.1670172214508057, |
|
"eval_runtime": 39.8802, |
|
"eval_samples_per_second": 50.15, |
|
"eval_steps_per_second": 0.802, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 15.625329725139888, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -1.0910618305206299, |
|
"logits/rejected": -0.9091793298721313, |
|
"logps/chosen": -360.6944885253906, |
|
"logps/rejected": -366.00146484375, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6661044359207153, |
|
"rewards/margins": 0.46460071206092834, |
|
"rewards/rejected": -1.1307051181793213, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 19.24668760178638, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -0.9850679636001587, |
|
"logits/rejected": -0.7914190292358398, |
|
"logps/chosen": -309.50775146484375, |
|
"logps/rejected": -338.098876953125, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.44118037819862366, |
|
"rewards/margins": 0.524976372718811, |
|
"rewards/rejected": -0.9661566019058228, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 19.97223623454459, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -0.4917120039463043, |
|
"logits/rejected": -0.298466295003891, |
|
"logps/chosen": -313.9906005859375, |
|
"logps/rejected": -375.44989013671875, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6258713603019714, |
|
"rewards/margins": 0.6441494226455688, |
|
"rewards/rejected": -1.2700207233428955, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 21.91087703960587, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": 0.09583790600299835, |
|
"logits/rejected": 0.32567495107650757, |
|
"logps/chosen": -339.2015380859375, |
|
"logps/rejected": -384.8148498535156, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.7344726324081421, |
|
"rewards/margins": 0.6358748078346252, |
|
"rewards/rejected": -1.370347499847412, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 19.954418058737403, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": 0.5116527676582336, |
|
"logits/rejected": 0.8739731907844543, |
|
"logps/chosen": -363.95684814453125, |
|
"logps/rejected": -394.17877197265625, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.6146451830863953, |
|
"rewards/margins": 0.7260497808456421, |
|
"rewards/rejected": -1.3406950235366821, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 29.50918535565258, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": 0.7538167834281921, |
|
"logits/rejected": 1.1193482875823975, |
|
"logps/chosen": -365.6874694824219, |
|
"logps/rejected": -389.31396484375, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6788283586502075, |
|
"rewards/margins": 0.6221181154251099, |
|
"rewards/rejected": -1.3009464740753174, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 24.53746609446516, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": 1.132846474647522, |
|
"logits/rejected": 1.5623472929000854, |
|
"logps/chosen": -345.86700439453125, |
|
"logps/rejected": -378.31719970703125, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6558988690376282, |
|
"rewards/margins": 0.5384365320205688, |
|
"rewards/rejected": -1.1943353414535522, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 27.660029561250692, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": 1.715608835220337, |
|
"logits/rejected": 2.1731343269348145, |
|
"logps/chosen": -352.11346435546875, |
|
"logps/rejected": -406.60772705078125, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8623917698860168, |
|
"rewards/margins": 0.7531214952468872, |
|
"rewards/rejected": -1.6155132055282593, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 25.659061335686694, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": 1.6510066986083984, |
|
"logits/rejected": 1.7990186214447021, |
|
"logps/chosen": -337.61016845703125, |
|
"logps/rejected": -396.5470886230469, |
|
"loss": 0.5336, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8643373250961304, |
|
"rewards/margins": 0.6685428023338318, |
|
"rewards/rejected": -1.5328800678253174, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 22.694419610449454, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": 1.5950249433517456, |
|
"logits/rejected": 2.302058696746826, |
|
"logps/chosen": -376.0797424316406, |
|
"logps/rejected": -401.6100769042969, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7354680299758911, |
|
"rewards/margins": 0.6922025680541992, |
|
"rewards/rejected": -1.4276707172393799, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 1.7225008010864258, |
|
"eval_logits/rejected": 2.1388766765594482, |
|
"eval_logps/chosen": -334.2620849609375, |
|
"eval_logps/rejected": -418.3771667480469, |
|
"eval_loss": 0.5288156270980835, |
|
"eval_rewards/accuracies": 0.76171875, |
|
"eval_rewards/chosen": -0.6930612325668335, |
|
"eval_rewards/margins": 0.8369885683059692, |
|
"eval_rewards/rejected": -1.5300499200820923, |
|
"eval_runtime": 39.9288, |
|
"eval_samples_per_second": 50.089, |
|
"eval_steps_per_second": 0.801, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 24.37280438119094, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": 2.1118528842926025, |
|
"logits/rejected": 2.5268707275390625, |
|
"logps/chosen": -376.37969970703125, |
|
"logps/rejected": -417.34869384765625, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.778560996055603, |
|
"rewards/margins": 0.7997097969055176, |
|
"rewards/rejected": -1.5782709121704102, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 26.080136074985454, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": 2.5625953674316406, |
|
"logits/rejected": 3.1481173038482666, |
|
"logps/chosen": -383.0509338378906, |
|
"logps/rejected": -411.533935546875, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8686873316764832, |
|
"rewards/margins": 0.7724698781967163, |
|
"rewards/rejected": -1.6411571502685547, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 27.228237079063305, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": 2.7803778648376465, |
|
"logits/rejected": 3.247398853302002, |
|
"logps/chosen": -351.1916809082031, |
|
"logps/rejected": -420.370849609375, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8757610321044922, |
|
"rewards/margins": 0.7058261632919312, |
|
"rewards/rejected": -1.5815874338150024, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 28.892127434127993, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": 2.754971504211426, |
|
"logits/rejected": 3.230527400970459, |
|
"logps/chosen": -377.8298645019531, |
|
"logps/rejected": -465.24761962890625, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.8463465571403503, |
|
"rewards/margins": 0.987470269203186, |
|
"rewards/rejected": -1.8338168859481812, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 26.281931375691812, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": 2.723754405975342, |
|
"logits/rejected": 3.361722230911255, |
|
"logps/chosen": -351.22900390625, |
|
"logps/rejected": -415.8351135253906, |
|
"loss": 0.4828, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8969828486442566, |
|
"rewards/margins": 0.857469916343689, |
|
"rewards/rejected": -1.7544529438018799, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 24.286834238524502, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": 2.8787496089935303, |
|
"logits/rejected": 3.6532554626464844, |
|
"logps/chosen": -402.9510192871094, |
|
"logps/rejected": -443.6593322753906, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9707611203193665, |
|
"rewards/margins": 0.8318966627120972, |
|
"rewards/rejected": -1.8026577234268188, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 23.72120672745611, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": 3.308849811553955, |
|
"logits/rejected": 3.967015504837036, |
|
"logps/chosen": -388.5707702636719, |
|
"logps/rejected": -417.2923889160156, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0928517580032349, |
|
"rewards/margins": 0.6804816722869873, |
|
"rewards/rejected": -1.7733335494995117, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 24.194836344161388, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": 3.4659945964813232, |
|
"logits/rejected": 3.6677188873291016, |
|
"logps/chosen": -360.42303466796875, |
|
"logps/rejected": -468.11322021484375, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1740145683288574, |
|
"rewards/margins": 0.8953350186347961, |
|
"rewards/rejected": -2.069349765777588, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 33.960328274537595, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": 3.0402557849884033, |
|
"logits/rejected": 3.3952622413635254, |
|
"logps/chosen": -395.9051208496094, |
|
"logps/rejected": -456.98162841796875, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9203785061836243, |
|
"rewards/margins": 0.77605140209198, |
|
"rewards/rejected": -1.6964296102523804, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 24.96336693295718, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": 3.402864456176758, |
|
"logits/rejected": 3.9089291095733643, |
|
"logps/chosen": -373.3275146484375, |
|
"logps/rejected": -453.81109619140625, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0015218257904053, |
|
"rewards/margins": 0.86052405834198, |
|
"rewards/rejected": -1.8620456457138062, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 2.9372177124023438, |
|
"eval_logits/rejected": 3.4323720932006836, |
|
"eval_logps/chosen": -352.0043029785156, |
|
"eval_logps/rejected": -445.87408447265625, |
|
"eval_loss": 0.5152841210365295, |
|
"eval_rewards/accuracies": 0.7578125, |
|
"eval_rewards/chosen": -0.8704833984375, |
|
"eval_rewards/margins": 0.9345353841781616, |
|
"eval_rewards/rejected": -1.8050185441970825, |
|
"eval_runtime": 39.8891, |
|
"eval_samples_per_second": 50.139, |
|
"eval_steps_per_second": 0.802, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 26.45544298088616, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": 3.266558885574341, |
|
"logits/rejected": 4.045865535736084, |
|
"logps/chosen": -393.68505859375, |
|
"logps/rejected": -454.821044921875, |
|
"loss": 0.5226, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.9244238138198853, |
|
"rewards/margins": 0.9223299026489258, |
|
"rewards/rejected": -1.8467538356781006, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 24.753221828065943, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": 3.017789363861084, |
|
"logits/rejected": 3.619795322418213, |
|
"logps/chosen": -396.9893798828125, |
|
"logps/rejected": -455.4769592285156, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9954174160957336, |
|
"rewards/margins": 0.8088730573654175, |
|
"rewards/rejected": -1.804290533065796, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 28.64072501651785, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": 3.4113173484802246, |
|
"logits/rejected": 3.839292526245117, |
|
"logps/chosen": -386.08624267578125, |
|
"logps/rejected": -451.10894775390625, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.0454809665679932, |
|
"rewards/margins": 0.8698482513427734, |
|
"rewards/rejected": -1.9153292179107666, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 28.408098716357102, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": 2.9448680877685547, |
|
"logits/rejected": 3.614654541015625, |
|
"logps/chosen": -380.95782470703125, |
|
"logps/rejected": -454.02191162109375, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9367244839668274, |
|
"rewards/margins": 0.8637276887893677, |
|
"rewards/rejected": -1.8004519939422607, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 27.41806430030018, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": 3.149013042449951, |
|
"logits/rejected": 3.4816536903381348, |
|
"logps/chosen": -401.301513671875, |
|
"logps/rejected": -495.34002685546875, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.096161127090454, |
|
"rewards/margins": 0.7614067792892456, |
|
"rewards/rejected": -1.8575680255889893, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 30.97252598966743, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": 3.359788417816162, |
|
"logits/rejected": 3.880640745162964, |
|
"logps/chosen": -376.32879638671875, |
|
"logps/rejected": -425.67010498046875, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0073617696762085, |
|
"rewards/margins": 0.8302923440933228, |
|
"rewards/rejected": -1.8376541137695312, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 23.032101494576157, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": 3.195591688156128, |
|
"logits/rejected": 3.7321903705596924, |
|
"logps/chosen": -378.28631591796875, |
|
"logps/rejected": -458.43096923828125, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0239256620407104, |
|
"rewards/margins": 0.7799959778785706, |
|
"rewards/rejected": -1.8039219379425049, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5581105443723032, |
|
"train_runtime": 5172.5891, |
|
"train_samples_per_second": 11.819, |
|
"train_steps_per_second": 0.092 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|