|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.75, |
|
"learning_rate": 2.617801047120419e-08, |
|
"logits/chosen": -2.2731704711914062, |
|
"logits/rejected": -2.1761367321014404, |
|
"logps/chosen": -360.11749267578125, |
|
"logps/rejected": -295.71942138671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 2.617801047120419e-07, |
|
"logits/chosen": -2.259983777999878, |
|
"logits/rejected": -2.1553776264190674, |
|
"logps/chosen": -303.7694091796875, |
|
"logps/rejected": -241.65496826171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": 0.0003990587720181793, |
|
"rewards/margins": 0.00018596058362163603, |
|
"rewards/rejected": 0.00021309818839654326, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 5.235602094240838e-07, |
|
"logits/chosen": -2.2269904613494873, |
|
"logits/rejected": -2.1716256141662598, |
|
"logps/chosen": -262.0898132324219, |
|
"logps/rejected": -249.70352172851562, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.001905500190332532, |
|
"rewards/margins": 0.00033334543695673347, |
|
"rewards/rejected": 0.0015721546951681376, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 7.853403141361258e-07, |
|
"logits/chosen": -2.1317379474639893, |
|
"logits/rejected": -2.082672595977783, |
|
"logps/chosen": -277.956298828125, |
|
"logps/rejected": -239.60293579101562, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.005530746188014746, |
|
"rewards/margins": 0.0024719357024878263, |
|
"rewards/rejected": 0.003058810718357563, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.6484375, |
|
"learning_rate": 1.0471204188481676e-06, |
|
"logits/chosen": -2.241527795791626, |
|
"logits/rejected": -2.1675925254821777, |
|
"logps/chosen": -267.1982727050781, |
|
"logps/rejected": -270.7894287109375, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.010646065697073936, |
|
"rewards/margins": 0.007375557906925678, |
|
"rewards/rejected": 0.003270507324486971, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 1.3089005235602096e-06, |
|
"logits/chosen": -2.269123077392578, |
|
"logits/rejected": -2.1638808250427246, |
|
"logps/chosen": -285.50164794921875, |
|
"logps/rejected": -254.6826629638672, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.016856886446475983, |
|
"rewards/margins": 0.012282918207347393, |
|
"rewards/rejected": 0.004573967307806015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 1.5706806282722515e-06, |
|
"logits/chosen": -2.272505283355713, |
|
"logits/rejected": -2.122178316116333, |
|
"logps/chosen": -310.4421081542969, |
|
"logps/rejected": -260.3993835449219, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.027886558324098587, |
|
"rewards/margins": 0.01857396773993969, |
|
"rewards/rejected": 0.009312589652836323, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.71875, |
|
"learning_rate": 1.8324607329842933e-06, |
|
"logits/chosen": -2.2799620628356934, |
|
"logits/rejected": -2.126312494277954, |
|
"logps/chosen": -266.8229064941406, |
|
"logps/rejected": -234.4290313720703, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03913799300789833, |
|
"rewards/margins": 0.0319330058991909, |
|
"rewards/rejected": 0.0072049833834171295, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.859375, |
|
"learning_rate": 2.094240837696335e-06, |
|
"logits/chosen": -2.2087223529815674, |
|
"logits/rejected": -2.1472485065460205, |
|
"logps/chosen": -279.0526123046875, |
|
"logps/rejected": -270.34185791015625, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.05015086010098457, |
|
"rewards/margins": 0.03647767752408981, |
|
"rewards/rejected": 0.013673178851604462, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.75, |
|
"learning_rate": 2.356020942408377e-06, |
|
"logits/chosen": -2.2452147006988525, |
|
"logits/rejected": -2.178189992904663, |
|
"logps/chosen": -250.22409057617188, |
|
"logps/rejected": -241.08779907226562, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0462195985019207, |
|
"rewards/margins": 0.03395666554570198, |
|
"rewards/rejected": 0.012262934818863869, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 2.617801047120419e-06, |
|
"logits/chosen": -2.2681682109832764, |
|
"logits/rejected": -2.121201515197754, |
|
"logps/chosen": -244.41537475585938, |
|
"logps/rejected": -206.2124481201172, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.027621516957879066, |
|
"rewards/margins": 0.07613887637853622, |
|
"rewards/rejected": -0.048517368733882904, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -2.136847972869873, |
|
"eval_logits/rejected": -2.0436248779296875, |
|
"eval_logps/chosen": -271.1658935546875, |
|
"eval_logps/rejected": -254.5337371826172, |
|
"eval_loss": 0.6636302471160889, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": 0.00537072541192174, |
|
"eval_rewards/margins": 0.07350712269544601, |
|
"eval_rewards/rejected": -0.06813640147447586, |
|
"eval_runtime": 206.3535, |
|
"eval_samples_per_second": 9.692, |
|
"eval_steps_per_second": 0.606, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.125, |
|
"learning_rate": 2.8795811518324613e-06, |
|
"logits/chosen": -2.2474560737609863, |
|
"logits/rejected": -2.1244096755981445, |
|
"logps/chosen": -264.55267333984375, |
|
"logps/rejected": -220.3800506591797, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.01394604705274105, |
|
"rewards/margins": 0.09943284094333649, |
|
"rewards/rejected": -0.08548679202795029, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 3.141361256544503e-06, |
|
"logits/chosen": -2.269590139389038, |
|
"logits/rejected": -2.1833536624908447, |
|
"logps/chosen": -320.98858642578125, |
|
"logps/rejected": -295.50213623046875, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03138069808483124, |
|
"rewards/margins": 0.07552894949913025, |
|
"rewards/rejected": -0.10690964758396149, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 3.403141361256545e-06, |
|
"logits/chosen": -2.2066664695739746, |
|
"logits/rejected": -2.1157479286193848, |
|
"logps/chosen": -286.4569396972656, |
|
"logps/rejected": -271.8417053222656, |
|
"loss": 0.6471, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.026338139548897743, |
|
"rewards/margins": 0.12607838213443756, |
|
"rewards/rejected": -0.09974025189876556, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.296875, |
|
"learning_rate": 3.6649214659685865e-06, |
|
"logits/chosen": -2.1494216918945312, |
|
"logits/rejected": -2.084484815597534, |
|
"logps/chosen": -266.2762756347656, |
|
"logps/rejected": -270.8194580078125, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.075466588139534, |
|
"rewards/margins": 0.14162591099739075, |
|
"rewards/rejected": -0.21709248423576355, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 3.926701570680629e-06, |
|
"logits/chosen": -2.189535617828369, |
|
"logits/rejected": -2.0706353187561035, |
|
"logps/chosen": -291.97283935546875, |
|
"logps/rejected": -291.4541015625, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.13837790489196777, |
|
"rewards/margins": 0.16128641366958618, |
|
"rewards/rejected": -0.29966431856155396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.9375, |
|
"learning_rate": 4.18848167539267e-06, |
|
"logits/chosen": -2.3002448081970215, |
|
"logits/rejected": -2.1360554695129395, |
|
"logps/chosen": -293.7611083984375, |
|
"logps/rejected": -274.07025146484375, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.17077429592609406, |
|
"rewards/margins": 0.15933959186077118, |
|
"rewards/rejected": -0.3301139175891876, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.71875, |
|
"learning_rate": 4.450261780104713e-06, |
|
"logits/chosen": -2.1448490619659424, |
|
"logits/rejected": -2.123392105102539, |
|
"logps/chosen": -263.1458740234375, |
|
"logps/rejected": -278.94635009765625, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16839735209941864, |
|
"rewards/margins": 0.17006024718284607, |
|
"rewards/rejected": -0.3384575843811035, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 4.712041884816754e-06, |
|
"logits/chosen": -2.2578933238983154, |
|
"logits/rejected": -2.1124982833862305, |
|
"logps/chosen": -318.94476318359375, |
|
"logps/rejected": -312.0294494628906, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.23213641345500946, |
|
"rewards/margins": 0.20630350708961487, |
|
"rewards/rejected": -0.43843990564346313, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 4.9738219895287965e-06, |
|
"logits/chosen": -2.2287371158599854, |
|
"logits/rejected": -2.1092655658721924, |
|
"logps/chosen": -303.0882873535156, |
|
"logps/rejected": -299.7857360839844, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2376522272825241, |
|
"rewards/margins": 0.2720206081867218, |
|
"rewards/rejected": -0.5096728205680847, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 4.999661831436499e-06, |
|
"logits/chosen": -2.1837334632873535, |
|
"logits/rejected": -2.094944477081299, |
|
"logps/chosen": -285.54425048828125, |
|
"logps/rejected": -296.94720458984375, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1814960539340973, |
|
"rewards/margins": 0.24684414267539978, |
|
"rewards/rejected": -0.4283401370048523, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -2.0918540954589844, |
|
"eval_logits/rejected": -2.003007173538208, |
|
"eval_logps/chosen": -304.0613098144531, |
|
"eval_logps/rejected": -307.0966796875, |
|
"eval_loss": 0.607542872428894, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": -0.32358381152153015, |
|
"eval_rewards/margins": 0.2701820731163025, |
|
"eval_rewards/rejected": -0.5937658548355103, |
|
"eval_runtime": 205.7166, |
|
"eval_samples_per_second": 9.722, |
|
"eval_steps_per_second": 0.608, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 4.9984929711403395e-06, |
|
"logits/chosen": -2.132068395614624, |
|
"logits/rejected": -2.023831844329834, |
|
"logps/chosen": -262.7835693359375, |
|
"logps/rejected": -267.73046875, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.24357381463050842, |
|
"rewards/margins": 0.18603582680225372, |
|
"rewards/rejected": -0.4296096861362457, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.671875, |
|
"learning_rate": 4.996489634487865e-06, |
|
"logits/chosen": -2.1585371494293213, |
|
"logits/rejected": -2.0781378746032715, |
|
"logps/chosen": -319.03131103515625, |
|
"logps/rejected": -319.93634033203125, |
|
"loss": 0.5928, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.40161317586898804, |
|
"rewards/margins": 0.3085588216781616, |
|
"rewards/rejected": -0.7101720571517944, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 4.9936524905772466e-06, |
|
"logits/chosen": -2.099738359451294, |
|
"logits/rejected": -2.0198581218719482, |
|
"logps/chosen": -287.83807373046875, |
|
"logps/rejected": -313.92620849609375, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.435781329870224, |
|
"rewards/margins": 0.37423354387283325, |
|
"rewards/rejected": -0.8100149035453796, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 4.9899824869915e-06, |
|
"logits/chosen": -2.1551759243011475, |
|
"logits/rejected": -2.0810234546661377, |
|
"logps/chosen": -326.7232360839844, |
|
"logps/rejected": -348.8532409667969, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7081143260002136, |
|
"rewards/margins": 0.3044819235801697, |
|
"rewards/rejected": -1.0125962495803833, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.203125, |
|
"learning_rate": 4.985480849482012e-06, |
|
"logits/chosen": -2.1337616443634033, |
|
"logits/rejected": -2.0014939308166504, |
|
"logps/chosen": -374.9393615722656, |
|
"logps/rejected": -349.9031066894531, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6511236429214478, |
|
"rewards/margins": 0.35167860984802246, |
|
"rewards/rejected": -1.0028022527694702, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 4.980149081559142e-06, |
|
"logits/chosen": -2.1727969646453857, |
|
"logits/rejected": -2.108790874481201, |
|
"logps/chosen": -314.8628234863281, |
|
"logps/rejected": -321.34710693359375, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4089882969856262, |
|
"rewards/margins": 0.38155826926231384, |
|
"rewards/rejected": -0.7905465960502625, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 4.9739889639900655e-06, |
|
"logits/chosen": -2.1201374530792236, |
|
"logits/rejected": -2.074112892150879, |
|
"logps/chosen": -317.30908203125, |
|
"logps/rejected": -349.3733825683594, |
|
"loss": 0.5602, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.4397730827331543, |
|
"rewards/margins": 0.4018850326538086, |
|
"rewards/rejected": -0.8416581153869629, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 4.967002554204009e-06, |
|
"logits/chosen": -2.1125736236572266, |
|
"logits/rejected": -2.088714122772217, |
|
"logps/chosen": -330.7603759765625, |
|
"logps/rejected": -371.1779479980469, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5998188853263855, |
|
"rewards/margins": 0.33086952567100525, |
|
"rewards/rejected": -0.9306885004043579, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 4.959192185605089e-06, |
|
"logits/chosen": -2.266671657562256, |
|
"logits/rejected": -2.168347120285034, |
|
"logps/chosen": -372.093017578125, |
|
"logps/rejected": -373.88555908203125, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5426467657089233, |
|
"rewards/margins": 0.4017227590084076, |
|
"rewards/rejected": -0.9443694353103638, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 7.375, |
|
"learning_rate": 4.950560466792969e-06, |
|
"logits/chosen": -2.1483025550842285, |
|
"logits/rejected": -2.048117160797119, |
|
"logps/chosen": -341.9796447753906, |
|
"logps/rejected": -351.8575744628906, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6638094782829285, |
|
"rewards/margins": 0.44865164160728455, |
|
"rewards/rejected": -1.1124610900878906, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -2.076050281524658, |
|
"eval_logits/rejected": -1.9913941621780396, |
|
"eval_logps/chosen": -342.9188232421875, |
|
"eval_logps/rejected": -360.5767517089844, |
|
"eval_loss": 0.5817497372627258, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": -0.7121586203575134, |
|
"eval_rewards/margins": 0.4164075553417206, |
|
"eval_rewards/rejected": -1.1285661458969116, |
|
"eval_runtime": 205.7512, |
|
"eval_samples_per_second": 9.72, |
|
"eval_steps_per_second": 0.608, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 4.9411102806916185e-06, |
|
"logits/chosen": -2.1432528495788574, |
|
"logits/rejected": -2.0720839500427246, |
|
"logps/chosen": -332.2557067871094, |
|
"logps/rejected": -340.0525817871094, |
|
"loss": 0.5635, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.576932966709137, |
|
"rewards/margins": 0.40913018584251404, |
|
"rewards/rejected": -0.9860631823539734, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 4.930844783586424e-06, |
|
"logits/chosen": -2.1997363567352295, |
|
"logits/rejected": -2.0350875854492188, |
|
"logps/chosen": -320.69720458984375, |
|
"logps/rejected": -334.16217041015625, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5008949041366577, |
|
"rewards/margins": 0.5016661882400513, |
|
"rewards/rejected": -1.002561092376709, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 4.919767404070033e-06, |
|
"logits/chosen": -2.1636505126953125, |
|
"logits/rejected": -2.0376198291778564, |
|
"logps/chosen": -330.29254150390625, |
|
"logps/rejected": -358.37811279296875, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6509741544723511, |
|
"rewards/margins": 0.4970785677433014, |
|
"rewards/rejected": -1.1480529308319092, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 8.5, |
|
"learning_rate": 4.907881841897216e-06, |
|
"logits/chosen": -2.1893808841705322, |
|
"logits/rejected": -2.115548849105835, |
|
"logps/chosen": -376.6805114746094, |
|
"logps/rejected": -387.1194763183594, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8470619320869446, |
|
"rewards/margins": 0.41202712059020996, |
|
"rewards/rejected": -1.2590891122817993, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 4.89519206674919e-06, |
|
"logits/chosen": -2.1224799156188965, |
|
"logits/rejected": -2.1183266639709473, |
|
"logps/chosen": -311.69000244140625, |
|
"logps/rejected": -391.626220703125, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6741515398025513, |
|
"rewards/margins": 0.5650461912155151, |
|
"rewards/rejected": -1.2391977310180664, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 4.881702316907769e-06, |
|
"logits/chosen": -2.1981089115142822, |
|
"logits/rejected": -2.0802032947540283, |
|
"logps/chosen": -349.52618408203125, |
|
"logps/rejected": -364.61285400390625, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5019327998161316, |
|
"rewards/margins": 0.4679330885410309, |
|
"rewards/rejected": -0.9698659181594849, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 4.86741709783982e-06, |
|
"logits/chosen": -2.064434051513672, |
|
"logits/rejected": -1.957092523574829, |
|
"logps/chosen": -323.6076965332031, |
|
"logps/rejected": -344.89971923828125, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6532014608383179, |
|
"rewards/margins": 0.5228798985481262, |
|
"rewards/rejected": -1.1760812997817993, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 6.5, |
|
"learning_rate": 4.852341180692471e-06, |
|
"logits/chosen": -2.0200791358947754, |
|
"logits/rejected": -1.9938948154449463, |
|
"logps/chosen": -333.9423522949219, |
|
"logps/rejected": -399.6837463378906, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8045186996459961, |
|
"rewards/margins": 0.47954291105270386, |
|
"rewards/rejected": -1.2840616703033447, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 4.836479600699579e-06, |
|
"logits/chosen": -2.0388081073760986, |
|
"logits/rejected": -1.9344475269317627, |
|
"logps/chosen": -365.44732666015625, |
|
"logps/rejected": -364.44549560546875, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.677007794380188, |
|
"rewards/margins": 0.46105116605758667, |
|
"rewards/rejected": -1.1380589008331299, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.819837655500014e-06, |
|
"logits/chosen": -2.023102283477783, |
|
"logits/rejected": -1.9553874731063843, |
|
"logps/chosen": -306.1051330566406, |
|
"logps/rejected": -330.5825500488281, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5321759581565857, |
|
"rewards/margins": 0.4713428020477295, |
|
"rewards/rejected": -1.00351881980896, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.982036828994751, |
|
"eval_logits/rejected": -1.9001150131225586, |
|
"eval_logps/chosen": -350.7093200683594, |
|
"eval_logps/rejected": -376.6873779296875, |
|
"eval_loss": 0.566522479057312, |
|
"eval_rewards/accuracies": 0.7250000238418579, |
|
"eval_rewards/chosen": -0.7900638580322266, |
|
"eval_rewards/margins": 0.4996088147163391, |
|
"eval_rewards/rejected": -1.289672613143921, |
|
"eval_runtime": 205.8214, |
|
"eval_samples_per_second": 9.717, |
|
"eval_steps_per_second": 0.607, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 4.802420903368286e-06, |
|
"logits/chosen": -2.045619487762451, |
|
"logits/rejected": -2.054408550262451, |
|
"logps/chosen": -364.0269775390625, |
|
"logps/rejected": -412.7276306152344, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.106697678565979, |
|
"rewards/margins": 0.3950948119163513, |
|
"rewards/rejected": -1.501792311668396, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 4.784235161358124e-06, |
|
"logits/chosen": -2.0790481567382812, |
|
"logits/rejected": -2.003018617630005, |
|
"logps/chosen": -399.572998046875, |
|
"logps/rejected": -433.9266662597656, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2603890895843506, |
|
"rewards/margins": 0.41771477460861206, |
|
"rewards/rejected": -1.6781038045883179, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 5.75, |
|
"learning_rate": 4.765286503359632e-06, |
|
"logits/chosen": -2.0433547496795654, |
|
"logits/rejected": -1.968266248703003, |
|
"logps/chosen": -357.1161193847656, |
|
"logps/rejected": -381.88507080078125, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8924205899238586, |
|
"rewards/margins": 0.43997693061828613, |
|
"rewards/rejected": -1.3323975801467896, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.1875, |
|
"learning_rate": 4.745581258070654e-06, |
|
"logits/chosen": -1.973879098892212, |
|
"logits/rejected": -1.9118854999542236, |
|
"logps/chosen": -329.8348693847656, |
|
"logps/rejected": -366.04461669921875, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.562414288520813, |
|
"rewards/margins": 0.5114336013793945, |
|
"rewards/rejected": -1.073848009109497, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 4.725126006883047e-06, |
|
"logits/chosen": -1.8499130010604858, |
|
"logits/rejected": -1.827254056930542, |
|
"logps/chosen": -307.35150146484375, |
|
"logps/rejected": -369.239990234375, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7073386311531067, |
|
"rewards/margins": 0.5170890092849731, |
|
"rewards/rejected": -1.224427580833435, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.625, |
|
"learning_rate": 4.70392758168454e-06, |
|
"logits/chosen": -1.9882104396820068, |
|
"logits/rejected": -1.8469337224960327, |
|
"logps/chosen": -375.27935791015625, |
|
"logps/rejected": -379.7239685058594, |
|
"loss": 0.5734, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9050993919372559, |
|
"rewards/margins": 0.47090214490890503, |
|
"rewards/rejected": -1.3760015964508057, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 4.68199306257695e-06, |
|
"logits/chosen": -1.9672870635986328, |
|
"logits/rejected": -1.9157249927520752, |
|
"logps/chosen": -396.733154296875, |
|
"logps/rejected": -421.2018127441406, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9322503805160522, |
|
"rewards/margins": 0.5409501791000366, |
|
"rewards/rejected": -1.4732005596160889, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 4.659329775511478e-06, |
|
"logits/chosen": -1.973670244216919, |
|
"logits/rejected": -1.9386718273162842, |
|
"logps/chosen": -343.16387939453125, |
|
"logps/rejected": -386.0146484375, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9054447412490845, |
|
"rewards/margins": 0.559832751750946, |
|
"rewards/rejected": -1.4652774333953857, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 5.0, |
|
"learning_rate": 4.635945289841902e-06, |
|
"logits/chosen": -1.9971214532852173, |
|
"logits/rejected": -1.9401931762695312, |
|
"logps/chosen": -368.4125061035156, |
|
"logps/rejected": -396.1431579589844, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8962229490280151, |
|
"rewards/margins": 0.6276494264602661, |
|
"rewards/rejected": -1.5238726139068604, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 5.25, |
|
"learning_rate": 4.611847415796476e-06, |
|
"logits/chosen": -2.0031449794769287, |
|
"logits/rejected": -1.9153926372528076, |
|
"logps/chosen": -377.18121337890625, |
|
"logps/rejected": -411.65234375, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.0275315046310425, |
|
"rewards/margins": 0.6614707708358765, |
|
"rewards/rejected": -1.6890023946762085, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -1.8879700899124146, |
|
"eval_logits/rejected": -1.8081378936767578, |
|
"eval_logps/chosen": -374.9992370605469, |
|
"eval_logps/rejected": -414.1807556152344, |
|
"eval_loss": 0.5519765019416809, |
|
"eval_rewards/accuracies": 0.718999981880188, |
|
"eval_rewards/chosen": -1.0329629182815552, |
|
"eval_rewards/margins": 0.6316434741020203, |
|
"eval_rewards/rejected": -1.6646064519882202, |
|
"eval_runtime": 205.8805, |
|
"eval_samples_per_second": 9.714, |
|
"eval_steps_per_second": 0.607, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 4.587044201869378e-06, |
|
"logits/chosen": -1.9341243505477905, |
|
"logits/rejected": -1.8541990518569946, |
|
"logps/chosen": -348.41650390625, |
|
"logps/rejected": -377.76055908203125, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9087320566177368, |
|
"rewards/margins": 0.5346066355705261, |
|
"rewards/rejected": -1.4433386325836182, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 4.561543932132574e-06, |
|
"logits/chosen": -1.9719091653823853, |
|
"logits/rejected": -1.8899688720703125, |
|
"logps/chosen": -356.0486145019531, |
|
"logps/rejected": -401.54852294921875, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.79421067237854, |
|
"rewards/margins": 0.6548107266426086, |
|
"rewards/rejected": -1.449021577835083, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 4.535355123469009e-06, |
|
"logits/chosen": -1.938852071762085, |
|
"logits/rejected": -1.8662010431289673, |
|
"logps/chosen": -384.50897216796875, |
|
"logps/rejected": -447.8426818847656, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1384373903274536, |
|
"rewards/margins": 0.725753664970398, |
|
"rewards/rejected": -1.8641912937164307, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 5.625, |
|
"learning_rate": 4.508486522728037e-06, |
|
"logits/chosen": -1.7794740200042725, |
|
"logits/rejected": -1.712432861328125, |
|
"logps/chosen": -402.6400146484375, |
|
"logps/rejected": -434.7808532714844, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2654469013214111, |
|
"rewards/margins": 0.6806719303131104, |
|
"rewards/rejected": -1.9461190700531006, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 4.480947103804044e-06, |
|
"logits/chosen": -1.789461374282837, |
|
"logits/rejected": -1.69761061668396, |
|
"logps/chosen": -392.892578125, |
|
"logps/rejected": -422.740478515625, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.173139214515686, |
|
"rewards/margins": 0.6945830583572388, |
|
"rewards/rejected": -1.8677222728729248, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 4.09375, |
|
"learning_rate": 4.452746064639239e-06, |
|
"logits/chosen": -1.7166792154312134, |
|
"logits/rejected": -1.6769170761108398, |
|
"logps/chosen": -347.1570129394531, |
|
"logps/rejected": -412.37762451171875, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7677599787712097, |
|
"rewards/margins": 0.697329580783844, |
|
"rewards/rejected": -1.4650894403457642, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 4.423892824151617e-06, |
|
"logits/chosen": -1.834933876991272, |
|
"logits/rejected": -1.6764202117919922, |
|
"logps/chosen": -381.7269592285156, |
|
"logps/rejected": -391.5668029785156, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7797503471374512, |
|
"rewards/margins": 0.558749794960022, |
|
"rewards/rejected": -1.3385001420974731, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 4.3943970190891164e-06, |
|
"logits/chosen": -1.7892568111419678, |
|
"logits/rejected": -1.662570595741272, |
|
"logps/chosen": -390.51953125, |
|
"logps/rejected": -397.81451416015625, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8887346982955933, |
|
"rewards/margins": 0.6098966002464294, |
|
"rewards/rejected": -1.498631477355957, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 4.90625, |
|
"learning_rate": 4.364268500811025e-06, |
|
"logits/chosen": -1.6196047067642212, |
|
"logits/rejected": -1.5555574893951416, |
|
"logps/chosen": -402.47772216796875, |
|
"logps/rejected": -446.379638671875, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4109909534454346, |
|
"rewards/margins": 0.6602510213851929, |
|
"rewards/rejected": -2.071241855621338, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.0, |
|
"learning_rate": 4.333517331997704e-06, |
|
"logits/chosen": -1.6825096607208252, |
|
"logits/rejected": -1.56454336643219, |
|
"logps/chosen": -378.9342041015625, |
|
"logps/rejected": -428.3062438964844, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3179118633270264, |
|
"rewards/margins": 0.6010549068450928, |
|
"rewards/rejected": -1.9189666509628296, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -1.5609403848648071, |
|
"eval_logits/rejected": -1.4665495157241821, |
|
"eval_logps/chosen": -403.8534240722656, |
|
"eval_logps/rejected": -448.6079406738281, |
|
"eval_loss": 0.5327390432357788, |
|
"eval_rewards/accuracies": 0.7319999933242798, |
|
"eval_rewards/chosen": -1.3215045928955078, |
|
"eval_rewards/margins": 0.6873735189437866, |
|
"eval_rewards/rejected": -2.008878231048584, |
|
"eval_runtime": 205.8124, |
|
"eval_samples_per_second": 9.718, |
|
"eval_steps_per_second": 0.607, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.302153783289737e-06, |
|
"logits/chosen": -1.657099962234497, |
|
"logits/rejected": -1.5476830005645752, |
|
"logps/chosen": -389.5664367675781, |
|
"logps/rejected": -463.8951110839844, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1841762065887451, |
|
"rewards/margins": 0.7400022745132446, |
|
"rewards/rejected": -1.9241783618927002, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 4.270188329857613e-06, |
|
"logits/chosen": -1.687072515487671, |
|
"logits/rejected": -1.5819660425186157, |
|
"logps/chosen": -392.8175354003906, |
|
"logps/rejected": -414.7140197753906, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9409040212631226, |
|
"rewards/margins": 0.7219224572181702, |
|
"rewards/rejected": -1.6628265380859375, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 4.237631647903115e-06, |
|
"logits/chosen": -1.6374574899673462, |
|
"logits/rejected": -1.513041377067566, |
|
"logps/chosen": -367.43792724609375, |
|
"logps/rejected": -422.2757873535156, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0590665340423584, |
|
"rewards/margins": 0.6901532411575317, |
|
"rewards/rejected": -1.749219536781311, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.5, |
|
"learning_rate": 4.204494611093548e-06, |
|
"logits/chosen": -1.5762876272201538, |
|
"logits/rejected": -1.439675211906433, |
|
"logps/chosen": -353.2894592285156, |
|
"logps/rejected": -415.69384765625, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0523182153701782, |
|
"rewards/margins": 0.8923817873001099, |
|
"rewards/rejected": -1.944700002670288, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 5.75, |
|
"learning_rate": 4.170788286930024e-06, |
|
"logits/chosen": -1.5430004596710205, |
|
"logits/rejected": -1.5310301780700684, |
|
"logps/chosen": -418.9070739746094, |
|
"logps/rejected": -503.3553771972656, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.7990005016326904, |
|
"rewards/margins": 0.6734641194343567, |
|
"rewards/rejected": -2.4724647998809814, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.136523933051005e-06, |
|
"logits/chosen": -1.553636074066162, |
|
"logits/rejected": -1.4528017044067383, |
|
"logps/chosen": -416.9197692871094, |
|
"logps/rejected": -488.38824462890625, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.6012948751449585, |
|
"rewards/margins": 0.7516334652900696, |
|
"rewards/rejected": -2.352928638458252, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.375, |
|
"learning_rate": 4.101712993472348e-06, |
|
"logits/chosen": -1.5965683460235596, |
|
"logits/rejected": -1.5354357957839966, |
|
"logps/chosen": -373.5552978515625, |
|
"logps/rejected": -417.95806884765625, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0864415168762207, |
|
"rewards/margins": 0.6252428293228149, |
|
"rewards/rejected": -1.711684226989746, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 4.066367094765091e-06, |
|
"logits/chosen": -1.5761507749557495, |
|
"logits/rejected": -1.4767498970031738, |
|
"logps/chosen": -377.71893310546875, |
|
"logps/rejected": -418.2323303222656, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.113816261291504, |
|
"rewards/margins": 0.6647173166275024, |
|
"rewards/rejected": -1.7785335779190063, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 4.030498042172277e-06, |
|
"logits/chosen": -1.692792296409607, |
|
"logits/rejected": -1.6168203353881836, |
|
"logps/chosen": -386.3111267089844, |
|
"logps/rejected": -467.1307678222656, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.1883788108825684, |
|
"rewards/margins": 0.8070133328437805, |
|
"rewards/rejected": -1.9953922033309937, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 8.625, |
|
"learning_rate": 3.994117815666095e-06, |
|
"logits/chosen": -1.5926947593688965, |
|
"logits/rejected": -1.482301950454712, |
|
"logps/chosen": -407.1866455078125, |
|
"logps/rejected": -477.66632080078125, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3628543615341187, |
|
"rewards/margins": 0.8543855547904968, |
|
"rewards/rejected": -2.21724009513855, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -1.4903253316879272, |
|
"eval_logits/rejected": -1.3917725086212158, |
|
"eval_logps/chosen": -399.6684265136719, |
|
"eval_logps/rejected": -467.6413269042969, |
|
"eval_loss": 0.5299228429794312, |
|
"eval_rewards/accuracies": 0.7229999899864197, |
|
"eval_rewards/chosen": -1.2796550989151, |
|
"eval_rewards/margins": 0.9195566177368164, |
|
"eval_rewards/rejected": -2.199211597442627, |
|
"eval_runtime": 205.8617, |
|
"eval_samples_per_second": 9.715, |
|
"eval_steps_per_second": 0.607, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 7.125, |
|
"learning_rate": 3.957238565946672e-06, |
|
"logits/chosen": -1.5940111875534058, |
|
"logits/rejected": -1.4753977060317993, |
|
"logps/chosen": -437.0880432128906, |
|
"logps/rejected": -492.3658752441406, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.347034215927124, |
|
"rewards/margins": 0.9986494183540344, |
|
"rewards/rejected": -2.345684051513672, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 3.919872610383831e-06, |
|
"logits/chosen": -1.4148577451705933, |
|
"logits/rejected": -1.3983594179153442, |
|
"logps/chosen": -392.7431640625, |
|
"logps/rejected": -485.83819580078125, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4197003841400146, |
|
"rewards/margins": 1.0409051179885864, |
|
"rewards/rejected": -2.4606053829193115, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 7.0, |
|
"learning_rate": 3.882032428903195e-06, |
|
"logits/chosen": -1.5557122230529785, |
|
"logits/rejected": -1.5083773136138916, |
|
"logps/chosen": -366.6706848144531, |
|
"logps/rejected": -449.88397216796875, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3032705783843994, |
|
"rewards/margins": 0.8904848098754883, |
|
"rewards/rejected": -2.1937553882598877, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 3.84373065981799e-06, |
|
"logits/chosen": -1.607391595840454, |
|
"logits/rejected": -1.5484809875488281, |
|
"logps/chosen": -386.6382751464844, |
|
"logps/rejected": -422.96148681640625, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2972227334976196, |
|
"rewards/margins": 0.7347890734672546, |
|
"rewards/rejected": -2.0320117473602295, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 9.625, |
|
"learning_rate": 3.8049800956079552e-06, |
|
"logits/chosen": -1.5829339027404785, |
|
"logits/rejected": -1.4720735549926758, |
|
"logps/chosen": -412.76531982421875, |
|
"logps/rejected": -479.85272216796875, |
|
"loss": 0.4811, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.502972960472107, |
|
"rewards/margins": 0.8380988240242004, |
|
"rewards/rejected": -2.341071605682373, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 8.375, |
|
"learning_rate": 3.765793678646753e-06, |
|
"logits/chosen": -1.4679361581802368, |
|
"logits/rejected": -1.4195685386657715, |
|
"logps/chosen": -412.76763916015625, |
|
"logps/rejected": -477.0049743652344, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.7044397592544556, |
|
"rewards/margins": 0.8208500742912292, |
|
"rewards/rejected": -2.525290012359619, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 6.125, |
|
"learning_rate": 3.726184496879323e-06, |
|
"logits/chosen": -1.4565179347991943, |
|
"logits/rejected": -1.4047472476959229, |
|
"logps/chosen": -431.69512939453125, |
|
"logps/rejected": -482.6431579589844, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7018753290176392, |
|
"rewards/margins": 0.5642359852790833, |
|
"rewards/rejected": -2.2661116123199463, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 3.686165779450619e-06, |
|
"logits/chosen": -1.5907853841781616, |
|
"logits/rejected": -1.5108683109283447, |
|
"logps/chosen": -422.84735107421875, |
|
"logps/rejected": -478.88580322265625, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5510692596435547, |
|
"rewards/margins": 0.7624626159667969, |
|
"rewards/rejected": -2.3135318756103516, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 3.645750892287178e-06, |
|
"logits/chosen": -1.51558518409729, |
|
"logits/rejected": -1.4765126705169678, |
|
"logps/chosen": -420.275634765625, |
|
"logps/rejected": -512.0084838867188, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6825615167617798, |
|
"rewards/margins": 0.7326836585998535, |
|
"rewards/rejected": -2.4152450561523438, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 3.604953333633009e-06, |
|
"logits/chosen": -1.5319992303848267, |
|
"logits/rejected": -1.4801173210144043, |
|
"logps/chosen": -434.16082763671875, |
|
"logps/rejected": -493.60308837890625, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7253410816192627, |
|
"rewards/margins": 0.7062736749649048, |
|
"rewards/rejected": -2.431614398956299, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.4641598463058472, |
|
"eval_logits/rejected": -1.3684852123260498, |
|
"eval_logps/chosen": -438.16168212890625, |
|
"eval_logps/rejected": -494.5843505859375, |
|
"eval_loss": 0.5188941359519958, |
|
"eval_rewards/accuracies": 0.7200000286102295, |
|
"eval_rewards/chosen": -1.664587140083313, |
|
"eval_rewards/margins": 0.8040550947189331, |
|
"eval_rewards/rejected": -2.468642234802246, |
|
"eval_runtime": 205.9278, |
|
"eval_samples_per_second": 9.712, |
|
"eval_steps_per_second": 0.607, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 3.56378672954129e-06, |
|
"logits/chosen": -1.5462032556533813, |
|
"logits/rejected": -1.4628247022628784, |
|
"logps/chosen": -409.8141174316406, |
|
"logps/rejected": -486.94940185546875, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5370090007781982, |
|
"rewards/margins": 0.756435751914978, |
|
"rewards/rejected": -2.293444871902466, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 3.5222648293233806e-06, |
|
"logits/chosen": -1.492166519165039, |
|
"logits/rejected": -1.4682246446609497, |
|
"logps/chosen": -375.8512268066406, |
|
"logps/rejected": -446.55914306640625, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.162585973739624, |
|
"rewards/margins": 0.9081439971923828, |
|
"rewards/rejected": -2.070729970932007, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 7.25, |
|
"learning_rate": 3.4804015009566573e-06, |
|
"logits/chosen": -1.5281693935394287, |
|
"logits/rejected": -1.4133872985839844, |
|
"logps/chosen": -343.50860595703125, |
|
"logps/rejected": -365.1877746582031, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.016842246055603, |
|
"rewards/margins": 0.5352990031242371, |
|
"rewards/rejected": -1.5521411895751953, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 3.4382107264527244e-06, |
|
"logits/chosen": -1.5180078744888306, |
|
"logits/rejected": -1.351276159286499, |
|
"logps/chosen": -419.5958557128906, |
|
"logps/rejected": -453.79693603515625, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1950498819351196, |
|
"rewards/margins": 0.8461320996284485, |
|
"rewards/rejected": -2.041182041168213, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 7.0, |
|
"learning_rate": 3.3957065971875387e-06, |
|
"logits/chosen": -1.488877534866333, |
|
"logits/rejected": -1.3683526515960693, |
|
"logps/chosen": -446.36004638671875, |
|
"logps/rejected": -501.3478088378906, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.955243468284607, |
|
"rewards/margins": 0.8272415399551392, |
|
"rewards/rejected": -2.782485246658325, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 3.352903309194999e-06, |
|
"logits/chosen": -1.5099315643310547, |
|
"logits/rejected": -1.4115077257156372, |
|
"logps/chosen": -438.7635803222656, |
|
"logps/rejected": -531.4382934570312, |
|
"loss": 0.5104, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.076658248901367, |
|
"rewards/margins": 0.8972193598747253, |
|
"rewards/rejected": -2.9738779067993164, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 3.309815158425591e-06, |
|
"logits/chosen": -1.3534867763519287, |
|
"logits/rejected": -1.33297598361969, |
|
"logps/chosen": -432.561279296875, |
|
"logps/rejected": -499.51153564453125, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.744372010231018, |
|
"rewards/margins": 0.8061767816543579, |
|
"rewards/rejected": -2.550548791885376, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 8.25, |
|
"learning_rate": 3.266456535971654e-06, |
|
"logits/chosen": -1.5983731746673584, |
|
"logits/rejected": -1.5390180349349976, |
|
"logps/chosen": -411.49652099609375, |
|
"logps/rejected": -469.626708984375, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.632322907447815, |
|
"rewards/margins": 0.7145469784736633, |
|
"rewards/rejected": -2.346869945526123, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 3.2228419232608692e-06, |
|
"logits/chosen": -1.6237919330596924, |
|
"logits/rejected": -1.5474942922592163, |
|
"logps/chosen": -407.1629943847656, |
|
"logps/rejected": -489.897705078125, |
|
"loss": 0.502, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5020484924316406, |
|
"rewards/margins": 0.8913022875785828, |
|
"rewards/rejected": -2.393350839614868, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 3.1789858872195888e-06, |
|
"logits/chosen": -1.4964396953582764, |
|
"logits/rejected": -1.4070580005645752, |
|
"logps/chosen": -410.7691345214844, |
|
"logps/rejected": -528.3038330078125, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6621105670928955, |
|
"rewards/margins": 1.0146641731262207, |
|
"rewards/rejected": -2.676774501800537, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -1.505366325378418, |
|
"eval_logits/rejected": -1.4178627729415894, |
|
"eval_logps/chosen": -450.1383056640625, |
|
"eval_logps/rejected": -519.8884887695312, |
|
"eval_loss": 0.5142305493354797, |
|
"eval_rewards/accuracies": 0.7289999723434448, |
|
"eval_rewards/chosen": -1.784353494644165, |
|
"eval_rewards/margins": 0.9373300075531006, |
|
"eval_rewards/rejected": -2.7216835021972656, |
|
"eval_runtime": 205.8963, |
|
"eval_samples_per_second": 9.714, |
|
"eval_steps_per_second": 0.607, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 3.1349030754075945e-06, |
|
"logits/chosen": -1.473548173904419, |
|
"logits/rejected": -1.4354422092437744, |
|
"logps/chosen": -455.54534912109375, |
|
"logps/rejected": -518.474609375, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9718334674835205, |
|
"rewards/margins": 0.7413331270217896, |
|
"rewards/rejected": -2.7131667137145996, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 5.78125, |
|
"learning_rate": 3.0906082111259313e-06, |
|
"logits/chosen": -1.4893120527267456, |
|
"logits/rejected": -1.4045579433441162, |
|
"logps/chosen": -469.7232360839844, |
|
"logps/rejected": -551.2777099609375, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.048574209213257, |
|
"rewards/margins": 1.0462497472763062, |
|
"rewards/rejected": -3.0948235988616943, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 13.0, |
|
"learning_rate": 3.046116088499449e-06, |
|
"logits/chosen": -1.518959403038025, |
|
"logits/rejected": -1.447871208190918, |
|
"logps/chosen": -473.65850830078125, |
|
"logps/rejected": -532.4075927734375, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.0019118785858154, |
|
"rewards/margins": 0.7329636216163635, |
|
"rewards/rejected": -2.734875202178955, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.375, |
|
"learning_rate": 3.0014415675356813e-06, |
|
"logits/chosen": -1.5328662395477295, |
|
"logits/rejected": -1.4063748121261597, |
|
"logps/chosen": -478.6943359375, |
|
"logps/rejected": -537.5908813476562, |
|
"loss": 0.4481, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9542016983032227, |
|
"rewards/margins": 0.9170918464660645, |
|
"rewards/rejected": -2.871293544769287, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 2.9565995691617242e-06, |
|
"logits/chosen": -1.4631025791168213, |
|
"logits/rejected": -1.3391731977462769, |
|
"logps/chosen": -521.5445556640625, |
|
"logps/rejected": -596.1942138671875, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2124335765838623, |
|
"rewards/margins": 1.047732949256897, |
|
"rewards/rejected": -3.260166645050049, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 2.9116050702407706e-06, |
|
"logits/chosen": -1.5032353401184082, |
|
"logits/rejected": -1.4071391820907593, |
|
"logps/chosen": -507.3263244628906, |
|
"logps/rejected": -564.1354370117188, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.2760252952575684, |
|
"rewards/margins": 0.9310242533683777, |
|
"rewards/rejected": -3.20704984664917, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 2.8664730985699537e-06, |
|
"logits/chosen": -1.5140959024429321, |
|
"logits/rejected": -1.4394776821136475, |
|
"logps/chosen": -478.9674377441406, |
|
"logps/rejected": -564.43212890625, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.1803536415100098, |
|
"rewards/margins": 0.8669061660766602, |
|
"rewards/rejected": -3.04725980758667, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 2.8212187278611907e-06, |
|
"logits/chosen": -1.5928372144699097, |
|
"logits/rejected": -1.4567886590957642, |
|
"logps/chosen": -487.82489013671875, |
|
"logps/rejected": -544.0657348632812, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9777450561523438, |
|
"rewards/margins": 0.938134491443634, |
|
"rewards/rejected": -2.915879726409912, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 7.125, |
|
"learning_rate": 2.7758570727066843e-06, |
|
"logits/chosen": -1.4225056171417236, |
|
"logits/rejected": -1.3867155313491821, |
|
"logps/chosen": -464.3641052246094, |
|
"logps/rejected": -549.7391357421875, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.0725159645080566, |
|
"rewards/margins": 1.0133941173553467, |
|
"rewards/rejected": -3.0859100818634033, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 2.730403283530767e-06, |
|
"logits/chosen": -1.4609169960021973, |
|
"logits/rejected": -1.3873465061187744, |
|
"logps/chosen": -514.2507934570312, |
|
"logps/rejected": -596.9881591796875, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.514453887939453, |
|
"rewards/margins": 0.9918726682662964, |
|
"rewards/rejected": -3.506326198577881, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -1.3947843313217163, |
|
"eval_logits/rejected": -1.2972902059555054, |
|
"eval_logps/chosen": -533.4492797851562, |
|
"eval_logps/rejected": -608.9218139648438, |
|
"eval_loss": 0.5058408379554749, |
|
"eval_rewards/accuracies": 0.7360000014305115, |
|
"eval_rewards/chosen": -2.6174628734588623, |
|
"eval_rewards/margins": 0.9945541024208069, |
|
"eval_rewards/rejected": -3.6120169162750244, |
|
"eval_runtime": 205.8968, |
|
"eval_samples_per_second": 9.714, |
|
"eval_steps_per_second": 0.607, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 6.0, |
|
"learning_rate": 2.6848725415297888e-06, |
|
"logits/chosen": -1.4565789699554443, |
|
"logits/rejected": -1.4135055541992188, |
|
"logps/chosen": -489.64874267578125, |
|
"logps/rejected": -567.3189697265625, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4934046268463135, |
|
"rewards/margins": 0.8419889211654663, |
|
"rewards/rejected": -3.3353939056396484, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 7.375, |
|
"learning_rate": 2.639280053601719e-06, |
|
"logits/chosen": -1.573761224746704, |
|
"logits/rejected": -1.4989092350006104, |
|
"logps/chosen": -512.4972534179688, |
|
"logps/rejected": -576.3961181640625, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.162745952606201, |
|
"rewards/margins": 1.0285950899124146, |
|
"rewards/rejected": -3.191340923309326, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 2.59364104726716e-06, |
|
"logits/chosen": -1.484546184539795, |
|
"logits/rejected": -1.3231149911880493, |
|
"logps/chosen": -528.154541015625, |
|
"logps/rejected": -576.6033325195312, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.636870861053467, |
|
"rewards/margins": 0.9289640188217163, |
|
"rewards/rejected": -3.5658352375030518, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 2.547970765583491e-06, |
|
"logits/chosen": -1.4552520513534546, |
|
"logits/rejected": -1.4171597957611084, |
|
"logps/chosen": -519.861328125, |
|
"logps/rejected": -591.8118896484375, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.421581745147705, |
|
"rewards/margins": 0.9120811223983765, |
|
"rewards/rejected": -3.33366322517395, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 2.502284462053799e-06, |
|
"logits/chosen": -1.4553632736206055, |
|
"logits/rejected": -1.3192330598831177, |
|
"logps/chosen": -525.8233642578125, |
|
"logps/rejected": -599.904296875, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.279545307159424, |
|
"rewards/margins": 1.0983493328094482, |
|
"rewards/rejected": -3.377894639968872, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 2.456597395532338e-06, |
|
"logits/chosen": -1.4849998950958252, |
|
"logits/rejected": -1.4006808996200562, |
|
"logps/chosen": -488.8839416503906, |
|
"logps/rejected": -538.5235595703125, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.3017091751098633, |
|
"rewards/margins": 0.8732277750968933, |
|
"rewards/rejected": -3.1749370098114014, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 6.375, |
|
"learning_rate": 2.4109248251281953e-06, |
|
"logits/chosen": -1.5076830387115479, |
|
"logits/rejected": -1.427677869796753, |
|
"logps/chosen": -512.9411010742188, |
|
"logps/rejected": -549.7650146484375, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.252761125564575, |
|
"rewards/margins": 0.8173268437385559, |
|
"rewards/rejected": -3.0700876712799072, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 2.365282005108875e-06, |
|
"logits/chosen": -1.5420407056808472, |
|
"logits/rejected": -1.4118328094482422, |
|
"logps/chosen": -469.3882751464844, |
|
"logps/rejected": -503.13726806640625, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.117811679840088, |
|
"rewards/margins": 0.8648239970207214, |
|
"rewards/rejected": -2.982635498046875, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 2.319684179805491e-06, |
|
"logits/chosen": -1.5114177465438843, |
|
"logits/rejected": -1.377497673034668, |
|
"logps/chosen": -492.01141357421875, |
|
"logps/rejected": -511.5269470214844, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.3241496086120605, |
|
"rewards/margins": 0.6907966732978821, |
|
"rewards/rejected": -3.014946460723877, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 6.0, |
|
"learning_rate": 2.2741465785212905e-06, |
|
"logits/chosen": -1.4633898735046387, |
|
"logits/rejected": -1.3916144371032715, |
|
"logps/chosen": -471.26141357421875, |
|
"logps/rejected": -557.3035278320312, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.141718626022339, |
|
"rewards/margins": 0.8240246772766113, |
|
"rewards/rejected": -2.96574330329895, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.4740034341812134, |
|
"eval_logits/rejected": -1.3782777786254883, |
|
"eval_logps/chosen": -477.5079650878906, |
|
"eval_logps/rejected": -545.9103393554688, |
|
"eval_loss": 0.504342794418335, |
|
"eval_rewards/accuracies": 0.7369999885559082, |
|
"eval_rewards/chosen": -2.0580503940582275, |
|
"eval_rewards/margins": 0.923851728439331, |
|
"eval_rewards/rejected": -2.9819023609161377, |
|
"eval_runtime": 205.8953, |
|
"eval_samples_per_second": 9.714, |
|
"eval_steps_per_second": 0.607, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.75, |
|
"learning_rate": 2.2286844104451848e-06, |
|
"logits/chosen": -1.4495588541030884, |
|
"logits/rejected": -1.3826367855072021, |
|
"logps/chosen": -462.7281799316406, |
|
"logps/rejected": -548.97265625, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.0098259449005127, |
|
"rewards/margins": 0.95000821352005, |
|
"rewards/rejected": -2.959834337234497, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.125, |
|
"learning_rate": 2.183312859572008e-06, |
|
"logits/chosen": -1.4757729768753052, |
|
"logits/rejected": -1.3990167379379272, |
|
"logps/chosen": -496.04339599609375, |
|
"logps/rejected": -542.3977661132812, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0698461532592773, |
|
"rewards/margins": 0.8941160440444946, |
|
"rewards/rejected": -2.9639620780944824, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 9.125, |
|
"learning_rate": 2.1380470796311843e-06, |
|
"logits/chosen": -1.4388748407363892, |
|
"logits/rejected": -1.3238605260849, |
|
"logps/chosen": -490.1044006347656, |
|
"logps/rejected": -552.4682006835938, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.0447587966918945, |
|
"rewards/margins": 0.98194420337677, |
|
"rewards/rejected": -3.026703119277954, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 2.092902189025507e-06, |
|
"logits/chosen": -1.579469919204712, |
|
"logits/rejected": -1.4409890174865723, |
|
"logps/chosen": -501.4720764160156, |
|
"logps/rejected": -554.6583251953125, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.0851852893829346, |
|
"rewards/margins": 0.9953821301460266, |
|
"rewards/rejected": -3.0805675983428955, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 2.0478932657817105e-06, |
|
"logits/chosen": -1.5608408451080322, |
|
"logits/rejected": -1.4242637157440186, |
|
"logps/chosen": -509.3160705566406, |
|
"logps/rejected": -564.5472412109375, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.316391944885254, |
|
"rewards/margins": 0.892681896686554, |
|
"rewards/rejected": -3.209073543548584, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 6.375, |
|
"learning_rate": 2.0030353425145376e-06, |
|
"logits/chosen": -1.4031821489334106, |
|
"logits/rejected": -1.392533779144287, |
|
"logps/chosen": -465.71734619140625, |
|
"logps/rejected": -529.384033203125, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.336087465286255, |
|
"rewards/margins": 0.8120396733283997, |
|
"rewards/rejected": -3.1481270790100098, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 7.75, |
|
"learning_rate": 1.958343401405964e-06, |
|
"logits/chosen": -1.4990739822387695, |
|
"logits/rejected": -1.444461703300476, |
|
"logps/chosen": -472.1663513183594, |
|
"logps/rejected": -566.5687255859375, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.144223690032959, |
|
"rewards/margins": 0.9988595843315125, |
|
"rewards/rejected": -3.1430835723876953, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 1.9138323692012734e-06, |
|
"logits/chosen": -1.5581806898117065, |
|
"logits/rejected": -1.4660371541976929, |
|
"logps/chosen": -500.2381286621094, |
|
"logps/rejected": -572.9973754882812, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.226621150970459, |
|
"rewards/margins": 0.942787766456604, |
|
"rewards/rejected": -3.1694092750549316, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 5.375, |
|
"learning_rate": 1.8695171122236443e-06, |
|
"logits/chosen": -1.4782741069793701, |
|
"logits/rejected": -1.3444011211395264, |
|
"logps/chosen": -538.46923828125, |
|
"logps/rejected": -594.9655151367188, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3236842155456543, |
|
"rewards/margins": 1.0366638898849487, |
|
"rewards/rejected": -3.3603484630584717, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 5.5, |
|
"learning_rate": 1.8254124314089225e-06, |
|
"logits/chosen": -1.4107558727264404, |
|
"logits/rejected": -1.2883371114730835, |
|
"logps/chosen": -511.8663635253906, |
|
"logps/rejected": -562.5142822265625, |
|
"loss": 0.5087, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.319617748260498, |
|
"rewards/margins": 0.765404224395752, |
|
"rewards/rejected": -3.08502197265625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.4261987209320068, |
|
"eval_logits/rejected": -1.333054780960083, |
|
"eval_logps/chosen": -508.8494567871094, |
|
"eval_logps/rejected": -582.47119140625, |
|
"eval_loss": 0.5040486454963684, |
|
"eval_rewards/accuracies": 0.7450000047683716, |
|
"eval_rewards/chosen": -2.371464729309082, |
|
"eval_rewards/margins": 0.9760459661483765, |
|
"eval_rewards/rejected": -3.347510814666748, |
|
"eval_runtime": 205.8406, |
|
"eval_samples_per_second": 9.716, |
|
"eval_steps_per_second": 0.607, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 1.781533057362221e-06, |
|
"logits/chosen": -1.5876115560531616, |
|
"logits/rejected": -1.5332571268081665, |
|
"logps/chosen": -502.91046142578125, |
|
"logps/rejected": -568.5400390625, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2924888134002686, |
|
"rewards/margins": 0.8915454745292664, |
|
"rewards/rejected": -3.1840338706970215, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 1.7378936454380277e-06, |
|
"logits/chosen": -1.4910120964050293, |
|
"logits/rejected": -1.3955504894256592, |
|
"logps/chosen": -508.55474853515625, |
|
"logps/rejected": -586.0122680664062, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.406351089477539, |
|
"rewards/margins": 1.08707594871521, |
|
"rewards/rejected": -3.49342679977417, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 1.6945087708454273e-06, |
|
"logits/chosen": -1.5146963596343994, |
|
"logits/rejected": -1.401847243309021, |
|
"logps/chosen": -513.2926635742188, |
|
"logps/rejected": -559.9019775390625, |
|
"loss": 0.4528, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.2331342697143555, |
|
"rewards/margins": 0.9627434015274048, |
|
"rewards/rejected": -3.19587779045105, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 7.625, |
|
"learning_rate": 1.651392923780105e-06, |
|
"logits/chosen": -1.5016862154006958, |
|
"logits/rejected": -1.433712124824524, |
|
"logps/chosen": -480.95703125, |
|
"logps/rejected": -585.30712890625, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.1668860912323, |
|
"rewards/margins": 1.0968172550201416, |
|
"rewards/rejected": -3.2637035846710205, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 5.875, |
|
"learning_rate": 1.608560504584737e-06, |
|
"logits/chosen": -1.435272455215454, |
|
"logits/rejected": -1.3818395137786865, |
|
"logps/chosen": -489.82293701171875, |
|
"logps/rejected": -585.4735717773438, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3021085262298584, |
|
"rewards/margins": 1.004734992980957, |
|
"rewards/rejected": -3.3068439960479736, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 1.5660258189393945e-06, |
|
"logits/chosen": -1.527229905128479, |
|
"logits/rejected": -1.4203643798828125, |
|
"logps/chosen": -503.65716552734375, |
|
"logps/rejected": -556.7635498046875, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.143995523452759, |
|
"rewards/margins": 1.0609943866729736, |
|
"rewards/rejected": -3.2049896717071533, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 1.5238030730835578e-06, |
|
"logits/chosen": -1.4004570245742798, |
|
"logits/rejected": -1.3315227031707764, |
|
"logps/chosen": -484.2720642089844, |
|
"logps/rejected": -577.0310668945312, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.3353257179260254, |
|
"rewards/margins": 1.0654656887054443, |
|
"rewards/rejected": -3.4007911682128906, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.4819063690713565e-06, |
|
"logits/chosen": -1.44136381149292, |
|
"logits/rejected": -1.3490862846374512, |
|
"logps/chosen": -492.79620361328125, |
|
"logps/rejected": -599.3836059570312, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4487698078155518, |
|
"rewards/margins": 1.134749174118042, |
|
"rewards/rejected": -3.5835189819335938, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 1.4403497000615885e-06, |
|
"logits/chosen": -1.481796145439148, |
|
"logits/rejected": -1.3834607601165771, |
|
"logps/chosen": -502.568603515625, |
|
"logps/rejected": -589.8240356445312, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.3244917392730713, |
|
"rewards/margins": 0.9923933744430542, |
|
"rewards/rejected": -3.316884994506836, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 1.3991469456441273e-06, |
|
"logits/chosen": -1.5021679401397705, |
|
"logits/rejected": -1.4300124645233154, |
|
"logps/chosen": -498.32763671875, |
|
"logps/rejected": -587.1395874023438, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2161061763763428, |
|
"rewards/margins": 1.0599734783172607, |
|
"rewards/rejected": -3.2760796546936035, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.4277222156524658, |
|
"eval_logits/rejected": -1.3340253829956055, |
|
"eval_logps/chosen": -502.36865234375, |
|
"eval_logps/rejected": -582.15625, |
|
"eval_loss": 0.5011327266693115, |
|
"eval_rewards/accuracies": 0.7450000047683716, |
|
"eval_rewards/chosen": -2.306657075881958, |
|
"eval_rewards/margins": 1.0377042293548584, |
|
"eval_rewards/rejected": -3.3443613052368164, |
|
"eval_runtime": 205.8366, |
|
"eval_samples_per_second": 9.716, |
|
"eval_steps_per_second": 0.607, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 14.375, |
|
"learning_rate": 1.3583118672042441e-06, |
|
"logits/chosen": -1.5242735147476196, |
|
"logits/rejected": -1.423452377319336, |
|
"logps/chosen": -495.67608642578125, |
|
"logps/rejected": -586.1196899414062, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.3464579582214355, |
|
"rewards/margins": 1.1166086196899414, |
|
"rewards/rejected": -3.463066577911377, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 6.625, |
|
"learning_rate": 1.3178581033264218e-06, |
|
"logits/chosen": -1.4835782051086426, |
|
"logits/rejected": -1.394766092300415, |
|
"logps/chosen": -526.1903076171875, |
|
"logps/rejected": -599.1194458007812, |
|
"loss": 0.464, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.623629093170166, |
|
"rewards/margins": 0.9394786953926086, |
|
"rewards/rejected": -3.56310772895813, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 1.2777991652391757e-06, |
|
"logits/chosen": -1.4715049266815186, |
|
"logits/rejected": -1.3863550424575806, |
|
"logps/chosen": -536.083984375, |
|
"logps/rejected": -642.5689697265625, |
|
"loss": 0.4522, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.66798996925354, |
|
"rewards/margins": 1.0482239723205566, |
|
"rewards/rejected": -3.7162139415740967, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 1.2381484323024178e-06, |
|
"logits/chosen": -1.418473482131958, |
|
"logits/rejected": -1.34147047996521, |
|
"logps/chosen": -537.9956665039062, |
|
"logps/rejected": -605.8355102539062, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.777344226837158, |
|
"rewards/margins": 1.0527513027191162, |
|
"rewards/rejected": -3.8300960063934326, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 11.0, |
|
"learning_rate": 1.1989191475388518e-06, |
|
"logits/chosen": -1.5058234930038452, |
|
"logits/rejected": -1.4633159637451172, |
|
"logps/chosen": -527.7548828125, |
|
"logps/rejected": -597.4782104492188, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.516946792602539, |
|
"rewards/margins": 0.9122180938720703, |
|
"rewards/rejected": -3.429164409637451, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 1.160124413210918e-06, |
|
"logits/chosen": -1.416601538658142, |
|
"logits/rejected": -1.351701021194458, |
|
"logps/chosen": -547.7350463867188, |
|
"logps/rejected": -607.982666015625, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.456645965576172, |
|
"rewards/margins": 0.9266567230224609, |
|
"rewards/rejected": -3.383302688598633, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 6.125, |
|
"learning_rate": 1.1217771864447396e-06, |
|
"logits/chosen": -1.5074869394302368, |
|
"logits/rejected": -1.4494173526763916, |
|
"logps/chosen": -547.4534912109375, |
|
"logps/rejected": -611.69482421875, |
|
"loss": 0.4353, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.41982102394104, |
|
"rewards/margins": 1.221834659576416, |
|
"rewards/rejected": -3.641655445098877, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 1.08389027490255e-06, |
|
"logits/chosen": -1.4987690448760986, |
|
"logits/rejected": -1.3986529111862183, |
|
"logps/chosen": -490.8663024902344, |
|
"logps/rejected": -572.9545288085938, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.3217995166778564, |
|
"rewards/margins": 0.8936141729354858, |
|
"rewards/rejected": -3.215414047241211, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 1.046476332505036e-06, |
|
"logits/chosen": -1.4200499057769775, |
|
"logits/rejected": -1.355092167854309, |
|
"logps/chosen": -523.000244140625, |
|
"logps/rejected": -604.7027587890625, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.414511203765869, |
|
"rewards/margins": 1.0894904136657715, |
|
"rewards/rejected": -3.504002094268799, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 6.875, |
|
"learning_rate": 1.0095478552050348e-06, |
|
"logits/chosen": -1.4363720417022705, |
|
"logits/rejected": -1.4057958126068115, |
|
"logps/chosen": -509.18304443359375, |
|
"logps/rejected": -622.3807373046875, |
|
"loss": 0.4606, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.538700580596924, |
|
"rewards/margins": 1.1025012731552124, |
|
"rewards/rejected": -3.6412017345428467, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -1.4218993186950684, |
|
"eval_logits/rejected": -1.3291493654251099, |
|
"eval_logps/chosen": -521.8630981445312, |
|
"eval_logps/rejected": -603.546875, |
|
"eval_loss": 0.4991470277309418, |
|
"eval_rewards/accuracies": 0.7429999709129333, |
|
"eval_rewards/chosen": -2.5016019344329834, |
|
"eval_rewards/margins": 1.056665062904358, |
|
"eval_rewards/rejected": -3.5582666397094727, |
|
"eval_runtime": 206.8725, |
|
"eval_samples_per_second": 9.668, |
|
"eval_steps_per_second": 0.604, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 9.731171768139808e-07, |
|
"logits/chosen": -1.425769567489624, |
|
"logits/rejected": -1.3510525226593018, |
|
"logps/chosen": -534.87353515625, |
|
"logps/rejected": -616.7877807617188, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.6500022411346436, |
|
"rewards/margins": 0.9020141363143921, |
|
"rewards/rejected": -3.552016496658325, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 9.371964648825221e-07, |
|
"logits/chosen": -1.497687578201294, |
|
"logits/rejected": -1.411454439163208, |
|
"logps/chosen": -542.4428100585938, |
|
"logps/rejected": -576.1937255859375, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.580232620239258, |
|
"rewards/margins": 0.7872194051742554, |
|
"rewards/rejected": -3.3674521446228027, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 6.75, |
|
"learning_rate": 9.017977166366445e-07, |
|
"logits/chosen": -1.4858081340789795, |
|
"logits/rejected": -1.3852221965789795, |
|
"logps/chosen": -526.9737548828125, |
|
"logps/rejected": -635.5704345703125, |
|
"loss": 0.4595, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.443568468093872, |
|
"rewards/margins": 1.2052720785140991, |
|
"rewards/rejected": -3.6488404273986816, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 8.669327549707096e-07, |
|
"logits/chosen": -1.6181617975234985, |
|
"logits/rejected": -1.5521459579467773, |
|
"logps/chosen": -541.6924438476562, |
|
"logps/rejected": -613.9597778320312, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4517698287963867, |
|
"rewards/margins": 0.8626209497451782, |
|
"rewards/rejected": -3.3143906593322754, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 8.326132244986932e-07, |
|
"logits/chosen": -1.511328935623169, |
|
"logits/rejected": -1.432902216911316, |
|
"logps/chosen": -508.4786071777344, |
|
"logps/rejected": -612.4136962890625, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.4070212841033936, |
|
"rewards/margins": 1.1571033000946045, |
|
"rewards/rejected": -3.564124584197998, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 5.75, |
|
"learning_rate": 7.988505876649863e-07, |
|
"logits/chosen": -1.4642016887664795, |
|
"logits/rejected": -1.353859543800354, |
|
"logps/chosen": -487.744140625, |
|
"logps/rejected": -559.90234375, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.3719143867492676, |
|
"rewards/margins": 0.841119647026062, |
|
"rewards/rejected": -3.213034152984619, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 7.656561209160248e-07, |
|
"logits/chosen": -1.430325984954834, |
|
"logits/rejected": -1.3274606466293335, |
|
"logps/chosen": -524.908447265625, |
|
"logps/rejected": -588.8642578125, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.478081226348877, |
|
"rewards/margins": 1.044504165649414, |
|
"rewards/rejected": -3.522585391998291, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 7.330409109340563e-07, |
|
"logits/chosen": -1.4950729608535767, |
|
"logits/rejected": -1.399980902671814, |
|
"logps/chosen": -539.3687133789062, |
|
"logps/rejected": -593.13623046875, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.505924701690674, |
|
"rewards/margins": 1.0100219249725342, |
|
"rewards/rejected": -3.515946626663208, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 7.010158509342682e-07, |
|
"logits/chosen": -1.422590732574463, |
|
"logits/rejected": -1.3758926391601562, |
|
"logps/chosen": -523.3316650390625, |
|
"logps/rejected": -640.554931640625, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.6385467052459717, |
|
"rewards/margins": 1.0877034664154053, |
|
"rewards/rejected": -3.726250410079956, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 6.695916370265529e-07, |
|
"logits/chosen": -1.454978585243225, |
|
"logits/rejected": -1.4019845724105835, |
|
"logps/chosen": -488.15228271484375, |
|
"logps/rejected": -599.3827514648438, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.454293727874756, |
|
"rewards/margins": 1.122016429901123, |
|
"rewards/rejected": -3.576310396194458, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.4325312376022339, |
|
"eval_logits/rejected": -1.3394147157669067, |
|
"eval_logps/chosen": -521.4944458007812, |
|
"eval_logps/rejected": -599.7630615234375, |
|
"eval_loss": 0.4985138177871704, |
|
"eval_rewards/accuracies": 0.746999979019165, |
|
"eval_rewards/chosen": -2.497915267944336, |
|
"eval_rewards/margins": 1.0225143432617188, |
|
"eval_rewards/rejected": -3.5204296112060547, |
|
"eval_runtime": 206.7979, |
|
"eval_samples_per_second": 9.671, |
|
"eval_steps_per_second": 0.604, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 6.387787646430854e-07, |
|
"logits/chosen": -1.4497127532958984, |
|
"logits/rejected": -1.3799619674682617, |
|
"logps/chosen": -531.990966796875, |
|
"logps/rejected": -574.7440185546875, |
|
"loss": 0.5299, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.597123861312866, |
|
"rewards/margins": 0.8730913400650024, |
|
"rewards/rejected": -3.470215320587158, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 6.085875250329401e-07, |
|
"logits/chosen": -1.5374524593353271, |
|
"logits/rejected": -1.4480254650115967, |
|
"logps/chosen": -493.16351318359375, |
|
"logps/rejected": -555.0206298828125, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.448246717453003, |
|
"rewards/margins": 0.842933177947998, |
|
"rewards/rejected": -3.291179656982422, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 7.21875, |
|
"learning_rate": 5.79028001824894e-07, |
|
"logits/chosen": -1.4127936363220215, |
|
"logits/rejected": -1.4277909994125366, |
|
"logps/chosen": -473.9107360839844, |
|
"logps/rejected": -639.1129150390625, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.282348155975342, |
|
"rewards/margins": 1.2384718656539917, |
|
"rewards/rejected": -3.520819902420044, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 5.501100676595761e-07, |
|
"logits/chosen": -1.5411286354064941, |
|
"logits/rejected": -1.4845304489135742, |
|
"logps/chosen": -528.363525390625, |
|
"logps/rejected": -610.4037475585938, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.430098056793213, |
|
"rewards/margins": 1.0298941135406494, |
|
"rewards/rejected": -3.4599921703338623, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 5.218433808920884e-07, |
|
"logits/chosen": -1.5715194940567017, |
|
"logits/rejected": -1.4276337623596191, |
|
"logps/chosen": -515.0184326171875, |
|
"logps/rejected": -564.2301025390625, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.4059693813323975, |
|
"rewards/margins": 0.9038017988204956, |
|
"rewards/rejected": -3.3097712993621826, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 4.942373823661928e-07, |
|
"logits/chosen": -1.537407398223877, |
|
"logits/rejected": -1.352263331413269, |
|
"logps/chosen": -559.8363037109375, |
|
"logps/rejected": -603.9837646484375, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3580124378204346, |
|
"rewards/margins": 1.0659737586975098, |
|
"rewards/rejected": -3.4239859580993652, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 4.6730129226114363e-07, |
|
"logits/chosen": -1.4590742588043213, |
|
"logits/rejected": -1.3711886405944824, |
|
"logps/chosen": -499.3367614746094, |
|
"logps/rejected": -608.9996337890625, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5338335037231445, |
|
"rewards/margins": 0.9903131723403931, |
|
"rewards/rejected": -3.524146318435669, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 4.4104410701222703e-07, |
|
"logits/chosen": -1.4868284463882446, |
|
"logits/rejected": -1.4123101234436035, |
|
"logps/chosen": -530.2494506835938, |
|
"logps/rejected": -610.2762451171875, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6920907497406006, |
|
"rewards/margins": 0.8161090612411499, |
|
"rewards/rejected": -3.508200168609619, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 6.8125, |
|
"learning_rate": 4.154745963060197e-07, |
|
"logits/chosen": -1.4255352020263672, |
|
"logits/rejected": -1.3225051164627075, |
|
"logps/chosen": -488.10015869140625, |
|
"logps/rejected": -570.8845825195312, |
|
"loss": 0.4793, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.4687325954437256, |
|
"rewards/margins": 1.027092695236206, |
|
"rewards/rejected": -3.4958252906799316, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 3.9060130015138863e-07, |
|
"logits/chosen": -1.4365253448486328, |
|
"logits/rejected": -1.4320509433746338, |
|
"logps/chosen": -510.7242126464844, |
|
"logps/rejected": -598.7286987304688, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.55981707572937, |
|
"rewards/margins": 0.9268480539321899, |
|
"rewards/rejected": -3.4866650104522705, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.4415297508239746, |
|
"eval_logits/rejected": -1.3491688966751099, |
|
"eval_logps/chosen": -517.2503662109375, |
|
"eval_logps/rejected": -594.9102172851562, |
|
"eval_loss": 0.4976504147052765, |
|
"eval_rewards/accuracies": 0.7480000257492065, |
|
"eval_rewards/chosen": -2.4554741382598877, |
|
"eval_rewards/margins": 1.016426682472229, |
|
"eval_rewards/rejected": -3.471900701522827, |
|
"eval_runtime": 206.6827, |
|
"eval_samples_per_second": 9.677, |
|
"eval_steps_per_second": 0.605, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 3.664325260271953e-07, |
|
"logits/chosen": -1.4340312480926514, |
|
"logits/rejected": -1.381696105003357, |
|
"logps/chosen": -529.0896606445312, |
|
"logps/rejected": -568.31787109375, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.3479690551757812, |
|
"rewards/margins": 0.7791107892990112, |
|
"rewards/rejected": -3.127079963684082, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 3.429763461076677e-07, |
|
"logits/chosen": -1.5787631273269653, |
|
"logits/rejected": -1.4651563167572021, |
|
"logps/chosen": -544.9623413085938, |
|
"logps/rejected": -591.4544677734375, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.447205066680908, |
|
"rewards/margins": 0.9208120107650757, |
|
"rewards/rejected": -3.3680167198181152, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 3.202405945663556e-07, |
|
"logits/chosen": -1.5388927459716797, |
|
"logits/rejected": -1.435225248336792, |
|
"logps/chosen": -527.3236694335938, |
|
"logps/rejected": -591.0799560546875, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.576470136642456, |
|
"rewards/margins": 0.910003662109375, |
|
"rewards/rejected": -3.486473798751831, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 2.982328649595856e-07, |
|
"logits/chosen": -1.4623661041259766, |
|
"logits/rejected": -1.418001651763916, |
|
"logps/chosen": -497.0453186035156, |
|
"logps/rejected": -582.2621459960938, |
|
"loss": 0.5222, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.3540682792663574, |
|
"rewards/margins": 0.9974225759506226, |
|
"rewards/rejected": -3.3514907360076904, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 2.7696050769026954e-07, |
|
"logits/chosen": -1.4713447093963623, |
|
"logits/rejected": -1.4519026279449463, |
|
"logps/chosen": -513.2701416015625, |
|
"logps/rejected": -621.521484375, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4599874019622803, |
|
"rewards/margins": 1.0760111808776855, |
|
"rewards/rejected": -3.535998582839966, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 2.564306275529341e-07, |
|
"logits/chosen": -1.4529814720153809, |
|
"logits/rejected": -1.3669501543045044, |
|
"logps/chosen": -488.9100646972656, |
|
"logps/rejected": -551.0786743164062, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.357375144958496, |
|
"rewards/margins": 0.9981120824813843, |
|
"rewards/rejected": -3.355487108230591, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 2.3665008136077332e-07, |
|
"logits/chosen": -1.577505350112915, |
|
"logits/rejected": -1.4699662923812866, |
|
"logps/chosen": -525.9560546875, |
|
"logps/rejected": -618.0115966796875, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.5162107944488525, |
|
"rewards/margins": 1.0870957374572754, |
|
"rewards/rejected": -3.603306293487549, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 7.125, |
|
"learning_rate": 2.1762547565553293e-07, |
|
"logits/chosen": -1.4847347736358643, |
|
"logits/rejected": -1.3919851779937744, |
|
"logps/chosen": -512.091552734375, |
|
"logps/rejected": -616.5901489257812, |
|
"loss": 0.459, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.374174118041992, |
|
"rewards/margins": 1.1540172100067139, |
|
"rewards/rejected": -3.528191328048706, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 1.993631645009747e-07, |
|
"logits/chosen": -1.4473907947540283, |
|
"logits/rejected": -1.346949815750122, |
|
"logps/chosen": -485.31512451171875, |
|
"logps/rejected": -593.9915161132812, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.3521013259887695, |
|
"rewards/margins": 1.203741192817688, |
|
"rewards/rejected": -3.555842161178589, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 1.818692473606748e-07, |
|
"logits/chosen": -1.4863381385803223, |
|
"logits/rejected": -1.3555645942687988, |
|
"logps/chosen": -513.4937744140625, |
|
"logps/rejected": -581.9398803710938, |
|
"loss": 0.4654, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.393968105316162, |
|
"rewards/margins": 1.0673789978027344, |
|
"rewards/rejected": -3.4613468647003174, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -1.440211534500122, |
|
"eval_logits/rejected": -1.347786784172058, |
|
"eval_logps/chosen": -516.6851806640625, |
|
"eval_logps/rejected": -594.441650390625, |
|
"eval_loss": 0.49758249521255493, |
|
"eval_rewards/accuracies": 0.7509999871253967, |
|
"eval_rewards/chosen": -2.449821710586548, |
|
"eval_rewards/margins": 1.0173932313919067, |
|
"eval_rewards/rejected": -3.467214822769165, |
|
"eval_runtime": 206.7023, |
|
"eval_samples_per_second": 9.676, |
|
"eval_steps_per_second": 0.605, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 1.6514956706084885e-07, |
|
"logits/chosen": -1.5661511421203613, |
|
"logits/rejected": -1.444165825843811, |
|
"logps/chosen": -521.919189453125, |
|
"logps/rejected": -586.3562622070312, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.438523769378662, |
|
"rewards/margins": 1.0026253461837769, |
|
"rewards/rejected": -3.4411492347717285, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 6.75, |
|
"learning_rate": 1.4920970783889737e-07, |
|
"logits/chosen": -1.4804189205169678, |
|
"logits/rejected": -1.3565007448196411, |
|
"logps/chosen": -517.4879150390625, |
|
"logps/rejected": -590.8859252929688, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4466044902801514, |
|
"rewards/margins": 0.9578853845596313, |
|
"rewards/rejected": -3.4044899940490723, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 7.4375, |
|
"learning_rate": 1.340549934783164e-07, |
|
"logits/chosen": -1.4345219135284424, |
|
"logits/rejected": -1.398148775100708, |
|
"logps/chosen": -510.354736328125, |
|
"logps/rejected": -606.5594482421875, |
|
"loss": 0.473, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.461632490158081, |
|
"rewards/margins": 1.1440181732177734, |
|
"rewards/rejected": -3.6056504249572754, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 1.196904855305961e-07, |
|
"logits/chosen": -1.4843945503234863, |
|
"logits/rejected": -1.3764415979385376, |
|
"logps/chosen": -511.3872985839844, |
|
"logps/rejected": -572.77783203125, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.429408311843872, |
|
"rewards/margins": 1.009290099143982, |
|
"rewards/rejected": -3.4386982917785645, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 1.0612098162470302e-07, |
|
"logits/chosen": -1.5372560024261475, |
|
"logits/rejected": -1.3980759382247925, |
|
"logps/chosen": -542.35400390625, |
|
"logps/rejected": -590.6364135742188, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.490891933441162, |
|
"rewards/margins": 0.8898002505302429, |
|
"rewards/rejected": -3.38069224357605, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 9.335101386471285e-08, |
|
"logits/chosen": -1.5029523372650146, |
|
"logits/rejected": -1.4057238101959229, |
|
"logps/chosen": -491.0376892089844, |
|
"logps/rejected": -596.9283447265625, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.436763286590576, |
|
"rewards/margins": 1.0951788425445557, |
|
"rewards/rejected": -3.5319418907165527, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 8.138484731612273e-08, |
|
"logits/chosen": -1.4329261779785156, |
|
"logits/rejected": -1.3377002477645874, |
|
"logps/chosen": -494.4960021972656, |
|
"logps/rejected": -598.6796264648438, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.4506049156188965, |
|
"rewards/margins": 1.1435714960098267, |
|
"rewards/rejected": -3.5941765308380127, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 4.75, |
|
"learning_rate": 7.022647858135501e-08, |
|
"logits/chosen": -1.4736502170562744, |
|
"logits/rejected": -1.451183557510376, |
|
"logps/chosen": -493.92706298828125, |
|
"logps/rejected": -590.5465087890625, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.3842227458953857, |
|
"rewards/margins": 1.1362553834915161, |
|
"rewards/rejected": -3.5204784870147705, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 5.987963446492384e-08, |
|
"logits/chosen": -1.4801084995269775, |
|
"logits/rejected": -1.4299700260162354, |
|
"logps/chosen": -557.2908325195312, |
|
"logps/rejected": -669.438720703125, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4419429302215576, |
|
"rewards/margins": 1.1474285125732422, |
|
"rewards/rejected": -3.5893714427948, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 5.034777072871394e-08, |
|
"logits/chosen": -1.4195510149002075, |
|
"logits/rejected": -1.336871862411499, |
|
"logps/chosen": -495.6595153808594, |
|
"logps/rejected": -588.1336669921875, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.4461569786071777, |
|
"rewards/margins": 1.0101310014724731, |
|
"rewards/rejected": -3.4562880992889404, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -1.4365895986557007, |
|
"eval_logits/rejected": -1.34409499168396, |
|
"eval_logps/chosen": -516.9639892578125, |
|
"eval_logps/rejected": -595.033935546875, |
|
"eval_loss": 0.49747607111930847, |
|
"eval_rewards/accuracies": 0.7480000257492065, |
|
"eval_rewards/chosen": -2.4526102542877197, |
|
"eval_rewards/margins": 1.0205274820327759, |
|
"eval_rewards/rejected": -3.473137855529785, |
|
"eval_runtime": 206.9741, |
|
"eval_samples_per_second": 9.663, |
|
"eval_steps_per_second": 0.604, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 4.163407093778243e-08, |
|
"logits/chosen": -1.4668951034545898, |
|
"logits/rejected": -1.3559987545013428, |
|
"logps/chosen": -506.38153076171875, |
|
"logps/rejected": -566.1290283203125, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.4424426555633545, |
|
"rewards/margins": 1.029581904411316, |
|
"rewards/rejected": -3.472024440765381, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 3.37414453970758e-08, |
|
"logits/chosen": -1.5458420515060425, |
|
"logits/rejected": -1.3794844150543213, |
|
"logps/chosen": -514.1174926757812, |
|
"logps/rejected": -563.9821166992188, |
|
"loss": 0.5023, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.361403226852417, |
|
"rewards/margins": 1.0842626094818115, |
|
"rewards/rejected": -3.4456658363342285, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.75, |
|
"learning_rate": 2.6672530179410183e-08, |
|
"logits/chosen": -1.4805978536605835, |
|
"logits/rejected": -1.4541041851043701, |
|
"logps/chosen": -495.7486877441406, |
|
"logps/rejected": -565.6021118164062, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.410522699356079, |
|
"rewards/margins": 0.8961440324783325, |
|
"rewards/rejected": -3.306666612625122, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 8.125, |
|
"learning_rate": 2.04296862450451e-08, |
|
"logits/chosen": -1.4646061658859253, |
|
"logits/rejected": -1.4149842262268066, |
|
"logps/chosen": -559.3178100585938, |
|
"logps/rejected": -612.7825927734375, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.692322254180908, |
|
"rewards/margins": 0.8889071345329285, |
|
"rewards/rejected": -3.5812296867370605, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 1.501499865314171e-08, |
|
"logits/chosen": -1.4032434225082397, |
|
"logits/rejected": -1.3378710746765137, |
|
"logps/chosen": -570.8396606445312, |
|
"logps/rejected": -627.8958129882812, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.5015716552734375, |
|
"rewards/margins": 0.943825364112854, |
|
"rewards/rejected": -3.445397138595581, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 1.0430275865371265e-08, |
|
"logits/chosen": -1.3695303201675415, |
|
"logits/rejected": -1.251265525817871, |
|
"logps/chosen": -500.8570251464844, |
|
"logps/rejected": -570.6333618164062, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.518383502960205, |
|
"rewards/margins": 1.0404582023620605, |
|
"rewards/rejected": -3.5588417053222656, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 6.677049141901315e-09, |
|
"logits/chosen": -1.4154746532440186, |
|
"logits/rejected": -1.3926751613616943, |
|
"logps/chosen": -495.06915283203125, |
|
"logps/rejected": -597.0353393554688, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.457148313522339, |
|
"rewards/margins": 0.9585596919059753, |
|
"rewards/rejected": -3.415708065032959, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 3.756572029968708e-09, |
|
"logits/chosen": -1.5453914403915405, |
|
"logits/rejected": -1.4504231214523315, |
|
"logps/chosen": -514.4291381835938, |
|
"logps/rejected": -585.8607177734375, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.3940634727478027, |
|
"rewards/margins": 1.0368168354034424, |
|
"rewards/rejected": -3.430880308151245, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 6.1875, |
|
"learning_rate": 1.6698199452053199e-09, |
|
"logits/chosen": -1.4837844371795654, |
|
"logits/rejected": -1.3669326305389404, |
|
"logps/chosen": -526.9951171875, |
|
"logps/rejected": -609.9420166015625, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.426424503326416, |
|
"rewards/margins": 1.0135810375213623, |
|
"rewards/rejected": -3.4400055408477783, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 6.875, |
|
"learning_rate": 4.1748984585560094e-10, |
|
"logits/chosen": -1.4921762943267822, |
|
"logits/rejected": -1.3556934595108032, |
|
"logps/chosen": -516.9928588867188, |
|
"logps/rejected": -590.8782958984375, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5034995079040527, |
|
"rewards/margins": 1.0504939556121826, |
|
"rewards/rejected": -3.5539937019348145, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -1.4358515739440918, |
|
"eval_logits/rejected": -1.3432255983352661, |
|
"eval_logps/chosen": -517.0148315429688, |
|
"eval_logps/rejected": -595.1220703125, |
|
"eval_loss": 0.49738776683807373, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -2.4531185626983643, |
|
"eval_rewards/margins": 1.020900845527649, |
|
"eval_rewards/rejected": -3.474019765853882, |
|
"eval_runtime": 206.2288, |
|
"eval_samples_per_second": 9.698, |
|
"eval_steps_per_second": 0.606, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.5044982433319092, |
|
"logits/rejected": -1.4231712818145752, |
|
"logps/chosen": -519.6868896484375, |
|
"logps/rejected": -603.6512451171875, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5049242973327637, |
|
"rewards/margins": 0.8985089063644409, |
|
"rewards/rejected": -3.403432846069336, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5312882837824796, |
|
"train_runtime": 19005.9968, |
|
"train_samples_per_second": 3.217, |
|
"train_steps_per_second": 0.1 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|