|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.617801047120419e-08, |
|
"logits/chosen": -0.58705735206604, |
|
"logits/rejected": -0.5808682441711426, |
|
"logps/chosen": -386.9568786621094, |
|
"logps/rejected": -358.9479064941406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.617801047120419e-07, |
|
"logits/chosen": -0.4515784680843353, |
|
"logits/rejected": -0.4993375241756439, |
|
"logps/chosen": -340.8123474121094, |
|
"logps/rejected": -277.9239501953125, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": -0.0005649582017213106, |
|
"rewards/margins": -6.073586700949818e-05, |
|
"rewards/rejected": -0.0005042223492637277, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.235602094240838e-07, |
|
"logits/chosen": -0.4952339231967926, |
|
"logits/rejected": -0.5711107850074768, |
|
"logps/chosen": -382.88726806640625, |
|
"logps/rejected": -338.0599670410156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0028820387087762356, |
|
"rewards/margins": 0.00026537588564679027, |
|
"rewards/rejected": 0.002616662997752428, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.853403141361258e-07, |
|
"logits/chosen": -0.48099619150161743, |
|
"logits/rejected": -0.4897836148738861, |
|
"logps/chosen": -309.4810791015625, |
|
"logps/rejected": -301.8321838378906, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.004554978106170893, |
|
"rewards/margins": 0.0005212987889535725, |
|
"rewards/rejected": 0.004033679608255625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0471204188481676e-06, |
|
"logits/chosen": -0.5069094896316528, |
|
"logits/rejected": -0.5498972535133362, |
|
"logps/chosen": -294.9288330078125, |
|
"logps/rejected": -289.6306457519531, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00764175970107317, |
|
"rewards/margins": 0.0012837459798902273, |
|
"rewards/rejected": 0.006358013954013586, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3089005235602096e-06, |
|
"logits/chosen": -0.4766748547554016, |
|
"logits/rejected": -0.6654168367385864, |
|
"logps/chosen": -353.96112060546875, |
|
"logps/rejected": -258.60479736328125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.013894833624362946, |
|
"rewards/margins": 0.004527085926383734, |
|
"rewards/rejected": 0.009367748163640499, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5706806282722515e-06, |
|
"logits/chosen": -0.5836187601089478, |
|
"logits/rejected": -0.5082155466079712, |
|
"logps/chosen": -311.2541809082031, |
|
"logps/rejected": -351.26812744140625, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.019704651087522507, |
|
"rewards/margins": -0.0016706284368410707, |
|
"rewards/rejected": 0.021375281736254692, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8324607329842933e-06, |
|
"logits/chosen": -0.4787808954715729, |
|
"logits/rejected": -0.5381947755813599, |
|
"logps/chosen": -394.5220031738281, |
|
"logps/rejected": -304.90826416015625, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.037270233035087585, |
|
"rewards/margins": 0.01263844221830368, |
|
"rewards/rejected": 0.024631790816783905, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.094240837696335e-06, |
|
"logits/chosen": -0.4997861981391907, |
|
"logits/rejected": -0.5136772990226746, |
|
"logps/chosen": -334.28045654296875, |
|
"logps/rejected": -317.20477294921875, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.042300838977098465, |
|
"rewards/margins": 0.006197690032422543, |
|
"rewards/rejected": 0.036103151738643646, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.356020942408377e-06, |
|
"logits/chosen": -0.502038836479187, |
|
"logits/rejected": -0.5897258520126343, |
|
"logps/chosen": -346.77593994140625, |
|
"logps/rejected": -291.6183776855469, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.053455304354429245, |
|
"rewards/margins": 0.013971390202641487, |
|
"rewards/rejected": 0.03948391601443291, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.617801047120419e-06, |
|
"logits/chosen": -0.41690191626548767, |
|
"logits/rejected": -0.5484428405761719, |
|
"logps/chosen": -355.68505859375, |
|
"logps/rejected": -283.31048583984375, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.07831202447414398, |
|
"rewards/margins": 0.021578781306743622, |
|
"rewards/rejected": 0.05673323944211006, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -0.31417590379714966, |
|
"eval_logits/rejected": -0.33973267674446106, |
|
"eval_logps/chosen": -338.1841735839844, |
|
"eval_logps/rejected": -307.254638671875, |
|
"eval_loss": 0.6868060231208801, |
|
"eval_rewards/accuracies": 0.5376983880996704, |
|
"eval_rewards/chosen": 0.08433966338634491, |
|
"eval_rewards/margins": 0.015132981352508068, |
|
"eval_rewards/rejected": 0.06920668482780457, |
|
"eval_runtime": 303.4683, |
|
"eval_samples_per_second": 6.59, |
|
"eval_steps_per_second": 0.208, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8795811518324613e-06, |
|
"logits/chosen": -0.5858451724052429, |
|
"logits/rejected": -0.5726209282875061, |
|
"logps/chosen": -321.0362243652344, |
|
"logps/rejected": -316.78009033203125, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.09119822084903717, |
|
"rewards/margins": 0.01558433473110199, |
|
"rewards/rejected": 0.07561388611793518, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.141361256544503e-06, |
|
"logits/chosen": -0.4955894947052002, |
|
"logits/rejected": -0.5918242335319519, |
|
"logps/chosen": -307.3293762207031, |
|
"logps/rejected": -277.9299011230469, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.09477759897708893, |
|
"rewards/margins": 0.022622792050242424, |
|
"rewards/rejected": 0.07215481251478195, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.403141361256545e-06, |
|
"logits/chosen": -0.433607816696167, |
|
"logits/rejected": -0.5049300193786621, |
|
"logps/chosen": -317.8403625488281, |
|
"logps/rejected": -276.2845153808594, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.12141671031713486, |
|
"rewards/margins": 0.027771497145295143, |
|
"rewards/rejected": 0.09364522993564606, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6649214659685865e-06, |
|
"logits/chosen": -0.35517334938049316, |
|
"logits/rejected": -0.41019129753112793, |
|
"logps/chosen": -331.9830627441406, |
|
"logps/rejected": -290.9942321777344, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.150104820728302, |
|
"rewards/margins": 0.034656353294849396, |
|
"rewards/rejected": 0.11544845998287201, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.926701570680629e-06, |
|
"logits/chosen": -0.41458845138549805, |
|
"logits/rejected": -0.45303764939308167, |
|
"logps/chosen": -374.31268310546875, |
|
"logps/rejected": -304.92242431640625, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.17660722136497498, |
|
"rewards/margins": 0.06261179596185684, |
|
"rewards/rejected": 0.11399543285369873, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.18848167539267e-06, |
|
"logits/chosen": -0.4083201289176941, |
|
"logits/rejected": -0.4408670961856842, |
|
"logps/chosen": -372.0410461425781, |
|
"logps/rejected": -322.4227600097656, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.19496707618236542, |
|
"rewards/margins": 0.05301988124847412, |
|
"rewards/rejected": 0.1419472098350525, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.450261780104713e-06, |
|
"logits/chosen": -0.46804505586624146, |
|
"logits/rejected": -0.4163655638694763, |
|
"logps/chosen": -262.0623474121094, |
|
"logps/rejected": -281.7099609375, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.15472838282585144, |
|
"rewards/margins": 0.0040833535604178905, |
|
"rewards/rejected": 0.15064503252506256, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.712041884816754e-06, |
|
"logits/chosen": -0.41915637254714966, |
|
"logits/rejected": -0.41267627477645874, |
|
"logps/chosen": -316.70330810546875, |
|
"logps/rejected": -292.5999450683594, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.2095501720905304, |
|
"rewards/margins": 0.06560155004262924, |
|
"rewards/rejected": 0.14394858479499817, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9738219895287965e-06, |
|
"logits/chosen": -0.30535200238227844, |
|
"logits/rejected": -0.4938386082649231, |
|
"logps/chosen": -324.11016845703125, |
|
"logps/rejected": -270.20721435546875, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.23200471699237823, |
|
"rewards/margins": 0.09016872942447662, |
|
"rewards/rejected": 0.1418360024690628, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999661831436499e-06, |
|
"logits/chosen": -0.3207201361656189, |
|
"logits/rejected": -0.45532283186912537, |
|
"logps/chosen": -347.30316162109375, |
|
"logps/rejected": -325.71929931640625, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.23967811465263367, |
|
"rewards/margins": 0.06717831641435623, |
|
"rewards/rejected": 0.17249980568885803, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -0.3406147360801697, |
|
"eval_logits/rejected": -0.3758181631565094, |
|
"eval_logps/chosen": -322.39111328125, |
|
"eval_logps/rejected": -296.1336669921875, |
|
"eval_loss": 0.6715283989906311, |
|
"eval_rewards/accuracies": 0.5714285969734192, |
|
"eval_rewards/chosen": 0.24227052927017212, |
|
"eval_rewards/margins": 0.061853885650634766, |
|
"eval_rewards/rejected": 0.18041667342185974, |
|
"eval_runtime": 307.592, |
|
"eval_samples_per_second": 6.502, |
|
"eval_steps_per_second": 0.205, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9984929711403395e-06, |
|
"logits/chosen": -0.4271935820579529, |
|
"logits/rejected": -0.5097531676292419, |
|
"logps/chosen": -321.6978454589844, |
|
"logps/rejected": -279.8887939453125, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.21578499674797058, |
|
"rewards/margins": 0.05311957001686096, |
|
"rewards/rejected": 0.1626654416322708, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996489634487865e-06, |
|
"logits/chosen": -0.2373839169740677, |
|
"logits/rejected": -0.3769948184490204, |
|
"logps/chosen": -373.025634765625, |
|
"logps/rejected": -327.1902160644531, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.287474125623703, |
|
"rewards/margins": 0.09721784293651581, |
|
"rewards/rejected": 0.19025632739067078, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9936524905772466e-06, |
|
"logits/chosen": -0.3467353880405426, |
|
"logits/rejected": -0.49967822432518005, |
|
"logps/chosen": -360.65362548828125, |
|
"logps/rejected": -339.7496643066406, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.27538615465164185, |
|
"rewards/margins": 0.08647196739912033, |
|
"rewards/rejected": 0.18891416490077972, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9899824869915e-06, |
|
"logits/chosen": -0.3842504024505615, |
|
"logits/rejected": -0.4908681809902191, |
|
"logps/chosen": -323.8313293457031, |
|
"logps/rejected": -300.1197204589844, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.2536216378211975, |
|
"rewards/margins": 0.08490858227014542, |
|
"rewards/rejected": 0.16871307790279388, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985480849482012e-06, |
|
"logits/chosen": -0.37045997381210327, |
|
"logits/rejected": -0.41814011335372925, |
|
"logps/chosen": -311.5799560546875, |
|
"logps/rejected": -304.9263610839844, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.22143617272377014, |
|
"rewards/margins": 0.06613994389772415, |
|
"rewards/rejected": 0.1552962213754654, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980149081559142e-06, |
|
"logits/chosen": -0.33250561356544495, |
|
"logits/rejected": -0.38181784749031067, |
|
"logps/chosen": -346.22845458984375, |
|
"logps/rejected": -355.2063293457031, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.2670690417289734, |
|
"rewards/margins": 0.0644298866391182, |
|
"rewards/rejected": 0.20263917744159698, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9739889639900655e-06, |
|
"logits/chosen": -0.46652165055274963, |
|
"logits/rejected": -0.4939687252044678, |
|
"logps/chosen": -316.5768127441406, |
|
"logps/rejected": -301.8284606933594, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.22106440365314484, |
|
"rewards/margins": 0.05704628303647041, |
|
"rewards/rejected": 0.16401812434196472, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967002554204009e-06, |
|
"logits/chosen": -0.325555682182312, |
|
"logits/rejected": -0.42002320289611816, |
|
"logps/chosen": -332.63031005859375, |
|
"logps/rejected": -311.92181396484375, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.19830381870269775, |
|
"rewards/margins": 0.07296115905046463, |
|
"rewards/rejected": 0.12534265220165253, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.959192185605089e-06, |
|
"logits/chosen": -0.36657431721687317, |
|
"logits/rejected": -0.4914252758026123, |
|
"logps/chosen": -323.62890625, |
|
"logps/rejected": -286.2757873535156, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.17159457504749298, |
|
"rewards/margins": 0.07951690256595612, |
|
"rewards/rejected": 0.09207765758037567, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.950560466792969e-06, |
|
"logits/chosen": -0.4312424659729004, |
|
"logits/rejected": -0.4952569007873535, |
|
"logps/chosen": -284.1667785644531, |
|
"logps/rejected": -273.22900390625, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.14056554436683655, |
|
"rewards/margins": 0.13772882521152496, |
|
"rewards/rejected": 0.002836701227352023, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -0.442807137966156, |
|
"eval_logits/rejected": -0.475908488035202, |
|
"eval_logps/chosen": -331.02752685546875, |
|
"eval_logps/rejected": -309.7589111328125, |
|
"eval_loss": 0.6529492735862732, |
|
"eval_rewards/accuracies": 0.6646825671195984, |
|
"eval_rewards/chosen": 0.155906543135643, |
|
"eval_rewards/margins": 0.11174201965332031, |
|
"eval_rewards/rejected": 0.04416452720761299, |
|
"eval_runtime": 303.7935, |
|
"eval_samples_per_second": 6.583, |
|
"eval_steps_per_second": 0.207, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9411102806916185e-06, |
|
"logits/chosen": -0.41296476125717163, |
|
"logits/rejected": -0.5174468755722046, |
|
"logps/chosen": -362.47833251953125, |
|
"logps/rejected": -323.5210266113281, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.1841851770877838, |
|
"rewards/margins": 0.19086548686027527, |
|
"rewards/rejected": -0.00668031070381403, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.930844783586424e-06, |
|
"logits/chosen": -0.5255850553512573, |
|
"logits/rejected": -0.5664322376251221, |
|
"logps/chosen": -277.1454162597656, |
|
"logps/rejected": -295.079833984375, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.09250400960445404, |
|
"rewards/margins": 0.09653620421886444, |
|
"rewards/rejected": -0.004032188560813665, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.919767404070033e-06, |
|
"logits/chosen": -0.5266455411911011, |
|
"logits/rejected": -0.4706489145755768, |
|
"logps/chosen": -285.48236083984375, |
|
"logps/rejected": -343.0083923339844, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06514567881822586, |
|
"rewards/margins": 0.07579077780246735, |
|
"rewards/rejected": -0.010645096190273762, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.907881841897216e-06, |
|
"logits/chosen": -0.43907618522644043, |
|
"logits/rejected": -0.5358369946479797, |
|
"logps/chosen": -377.4954528808594, |
|
"logps/rejected": -327.7589111328125, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.13037431240081787, |
|
"rewards/margins": 0.13443666696548462, |
|
"rewards/rejected": -0.004062363877892494, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.89519206674919e-06, |
|
"logits/chosen": -0.4344902038574219, |
|
"logits/rejected": -0.5312429666519165, |
|
"logps/chosen": -323.53448486328125, |
|
"logps/rejected": -311.77496337890625, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.09232774376869202, |
|
"rewards/margins": 0.17667637765407562, |
|
"rewards/rejected": -0.0843486562371254, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.881702316907769e-06, |
|
"logits/chosen": -0.5391712784767151, |
|
"logits/rejected": -0.533171534538269, |
|
"logps/chosen": -264.92230224609375, |
|
"logps/rejected": -285.822998046875, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05296499282121658, |
|
"rewards/margins": 0.13872875273227692, |
|
"rewards/rejected": -0.08576375991106033, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.86741709783982e-06, |
|
"logits/chosen": -0.5374969244003296, |
|
"logits/rejected": -0.5305294394493103, |
|
"logps/chosen": -330.28271484375, |
|
"logps/rejected": -368.3117980957031, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.04659426584839821, |
|
"rewards/margins": 0.18263372778892517, |
|
"rewards/rejected": -0.13603946566581726, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.852341180692471e-06, |
|
"logits/chosen": -0.506037712097168, |
|
"logits/rejected": -0.5441786646842957, |
|
"logps/chosen": -345.60064697265625, |
|
"logps/rejected": -334.61407470703125, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.023661959916353226, |
|
"rewards/margins": 0.12598641216754913, |
|
"rewards/rejected": -0.14964835345745087, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.836479600699579e-06, |
|
"logits/chosen": -0.6301513314247131, |
|
"logits/rejected": -0.615659236907959, |
|
"logps/chosen": -330.850830078125, |
|
"logps/rejected": -338.16070556640625, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.07495202869176865, |
|
"rewards/margins": 0.1975657194852829, |
|
"rewards/rejected": -0.27251774072647095, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.819837655500014e-06, |
|
"logits/chosen": -0.5807877779006958, |
|
"logits/rejected": -0.6708138585090637, |
|
"logps/chosen": -367.5466003417969, |
|
"logps/rejected": -348.1293640136719, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1158147007226944, |
|
"rewards/margins": 0.1703101098537445, |
|
"rewards/rejected": -0.2861248254776001, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -0.5491507649421692, |
|
"eval_logits/rejected": -0.5776416659355164, |
|
"eval_logps/chosen": -357.9351806640625, |
|
"eval_logps/rejected": -345.4769287109375, |
|
"eval_loss": 0.6271597743034363, |
|
"eval_rewards/accuracies": 0.6865079402923584, |
|
"eval_rewards/chosen": -0.1131698414683342, |
|
"eval_rewards/margins": 0.19984641671180725, |
|
"eval_rewards/rejected": -0.31301626563072205, |
|
"eval_runtime": 303.4179, |
|
"eval_samples_per_second": 6.592, |
|
"eval_steps_per_second": 0.208, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.802420903368286e-06, |
|
"logits/chosen": -0.6273578405380249, |
|
"logits/rejected": -0.6960797309875488, |
|
"logps/chosen": -361.93359375, |
|
"logps/rejected": -319.00341796875, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.197979137301445, |
|
"rewards/margins": 0.10214652866125107, |
|
"rewards/rejected": -0.3001256585121155, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.784235161358124e-06, |
|
"logits/chosen": -0.5516955852508545, |
|
"logits/rejected": -0.6049106121063232, |
|
"logps/chosen": -367.198974609375, |
|
"logps/rejected": -363.4229736328125, |
|
"loss": 0.6268, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12621311843395233, |
|
"rewards/margins": 0.13846664130687714, |
|
"rewards/rejected": -0.26467975974082947, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.765286503359632e-06, |
|
"logits/chosen": -0.5340523719787598, |
|
"logits/rejected": -0.6084105372428894, |
|
"logps/chosen": -407.4958190917969, |
|
"logps/rejected": -401.229736328125, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.05744675546884537, |
|
"rewards/margins": 0.27851784229278564, |
|
"rewards/rejected": -0.3359646201133728, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.745581258070654e-06, |
|
"logits/chosen": -0.5335565805435181, |
|
"logits/rejected": -0.5681129693984985, |
|
"logps/chosen": -342.0080261230469, |
|
"logps/rejected": -333.02569580078125, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16289742290973663, |
|
"rewards/margins": 0.25955528020858765, |
|
"rewards/rejected": -0.4224526882171631, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.725126006883047e-06, |
|
"logits/chosen": -0.554370105266571, |
|
"logits/rejected": -0.6364303827285767, |
|
"logps/chosen": -396.9653625488281, |
|
"logps/rejected": -362.92730712890625, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.19718773663043976, |
|
"rewards/margins": 0.19910624623298645, |
|
"rewards/rejected": -0.396293967962265, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.70392758168454e-06, |
|
"logits/chosen": -0.624381422996521, |
|
"logits/rejected": -0.6213779449462891, |
|
"logps/chosen": -394.22039794921875, |
|
"logps/rejected": -391.92388916015625, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.17801611125469208, |
|
"rewards/margins": 0.1999357044696808, |
|
"rewards/rejected": -0.37795180082321167, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68199306257695e-06, |
|
"logits/chosen": -0.5325026512145996, |
|
"logits/rejected": -0.6415473222732544, |
|
"logps/chosen": -373.08441162109375, |
|
"logps/rejected": -333.419189453125, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.15178225934505463, |
|
"rewards/margins": 0.23722997307777405, |
|
"rewards/rejected": -0.38901224732398987, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.659329775511478e-06, |
|
"logits/chosen": -0.5378426313400269, |
|
"logits/rejected": -0.5987221002578735, |
|
"logps/chosen": -354.837158203125, |
|
"logps/rejected": -358.8296813964844, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1486106812953949, |
|
"rewards/margins": 0.21964044868946075, |
|
"rewards/rejected": -0.36825114488601685, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.635945289841902e-06, |
|
"logits/chosen": -0.594926118850708, |
|
"logits/rejected": -0.6234263181686401, |
|
"logps/chosen": -376.20172119140625, |
|
"logps/rejected": -401.26068115234375, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07999664545059204, |
|
"rewards/margins": 0.32993897795677185, |
|
"rewards/rejected": -0.4099356234073639, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.611847415796476e-06, |
|
"logits/chosen": -0.5384534001350403, |
|
"logits/rejected": -0.5856305956840515, |
|
"logps/chosen": -344.09375, |
|
"logps/rejected": -328.9897766113281, |
|
"loss": 0.6233, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.11651408672332764, |
|
"rewards/margins": 0.17748796939849854, |
|
"rewards/rejected": -0.2940020263195038, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -0.5494967103004456, |
|
"eval_logits/rejected": -0.5819825530052185, |
|
"eval_logps/chosen": -362.3848571777344, |
|
"eval_logps/rejected": -356.7882080078125, |
|
"eval_loss": 0.6162292957305908, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": -0.15766657888889313, |
|
"eval_rewards/margins": 0.2684621512889862, |
|
"eval_rewards/rejected": -0.4261287748813629, |
|
"eval_runtime": 304.7575, |
|
"eval_samples_per_second": 6.563, |
|
"eval_steps_per_second": 0.207, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.587044201869378e-06, |
|
"logits/chosen": -0.5900930166244507, |
|
"logits/rejected": -0.604232907295227, |
|
"logps/chosen": -365.4901123046875, |
|
"logps/rejected": -390.84246826171875, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15175417065620422, |
|
"rewards/margins": 0.3504869043827057, |
|
"rewards/rejected": -0.5022410750389099, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.561543932132574e-06, |
|
"logits/chosen": -0.5539441108703613, |
|
"logits/rejected": -0.6341567039489746, |
|
"logps/chosen": -371.22308349609375, |
|
"logps/rejected": -340.50457763671875, |
|
"loss": 0.6261, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.36344796419143677, |
|
"rewards/margins": 0.220851331949234, |
|
"rewards/rejected": -0.5842992663383484, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.535355123469009e-06, |
|
"logits/chosen": -0.7428617477416992, |
|
"logits/rejected": -0.7538031935691833, |
|
"logps/chosen": -303.5591735839844, |
|
"logps/rejected": -322.505126953125, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.45155367255210876, |
|
"rewards/margins": 0.2669391930103302, |
|
"rewards/rejected": -0.718492865562439, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508486522728037e-06, |
|
"logits/chosen": -0.5991286039352417, |
|
"logits/rejected": -0.6453845500946045, |
|
"logps/chosen": -350.3877258300781, |
|
"logps/rejected": -354.8768615722656, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24457433819770813, |
|
"rewards/margins": 0.3572844862937927, |
|
"rewards/rejected": -0.6018588542938232, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.480947103804044e-06, |
|
"logits/chosen": -0.6068693399429321, |
|
"logits/rejected": -0.6633307337760925, |
|
"logps/chosen": -366.67510986328125, |
|
"logps/rejected": -344.18536376953125, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4160049557685852, |
|
"rewards/margins": 0.27529022097587585, |
|
"rewards/rejected": -0.6912952065467834, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.452746064639239e-06, |
|
"logits/chosen": -0.6586125493049622, |
|
"logits/rejected": -0.6525910496711731, |
|
"logps/chosen": -389.03436279296875, |
|
"logps/rejected": -459.2569274902344, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4362773895263672, |
|
"rewards/margins": 0.41235917806625366, |
|
"rewards/rejected": -0.8486365079879761, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.423892824151617e-06, |
|
"logits/chosen": -0.6040334701538086, |
|
"logits/rejected": -0.6174503564834595, |
|
"logps/chosen": -361.7618408203125, |
|
"logps/rejected": -354.9851379394531, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.33209994435310364, |
|
"rewards/margins": 0.2956308126449585, |
|
"rewards/rejected": -0.6277307271957397, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3943970190891164e-06, |
|
"logits/chosen": -0.5234232544898987, |
|
"logits/rejected": -0.6443308591842651, |
|
"logps/chosen": -349.57867431640625, |
|
"logps/rejected": -357.2449645996094, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3364105820655823, |
|
"rewards/margins": 0.3499498665332794, |
|
"rewards/rejected": -0.6863604784011841, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.364268500811025e-06, |
|
"logits/chosen": -0.5796949863433838, |
|
"logits/rejected": -0.6078803539276123, |
|
"logps/chosen": -393.03057861328125, |
|
"logps/rejected": -394.01068115234375, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3195868730545044, |
|
"rewards/margins": 0.2843206822872162, |
|
"rewards/rejected": -0.603907585144043, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.333517331997704e-06, |
|
"logits/chosen": -0.4854974150657654, |
|
"logits/rejected": -0.5861517786979675, |
|
"logps/chosen": -409.95849609375, |
|
"logps/rejected": -371.15240478515625, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.351412832736969, |
|
"rewards/margins": 0.25703001022338867, |
|
"rewards/rejected": -0.6084428429603577, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -0.573330819606781, |
|
"eval_logits/rejected": -0.6100495457649231, |
|
"eval_logps/chosen": -380.79119873046875, |
|
"eval_logps/rejected": -382.4302978515625, |
|
"eval_loss": 0.6062807440757751, |
|
"eval_rewards/accuracies": 0.6805555820465088, |
|
"eval_rewards/chosen": -0.3417300879955292, |
|
"eval_rewards/margins": 0.34081971645355225, |
|
"eval_rewards/rejected": -0.6825497150421143, |
|
"eval_runtime": 305.3418, |
|
"eval_samples_per_second": 6.55, |
|
"eval_steps_per_second": 0.206, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.302153783289737e-06, |
|
"logits/chosen": -0.5746784210205078, |
|
"logits/rejected": -0.6247537136077881, |
|
"logps/chosen": -327.1573791503906, |
|
"logps/rejected": -373.3064270019531, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.30045533180236816, |
|
"rewards/margins": 0.5708844661712646, |
|
"rewards/rejected": -0.8713397979736328, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.270188329857613e-06, |
|
"logits/chosen": -0.5362870097160339, |
|
"logits/rejected": -0.603136420249939, |
|
"logps/chosen": -377.772705078125, |
|
"logps/rejected": -375.9126281738281, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.35553207993507385, |
|
"rewards/margins": 0.38167324662208557, |
|
"rewards/rejected": -0.7372053265571594, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.237631647903115e-06, |
|
"logits/chosen": -0.5939895510673523, |
|
"logits/rejected": -0.6751977801322937, |
|
"logps/chosen": -431.8272399902344, |
|
"logps/rejected": -434.0873107910156, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.404622882604599, |
|
"rewards/margins": 0.4936921000480652, |
|
"rewards/rejected": -0.8983149528503418, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.204494611093548e-06, |
|
"logits/chosen": -0.5942648649215698, |
|
"logits/rejected": -0.6419891119003296, |
|
"logps/chosen": -398.41192626953125, |
|
"logps/rejected": -402.21881103515625, |
|
"loss": 0.5819, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5822068452835083, |
|
"rewards/margins": 0.2493685930967331, |
|
"rewards/rejected": -0.8315755128860474, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.170788286930024e-06, |
|
"logits/chosen": -0.6248338222503662, |
|
"logits/rejected": -0.6994636654853821, |
|
"logps/chosen": -358.5780029296875, |
|
"logps/rejected": -354.32513427734375, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5734933614730835, |
|
"rewards/margins": 0.3680447041988373, |
|
"rewards/rejected": -0.9415380358695984, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.136523933051005e-06, |
|
"logits/chosen": -0.5910335183143616, |
|
"logits/rejected": -0.6014568209648132, |
|
"logps/chosen": -367.816162109375, |
|
"logps/rejected": -425.21478271484375, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6200834512710571, |
|
"rewards/margins": 0.457810640335083, |
|
"rewards/rejected": -1.0778939723968506, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.101712993472348e-06, |
|
"logits/chosen": -0.45557427406311035, |
|
"logits/rejected": -0.586103081703186, |
|
"logps/chosen": -435.7613220214844, |
|
"logps/rejected": -412.81610107421875, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5693609714508057, |
|
"rewards/margins": 0.3782386779785156, |
|
"rewards/rejected": -0.9475995898246765, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.066367094765091e-06, |
|
"logits/chosen": -0.6208279132843018, |
|
"logits/rejected": -0.6261738538742065, |
|
"logps/chosen": -346.9092712402344, |
|
"logps/rejected": -385.6962890625, |
|
"loss": 0.6158, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6417191624641418, |
|
"rewards/margins": 0.22618098556995392, |
|
"rewards/rejected": -0.8679000735282898, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.030498042172277e-06, |
|
"logits/chosen": -0.4598918855190277, |
|
"logits/rejected": -0.6179080009460449, |
|
"logps/chosen": -456.4967346191406, |
|
"logps/rejected": -428.18096923828125, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3372291326522827, |
|
"rewards/margins": 0.46832937002182007, |
|
"rewards/rejected": -0.8055585026741028, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.994117815666095e-06, |
|
"logits/chosen": -0.514435350894928, |
|
"logits/rejected": -0.5911833047866821, |
|
"logps/chosen": -425.69085693359375, |
|
"logps/rejected": -387.5975646972656, |
|
"loss": 0.6051, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4158889651298523, |
|
"rewards/margins": 0.2637188136577606, |
|
"rewards/rejected": -0.6796077489852905, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -0.5761136412620544, |
|
"eval_logits/rejected": -0.6107844710350037, |
|
"eval_logps/chosen": -395.6782531738281, |
|
"eval_logps/rejected": -402.2430725097656, |
|
"eval_loss": 0.5972979068756104, |
|
"eval_rewards/accuracies": 0.6944444179534912, |
|
"eval_rewards/chosen": -0.4906010627746582, |
|
"eval_rewards/margins": 0.39007675647735596, |
|
"eval_rewards/rejected": -0.8806778192520142, |
|
"eval_runtime": 305.6972, |
|
"eval_samples_per_second": 6.542, |
|
"eval_steps_per_second": 0.206, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.957238565946672e-06, |
|
"logits/chosen": -0.5273677706718445, |
|
"logits/rejected": -0.5377794504165649, |
|
"logps/chosen": -402.70660400390625, |
|
"logps/rejected": -437.87628173828125, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.46286138892173767, |
|
"rewards/margins": 0.4086821675300598, |
|
"rewards/rejected": -0.8715435266494751, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.919872610383831e-06, |
|
"logits/chosen": -0.6572099924087524, |
|
"logits/rejected": -0.6273663640022278, |
|
"logps/chosen": -354.00579833984375, |
|
"logps/rejected": -382.08441162109375, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5593703389167786, |
|
"rewards/margins": 0.3321402668952942, |
|
"rewards/rejected": -0.8915106654167175, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.882032428903195e-06, |
|
"logits/chosen": -0.6124129891395569, |
|
"logits/rejected": -0.6364959478378296, |
|
"logps/chosen": -382.92462158203125, |
|
"logps/rejected": -385.5881042480469, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6445373892784119, |
|
"rewards/margins": 0.42275238037109375, |
|
"rewards/rejected": -1.0672898292541504, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.84373065981799e-06, |
|
"logits/chosen": -0.5338658690452576, |
|
"logits/rejected": -0.6029759049415588, |
|
"logps/chosen": -454.47900390625, |
|
"logps/rejected": -433.13140869140625, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.659946858882904, |
|
"rewards/margins": 0.5364845395088196, |
|
"rewards/rejected": -1.1964313983917236, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8049800956079552e-06, |
|
"logits/chosen": -0.6849664449691772, |
|
"logits/rejected": -0.7146345376968384, |
|
"logps/chosen": -375.855224609375, |
|
"logps/rejected": -412.647216796875, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7736431360244751, |
|
"rewards/margins": 0.37692931294441223, |
|
"rewards/rejected": -1.150572419166565, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.765793678646753e-06, |
|
"logits/chosen": -0.5949341058731079, |
|
"logits/rejected": -0.630172610282898, |
|
"logps/chosen": -380.9452209472656, |
|
"logps/rejected": -441.0630798339844, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4381617605686188, |
|
"rewards/margins": 0.5467316508293152, |
|
"rewards/rejected": -0.9848934412002563, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.726184496879323e-06, |
|
"logits/chosen": -0.5766685009002686, |
|
"logits/rejected": -0.6258134841918945, |
|
"logps/chosen": -385.54351806640625, |
|
"logps/rejected": -396.5734558105469, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5654970407485962, |
|
"rewards/margins": 0.3876551687717438, |
|
"rewards/rejected": -0.9531521797180176, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.686165779450619e-06, |
|
"logits/chosen": -0.5654915571212769, |
|
"logits/rejected": -0.6695746183395386, |
|
"logps/chosen": -422.5735778808594, |
|
"logps/rejected": -401.92034912109375, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6763705015182495, |
|
"rewards/margins": 0.4181916117668152, |
|
"rewards/rejected": -1.0945621728897095, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.645750892287178e-06, |
|
"logits/chosen": -0.5739408731460571, |
|
"logits/rejected": -0.649164617061615, |
|
"logps/chosen": -439.37615966796875, |
|
"logps/rejected": -465.0233459472656, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.711835503578186, |
|
"rewards/margins": 0.45104461908340454, |
|
"rewards/rejected": -1.1628801822662354, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604953333633009e-06, |
|
"logits/chosen": -0.5284892916679382, |
|
"logits/rejected": -0.6252182722091675, |
|
"logps/chosen": -437.35821533203125, |
|
"logps/rejected": -434.4740295410156, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6502100229263306, |
|
"rewards/margins": 0.4485486149787903, |
|
"rewards/rejected": -1.098758578300476, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -0.5841491222381592, |
|
"eval_logits/rejected": -0.6245321035385132, |
|
"eval_logps/chosen": -409.95855712890625, |
|
"eval_logps/rejected": -422.52947998046875, |
|
"eval_loss": 0.5927994251251221, |
|
"eval_rewards/accuracies": 0.7023809552192688, |
|
"eval_rewards/chosen": -0.6334035396575928, |
|
"eval_rewards/margins": 0.45013830065727234, |
|
"eval_rewards/rejected": -1.083541750907898, |
|
"eval_runtime": 304.2196, |
|
"eval_samples_per_second": 6.574, |
|
"eval_steps_per_second": 0.207, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.56378672954129e-06, |
|
"logits/chosen": -0.5947645902633667, |
|
"logits/rejected": -0.6664719581604004, |
|
"logps/chosen": -407.41339111328125, |
|
"logps/rejected": -436.47381591796875, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6832540035247803, |
|
"rewards/margins": 0.5449843406677246, |
|
"rewards/rejected": -1.2282384634017944, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5222648293233806e-06, |
|
"logits/chosen": -0.5373682379722595, |
|
"logits/rejected": -0.5452297329902649, |
|
"logps/chosen": -385.5528869628906, |
|
"logps/rejected": -416.3194885253906, |
|
"loss": 0.601, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4152299463748932, |
|
"rewards/margins": 0.42649930715560913, |
|
"rewards/rejected": -0.8417292833328247, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4804015009566573e-06, |
|
"logits/chosen": -0.5158123970031738, |
|
"logits/rejected": -0.6795430183410645, |
|
"logps/chosen": -412.291015625, |
|
"logps/rejected": -380.50860595703125, |
|
"loss": 0.5845, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5058903694152832, |
|
"rewards/margins": 0.47143155336380005, |
|
"rewards/rejected": -0.977321982383728, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4382107264527244e-06, |
|
"logits/chosen": -0.587105929851532, |
|
"logits/rejected": -0.6171265840530396, |
|
"logps/chosen": -391.59991455078125, |
|
"logps/rejected": -412.62750244140625, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5484491586685181, |
|
"rewards/margins": 0.42554783821105957, |
|
"rewards/rejected": -0.9739969968795776, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.3957065971875387e-06, |
|
"logits/chosen": -0.5018014311790466, |
|
"logits/rejected": -0.5997334718704224, |
|
"logps/chosen": -488.0768127441406, |
|
"logps/rejected": -467.55072021484375, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7004522085189819, |
|
"rewards/margins": 0.5092984437942505, |
|
"rewards/rejected": -1.2097506523132324, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.352903309194999e-06, |
|
"logits/chosen": -0.5440029501914978, |
|
"logits/rejected": -0.5686444044113159, |
|
"logps/chosen": -364.2088623046875, |
|
"logps/rejected": -364.9720153808594, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5453570485115051, |
|
"rewards/margins": 0.4008842408657074, |
|
"rewards/rejected": -0.9462413787841797, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.309815158425591e-06, |
|
"logits/chosen": -0.5663027167320251, |
|
"logits/rejected": -0.6566844582557678, |
|
"logps/chosen": -410.7333984375, |
|
"logps/rejected": -389.6551513671875, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4486373960971832, |
|
"rewards/margins": 0.37078288197517395, |
|
"rewards/rejected": -0.8194202184677124, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.266456535971654e-06, |
|
"logits/chosen": -0.5438618659973145, |
|
"logits/rejected": -0.5869525671005249, |
|
"logps/chosen": -430.4832458496094, |
|
"logps/rejected": -415.47711181640625, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6006536483764648, |
|
"rewards/margins": 0.3156343400478363, |
|
"rewards/rejected": -0.9162880778312683, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2228419232608692e-06, |
|
"logits/chosen": -0.5569460391998291, |
|
"logits/rejected": -0.5293912887573242, |
|
"logps/chosen": -342.6893615722656, |
|
"logps/rejected": -377.5931701660156, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5601915121078491, |
|
"rewards/margins": 0.33136099576950073, |
|
"rewards/rejected": -0.8915525674819946, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1789858872195888e-06, |
|
"logits/chosen": -0.49963292479515076, |
|
"logits/rejected": -0.6308623552322388, |
|
"logps/chosen": -407.0679931640625, |
|
"logps/rejected": -378.2735900878906, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5700783133506775, |
|
"rewards/margins": 0.47629982233047485, |
|
"rewards/rejected": -1.0463781356811523, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -0.5358740091323853, |
|
"eval_logits/rejected": -0.5755640268325806, |
|
"eval_logps/chosen": -407.6412353515625, |
|
"eval_logps/rejected": -420.5953063964844, |
|
"eval_loss": 0.5896004438400269, |
|
"eval_rewards/accuracies": 0.7123016119003296, |
|
"eval_rewards/chosen": -0.6102306246757507, |
|
"eval_rewards/margins": 0.4539690315723419, |
|
"eval_rewards/rejected": -1.064199686050415, |
|
"eval_runtime": 305.5138, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.206, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1349030754075945e-06, |
|
"logits/chosen": -0.5590487718582153, |
|
"logits/rejected": -0.6419841051101685, |
|
"logps/chosen": -426.57550048828125, |
|
"logps/rejected": -397.7510681152344, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6472785472869873, |
|
"rewards/margins": 0.4323779046535492, |
|
"rewards/rejected": -1.0796566009521484, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0906082111259313e-06, |
|
"logits/chosen": -0.571009635925293, |
|
"logits/rejected": -0.5964629054069519, |
|
"logps/chosen": -411.31201171875, |
|
"logps/rejected": -410.85260009765625, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6876448392868042, |
|
"rewards/margins": 0.34134042263031006, |
|
"rewards/rejected": -1.0289852619171143, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.046116088499449e-06, |
|
"logits/chosen": -0.5807533264160156, |
|
"logits/rejected": -0.6887992024421692, |
|
"logps/chosen": -456.3690490722656, |
|
"logps/rejected": -431.4046936035156, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6661675572395325, |
|
"rewards/margins": 0.4776817262172699, |
|
"rewards/rejected": -1.14384925365448, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0014415675356813e-06, |
|
"logits/chosen": -0.5229719877243042, |
|
"logits/rejected": -0.6582568287849426, |
|
"logps/chosen": -381.76239013671875, |
|
"logps/rejected": -381.06927490234375, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5734530091285706, |
|
"rewards/margins": 0.5467861294746399, |
|
"rewards/rejected": -1.120239019393921, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9565995691617242e-06, |
|
"logits/chosen": -0.5676053166389465, |
|
"logits/rejected": -0.5334717035293579, |
|
"logps/chosen": -374.41717529296875, |
|
"logps/rejected": -421.0732421875, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.6005482077598572, |
|
"rewards/margins": 0.21005001664161682, |
|
"rewards/rejected": -0.8105981945991516, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9116050702407706e-06, |
|
"logits/chosen": -0.44915372133255005, |
|
"logits/rejected": -0.5996781587600708, |
|
"logps/chosen": -453.83343505859375, |
|
"logps/rejected": -393.98333740234375, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6119068264961243, |
|
"rewards/margins": 0.29583027958869934, |
|
"rewards/rejected": -0.9077370762825012, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8664730985699537e-06, |
|
"logits/chosen": -0.5181512832641602, |
|
"logits/rejected": -0.5320832133293152, |
|
"logps/chosen": -359.99542236328125, |
|
"logps/rejected": -414.4845275878906, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5852144956588745, |
|
"rewards/margins": 0.4881064295768738, |
|
"rewards/rejected": -1.073320984840393, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8212187278611907e-06, |
|
"logits/chosen": -0.5162011981010437, |
|
"logits/rejected": -0.6026550531387329, |
|
"logps/chosen": -435.4556579589844, |
|
"logps/rejected": -434.9972229003906, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5601040124893188, |
|
"rewards/margins": 0.5399636030197144, |
|
"rewards/rejected": -1.1000676155090332, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7758570727066843e-06, |
|
"logits/chosen": -0.5119590759277344, |
|
"logits/rejected": -0.6090120673179626, |
|
"logps/chosen": -407.456298828125, |
|
"logps/rejected": -399.5018310546875, |
|
"loss": 0.5684, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6310015916824341, |
|
"rewards/margins": 0.42230549454689026, |
|
"rewards/rejected": -1.053307056427002, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.730403283530767e-06, |
|
"logits/chosen": -0.5462077856063843, |
|
"logits/rejected": -0.603754997253418, |
|
"logps/chosen": -436.95245361328125, |
|
"logps/rejected": -415.5531311035156, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.71515291929245, |
|
"rewards/margins": 0.3384590148925781, |
|
"rewards/rejected": -1.0536119937896729, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -0.50582355260849, |
|
"eval_logits/rejected": -0.5430945754051208, |
|
"eval_logps/chosen": -411.3542785644531, |
|
"eval_logps/rejected": -426.3283996582031, |
|
"eval_loss": 0.5865140557289124, |
|
"eval_rewards/accuracies": 0.6984127163887024, |
|
"eval_rewards/chosen": -0.6473610401153564, |
|
"eval_rewards/margins": 0.4741695821285248, |
|
"eval_rewards/rejected": -1.1215306520462036, |
|
"eval_runtime": 302.4755, |
|
"eval_samples_per_second": 6.612, |
|
"eval_steps_per_second": 0.208, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6848725415297888e-06, |
|
"logits/chosen": -0.5315025448799133, |
|
"logits/rejected": -0.6642025709152222, |
|
"logps/chosen": -466.88250732421875, |
|
"logps/rejected": -440.3934020996094, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6935629844665527, |
|
"rewards/margins": 0.5693783760070801, |
|
"rewards/rejected": -1.2629413604736328, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.639280053601719e-06, |
|
"logits/chosen": -0.4503622055053711, |
|
"logits/rejected": -0.515163779258728, |
|
"logps/chosen": -388.44940185546875, |
|
"logps/rejected": -394.9497985839844, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6574369668960571, |
|
"rewards/margins": 0.3425886929035187, |
|
"rewards/rejected": -1.000025749206543, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.59364104726716e-06, |
|
"logits/chosen": -0.5630380511283875, |
|
"logits/rejected": -0.6658009886741638, |
|
"logps/chosen": -403.2070617675781, |
|
"logps/rejected": -395.23150634765625, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5559983253479004, |
|
"rewards/margins": 0.5452179908752441, |
|
"rewards/rejected": -1.1012163162231445, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.547970765583491e-06, |
|
"logits/chosen": -0.5626110434532166, |
|
"logits/rejected": -0.633199155330658, |
|
"logps/chosen": -394.472412109375, |
|
"logps/rejected": -418.28741455078125, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8999873399734497, |
|
"rewards/margins": 0.44079795479774475, |
|
"rewards/rejected": -1.340785264968872, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.502284462053799e-06, |
|
"logits/chosen": -0.5563883185386658, |
|
"logits/rejected": -0.6150022745132446, |
|
"logps/chosen": -443.32977294921875, |
|
"logps/rejected": -443.04248046875, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.826148509979248, |
|
"rewards/margins": 0.5606396198272705, |
|
"rewards/rejected": -1.3867881298065186, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.456597395532338e-06, |
|
"logits/chosen": -0.4988153874874115, |
|
"logits/rejected": -0.5230491161346436, |
|
"logps/chosen": -409.4883728027344, |
|
"logps/rejected": -461.45184326171875, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9212621450424194, |
|
"rewards/margins": 0.43989840149879456, |
|
"rewards/rejected": -1.3611605167388916, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4109248251281953e-06, |
|
"logits/chosen": -0.5152363777160645, |
|
"logits/rejected": -0.5550749897956848, |
|
"logps/chosen": -412.5245056152344, |
|
"logps/rejected": -469.71392822265625, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7094501256942749, |
|
"rewards/margins": 0.5959488749504089, |
|
"rewards/rejected": -1.3053990602493286, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.365282005108875e-06, |
|
"logits/chosen": -0.40664130449295044, |
|
"logits/rejected": -0.4538194537162781, |
|
"logps/chosen": -474.70269775390625, |
|
"logps/rejected": -478.2893981933594, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6573441028594971, |
|
"rewards/margins": 0.4294394850730896, |
|
"rewards/rejected": -1.086783528327942, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319684179805491e-06, |
|
"logits/chosen": -0.47670966386795044, |
|
"logits/rejected": -0.5509120225906372, |
|
"logps/chosen": -417.5166015625, |
|
"logps/rejected": -451.0810546875, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8204286694526672, |
|
"rewards/margins": 0.3873991370201111, |
|
"rewards/rejected": -1.2078278064727783, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2741465785212905e-06, |
|
"logits/chosen": -0.5169596076011658, |
|
"logits/rejected": -0.5811697244644165, |
|
"logps/chosen": -409.420166015625, |
|
"logps/rejected": -383.60638427734375, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.791795015335083, |
|
"rewards/margins": 0.3366108238697052, |
|
"rewards/rejected": -1.1284058094024658, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -0.5104440450668335, |
|
"eval_logits/rejected": -0.5501194596290588, |
|
"eval_logps/chosen": -419.2626037597656, |
|
"eval_logps/rejected": -437.0024719238281, |
|
"eval_loss": 0.5854523777961731, |
|
"eval_rewards/accuracies": 0.7063491940498352, |
|
"eval_rewards/chosen": -0.7264440655708313, |
|
"eval_rewards/margins": 0.5018272399902344, |
|
"eval_rewards/rejected": -1.228271245956421, |
|
"eval_runtime": 308.3584, |
|
"eval_samples_per_second": 6.486, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2286844104451848e-06, |
|
"logits/chosen": -0.5197004079818726, |
|
"logits/rejected": -0.6333814859390259, |
|
"logps/chosen": -416.119873046875, |
|
"logps/rejected": -442.78363037109375, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7725197076797485, |
|
"rewards/margins": 0.5796698331832886, |
|
"rewards/rejected": -1.3521894216537476, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.183312859572008e-06, |
|
"logits/chosen": -0.46561068296432495, |
|
"logits/rejected": -0.5527454614639282, |
|
"logps/chosen": -446.88372802734375, |
|
"logps/rejected": -421.5301208496094, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8624758720397949, |
|
"rewards/margins": 0.5891597867012024, |
|
"rewards/rejected": -1.451635718345642, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1380470796311843e-06, |
|
"logits/chosen": -0.38593560457229614, |
|
"logits/rejected": -0.4362197518348694, |
|
"logps/chosen": -429.46893310546875, |
|
"logps/rejected": -482.09014892578125, |
|
"loss": 0.5891, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9297833442687988, |
|
"rewards/margins": 0.5348304510116577, |
|
"rewards/rejected": -1.464613676071167, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.092902189025507e-06, |
|
"logits/chosen": -0.5493001937866211, |
|
"logits/rejected": -0.6307727098464966, |
|
"logps/chosen": -408.87017822265625, |
|
"logps/rejected": -407.6255187988281, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7089771032333374, |
|
"rewards/margins": 0.5302673578262329, |
|
"rewards/rejected": -1.2392443418502808, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0478932657817105e-06, |
|
"logits/chosen": -0.44064339995384216, |
|
"logits/rejected": -0.5361225605010986, |
|
"logps/chosen": -439.63775634765625, |
|
"logps/rejected": -438.837158203125, |
|
"loss": 0.5686, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6102380156517029, |
|
"rewards/margins": 0.5436003804206848, |
|
"rewards/rejected": -1.1538383960723877, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0030353425145376e-06, |
|
"logits/chosen": -0.526265561580658, |
|
"logits/rejected": -0.5658246874809265, |
|
"logps/chosen": -346.7317199707031, |
|
"logps/rejected": -412.7989807128906, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.733574390411377, |
|
"rewards/margins": 0.4724843502044678, |
|
"rewards/rejected": -1.2060587406158447, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.958343401405964e-06, |
|
"logits/chosen": -0.5053200721740723, |
|
"logits/rejected": -0.5343925952911377, |
|
"logps/chosen": -354.54681396484375, |
|
"logps/rejected": -389.521240234375, |
|
"loss": 0.5935, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6786689758300781, |
|
"rewards/margins": 0.24279697239398956, |
|
"rewards/rejected": -0.921466052532196, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9138323692012734e-06, |
|
"logits/chosen": -0.48046913743019104, |
|
"logits/rejected": -0.5356425046920776, |
|
"logps/chosen": -404.2420654296875, |
|
"logps/rejected": -372.3519287109375, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7317395806312561, |
|
"rewards/margins": 0.2877105176448822, |
|
"rewards/rejected": -1.019450068473816, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8695171122236443e-06, |
|
"logits/chosen": -0.44227689504623413, |
|
"logits/rejected": -0.5266855359077454, |
|
"logps/chosen": -438.1207580566406, |
|
"logps/rejected": -435.01824951171875, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6313630938529968, |
|
"rewards/margins": 0.5005151629447937, |
|
"rewards/rejected": -1.1318782567977905, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8254124314089225e-06, |
|
"logits/chosen": -0.44814664125442505, |
|
"logits/rejected": -0.3982391357421875, |
|
"logps/chosen": -371.9776916503906, |
|
"logps/rejected": -453.76397705078125, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6842934489250183, |
|
"rewards/margins": 0.3474181890487671, |
|
"rewards/rejected": -1.0317118167877197, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -0.4896259009838104, |
|
"eval_logits/rejected": -0.5296720266342163, |
|
"eval_logps/chosen": -415.6815490722656, |
|
"eval_logps/rejected": -434.11138916015625, |
|
"eval_loss": 0.582290768623352, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -0.6906338334083557, |
|
"eval_rewards/margins": 0.5087268352508545, |
|
"eval_rewards/rejected": -1.199360728263855, |
|
"eval_runtime": 305.1471, |
|
"eval_samples_per_second": 6.554, |
|
"eval_steps_per_second": 0.206, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.781533057362221e-06, |
|
"logits/chosen": -0.45607271790504456, |
|
"logits/rejected": -0.5114004015922546, |
|
"logps/chosen": -411.0953063964844, |
|
"logps/rejected": -410.81622314453125, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6678163409233093, |
|
"rewards/margins": 0.5182808637619019, |
|
"rewards/rejected": -1.1860973834991455, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7378936454380277e-06, |
|
"logits/chosen": -0.4843919277191162, |
|
"logits/rejected": -0.5591267347335815, |
|
"logps/chosen": -461.26971435546875, |
|
"logps/rejected": -451.92987060546875, |
|
"loss": 0.5887, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.848225474357605, |
|
"rewards/margins": 0.39363884925842285, |
|
"rewards/rejected": -1.2418644428253174, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6945087708454273e-06, |
|
"logits/chosen": -0.48674625158309937, |
|
"logits/rejected": -0.5596768260002136, |
|
"logps/chosen": -418.35748291015625, |
|
"logps/rejected": -432.4639587402344, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8621279001235962, |
|
"rewards/margins": 0.6262621879577637, |
|
"rewards/rejected": -1.4883902072906494, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.651392923780105e-06, |
|
"logits/chosen": -0.4551656246185303, |
|
"logits/rejected": -0.4784488081932068, |
|
"logps/chosen": -442.0116271972656, |
|
"logps/rejected": -449.7799377441406, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7251280546188354, |
|
"rewards/margins": 0.5167814493179321, |
|
"rewards/rejected": -1.2419095039367676, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.608560504584737e-06, |
|
"logits/chosen": -0.4653591215610504, |
|
"logits/rejected": -0.5175148844718933, |
|
"logps/chosen": -433.5137634277344, |
|
"logps/rejected": -450.39263916015625, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7668915390968323, |
|
"rewards/margins": 0.5189617276191711, |
|
"rewards/rejected": -1.2858531475067139, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5660258189393945e-06, |
|
"logits/chosen": -0.45602017641067505, |
|
"logits/rejected": -0.5941805243492126, |
|
"logps/chosen": -421.2008361816406, |
|
"logps/rejected": -438.20172119140625, |
|
"loss": 0.6055, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7053720951080322, |
|
"rewards/margins": 0.5101194977760315, |
|
"rewards/rejected": -1.215491533279419, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5238030730835578e-06, |
|
"logits/chosen": -0.4900667071342468, |
|
"logits/rejected": -0.5974335670471191, |
|
"logps/chosen": -439.05084228515625, |
|
"logps/rejected": -448.49224853515625, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6744861006736755, |
|
"rewards/margins": 0.5117014050483704, |
|
"rewards/rejected": -1.186187505722046, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4819063690713565e-06, |
|
"logits/chosen": -0.5272679328918457, |
|
"logits/rejected": -0.555342435836792, |
|
"logps/chosen": -392.576416015625, |
|
"logps/rejected": -438.9559631347656, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8570607304573059, |
|
"rewards/margins": 0.5523840188980103, |
|
"rewards/rejected": -1.4094446897506714, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4403497000615885e-06, |
|
"logits/chosen": -0.364164263010025, |
|
"logits/rejected": -0.3934030830860138, |
|
"logps/chosen": -453.59210205078125, |
|
"logps/rejected": -465.28912353515625, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6626067161560059, |
|
"rewards/margins": 0.4651568531990051, |
|
"rewards/rejected": -1.1277635097503662, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3991469456441273e-06, |
|
"logits/chosen": -0.4578070044517517, |
|
"logits/rejected": -0.5681411027908325, |
|
"logps/chosen": -439.03289794921875, |
|
"logps/rejected": -432.52105712890625, |
|
"loss": 0.5243, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6801882982254028, |
|
"rewards/margins": 0.5355648398399353, |
|
"rewards/rejected": -1.2157530784606934, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -0.4929659962654114, |
|
"eval_logits/rejected": -0.5339555740356445, |
|
"eval_logps/chosen": -421.1521911621094, |
|
"eval_logps/rejected": -441.3783264160156, |
|
"eval_loss": 0.5803412795066833, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -0.7453399896621704, |
|
"eval_rewards/margins": 0.5266899466514587, |
|
"eval_rewards/rejected": -1.2720301151275635, |
|
"eval_runtime": 305.6529, |
|
"eval_samples_per_second": 6.543, |
|
"eval_steps_per_second": 0.206, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3583118672042441e-06, |
|
"logits/chosen": -0.5170468688011169, |
|
"logits/rejected": -0.572604775428772, |
|
"logps/chosen": -372.7608642578125, |
|
"logps/rejected": -370.40911865234375, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7620792388916016, |
|
"rewards/margins": 0.42411160469055176, |
|
"rewards/rejected": -1.1861908435821533, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3178581033264218e-06, |
|
"logits/chosen": -0.5389008522033691, |
|
"logits/rejected": -0.5711629986763, |
|
"logps/chosen": -405.4247131347656, |
|
"logps/rejected": -452.078125, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8531595468521118, |
|
"rewards/margins": 0.46848931908607483, |
|
"rewards/rejected": -1.3216488361358643, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2777991652391757e-06, |
|
"logits/chosen": -0.4288851320743561, |
|
"logits/rejected": -0.4834524989128113, |
|
"logps/chosen": -371.2089538574219, |
|
"logps/rejected": -410.0118103027344, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8295769691467285, |
|
"rewards/margins": 0.5004912614822388, |
|
"rewards/rejected": -1.3300683498382568, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2381484323024178e-06, |
|
"logits/chosen": -0.47288522124290466, |
|
"logits/rejected": -0.5918864011764526, |
|
"logps/chosen": -463.8346252441406, |
|
"logps/rejected": -432.7979431152344, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7991669774055481, |
|
"rewards/margins": 0.45910316705703735, |
|
"rewards/rejected": -1.2582701444625854, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1989191475388518e-06, |
|
"logits/chosen": -0.41379016637802124, |
|
"logits/rejected": -0.5847631692886353, |
|
"logps/chosen": -464.99267578125, |
|
"logps/rejected": -418.6041564941406, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.67491215467453, |
|
"rewards/margins": 0.5438855886459351, |
|
"rewards/rejected": -1.2187979221343994, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160124413210918e-06, |
|
"logits/chosen": -0.4751303791999817, |
|
"logits/rejected": -0.5804455876350403, |
|
"logps/chosen": -409.5224609375, |
|
"logps/rejected": -407.48443603515625, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7855092883110046, |
|
"rewards/margins": 0.5121535062789917, |
|
"rewards/rejected": -1.2976628541946411, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1217771864447396e-06, |
|
"logits/chosen": -0.45879894495010376, |
|
"logits/rejected": -0.5505935549736023, |
|
"logps/chosen": -442.29931640625, |
|
"logps/rejected": -427.472412109375, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7745510339736938, |
|
"rewards/margins": 0.5624163746833801, |
|
"rewards/rejected": -1.3369674682617188, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.08389027490255e-06, |
|
"logits/chosen": -0.5081161856651306, |
|
"logits/rejected": -0.5460919737815857, |
|
"logps/chosen": -387.21722412109375, |
|
"logps/rejected": -389.8815002441406, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8121119737625122, |
|
"rewards/margins": 0.3596836030483246, |
|
"rewards/rejected": -1.1717956066131592, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.046476332505036e-06, |
|
"logits/chosen": -0.47852253913879395, |
|
"logits/rejected": -0.5535346269607544, |
|
"logps/chosen": -409.1778869628906, |
|
"logps/rejected": -408.6339111328125, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8597895503044128, |
|
"rewards/margins": 0.43825215101242065, |
|
"rewards/rejected": -1.298041582107544, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0095478552050348e-06, |
|
"logits/chosen": -0.47451940178871155, |
|
"logits/rejected": -0.5755375027656555, |
|
"logps/chosen": -351.31610107421875, |
|
"logps/rejected": -404.18328857421875, |
|
"loss": 0.5343, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6808342933654785, |
|
"rewards/margins": 0.7804089784622192, |
|
"rewards/rejected": -1.4612432718276978, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -0.4871543347835541, |
|
"eval_logits/rejected": -0.5270918011665344, |
|
"eval_logps/chosen": -420.1601867675781, |
|
"eval_logps/rejected": -440.79998779296875, |
|
"eval_loss": 0.5805416107177734, |
|
"eval_rewards/accuracies": 0.7103174328804016, |
|
"eval_rewards/chosen": -0.7354201674461365, |
|
"eval_rewards/margins": 0.5308260321617126, |
|
"eval_rewards/rejected": -1.2662461996078491, |
|
"eval_runtime": 304.0241, |
|
"eval_samples_per_second": 6.578, |
|
"eval_steps_per_second": 0.207, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.731171768139808e-07, |
|
"logits/chosen": -0.48386329412460327, |
|
"logits/rejected": -0.5785359144210815, |
|
"logps/chosen": -431.7328186035156, |
|
"logps/rejected": -404.26019287109375, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6404847502708435, |
|
"rewards/margins": 0.5482622385025024, |
|
"rewards/rejected": -1.1887470483779907, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.371964648825221e-07, |
|
"logits/chosen": -0.4259588122367859, |
|
"logits/rejected": -0.512924075126648, |
|
"logps/chosen": -449.75921630859375, |
|
"logps/rejected": -424.4246520996094, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6056379079818726, |
|
"rewards/margins": 0.6098628044128418, |
|
"rewards/rejected": -1.2155007123947144, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.017977166366445e-07, |
|
"logits/chosen": -0.3833313286304474, |
|
"logits/rejected": -0.4866362512111664, |
|
"logps/chosen": -418.40606689453125, |
|
"logps/rejected": -465.56805419921875, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6852587461471558, |
|
"rewards/margins": 0.4969066083431244, |
|
"rewards/rejected": -1.1821653842926025, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.669327549707096e-07, |
|
"logits/chosen": -0.5720198154449463, |
|
"logits/rejected": -0.6064139604568481, |
|
"logps/chosen": -372.54229736328125, |
|
"logps/rejected": -417.3109436035156, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7984358072280884, |
|
"rewards/margins": 0.6686285138130188, |
|
"rewards/rejected": -1.467064380645752, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.326132244986932e-07, |
|
"logits/chosen": -0.48107537627220154, |
|
"logits/rejected": -0.4693182110786438, |
|
"logps/chosen": -417.77197265625, |
|
"logps/rejected": -460.15692138671875, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7300211787223816, |
|
"rewards/margins": 0.5799316763877869, |
|
"rewards/rejected": -1.309952974319458, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.988505876649863e-07, |
|
"logits/chosen": -0.5137463808059692, |
|
"logits/rejected": -0.5962769389152527, |
|
"logps/chosen": -412.7781677246094, |
|
"logps/rejected": -412.116943359375, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7613190412521362, |
|
"rewards/margins": 0.5068355798721313, |
|
"rewards/rejected": -1.268154501914978, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.656561209160248e-07, |
|
"logits/chosen": -0.3398676812648773, |
|
"logits/rejected": -0.5123169422149658, |
|
"logps/chosen": -464.4519958496094, |
|
"logps/rejected": -445.8551330566406, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6963375210762024, |
|
"rewards/margins": 0.6760309934616089, |
|
"rewards/rejected": -1.3723684549331665, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.330409109340563e-07, |
|
"logits/chosen": -0.44637346267700195, |
|
"logits/rejected": -0.4979945123195648, |
|
"logps/chosen": -436.58123779296875, |
|
"logps/rejected": -432.36126708984375, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.779857337474823, |
|
"rewards/margins": 0.5138452649116516, |
|
"rewards/rejected": -1.293702483177185, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.010158509342682e-07, |
|
"logits/chosen": -0.5019239783287048, |
|
"logits/rejected": -0.5278982520103455, |
|
"logps/chosen": -399.7169494628906, |
|
"logps/rejected": -425.452392578125, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7415592074394226, |
|
"rewards/margins": 0.5297465920448303, |
|
"rewards/rejected": -1.271305799484253, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.695916370265529e-07, |
|
"logits/chosen": -0.4993990957736969, |
|
"logits/rejected": -0.5913614630699158, |
|
"logps/chosen": -424.80047607421875, |
|
"logps/rejected": -439.79193115234375, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6962804794311523, |
|
"rewards/margins": 0.5849637985229492, |
|
"rewards/rejected": -1.2812442779541016, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -0.48642095923423767, |
|
"eval_logits/rejected": -0.5267860889434814, |
|
"eval_logps/chosen": -418.4040222167969, |
|
"eval_logps/rejected": -439.218994140625, |
|
"eval_loss": 0.5798574090003967, |
|
"eval_rewards/accuracies": 0.7123016119003296, |
|
"eval_rewards/chosen": -0.7178582549095154, |
|
"eval_rewards/margins": 0.5325784683227539, |
|
"eval_rewards/rejected": -1.250436782836914, |
|
"eval_runtime": 305.4058, |
|
"eval_samples_per_second": 6.549, |
|
"eval_steps_per_second": 0.206, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.387787646430854e-07, |
|
"logits/chosen": -0.39493924379348755, |
|
"logits/rejected": -0.4824163317680359, |
|
"logps/chosen": -435.23944091796875, |
|
"logps/rejected": -434.67938232421875, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6798096895217896, |
|
"rewards/margins": 0.579690158367157, |
|
"rewards/rejected": -1.2594999074935913, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.085875250329401e-07, |
|
"logits/chosen": -0.5511216521263123, |
|
"logits/rejected": -0.5999202728271484, |
|
"logps/chosen": -452.65020751953125, |
|
"logps/rejected": -435.93902587890625, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8256019353866577, |
|
"rewards/margins": 0.45076680183410645, |
|
"rewards/rejected": -1.2763688564300537, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.79028001824894e-07, |
|
"logits/chosen": -0.5369911193847656, |
|
"logits/rejected": -0.519089937210083, |
|
"logps/chosen": -452.34210205078125, |
|
"logps/rejected": -478.9400939941406, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7827368974685669, |
|
"rewards/margins": 0.420412540435791, |
|
"rewards/rejected": -1.2031495571136475, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.501100676595761e-07, |
|
"logits/chosen": -0.4579439163208008, |
|
"logits/rejected": -0.44433537125587463, |
|
"logps/chosen": -364.8140869140625, |
|
"logps/rejected": -434.513916015625, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7198919653892517, |
|
"rewards/margins": 0.45986801385879517, |
|
"rewards/rejected": -1.1797600984573364, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.218433808920884e-07, |
|
"logits/chosen": -0.5248664021492004, |
|
"logits/rejected": -0.5349477529525757, |
|
"logps/chosen": -437.393798828125, |
|
"logps/rejected": -468.2730407714844, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.881768524646759, |
|
"rewards/margins": 0.5896928310394287, |
|
"rewards/rejected": -1.471461296081543, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.942373823661928e-07, |
|
"logits/chosen": -0.4769573211669922, |
|
"logits/rejected": -0.5523253679275513, |
|
"logps/chosen": -423.78399658203125, |
|
"logps/rejected": -398.61163330078125, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8796138763427734, |
|
"rewards/margins": 0.2528652250766754, |
|
"rewards/rejected": -1.132478952407837, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6730129226114363e-07, |
|
"logits/chosen": -0.4421960711479187, |
|
"logits/rejected": -0.4300961494445801, |
|
"logps/chosen": -378.70233154296875, |
|
"logps/rejected": -440.5601501464844, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8324155807495117, |
|
"rewards/margins": 0.5455555319786072, |
|
"rewards/rejected": -1.3779711723327637, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4104410701222703e-07, |
|
"logits/chosen": -0.4712623655796051, |
|
"logits/rejected": -0.5238856673240662, |
|
"logps/chosen": -422.5208435058594, |
|
"logps/rejected": -434.720947265625, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8706847429275513, |
|
"rewards/margins": 0.3749724328517914, |
|
"rewards/rejected": -1.245656967163086, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.154745963060197e-07, |
|
"logits/chosen": -0.40005379915237427, |
|
"logits/rejected": -0.4531996250152588, |
|
"logps/chosen": -439.6205139160156, |
|
"logps/rejected": -432.45318603515625, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6769813895225525, |
|
"rewards/margins": 0.5456241369247437, |
|
"rewards/rejected": -1.222605586051941, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9060130015138863e-07, |
|
"logits/chosen": -0.41965001821517944, |
|
"logits/rejected": -0.5496610403060913, |
|
"logps/chosen": -458.8155212402344, |
|
"logps/rejected": -414.1279296875, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6864231824874878, |
|
"rewards/margins": 0.4698265492916107, |
|
"rewards/rejected": -1.156249761581421, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -0.4870251715183258, |
|
"eval_logits/rejected": -0.5271284580230713, |
|
"eval_logps/chosen": -419.6229553222656, |
|
"eval_logps/rejected": -440.7270812988281, |
|
"eval_loss": 0.5795175433158875, |
|
"eval_rewards/accuracies": 0.7123016119003296, |
|
"eval_rewards/chosen": -0.7300478219985962, |
|
"eval_rewards/margins": 0.5354698896408081, |
|
"eval_rewards/rejected": -1.2655178308486938, |
|
"eval_runtime": 305.2401, |
|
"eval_samples_per_second": 6.552, |
|
"eval_steps_per_second": 0.206, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.664325260271953e-07, |
|
"logits/chosen": -0.528279721736908, |
|
"logits/rejected": -0.6684530973434448, |
|
"logps/chosen": -430.3279724121094, |
|
"logps/rejected": -415.4619140625, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.6265103220939636, |
|
"rewards/margins": 0.6475422978401184, |
|
"rewards/rejected": -1.274052619934082, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.429763461076677e-07, |
|
"logits/chosen": -0.3826778829097748, |
|
"logits/rejected": -0.5138348937034607, |
|
"logps/chosen": -440.0087890625, |
|
"logps/rejected": -389.94049072265625, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7695432901382446, |
|
"rewards/margins": 0.3364180028438568, |
|
"rewards/rejected": -1.1059613227844238, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.202405945663556e-07, |
|
"logits/chosen": -0.46464791893959045, |
|
"logits/rejected": -0.5091901421546936, |
|
"logps/chosen": -438.41461181640625, |
|
"logps/rejected": -474.1409606933594, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7322907447814941, |
|
"rewards/margins": 0.397641658782959, |
|
"rewards/rejected": -1.1299324035644531, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.982328649595856e-07, |
|
"logits/chosen": -0.5395783185958862, |
|
"logits/rejected": -0.5762434601783752, |
|
"logps/chosen": -355.19256591796875, |
|
"logps/rejected": -356.80377197265625, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5796637535095215, |
|
"rewards/margins": 0.501792311668396, |
|
"rewards/rejected": -1.0814560651779175, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7696050769026954e-07, |
|
"logits/chosen": -0.4488208293914795, |
|
"logits/rejected": -0.5003235340118408, |
|
"logps/chosen": -406.1018371582031, |
|
"logps/rejected": -456.99688720703125, |
|
"loss": 0.5767, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.835650622844696, |
|
"rewards/margins": 0.4407881796360016, |
|
"rewards/rejected": -1.2764387130737305, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.564306275529341e-07, |
|
"logits/chosen": -0.4920230507850647, |
|
"logits/rejected": -0.5208636522293091, |
|
"logps/chosen": -429.73516845703125, |
|
"logps/rejected": -437.10992431640625, |
|
"loss": 0.5907, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.7671813368797302, |
|
"rewards/margins": 0.3512424826622009, |
|
"rewards/rejected": -1.1184238195419312, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3665008136077332e-07, |
|
"logits/chosen": -0.46324095129966736, |
|
"logits/rejected": -0.5051876306533813, |
|
"logps/chosen": -455.948486328125, |
|
"logps/rejected": -432.7015686035156, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.749980092048645, |
|
"rewards/margins": 0.4463537633419037, |
|
"rewards/rejected": -1.1963337659835815, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1762547565553293e-07, |
|
"logits/chosen": -0.4768814444541931, |
|
"logits/rejected": -0.5184912085533142, |
|
"logps/chosen": -447.73699951171875, |
|
"logps/rejected": -459.3815002441406, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7029529809951782, |
|
"rewards/margins": 0.4211914539337158, |
|
"rewards/rejected": -1.1241443157196045, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.993631645009747e-07, |
|
"logits/chosen": -0.5452268123626709, |
|
"logits/rejected": -0.5993489027023315, |
|
"logps/chosen": -437.57574462890625, |
|
"logps/rejected": -418.8999938964844, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7408413887023926, |
|
"rewards/margins": 0.390348881483078, |
|
"rewards/rejected": -1.1311901807785034, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.818692473606748e-07, |
|
"logits/chosen": -0.413705438375473, |
|
"logits/rejected": -0.4628356099128723, |
|
"logps/chosen": -434.87677001953125, |
|
"logps/rejected": -442.89697265625, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7070480585098267, |
|
"rewards/margins": 0.4665676951408386, |
|
"rewards/rejected": -1.17361581325531, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -0.4876059889793396, |
|
"eval_logits/rejected": -0.527910053730011, |
|
"eval_logps/chosen": -418.4286193847656, |
|
"eval_logps/rejected": -439.34423828125, |
|
"eval_loss": 0.5797783136367798, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -0.7181041836738586, |
|
"eval_rewards/margins": 0.5335846543312073, |
|
"eval_rewards/rejected": -1.251688838005066, |
|
"eval_runtime": 304.6366, |
|
"eval_samples_per_second": 6.565, |
|
"eval_steps_per_second": 0.207, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6514956706084885e-07, |
|
"logits/chosen": -0.4525040090084076, |
|
"logits/rejected": -0.49936771392822266, |
|
"logps/chosen": -421.5797424316406, |
|
"logps/rejected": -447.3121643066406, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7661898732185364, |
|
"rewards/margins": 0.37991976737976074, |
|
"rewards/rejected": -1.1461094617843628, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4920970783889737e-07, |
|
"logits/chosen": -0.4586445689201355, |
|
"logits/rejected": -0.4811577796936035, |
|
"logps/chosen": -412.69036865234375, |
|
"logps/rejected": -422.6893615722656, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8322868347167969, |
|
"rewards/margins": 0.4826812148094177, |
|
"rewards/rejected": -1.3149678707122803, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.340549934783164e-07, |
|
"logits/chosen": -0.4814591407775879, |
|
"logits/rejected": -0.4792747497558594, |
|
"logps/chosen": -403.96356201171875, |
|
"logps/rejected": -428.724365234375, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8141158223152161, |
|
"rewards/margins": 0.33095940947532654, |
|
"rewards/rejected": -1.1450750827789307, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.196904855305961e-07, |
|
"logits/chosen": -0.48179054260253906, |
|
"logits/rejected": -0.5652682185173035, |
|
"logps/chosen": -372.9039001464844, |
|
"logps/rejected": -408.64044189453125, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7646127939224243, |
|
"rewards/margins": 0.5597863793373108, |
|
"rewards/rejected": -1.3243992328643799, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0612098162470302e-07, |
|
"logits/chosen": -0.4800513684749603, |
|
"logits/rejected": -0.5272001624107361, |
|
"logps/chosen": -375.82318115234375, |
|
"logps/rejected": -419.78912353515625, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.700168251991272, |
|
"rewards/margins": 0.6963863968849182, |
|
"rewards/rejected": -1.396554708480835, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.335101386471285e-08, |
|
"logits/chosen": -0.4976566731929779, |
|
"logits/rejected": -0.5353574752807617, |
|
"logps/chosen": -427.3526306152344, |
|
"logps/rejected": -440.6385803222656, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7464424967765808, |
|
"rewards/margins": 0.34794315695762634, |
|
"rewards/rejected": -1.0943857431411743, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.138484731612273e-08, |
|
"logits/chosen": -0.47734421491622925, |
|
"logits/rejected": -0.49538105726242065, |
|
"logps/chosen": -419.697021484375, |
|
"logps/rejected": -415.8753356933594, |
|
"loss": 0.619, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7787846922874451, |
|
"rewards/margins": 0.4026049077510834, |
|
"rewards/rejected": -1.1813896894454956, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.022647858135501e-08, |
|
"logits/chosen": -0.46987882256507874, |
|
"logits/rejected": -0.548054039478302, |
|
"logps/chosen": -452.89031982421875, |
|
"logps/rejected": -434.79412841796875, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5671200156211853, |
|
"rewards/margins": 0.5638147592544556, |
|
"rewards/rejected": -1.1309348344802856, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.987963446492384e-08, |
|
"logits/chosen": -0.451104074716568, |
|
"logits/rejected": -0.5252590179443359, |
|
"logps/chosen": -411.8269958496094, |
|
"logps/rejected": -424.35711669921875, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7318955659866333, |
|
"rewards/margins": 0.4990832805633545, |
|
"rewards/rejected": -1.2309788465499878, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.034777072871394e-08, |
|
"logits/chosen": -0.5124109387397766, |
|
"logits/rejected": -0.5305658578872681, |
|
"logps/chosen": -402.7662048339844, |
|
"logps/rejected": -395.8765563964844, |
|
"loss": 0.5964, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6769779324531555, |
|
"rewards/margins": 0.48363232612609863, |
|
"rewards/rejected": -1.1606101989746094, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -0.4875066578388214, |
|
"eval_logits/rejected": -0.5277794599533081, |
|
"eval_logps/chosen": -418.2664489746094, |
|
"eval_logps/rejected": -439.24761962890625, |
|
"eval_loss": 0.5795618891716003, |
|
"eval_rewards/accuracies": 0.716269850730896, |
|
"eval_rewards/chosen": -0.7164830565452576, |
|
"eval_rewards/margins": 0.5342397689819336, |
|
"eval_rewards/rejected": -1.2507227659225464, |
|
"eval_runtime": 305.9708, |
|
"eval_samples_per_second": 6.537, |
|
"eval_steps_per_second": 0.206, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.163407093778243e-08, |
|
"logits/chosen": -0.4408170282840729, |
|
"logits/rejected": -0.5464833974838257, |
|
"logps/chosen": -402.9147644042969, |
|
"logps/rejected": -394.70172119140625, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6640672087669373, |
|
"rewards/margins": 0.5019302368164062, |
|
"rewards/rejected": -1.1659975051879883, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.37414453970758e-08, |
|
"logits/chosen": -0.4044371247291565, |
|
"logits/rejected": -0.44484180212020874, |
|
"logps/chosen": -477.49853515625, |
|
"logps/rejected": -480.2476501464844, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6610472202301025, |
|
"rewards/margins": 0.5942956209182739, |
|
"rewards/rejected": -1.2553428411483765, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.6672530179410183e-08, |
|
"logits/chosen": -0.45786118507385254, |
|
"logits/rejected": -0.5099942088127136, |
|
"logps/chosen": -395.73785400390625, |
|
"logps/rejected": -440.14495849609375, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6342514753341675, |
|
"rewards/margins": 0.6357657313346863, |
|
"rewards/rejected": -1.2700172662734985, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.04296862450451e-08, |
|
"logits/chosen": -0.4265124797821045, |
|
"logits/rejected": -0.4525698721408844, |
|
"logps/chosen": -421.3204650878906, |
|
"logps/rejected": -476.8326110839844, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7191395163536072, |
|
"rewards/margins": 0.5786597728729248, |
|
"rewards/rejected": -1.2977991104125977, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.501499865314171e-08, |
|
"logits/chosen": -0.4681014120578766, |
|
"logits/rejected": -0.530280590057373, |
|
"logps/chosen": -422.77197265625, |
|
"logps/rejected": -424.14056396484375, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7412760257720947, |
|
"rewards/margins": 0.6121469736099243, |
|
"rewards/rejected": -1.3534228801727295, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0430275865371265e-08, |
|
"logits/chosen": -0.45095524191856384, |
|
"logits/rejected": -0.5133798122406006, |
|
"logps/chosen": -359.3103332519531, |
|
"logps/rejected": -419.0379333496094, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.862167477607727, |
|
"rewards/margins": 0.4554516673088074, |
|
"rewards/rejected": -1.3176189661026, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.677049141901315e-09, |
|
"logits/chosen": -0.4633178114891052, |
|
"logits/rejected": -0.542106032371521, |
|
"logps/chosen": -444.0020446777344, |
|
"logps/rejected": -472.94390869140625, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6808910369873047, |
|
"rewards/margins": 0.5797610878944397, |
|
"rewards/rejected": -1.2606520652770996, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.756572029968708e-09, |
|
"logits/chosen": -0.49038058519363403, |
|
"logits/rejected": -0.5752415060997009, |
|
"logps/chosen": -397.6487121582031, |
|
"logps/rejected": -417.7991638183594, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7362946271896362, |
|
"rewards/margins": 0.7132160067558289, |
|
"rewards/rejected": -1.4495106935501099, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6698199452053199e-09, |
|
"logits/chosen": -0.4550807476043701, |
|
"logits/rejected": -0.5087591409683228, |
|
"logps/chosen": -394.6986999511719, |
|
"logps/rejected": -468.04742431640625, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7036923170089722, |
|
"rewards/margins": 0.5819169282913208, |
|
"rewards/rejected": -1.285609245300293, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.1748984585560094e-10, |
|
"logits/chosen": -0.47594064474105835, |
|
"logits/rejected": -0.4794263243675232, |
|
"logps/chosen": -370.91998291015625, |
|
"logps/rejected": -406.54864501953125, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6993014216423035, |
|
"rewards/margins": 0.5366551280021667, |
|
"rewards/rejected": -1.2359565496444702, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -0.48749276995658875, |
|
"eval_logits/rejected": -0.5277734398841858, |
|
"eval_logps/chosen": -418.4147033691406, |
|
"eval_logps/rejected": -439.3841857910156, |
|
"eval_loss": 0.5797061920166016, |
|
"eval_rewards/accuracies": 0.716269850730896, |
|
"eval_rewards/chosen": -0.7179651260375977, |
|
"eval_rewards/margins": 0.5341233611106873, |
|
"eval_rewards/rejected": -1.2520886659622192, |
|
"eval_runtime": 302.9913, |
|
"eval_samples_per_second": 6.601, |
|
"eval_steps_per_second": 0.208, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.5706673860549927, |
|
"logits/rejected": -0.613754153251648, |
|
"logps/chosen": -418.0396423339844, |
|
"logps/rejected": -457.19842529296875, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8323566317558289, |
|
"rewards/margins": 0.60826575756073, |
|
"rewards/rejected": -1.440622329711914, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.598351346759896, |
|
"train_runtime": 27817.4474, |
|
"train_samples_per_second": 2.198, |
|
"train_steps_per_second": 0.069 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|