|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.875, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.074951171875, |
|
"logits/rejected": 80.7809829711914, |
|
"logps/chosen": -34.20733642578125, |
|
"logps/rejected": -32.97297668457031, |
|
"loss": 0.9951, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 0.003438829444348812, |
|
"rewards/margins": 0.004895869642496109, |
|
"rewards/rejected": -0.0014570390339940786, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.6875, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.64326477050781, |
|
"logits/rejected": 80.53416442871094, |
|
"logps/chosen": -33.720584869384766, |
|
"logps/rejected": -30.82167625427246, |
|
"loss": 0.9986, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.003067571669816971, |
|
"rewards/margins": 0.0013512909645214677, |
|
"rewards/rejected": -0.004418861120939255, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 12.0, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.51445007324219, |
|
"logits/rejected": 82.54810333251953, |
|
"logps/chosen": -33.81728744506836, |
|
"logps/rejected": -31.204355239868164, |
|
"loss": 1.0024, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.019553204998373985, |
|
"rewards/margins": -0.002394508570432663, |
|
"rewards/rejected": 0.0219477117061615, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 81.101806640625, |
|
"logits/rejected": 81.09938049316406, |
|
"logps/chosen": -32.73223876953125, |
|
"logps/rejected": -33.143699645996094, |
|
"loss": 0.9783, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.04418279603123665, |
|
"rewards/margins": 0.02170029655098915, |
|
"rewards/rejected": 0.022482499480247498, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.82136535644531, |
|
"logits/rejected": 78.82911682128906, |
|
"logps/chosen": -30.3783016204834, |
|
"logps/rejected": -30.641677856445312, |
|
"loss": 0.9605, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.082694411277771, |
|
"rewards/margins": 0.039533428847789764, |
|
"rewards/rejected": 0.04316098242998123, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 9.75, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.49494171142578, |
|
"logits/rejected": 83.55232238769531, |
|
"logps/chosen": -30.781469345092773, |
|
"logps/rejected": -29.190662384033203, |
|
"loss": 1.0006, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.03936903923749924, |
|
"rewards/margins": -0.0006168211111798882, |
|
"rewards/rejected": 0.039985861629247665, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 84.1231918334961, |
|
"logits/rejected": 84.15650939941406, |
|
"logps/chosen": -30.209863662719727, |
|
"logps/rejected": -32.619781494140625, |
|
"loss": 0.9999, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.041915904730558395, |
|
"rewards/margins": 0.000123101839562878, |
|
"rewards/rejected": 0.04179280251264572, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 12.375, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.8515853881836, |
|
"logits/rejected": 81.83155822753906, |
|
"logps/chosen": -30.982410430908203, |
|
"logps/rejected": -30.617040634155273, |
|
"loss": 0.9618, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.05225307494401932, |
|
"rewards/margins": 0.03819245845079422, |
|
"rewards/rejected": 0.014060619287192822, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 14.75, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.68502807617188, |
|
"logits/rejected": 78.65934753417969, |
|
"logps/chosen": -32.18014144897461, |
|
"logps/rejected": -30.878421783447266, |
|
"loss": 0.9627, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.04580371826887131, |
|
"rewards/margins": 0.03849860280752182, |
|
"rewards/rejected": 0.007305114530026913, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 12.375, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.6978530883789, |
|
"logits/rejected": 83.72080993652344, |
|
"logps/chosen": -33.769813537597656, |
|
"logps/rejected": -31.638240814208984, |
|
"loss": 0.9577, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.05351760983467102, |
|
"rewards/margins": 0.04228735715150833, |
|
"rewards/rejected": 0.01123025082051754, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.71279907226562, |
|
"eval_logits/rejected": 98.70589447021484, |
|
"eval_logps/chosen": -32.32750701904297, |
|
"eval_logps/rejected": -35.914642333984375, |
|
"eval_loss": 0.993482768535614, |
|
"eval_rewards/accuracies": 0.5066444873809814, |
|
"eval_rewards/chosen": 0.011567190289497375, |
|
"eval_rewards/margins": 0.006378817837685347, |
|
"eval_rewards/rejected": 0.005188372451812029, |
|
"eval_runtime": 104.0772, |
|
"eval_samples_per_second": 3.296, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 14.125, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.8444595336914, |
|
"logits/rejected": 83.72880554199219, |
|
"logps/chosen": -32.138458251953125, |
|
"logps/rejected": -32.649452209472656, |
|
"loss": 0.9122, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.08843465149402618, |
|
"rewards/margins": 0.0877910926938057, |
|
"rewards/rejected": 0.0006435603136196733, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 13.0, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.81868743896484, |
|
"logits/rejected": 83.92826080322266, |
|
"logps/chosen": -28.111730575561523, |
|
"logps/rejected": -35.34394454956055, |
|
"loss": 0.9297, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.08178504556417465, |
|
"rewards/margins": 0.07026515156030655, |
|
"rewards/rejected": 0.011519892141222954, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 9.375, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 80.93681335449219, |
|
"logits/rejected": 80.97130584716797, |
|
"logps/chosen": -30.195592880249023, |
|
"logps/rejected": -31.842870712280273, |
|
"loss": 0.939, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.07117608934640884, |
|
"rewards/margins": 0.06230046600103378, |
|
"rewards/rejected": 0.008875617757439613, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 81.7886962890625, |
|
"logits/rejected": 81.80296325683594, |
|
"logps/chosen": -26.81143569946289, |
|
"logps/rejected": -32.920143127441406, |
|
"loss": 0.8867, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0635889396071434, |
|
"rewards/margins": 0.11326569318771362, |
|
"rewards/rejected": -0.04967674985527992, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 80.10134887695312, |
|
"logits/rejected": 80.06209564208984, |
|
"logps/chosen": -28.821029663085938, |
|
"logps/rejected": -33.226470947265625, |
|
"loss": 0.8772, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.05680803209543228, |
|
"rewards/margins": 0.1228049248456955, |
|
"rewards/rejected": -0.06599690765142441, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 81.63700866699219, |
|
"logits/rejected": 81.66020202636719, |
|
"logps/chosen": -34.080284118652344, |
|
"logps/rejected": -30.817296981811523, |
|
"loss": 0.9079, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.005646559409797192, |
|
"rewards/margins": 0.09212217479944229, |
|
"rewards/rejected": -0.08647561073303223, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 15.125, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 82.27827453613281, |
|
"logits/rejected": 82.23299407958984, |
|
"logps/chosen": -31.016094207763672, |
|
"logps/rejected": -33.03407669067383, |
|
"loss": 0.8743, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.024319300428032875, |
|
"rewards/margins": 0.12572081387043, |
|
"rewards/rejected": -0.10140150785446167, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 12.5, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 79.44108581542969, |
|
"logits/rejected": 79.4173355102539, |
|
"logps/chosen": -30.948467254638672, |
|
"logps/rejected": -32.03376007080078, |
|
"loss": 0.8875, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.02486853487789631, |
|
"rewards/margins": 0.1125236377120018, |
|
"rewards/rejected": -0.08765510469675064, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 80.90037536621094, |
|
"logits/rejected": 80.8733901977539, |
|
"logps/chosen": -30.740875244140625, |
|
"logps/rejected": -31.226177215576172, |
|
"loss": 0.9221, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.002553999889642, |
|
"rewards/margins": 0.07836906611919403, |
|
"rewards/rejected": -0.07581506669521332, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 14.4375, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 76.02571105957031, |
|
"logits/rejected": 75.9735107421875, |
|
"logps/chosen": -34.12778854370117, |
|
"logps/rejected": -33.30614471435547, |
|
"loss": 0.8562, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0624953992664814, |
|
"rewards/margins": 0.14376921951770782, |
|
"rewards/rejected": -0.08127383887767792, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 97.97191619873047, |
|
"eval_logits/rejected": 97.95458984375, |
|
"eval_logps/chosen": -32.94685363769531, |
|
"eval_logps/rejected": -36.8216552734375, |
|
"eval_loss": 0.9646754860877991, |
|
"eval_rewards/accuracies": 0.5676910281181335, |
|
"eval_rewards/chosen": -0.05036771669983864, |
|
"eval_rewards/margins": 0.035145342350006104, |
|
"eval_rewards/rejected": -0.08551305532455444, |
|
"eval_runtime": 103.8205, |
|
"eval_samples_per_second": 3.304, |
|
"eval_steps_per_second": 0.414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 18.625, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 78.56858825683594, |
|
"logits/rejected": 78.47276306152344, |
|
"logps/chosen": -33.586204528808594, |
|
"logps/rejected": -36.05295944213867, |
|
"loss": 0.8633, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0359516479074955, |
|
"rewards/margins": 0.1426382064819336, |
|
"rewards/rejected": -0.1066865548491478, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 14.6875, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 80.53726959228516, |
|
"logits/rejected": 80.62950134277344, |
|
"logps/chosen": -31.454355239868164, |
|
"logps/rejected": -32.02970504760742, |
|
"loss": 0.8286, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.049460187554359436, |
|
"rewards/margins": 0.17953529953956604, |
|
"rewards/rejected": -0.1300750970840454, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 14.0, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 77.57215881347656, |
|
"logits/rejected": 77.61759948730469, |
|
"logps/chosen": -32.74131393432617, |
|
"logps/rejected": -35.29151153564453, |
|
"loss": 0.8713, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.006443141493946314, |
|
"rewards/margins": 0.13437876105308533, |
|
"rewards/rejected": -0.12793561816215515, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 16.5, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 79.76509094238281, |
|
"logits/rejected": 80.07305908203125, |
|
"logps/chosen": -31.252161026000977, |
|
"logps/rejected": -32.75459671020508, |
|
"loss": 0.8345, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.04359050467610359, |
|
"rewards/margins": 0.16584597527980804, |
|
"rewards/rejected": -0.12225550413131714, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 15.0625, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 78.32225036621094, |
|
"logits/rejected": 78.37224578857422, |
|
"logps/chosen": -27.815731048583984, |
|
"logps/rejected": -31.077539443969727, |
|
"loss": 0.9043, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.02198370359838009, |
|
"rewards/margins": 0.09569612890481949, |
|
"rewards/rejected": -0.11767983436584473, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 13.75, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 75.31268310546875, |
|
"logits/rejected": 75.46823120117188, |
|
"logps/chosen": -31.16106605529785, |
|
"logps/rejected": -38.22980880737305, |
|
"loss": 0.7589, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.039992958307266235, |
|
"rewards/margins": 0.2528113126754761, |
|
"rewards/rejected": -0.21281830966472626, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 74.42684173583984, |
|
"logits/rejected": 74.45647430419922, |
|
"logps/chosen": -31.796234130859375, |
|
"logps/rejected": -33.113197326660156, |
|
"loss": 0.8432, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0017309554386883974, |
|
"rewards/margins": 0.15904627740383148, |
|
"rewards/rejected": -0.1607772409915924, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 19.75, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 77.391845703125, |
|
"logits/rejected": 77.1694564819336, |
|
"logps/chosen": -32.110328674316406, |
|
"logps/rejected": -30.959293365478516, |
|
"loss": 0.9109, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.030043601989746094, |
|
"rewards/margins": 0.09918614476919174, |
|
"rewards/rejected": -0.12922975420951843, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 77.39460754394531, |
|
"logits/rejected": 77.31553649902344, |
|
"logps/chosen": -33.84746551513672, |
|
"logps/rejected": -34.28327178955078, |
|
"loss": 0.7586, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.02567802369594574, |
|
"rewards/margins": 0.2523185908794403, |
|
"rewards/rejected": -0.22664058208465576, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 14.8125, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 73.05286407470703, |
|
"logits/rejected": 73.1805648803711, |
|
"logps/chosen": -33.01818084716797, |
|
"logps/rejected": -30.520904541015625, |
|
"loss": 0.8271, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.03875284641981125, |
|
"rewards/margins": 0.18064062297344208, |
|
"rewards/rejected": -0.14188775420188904, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 97.71524047851562, |
|
"eval_logits/rejected": 97.68975067138672, |
|
"eval_logps/chosen": -33.206886291503906, |
|
"eval_logps/rejected": -37.196205139160156, |
|
"eval_loss": 0.9533767700195312, |
|
"eval_rewards/accuracies": 0.5714285373687744, |
|
"eval_rewards/chosen": -0.07637124508619308, |
|
"eval_rewards/margins": 0.04659651592373848, |
|
"eval_rewards/rejected": -0.12296776473522186, |
|
"eval_runtime": 104.0124, |
|
"eval_samples_per_second": 3.298, |
|
"eval_steps_per_second": 0.413, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 13.0, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 80.51168060302734, |
|
"logits/rejected": 80.51959228515625, |
|
"logps/chosen": -30.940738677978516, |
|
"logps/rejected": -33.976158142089844, |
|
"loss": 0.841, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.00772014120593667, |
|
"rewards/margins": 0.1699191778898239, |
|
"rewards/rejected": -0.17763930559158325, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 14.9375, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 77.665771484375, |
|
"logits/rejected": 77.6807861328125, |
|
"logps/chosen": -31.073196411132812, |
|
"logps/rejected": -30.321044921875, |
|
"loss": 0.8202, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05253634601831436, |
|
"rewards/margins": 0.19269177317619324, |
|
"rewards/rejected": -0.14015543460845947, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 16.375, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 74.7007064819336, |
|
"logits/rejected": 74.74031829833984, |
|
"logps/chosen": -29.66641616821289, |
|
"logps/rejected": -34.42815399169922, |
|
"loss": 0.7549, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.06861907988786697, |
|
"rewards/margins": 0.24794825911521912, |
|
"rewards/rejected": -0.17932915687561035, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 17.125, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 79.32682800292969, |
|
"logits/rejected": 79.36729431152344, |
|
"logps/chosen": -33.05027770996094, |
|
"logps/rejected": -35.377281188964844, |
|
"loss": 0.7972, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.01618681475520134, |
|
"rewards/margins": 0.21704569458961487, |
|
"rewards/rejected": -0.20085887610912323, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 15.5, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 78.2139663696289, |
|
"logits/rejected": 78.22695922851562, |
|
"logps/chosen": -33.175575256347656, |
|
"logps/rejected": -34.91777801513672, |
|
"loss": 0.7788, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.04681625962257385, |
|
"rewards/margins": 0.22941403090953827, |
|
"rewards/rejected": -0.18259775638580322, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 79.90480041503906, |
|
"logits/rejected": 79.93946075439453, |
|
"logps/chosen": -28.85941505432129, |
|
"logps/rejected": -33.032447814941406, |
|
"loss": 0.8014, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.06927979737520218, |
|
"rewards/margins": 0.20753948390483856, |
|
"rewards/rejected": -0.13825969398021698, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 79.3287582397461, |
|
"logits/rejected": 79.33840942382812, |
|
"logps/chosen": -33.099693298339844, |
|
"logps/rejected": -37.02233123779297, |
|
"loss": 0.8425, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.022353025153279305, |
|
"rewards/margins": 0.16894304752349854, |
|
"rewards/rejected": -0.1912960708141327, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 13.25, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 72.94795227050781, |
|
"logits/rejected": 72.81517791748047, |
|
"logps/chosen": -30.674768447875977, |
|
"logps/rejected": -29.731273651123047, |
|
"loss": 0.8564, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.009864235296845436, |
|
"rewards/margins": 0.14720600843429565, |
|
"rewards/rejected": -0.15707024931907654, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.88636748450143, |
|
"train_runtime": 2554.6221, |
|
"train_samples_per_second": 1.205, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|