{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 50,
"global_step": 414,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.036231884057971016,
"grad_norm": 61.75757328159282,
"learning_rate": 5e-07,
"logits/chosen": -2.732090473175049,
"logits/rejected": -2.7100460529327393,
"logps/chosen": -182.59107971191406,
"logps/rejected": -189.5584716796875,
"loss": 0.6889,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.00281245238147676,
"rewards/margins": 0.0058334446512162685,
"rewards/rejected": -0.008645896799862385,
"step": 5
},
{
"epoch": 0.07246376811594203,
"grad_norm": 44.951594498596215,
"learning_rate": 1e-06,
"logits/chosen": -2.754081964492798,
"logits/rejected": -2.752152919769287,
"logps/chosen": -197.337158203125,
"logps/rejected": -184.00933837890625,
"loss": 0.6274,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.03147688880562782,
"rewards/margins": 0.1896156221628189,
"rewards/rejected": -0.1581387221813202,
"step": 10
},
{
"epoch": 0.10869565217391304,
"grad_norm": 51.34158391398985,
"learning_rate": 9.996221126793764e-07,
"logits/chosen": -2.694983959197998,
"logits/rejected": -2.692361831665039,
"logps/chosen": -203.20387268066406,
"logps/rejected": -204.64244079589844,
"loss": 0.5838,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.6150370836257935,
"rewards/margins": 0.9413955807685852,
"rewards/rejected": -0.32635849714279175,
"step": 15
},
{
"epoch": 0.14492753623188406,
"grad_norm": 34.76477183019994,
"learning_rate": 9.984890219128145e-07,
"logits/chosen": -2.612672805786133,
"logits/rejected": -2.5829074382781982,
"logps/chosen": -188.62716674804688,
"logps/rejected": -192.87452697753906,
"loss": 0.5142,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.8545471429824829,
"rewards/margins": 1.280996561050415,
"rewards/rejected": -0.4264492094516754,
"step": 20
},
{
"epoch": 0.18115942028985507,
"grad_norm": 36.75278346647978,
"learning_rate": 9.966024404228493e-07,
"logits/chosen": -2.450106143951416,
"logits/rejected": -2.4297895431518555,
"logps/chosen": -179.98348999023438,
"logps/rejected": -179.38925170898438,
"loss": 0.5032,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 0.38695499300956726,
"rewards/margins": 0.8900691866874695,
"rewards/rejected": -0.5031141638755798,
"step": 25
},
{
"epoch": 0.21739130434782608,
"grad_norm": 31.781918105397544,
"learning_rate": 9.939652198703783e-07,
"logits/chosen": -2.324214458465576,
"logits/rejected": -2.325657367706299,
"logps/chosen": -188.5428466796875,
"logps/rejected": -193.8271942138672,
"loss": 0.4995,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.6558719873428345,
"rewards/margins": 1.2207121849060059,
"rewards/rejected": -0.5648401975631714,
"step": 30
},
{
"epoch": 0.2536231884057971,
"grad_norm": 39.36776247005876,
"learning_rate": 9.905813465442354e-07,
"logits/chosen": -2.236240863800049,
"logits/rejected": -2.2105681896209717,
"logps/chosen": -203.98277282714844,
"logps/rejected": -194.84640502929688,
"loss": 0.5091,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.8834564089775085,
"rewards/margins": 1.2675695419311523,
"rewards/rejected": -0.3841131329536438,
"step": 35
},
{
"epoch": 0.2898550724637681,
"grad_norm": 30.817630358317576,
"learning_rate": 9.864559353357187e-07,
"logits/chosen": -2.068774700164795,
"logits/rejected": -2.0603950023651123,
"logps/chosen": -182.76817321777344,
"logps/rejected": -185.9797821044922,
"loss": 0.4873,
"rewards/accuracies": 0.78125,
"rewards/chosen": 1.03325617313385,
"rewards/margins": 1.0384714603424072,
"rewards/rejected": -0.005215352866798639,
"step": 40
},
{
"epoch": 0.32608695652173914,
"grad_norm": 29.09268118121073,
"learning_rate": 9.815952220071804e-07,
"logits/chosen": -1.8718488216400146,
"logits/rejected": -1.8250553607940674,
"logps/chosen": -195.60968017578125,
"logps/rejected": -221.5565643310547,
"loss": 0.4597,
"rewards/accuracies": 0.78125,
"rewards/chosen": 1.3850222826004028,
"rewards/margins": 1.8469291925430298,
"rewards/rejected": -0.4619070589542389,
"step": 45
},
{
"epoch": 0.36231884057971014,
"grad_norm": 29.526743630011346,
"learning_rate": 9.76006553766365e-07,
"logits/chosen": -1.653713583946228,
"logits/rejected": -1.6171553134918213,
"logps/chosen": -198.85989379882812,
"logps/rejected": -203.60678100585938,
"loss": 0.4516,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 0.8292047381401062,
"rewards/margins": 1.6851797103881836,
"rewards/rejected": -0.8559748530387878,
"step": 50
},
{
"epoch": 0.36231884057971014,
"eval_logits/chosen": -1.7065542936325073,
"eval_logits/rejected": -1.630993127822876,
"eval_logps/chosen": -192.20655822753906,
"eval_logps/rejected": -206.51295471191406,
"eval_loss": 0.4420754015445709,
"eval_rewards/accuracies": 0.7903226017951965,
"eval_rewards/chosen": 0.8112886548042297,
"eval_rewards/margins": 1.641775369644165,
"eval_rewards/rejected": -0.8304866552352905,
"eval_runtime": 247.7543,
"eval_samples_per_second": 15.83,
"eval_steps_per_second": 0.25,
"step": 50
},
{
"epoch": 0.39855072463768115,
"grad_norm": 30.94859785748943,
"learning_rate": 9.696983781607415e-07,
"logits/chosen": -1.7253024578094482,
"logits/rejected": -1.6905288696289062,
"logps/chosen": -182.9173126220703,
"logps/rejected": -171.9159698486328,
"loss": 0.4573,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 0.6172864437103271,
"rewards/margins": 1.648385763168335,
"rewards/rejected": -1.031099557876587,
"step": 55
},
{
"epoch": 0.43478260869565216,
"grad_norm": 40.75469044830845,
"learning_rate": 9.626802303086209e-07,
"logits/chosen": -1.87893807888031,
"logits/rejected": -1.8299003839492798,
"logps/chosen": -186.30145263671875,
"logps/rejected": -193.9145965576172,
"loss": 0.4264,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.4657188057899475,
"rewards/margins": 1.7288262844085693,
"rewards/rejected": -1.2631075382232666,
"step": 60
},
{
"epoch": 0.47101449275362317,
"grad_norm": 35.556274541495966,
"learning_rate": 9.549627184863528e-07,
"logits/chosen": -2.016784906387329,
"logits/rejected": -1.9150521755218506,
"logps/chosen": -191.3840789794922,
"logps/rejected": -192.66639709472656,
"loss": 0.4289,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 0.0974711924791336,
"rewards/margins": 1.6010549068450928,
"rewards/rejected": -1.5035837888717651,
"step": 65
},
{
"epoch": 0.5072463768115942,
"grad_norm": 26.46585227154451,
"learning_rate": 9.465575080933957e-07,
"logits/chosen": -1.853308916091919,
"logits/rejected": -1.7947351932525635,
"logps/chosen": -172.3099822998047,
"logps/rejected": -208.057373046875,
"loss": 0.3948,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.21238946914672852,
"rewards/margins": 1.8403332233428955,
"rewards/rejected": -1.627943754196167,
"step": 70
},
{
"epoch": 0.5434782608695652,
"grad_norm": 31.533541728553253,
"learning_rate": 9.374773040194878e-07,
"logits/chosen": -1.8850362300872803,
"logits/rejected": -1.8103622198104858,
"logps/chosen": -205.5053253173828,
"logps/rejected": -210.96981811523438,
"loss": 0.4364,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 0.17310531437397003,
"rewards/margins": 1.8103282451629639,
"rewards/rejected": -1.6372228860855103,
"step": 75
},
{
"epoch": 0.5797101449275363,
"grad_norm": 29.780905727815526,
"learning_rate": 9.277358314405818e-07,
"logits/chosen": -1.7906593084335327,
"logits/rejected": -1.742597222328186,
"logps/chosen": -188.9757080078125,
"logps/rejected": -205.398193359375,
"loss": 0.3987,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.19728976488113403,
"rewards/margins": 1.879663109779358,
"rewards/rejected": -2.0769529342651367,
"step": 80
},
{
"epoch": 0.6159420289855072,
"grad_norm": 34.4646468352745,
"learning_rate": 9.173478150725651e-07,
"logits/chosen": -1.7377640008926392,
"logits/rejected": -1.6257518529891968,
"logps/chosen": -210.00320434570312,
"logps/rejected": -215.84835815429688,
"loss": 0.4258,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.08612661063671112,
"rewards/margins": 2.4435980319976807,
"rewards/rejected": -2.357471227645874,
"step": 85
},
{
"epoch": 0.6521739130434783,
"grad_norm": 29.12537980218493,
"learning_rate": 9.063289569141251e-07,
"logits/chosen": -1.7976572513580322,
"logits/rejected": -1.739854097366333,
"logps/chosen": -214.8435821533203,
"logps/rejected": -224.52005004882812,
"loss": 0.4147,
"rewards/accuracies": 0.84375,
"rewards/chosen": 0.46363013982772827,
"rewards/margins": 2.330965518951416,
"rewards/rejected": -1.867335557937622,
"step": 90
},
{
"epoch": 0.6884057971014492,
"grad_norm": 35.00421638148543,
"learning_rate": 8.946959125124051e-07,
"logits/chosen": -1.861108422279358,
"logits/rejected": -1.780923843383789,
"logps/chosen": -207.5733184814453,
"logps/rejected": -193.34400939941406,
"loss": 0.4121,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.4294491708278656,
"rewards/margins": 2.142913341522217,
"rewards/rejected": -1.7134641408920288,
"step": 95
},
{
"epoch": 0.7246376811594203,
"grad_norm": 31.611698501726103,
"learning_rate": 8.824662657873238e-07,
"logits/chosen": -1.8221423625946045,
"logits/rejected": -1.802095651626587,
"logps/chosen": -173.2090301513672,
"logps/rejected": -206.5529327392578,
"loss": 0.3759,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.04721298813819885,
"rewards/margins": 1.9821780920028687,
"rewards/rejected": -2.029391050338745,
"step": 100
},
{
"epoch": 0.7246376811594203,
"eval_logits/chosen": -1.8523844480514526,
"eval_logits/rejected": -1.7929590940475464,
"eval_logps/chosen": -200.7910614013672,
"eval_logps/rejected": -220.96961975097656,
"eval_loss": 0.4121003746986389,
"eval_rewards/accuracies": 0.8145161271095276,
"eval_rewards/chosen": -0.047160252928733826,
"eval_rewards/margins": 2.2289960384368896,
"eval_rewards/rejected": -2.276156187057495,
"eval_runtime": 247.371,
"eval_samples_per_second": 15.855,
"eval_steps_per_second": 0.251,
"step": 100
},
{
"epoch": 0.7608695652173914,
"grad_norm": 30.01063089972391,
"learning_rate": 8.696585024526135e-07,
"logits/chosen": -1.7823431491851807,
"logits/rejected": -1.7234203815460205,
"logps/chosen": -189.0630340576172,
"logps/rejected": -224.55642700195312,
"loss": 0.3969,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.0058825016021728516,
"rewards/margins": 2.5169529914855957,
"rewards/rejected": -2.5228357315063477,
"step": 105
},
{
"epoch": 0.7971014492753623,
"grad_norm": 35.65348267869082,
"learning_rate": 8.562919820737535e-07,
"logits/chosen": -1.7099103927612305,
"logits/rejected": -1.6304385662078857,
"logps/chosen": -206.9807586669922,
"logps/rejected": -209.36962890625,
"loss": 0.3755,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.26569992303848267,
"rewards/margins": 2.464618444442749,
"rewards/rejected": -2.730318546295166,
"step": 110
},
{
"epoch": 0.8333333333333334,
"grad_norm": 28.250647507886438,
"learning_rate": 8.423869088050315e-07,
"logits/chosen": -1.7219148874282837,
"logits/rejected": -1.677403450012207,
"logps/chosen": -195.88735961914062,
"logps/rejected": -222.36581420898438,
"loss": 0.3912,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -0.14722302556037903,
"rewards/margins": 2.4208686351776123,
"rewards/rejected": -2.568091630935669,
"step": 115
},
{
"epoch": 0.8695652173913043,
"grad_norm": 36.27157250663838,
"learning_rate": 8.2796430084997e-07,
"logits/chosen": -1.6080610752105713,
"logits/rejected": -1.521059513092041,
"logps/chosen": -197.2279510498047,
"logps/rejected": -208.6706085205078,
"loss": 0.3668,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 0.07672278583049774,
"rewards/margins": 2.720585584640503,
"rewards/rejected": -2.64386248588562,
"step": 120
},
{
"epoch": 0.9057971014492754,
"grad_norm": 28.694980241284195,
"learning_rate": 8.130459586912753e-07,
"logits/chosen": -1.4262475967407227,
"logits/rejected": -1.3733441829681396,
"logps/chosen": -219.4936981201172,
"logps/rejected": -217.61599731445312,
"loss": 0.4582,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.8654589653015137,
"rewards/margins": 1.6804126501083374,
"rewards/rejected": -2.5458714962005615,
"step": 125
},
{
"epoch": 0.9420289855072463,
"grad_norm": 29.710262188798424,
"learning_rate": 7.97654432138333e-07,
"logits/chosen": -1.4053936004638672,
"logits/rejected": -1.336163878440857,
"logps/chosen": -210.55026245117188,
"logps/rejected": -243.9113311767578,
"loss": 0.3921,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.20443829894065857,
"rewards/margins": 2.7204947471618652,
"rewards/rejected": -2.516056537628174,
"step": 130
},
{
"epoch": 0.9782608695652174,
"grad_norm": 26.71701106117664,
"learning_rate": 7.81812986242061e-07,
"logits/chosen": -1.423004388809204,
"logits/rejected": -1.2980186939239502,
"logps/chosen": -193.02523803710938,
"logps/rejected": -232.86788940429688,
"loss": 0.3631,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.15479817986488342,
"rewards/margins": 3.0325751304626465,
"rewards/rejected": -2.877776622772217,
"step": 135
},
{
"epoch": 1.0144927536231885,
"grad_norm": 18.847111481815627,
"learning_rate": 7.655455661286375e-07,
"logits/chosen": -1.3630199432373047,
"logits/rejected": -1.3213447332382202,
"logps/chosen": -193.20803833007812,
"logps/rejected": -237.5965118408203,
"loss": 0.2543,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.20990173518657684,
"rewards/margins": 3.6189141273498535,
"rewards/rejected": -3.8288159370422363,
"step": 140
},
{
"epoch": 1.0507246376811594,
"grad_norm": 18.388157966842616,
"learning_rate": 7.488767608052628e-07,
"logits/chosen": -1.543648362159729,
"logits/rejected": -1.399395227432251,
"logps/chosen": -190.61196899414062,
"logps/rejected": -237.07424926757812,
"loss": 0.1744,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 0.6787484884262085,
"rewards/margins": 4.023434638977051,
"rewards/rejected": -3.3446857929229736,
"step": 145
},
{
"epoch": 1.0869565217391304,
"grad_norm": 15.923928842240379,
"learning_rate": 7.318317659926636e-07,
"logits/chosen": -1.6209495067596436,
"logits/rejected": -1.5568897724151611,
"logps/chosen": -172.939697265625,
"logps/rejected": -233.3376007080078,
"loss": 0.149,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.1221749782562256,
"rewards/margins": 3.8970863819122314,
"rewards/rejected": -2.774911403656006,
"step": 150
},
{
"epoch": 1.0869565217391304,
"eval_logits/chosen": -1.7425010204315186,
"eval_logits/rejected": -1.674597978591919,
"eval_logps/chosen": -194.48468017578125,
"eval_logps/rejected": -217.0243682861328,
"eval_loss": 0.42049291729927063,
"eval_rewards/accuracies": 0.8205645084381104,
"eval_rewards/chosen": 0.5834774374961853,
"eval_rewards/margins": 2.4651052951812744,
"eval_rewards/rejected": -1.881628155708313,
"eval_runtime": 247.5785,
"eval_samples_per_second": 15.841,
"eval_steps_per_second": 0.25,
"step": 150
},
{
"epoch": 1.1231884057971016,
"grad_norm": 14.18220461970911,
"learning_rate": 7.144363460405189e-07,
"logits/chosen": -1.7796205282211304,
"logits/rejected": -1.6700912714004517,
"logps/chosen": -190.59030151367188,
"logps/rejected": -233.08151245117188,
"loss": 0.1482,
"rewards/accuracies": 0.96875,
"rewards/chosen": 1.356343150138855,
"rewards/margins": 4.483328342437744,
"rewards/rejected": -3.1269848346710205,
"step": 155
},
{
"epoch": 1.1594202898550725,
"grad_norm": 12.199643576270322,
"learning_rate": 6.967167949833762e-07,
"logits/chosen": -1.7053067684173584,
"logits/rejected": -1.613364577293396,
"logps/chosen": -192.91790771484375,
"logps/rejected": -245.5927734375,
"loss": 0.143,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.4924169182777405,
"rewards/margins": 4.924551963806152,
"rewards/rejected": -4.432135105133057,
"step": 160
},
{
"epoch": 1.1956521739130435,
"grad_norm": 16.84620648534237,
"learning_rate": 6.786998967959219e-07,
"logits/chosen": -1.649950385093689,
"logits/rejected": -1.558600664138794,
"logps/chosen": -199.79678344726562,
"logps/rejected": -227.9362030029297,
"loss": 0.1491,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.5148376226425171,
"rewards/margins": 4.6056809425354,
"rewards/rejected": -4.090843200683594,
"step": 165
},
{
"epoch": 1.2318840579710144,
"grad_norm": 16.743277828937153,
"learning_rate": 6.604128849076838e-07,
"logits/chosen": -1.687930703163147,
"logits/rejected": -1.5980262756347656,
"logps/chosen": -199.6280517578125,
"logps/rejected": -237.2197265625,
"loss": 0.1514,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.0777654647827148,
"rewards/margins": 4.6541428565979,
"rewards/rejected": -3.5763778686523438,
"step": 170
},
{
"epoch": 1.2681159420289856,
"grad_norm": 13.4419999910089,
"learning_rate": 6.418834010383609e-07,
"logits/chosen": -1.7620418071746826,
"logits/rejected": -1.6492313146591187,
"logps/chosen": -170.84674072265625,
"logps/rejected": -228.17239379882812,
"loss": 0.1461,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.7329138517379761,
"rewards/margins": 4.496224403381348,
"rewards/rejected": -3.763310670852661,
"step": 175
},
{
"epoch": 1.3043478260869565,
"grad_norm": 15.060085944373489,
"learning_rate": 6.231394534160007e-07,
"logits/chosen": -1.8257992267608643,
"logits/rejected": -1.7924093008041382,
"logps/chosen": -183.6071319580078,
"logps/rejected": -224.40194702148438,
"loss": 0.142,
"rewards/accuracies": 0.96875,
"rewards/chosen": 1.3987538814544678,
"rewards/margins": 4.50449275970459,
"rewards/rejected": -3.105739116668701,
"step": 180
},
{
"epoch": 1.3405797101449275,
"grad_norm": 15.268514575865197,
"learning_rate": 6.042093744411828e-07,
"logits/chosen": -1.853198766708374,
"logits/rejected": -1.799068808555603,
"logps/chosen": -184.3455047607422,
"logps/rejected": -228.256591796875,
"loss": 0.1444,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.1724538803100586,
"rewards/margins": 4.359854698181152,
"rewards/rejected": -3.1874005794525146,
"step": 185
},
{
"epoch": 1.3768115942028984,
"grad_norm": 14.506206484787588,
"learning_rate": 5.851217778611993e-07,
"logits/chosen": -1.8662179708480835,
"logits/rejected": -1.8571313619613647,
"logps/chosen": -198.0624542236328,
"logps/rejected": -219.442626953125,
"loss": 0.1349,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.6407368779182434,
"rewards/margins": 4.615514278411865,
"rewards/rejected": -3.974777936935425,
"step": 190
},
{
"epoch": 1.4130434782608696,
"grad_norm": 20.861341598868098,
"learning_rate": 5.659055155189651e-07,
"logits/chosen": -1.9783111810684204,
"logits/rejected": -1.8647491931915283,
"logps/chosen": -189.13699340820312,
"logps/rejected": -227.8821563720703,
"loss": 0.1536,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 0.45683974027633667,
"rewards/margins": 4.89407205581665,
"rewards/rejected": -4.437232971191406,
"step": 195
},
{
"epoch": 1.4492753623188406,
"grad_norm": 12.518481247292005,
"learning_rate": 5.465896337420358e-07,
"logits/chosen": -1.964616060256958,
"logits/rejected": -1.8386001586914062,
"logps/chosen": -203.31442260742188,
"logps/rejected": -265.9437561035156,
"loss": 0.1474,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 0.7917453050613403,
"rewards/margins": 5.376971244812012,
"rewards/rejected": -4.585226535797119,
"step": 200
},
{
"epoch": 1.4492753623188406,
"eval_logits/chosen": -1.8598568439483643,
"eval_logits/rejected": -1.7946782112121582,
"eval_logps/chosen": -205.7305908203125,
"eval_logps/rejected": -235.58175659179688,
"eval_loss": 0.42740947008132935,
"eval_rewards/accuracies": 0.8306451439857483,
"eval_rewards/chosen": -0.5411156415939331,
"eval_rewards/margins": 3.1962532997131348,
"eval_rewards/rejected": -3.737368583679199,
"eval_runtime": 247.3295,
"eval_samples_per_second": 15.857,
"eval_steps_per_second": 0.251,
"step": 200
},
{
"epoch": 1.4855072463768115,
"grad_norm": 21.916596761757884,
"learning_rate": 5.272033294376521e-07,
"logits/chosen": -1.813153862953186,
"logits/rejected": -1.768066644668579,
"logps/chosen": -194.30654907226562,
"logps/rejected": -225.20693969726562,
"loss": 0.1401,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.010840868577361107,
"rewards/margins": 4.742875099182129,
"rewards/rejected": -4.732035160064697,
"step": 205
},
{
"epoch": 1.5217391304347827,
"grad_norm": 15.855755657784645,
"learning_rate": 5.077759059601755e-07,
"logits/chosen": -1.7765309810638428,
"logits/rejected": -1.7269878387451172,
"logps/chosen": -208.30252075195312,
"logps/rejected": -228.03353881835938,
"loss": 0.1563,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.508411705493927,
"rewards/margins": 5.39729118347168,
"rewards/rejected": -4.888879299163818,
"step": 210
},
{
"epoch": 1.5579710144927537,
"grad_norm": 16.97962064526418,
"learning_rate": 4.883367288176238e-07,
"logits/chosen": -1.7922019958496094,
"logits/rejected": -1.8073742389678955,
"logps/chosen": -179.53660583496094,
"logps/rejected": -229.8582763671875,
"loss": 0.1482,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 0.9332772493362427,
"rewards/margins": 5.133488178253174,
"rewards/rejected": -4.2002105712890625,
"step": 215
},
{
"epoch": 1.5942028985507246,
"grad_norm": 18.956232551415866,
"learning_rate": 4.6891518128425974e-07,
"logits/chosen": -1.9554294347763062,
"logits/rejected": -1.8807146549224854,
"logps/chosen": -198.28443908691406,
"logps/rejected": -239.2546844482422,
"loss": 0.1527,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.2289119958877563,
"rewards/margins": 5.14874792098999,
"rewards/rejected": -3.9198365211486816,
"step": 220
},
{
"epoch": 1.6304347826086958,
"grad_norm": 18.38495079748664,
"learning_rate": 4.495406199863217e-07,
"logits/chosen": -1.990740418434143,
"logits/rejected": -1.9738916158676147,
"logps/chosen": -177.74075317382812,
"logps/rejected": -256.86663818359375,
"loss": 0.1242,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.0406591892242432,
"rewards/margins": 5.407895088195801,
"rewards/rejected": -4.367236614227295,
"step": 225
},
{
"epoch": 1.6666666666666665,
"grad_norm": 21.41394990672278,
"learning_rate": 4.302423305280385e-07,
"logits/chosen": -2.0460195541381836,
"logits/rejected": -1.9780528545379639,
"logps/chosen": -177.8385009765625,
"logps/rejected": -264.10272216796875,
"loss": 0.1277,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.6652169227600098,
"rewards/margins": 5.778229713439941,
"rewards/rejected": -5.113012313842773,
"step": 230
},
{
"epoch": 1.7028985507246377,
"grad_norm": 18.775710144238932,
"learning_rate": 4.1104948322499386e-07,
"logits/chosen": -2.060439109802246,
"logits/rejected": -1.9934184551239014,
"logps/chosen": -184.96261596679688,
"logps/rejected": -243.159423828125,
"loss": 0.1271,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 0.6158139109611511,
"rewards/margins": 5.492893218994141,
"rewards/rejected": -4.877079010009766,
"step": 235
},
{
"epoch": 1.7391304347826086,
"grad_norm": 17.40854531155315,
"learning_rate": 3.919910890117584e-07,
"logits/chosen": -2.0762391090393066,
"logits/rejected": -2.0377309322357178,
"logps/chosen": -180.35549926757812,
"logps/rejected": -232.47412109375,
"loss": 0.1345,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 0.7253020405769348,
"rewards/margins": 5.309723854064941,
"rewards/rejected": -4.584421634674072,
"step": 240
},
{
"epoch": 1.7753623188405796,
"grad_norm": 16.818208369109737,
"learning_rate": 3.7309595559042973e-07,
"logits/chosen": -2.0712027549743652,
"logits/rejected": -2.053870916366577,
"logps/chosen": -186.7015380859375,
"logps/rejected": -233.06875610351562,
"loss": 0.1329,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.42518267035484314,
"rewards/margins": 5.10868501663208,
"rewards/rejected": -4.683502674102783,
"step": 245
},
{
"epoch": 1.8115942028985508,
"grad_norm": 17.857229947247134,
"learning_rate": 3.54392643886374e-07,
"logits/chosen": -2.1355109214782715,
"logits/rejected": -2.082988739013672,
"logps/chosen": -187.5356903076172,
"logps/rejected": -232.5768585205078,
"loss": 0.1268,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.629612147808075,
"rewards/margins": 5.511849403381348,
"rewards/rejected": -4.882236957550049,
"step": 250
},
{
"epoch": 1.8115942028985508,
"eval_logits/chosen": -2.1449685096740723,
"eval_logits/rejected": -2.0992848873138428,
"eval_logps/chosen": -200.98956298828125,
"eval_logps/rejected": -231.31536865234375,
"eval_loss": 0.4333266615867615,
"eval_rewards/accuracies": 0.8205645084381104,
"eval_rewards/chosen": -0.06701094657182693,
"eval_rewards/margins": 3.2437193393707275,
"eval_rewards/rejected": -3.310730457305908,
"eval_runtime": 247.4867,
"eval_samples_per_second": 15.847,
"eval_steps_per_second": 0.251,
"step": 250
},
{
"epoch": 1.8478260869565217,
"grad_norm": 19.685959893776015,
"learning_rate": 3.3590942487697765e-07,
"logits/chosen": -2.168308734893799,
"logits/rejected": -2.149319648742676,
"logps/chosen": -185.67611694335938,
"logps/rejected": -222.47714233398438,
"loss": 0.1592,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.9020439386367798,
"rewards/margins": 4.887114524841309,
"rewards/rejected": -3.9850707054138184,
"step": 255
},
{
"epoch": 1.8840579710144927,
"grad_norm": 17.22446921219621,
"learning_rate": 3.176742368586725e-07,
"logits/chosen": -2.170022964477539,
"logits/rejected": -2.1088662147521973,
"logps/chosen": -195.13963317871094,
"logps/rejected": -216.1388702392578,
"loss": 0.1331,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 0.9453809857368469,
"rewards/margins": 5.353398323059082,
"rewards/rejected": -4.408017158508301,
"step": 260
},
{
"epoch": 1.9202898550724639,
"grad_norm": 15.391628097816579,
"learning_rate": 2.997146432168236e-07,
"logits/chosen": -2.1484408378601074,
"logits/rejected": -2.0970280170440674,
"logps/chosen": -191.478271484375,
"logps/rejected": -245.2235870361328,
"loss": 0.1396,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 0.7697240114212036,
"rewards/margins": 6.003005027770996,
"rewards/rejected": -5.233281135559082,
"step": 265
},
{
"epoch": 1.9565217391304348,
"grad_norm": 21.110077363875014,
"learning_rate": 2.8205779076231446e-07,
"logits/chosen": -2.118835926055908,
"logits/rejected": -2.0954811573028564,
"logps/chosen": -189.69805908203125,
"logps/rejected": -235.75991821289062,
"loss": 0.1295,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.1907222270965576,
"rewards/margins": 5.428012847900391,
"rewards/rejected": -4.237290382385254,
"step": 270
},
{
"epoch": 1.9927536231884058,
"grad_norm": 20.754559500185525,
"learning_rate": 2.647303686978035e-07,
"logits/chosen": -2.063872814178467,
"logits/rejected": -2.017089366912842,
"logps/chosen": -182.68482971191406,
"logps/rejected": -217.0849151611328,
"loss": 0.1488,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.8351422548294067,
"rewards/margins": 4.959425926208496,
"rewards/rejected": -4.124283790588379,
"step": 275
},
{
"epoch": 2.028985507246377,
"grad_norm": 9.164991188020005,
"learning_rate": 2.4775856827568014e-07,
"logits/chosen": -2.0497758388519287,
"logits/rejected": -2.0113561153411865,
"logps/chosen": -185.25265502929688,
"logps/rejected": -223.13851928710938,
"loss": 0.0883,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 1.0435569286346436,
"rewards/margins": 5.537060737609863,
"rewards/rejected": -4.493502616882324,
"step": 280
},
{
"epoch": 2.0652173913043477,
"grad_norm": 8.010646253617603,
"learning_rate": 2.3116804320869464e-07,
"logits/chosen": -2.0255563259124756,
"logits/rejected": -1.9762938022613525,
"logps/chosen": -188.28140258789062,
"logps/rejected": -224.3779296875,
"loss": 0.0703,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.6650876402854919,
"rewards/margins": 5.7544169425964355,
"rewards/rejected": -5.089330196380615,
"step": 285
},
{
"epoch": 2.101449275362319,
"grad_norm": 8.650482101201316,
"learning_rate": 2.1498387089310865e-07,
"logits/chosen": -2.0082168579101562,
"logits/rejected": -2.001406192779541,
"logps/chosen": -194.86776733398438,
"logps/rejected": -250.7891082763672,
"loss": 0.0694,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 1.0825541019439697,
"rewards/margins": 6.063149452209473,
"rewards/rejected": -4.980595588684082,
"step": 290
},
{
"epoch": 2.13768115942029,
"grad_norm": 9.639312957872246,
"learning_rate": 1.9923051450297336e-07,
"logits/chosen": -2.034083843231201,
"logits/rejected": -1.9722801446914673,
"logps/chosen": -193.46327209472656,
"logps/rejected": -229.68417358398438,
"loss": 0.0724,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 1.1218969821929932,
"rewards/margins": 6.115258693695068,
"rewards/rejected": -4.993361949920654,
"step": 295
},
{
"epoch": 2.1739130434782608,
"grad_norm": 11.415587223465,
"learning_rate": 1.839317860128368e-07,
"logits/chosen": -2.004582166671753,
"logits/rejected": -1.9546029567718506,
"logps/chosen": -191.2842559814453,
"logps/rejected": -250.6446075439453,
"loss": 0.064,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.9089698791503906,
"rewards/margins": 6.477193355560303,
"rewards/rejected": -5.568222999572754,
"step": 300
},
{
"epoch": 2.1739130434782608,
"eval_logits/chosen": -1.9908860921859741,
"eval_logits/rejected": -1.9326670169830322,
"eval_logps/chosen": -205.4859619140625,
"eval_logps/rejected": -239.16648864746094,
"eval_loss": 0.4331871271133423,
"eval_rewards/accuracies": 0.8306451439857483,
"eval_rewards/chosen": -0.5166527032852173,
"eval_rewards/margins": 3.5791897773742676,
"eval_rewards/rejected": -4.095842361450195,
"eval_runtime": 247.4217,
"eval_samples_per_second": 15.851,
"eval_steps_per_second": 0.251,
"step": 300
},
{
"epoch": 2.210144927536232,
"grad_norm": 13.069163886945839,
"learning_rate": 1.6911081020477176e-07,
"logits/chosen": -1.9845168590545654,
"logits/rejected": -1.9513275623321533,
"logps/chosen": -188.0749969482422,
"logps/rejected": -253.936279296875,
"loss": 0.0617,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.8729268908500671,
"rewards/margins": 6.1785569190979,
"rewards/rejected": -5.305630683898926,
"step": 305
},
{
"epoch": 2.246376811594203,
"grad_norm": 12.409574297740958,
"learning_rate": 1.5478998971412666e-07,
"logits/chosen": -1.9543142318725586,
"logits/rejected": -1.9092079401016235,
"logps/chosen": -190.325439453125,
"logps/rejected": -254.7587432861328,
"loss": 0.0669,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 0.8444012403488159,
"rewards/margins": 6.521615028381348,
"rewards/rejected": -5.677213668823242,
"step": 310
},
{
"epoch": 2.282608695652174,
"grad_norm": 17.672030401926676,
"learning_rate": 1.4099097116683873e-07,
"logits/chosen": -1.996664047241211,
"logits/rejected": -1.9541898965835571,
"logps/chosen": -210.48422241210938,
"logps/rejected": -274.6320495605469,
"loss": 0.0611,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 1.3748170137405396,
"rewards/margins": 7.120137691497803,
"rewards/rejected": -5.7453203201293945,
"step": 315
},
{
"epoch": 2.318840579710145,
"grad_norm": 11.067029428009443,
"learning_rate": 1.2773461245949247e-07,
"logits/chosen": -1.9853408336639404,
"logits/rejected": -1.930381417274475,
"logps/chosen": -203.17752075195312,
"logps/rejected": -247.21945190429688,
"loss": 0.0587,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 0.9430925250053406,
"rewards/margins": 6.5651397705078125,
"rewards/rejected": -5.622047424316406,
"step": 320
},
{
"epoch": 2.355072463768116,
"grad_norm": 11.355849520152098,
"learning_rate": 1.1504095123158014e-07,
"logits/chosen": -1.9925590753555298,
"logits/rejected": -1.9830715656280518,
"logps/chosen": -195.1802520751953,
"logps/rejected": -257.0408020019531,
"loss": 0.0646,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.7049287557601929,
"rewards/margins": 6.576409339904785,
"rewards/rejected": -5.871480464935303,
"step": 325
},
{
"epoch": 2.391304347826087,
"grad_norm": 10.31665774657082,
"learning_rate": 1.0292917457762323e-07,
"logits/chosen": -1.981650948524475,
"logits/rejected": -1.9037120342254639,
"logps/chosen": -188.784912109375,
"logps/rejected": -247.7895965576172,
"loss": 0.0585,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 0.8972769975662231,
"rewards/margins": 6.589321136474609,
"rewards/rejected": -5.692043781280518,
"step": 330
},
{
"epoch": 2.427536231884058,
"grad_norm": 15.155495148000533,
"learning_rate": 9.141759004493282e-08,
"logits/chosen": -1.9452364444732666,
"logits/rejected": -1.9261302947998047,
"logps/chosen": -179.8898162841797,
"logps/rejected": -243.61328125,
"loss": 0.055,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.39632946252822876,
"rewards/margins": 6.330525875091553,
"rewards/rejected": -5.934195518493652,
"step": 335
},
{
"epoch": 2.463768115942029,
"grad_norm": 13.686346123613403,
"learning_rate": 8.052359796084951e-08,
"logits/chosen": -1.983902931213379,
"logits/rejected": -1.8964077234268188,
"logps/chosen": -190.7554473876953,
"logps/rejected": -248.05477905273438,
"loss": 0.0633,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 1.1571325063705444,
"rewards/margins": 6.923959255218506,
"rewards/rejected": -5.766826629638672,
"step": 340
},
{
"epoch": 2.5,
"grad_norm": 12.547226987434398,
"learning_rate": 7.026366513129139e-08,
"logits/chosen": -1.9592113494873047,
"logits/rejected": -1.8749430179595947,
"logps/chosen": -184.05401611328125,
"logps/rejected": -228.3144989013672,
"loss": 0.0603,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.9612242579460144,
"rewards/margins": 5.984218597412109,
"rewards/rejected": -5.0229949951171875,
"step": 345
},
{
"epoch": 2.536231884057971,
"grad_norm": 10.429640146690803,
"learning_rate": 6.065329995036572e-08,
"logits/chosen": -1.9933052062988281,
"logits/rejected": -1.9089053869247437,
"logps/chosen": -190.28915405273438,
"logps/rejected": -235.51040649414062,
"loss": 0.056,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 0.5995205640792847,
"rewards/margins": 6.035165786743164,
"rewards/rejected": -5.43564510345459,
"step": 350
},
{
"epoch": 2.536231884057971,
"eval_logits/chosen": -1.9756227731704712,
"eval_logits/rejected": -1.916344165802002,
"eval_logps/chosen": -205.54391479492188,
"eval_logps/rejected": -239.3422393798828,
"eval_loss": 0.44805780053138733,
"eval_rewards/accuracies": 0.8185483813285828,
"eval_rewards/chosen": -0.5224470496177673,
"eval_rewards/margins": 3.5909695625305176,
"eval_rewards/rejected": -4.1134161949157715,
"eval_runtime": 247.8226,
"eval_samples_per_second": 15.826,
"eval_steps_per_second": 0.25,
"step": 350
},
{
"epoch": 2.572463768115942,
"grad_norm": 14.703238617144146,
"learning_rate": 5.170702895866591e-08,
"logits/chosen": -1.9757484197616577,
"logits/rejected": -1.877820611000061,
"logps/chosen": -181.67088317871094,
"logps/rejected": -234.2239227294922,
"loss": 0.0539,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.7643092274665833,
"rewards/margins": 6.472746849060059,
"rewards/rejected": -5.708437919616699,
"step": 355
},
{
"epoch": 2.608695652173913,
"grad_norm": 16.11683384920457,
"learning_rate": 4.343837488569057e-08,
"logits/chosen": -1.9882529973983765,
"logits/rejected": -1.937097191810608,
"logps/chosen": -187.87106323242188,
"logps/rejected": -243.0420379638672,
"loss": 0.0668,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.925794780254364,
"rewards/margins": 6.516719818115234,
"rewards/rejected": -5.590925216674805,
"step": 360
},
{
"epoch": 2.644927536231884,
"grad_norm": 11.830853634227745,
"learning_rate": 3.585983620957112e-08,
"logits/chosen": -1.9911915063858032,
"logits/rejected": -1.9083200693130493,
"logps/chosen": -183.7674560546875,
"logps/rejected": -238.542236328125,
"loss": 0.0632,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.6369873285293579,
"rewards/margins": 6.4096479415893555,
"rewards/rejected": -5.772660732269287,
"step": 365
},
{
"epoch": 2.681159420289855,
"grad_norm": 12.756089452099605,
"learning_rate": 2.8982868265005454e-08,
"logits/chosen": -1.9761543273925781,
"logits/rejected": -1.9331614971160889,
"logps/chosen": -189.85000610351562,
"logps/rejected": -237.2536163330078,
"loss": 0.0563,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.8311630487442017,
"rewards/margins": 6.6239190101623535,
"rewards/rejected": -5.792755603790283,
"step": 370
},
{
"epoch": 2.717391304347826,
"grad_norm": 14.499306986468172,
"learning_rate": 2.2817865927956092e-08,
"logits/chosen": -1.9935853481292725,
"logits/rejected": -1.9374994039535522,
"logps/chosen": -182.0215606689453,
"logps/rejected": -235.44271850585938,
"loss": 0.0611,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 0.9944933652877808,
"rewards/margins": 6.3055338859558105,
"rewards/rejected": -5.31104040145874,
"step": 375
},
{
"epoch": 2.753623188405797,
"grad_norm": 12.558302938263997,
"learning_rate": 1.7374147903282176e-08,
"logits/chosen": -1.9671310186386108,
"logits/rejected": -1.9008516073226929,
"logps/chosen": -196.24166870117188,
"logps/rejected": -243.8437957763672,
"loss": 0.0483,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.6471545696258545,
"rewards/margins": 6.556443691253662,
"rewards/rejected": -5.909289360046387,
"step": 380
},
{
"epoch": 2.789855072463768,
"grad_norm": 7.550248384042512,
"learning_rate": 1.2659942639057952e-08,
"logits/chosen": -1.9696018695831299,
"logits/rejected": -1.9180386066436768,
"logps/chosen": -197.0672607421875,
"logps/rejected": -251.66244506835938,
"loss": 0.0548,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.7232016921043396,
"rewards/margins": 6.699126243591309,
"rewards/rejected": -5.975924968719482,
"step": 385
},
{
"epoch": 2.8260869565217392,
"grad_norm": 8.880479703177947,
"learning_rate": 8.682375888868166e-09,
"logits/chosen": -1.975227952003479,
"logits/rejected": -1.9457371234893799,
"logps/chosen": -191.05844116210938,
"logps/rejected": -256.9841003417969,
"loss": 0.0488,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 0.38350382447242737,
"rewards/margins": 6.125405311584473,
"rewards/rejected": -5.741901874542236,
"step": 390
},
{
"epoch": 2.86231884057971,
"grad_norm": 10.914718704369829,
"learning_rate": 5.447459940880084e-09,
"logits/chosen": -1.978607416152954,
"logits/rejected": -1.921014428138733,
"logps/chosen": -183.47064208984375,
"logps/rejected": -249.2570037841797,
"loss": 0.0512,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 0.4558964669704437,
"rewards/margins": 6.345686912536621,
"rewards/rejected": -5.8897905349731445,
"step": 395
},
{
"epoch": 2.898550724637681,
"grad_norm": 12.495149398792217,
"learning_rate": 2.9600845299737053e-09,
"logits/chosen": -1.958653450012207,
"logits/rejected": -1.9010944366455078,
"logps/chosen": -177.4004364013672,
"logps/rejected": -237.0882110595703,
"loss": 0.0721,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.2784881889820099,
"rewards/margins": 6.083136558532715,
"rewards/rejected": -5.804648399353027,
"step": 400
},
{
"epoch": 2.898550724637681,
"eval_logits/chosen": -1.9731168746948242,
"eval_logits/rejected": -1.9129306077957153,
"eval_logps/chosen": -207.34255981445312,
"eval_logps/rejected": -242.29010009765625,
"eval_loss": 0.4506772756576538,
"eval_rewards/accuracies": 0.8185483813285828,
"eval_rewards/chosen": -0.7023105025291443,
"eval_rewards/margins": 3.7058920860290527,
"eval_rewards/rejected": -4.408202171325684,
"eval_runtime": 247.9558,
"eval_samples_per_second": 15.817,
"eval_steps_per_second": 0.25,
"step": 400
},
{
"epoch": 2.9347826086956523,
"grad_norm": 10.380155420725053,
"learning_rate": 1.2240094466668404e-09,
"logits/chosen": -2.010136127471924,
"logits/rejected": -1.8942158222198486,
"logps/chosen": -191.6996612548828,
"logps/rejected": -268.3369445800781,
"loss": 0.0597,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.43750476837158203,
"rewards/margins": 6.982994079589844,
"rewards/rejected": -6.5454888343811035,
"step": 405
},
{
"epoch": 2.971014492753623,
"grad_norm": 18.203204347608896,
"learning_rate": 2.418588540059607e-10,
"logits/chosen": -1.9836390018463135,
"logits/rejected": -1.9380347728729248,
"logps/chosen": -186.40122985839844,
"logps/rejected": -238.04464721679688,
"loss": 0.0554,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.8866332769393921,
"rewards/margins": 6.659180641174316,
"rewards/rejected": -5.772547721862793,
"step": 410
}
],
"logging_steps": 5,
"max_steps": 414,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4881795388538880.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}