{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.4492753623188406,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.036231884057971016,
      "grad_norm": 61.75757328159282,
      "learning_rate": 5e-07,
      "logits/chosen": -2.732090473175049,
      "logits/rejected": -2.7100460529327393,
      "logps/chosen": -182.59107971191406,
      "logps/rejected": -189.5584716796875,
      "loss": 0.6889,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.00281245238147676,
      "rewards/margins": 0.0058334446512162685,
      "rewards/rejected": -0.008645896799862385,
      "step": 5
    },
    {
      "epoch": 0.07246376811594203,
      "grad_norm": 44.951594498596215,
      "learning_rate": 1e-06,
      "logits/chosen": -2.754081964492798,
      "logits/rejected": -2.752152919769287,
      "logps/chosen": -197.337158203125,
      "logps/rejected": -184.00933837890625,
      "loss": 0.6274,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.03147688880562782,
      "rewards/margins": 0.1896156221628189,
      "rewards/rejected": -0.1581387221813202,
      "step": 10
    },
    {
      "epoch": 0.10869565217391304,
      "grad_norm": 51.34158391398985,
      "learning_rate": 9.996221126793764e-07,
      "logits/chosen": -2.694983959197998,
      "logits/rejected": -2.692361831665039,
      "logps/chosen": -203.20387268066406,
      "logps/rejected": -204.64244079589844,
      "loss": 0.5838,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.6150370836257935,
      "rewards/margins": 0.9413955807685852,
      "rewards/rejected": -0.32635849714279175,
      "step": 15
    },
    {
      "epoch": 0.14492753623188406,
      "grad_norm": 34.76477183019994,
      "learning_rate": 9.984890219128145e-07,
      "logits/chosen": -2.612672805786133,
      "logits/rejected": -2.5829074382781982,
      "logps/chosen": -188.62716674804688,
      "logps/rejected": -192.87452697753906,
      "loss": 0.5142,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.8545471429824829,
      "rewards/margins": 1.280996561050415,
      "rewards/rejected": -0.4264492094516754,
      "step": 20
    },
    {
      "epoch": 0.18115942028985507,
      "grad_norm": 36.75278346647978,
      "learning_rate": 9.966024404228493e-07,
      "logits/chosen": -2.450106143951416,
      "logits/rejected": -2.4297895431518555,
      "logps/chosen": -179.98348999023438,
      "logps/rejected": -179.38925170898438,
      "loss": 0.5032,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 0.38695499300956726,
      "rewards/margins": 0.8900691866874695,
      "rewards/rejected": -0.5031141638755798,
      "step": 25
    },
    {
      "epoch": 0.21739130434782608,
      "grad_norm": 31.781918105397544,
      "learning_rate": 9.939652198703783e-07,
      "logits/chosen": -2.324214458465576,
      "logits/rejected": -2.325657367706299,
      "logps/chosen": -188.5428466796875,
      "logps/rejected": -193.8271942138672,
      "loss": 0.4995,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.6558719873428345,
      "rewards/margins": 1.2207121849060059,
      "rewards/rejected": -0.5648401975631714,
      "step": 30
    },
    {
      "epoch": 0.2536231884057971,
      "grad_norm": 39.36776247005876,
      "learning_rate": 9.905813465442354e-07,
      "logits/chosen": -2.236240863800049,
      "logits/rejected": -2.2105681896209717,
      "logps/chosen": -203.98277282714844,
      "logps/rejected": -194.84640502929688,
      "loss": 0.5091,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.8834564089775085,
      "rewards/margins": 1.2675695419311523,
      "rewards/rejected": -0.3841131329536438,
      "step": 35
    },
    {
      "epoch": 0.2898550724637681,
      "grad_norm": 30.817630358317576,
      "learning_rate": 9.864559353357187e-07,
      "logits/chosen": -2.068774700164795,
      "logits/rejected": -2.0603950023651123,
      "logps/chosen": -182.76817321777344,
      "logps/rejected": -185.9797821044922,
      "loss": 0.4873,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 1.03325617313385,
      "rewards/margins": 1.0384714603424072,
      "rewards/rejected": -0.005215352866798639,
      "step": 40
    },
    {
      "epoch": 0.32608695652173914,
      "grad_norm": 29.09268118121073,
      "learning_rate": 9.815952220071804e-07,
      "logits/chosen": -1.8718488216400146,
      "logits/rejected": -1.8250553607940674,
      "logps/chosen": -195.60968017578125,
      "logps/rejected": -221.5565643310547,
      "loss": 0.4597,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 1.3850222826004028,
      "rewards/margins": 1.8469291925430298,
      "rewards/rejected": -0.4619070589542389,
      "step": 45
    },
    {
      "epoch": 0.36231884057971014,
      "grad_norm": 29.526743630011346,
      "learning_rate": 9.76006553766365e-07,
      "logits/chosen": -1.653713583946228,
      "logits/rejected": -1.6171553134918213,
      "logps/chosen": -198.85989379882812,
      "logps/rejected": -203.60678100585938,
      "loss": 0.4516,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 0.8292047381401062,
      "rewards/margins": 1.6851797103881836,
      "rewards/rejected": -0.8559748530387878,
      "step": 50
    },
    {
      "epoch": 0.36231884057971014,
      "eval_logits/chosen": -1.7065542936325073,
      "eval_logits/rejected": -1.630993127822876,
      "eval_logps/chosen": -192.20655822753906,
      "eval_logps/rejected": -206.51295471191406,
      "eval_loss": 0.4420754015445709,
      "eval_rewards/accuracies": 0.7903226017951965,
      "eval_rewards/chosen": 0.8112886548042297,
      "eval_rewards/margins": 1.641775369644165,
      "eval_rewards/rejected": -0.8304866552352905,
      "eval_runtime": 247.7543,
      "eval_samples_per_second": 15.83,
      "eval_steps_per_second": 0.25,
      "step": 50
    },
    {
      "epoch": 0.39855072463768115,
      "grad_norm": 30.94859785748943,
      "learning_rate": 9.696983781607415e-07,
      "logits/chosen": -1.7253024578094482,
      "logits/rejected": -1.6905288696289062,
      "logps/chosen": -182.9173126220703,
      "logps/rejected": -171.9159698486328,
      "loss": 0.4573,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 0.6172864437103271,
      "rewards/margins": 1.648385763168335,
      "rewards/rejected": -1.031099557876587,
      "step": 55
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 40.75469044830845,
      "learning_rate": 9.626802303086209e-07,
      "logits/chosen": -1.87893807888031,
      "logits/rejected": -1.8299003839492798,
      "logps/chosen": -186.30145263671875,
      "logps/rejected": -193.9145965576172,
      "loss": 0.4264,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": 0.4657188057899475,
      "rewards/margins": 1.7288262844085693,
      "rewards/rejected": -1.2631075382232666,
      "step": 60
    },
    {
      "epoch": 0.47101449275362317,
      "grad_norm": 35.556274541495966,
      "learning_rate": 9.549627184863528e-07,
      "logits/chosen": -2.016784906387329,
      "logits/rejected": -1.9150521755218506,
      "logps/chosen": -191.3840789794922,
      "logps/rejected": -192.66639709472656,
      "loss": 0.4289,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 0.0974711924791336,
      "rewards/margins": 1.6010549068450928,
      "rewards/rejected": -1.5035837888717651,
      "step": 65
    },
    {
      "epoch": 0.5072463768115942,
      "grad_norm": 26.46585227154451,
      "learning_rate": 9.465575080933957e-07,
      "logits/chosen": -1.853308916091919,
      "logits/rejected": -1.7947351932525635,
      "logps/chosen": -172.3099822998047,
      "logps/rejected": -208.057373046875,
      "loss": 0.3948,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 0.21238946914672852,
      "rewards/margins": 1.8403332233428955,
      "rewards/rejected": -1.627943754196167,
      "step": 70
    },
    {
      "epoch": 0.5434782608695652,
      "grad_norm": 31.533541728553253,
      "learning_rate": 9.374773040194878e-07,
      "logits/chosen": -1.8850362300872803,
      "logits/rejected": -1.8103622198104858,
      "logps/chosen": -205.5053253173828,
      "logps/rejected": -210.96981811523438,
      "loss": 0.4364,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": 0.17310531437397003,
      "rewards/margins": 1.8103282451629639,
      "rewards/rejected": -1.6372228860855103,
      "step": 75
    },
    {
      "epoch": 0.5797101449275363,
      "grad_norm": 29.780905727815526,
      "learning_rate": 9.277358314405818e-07,
      "logits/chosen": -1.7906593084335327,
      "logits/rejected": -1.742597222328186,
      "logps/chosen": -188.9757080078125,
      "logps/rejected": -205.398193359375,
      "loss": 0.3987,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.19728976488113403,
      "rewards/margins": 1.879663109779358,
      "rewards/rejected": -2.0769529342651367,
      "step": 80
    },
    {
      "epoch": 0.6159420289855072,
      "grad_norm": 34.4646468352745,
      "learning_rate": 9.173478150725651e-07,
      "logits/chosen": -1.7377640008926392,
      "logits/rejected": -1.6257518529891968,
      "logps/chosen": -210.00320434570312,
      "logps/rejected": -215.84835815429688,
      "loss": 0.4258,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 0.08612661063671112,
      "rewards/margins": 2.4435980319976807,
      "rewards/rejected": -2.357471227645874,
      "step": 85
    },
    {
      "epoch": 0.6521739130434783,
      "grad_norm": 29.12537980218493,
      "learning_rate": 9.063289569141251e-07,
      "logits/chosen": -1.7976572513580322,
      "logits/rejected": -1.739854097366333,
      "logps/chosen": -214.8435821533203,
      "logps/rejected": -224.52005004882812,
      "loss": 0.4147,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 0.46363013982772827,
      "rewards/margins": 2.330965518951416,
      "rewards/rejected": -1.867335557937622,
      "step": 90
    },
    {
      "epoch": 0.6884057971014492,
      "grad_norm": 35.00421638148543,
      "learning_rate": 8.946959125124051e-07,
      "logits/chosen": -1.861108422279358,
      "logits/rejected": -1.780923843383789,
      "logps/chosen": -207.5733184814453,
      "logps/rejected": -193.34400939941406,
      "loss": 0.4121,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.4294491708278656,
      "rewards/margins": 2.142913341522217,
      "rewards/rejected": -1.7134641408920288,
      "step": 95
    },
    {
      "epoch": 0.7246376811594203,
      "grad_norm": 31.611698501726103,
      "learning_rate": 8.824662657873238e-07,
      "logits/chosen": -1.8221423625946045,
      "logits/rejected": -1.802095651626587,
      "logps/chosen": -173.2090301513672,
      "logps/rejected": -206.5529327392578,
      "loss": 0.3759,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.04721298813819885,
      "rewards/margins": 1.9821780920028687,
      "rewards/rejected": -2.029391050338745,
      "step": 100
    },
    {
      "epoch": 0.7246376811594203,
      "eval_logits/chosen": -1.8523844480514526,
      "eval_logits/rejected": -1.7929590940475464,
      "eval_logps/chosen": -200.7910614013672,
      "eval_logps/rejected": -220.96961975097656,
      "eval_loss": 0.4121003746986389,
      "eval_rewards/accuracies": 0.8145161271095276,
      "eval_rewards/chosen": -0.047160252928733826,
      "eval_rewards/margins": 2.2289960384368896,
      "eval_rewards/rejected": -2.276156187057495,
      "eval_runtime": 247.371,
      "eval_samples_per_second": 15.855,
      "eval_steps_per_second": 0.251,
      "step": 100
    },
    {
      "epoch": 0.7608695652173914,
      "grad_norm": 30.01063089972391,
      "learning_rate": 8.696585024526135e-07,
      "logits/chosen": -1.7823431491851807,
      "logits/rejected": -1.7234203815460205,
      "logps/chosen": -189.0630340576172,
      "logps/rejected": -224.55642700195312,
      "loss": 0.3969,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.0058825016021728516,
      "rewards/margins": 2.5169529914855957,
      "rewards/rejected": -2.5228357315063477,
      "step": 105
    },
    {
      "epoch": 0.7971014492753623,
      "grad_norm": 35.65348267869082,
      "learning_rate": 8.562919820737535e-07,
      "logits/chosen": -1.7099103927612305,
      "logits/rejected": -1.6304385662078857,
      "logps/chosen": -206.9807586669922,
      "logps/rejected": -209.36962890625,
      "loss": 0.3755,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.26569992303848267,
      "rewards/margins": 2.464618444442749,
      "rewards/rejected": -2.730318546295166,
      "step": 110
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 28.250647507886438,
      "learning_rate": 8.423869088050315e-07,
      "logits/chosen": -1.7219148874282837,
      "logits/rejected": -1.677403450012207,
      "logps/chosen": -195.88735961914062,
      "logps/rejected": -222.36581420898438,
      "loss": 0.3912,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -0.14722302556037903,
      "rewards/margins": 2.4208686351776123,
      "rewards/rejected": -2.568091630935669,
      "step": 115
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 36.27157250663838,
      "learning_rate": 8.2796430084997e-07,
      "logits/chosen": -1.6080610752105713,
      "logits/rejected": -1.521059513092041,
      "logps/chosen": -197.2279510498047,
      "logps/rejected": -208.6706085205078,
      "loss": 0.3668,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 0.07672278583049774,
      "rewards/margins": 2.720585584640503,
      "rewards/rejected": -2.64386248588562,
      "step": 120
    },
    {
      "epoch": 0.9057971014492754,
      "grad_norm": 28.694980241284195,
      "learning_rate": 8.130459586912753e-07,
      "logits/chosen": -1.4262475967407227,
      "logits/rejected": -1.3733441829681396,
      "logps/chosen": -219.4936981201172,
      "logps/rejected": -217.61599731445312,
      "loss": 0.4582,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.8654589653015137,
      "rewards/margins": 1.6804126501083374,
      "rewards/rejected": -2.5458714962005615,
      "step": 125
    },
    {
      "epoch": 0.9420289855072463,
      "grad_norm": 29.710262188798424,
      "learning_rate": 7.97654432138333e-07,
      "logits/chosen": -1.4053936004638672,
      "logits/rejected": -1.336163878440857,
      "logps/chosen": -210.55026245117188,
      "logps/rejected": -243.9113311767578,
      "loss": 0.3921,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": 0.20443829894065857,
      "rewards/margins": 2.7204947471618652,
      "rewards/rejected": -2.516056537628174,
      "step": 130
    },
    {
      "epoch": 0.9782608695652174,
      "grad_norm": 26.71701106117664,
      "learning_rate": 7.81812986242061e-07,
      "logits/chosen": -1.423004388809204,
      "logits/rejected": -1.2980186939239502,
      "logps/chosen": -193.02523803710938,
      "logps/rejected": -232.86788940429688,
      "loss": 0.3631,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": 0.15479817986488342,
      "rewards/margins": 3.0325751304626465,
      "rewards/rejected": -2.877776622772217,
      "step": 135
    },
    {
      "epoch": 1.0144927536231885,
      "grad_norm": 18.847111481815627,
      "learning_rate": 7.655455661286375e-07,
      "logits/chosen": -1.3630199432373047,
      "logits/rejected": -1.3213447332382202,
      "logps/chosen": -193.20803833007812,
      "logps/rejected": -237.5965118408203,
      "loss": 0.2543,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.20990173518657684,
      "rewards/margins": 3.6189141273498535,
      "rewards/rejected": -3.8288159370422363,
      "step": 140
    },
    {
      "epoch": 1.0507246376811594,
      "grad_norm": 18.388157966842616,
      "learning_rate": 7.488767608052628e-07,
      "logits/chosen": -1.543648362159729,
      "logits/rejected": -1.399395227432251,
      "logps/chosen": -190.61196899414062,
      "logps/rejected": -237.07424926757812,
      "loss": 0.1744,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 0.6787484884262085,
      "rewards/margins": 4.023434638977051,
      "rewards/rejected": -3.3446857929229736,
      "step": 145
    },
    {
      "epoch": 1.0869565217391304,
      "grad_norm": 15.923928842240379,
      "learning_rate": 7.318317659926636e-07,
      "logits/chosen": -1.6209495067596436,
      "logits/rejected": -1.5568897724151611,
      "logps/chosen": -172.939697265625,
      "logps/rejected": -233.3376007080078,
      "loss": 0.149,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 1.1221749782562256,
      "rewards/margins": 3.8970863819122314,
      "rewards/rejected": -2.774911403656006,
      "step": 150
    },
    {
      "epoch": 1.0869565217391304,
      "eval_logits/chosen": -1.7425010204315186,
      "eval_logits/rejected": -1.674597978591919,
      "eval_logps/chosen": -194.48468017578125,
      "eval_logps/rejected": -217.0243682861328,
      "eval_loss": 0.42049291729927063,
      "eval_rewards/accuracies": 0.8205645084381104,
      "eval_rewards/chosen": 0.5834774374961853,
      "eval_rewards/margins": 2.4651052951812744,
      "eval_rewards/rejected": -1.881628155708313,
      "eval_runtime": 247.5785,
      "eval_samples_per_second": 15.841,
      "eval_steps_per_second": 0.25,
      "step": 150
    },
    {
      "epoch": 1.1231884057971016,
      "grad_norm": 14.18220461970911,
      "learning_rate": 7.144363460405189e-07,
      "logits/chosen": -1.7796205282211304,
      "logits/rejected": -1.6700912714004517,
      "logps/chosen": -190.59030151367188,
      "logps/rejected": -233.08151245117188,
      "loss": 0.1482,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 1.356343150138855,
      "rewards/margins": 4.483328342437744,
      "rewards/rejected": -3.1269848346710205,
      "step": 155
    },
    {
      "epoch": 1.1594202898550725,
      "grad_norm": 12.199643576270322,
      "learning_rate": 6.967167949833762e-07,
      "logits/chosen": -1.7053067684173584,
      "logits/rejected": -1.613364577293396,
      "logps/chosen": -192.91790771484375,
      "logps/rejected": -245.5927734375,
      "loss": 0.143,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 0.4924169182777405,
      "rewards/margins": 4.924551963806152,
      "rewards/rejected": -4.432135105133057,
      "step": 160
    },
    {
      "epoch": 1.1956521739130435,
      "grad_norm": 16.84620648534237,
      "learning_rate": 6.786998967959219e-07,
      "logits/chosen": -1.649950385093689,
      "logits/rejected": -1.558600664138794,
      "logps/chosen": -199.79678344726562,
      "logps/rejected": -227.9362030029297,
      "loss": 0.1491,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.5148376226425171,
      "rewards/margins": 4.6056809425354,
      "rewards/rejected": -4.090843200683594,
      "step": 165
    },
    {
      "epoch": 1.2318840579710144,
      "grad_norm": 16.743277828937153,
      "learning_rate": 6.604128849076838e-07,
      "logits/chosen": -1.687930703163147,
      "logits/rejected": -1.5980262756347656,
      "logps/chosen": -199.6280517578125,
      "logps/rejected": -237.2197265625,
      "loss": 0.1514,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": 1.0777654647827148,
      "rewards/margins": 4.6541428565979,
      "rewards/rejected": -3.5763778686523438,
      "step": 170
    },
    {
      "epoch": 1.2681159420289856,
      "grad_norm": 13.4419999910089,
      "learning_rate": 6.418834010383609e-07,
      "logits/chosen": -1.7620418071746826,
      "logits/rejected": -1.6492313146591187,
      "logps/chosen": -170.84674072265625,
      "logps/rejected": -228.17239379882812,
      "loss": 0.1461,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.7329138517379761,
      "rewards/margins": 4.496224403381348,
      "rewards/rejected": -3.763310670852661,
      "step": 175
    },
    {
      "epoch": 1.3043478260869565,
      "grad_norm": 15.060085944373489,
      "learning_rate": 6.231394534160007e-07,
      "logits/chosen": -1.8257992267608643,
      "logits/rejected": -1.7924093008041382,
      "logps/chosen": -183.6071319580078,
      "logps/rejected": -224.40194702148438,
      "loss": 0.142,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 1.3987538814544678,
      "rewards/margins": 4.50449275970459,
      "rewards/rejected": -3.105739116668701,
      "step": 180
    },
    {
      "epoch": 1.3405797101449275,
      "grad_norm": 15.268514575865197,
      "learning_rate": 6.042093744411828e-07,
      "logits/chosen": -1.853198766708374,
      "logits/rejected": -1.799068808555603,
      "logps/chosen": -184.3455047607422,
      "logps/rejected": -228.256591796875,
      "loss": 0.1444,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": 1.1724538803100586,
      "rewards/margins": 4.359854698181152,
      "rewards/rejected": -3.1874005794525146,
      "step": 185
    },
    {
      "epoch": 1.3768115942028984,
      "grad_norm": 14.506206484787588,
      "learning_rate": 5.851217778611993e-07,
      "logits/chosen": -1.8662179708480835,
      "logits/rejected": -1.8571313619613647,
      "logps/chosen": -198.0624542236328,
      "logps/rejected": -219.442626953125,
      "loss": 0.1349,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.6407368779182434,
      "rewards/margins": 4.615514278411865,
      "rewards/rejected": -3.974777936935425,
      "step": 190
    },
    {
      "epoch": 1.4130434782608696,
      "grad_norm": 20.861341598868098,
      "learning_rate": 5.659055155189651e-07,
      "logits/chosen": -1.9783111810684204,
      "logits/rejected": -1.8647491931915283,
      "logps/chosen": -189.13699340820312,
      "logps/rejected": -227.8821563720703,
      "loss": 0.1536,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": 0.45683974027633667,
      "rewards/margins": 4.89407205581665,
      "rewards/rejected": -4.437232971191406,
      "step": 195
    },
    {
      "epoch": 1.4492753623188406,
      "grad_norm": 12.518481247292005,
      "learning_rate": 5.465896337420358e-07,
      "logits/chosen": -1.964616060256958,
      "logits/rejected": -1.8386001586914062,
      "logps/chosen": -203.31442260742188,
      "logps/rejected": -265.9437561035156,
      "loss": 0.1474,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": 0.7917453050613403,
      "rewards/margins": 5.376971244812012,
      "rewards/rejected": -4.585226535797119,
      "step": 200
    },
    {
      "epoch": 1.4492753623188406,
      "eval_logits/chosen": -1.8598568439483643,
      "eval_logits/rejected": -1.7946782112121582,
      "eval_logps/chosen": -205.7305908203125,
      "eval_logps/rejected": -235.58175659179688,
      "eval_loss": 0.42740947008132935,
      "eval_rewards/accuracies": 0.8306451439857483,
      "eval_rewards/chosen": -0.5411156415939331,
      "eval_rewards/margins": 3.1962532997131348,
      "eval_rewards/rejected": -3.737368583679199,
      "eval_runtime": 247.3295,
      "eval_samples_per_second": 15.857,
      "eval_steps_per_second": 0.251,
      "step": 200
    }
  ],
  "logging_steps": 5,
  "max_steps": 414,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2358113407598592.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}