|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988571428571429, |
|
"eval_steps": 100, |
|
"global_step": 437, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022857142857142857, |
|
"grad_norm": 9.858836405653433, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.7008285522460938, |
|
"logits/rejected": -2.6250243186950684, |
|
"logps/chosen": -301.27081298828125, |
|
"logps/rejected": -281.75146484375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": 0.00015341350808739662, |
|
"rewards/margins": 0.0001716136175673455, |
|
"rewards/rejected": -1.8200071281171404e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045714285714285714, |
|
"grad_norm": 7.79798162706573, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.6407124996185303, |
|
"logits/rejected": -2.6055800914764404, |
|
"logps/chosen": -278.97711181640625, |
|
"logps/rejected": -254.7215576171875, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0019410619279369712, |
|
"rewards/margins": 0.0014655741397291422, |
|
"rewards/rejected": 0.0004754880501423031, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06857142857142857, |
|
"grad_norm": 8.391842432337471, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.63759183883667, |
|
"logits/rejected": -2.6166491508483887, |
|
"logps/chosen": -263.44866943359375, |
|
"logps/rejected": -263.5602111816406, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.012344349175691605, |
|
"rewards/margins": 0.008015613071620464, |
|
"rewards/rejected": 0.004328734241425991, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09142857142857143, |
|
"grad_norm": 9.39930524490582, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.6476945877075195, |
|
"logits/rejected": -2.5853049755096436, |
|
"logps/chosen": -290.5145568847656, |
|
"logps/rejected": -268.3503723144531, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.034826718270778656, |
|
"rewards/margins": 0.042017363011837006, |
|
"rewards/rejected": -0.007190642412751913, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 13.99279327188838, |
|
"learning_rate": 4.997124959943201e-07, |
|
"logits/chosen": -2.6765246391296387, |
|
"logits/rejected": -2.5974183082580566, |
|
"logps/chosen": -294.23516845703125, |
|
"logps/rejected": -254.03042602539062, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.02075173892080784, |
|
"rewards/margins": 0.0924127846956253, |
|
"rewards/rejected": -0.07166104018688202, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13714285714285715, |
|
"grad_norm": 10.917441498337828, |
|
"learning_rate": 4.979579212164186e-07, |
|
"logits/chosen": -2.5758731365203857, |
|
"logits/rejected": -2.472479820251465, |
|
"logps/chosen": -290.3058166503906, |
|
"logps/rejected": -270.32891845703125, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.09799840301275253, |
|
"rewards/margins": 0.11976947635412216, |
|
"rewards/rejected": -0.21776790916919708, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 14.438870715842985, |
|
"learning_rate": 4.946196886175515e-07, |
|
"logits/chosen": -2.568722724914551, |
|
"logits/rejected": -2.5102906227111816, |
|
"logps/chosen": -284.9170227050781, |
|
"logps/rejected": -291.45648193359375, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.09138783067464828, |
|
"rewards/margins": 0.22690913081169128, |
|
"rewards/rejected": -0.31829696893692017, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18285714285714286, |
|
"grad_norm": 12.571362458092375, |
|
"learning_rate": 4.897191188239667e-07, |
|
"logits/chosen": -2.5521557331085205, |
|
"logits/rejected": -2.4944987297058105, |
|
"logps/chosen": -291.53851318359375, |
|
"logps/rejected": -307.15631103515625, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.24509508907794952, |
|
"rewards/margins": 0.27337878942489624, |
|
"rewards/rejected": -0.5184738636016846, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2057142857142857, |
|
"grad_norm": 13.355734952743322, |
|
"learning_rate": 4.832875107981763e-07, |
|
"logits/chosen": -2.6172804832458496, |
|
"logits/rejected": -2.551274538040161, |
|
"logps/chosen": -292.2721252441406, |
|
"logps/rejected": -307.9034423828125, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.15986846387386322, |
|
"rewards/margins": 0.3636865019798279, |
|
"rewards/rejected": -0.5235549807548523, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 14.60913947420442, |
|
"learning_rate": 4.753659419387223e-07, |
|
"logits/chosen": -2.629087448120117, |
|
"logits/rejected": -2.5364227294921875, |
|
"logps/chosen": -312.4242858886719, |
|
"logps/rejected": -296.9600524902344, |
|
"loss": 0.5995, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2603791356086731, |
|
"rewards/margins": 0.3747071921825409, |
|
"rewards/rejected": -0.6350862979888916, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"eval_logits/chosen": -2.465451717376709, |
|
"eval_logits/rejected": -2.34794545173645, |
|
"eval_logps/chosen": -306.55853271484375, |
|
"eval_logps/rejected": -294.0323486328125, |
|
"eval_loss": 0.5960295796394348, |
|
"eval_rewards/accuracies": 0.7155172228813171, |
|
"eval_rewards/chosen": -0.30954551696777344, |
|
"eval_rewards/margins": 0.4400167167186737, |
|
"eval_rewards/rejected": -0.7495622038841248, |
|
"eval_runtime": 90.9114, |
|
"eval_samples_per_second": 20.14, |
|
"eval_steps_per_second": 0.319, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25142857142857145, |
|
"grad_norm": 16.802955839707842, |
|
"learning_rate": 4.660050057270191e-07, |
|
"logits/chosen": -2.4196040630340576, |
|
"logits/rejected": -2.3442416191101074, |
|
"logps/chosen": -358.9793395996094, |
|
"logps/rejected": -372.57965087890625, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.42675742506980896, |
|
"rewards/margins": 0.3199427127838135, |
|
"rewards/rejected": -0.7467001676559448, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2742857142857143, |
|
"grad_norm": 18.751366323915487, |
|
"learning_rate": 4.5526448859687144e-07, |
|
"logits/chosen": -2.141848087310791, |
|
"logits/rejected": -1.955249547958374, |
|
"logps/chosen": -351.53961181640625, |
|
"logps/rejected": -315.23406982421875, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.40635138750076294, |
|
"rewards/margins": 0.42884722352027893, |
|
"rewards/rejected": -0.8351985812187195, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29714285714285715, |
|
"grad_norm": 24.80835479779821, |
|
"learning_rate": 4.432129880904388e-07, |
|
"logits/chosen": -1.6259305477142334, |
|
"logits/rejected": -1.344472885131836, |
|
"logps/chosen": -370.61798095703125, |
|
"logps/rejected": -360.02374267578125, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6546204686164856, |
|
"rewards/margins": 0.4409145414829254, |
|
"rewards/rejected": -1.0955349206924438, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 22.937175363847114, |
|
"learning_rate": 4.299274747394055e-07, |
|
"logits/chosen": -1.5916811227798462, |
|
"logits/rejected": -1.4237914085388184, |
|
"logps/chosen": -361.98211669921875, |
|
"logps/rejected": -371.1536560058594, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4653448462486267, |
|
"rewards/margins": 0.5270770788192749, |
|
"rewards/rejected": -0.9924219250679016, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"grad_norm": 22.019311232158735, |
|
"learning_rate": 4.1549280046953653e-07, |
|
"logits/chosen": -0.7861512303352356, |
|
"logits/rejected": -0.3463224768638611, |
|
"logps/chosen": -362.31756591796875, |
|
"logps/rejected": -414.545166015625, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7533167004585266, |
|
"rewards/margins": 0.6291457414627075, |
|
"rewards/rejected": -1.3824622631072998, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3657142857142857, |
|
"grad_norm": 28.0394510543571, |
|
"learning_rate": 4.000011566683401e-07, |
|
"logits/chosen": -0.27471694350242615, |
|
"logits/rejected": 0.3287709653377533, |
|
"logps/chosen": -392.02667236328125, |
|
"logps/rejected": -424.70556640625, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9268640279769897, |
|
"rewards/margins": 0.7637636065483093, |
|
"rewards/rejected": -1.6906276941299438, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38857142857142857, |
|
"grad_norm": 26.83796617150145, |
|
"learning_rate": 3.8355148537705047e-07, |
|
"logits/chosen": -0.9808514714241028, |
|
"logits/rejected": -0.5751891732215881, |
|
"logps/chosen": -374.8761291503906, |
|
"logps/rejected": -386.53851318359375, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7077123522758484, |
|
"rewards/margins": 0.5003622770309448, |
|
"rewards/rejected": -1.2080745697021484, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4114285714285714, |
|
"grad_norm": 24.04944603295859, |
|
"learning_rate": 3.662488473675315e-07, |
|
"logits/chosen": -1.283348798751831, |
|
"logits/rejected": -0.556102991104126, |
|
"logps/chosen": -405.58306884765625, |
|
"logps/rejected": -420.27606201171875, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7540328502655029, |
|
"rewards/margins": 0.8492224812507629, |
|
"rewards/rejected": -1.603255271911621, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4342857142857143, |
|
"grad_norm": 24.200865372247023, |
|
"learning_rate": 3.48203751140067e-07, |
|
"logits/chosen": -0.9612107276916504, |
|
"logits/rejected": -0.4138285517692566, |
|
"logps/chosen": -374.7398376464844, |
|
"logps/rejected": -386.4855041503906, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9848228693008423, |
|
"rewards/margins": 0.5350502133369446, |
|
"rewards/rejected": -1.5198729038238525, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 21.201969213616838, |
|
"learning_rate": 3.2953144712759537e-07, |
|
"logits/chosen": -1.3318579196929932, |
|
"logits/rejected": -0.688397228717804, |
|
"logps/chosen": -338.3636169433594, |
|
"logps/rejected": -369.132080078125, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7002092599868774, |
|
"rewards/margins": 0.7023059725761414, |
|
"rewards/rejected": -1.402515172958374, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"eval_logits/chosen": -1.0623676776885986, |
|
"eval_logits/rejected": -0.09592445194721222, |
|
"eval_logps/chosen": -338.33111572265625, |
|
"eval_logps/rejected": -371.40704345703125, |
|
"eval_loss": 0.5325908660888672, |
|
"eval_rewards/accuracies": 0.7629310488700867, |
|
"eval_rewards/chosen": -0.627271831035614, |
|
"eval_rewards/margins": 0.8960375785827637, |
|
"eval_rewards/rejected": -1.523309350013733, |
|
"eval_runtime": 90.9273, |
|
"eval_samples_per_second": 20.137, |
|
"eval_steps_per_second": 0.319, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 26.3766221546479, |
|
"learning_rate": 3.103511916141658e-07, |
|
"logits/chosen": -0.7819164991378784, |
|
"logits/rejected": -0.08627365529537201, |
|
"logps/chosen": -337.33160400390625, |
|
"logps/rejected": -386.0670471191406, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7387806177139282, |
|
"rewards/margins": 0.6690900921821594, |
|
"rewards/rejected": -1.4078707695007324, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5028571428571429, |
|
"grad_norm": 25.508902398231484, |
|
"learning_rate": 2.9078548506882117e-07, |
|
"logits/chosen": -0.4892755448818207, |
|
"logits/rejected": 0.36961695551872253, |
|
"logps/chosen": -375.30145263671875, |
|
"logps/rejected": -400.32037353515625, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.90235435962677, |
|
"rewards/margins": 0.6390146613121033, |
|
"rewards/rejected": -1.5413691997528076, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5257142857142857, |
|
"grad_norm": 20.815219094385377, |
|
"learning_rate": 2.709592897595191e-07, |
|
"logits/chosen": -0.5455812215805054, |
|
"logits/rejected": 0.40110301971435547, |
|
"logps/chosen": -356.85198974609375, |
|
"logps/rejected": -377.463623046875, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7579285502433777, |
|
"rewards/margins": 0.6238406896591187, |
|
"rewards/rejected": -1.3817692995071411, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5485714285714286, |
|
"grad_norm": 29.165147607391557, |
|
"learning_rate": 2.509992316440332e-07, |
|
"logits/chosen": -0.44089436531066895, |
|
"logits/rejected": 0.5840796828269958, |
|
"logps/chosen": -384.84747314453125, |
|
"logps/rejected": -446.173828125, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8613764047622681, |
|
"rewards/margins": 0.8861438632011414, |
|
"rewards/rejected": -1.7475202083587646, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 21.600370107234998, |
|
"learning_rate": 2.3103279163519918e-07, |
|
"logits/chosen": -0.7112084031105042, |
|
"logits/rejected": -0.16417662799358368, |
|
"logps/chosen": -350.6177978515625, |
|
"logps/rejected": -410.4039001464844, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.69991534948349, |
|
"rewards/margins": 0.7021188735961914, |
|
"rewards/rejected": -1.4020342826843262, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5942857142857143, |
|
"grad_norm": 25.092625042887974, |
|
"learning_rate": 2.1118749140573358e-07, |
|
"logits/chosen": -0.9435871243476868, |
|
"logits/rejected": -0.3378845751285553, |
|
"logps/chosen": -350.99188232421875, |
|
"logps/rejected": -403.04901123046875, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7120517492294312, |
|
"rewards/margins": 0.5850083231925964, |
|
"rewards/rejected": -1.297060251235962, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6171428571428571, |
|
"grad_norm": 27.282048688910123, |
|
"learning_rate": 1.9159007893272703e-07, |
|
"logits/chosen": -0.04894972965121269, |
|
"logits/rejected": 1.124455213546753, |
|
"logps/chosen": -359.29815673828125, |
|
"logps/rejected": -393.60260009765625, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8677228689193726, |
|
"rewards/margins": 0.7632287740707397, |
|
"rewards/rejected": -1.6309516429901123, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 28.952729734552342, |
|
"learning_rate": 1.7236571898357766e-07, |
|
"logits/chosen": 0.5159433484077454, |
|
"logits/rejected": 1.3371174335479736, |
|
"logps/chosen": -371.30694580078125, |
|
"logps/rejected": -441.8828125, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9826878309249878, |
|
"rewards/margins": 0.8050721287727356, |
|
"rewards/rejected": -1.7877601385116577, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6628571428571428, |
|
"grad_norm": 26.553591107110048, |
|
"learning_rate": 1.5363719371356882e-07, |
|
"logits/chosen": 0.31891578435897827, |
|
"logits/rejected": 1.1744709014892578, |
|
"logps/chosen": -396.26849365234375, |
|
"logps/rejected": -436.003173828125, |
|
"loss": 0.527, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9536263346672058, |
|
"rewards/margins": 0.7509206533432007, |
|
"rewards/rejected": -1.7045469284057617, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"grad_norm": 21.68374961944068, |
|
"learning_rate": 1.3552411848071565e-07, |
|
"logits/chosen": -0.4674099385738373, |
|
"logits/rejected": 0.9225466847419739, |
|
"logps/chosen": -378.4803771972656, |
|
"logps/rejected": -411.07000732421875, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7657118439674377, |
|
"rewards/margins": 0.8185675740242004, |
|
"rewards/rejected": -1.5842792987823486, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"eval_logits/chosen": -0.7676966190338135, |
|
"eval_logits/rejected": 0.6739733219146729, |
|
"eval_logps/chosen": -332.8910827636719, |
|
"eval_logps/rejected": -368.96173095703125, |
|
"eval_loss": 0.524158775806427, |
|
"eval_rewards/accuracies": 0.7629310488700867, |
|
"eval_rewards/chosen": -0.5728713274002075, |
|
"eval_rewards/margins": 0.9259848594665527, |
|
"eval_rewards/rejected": -1.4988560676574707, |
|
"eval_runtime": 90.1222, |
|
"eval_samples_per_second": 20.317, |
|
"eval_steps_per_second": 0.322, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7085714285714285, |
|
"grad_norm": 21.32729150729434, |
|
"learning_rate": 1.1814217788631473e-07, |
|
"logits/chosen": -0.6603255271911621, |
|
"logits/rejected": 0.27117711305618286, |
|
"logps/chosen": -326.17193603515625, |
|
"logps/rejected": -374.136474609375, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6986777782440186, |
|
"rewards/margins": 0.6707334518432617, |
|
"rewards/rejected": -1.3694112300872803, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7314285714285714, |
|
"grad_norm": 26.8635604286969, |
|
"learning_rate": 1.0160238692045331e-07, |
|
"logits/chosen": -0.23095369338989258, |
|
"logits/rejected": 0.5602467656135559, |
|
"logps/chosen": -329.86492919921875, |
|
"logps/rejected": -387.0187683105469, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8558257222175598, |
|
"rewards/margins": 0.6044800281524658, |
|
"rewards/rejected": -1.4603056907653809, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7542857142857143, |
|
"grad_norm": 24.235988068898312, |
|
"learning_rate": 8.601038193139438e-08, |
|
"logits/chosen": -0.3879459798336029, |
|
"logits/rejected": 0.737913966178894, |
|
"logps/chosen": -381.5906677246094, |
|
"logps/rejected": -407.6598815917969, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.823724627494812, |
|
"rewards/margins": 0.7749950289726257, |
|
"rewards/rejected": -1.5987197160720825, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7771428571428571, |
|
"grad_norm": 19.828650597594493, |
|
"learning_rate": 7.146574594727572e-08, |
|
"logits/chosen": 0.20474159717559814, |
|
"logits/rejected": 1.0162971019744873, |
|
"logps/chosen": -362.6044921875, |
|
"logps/rejected": -421.4503479003906, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9665349721908569, |
|
"rewards/margins": 0.8277307748794556, |
|
"rewards/rejected": -1.7942657470703125, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 22.77041280299393, |
|
"learning_rate": 5.8061372659157306e-08, |
|
"logits/chosen": -0.23095539212226868, |
|
"logits/rejected": 0.8589683771133423, |
|
"logps/chosen": -380.7283630371094, |
|
"logps/rejected": -412.0213317871094, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9102112054824829, |
|
"rewards/margins": 0.667026162147522, |
|
"rewards/rejected": -1.5772373676300049, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8228571428571428, |
|
"grad_norm": 24.963575760417292, |
|
"learning_rate": 4.5882873127531614e-08, |
|
"logits/chosen": -0.4319024682044983, |
|
"logits/rejected": 0.861344039440155, |
|
"logps/chosen": -366.2247314453125, |
|
"logps/rejected": -415.48028564453125, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.8081969022750854, |
|
"rewards/margins": 0.8357402086257935, |
|
"rewards/rejected": -1.643937110900879, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8457142857142858, |
|
"grad_norm": 22.865584640185784, |
|
"learning_rate": 3.500802900154412e-08, |
|
"logits/chosen": -0.290349543094635, |
|
"logits/rejected": 1.0458359718322754, |
|
"logps/chosen": -349.37005615234375, |
|
"logps/rejected": -402.3358459472656, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7830866575241089, |
|
"rewards/margins": 0.856115996837616, |
|
"rewards/rejected": -1.6392027139663696, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8685714285714285, |
|
"grad_norm": 25.875193424699745, |
|
"learning_rate": 2.550629574310309e-08, |
|
"logits/chosen": -0.4227335453033447, |
|
"logits/rejected": 0.8518049120903015, |
|
"logps/chosen": -414.580322265625, |
|
"logps/rejected": -417.60418701171875, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9652398824691772, |
|
"rewards/margins": 0.6368075609207153, |
|
"rewards/rejected": -1.602047324180603, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8914285714285715, |
|
"grad_norm": 23.202834331091832, |
|
"learning_rate": 1.7438359028687983e-08, |
|
"logits/chosen": -0.143943652510643, |
|
"logits/rejected": 0.5985423922538757, |
|
"logps/chosen": -392.14068603515625, |
|
"logps/rejected": -445.93560791015625, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8076774477958679, |
|
"rewards/margins": 0.7039517760276794, |
|
"rewards/rejected": -1.5116291046142578, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 31.25928182468869, |
|
"learning_rate": 1.0855747162029361e-08, |
|
"logits/chosen": 0.005290505476295948, |
|
"logits/rejected": 0.5700523257255554, |
|
"logps/chosen": -371.91815185546875, |
|
"logps/rejected": -422.913818359375, |
|
"loss": 0.5493, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9318493604660034, |
|
"rewards/margins": 0.6315523982048035, |
|
"rewards/rejected": -1.563401699066162, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"eval_logits/chosen": -0.012980658560991287, |
|
"eval_logits/rejected": 1.572424292564392, |
|
"eval_logps/chosen": -357.8257751464844, |
|
"eval_logps/rejected": -398.6412353515625, |
|
"eval_loss": 0.5201366543769836, |
|
"eval_rewards/accuracies": 0.7758620977401733, |
|
"eval_rewards/chosen": -0.8222182989120483, |
|
"eval_rewards/margins": 0.9734326004981995, |
|
"eval_rewards/rejected": -1.795650839805603, |
|
"eval_runtime": 90.9384, |
|
"eval_samples_per_second": 20.135, |
|
"eval_steps_per_second": 0.319, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9371428571428572, |
|
"grad_norm": 24.160283510679385, |
|
"learning_rate": 5.8005019731033615e-09, |
|
"logits/chosen": -0.10076072067022324, |
|
"logits/rejected": 0.8628407716751099, |
|
"logps/chosen": -385.8863525390625, |
|
"logps/rejected": -425.5079040527344, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.977064311504364, |
|
"rewards/margins": 0.6807142496109009, |
|
"rewards/rejected": -1.6577785015106201, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 21.949347011437847, |
|
"learning_rate": 2.3049103053431886e-09, |
|
"logits/chosen": -0.21599116921424866, |
|
"logits/rejected": 1.2091736793518066, |
|
"logps/chosen": -353.96514892578125, |
|
"logps/rejected": -403.6047058105469, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.7121706604957581, |
|
"rewards/margins": 0.9840081930160522, |
|
"rewards/rejected": -1.696178674697876, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9828571428571429, |
|
"grad_norm": 23.473520497590073, |
|
"learning_rate": 3.9129780600541397e-10, |
|
"logits/chosen": 0.03587682545185089, |
|
"logits/rejected": 1.0841922760009766, |
|
"logps/chosen": -372.0699768066406, |
|
"logps/rejected": -431.086181640625, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8983810544013977, |
|
"rewards/margins": 0.775784432888031, |
|
"rewards/rejected": -1.6741654872894287, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9988571428571429, |
|
"step": 437, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5608050315822016, |
|
"train_runtime": 10950.5403, |
|
"train_samples_per_second": 5.114, |
|
"train_steps_per_second": 0.04 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 437, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|