{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 40000000000000000, |
|
"global_step": 522, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009578544061302681, |
|
"grad_norm": 19.977911424248134, |
|
"learning_rate": 4.777070063694267e-09, |
|
"logits/chosen": 0.9882557988166809, |
|
"logits/rejected": 0.86724853515625, |
|
"logps/chosen": -0.6835294961929321, |
|
"logps/rejected": -0.7026089429855347, |
|
"loss": 5.255, |
|
"nll_loss": 0.6835293769836426, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.835294246673584, |
|
"rewards/margins": 0.19079475104808807, |
|
"rewards/rejected": -7.026089668273926, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.019157088122605363, |
|
"grad_norm": 15.532794462607491, |
|
"learning_rate": 9.554140127388535e-09, |
|
"logits/chosen": 0.8198736906051636, |
|
"logits/rejected": 0.7917336225509644, |
|
"logps/chosen": -0.7847039103507996, |
|
"logps/rejected": -0.7573590278625488, |
|
"loss": 5.3866, |
|
"nll_loss": 0.7847039103507996, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.847039222717285, |
|
"rewards/margins": -0.2734483778476715, |
|
"rewards/rejected": -7.573590278625488, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.028735632183908046, |
|
"grad_norm": 17.18263537036945, |
|
"learning_rate": 1.4331210191082803e-08, |
|
"logits/chosen": 0.6409434080123901, |
|
"logits/rejected": 0.8134855031967163, |
|
"logps/chosen": -0.7896786332130432, |
|
"logps/rejected": -0.7245721220970154, |
|
"loss": 5.1665, |
|
"nll_loss": 0.7896786332130432, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -7.896786689758301, |
|
"rewards/margins": -0.6510653495788574, |
|
"rewards/rejected": -7.245721340179443, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.038314176245210725, |
|
"grad_norm": 14.59673969815782, |
|
"learning_rate": 1.910828025477707e-08, |
|
"logits/chosen": 0.7096026539802551, |
|
"logits/rejected": 0.8205940127372742, |
|
"logps/chosen": -0.7373861074447632, |
|
"logps/rejected": -0.7078397870063782, |
|
"loss": 5.229, |
|
"nll_loss": 0.7373861074447632, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -7.373861789703369, |
|
"rewards/margins": -0.29546356201171875, |
|
"rewards/rejected": -7.07839822769165, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04789272030651341, |
|
"grad_norm": 15.05129246603728, |
|
"learning_rate": 2.3885350318471336e-08, |
|
"logits/chosen": 0.9928520321846008, |
|
"logits/rejected": 0.9827763438224792, |
|
"logps/chosen": -0.5954749584197998, |
|
"logps/rejected": -0.654929518699646, |
|
"loss": 5.4048, |
|
"nll_loss": 0.595474898815155, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.954749584197998, |
|
"rewards/margins": 0.5945457220077515, |
|
"rewards/rejected": -6.549294948577881, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05747126436781609, |
|
"grad_norm": 20.985831817061353, |
|
"learning_rate": 2.8662420382165606e-08, |
|
"logits/chosen": 0.7672659158706665, |
|
"logits/rejected": 0.8577300310134888, |
|
"logps/chosen": -0.6465980410575867, |
|
"logps/rejected": -0.7104107737541199, |
|
"loss": 5.2394, |
|
"nll_loss": 0.6465979814529419, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.465980529785156, |
|
"rewards/margins": 0.6381272077560425, |
|
"rewards/rejected": -7.104107856750488, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06704980842911877, |
|
"grad_norm": 16.252866127879475, |
|
"learning_rate": 3.343949044585987e-08, |
|
"logits/chosen": 0.8599953651428223, |
|
"logits/rejected": 0.8298212289810181, |
|
"logps/chosen": -0.7009156346321106, |
|
"logps/rejected": -0.7079470157623291, |
|
"loss": 5.3018, |
|
"nll_loss": 0.700915515422821, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.009156227111816, |
|
"rewards/margins": 0.07031383365392685, |
|
"rewards/rejected": -7.079470157623291, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07662835249042145, |
|
"grad_norm": 16.99436823968821, |
|
"learning_rate": 3.821656050955414e-08, |
|
"logits/chosen": 0.6351531744003296, |
|
"logits/rejected": 0.7295518517494202, |
|
"logps/chosen": -0.7221059799194336, |
|
"logps/rejected": -0.7749537825584412, |
|
"loss": 5.2718, |
|
"nll_loss": 0.722105860710144, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.221059322357178, |
|
"rewards/margins": 0.5284790992736816, |
|
"rewards/rejected": -7.749538421630859, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08620689655172414, |
|
"grad_norm": 16.474791128586975, |
|
"learning_rate": 4.29936305732484e-08, |
|
"logits/chosen": 0.9291857481002808, |
|
"logits/rejected": 0.83441162109375, |
|
"logps/chosen": -0.6729990243911743, |
|
"logps/rejected": -0.7266682386398315, |
|
"loss": 5.2957, |
|
"nll_loss": 0.6729990243911743, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.729989528656006, |
|
"rewards/margins": 0.5366925001144409, |
|
"rewards/rejected": -7.2666826248168945, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09578544061302682, |
|
"grad_norm": 17.51495070571998, |
|
"learning_rate": 4.777070063694267e-08, |
|
"logits/chosen": 0.776635468006134, |
|
"logits/rejected": 0.845638632774353, |
|
"logps/chosen": -0.7210168838500977, |
|
"logps/rejected": -0.6794952154159546, |
|
"loss": 5.2687, |
|
"nll_loss": 0.7210168838500977, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.210168361663818, |
|
"rewards/margins": -0.41521579027175903, |
|
"rewards/rejected": -6.794952392578125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1053639846743295, |
|
"grad_norm": 15.080710084896578, |
|
"learning_rate": 5.2547770700636935e-08, |
|
"logits/chosen": 0.8091619610786438, |
|
"logits/rejected": 0.8544187545776367, |
|
"logps/chosen": -0.7046722769737244, |
|
"logps/rejected": -0.703011691570282, |
|
"loss": 5.276, |
|
"nll_loss": 0.7046722769737244, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.046723365783691, |
|
"rewards/margins": -0.01660604402422905, |
|
"rewards/rejected": -7.030117034912109, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11494252873563218, |
|
"grad_norm": 22.50777831402302, |
|
"learning_rate": 5.732484076433121e-08, |
|
"logits/chosen": 0.8281611204147339, |
|
"logits/rejected": 0.8864553570747375, |
|
"logps/chosen": -0.6377026438713074, |
|
"logps/rejected": -0.7030226588249207, |
|
"loss": 5.2442, |
|
"nll_loss": 0.6377025842666626, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.377026557922363, |
|
"rewards/margins": 0.6532003283500671, |
|
"rewards/rejected": -7.030226707458496, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12452107279693486, |
|
"grad_norm": 19.79864592347249, |
|
"learning_rate": 6.210191082802548e-08, |
|
"logits/chosen": 1.0290864706039429, |
|
"logits/rejected": 0.7254050374031067, |
|
"logps/chosen": -0.6748142838478088, |
|
"logps/rejected": -0.6928014755249023, |
|
"loss": 5.3121, |
|
"nll_loss": 0.6748142838478088, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.748143196105957, |
|
"rewards/margins": 0.1798720359802246, |
|
"rewards/rejected": -6.928015232086182, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13409961685823754, |
|
"grad_norm": 15.639368843476637, |
|
"learning_rate": 6.687898089171974e-08, |
|
"logits/chosen": 0.9622044563293457, |
|
"logits/rejected": 0.9868735074996948, |
|
"logps/chosen": -0.7220475077629089, |
|
"logps/rejected": -0.8194485902786255, |
|
"loss": 5.2694, |
|
"nll_loss": 0.7220475077629089, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.220475196838379, |
|
"rewards/margins": 0.9740108251571655, |
|
"rewards/rejected": -8.194485664367676, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14367816091954022, |
|
"grad_norm": 15.338789064769195, |
|
"learning_rate": 7.165605095541401e-08, |
|
"logits/chosen": 0.8242565989494324, |
|
"logits/rejected": 0.8403714299201965, |
|
"logps/chosen": -0.8075603246688843, |
|
"logps/rejected": -0.7985510230064392, |
|
"loss": 5.307, |
|
"nll_loss": 0.8075603246688843, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.075602531433105, |
|
"rewards/margins": -0.09009275585412979, |
|
"rewards/rejected": -7.985510349273682, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1532567049808429, |
|
"grad_norm": 16.558919395561837, |
|
"learning_rate": 7.643312101910828e-08, |
|
"logits/chosen": 0.852637767791748, |
|
"logits/rejected": 0.6942145228385925, |
|
"logps/chosen": -0.6923746466636658, |
|
"logps/rejected": -0.748778760433197, |
|
"loss": 5.2346, |
|
"nll_loss": 0.6923746466636658, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.923746585845947, |
|
"rewards/margins": 0.564041256904602, |
|
"rewards/rejected": -7.48778772354126, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16283524904214558, |
|
"grad_norm": 14.992266742573277, |
|
"learning_rate": 8.121019108280254e-08, |
|
"logits/chosen": 0.8808382749557495, |
|
"logits/rejected": 0.9680411219596863, |
|
"logps/chosen": -0.6730803847312927, |
|
"logps/rejected": -0.8849495649337769, |
|
"loss": 5.2452, |
|
"nll_loss": 0.6730804443359375, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.730803489685059, |
|
"rewards/margins": 2.118691921234131, |
|
"rewards/rejected": -8.849495887756348, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1724137931034483, |
|
"grad_norm": 14.696295604697424, |
|
"learning_rate": 8.59872611464968e-08, |
|
"logits/chosen": 0.6036122441291809, |
|
"logits/rejected": 0.6832916140556335, |
|
"logps/chosen": -0.7643388509750366, |
|
"logps/rejected": -0.7602866888046265, |
|
"loss": 5.3007, |
|
"nll_loss": 0.7643388509750366, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.643387794494629, |
|
"rewards/margins": -0.040520571172237396, |
|
"rewards/rejected": -7.602867126464844, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18199233716475097, |
|
"grad_norm": 14.017222797723742, |
|
"learning_rate": 9.076433121019108e-08, |
|
"logits/chosen": 0.9275333285331726, |
|
"logits/rejected": 0.7763484120368958, |
|
"logps/chosen": -0.7543958425521851, |
|
"logps/rejected": -0.7292922139167786, |
|
"loss": 5.2406, |
|
"nll_loss": 0.7543958425521851, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.5439581871032715, |
|
"rewards/margins": -0.25103625655174255, |
|
"rewards/rejected": -7.292922019958496, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19157088122605365, |
|
"grad_norm": 14.22025157094158, |
|
"learning_rate": 9.554140127388534e-08, |
|
"logits/chosen": 0.9820090532302856, |
|
"logits/rejected": 0.8248282670974731, |
|
"logps/chosen": -0.6709014177322388, |
|
"logps/rejected": -0.7284021973609924, |
|
"loss": 5.461, |
|
"nll_loss": 0.6709014177322388, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.709013938903809, |
|
"rewards/margins": 0.5750080943107605, |
|
"rewards/rejected": -7.284021854400635, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20114942528735633, |
|
"grad_norm": 15.684437324553063, |
|
"learning_rate": 1.0031847133757961e-07, |
|
"logits/chosen": 0.8076725006103516, |
|
"logits/rejected": 0.8857797384262085, |
|
"logps/chosen": -0.6815362572669983, |
|
"logps/rejected": -0.6763182878494263, |
|
"loss": 5.3235, |
|
"nll_loss": 0.6815363168716431, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.815362453460693, |
|
"rewards/margins": -0.05218010023236275, |
|
"rewards/rejected": -6.763182163238525, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.210727969348659, |
|
"grad_norm": 24.773788939839015, |
|
"learning_rate": 1.0509554140127387e-07, |
|
"logits/chosen": 0.7703748941421509, |
|
"logits/rejected": 0.747571587562561, |
|
"logps/chosen": -0.6411559581756592, |
|
"logps/rejected": -0.6272796392440796, |
|
"loss": 5.1527, |
|
"nll_loss": 0.6411559581756592, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -6.411559104919434, |
|
"rewards/margins": -0.13876314461231232, |
|
"rewards/rejected": -6.272796630859375, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22030651340996169, |
|
"grad_norm": 15.503433446190241, |
|
"learning_rate": 1.0987261146496813e-07, |
|
"logits/chosen": 0.7363082766532898, |
|
"logits/rejected": 0.7384678721427917, |
|
"logps/chosen": -0.7916821241378784, |
|
"logps/rejected": -0.8139774203300476, |
|
"loss": 5.2975, |
|
"nll_loss": 0.7916821241378784, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.916821479797363, |
|
"rewards/margins": 0.2229524552822113, |
|
"rewards/rejected": -8.139774322509766, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.22988505747126436, |
|
"grad_norm": 14.405865015988635, |
|
"learning_rate": 1.1464968152866242e-07, |
|
"logits/chosen": 0.988644003868103, |
|
"logits/rejected": 0.6871722936630249, |
|
"logps/chosen": -0.8117585182189941, |
|
"logps/rejected": -0.7687948942184448, |
|
"loss": 5.2603, |
|
"nll_loss": 0.8117585182189941, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -8.117586135864258, |
|
"rewards/margins": -0.42963656783103943, |
|
"rewards/rejected": -7.687948703765869, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23946360153256704, |
|
"grad_norm": 15.441159671033866, |
|
"learning_rate": 1.194267515923567e-07, |
|
"logits/chosen": 0.9181255102157593, |
|
"logits/rejected": 1.0879169702529907, |
|
"logps/chosen": -0.7296438813209534, |
|
"logps/rejected": -0.7042439579963684, |
|
"loss": 5.3496, |
|
"nll_loss": 0.7296438217163086, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.296438694000244, |
|
"rewards/margins": -0.25399884581565857, |
|
"rewards/rejected": -7.0424394607543945, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.24904214559386972, |
|
"grad_norm": 14.525160680247224, |
|
"learning_rate": 1.2420382165605095e-07, |
|
"logits/chosen": 1.0101947784423828, |
|
"logits/rejected": 0.9281114339828491, |
|
"logps/chosen": -0.6179158091545105, |
|
"logps/rejected": -0.5991578102111816, |
|
"loss": 5.349, |
|
"nll_loss": 0.6179158091545105, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -6.1791582107543945, |
|
"rewards/margins": -0.1875801980495453, |
|
"rewards/rejected": -5.991578102111816, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.25862068965517243, |
|
"grad_norm": 14.83309451045206, |
|
"learning_rate": 1.2898089171974521e-07, |
|
"logits/chosen": 0.6288986802101135, |
|
"logits/rejected": 0.8249040842056274, |
|
"logps/chosen": -0.671284019947052, |
|
"logps/rejected": -0.7514477968215942, |
|
"loss": 5.2492, |
|
"nll_loss": 0.671284019947052, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.7128400802612305, |
|
"rewards/margins": 0.801638126373291, |
|
"rewards/rejected": -7.5144782066345215, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2681992337164751, |
|
"grad_norm": 15.778143495292198, |
|
"learning_rate": 1.3375796178343948e-07, |
|
"logits/chosen": 0.979997992515564, |
|
"logits/rejected": 0.8755594491958618, |
|
"logps/chosen": -0.5514487028121948, |
|
"logps/rejected": -0.675363302230835, |
|
"loss": 5.3335, |
|
"nll_loss": 0.5514487028121948, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.514487266540527, |
|
"rewards/margins": 1.2391456365585327, |
|
"rewards/rejected": -6.75363302230835, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 18.462671529719298, |
|
"learning_rate": 1.3853503184713377e-07, |
|
"logits/chosen": 0.8637372851371765, |
|
"logits/rejected": 0.9276704788208008, |
|
"logps/chosen": -0.7762002944946289, |
|
"logps/rejected": -1.0306679010391235, |
|
"loss": 5.2209, |
|
"nll_loss": 0.7762002348899841, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.762002468109131, |
|
"rewards/margins": 2.5446763038635254, |
|
"rewards/rejected": -10.306678771972656, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.28735632183908044, |
|
"grad_norm": 14.528423213748091, |
|
"learning_rate": 1.4331210191082803e-07, |
|
"logits/chosen": 0.9764812588691711, |
|
"logits/rejected": 1.0769740343093872, |
|
"logps/chosen": -0.7632086277008057, |
|
"logps/rejected": -0.7627191543579102, |
|
"loss": 5.1848, |
|
"nll_loss": 0.7632086277008057, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.632086277008057, |
|
"rewards/margins": -0.004894733428955078, |
|
"rewards/rejected": -7.627191066741943, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.29693486590038315, |
|
"grad_norm": 14.670904215756279, |
|
"learning_rate": 1.480891719745223e-07, |
|
"logits/chosen": 0.7151988744735718, |
|
"logits/rejected": 0.8136464953422546, |
|
"logps/chosen": -0.8410874605178833, |
|
"logps/rejected": -0.6722872853279114, |
|
"loss": 5.322, |
|
"nll_loss": 0.8410874605178833, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -8.410874366760254, |
|
"rewards/margins": -1.688001036643982, |
|
"rewards/rejected": -6.722872734069824, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3065134099616858, |
|
"grad_norm": 16.96923463521279, |
|
"learning_rate": 1.5286624203821656e-07, |
|
"logits/chosen": 1.0444055795669556, |
|
"logits/rejected": 0.8398516774177551, |
|
"logps/chosen": -0.7191808819770813, |
|
"logps/rejected": -0.7984825372695923, |
|
"loss": 5.0657, |
|
"nll_loss": 0.7191808819770813, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.191809177398682, |
|
"rewards/margins": 0.7930164337158203, |
|
"rewards/rejected": -7.98482608795166, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3160919540229885, |
|
"grad_norm": 14.547279963040138, |
|
"learning_rate": 1.5764331210191082e-07, |
|
"logits/chosen": 0.694339394569397, |
|
"logits/rejected": 0.7881234884262085, |
|
"logps/chosen": -0.6559264063835144, |
|
"logps/rejected": -0.6876171827316284, |
|
"loss": 5.3857, |
|
"nll_loss": 0.6559264063835144, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.559264183044434, |
|
"rewards/margins": 0.3169073164463043, |
|
"rewards/rejected": -6.876172065734863, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.32567049808429116, |
|
"grad_norm": 15.883172546108769, |
|
"learning_rate": 1.6242038216560508e-07, |
|
"logits/chosen": 0.9939007759094238, |
|
"logits/rejected": 0.9327294230461121, |
|
"logps/chosen": -0.514694094657898, |
|
"logps/rejected": -0.6553934812545776, |
|
"loss": 5.1601, |
|
"nll_loss": 0.514694094657898, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.146941184997559, |
|
"rewards/margins": 1.4069939851760864, |
|
"rewards/rejected": -6.5539350509643555, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.33524904214559387, |
|
"grad_norm": 14.974676446039048, |
|
"learning_rate": 1.6719745222929935e-07, |
|
"logits/chosen": 0.8872886896133423, |
|
"logits/rejected": 0.8021435737609863, |
|
"logps/chosen": -0.7085338830947876, |
|
"logps/rejected": -0.7116855382919312, |
|
"loss": 5.3339, |
|
"nll_loss": 0.7085338830947876, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.085339546203613, |
|
"rewards/margins": 0.031516265124082565, |
|
"rewards/rejected": -7.116854667663574, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 17.49142572813654, |
|
"learning_rate": 1.719745222929936e-07, |
|
"logits/chosen": 0.7162724733352661, |
|
"logits/rejected": 0.6837285757064819, |
|
"logps/chosen": -0.8341668844223022, |
|
"logps/rejected": -0.7984797358512878, |
|
"loss": 5.211, |
|
"nll_loss": 0.8341668248176575, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.341669082641602, |
|
"rewards/margins": -0.35687151551246643, |
|
"rewards/rejected": -7.984797477722168, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3544061302681992, |
|
"grad_norm": 15.695305726655395, |
|
"learning_rate": 1.7675159235668787e-07, |
|
"logits/chosen": 0.9962735176086426, |
|
"logits/rejected": 0.8522371053695679, |
|
"logps/chosen": -0.824070930480957, |
|
"logps/rejected": -0.7453502416610718, |
|
"loss": 5.1938, |
|
"nll_loss": 0.824070930480957, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.24070930480957, |
|
"rewards/margins": -0.7872062921524048, |
|
"rewards/rejected": -7.453502655029297, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.36398467432950193, |
|
"grad_norm": 15.933522154415847, |
|
"learning_rate": 1.8152866242038216e-07, |
|
"logits/chosen": 0.7882771492004395, |
|
"logits/rejected": 0.7340660691261292, |
|
"logps/chosen": -0.6636666059494019, |
|
"logps/rejected": -0.7198070287704468, |
|
"loss": 5.3231, |
|
"nll_loss": 0.6636666655540466, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.636666774749756, |
|
"rewards/margins": 0.5614040493965149, |
|
"rewards/rejected": -7.198070526123047, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3735632183908046, |
|
"grad_norm": 14.263964254379374, |
|
"learning_rate": 1.8630573248407643e-07, |
|
"logits/chosen": 0.6639770269393921, |
|
"logits/rejected": 0.7358517050743103, |
|
"logps/chosen": -0.7163713574409485, |
|
"logps/rejected": -0.7314590215682983, |
|
"loss": 5.2454, |
|
"nll_loss": 0.7163712978363037, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.1637139320373535, |
|
"rewards/margins": 0.1508767157793045, |
|
"rewards/rejected": -7.3145904541015625, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3831417624521073, |
|
"grad_norm": 15.417127684289229, |
|
"learning_rate": 1.910828025477707e-07, |
|
"logits/chosen": 0.6845916509628296, |
|
"logits/rejected": 0.8717025518417358, |
|
"logps/chosen": -0.7460684776306152, |
|
"logps/rejected": -0.8934140205383301, |
|
"loss": 5.3951, |
|
"nll_loss": 0.7460684776306152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.460684776306152, |
|
"rewards/margins": 1.4734549522399902, |
|
"rewards/rejected": -8.9341402053833, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39272030651340994, |
|
"grad_norm": 15.29361256055114, |
|
"learning_rate": 1.9585987261146495e-07, |
|
"logits/chosen": 0.8160010576248169, |
|
"logits/rejected": 0.7316077351570129, |
|
"logps/chosen": -0.8079813122749329, |
|
"logps/rejected": -0.7992033958435059, |
|
"loss": 5.1982, |
|
"nll_loss": 0.8079813122749329, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.079813003540039, |
|
"rewards/margins": -0.08777885138988495, |
|
"rewards/rejected": -7.992033958435059, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.40229885057471265, |
|
"grad_norm": 18.735791443990614, |
|
"learning_rate": 2.0063694267515922e-07, |
|
"logits/chosen": 0.8831043243408203, |
|
"logits/rejected": 0.9638816118240356, |
|
"logps/chosen": -0.8348730802536011, |
|
"logps/rejected": -0.7787247896194458, |
|
"loss": 5.2804, |
|
"nll_loss": 0.8348730802536011, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -8.348731994628906, |
|
"rewards/margins": -0.5614833235740662, |
|
"rewards/rejected": -7.787248134613037, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4118773946360153, |
|
"grad_norm": 15.020231482587125, |
|
"learning_rate": 2.0541401273885348e-07, |
|
"logits/chosen": 0.8089305758476257, |
|
"logits/rejected": 0.7801268696784973, |
|
"logps/chosen": -0.9548230171203613, |
|
"logps/rejected": -0.8521941304206848, |
|
"loss": 5.4231, |
|
"nll_loss": 0.9548231363296509, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -9.548230171203613, |
|
"rewards/margins": -1.0262889862060547, |
|
"rewards/rejected": -8.521940231323242, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.421455938697318, |
|
"grad_norm": 16.66122418369343, |
|
"learning_rate": 2.1019108280254774e-07, |
|
"logits/chosen": 0.8265215754508972, |
|
"logits/rejected": 0.7605193257331848, |
|
"logps/chosen": -0.6805425882339478, |
|
"logps/rejected": -0.7011183500289917, |
|
"loss": 5.2324, |
|
"nll_loss": 0.6805425882339478, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -6.80542516708374, |
|
"rewards/margins": 0.20575818419456482, |
|
"rewards/rejected": -7.011183261871338, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.43103448275862066, |
|
"grad_norm": 15.84641449526937, |
|
"learning_rate": 2.14968152866242e-07, |
|
"logits/chosen": 0.9168558120727539, |
|
"logits/rejected": 0.8388528823852539, |
|
"logps/chosen": -0.8054409027099609, |
|
"logps/rejected": -0.7549694776535034, |
|
"loss": 5.2786, |
|
"nll_loss": 0.8054410219192505, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -8.054409980773926, |
|
"rewards/margins": -0.5047143697738647, |
|
"rewards/rejected": -7.549695014953613, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.44061302681992337, |
|
"grad_norm": 15.381291199652642, |
|
"learning_rate": 2.1974522292993627e-07, |
|
"logits/chosen": 0.8924915194511414, |
|
"logits/rejected": 0.9192771911621094, |
|
"logps/chosen": -0.7253198623657227, |
|
"logps/rejected": -0.6863486766815186, |
|
"loss": 5.3113, |
|
"nll_loss": 0.7253197431564331, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.253198146820068, |
|
"rewards/margins": -0.38971146941185, |
|
"rewards/rejected": -6.863486289978027, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4501915708812261, |
|
"grad_norm": 22.207135774874516, |
|
"learning_rate": 2.2452229299363056e-07, |
|
"logits/chosen": 0.6804489493370056, |
|
"logits/rejected": 0.8135985136032104, |
|
"logps/chosen": -0.7592498064041138, |
|
"logps/rejected": -0.8050843477249146, |
|
"loss": 5.306, |
|
"nll_loss": 0.7592498064041138, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.592497825622559, |
|
"rewards/margins": 0.4583454132080078, |
|
"rewards/rejected": -8.050843238830566, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.45977011494252873, |
|
"grad_norm": 15.418776278146453, |
|
"learning_rate": 2.2929936305732485e-07, |
|
"logits/chosen": 0.740727961063385, |
|
"logits/rejected": 0.7790459394454956, |
|
"logps/chosen": -0.6311203837394714, |
|
"logps/rejected": -0.7699800729751587, |
|
"loss": 5.3202, |
|
"nll_loss": 0.6311203837394714, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.311203956604004, |
|
"rewards/margins": 1.388596534729004, |
|
"rewards/rejected": -7.69980001449585, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.46934865900383144, |
|
"grad_norm": 18.062610898602617, |
|
"learning_rate": 2.340764331210191e-07, |
|
"logits/chosen": 0.7905587553977966, |
|
"logits/rejected": 0.70208740234375, |
|
"logps/chosen": -0.8448599576950073, |
|
"logps/rejected": -0.726888120174408, |
|
"loss": 5.2108, |
|
"nll_loss": 0.8448599576950073, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -8.448599815368652, |
|
"rewards/margins": -1.179718255996704, |
|
"rewards/rejected": -7.268881320953369, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4789272030651341, |
|
"grad_norm": 20.317601991362707, |
|
"learning_rate": 2.388535031847134e-07, |
|
"logits/chosen": 0.8352301716804504, |
|
"logits/rejected": 0.7928398847579956, |
|
"logps/chosen": -0.7886701822280884, |
|
"logps/rejected": -0.7603856325149536, |
|
"loss": 5.3794, |
|
"nll_loss": 0.7886701822280884, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.886702060699463, |
|
"rewards/margins": -0.28284597396850586, |
|
"rewards/rejected": -7.603856086730957, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4885057471264368, |
|
"grad_norm": 17.92061038766115, |
|
"learning_rate": 2.4363057324840764e-07, |
|
"logits/chosen": 0.7246678471565247, |
|
"logits/rejected": 0.8966231346130371, |
|
"logps/chosen": -0.8691000938415527, |
|
"logps/rejected": -0.7771162986755371, |
|
"loss": 5.2854, |
|
"nll_loss": 0.8691000938415527, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.691000938415527, |
|
"rewards/margins": -0.9198387861251831, |
|
"rewards/rejected": -7.771162509918213, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.49808429118773945, |
|
"grad_norm": 17.57639103239166, |
|
"learning_rate": 2.484076433121019e-07, |
|
"logits/chosen": 0.9574035406112671, |
|
"logits/rejected": 0.6729179620742798, |
|
"logps/chosen": -0.6743525266647339, |
|
"logps/rejected": -0.7344107031822205, |
|
"loss": 5.2276, |
|
"nll_loss": 0.6743525266647339, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.743525505065918, |
|
"rewards/margins": 0.6005817651748657, |
|
"rewards/rejected": -7.344107151031494, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5076628352490421, |
|
"grad_norm": 21.283801923925182, |
|
"learning_rate": 2.5318471337579616e-07, |
|
"logits/chosen": 0.8346107602119446, |
|
"logits/rejected": 0.9647878408432007, |
|
"logps/chosen": -0.6870938539505005, |
|
"logps/rejected": -0.6139359474182129, |
|
"loss": 5.3469, |
|
"nll_loss": 0.6870938539505005, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -6.870938301086426, |
|
"rewards/margins": -0.7315785884857178, |
|
"rewards/rejected": -6.139359951019287, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5172413793103449, |
|
"grad_norm": 14.399379764504936, |
|
"learning_rate": 2.5796178343949043e-07, |
|
"logits/chosen": 0.864532470703125, |
|
"logits/rejected": 0.8542212247848511, |
|
"logps/chosen": -0.7113697528839111, |
|
"logps/rejected": -0.732496976852417, |
|
"loss": 5.177, |
|
"nll_loss": 0.7113697528839111, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.1136980056762695, |
|
"rewards/margins": 0.2112717628479004, |
|
"rewards/rejected": -7.324969291687012, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5268199233716475, |
|
"grad_norm": 14.537680856211797, |
|
"learning_rate": 2.627388535031847e-07, |
|
"logits/chosen": 0.9982441067695618, |
|
"logits/rejected": 0.8428419232368469, |
|
"logps/chosen": -0.687545120716095, |
|
"logps/rejected": -0.7962311506271362, |
|
"loss": 5.1719, |
|
"nll_loss": 0.687545120716095, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.87545108795166, |
|
"rewards/margins": 1.0868605375289917, |
|
"rewards/rejected": -7.9623122215271, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5363984674329502, |
|
"grad_norm": 19.6142347909319, |
|
"learning_rate": 2.6751592356687895e-07, |
|
"logits/chosen": 0.7479467988014221, |
|
"logits/rejected": 0.7518871426582336, |
|
"logps/chosen": -0.7905808687210083, |
|
"logps/rejected": -0.7879656553268433, |
|
"loss": 5.2129, |
|
"nll_loss": 0.7905808687210083, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.905808448791504, |
|
"rewards/margins": -0.02615184709429741, |
|
"rewards/rejected": -7.8796563148498535, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5459770114942529, |
|
"grad_norm": 15.380742906594616, |
|
"learning_rate": 2.722929936305732e-07, |
|
"logits/chosen": 0.6082831025123596, |
|
"logits/rejected": 0.6754466891288757, |
|
"logps/chosen": -0.866966724395752, |
|
"logps/rejected": -0.822973370552063, |
|
"loss": 5.2728, |
|
"nll_loss": 0.8669666051864624, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.66966724395752, |
|
"rewards/margins": -0.4399335980415344, |
|
"rewards/rejected": -8.229734420776367, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 16.49675270885239, |
|
"learning_rate": 2.7707006369426753e-07, |
|
"logits/chosen": 0.8076263666152954, |
|
"logits/rejected": 0.7774611711502075, |
|
"logps/chosen": -0.7558648586273193, |
|
"logps/rejected": -0.8593353033065796, |
|
"loss": 5.2718, |
|
"nll_loss": 0.7558648586273193, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.558648586273193, |
|
"rewards/margins": 1.0347039699554443, |
|
"rewards/rejected": -8.593353271484375, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5651340996168582, |
|
"grad_norm": 15.91951747554686, |
|
"learning_rate": 2.818471337579618e-07, |
|
"logits/chosen": 0.7341046333312988, |
|
"logits/rejected": 0.6761490106582642, |
|
"logps/chosen": -0.7117196321487427, |
|
"logps/rejected": -0.8302199244499207, |
|
"loss": 5.3494, |
|
"nll_loss": 0.7117196321487427, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.117195129394531, |
|
"rewards/margins": 1.1850030422210693, |
|
"rewards/rejected": -8.302199363708496, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5747126436781609, |
|
"grad_norm": 17.296732181243744, |
|
"learning_rate": 2.8662420382165606e-07, |
|
"logits/chosen": 0.8246825933456421, |
|
"logits/rejected": 0.649739146232605, |
|
"logps/chosen": -0.823817253112793, |
|
"logps/rejected": -0.8983147740364075, |
|
"loss": 5.2106, |
|
"nll_loss": 0.8238171339035034, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.23817253112793, |
|
"rewards/margins": 0.7449753880500793, |
|
"rewards/rejected": -8.983147621154785, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5842911877394636, |
|
"grad_norm": 14.832912322402217, |
|
"learning_rate": 2.914012738853503e-07, |
|
"logits/chosen": 0.6315704584121704, |
|
"logits/rejected": 0.614464521408081, |
|
"logps/chosen": -0.8551123738288879, |
|
"logps/rejected": -0.7884758114814758, |
|
"loss": 5.2191, |
|
"nll_loss": 0.8551123738288879, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.55112361907959, |
|
"rewards/margins": -0.6663654446601868, |
|
"rewards/rejected": -7.884758949279785, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5938697318007663, |
|
"grad_norm": 22.263623540675233, |
|
"learning_rate": 2.961783439490446e-07, |
|
"logits/chosen": 0.8310597538948059, |
|
"logits/rejected": 0.7741198539733887, |
|
"logps/chosen": -0.769278883934021, |
|
"logps/rejected": -0.7818907499313354, |
|
"loss": 5.3222, |
|
"nll_loss": 0.7692790031433105, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.692789554595947, |
|
"rewards/margins": 0.1261179894208908, |
|
"rewards/rejected": -7.818907737731934, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.603448275862069, |
|
"grad_norm": 15.209695881976554, |
|
"learning_rate": 2.999999067864633e-07, |
|
"logits/chosen": 0.6580372452735901, |
|
"logits/rejected": 0.5932300090789795, |
|
"logps/chosen": -0.8376361727714539, |
|
"logps/rejected": -0.9159227609634399, |
|
"loss": 5.1548, |
|
"nll_loss": 0.8376361131668091, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.376360893249512, |
|
"rewards/margins": 0.782866358757019, |
|
"rewards/rejected": -9.15922737121582, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6130268199233716, |
|
"grad_norm": 16.786502978942124, |
|
"learning_rate": 2.9999664432484305e-07, |
|
"logits/chosen": 0.779415488243103, |
|
"logits/rejected": 0.8173543810844421, |
|
"logps/chosen": -0.8148608207702637, |
|
"logps/rejected": -0.8662956357002258, |
|
"loss": 5.0858, |
|
"nll_loss": 0.8148608207702637, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.148608207702637, |
|
"rewards/margins": 0.5143473744392395, |
|
"rewards/rejected": -8.662956237792969, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6226053639846744, |
|
"grad_norm": 14.809215737259183, |
|
"learning_rate": 2.999887213022373e-07, |
|
"logits/chosen": 0.9049234390258789, |
|
"logits/rejected": 0.6404236555099487, |
|
"logps/chosen": -0.9479631185531616, |
|
"logps/rejected": -0.8360861539840698, |
|
"loss": 5.282, |
|
"nll_loss": 0.9479631185531616, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -9.479631423950195, |
|
"rewards/margins": -1.1187700033187866, |
|
"rewards/rejected": -8.360861778259277, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.632183908045977, |
|
"grad_norm": 16.116801233519272, |
|
"learning_rate": 2.999761379648231e-07, |
|
"logits/chosen": 0.5830576419830322, |
|
"logits/rejected": 0.4962643086910248, |
|
"logps/chosen": -0.8123816251754761, |
|
"logps/rejected": -0.915209174156189, |
|
"loss": 5.0586, |
|
"nll_loss": 0.8123816251754761, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -8.12381649017334, |
|
"rewards/margins": 1.0282756090164185, |
|
"rewards/rejected": -9.152092933654785, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6417624521072797, |
|
"grad_norm": 17.606725914130628, |
|
"learning_rate": 2.999588947035786e-07, |
|
"logits/chosen": 0.666958212852478, |
|
"logits/rejected": 0.6473885774612427, |
|
"logps/chosen": -0.8091378211975098, |
|
"logps/rejected": -0.8131651878356934, |
|
"loss": 5.1567, |
|
"nll_loss": 0.8091378211975098, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.091379165649414, |
|
"rewards/margins": 0.04027414321899414, |
|
"rewards/rejected": -8.13165283203125, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6513409961685823, |
|
"grad_norm": 15.2118101330663, |
|
"learning_rate": 2.999369920542709e-07, |
|
"logits/chosen": 0.7825851440429688, |
|
"logits/rejected": 0.6557101011276245, |
|
"logps/chosen": -0.8586159944534302, |
|
"logps/rejected": -0.795002818107605, |
|
"loss": 5.2093, |
|
"nll_loss": 0.8586158752441406, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -8.586160659790039, |
|
"rewards/margins": -0.6361311078071594, |
|
"rewards/rejected": -7.950028419494629, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6609195402298851, |
|
"grad_norm": 21.51236359546376, |
|
"learning_rate": 2.9991043069743953e-07, |
|
"logits/chosen": 0.8391911387443542, |
|
"logits/rejected": 0.9106936454772949, |
|
"logps/chosen": -0.9227014780044556, |
|
"logps/rejected": -0.8664349317550659, |
|
"loss": 5.1782, |
|
"nll_loss": 0.9227014780044556, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -9.22701358795166, |
|
"rewards/margins": -0.5626646280288696, |
|
"rewards/rejected": -8.664348602294922, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6704980842911877, |
|
"grad_norm": 18.995163332531835, |
|
"learning_rate": 2.9987921145837506e-07, |
|
"logits/chosen": 0.5608048439025879, |
|
"logits/rejected": 0.504362940788269, |
|
"logps/chosen": -0.9775077700614929, |
|
"logps/rejected": -0.8174030184745789, |
|
"loss": 5.2409, |
|
"nll_loss": 0.9775077700614929, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -9.775077819824219, |
|
"rewards/margins": -1.6010481119155884, |
|
"rewards/rejected": -8.174030303955078, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6800766283524904, |
|
"grad_norm": 18.321608160126484, |
|
"learning_rate": 2.998433353070936e-07, |
|
"logits/chosen": 0.5228645205497742, |
|
"logits/rejected": 0.5875726342201233, |
|
"logps/chosen": -0.8083820343017578, |
|
"logps/rejected": -0.9094891548156738, |
|
"loss": 5.3646, |
|
"nll_loss": 0.8083820343017578, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -8.083820343017578, |
|
"rewards/margins": 1.011070966720581, |
|
"rewards/rejected": -9.094891548156738, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 39.97684130432745, |
|
"learning_rate": 2.998028033583067e-07, |
|
"logits/chosen": 0.635855495929718, |
|
"logits/rejected": 0.6091259717941284, |
|
"logps/chosen": -0.8663408160209656, |
|
"logps/rejected": -0.8845375776290894, |
|
"loss": 5.3618, |
|
"nll_loss": 0.8663408160209656, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.663408279418945, |
|
"rewards/margins": 0.18196812272071838, |
|
"rewards/rejected": -8.845376014709473, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6992337164750958, |
|
"grad_norm": 20.393894377350467, |
|
"learning_rate": 2.9975761687138675e-07, |
|
"logits/chosen": 0.8516343832015991, |
|
"logits/rejected": 0.9076502919197083, |
|
"logps/chosen": -0.7819638848304749, |
|
"logps/rejected": -0.8371108174324036, |
|
"loss": 5.1922, |
|
"nll_loss": 0.7819639444351196, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.819638252258301, |
|
"rewards/margins": 0.5514693260192871, |
|
"rewards/rejected": -8.371108055114746, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7088122605363985, |
|
"grad_norm": 15.92022688698728, |
|
"learning_rate": 2.997077772503276e-07, |
|
"logits/chosen": 0.6935936212539673, |
|
"logits/rejected": 0.7612776160240173, |
|
"logps/chosen": -0.8113873600959778, |
|
"logps/rejected": -0.8486092686653137, |
|
"loss": 5.2959, |
|
"nll_loss": 0.8113872408866882, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.113873481750488, |
|
"rewards/margins": 0.37221986055374146, |
|
"rewards/rejected": -8.486093521118164, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7183908045977011, |
|
"grad_norm": 19.844305829223966, |
|
"learning_rate": 2.9965328604370115e-07, |
|
"logits/chosen": 0.7288967370986938, |
|
"logits/rejected": 0.8481170535087585, |
|
"logps/chosen": -0.7829886674880981, |
|
"logps/rejected": -0.7796565294265747, |
|
"loss": 5.2523, |
|
"nll_loss": 0.7829886674880981, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.829885959625244, |
|
"rewards/margins": -0.033321283757686615, |
|
"rewards/rejected": -7.796565055847168, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7279693486590039, |
|
"grad_norm": 22.21588650736091, |
|
"learning_rate": 2.9959414494460934e-07, |
|
"logits/chosen": 0.7555449604988098, |
|
"logits/rejected": 0.8247362971305847, |
|
"logps/chosen": -0.8521485328674316, |
|
"logps/rejected": -0.9153167605400085, |
|
"loss": 5.1015, |
|
"nll_loss": 0.8521484136581421, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.521485328674316, |
|
"rewards/margins": 0.6316838264465332, |
|
"rewards/rejected": -9.153168678283691, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7375478927203065, |
|
"grad_norm": 16.32420836624451, |
|
"learning_rate": 2.995303557906312e-07, |
|
"logits/chosen": 0.8759990930557251, |
|
"logits/rejected": 0.684215784072876, |
|
"logps/chosen": -0.9286302328109741, |
|
"logps/rejected": -0.9789536595344543, |
|
"loss": 5.3124, |
|
"nll_loss": 0.9286301732063293, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -9.28630256652832, |
|
"rewards/margins": 0.5032343864440918, |
|
"rewards/rejected": -9.789536476135254, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7471264367816092, |
|
"grad_norm": 15.80345388242498, |
|
"learning_rate": 2.99461920563766e-07, |
|
"logits/chosen": 0.5319703817367554, |
|
"logits/rejected": 0.6946344971656799, |
|
"logps/chosen": -0.9111973643302917, |
|
"logps/rejected": -0.8061173558235168, |
|
"loss": 5.227, |
|
"nll_loss": 0.911197304725647, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -9.111973762512207, |
|
"rewards/margins": -1.0507996082305908, |
|
"rewards/rejected": -8.061173439025879, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7567049808429118, |
|
"grad_norm": 20.47124302367669, |
|
"learning_rate": 2.993888413903716e-07, |
|
"logits/chosen": 0.875220000743866, |
|
"logits/rejected": 0.9313627481460571, |
|
"logps/chosen": -0.6892002820968628, |
|
"logps/rejected": -0.7782602310180664, |
|
"loss": 5.1504, |
|
"nll_loss": 0.689200222492218, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -6.892002105712891, |
|
"rewards/margins": 0.8905998468399048, |
|
"rewards/rejected": -7.782601833343506, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7662835249042146, |
|
"grad_norm": 17.699773611139502, |
|
"learning_rate": 2.9931112054109855e-07, |
|
"logits/chosen": 0.5683273673057556, |
|
"logits/rejected": 0.6470497846603394, |
|
"logps/chosen": -0.8484388589859009, |
|
"logps/rejected": -0.8746269345283508, |
|
"loss": 5.1348, |
|
"nll_loss": 0.8484388589859009, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.48438835144043, |
|
"rewards/margins": 0.26188117265701294, |
|
"rewards/rejected": -8.746268272399902, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7758620689655172, |
|
"grad_norm": 19.019674807304686, |
|
"learning_rate": 2.992287604308192e-07, |
|
"logits/chosen": 0.6370224356651306, |
|
"logits/rejected": 0.500920295715332, |
|
"logps/chosen": -1.061689853668213, |
|
"logps/rejected": -1.0553849935531616, |
|
"loss": 5.2272, |
|
"nll_loss": 1.0616897344589233, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -10.616897583007812, |
|
"rewards/margins": -0.06304798275232315, |
|
"rewards/rejected": -10.553851127624512, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7854406130268199, |
|
"grad_norm": 22.32813440003362, |
|
"learning_rate": 2.9914176361855286e-07, |
|
"logits/chosen": 0.773957371711731, |
|
"logits/rejected": 0.5596843957901001, |
|
"logps/chosen": -0.8565713167190552, |
|
"logps/rejected": -0.8435229063034058, |
|
"loss": 5.1566, |
|
"nll_loss": 0.8565713167190552, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.565712928771973, |
|
"rewards/margins": -0.13048286736011505, |
|
"rewards/rejected": -8.435229301452637, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7950191570881227, |
|
"grad_norm": 18.60336417027728, |
|
"learning_rate": 2.9905013280738643e-07, |
|
"logits/chosen": 0.5330244898796082, |
|
"logits/rejected": 0.52126145362854, |
|
"logps/chosen": -0.9946764707565308, |
|
"logps/rejected": -1.0159534215927124, |
|
"loss": 5.3385, |
|
"nll_loss": 0.9946764707565308, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -9.94676399230957, |
|
"rewards/margins": 0.21276941895484924, |
|
"rewards/rejected": -10.159533500671387, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8045977011494253, |
|
"grad_norm": 17.890613875131685, |
|
"learning_rate": 2.9895387084439007e-07, |
|
"logits/chosen": 0.9610258340835571, |
|
"logits/rejected": 0.7646621465682983, |
|
"logps/chosen": -0.7954119443893433, |
|
"logps/rejected": -0.8386955261230469, |
|
"loss": 5.1982, |
|
"nll_loss": 0.7954119443893433, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.954119682312012, |
|
"rewards/margins": 0.43283557891845703, |
|
"rewards/rejected": -8.386955261230469, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.814176245210728, |
|
"grad_norm": 16.55166479015083, |
|
"learning_rate": 2.9885298072052896e-07, |
|
"logits/chosen": 0.6969675421714783, |
|
"logits/rejected": 0.8127206563949585, |
|
"logps/chosen": -0.7167531251907349, |
|
"logps/rejected": -0.8444196581840515, |
|
"loss": 5.1488, |
|
"nll_loss": 0.7167531847953796, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.167531490325928, |
|
"rewards/margins": 1.2766650915145874, |
|
"rewards/rejected": -8.444195747375488, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8237547892720306, |
|
"grad_norm": 17.66990705043301, |
|
"learning_rate": 2.987474655705706e-07, |
|
"logits/chosen": 0.6976544260978699, |
|
"logits/rejected": 0.45688027143478394, |
|
"logps/chosen": -0.984510600566864, |
|
"logps/rejected": -0.9671609997749329, |
|
"loss": 5.1164, |
|
"nll_loss": 0.9845105409622192, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -9.84510612487793, |
|
"rewards/margins": -0.17349663376808167, |
|
"rewards/rejected": -9.671609878540039, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 17.864698049191677, |
|
"learning_rate": 2.9863732867298676e-07, |
|
"logits/chosen": 0.5837534070014954, |
|
"logits/rejected": 0.4828642010688782, |
|
"logps/chosen": -0.7947196960449219, |
|
"logps/rejected": -0.8388618230819702, |
|
"loss": 5.1483, |
|
"nll_loss": 0.7947196960449219, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.947196960449219, |
|
"rewards/margins": 0.44142085313796997, |
|
"rewards/rejected": -8.388618469238281, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.842911877394636, |
|
"grad_norm": 19.80331125773151, |
|
"learning_rate": 2.985225734498523e-07, |
|
"logits/chosen": 0.6473149061203003, |
|
"logits/rejected": 0.6882539987564087, |
|
"logps/chosen": -0.8926659822463989, |
|
"logps/rejected": -1.0417662858963013, |
|
"loss": 5.0516, |
|
"nll_loss": 0.8926659822463989, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -8.926658630371094, |
|
"rewards/margins": 1.4910037517547607, |
|
"rewards/rejected": -10.417662620544434, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8524904214559387, |
|
"grad_norm": 23.020768804171617, |
|
"learning_rate": 2.984032034667383e-07, |
|
"logits/chosen": 0.723087728023529, |
|
"logits/rejected": 0.7870732545852661, |
|
"logps/chosen": -0.9583943486213684, |
|
"logps/rejected": -0.9407602548599243, |
|
"loss": 5.2786, |
|
"nll_loss": 0.9583943486213684, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -9.583943367004395, |
|
"rewards/margins": -0.17634105682373047, |
|
"rewards/rejected": -9.40760326385498, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 19.047172710756705, |
|
"learning_rate": 2.982792224326018e-07, |
|
"logits/chosen": 0.6330351829528809, |
|
"logits/rejected": 0.6864033341407776, |
|
"logps/chosen": -0.7574523687362671, |
|
"logps/rejected": -0.8687712550163269, |
|
"loss": 5.1046, |
|
"nll_loss": 0.7574523091316223, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.574522972106934, |
|
"rewards/margins": 1.113189458847046, |
|
"rewards/rejected": -8.687711715698242, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8716475095785441, |
|
"grad_norm": 16.669062938958124, |
|
"learning_rate": 2.9815063419966994e-07, |
|
"logits/chosen": 0.5700467824935913, |
|
"logits/rejected": 0.4501461088657379, |
|
"logps/chosen": -0.8433354496955872, |
|
"logps/rejected": -1.0070374011993408, |
|
"loss": 5.224, |
|
"nll_loss": 0.8433355093002319, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.433355331420898, |
|
"rewards/margins": 1.637019157409668, |
|
"rewards/rejected": -10.070374488830566, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.8812260536398467, |
|
"grad_norm": 18.696544325644073, |
|
"learning_rate": 2.9801744276332095e-07, |
|
"logits/chosen": 0.7160075306892395, |
|
"logits/rejected": 0.7029515504837036, |
|
"logps/chosen": -0.929043173789978, |
|
"logps/rejected": -0.8915184140205383, |
|
"loss": 5.2651, |
|
"nll_loss": 0.9290431141853333, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -9.29043197631836, |
|
"rewards/margins": -0.37524765729904175, |
|
"rewards/rejected": -8.915184020996094, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8908045977011494, |
|
"grad_norm": 17.828488483344373, |
|
"learning_rate": 2.978796522619593e-07, |
|
"logits/chosen": 0.48793935775756836, |
|
"logits/rejected": 0.5863816142082214, |
|
"logps/chosen": -0.8918827772140503, |
|
"logps/rejected": -0.9165046811103821, |
|
"loss": 5.1141, |
|
"nll_loss": 0.8918827772140503, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -8.918828010559082, |
|
"rewards/margins": 0.24621906876564026, |
|
"rewards/rejected": -9.165046691894531, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9003831417624522, |
|
"grad_norm": 17.225972538621157, |
|
"learning_rate": 2.9773726697688786e-07, |
|
"logits/chosen": 0.8548671007156372, |
|
"logits/rejected": 0.6914275884628296, |
|
"logps/chosen": -0.7240065932273865, |
|
"logps/rejected": -0.7344146370887756, |
|
"loss": 5.2492, |
|
"nll_loss": 0.7240065336227417, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -7.2400665283203125, |
|
"rewards/margins": 0.10407984256744385, |
|
"rewards/rejected": -7.34414529800415, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9099616858237548, |
|
"grad_norm": 19.7970366847853, |
|
"learning_rate": 2.975902913321742e-07, |
|
"logits/chosen": 0.6560118794441223, |
|
"logits/rejected": 0.9177835583686829, |
|
"logps/chosen": -0.7790594100952148, |
|
"logps/rejected": -0.7989363670349121, |
|
"loss": 5.3694, |
|
"nll_loss": 0.7790594100952148, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.79059362411499, |
|
"rewards/margins": 0.19877009093761444, |
|
"rewards/rejected": -7.989363670349121, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9195402298850575, |
|
"grad_norm": 25.366911327657693, |
|
"learning_rate": 2.974387298945135e-07, |
|
"logits/chosen": 0.4641974866390228, |
|
"logits/rejected": 0.5562915802001953, |
|
"logps/chosen": -1.0258574485778809, |
|
"logps/rejected": -0.9660851359367371, |
|
"loss": 5.2603, |
|
"nll_loss": 1.0258575677871704, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -10.258573532104492, |
|
"rewards/margins": -0.5977233052253723, |
|
"rewards/rejected": -9.66085147857666, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9291187739463601, |
|
"grad_norm": 28.44742966645083, |
|
"learning_rate": 2.9728258737308666e-07, |
|
"logits/chosen": 0.7019563913345337, |
|
"logits/rejected": 0.9221956133842468, |
|
"logps/chosen": -0.7030410766601562, |
|
"logps/rejected": -0.7920758724212646, |
|
"loss": 5.1616, |
|
"nll_loss": 0.7030410766601562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.0304107666015625, |
|
"rewards/margins": 0.8903471231460571, |
|
"rewards/rejected": -7.920758247375488, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9386973180076629, |
|
"grad_norm": 20.06782347986855, |
|
"learning_rate": 2.9712186861941376e-07, |
|
"logits/chosen": 0.5050719976425171, |
|
"logits/rejected": 0.5799335241317749, |
|
"logps/chosen": -0.9137505292892456, |
|
"logps/rejected": -0.8959131240844727, |
|
"loss": 5.0958, |
|
"nll_loss": 0.9137505292892456, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -9.137506484985352, |
|
"rewards/margins": -0.17837515473365784, |
|
"rewards/rejected": -8.959131240844727, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9482758620689655, |
|
"grad_norm": 25.038776687357906, |
|
"learning_rate": 2.9695657862720366e-07, |
|
"logits/chosen": 0.6863113641738892, |
|
"logits/rejected": 0.617378294467926, |
|
"logps/chosen": -0.8844548463821411, |
|
"logps/rejected": -0.8485180139541626, |
|
"loss": 5.2545, |
|
"nll_loss": 0.8844548463821411, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.844549179077148, |
|
"rewards/margins": -0.3593680262565613, |
|
"rewards/rejected": -8.485180854797363, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9578544061302682, |
|
"grad_norm": 17.692781929545614, |
|
"learning_rate": 2.967867225321984e-07, |
|
"logits/chosen": 0.37053728103637695, |
|
"logits/rejected": 0.6822131276130676, |
|
"logps/chosen": -0.8528249859809875, |
|
"logps/rejected": -0.8191383481025696, |
|
"loss": 5.1402, |
|
"nll_loss": 0.8528249859809875, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -8.528249740600586, |
|
"rewards/margins": -0.33686715364456177, |
|
"rewards/rejected": -8.191383361816406, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9674329501915708, |
|
"grad_norm": 21.29237339971544, |
|
"learning_rate": 2.96612305612014e-07, |
|
"logits/chosen": 0.45431119203567505, |
|
"logits/rejected": 0.6691686511039734, |
|
"logps/chosen": -0.8379117250442505, |
|
"logps/rejected": -0.7931283116340637, |
|
"loss": 5.1782, |
|
"nll_loss": 0.8379117846488953, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -8.379117965698242, |
|
"rewards/margins": -0.44783440232276917, |
|
"rewards/rejected": -7.931282997131348, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9770114942528736, |
|
"grad_norm": 18.325302400422004, |
|
"learning_rate": 2.9643333328597636e-07, |
|
"logits/chosen": 0.4990893006324768, |
|
"logits/rejected": 0.44439974427223206, |
|
"logps/chosen": -0.8343612551689148, |
|
"logps/rejected": -0.861991286277771, |
|
"loss": 5.3056, |
|
"nll_loss": 0.8343612551689148, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.343611717224121, |
|
"rewards/margins": 0.27630099654197693, |
|
"rewards/rejected": -8.619913101196289, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9865900383141762, |
|
"grad_norm": 19.49284906216681, |
|
"learning_rate": 2.9624981111495277e-07, |
|
"logits/chosen": 0.6197850704193115, |
|
"logits/rejected": 0.7587565183639526, |
|
"logps/chosen": -0.7680959105491638, |
|
"logps/rejected": -0.9193935394287109, |
|
"loss": 5.0512, |
|
"nll_loss": 0.768095850944519, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.6809587478637695, |
|
"rewards/margins": 1.5129766464233398, |
|
"rewards/rejected": -9.19393539428711, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.9961685823754789, |
|
"grad_norm": 17.910862409831864, |
|
"learning_rate": 2.960617448011793e-07, |
|
"logits/chosen": 0.4001706540584564, |
|
"logits/rejected": 0.5382306575775146, |
|
"logps/chosen": -0.9400454759597778, |
|
"logps/rejected": -0.9204422235488892, |
|
"loss": 5.1904, |
|
"nll_loss": 0.9400454759597778, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -9.400455474853516, |
|
"rewards/margins": -0.19603300094604492, |
|
"rewards/rejected": -9.204421997070312, |
|
"step": 520 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 3132, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |