|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 500, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 22.33847861457317, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.7707886695861816, |
|
"logits/rejected": -2.7283411026000977, |
|
"logps/chosen": -1.0281651020050049, |
|
"logps/rejected": -1.1735057830810547, |
|
"loss": 1.7068, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0281651020050049, |
|
"rewards/margins": 0.14534088969230652, |
|
"rewards/rejected": -1.1735057830810547, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 19.58286489214486, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.7509849071502686, |
|
"logits/rejected": -2.725268840789795, |
|
"logps/chosen": -0.9945869445800781, |
|
"logps/rejected": -1.070472002029419, |
|
"loss": 1.7055, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9945869445800781, |
|
"rewards/margins": 0.07588515430688858, |
|
"rewards/rejected": -1.070472002029419, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 20.875691978403097, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.731562852859497, |
|
"logits/rejected": -2.698035478591919, |
|
"logps/chosen": -0.9806415438652039, |
|
"logps/rejected": -1.1409623622894287, |
|
"loss": 1.6533, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9806415438652039, |
|
"rewards/margins": 0.1603206843137741, |
|
"rewards/rejected": -1.1409623622894287, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 18.447110198802093, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.804091453552246, |
|
"logits/rejected": -2.711153268814087, |
|
"logps/chosen": -0.9976784586906433, |
|
"logps/rejected": -1.0985018014907837, |
|
"loss": 1.6778, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9976784586906433, |
|
"rewards/margins": 0.10082335770130157, |
|
"rewards/rejected": -1.0985018014907837, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 17.24388250585097, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.800624132156372, |
|
"logits/rejected": -2.777677536010742, |
|
"logps/chosen": -0.977279007434845, |
|
"logps/rejected": -1.0392786264419556, |
|
"loss": 1.6818, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.977279007434845, |
|
"rewards/margins": 0.061999619007110596, |
|
"rewards/rejected": -1.0392786264419556, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 17.54927195272229, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.774305820465088, |
|
"logits/rejected": -2.734163284301758, |
|
"logps/chosen": -0.8836237192153931, |
|
"logps/rejected": -0.9883272051811218, |
|
"loss": 1.7148, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.8836237192153931, |
|
"rewards/margins": 0.10470354557037354, |
|
"rewards/rejected": -0.9883272051811218, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 19.597204446648743, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.7799019813537598, |
|
"logits/rejected": -2.6970131397247314, |
|
"logps/chosen": -0.9105981588363647, |
|
"logps/rejected": -1.032915711402893, |
|
"loss": 1.6819, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9105981588363647, |
|
"rewards/margins": 0.12231750786304474, |
|
"rewards/rejected": -1.032915711402893, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 17.382375782510174, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.7961745262145996, |
|
"logits/rejected": -2.7064435482025146, |
|
"logps/chosen": -0.8972692489624023, |
|
"logps/rejected": -1.1690887212753296, |
|
"loss": 1.5799, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8972692489624023, |
|
"rewards/margins": 0.27181947231292725, |
|
"rewards/rejected": -1.1690887212753296, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 20.280760501242256, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -2.792088031768799, |
|
"logits/rejected": -2.748499870300293, |
|
"logps/chosen": -0.9211010932922363, |
|
"logps/rejected": -1.065104365348816, |
|
"loss": 1.5989, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.9211010932922363, |
|
"rewards/margins": 0.14400319755077362, |
|
"rewards/rejected": -1.065104365348816, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 24.965329955439582, |
|
"learning_rate": 4.999731868769026e-07, |
|
"logits/chosen": -2.7186241149902344, |
|
"logits/rejected": -2.6570615768432617, |
|
"logps/chosen": -0.9831829071044922, |
|
"logps/rejected": -1.2431915998458862, |
|
"loss": 1.5992, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9831829071044922, |
|
"rewards/margins": 0.2600088119506836, |
|
"rewards/rejected": -1.2431915998458862, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 20.11572375657384, |
|
"learning_rate": 4.996716052911017e-07, |
|
"logits/chosen": -2.6839053630828857, |
|
"logits/rejected": -2.6501219272613525, |
|
"logps/chosen": -1.0190128087997437, |
|
"logps/rejected": -1.2308984994888306, |
|
"loss": 1.6076, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.0190128087997437, |
|
"rewards/margins": 0.21188561618328094, |
|
"rewards/rejected": -1.2308984994888306, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 23.21505244201069, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.7174434661865234, |
|
"logits/rejected": -2.6702983379364014, |
|
"logps/chosen": -1.0736011266708374, |
|
"logps/rejected": -1.2630236148834229, |
|
"loss": 1.5192, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0736011266708374, |
|
"rewards/margins": 0.18942244350910187, |
|
"rewards/rejected": -1.2630236148834229, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 23.25106134127669, |
|
"learning_rate": 4.980652179769217e-07, |
|
"logits/chosen": -2.6810200214385986, |
|
"logits/rejected": -2.5684618949890137, |
|
"logps/chosen": -1.0137274265289307, |
|
"logps/rejected": -1.498494267463684, |
|
"loss": 1.5546, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0137274265289307, |
|
"rewards/margins": 0.4847669005393982, |
|
"rewards/rejected": -1.498494267463684, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 27.792367557668214, |
|
"learning_rate": 4.967625656594781e-07, |
|
"logits/chosen": -2.530665636062622, |
|
"logits/rejected": -2.4802050590515137, |
|
"logps/chosen": -1.1090538501739502, |
|
"logps/rejected": -1.3939735889434814, |
|
"loss": 1.5395, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1090538501739502, |
|
"rewards/margins": 0.28491973876953125, |
|
"rewards/rejected": -1.3939735889434814, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 31.80202351677776, |
|
"learning_rate": 4.951291206355559e-07, |
|
"logits/chosen": -2.56596302986145, |
|
"logits/rejected": -2.4995336532592773, |
|
"logps/chosen": -1.0895880460739136, |
|
"logps/rejected": -1.4386751651763916, |
|
"loss": 1.5145, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0895880460739136, |
|
"rewards/margins": 0.3490869700908661, |
|
"rewards/rejected": -1.4386751651763916, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 24.649835098743253, |
|
"learning_rate": 4.93167072587771e-07, |
|
"logits/chosen": -2.5505566596984863, |
|
"logits/rejected": -2.499436140060425, |
|
"logps/chosen": -1.066165804862976, |
|
"logps/rejected": -1.3876395225524902, |
|
"loss": 1.5557, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.066165804862976, |
|
"rewards/margins": 0.3214736878871918, |
|
"rewards/rejected": -1.3876395225524902, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 36.00370335117123, |
|
"learning_rate": 4.908790517010636e-07, |
|
"logits/chosen": -2.5619356632232666, |
|
"logits/rejected": -2.5041964054107666, |
|
"logps/chosen": -1.0125417709350586, |
|
"logps/rejected": -1.3010904788970947, |
|
"loss": 1.5994, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0125417709350586, |
|
"rewards/margins": 0.2885487377643585, |
|
"rewards/rejected": -1.3010904788970947, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 24.146045834642553, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.4920132160186768, |
|
"logits/rejected": -2.465853691101074, |
|
"logps/chosen": -1.0736846923828125, |
|
"logps/rejected": -1.3465595245361328, |
|
"loss": 1.5492, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0736846923828125, |
|
"rewards/margins": 0.27287474274635315, |
|
"rewards/rejected": -1.3465595245361328, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 28.073142968648483, |
|
"learning_rate": 4.853377929214243e-07, |
|
"logits/chosen": -2.5122618675231934, |
|
"logits/rejected": -2.4115467071533203, |
|
"logps/chosen": -1.0956408977508545, |
|
"logps/rejected": -1.3623732328414917, |
|
"loss": 1.5645, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.0956408977508545, |
|
"rewards/margins": 0.26673227548599243, |
|
"rewards/rejected": -1.3623732328414917, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 31.739558342111017, |
|
"learning_rate": 4.820919832540181e-07, |
|
"logits/chosen": -2.4314770698547363, |
|
"logits/rejected": -2.3331286907196045, |
|
"logps/chosen": -1.1069271564483643, |
|
"logps/rejected": -1.4718701839447021, |
|
"loss": 1.5661, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1069271564483643, |
|
"rewards/margins": 0.3649430274963379, |
|
"rewards/rejected": -1.4718701839447021, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 30.25619066044505, |
|
"learning_rate": 4.785350472409791e-07, |
|
"logits/chosen": -2.374175548553467, |
|
"logits/rejected": -2.345848321914673, |
|
"logps/chosen": -1.1067227125167847, |
|
"logps/rejected": -1.5379221439361572, |
|
"loss": 1.4913, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1067227125167847, |
|
"rewards/margins": 0.43119925260543823, |
|
"rewards/rejected": -1.5379221439361572, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 37.413658539880686, |
|
"learning_rate": 4.7467175306295647e-07, |
|
"logits/chosen": -2.3754255771636963, |
|
"logits/rejected": -2.298557996749878, |
|
"logps/chosen": -1.2783015966415405, |
|
"logps/rejected": -1.6868267059326172, |
|
"loss": 1.4914, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2783015966415405, |
|
"rewards/margins": 0.4085250496864319, |
|
"rewards/rejected": -1.6868267059326172, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 30.692988045641766, |
|
"learning_rate": 4.70507279583015e-07, |
|
"logits/chosen": -2.295192241668701, |
|
"logits/rejected": -2.2188549041748047, |
|
"logps/chosen": -1.2483012676239014, |
|
"logps/rejected": -1.789584755897522, |
|
"loss": 1.422, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2483012676239014, |
|
"rewards/margins": 0.541283369064331, |
|
"rewards/rejected": -1.789584755897522, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 30.61675572148456, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -2.2375972270965576, |
|
"logits/rejected": -2.188506603240967, |
|
"logps/chosen": -1.2778130769729614, |
|
"logps/rejected": -1.6632368564605713, |
|
"loss": 1.5023, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2778130769729614, |
|
"rewards/margins": 0.3854238986968994, |
|
"rewards/rejected": -1.6632368564605713, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 27.143222504991545, |
|
"learning_rate": 4.612975213859487e-07, |
|
"logits/chosen": -2.2686073780059814, |
|
"logits/rejected": -2.240734815597534, |
|
"logps/chosen": -1.2156823873519897, |
|
"logps/rejected": -1.5097521543502808, |
|
"loss": 1.5016, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2156823873519897, |
|
"rewards/margins": 0.2940698564052582, |
|
"rewards/rejected": -1.5097521543502808, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 28.662390853422256, |
|
"learning_rate": 4.5626458262912735e-07, |
|
"logits/chosen": -2.1936447620391846, |
|
"logits/rejected": -2.1442556381225586, |
|
"logps/chosen": -1.2718418836593628, |
|
"logps/rejected": -1.5303620100021362, |
|
"loss": 1.4909, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2718418836593628, |
|
"rewards/margins": 0.25852006673812866, |
|
"rewards/rejected": -1.5303620100021362, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 44.28036575523435, |
|
"learning_rate": 4.5095513994085974e-07, |
|
"logits/chosen": -2.1270031929016113, |
|
"logits/rejected": -2.0248324871063232, |
|
"logps/chosen": -1.3885653018951416, |
|
"logps/rejected": -2.0145115852355957, |
|
"loss": 1.4337, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.3885653018951416, |
|
"rewards/margins": 0.6259465217590332, |
|
"rewards/rejected": -2.0145115852355957, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 36.60577846523631, |
|
"learning_rate": 4.453763107901675e-07, |
|
"logits/chosen": -2.1079370975494385, |
|
"logits/rejected": -2.0384957790374756, |
|
"logps/chosen": -1.5038011074066162, |
|
"logps/rejected": -1.9894378185272217, |
|
"loss": 1.4093, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.5038011074066162, |
|
"rewards/margins": 0.48563677072525024, |
|
"rewards/rejected": -1.9894378185272217, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 28.487957762616777, |
|
"learning_rate": 4.395355737667985e-07, |
|
"logits/chosen": -2.0164198875427246, |
|
"logits/rejected": -1.9161510467529297, |
|
"logps/chosen": -1.5493210554122925, |
|
"logps/rejected": -2.018698215484619, |
|
"loss": 1.451, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.5493210554122925, |
|
"rewards/margins": 0.4693775177001953, |
|
"rewards/rejected": -2.018698215484619, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 41.06239885865088, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -1.9734079837799072, |
|
"logits/rejected": -1.9259965419769287, |
|
"logps/chosen": -1.4789108037948608, |
|
"logps/rejected": -1.9842697381973267, |
|
"loss": 1.3495, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.4789108037948608, |
|
"rewards/margins": 0.5053588151931763, |
|
"rewards/rejected": -1.9842697381973267, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 48.830011945922514, |
|
"learning_rate": 4.271000354423425e-07, |
|
"logits/chosen": -1.8841253519058228, |
|
"logits/rejected": -1.8244024515151978, |
|
"logps/chosen": -1.7202045917510986, |
|
"logps/rejected": -2.2293524742126465, |
|
"loss": 1.3858, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7202045917510986, |
|
"rewards/margins": 0.5091480016708374, |
|
"rewards/rejected": -2.2293524742126465, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 58.39848893185564, |
|
"learning_rate": 4.2052190435769554e-07, |
|
"logits/chosen": -1.7686011791229248, |
|
"logits/rejected": -1.7215473651885986, |
|
"logps/chosen": -1.7806098461151123, |
|
"logps/rejected": -2.337874174118042, |
|
"loss": 1.3384, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7806098461151123, |
|
"rewards/margins": 0.5572644472122192, |
|
"rewards/rejected": -2.337874174118042, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 39.19480392174481, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -1.7493757009506226, |
|
"logits/rejected": -1.6483466625213623, |
|
"logps/chosen": -1.6905717849731445, |
|
"logps/rejected": -2.2200164794921875, |
|
"loss": 1.3194, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6905717849731445, |
|
"rewards/margins": 0.5294445753097534, |
|
"rewards/rejected": -2.2200164794921875, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 43.29327849827671, |
|
"learning_rate": 4.0668899744407567e-07, |
|
"logits/chosen": -1.7271289825439453, |
|
"logits/rejected": -1.6169135570526123, |
|
"logps/chosen": -1.7594356536865234, |
|
"logps/rejected": -2.3671681880950928, |
|
"loss": 1.4272, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7594356536865234, |
|
"rewards/margins": 0.6077327132225037, |
|
"rewards/rejected": -2.3671681880950928, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 51.0545099532057, |
|
"learning_rate": 3.994527650465352e-07, |
|
"logits/chosen": -1.6730191707611084, |
|
"logits/rejected": -1.5045350790023804, |
|
"logps/chosen": -1.7539478540420532, |
|
"logps/rejected": -2.3421220779418945, |
|
"loss": 1.4166, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7539478540420532, |
|
"rewards/margins": 0.5881742835044861, |
|
"rewards/rejected": -2.3421220779418945, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 39.83826698098843, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -1.5326063632965088, |
|
"logits/rejected": -1.4709655046463013, |
|
"logps/chosen": -1.8253322839736938, |
|
"logps/rejected": -2.5203299522399902, |
|
"loss": 1.327, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.8253322839736938, |
|
"rewards/margins": 0.6949977278709412, |
|
"rewards/rejected": -2.5203299522399902, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 54.751515663792574, |
|
"learning_rate": 3.8438923131177237e-07, |
|
"logits/chosen": -1.6529709100723267, |
|
"logits/rejected": -1.547048807144165, |
|
"logps/chosen": -2.0327372550964355, |
|
"logps/rejected": -2.799630880355835, |
|
"loss": 1.3481, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0327372550964355, |
|
"rewards/margins": 0.7668935656547546, |
|
"rewards/rejected": -2.799630880355835, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 55.01879734493922, |
|
"learning_rate": 3.765821230985757e-07, |
|
"logits/chosen": -1.7503044605255127, |
|
"logits/rejected": -1.7193371057510376, |
|
"logps/chosen": -2.0020930767059326, |
|
"logps/rejected": -2.541438341140747, |
|
"loss": 1.4457, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0020930767059326, |
|
"rewards/margins": 0.5393451452255249, |
|
"rewards/rejected": -2.541438341140747, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 56.01695405727775, |
|
"learning_rate": 3.6860532770864005e-07, |
|
"logits/chosen": -1.6532561779022217, |
|
"logits/rejected": -1.5889365673065186, |
|
"logps/chosen": -2.2126450538635254, |
|
"logps/rejected": -2.833956241607666, |
|
"loss": 1.3837, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.2126450538635254, |
|
"rewards/margins": 0.6213110685348511, |
|
"rewards/rejected": -2.833956241607666, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 53.693962657799496, |
|
"learning_rate": 3.604695382782159e-07, |
|
"logits/chosen": -1.5952329635620117, |
|
"logits/rejected": -1.5742504596710205, |
|
"logps/chosen": -2.298196315765381, |
|
"logps/rejected": -2.9012575149536133, |
|
"loss": 1.3376, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.298196315765381, |
|
"rewards/margins": 0.6030609607696533, |
|
"rewards/rejected": -2.9012575149536133, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 64.50276884709064, |
|
"learning_rate": 3.5218566107988867e-07, |
|
"logits/chosen": -1.3948825597763062, |
|
"logits/rejected": -1.3095804452896118, |
|
"logps/chosen": -2.4898180961608887, |
|
"logps/rejected": -3.191925525665283, |
|
"loss": 1.2815, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.4898180961608887, |
|
"rewards/margins": 0.7021073698997498, |
|
"rewards/rejected": -3.191925525665283, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 84.53595036055246, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -1.3462097644805908, |
|
"logits/rejected": -1.2632777690887451, |
|
"logps/chosen": -2.999131679534912, |
|
"logps/rejected": -3.611553192138672, |
|
"loss": 1.3852, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.999131679534912, |
|
"rewards/margins": 0.6124216318130493, |
|
"rewards/rejected": -3.611553192138672, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 46.66563883333029, |
|
"learning_rate": 3.3521824616429284e-07, |
|
"logits/chosen": -1.3600023984909058, |
|
"logits/rejected": -1.2664874792099, |
|
"logps/chosen": -2.6604790687561035, |
|
"logps/rejected": -3.3327317237854004, |
|
"loss": 1.286, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.6604790687561035, |
|
"rewards/margins": 0.6722527742385864, |
|
"rewards/rejected": -3.3327317237854004, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 52.30084280147944, |
|
"learning_rate": 3.265574537815398e-07, |
|
"logits/chosen": -1.328802466392517, |
|
"logits/rejected": -1.1486713886260986, |
|
"logps/chosen": -2.2492258548736572, |
|
"logps/rejected": -3.048722743988037, |
|
"loss": 1.2381, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.2492258548736572, |
|
"rewards/margins": 0.7994968295097351, |
|
"rewards/rejected": -3.048722743988037, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 59.5983284817177, |
|
"learning_rate": 3.1779403380910425e-07, |
|
"logits/chosen": -1.2022250890731812, |
|
"logits/rejected": -0.9333807229995728, |
|
"logps/chosen": -2.3682289123535156, |
|
"logps/rejected": -3.2058632373809814, |
|
"loss": 1.2818, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.3682289123535156, |
|
"rewards/margins": 0.8376340866088867, |
|
"rewards/rejected": -3.2058632373809814, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 57.79020776140021, |
|
"learning_rate": 3.0893973387735683e-07, |
|
"logits/chosen": -0.9690738916397095, |
|
"logits/rejected": -0.6763086915016174, |
|
"logps/chosen": -2.678581714630127, |
|
"logps/rejected": -3.652540922164917, |
|
"loss": 1.1691, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.678581714630127, |
|
"rewards/margins": 0.9739594459533691, |
|
"rewards/rejected": -3.652540922164917, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 68.31261443984954, |
|
"learning_rate": 3.000064234440111e-07, |
|
"logits/chosen": -0.7249783277511597, |
|
"logits/rejected": -0.4497829079627991, |
|
"logps/chosen": -2.929415225982666, |
|
"logps/rejected": -3.6940486431121826, |
|
"loss": 1.219, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.929415225982666, |
|
"rewards/margins": 0.7646334171295166, |
|
"rewards/rejected": -3.6940486431121826, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 58.51770384505626, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": -0.6646004915237427, |
|
"logits/rejected": -0.4312285780906677, |
|
"logps/chosen": -2.8067305088043213, |
|
"logps/rejected": -3.5952727794647217, |
|
"loss": 1.2149, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.8067305088043213, |
|
"rewards/margins": 0.7885428071022034, |
|
"rewards/rejected": -3.5952727794647217, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 77.9188973058883, |
|
"learning_rate": 2.8195076242990116e-07, |
|
"logits/chosen": -0.8215748071670532, |
|
"logits/rejected": -0.6058939695358276, |
|
"logps/chosen": -3.0396523475646973, |
|
"logps/rejected": -3.7490572929382324, |
|
"loss": 1.3009, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.0396523475646973, |
|
"rewards/margins": 0.7094049453735352, |
|
"rewards/rejected": -3.7490572929382324, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 63.40691188463422, |
|
"learning_rate": 2.7285261601056697e-07, |
|
"logits/chosen": -0.9382761716842651, |
|
"logits/rejected": -0.729052722454071, |
|
"logps/chosen": -2.595768451690674, |
|
"logps/rejected": -3.3881466388702393, |
|
"loss": 1.3493, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.595768451690674, |
|
"rewards/margins": 0.792378306388855, |
|
"rewards/rejected": -3.3881466388702393, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 48.6873954516319, |
|
"learning_rate": 2.6372383496608186e-07, |
|
"logits/chosen": -0.9191045761108398, |
|
"logits/rejected": -0.6374125480651855, |
|
"logps/chosen": -2.6228957176208496, |
|
"logps/rejected": -3.4444642066955566, |
|
"loss": 1.1933, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6228957176208496, |
|
"rewards/margins": 0.8215683698654175, |
|
"rewards/rejected": -3.4444642066955566, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 58.54153465550475, |
|
"learning_rate": 2.5457665670441937e-07, |
|
"logits/chosen": -1.0062066316604614, |
|
"logits/rejected": -0.7524072527885437, |
|
"logps/chosen": -2.226409435272217, |
|
"logps/rejected": -3.2910056114196777, |
|
"loss": 1.2637, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.226409435272217, |
|
"rewards/margins": 1.0645958185195923, |
|
"rewards/rejected": -3.2910056114196777, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 56.27740231168225, |
|
"learning_rate": 2.454233432955807e-07, |
|
"logits/chosen": -0.9759531021118164, |
|
"logits/rejected": -0.7592412233352661, |
|
"logps/chosen": -2.4869279861450195, |
|
"logps/rejected": -3.270657777786255, |
|
"loss": 1.1763, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4869279861450195, |
|
"rewards/margins": 0.7837298512458801, |
|
"rewards/rejected": -3.270657777786255, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 79.16628471833214, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": -0.7810020446777344, |
|
"logits/rejected": -0.563759446144104, |
|
"logps/chosen": -2.9826416969299316, |
|
"logps/rejected": -3.696744918823242, |
|
"loss": 1.2307, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.9826416969299316, |
|
"rewards/margins": 0.7141033411026001, |
|
"rewards/rejected": -3.696744918823242, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 67.9940381335306, |
|
"learning_rate": 2.2714738398943308e-07, |
|
"logits/chosen": -0.6166077256202698, |
|
"logits/rejected": -0.255386084318161, |
|
"logps/chosen": -3.5992627143859863, |
|
"logps/rejected": -4.62840461730957, |
|
"loss": 1.2213, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.5992627143859863, |
|
"rewards/margins": 1.029141902923584, |
|
"rewards/rejected": -4.62840461730957, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 77.77171483631982, |
|
"learning_rate": 2.1804923757009882e-07, |
|
"logits/chosen": -0.4341735243797302, |
|
"logits/rejected": -0.08086974173784256, |
|
"logps/chosen": -3.569580554962158, |
|
"logps/rejected": -4.482806205749512, |
|
"loss": 1.2253, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.569580554962158, |
|
"rewards/margins": 0.9132259488105774, |
|
"rewards/rejected": -4.482806205749512, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 78.23503462159874, |
|
"learning_rate": 2.089939221172446e-07, |
|
"logits/chosen": -0.4769046902656555, |
|
"logits/rejected": -0.28695839643478394, |
|
"logps/chosen": -3.2708067893981934, |
|
"logps/rejected": -4.225908279418945, |
|
"loss": 1.2069, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.2708067893981934, |
|
"rewards/margins": 0.955101490020752, |
|
"rewards/rejected": -4.225908279418945, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 60.888870909608286, |
|
"learning_rate": 1.9999357655598891e-07, |
|
"logits/chosen": -0.629644513130188, |
|
"logits/rejected": -0.3126750886440277, |
|
"logps/chosen": -3.1302852630615234, |
|
"logps/rejected": -4.096064567565918, |
|
"loss": 1.1961, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.1302852630615234, |
|
"rewards/margins": 0.9657794833183289, |
|
"rewards/rejected": -4.096064567565918, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 69.95748727333331, |
|
"learning_rate": 1.9106026612264315e-07, |
|
"logits/chosen": -0.6755684614181519, |
|
"logits/rejected": -0.30471256375312805, |
|
"logps/chosen": -2.978564739227295, |
|
"logps/rejected": -3.7914657592773438, |
|
"loss": 1.2554, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.978564739227295, |
|
"rewards/margins": 0.8129006624221802, |
|
"rewards/rejected": -3.7914657592773438, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 70.38289921062774, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -0.5232299566268921, |
|
"logits/rejected": -0.24573859572410583, |
|
"logps/chosen": -2.9739651679992676, |
|
"logps/rejected": -3.8225860595703125, |
|
"loss": 1.2417, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9739651679992676, |
|
"rewards/margins": 0.8486205339431763, |
|
"rewards/rejected": -3.8225860595703125, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 72.4502519540176, |
|
"learning_rate": 1.7344254621846017e-07, |
|
"logits/chosen": -0.25070467591285706, |
|
"logits/rejected": 0.04965158551931381, |
|
"logps/chosen": -3.1987884044647217, |
|
"logps/rejected": -4.245509147644043, |
|
"loss": 1.1282, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.1987884044647217, |
|
"rewards/margins": 1.046720266342163, |
|
"rewards/rejected": -4.245509147644043, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 74.82154328000124, |
|
"learning_rate": 1.647817538357072e-07, |
|
"logits/chosen": -0.21263869106769562, |
|
"logits/rejected": -0.07299887388944626, |
|
"logps/chosen": -3.0970096588134766, |
|
"logps/rejected": -4.266674995422363, |
|
"loss": 1.1228, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.0970096588134766, |
|
"rewards/margins": 1.1696654558181763, |
|
"rewards/rejected": -4.266674995422363, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 104.80383724137711, |
|
"learning_rate": 1.562351990976095e-07, |
|
"logits/chosen": -0.2823619246482849, |
|
"logits/rejected": 0.06330037117004395, |
|
"logps/chosen": -3.170875072479248, |
|
"logps/rejected": -4.283278465270996, |
|
"loss": 1.1544, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.170875072479248, |
|
"rewards/margins": 1.1124036312103271, |
|
"rewards/rejected": -4.283278465270996, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 77.84225661924857, |
|
"learning_rate": 1.478143389201113e-07, |
|
"logits/chosen": -0.15583737194538116, |
|
"logits/rejected": -0.0702785775065422, |
|
"logps/chosen": -3.0332465171813965, |
|
"logps/rejected": -3.9285430908203125, |
|
"loss": 1.2128, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.0332465171813965, |
|
"rewards/margins": 0.8952968716621399, |
|
"rewards/rejected": -3.9285430908203125, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 82.4050976791357, |
|
"learning_rate": 1.3953046172178413e-07, |
|
"logits/chosen": -0.19347061216831207, |
|
"logits/rejected": 0.27237311005592346, |
|
"logps/chosen": -2.916917324066162, |
|
"logps/rejected": -3.891824245452881, |
|
"loss": 1.1941, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.916917324066162, |
|
"rewards/margins": 0.9749069213867188, |
|
"rewards/rejected": -3.891824245452881, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 74.25892962169908, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -0.2500423491001129, |
|
"logits/rejected": 0.1266798973083496, |
|
"logps/chosen": -2.7360358238220215, |
|
"logps/rejected": -3.647095203399658, |
|
"loss": 1.2222, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.7360358238220215, |
|
"rewards/margins": 0.9110593795776367, |
|
"rewards/rejected": -3.647095203399658, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 68.6281193094518, |
|
"learning_rate": 1.2341787690142435e-07, |
|
"logits/chosen": -0.4035646915435791, |
|
"logits/rejected": -0.033549416810274124, |
|
"logps/chosen": -2.763370990753174, |
|
"logps/rejected": -3.6889405250549316, |
|
"loss": 1.2685, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.763370990753174, |
|
"rewards/margins": 0.9255691766738892, |
|
"rewards/rejected": -3.6889405250549316, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 64.3244472190874, |
|
"learning_rate": 1.1561076868822755e-07, |
|
"logits/chosen": -0.36889219284057617, |
|
"logits/rejected": -0.027694886550307274, |
|
"logps/chosen": -2.9837875366210938, |
|
"logps/rejected": -3.666727066040039, |
|
"loss": 1.2109, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.9837875366210938, |
|
"rewards/margins": 0.6829396486282349, |
|
"rewards/rejected": -3.666727066040039, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 67.8756323193863, |
|
"learning_rate": 1.0798381331721107e-07, |
|
"logits/chosen": -0.3677563965320587, |
|
"logits/rejected": -0.05767295882105827, |
|
"logps/chosen": -2.810889959335327, |
|
"logps/rejected": -3.620894193649292, |
|
"loss": 1.1996, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.810889959335327, |
|
"rewards/margins": 0.8100040555000305, |
|
"rewards/rejected": -3.620894193649292, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 75.08030338233884, |
|
"learning_rate": 1.0054723495346482e-07, |
|
"logits/chosen": -0.2898016571998596, |
|
"logits/rejected": -0.137650728225708, |
|
"logps/chosen": -2.9614298343658447, |
|
"logps/rejected": -3.739614963531494, |
|
"loss": 1.2604, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.9614298343658447, |
|
"rewards/margins": 0.7781847715377808, |
|
"rewards/rejected": -3.739614963531494, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 74.74858163258845, |
|
"learning_rate": 9.331100255592436e-08, |
|
"logits/chosen": -0.2886708378791809, |
|
"logits/rejected": 0.029823053628206253, |
|
"logps/chosen": -3.0250728130340576, |
|
"logps/rejected": -4.2197394371032715, |
|
"loss": 1.1354, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.0250728130340576, |
|
"rewards/margins": 1.194666862487793, |
|
"rewards/rejected": -4.2197394371032715, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 70.24673714433773, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -0.40003472566604614, |
|
"logits/rejected": -0.07148544490337372, |
|
"logps/chosen": -3.224444627761841, |
|
"logps/rejected": -3.998364210128784, |
|
"loss": 1.1942, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.224444627761841, |
|
"rewards/margins": 0.7739196419715881, |
|
"rewards/rejected": -3.998364210128784, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 99.18869677889904, |
|
"learning_rate": 7.947809564230445e-08, |
|
"logits/chosen": -0.20483890175819397, |
|
"logits/rejected": 0.11580769717693329, |
|
"logps/chosen": -3.247260570526123, |
|
"logps/rejected": -4.369873046875, |
|
"loss": 1.2124, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.247260570526123, |
|
"rewards/margins": 1.122612714767456, |
|
"rewards/rejected": -4.369873046875, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 91.27768360001403, |
|
"learning_rate": 7.289996455765748e-08, |
|
"logits/chosen": -0.2206389605998993, |
|
"logits/rejected": 0.08472562581300735, |
|
"logps/chosen": -3.4089035987854004, |
|
"logps/rejected": -4.553833961486816, |
|
"loss": 1.1347, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.4089035987854004, |
|
"rewards/margins": 1.1449302434921265, |
|
"rewards/rejected": -4.553833961486816, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 76.20144058868142, |
|
"learning_rate": 6.655924144404906e-08, |
|
"logits/chosen": -0.2008267343044281, |
|
"logits/rejected": -0.1611969918012619, |
|
"logps/chosen": -3.3004608154296875, |
|
"logps/rejected": -4.173122406005859, |
|
"loss": 1.1354, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.3004608154296875, |
|
"rewards/margins": 0.8726619482040405, |
|
"rewards/rejected": -4.173122406005859, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 74.59904017410364, |
|
"learning_rate": 6.046442623320145e-08, |
|
"logits/chosen": -0.1321212500333786, |
|
"logits/rejected": -0.01653924025595188, |
|
"logps/chosen": -3.2044310569763184, |
|
"logps/rejected": -4.154183864593506, |
|
"loss": 1.1999, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.2044310569763184, |
|
"rewards/margins": 0.9497528076171875, |
|
"rewards/rejected": -4.154183864593506, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 72.89488713559955, |
|
"learning_rate": 5.4623689209832484e-08, |
|
"logits/chosen": -0.2955227494239807, |
|
"logits/rejected": -0.03774283826351166, |
|
"logps/chosen": -3.0728917121887207, |
|
"logps/rejected": -4.051039695739746, |
|
"loss": 1.1963, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.0728917121887207, |
|
"rewards/margins": 0.9781482815742493, |
|
"rewards/rejected": -4.051039695739746, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 67.63752264326851, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -0.45038795471191406, |
|
"logits/rejected": -0.09373664855957031, |
|
"logps/chosen": -3.1700501441955566, |
|
"logps/rejected": -4.2985382080078125, |
|
"loss": 1.1136, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.1700501441955566, |
|
"rewards/margins": 1.1284879446029663, |
|
"rewards/rejected": -4.2985382080078125, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 74.45872340513813, |
|
"learning_rate": 4.373541737087263e-08, |
|
"logits/chosen": -0.49037042260169983, |
|
"logits/rejected": -0.09940163046121597, |
|
"logps/chosen": -3.04799222946167, |
|
"logps/rejected": -4.031525135040283, |
|
"loss": 1.1544, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.04799222946167, |
|
"rewards/margins": 0.9835329055786133, |
|
"rewards/rejected": -4.031525135040283, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 78.72742631851618, |
|
"learning_rate": 3.8702478614051345e-08, |
|
"logits/chosen": -0.32979053258895874, |
|
"logits/rejected": -0.04894006997346878, |
|
"logps/chosen": -3.051708698272705, |
|
"logps/rejected": -4.071520805358887, |
|
"loss": 1.2236, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.051708698272705, |
|
"rewards/margins": 1.0198121070861816, |
|
"rewards/rejected": -4.071520805358887, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 84.34790591849121, |
|
"learning_rate": 3.3952790595787986e-08, |
|
"logits/chosen": -0.38359755277633667, |
|
"logits/rejected": -0.2002539187669754, |
|
"logps/chosen": -3.3085391521453857, |
|
"logps/rejected": -4.132696151733398, |
|
"loss": 1.1961, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -3.3085391521453857, |
|
"rewards/margins": 0.8241568803787231, |
|
"rewards/rejected": -4.132696151733398, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 73.80761771512901, |
|
"learning_rate": 2.9492720416985e-08, |
|
"logits/chosen": -0.3136187195777893, |
|
"logits/rejected": -0.1394844651222229, |
|
"logps/chosen": -3.2027900218963623, |
|
"logps/rejected": -4.078015327453613, |
|
"loss": 1.2319, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.2027900218963623, |
|
"rewards/margins": 0.8752256631851196, |
|
"rewards/rejected": -4.078015327453613, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 55.42101250348971, |
|
"learning_rate": 2.5328246937043525e-08, |
|
"logits/chosen": -0.3292819857597351, |
|
"logits/rejected": 0.0064947158098220825, |
|
"logps/chosen": -3.1560277938842773, |
|
"logps/rejected": -4.08798360824585, |
|
"loss": 1.1358, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.1560277938842773, |
|
"rewards/margins": 0.9319561123847961, |
|
"rewards/rejected": -4.08798360824585, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 67.75077864144087, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": -0.36769989132881165, |
|
"logits/rejected": -0.17563530802726746, |
|
"logps/chosen": -3.0870118141174316, |
|
"logps/rejected": -3.987121105194092, |
|
"loss": 1.2481, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.0870118141174316, |
|
"rewards/margins": 0.9001096487045288, |
|
"rewards/rejected": -3.987121105194092, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 73.97322762411653, |
|
"learning_rate": 1.7908016745981856e-08, |
|
"logits/chosen": -0.3043641448020935, |
|
"logits/rejected": -0.044486187398433685, |
|
"logps/chosen": -3.249401569366455, |
|
"logps/rejected": -4.1566572189331055, |
|
"loss": 1.1883, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.249401569366455, |
|
"rewards/margins": 0.9072558283805847, |
|
"rewards/rejected": -4.1566572189331055, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 62.93268497369251, |
|
"learning_rate": 1.4662207078575684e-08, |
|
"logits/chosen": -0.3604608178138733, |
|
"logits/rejected": -0.05810839682817459, |
|
"logps/chosen": -3.14021635055542, |
|
"logps/rejected": -4.223752975463867, |
|
"loss": 1.1337, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.14021635055542, |
|
"rewards/margins": 1.0835367441177368, |
|
"rewards/rejected": -4.223752975463867, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 72.41742830338748, |
|
"learning_rate": 1.1731874863145142e-08, |
|
"logits/chosen": -0.3878116011619568, |
|
"logits/rejected": -0.25898757576942444, |
|
"logps/chosen": -3.085711717605591, |
|
"logps/rejected": -4.0059380531311035, |
|
"loss": 1.1752, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.085711717605591, |
|
"rewards/margins": 0.9202262163162231, |
|
"rewards/rejected": -4.0059380531311035, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 67.55543823572617, |
|
"learning_rate": 9.12094829893642e-09, |
|
"logits/chosen": -0.26049160957336426, |
|
"logits/rejected": -0.0604906901717186, |
|
"logps/chosen": -3.136883497238159, |
|
"logps/rejected": -4.199965000152588, |
|
"loss": 1.1765, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.136883497238159, |
|
"rewards/margins": 1.0630815029144287, |
|
"rewards/rejected": -4.199965000152588, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 68.54556826267816, |
|
"learning_rate": 6.832927412229017e-09, |
|
"logits/chosen": -0.3159419894218445, |
|
"logits/rejected": -0.0612739734351635, |
|
"logps/chosen": -3.3173446655273438, |
|
"logps/rejected": -4.260361194610596, |
|
"loss": 1.1964, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.3173446655273438, |
|
"rewards/margins": 0.9430168271064758, |
|
"rewards/rejected": -4.260361194610596, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 67.1017202252172, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -0.2727358341217041, |
|
"logits/rejected": -0.0900549367070198, |
|
"logps/chosen": -3.1065354347229004, |
|
"logps/rejected": -4.069243431091309, |
|
"loss": 1.1456, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.1065354347229004, |
|
"rewards/margins": 0.9627076983451843, |
|
"rewards/rejected": -4.069243431091309, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 74.36090301118229, |
|
"learning_rate": 3.2374343405217884e-09, |
|
"logits/chosen": -0.2751519978046417, |
|
"logits/rejected": 0.001943744719028473, |
|
"logps/chosen": -3.195307493209839, |
|
"logps/rejected": -4.041089057922363, |
|
"loss": 1.2193, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.195307493209839, |
|
"rewards/margins": 0.8457815051078796, |
|
"rewards/rejected": -4.041089057922363, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 63.718344779873874, |
|
"learning_rate": 1.9347820230782295e-09, |
|
"logits/chosen": -0.1701783835887909, |
|
"logits/rejected": 0.05854835361242294, |
|
"logps/chosen": -3.1413817405700684, |
|
"logps/rejected": -4.193324565887451, |
|
"loss": 1.1655, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.1413817405700684, |
|
"rewards/margins": 1.0519429445266724, |
|
"rewards/rejected": -4.193324565887451, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 66.29809600281665, |
|
"learning_rate": 9.64668657069706e-10, |
|
"logits/chosen": -0.3206351101398468, |
|
"logits/rejected": 0.049534112215042114, |
|
"logps/chosen": -3.1361141204833984, |
|
"logps/rejected": -4.281296730041504, |
|
"loss": 1.1251, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.1361141204833984, |
|
"rewards/margins": 1.1451822519302368, |
|
"rewards/rejected": -4.281296730041504, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 74.93911331685558, |
|
"learning_rate": 3.2839470889836627e-10, |
|
"logits/chosen": -0.34328070282936096, |
|
"logits/rejected": -0.11956997960805893, |
|
"logps/chosen": -3.241725206375122, |
|
"logps/rejected": -4.065664291381836, |
|
"loss": 1.2111, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.241725206375122, |
|
"rewards/margins": 0.8239390254020691, |
|
"rewards/rejected": -4.065664291381836, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 69.55111369299213, |
|
"learning_rate": 2.6813123097352287e-11, |
|
"logits/chosen": -0.4041665494441986, |
|
"logits/rejected": 0.02163061499595642, |
|
"logps/chosen": -3.192171573638916, |
|
"logps/rejected": -4.112965106964111, |
|
"loss": 1.2172, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.192171573638916, |
|
"rewards/margins": 0.9207934141159058, |
|
"rewards/rejected": -4.112965106964111, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 1.3340201452843048, |
|
"train_runtime": 7646.4301, |
|
"train_samples_per_second": 7.995, |
|
"train_steps_per_second": 0.062 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|