|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 4164, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007204610951008645, |
|
"grad_norm": 2.336021900177002, |
|
"learning_rate": 1.199040767386091e-10, |
|
"logits/chosen": -1.3860063552856445, |
|
"logits/rejected": -1.3949532508850098, |
|
"logps/chosen": -34.621925354003906, |
|
"logps/rejected": -37.30891418457031, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007204610951008645, |
|
"grad_norm": 2.7931599617004395, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -1.546767234802246, |
|
"logits/rejected": -1.5282517671585083, |
|
"logps/chosen": -42.52494812011719, |
|
"logps/rejected": -44.546756744384766, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3680555522441864, |
|
"rewards/chosen": -0.00010908626427408308, |
|
"rewards/margins": -0.00013866486551705748, |
|
"rewards/rejected": 2.95786012429744e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01440922190201729, |
|
"grad_norm": 2.9333579540252686, |
|
"learning_rate": 2.398081534772182e-09, |
|
"logits/chosen": -1.5552335977554321, |
|
"logits/rejected": -1.5412750244140625, |
|
"logps/chosen": -44.075599670410156, |
|
"logps/rejected": -46.59809112548828, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 9.86563172773458e-05, |
|
"rewards/margins": 0.00012048264034092426, |
|
"rewards/rejected": -2.182633033953607e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021613832853025938, |
|
"grad_norm": 3.4909462928771973, |
|
"learning_rate": 3.597122302158273e-09, |
|
"logits/chosen": -1.511649250984192, |
|
"logits/rejected": -1.5045579671859741, |
|
"logps/chosen": -47.84404754638672, |
|
"logps/rejected": -50.805335998535156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 4.7573317715432495e-05, |
|
"rewards/margins": 0.00018799836107064039, |
|
"rewards/rejected": -0.0001404250506311655, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02881844380403458, |
|
"grad_norm": 2.575885772705078, |
|
"learning_rate": 4.796163069544364e-09, |
|
"logits/chosen": -1.5584311485290527, |
|
"logits/rejected": -1.5544531345367432, |
|
"logps/chosen": -43.06354522705078, |
|
"logps/rejected": -45.552642822265625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0001178598977276124, |
|
"rewards/margins": -6.472436507465318e-05, |
|
"rewards/rejected": -5.313555084285326e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03602305475504323, |
|
"grad_norm": 2.653576374053955, |
|
"learning_rate": 5.995203836930456e-09, |
|
"logits/chosen": -1.469327688217163, |
|
"logits/rejected": -1.4684107303619385, |
|
"logps/chosen": -42.99556350708008, |
|
"logps/rejected": -44.81259536743164, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.00011682316107908264, |
|
"rewards/margins": -2.396003037574701e-05, |
|
"rewards/rejected": 0.0001407831732649356, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.043227665706051875, |
|
"grad_norm": 3.936546564102173, |
|
"learning_rate": 7.194244604316546e-09, |
|
"logits/chosen": -1.567378044128418, |
|
"logits/rejected": -1.5606577396392822, |
|
"logps/chosen": -50.69051742553711, |
|
"logps/rejected": -52.026954650878906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -3.9090933569241315e-05, |
|
"rewards/margins": 1.4172494047670625e-05, |
|
"rewards/rejected": -5.326343307388015e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05043227665706052, |
|
"grad_norm": 2.3031318187713623, |
|
"learning_rate": 8.393285371702639e-09, |
|
"logits/chosen": -1.5360815525054932, |
|
"logits/rejected": -1.5282552242279053, |
|
"logps/chosen": -50.07221221923828, |
|
"logps/rejected": -52.78315353393555, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -4.2442261474207044e-05, |
|
"rewards/margins": 4.4568816520040855e-05, |
|
"rewards/rejected": -8.701106708031148e-05, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 3.459690570831299, |
|
"learning_rate": 9.592326139088728e-09, |
|
"logits/chosen": -1.5700651407241821, |
|
"logits/rejected": -1.5622011423110962, |
|
"logps/chosen": -51.09720230102539, |
|
"logps/rejected": -52.700775146484375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00014371283759828657, |
|
"rewards/margins": 0.0003250584995839745, |
|
"rewards/rejected": -0.00018134564743377268, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06484149855907781, |
|
"grad_norm": 2.787858009338379, |
|
"learning_rate": 1.0791366906474819e-08, |
|
"logits/chosen": -1.5036344528198242, |
|
"logits/rejected": -1.5007749795913696, |
|
"logps/chosen": -49.02019500732422, |
|
"logps/rejected": -51.165855407714844, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -3.3910677302628756e-05, |
|
"rewards/margins": -8.141305443132296e-05, |
|
"rewards/rejected": 4.750239895656705e-05, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07204610951008646, |
|
"grad_norm": 2.978163957595825, |
|
"learning_rate": 1.1990407673860912e-08, |
|
"logits/chosen": -1.5843650102615356, |
|
"logits/rejected": -1.5736749172210693, |
|
"logps/chosen": -45.69003677368164, |
|
"logps/rejected": -48.75359344482422, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 5.446890099847224e-06, |
|
"rewards/margins": -4.437283132574521e-05, |
|
"rewards/rejected": 4.981973324902356e-05, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0792507204610951, |
|
"grad_norm": 2.3196158409118652, |
|
"learning_rate": 1.3189448441247003e-08, |
|
"logits/chosen": -1.4547462463378906, |
|
"logits/rejected": -1.4310463666915894, |
|
"logps/chosen": -48.958641052246094, |
|
"logps/rejected": -51.16803741455078, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.00026367080863565207, |
|
"rewards/margins": -0.0002573660749476403, |
|
"rewards/rejected": -6.304704584181309e-06, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08645533141210375, |
|
"grad_norm": 2.3112876415252686, |
|
"learning_rate": 1.4388489208633092e-08, |
|
"logits/chosen": -1.4834020137786865, |
|
"logits/rejected": -1.480193853378296, |
|
"logps/chosen": -44.25951385498047, |
|
"logps/rejected": -46.622459411621094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -6.177094473969191e-05, |
|
"rewards/margins": 0.00016041506023611873, |
|
"rewards/rejected": -0.00022218600497581065, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0936599423631124, |
|
"grad_norm": 3.208045482635498, |
|
"learning_rate": 1.5587529976019183e-08, |
|
"logits/chosen": -1.5762228965759277, |
|
"logits/rejected": -1.570908784866333, |
|
"logps/chosen": -49.414695739746094, |
|
"logps/rejected": -51.2851448059082, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -8.6869033111725e-05, |
|
"rewards/margins": -0.0002423443365842104, |
|
"rewards/rejected": 0.000155475310748443, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10086455331412104, |
|
"grad_norm": 2.822239875793457, |
|
"learning_rate": 1.6786570743405277e-08, |
|
"logits/chosen": -1.4522497653961182, |
|
"logits/rejected": -1.4427827596664429, |
|
"logps/chosen": -45.83789825439453, |
|
"logps/rejected": -50.24224090576172, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 8.89547445694916e-05, |
|
"rewards/margins": 4.058218473801389e-05, |
|
"rewards/rejected": 4.837256346945651e-05, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10806916426512968, |
|
"grad_norm": 3.587710380554199, |
|
"learning_rate": 1.7985611510791365e-08, |
|
"logits/chosen": -1.4713795185089111, |
|
"logits/rejected": -1.4646179676055908, |
|
"logps/chosen": -48.30494689941406, |
|
"logps/rejected": -51.4416389465332, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -4.7554804041283205e-05, |
|
"rewards/margins": 4.275305400369689e-05, |
|
"rewards/rejected": -9.030787623487413e-05, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 2.4577479362487793, |
|
"learning_rate": 1.9184652278177456e-08, |
|
"logits/chosen": -1.5048809051513672, |
|
"logits/rejected": -1.4864928722381592, |
|
"logps/chosen": -41.26644515991211, |
|
"logps/rejected": -44.58869171142578, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00011363021621946245, |
|
"rewards/margins": -0.00019988985150121152, |
|
"rewards/rejected": 8.625965710962191e-05, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12247838616714697, |
|
"grad_norm": 3.1715707778930664, |
|
"learning_rate": 2.038369304556355e-08, |
|
"logits/chosen": -1.5167441368103027, |
|
"logits/rejected": -1.4980051517486572, |
|
"logps/chosen": -44.847110748291016, |
|
"logps/rejected": -46.87194061279297, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 6.15198805462569e-05, |
|
"rewards/margins": 4.4902008085045964e-05, |
|
"rewards/rejected": 1.661786882323213e-05, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12968299711815562, |
|
"grad_norm": 2.6304078102111816, |
|
"learning_rate": 2.1582733812949638e-08, |
|
"logits/chosen": -1.5800201892852783, |
|
"logits/rejected": -1.5682014226913452, |
|
"logps/chosen": -45.10023880004883, |
|
"logps/rejected": -46.83604049682617, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -8.165388862835243e-05, |
|
"rewards/margins": -0.00017628518980927765, |
|
"rewards/rejected": 9.46312939049676e-05, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13688760806916425, |
|
"grad_norm": 2.9077770709991455, |
|
"learning_rate": 2.278177458033573e-08, |
|
"logits/chosen": -1.5893758535385132, |
|
"logits/rejected": -1.5856701135635376, |
|
"logps/chosen": -42.25619125366211, |
|
"logps/rejected": -45.408897399902344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 5.6146480346797034e-05, |
|
"rewards/margins": 9.998455789173022e-05, |
|
"rewards/rejected": -4.3838103010784835e-05, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1440922190201729, |
|
"grad_norm": 3.549778699874878, |
|
"learning_rate": 2.3980815347721823e-08, |
|
"logits/chosen": -1.5360238552093506, |
|
"logits/rejected": -1.5294934511184692, |
|
"logps/chosen": -43.458560943603516, |
|
"logps/rejected": -47.107208251953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00025255040964111686, |
|
"rewards/margins": 0.00011713804269675165, |
|
"rewards/rejected": 0.00013541239604819566, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15129682997118155, |
|
"grad_norm": 3.0489232540130615, |
|
"learning_rate": 2.517985611510791e-08, |
|
"logits/chosen": -1.5652011632919312, |
|
"logits/rejected": -1.55386221408844, |
|
"logps/chosen": -43.03472137451172, |
|
"logps/rejected": -43.407081604003906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 2.3424910978064872e-05, |
|
"rewards/margins": 8.2639220636338e-05, |
|
"rewards/rejected": -5.9214326029177755e-05, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1585014409221902, |
|
"grad_norm": 2.841609001159668, |
|
"learning_rate": 2.6378896882494006e-08, |
|
"logits/chosen": -1.480421781539917, |
|
"logits/rejected": -1.4737306833267212, |
|
"logps/chosen": -47.355350494384766, |
|
"logps/rejected": -52.514892578125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 1.830189285101369e-05, |
|
"rewards/margins": -0.00019653179333545268, |
|
"rewards/rejected": 0.00021483367891050875, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16570605187319884, |
|
"grad_norm": 2.58459734916687, |
|
"learning_rate": 2.7577937649880097e-08, |
|
"logits/chosen": -1.531930685043335, |
|
"logits/rejected": -1.530457615852356, |
|
"logps/chosen": -44.46145248413086, |
|
"logps/rejected": -48.232242584228516, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 8.398554928135127e-05, |
|
"rewards/margins": -8.254259591922164e-05, |
|
"rewards/rejected": 0.00016652815975248814, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 3.012089252471924, |
|
"learning_rate": 2.8776978417266184e-08, |
|
"logits/chosen": -1.579641342163086, |
|
"logits/rejected": -1.5689789056777954, |
|
"logps/chosen": -49.132789611816406, |
|
"logps/rejected": -51.18694305419922, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 9.1132904344704e-05, |
|
"rewards/margins": -7.982960960362107e-05, |
|
"rewards/rejected": 0.00017096252122428268, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18011527377521613, |
|
"grad_norm": 4.0211029052734375, |
|
"learning_rate": 2.997601918465228e-08, |
|
"logits/chosen": -1.4552438259124756, |
|
"logits/rejected": -1.4445832967758179, |
|
"logps/chosen": -49.68731689453125, |
|
"logps/rejected": -50.77144241333008, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00020649081852752715, |
|
"rewards/margins": 0.0003089832025580108, |
|
"rewards/rejected": -0.00010249239858239889, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1873198847262248, |
|
"grad_norm": 3.18790864944458, |
|
"learning_rate": 3.1175059952038366e-08, |
|
"logits/chosen": -1.4960715770721436, |
|
"logits/rejected": -1.490350365638733, |
|
"logps/chosen": -50.00872802734375, |
|
"logps/rejected": -51.44108200073242, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 4.101751983398572e-06, |
|
"rewards/margins": -8.562284347135574e-05, |
|
"rewards/rejected": 8.972459909273311e-05, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19452449567723343, |
|
"grad_norm": 3.316842794418335, |
|
"learning_rate": 3.237410071942446e-08, |
|
"logits/chosen": -1.5841925144195557, |
|
"logits/rejected": -1.5689890384674072, |
|
"logps/chosen": -46.749019622802734, |
|
"logps/rejected": -49.31301498413086, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00019280468404758722, |
|
"rewards/margins": 8.136655378621072e-05, |
|
"rewards/rejected": 0.00011143812298541889, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2017291066282421, |
|
"grad_norm": 2.9199252128601074, |
|
"learning_rate": 3.3573141486810555e-08, |
|
"logits/chosen": -1.5495785474777222, |
|
"logits/rejected": -1.5337769985198975, |
|
"logps/chosen": -42.458438873291016, |
|
"logps/rejected": -43.72154235839844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00012140088074374944, |
|
"rewards/margins": 0.00014567100151907653, |
|
"rewards/rejected": -2.4270115318358876e-05, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20893371757925072, |
|
"grad_norm": 2.7867319583892822, |
|
"learning_rate": 3.477218225419664e-08, |
|
"logits/chosen": -1.5798470973968506, |
|
"logits/rejected": -1.5693657398223877, |
|
"logps/chosen": -44.48418426513672, |
|
"logps/rejected": -45.79877471923828, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0002844139817170799, |
|
"rewards/margins": 0.0004200335533823818, |
|
"rewards/rejected": -0.0001356196153210476, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21613832853025935, |
|
"grad_norm": 3.150078058242798, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -1.556154489517212, |
|
"logits/rejected": -1.539389967918396, |
|
"logps/chosen": -48.21941375732422, |
|
"logps/rejected": -50.685508728027344, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0001608836610103026, |
|
"rewards/margins": -0.0002904186840169132, |
|
"rewards/rejected": 0.00012953505211044103, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22334293948126802, |
|
"grad_norm": 3.015779972076416, |
|
"learning_rate": 3.717026378896883e-08, |
|
"logits/chosen": -1.4409435987472534, |
|
"logits/rejected": -1.4314186573028564, |
|
"logps/chosen": -48.024295806884766, |
|
"logps/rejected": -50.0726432800293, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00016624320414848626, |
|
"rewards/margins": 0.000264329748461023, |
|
"rewards/rejected": -9.808655886445194e-05, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 2.547288417816162, |
|
"learning_rate": 3.836930455635491e-08, |
|
"logits/chosen": -1.553330659866333, |
|
"logits/rejected": -1.5384843349456787, |
|
"logps/chosen": -47.11131286621094, |
|
"logps/rejected": -51.53942108154297, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 3.6099205317441374e-05, |
|
"rewards/margins": -0.00022887022350914776, |
|
"rewards/rejected": 0.0002649694215506315, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2377521613832853, |
|
"grad_norm": 2.309831380844116, |
|
"learning_rate": 3.9568345323741003e-08, |
|
"logits/chosen": -1.5248470306396484, |
|
"logits/rejected": -1.5183141231536865, |
|
"logps/chosen": -50.730499267578125, |
|
"logps/rejected": -49.556068420410156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00026169692864641547, |
|
"rewards/margins": 0.0001796955766621977, |
|
"rewards/rejected": 8.200139563996345e-05, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24495677233429394, |
|
"grad_norm": 2.6688296794891357, |
|
"learning_rate": 4.07673860911271e-08, |
|
"logits/chosen": -1.5785892009735107, |
|
"logits/rejected": -1.5679844617843628, |
|
"logps/chosen": -51.07538604736328, |
|
"logps/rejected": -52.128135681152344, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.00021654777810908854, |
|
"rewards/margins": 0.0004967943532392383, |
|
"rewards/rejected": -0.00028024654602631927, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2521613832853026, |
|
"grad_norm": 3.666226387023926, |
|
"learning_rate": 4.1966426858513185e-08, |
|
"logits/chosen": -1.5095466375350952, |
|
"logits/rejected": -1.5047543048858643, |
|
"logps/chosen": -45.6449089050293, |
|
"logps/rejected": -48.59443664550781, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00023484873236157, |
|
"rewards/margins": 0.0001965599658433348, |
|
"rewards/rejected": 3.82887119485531e-05, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25936599423631124, |
|
"grad_norm": 3.478895664215088, |
|
"learning_rate": 4.3165467625899276e-08, |
|
"logits/chosen": -1.5015857219696045, |
|
"logits/rejected": -1.4911534786224365, |
|
"logps/chosen": -53.9161376953125, |
|
"logps/rejected": -56.55745315551758, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0001681277499301359, |
|
"rewards/margins": 0.00018387913587503135, |
|
"rewards/rejected": -1.575138230691664e-05, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2665706051873199, |
|
"grad_norm": 3.750058650970459, |
|
"learning_rate": 4.4364508393285374e-08, |
|
"logits/chosen": -1.4821351766586304, |
|
"logits/rejected": -1.480381727218628, |
|
"logps/chosen": -48.33544158935547, |
|
"logps/rejected": -53.026039123535156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 8.031638571992517e-05, |
|
"rewards/margins": 0.00012994577991776168, |
|
"rewards/rejected": -4.96293832839001e-05, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2737752161383285, |
|
"grad_norm": 2.4797353744506836, |
|
"learning_rate": 4.556354916067146e-08, |
|
"logits/chosen": -1.5723177194595337, |
|
"logits/rejected": -1.565124750137329, |
|
"logps/chosen": -47.00983428955078, |
|
"logps/rejected": -48.0451774597168, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -9.197367762681097e-05, |
|
"rewards/margins": -0.00010503224621061236, |
|
"rewards/rejected": 1.3058568583801389e-05, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28097982708933716, |
|
"grad_norm": 2.919807195663452, |
|
"learning_rate": 4.676258992805755e-08, |
|
"logits/chosen": -1.544861078262329, |
|
"logits/rejected": -1.5350227355957031, |
|
"logps/chosen": -48.17707824707031, |
|
"logps/rejected": -51.51404571533203, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.00019117488409392536, |
|
"rewards/margins": 0.00030245125526562333, |
|
"rewards/rejected": -0.0001112763857236132, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 3.4181175231933594, |
|
"learning_rate": 4.796163069544365e-08, |
|
"logits/chosen": -1.5584533214569092, |
|
"logits/rejected": -1.553442120552063, |
|
"logps/chosen": -44.867557525634766, |
|
"logps/rejected": -46.099464416503906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.0003581286873668432, |
|
"rewards/margins": 5.371281804400496e-05, |
|
"rewards/rejected": 0.0003044158511329442, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2953890489913545, |
|
"grad_norm": 3.621582269668579, |
|
"learning_rate": 4.916067146282973e-08, |
|
"logits/chosen": -1.5090839862823486, |
|
"logits/rejected": -1.5060720443725586, |
|
"logps/chosen": -47.50264358520508, |
|
"logps/rejected": -49.63032531738281, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00016462437633890659, |
|
"rewards/margins": 0.00023242369934450835, |
|
"rewards/rejected": -6.779931572964415e-05, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3025936599423631, |
|
"grad_norm": 2.4397823810577393, |
|
"learning_rate": 4.999992091672379e-08, |
|
"logits/chosen": -1.4693224430084229, |
|
"logits/rejected": -1.4796779155731201, |
|
"logps/chosen": -45.638816833496094, |
|
"logps/rejected": -48.969810485839844, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.000272547040367499, |
|
"rewards/margins": 0.0002472659107297659, |
|
"rewards/rejected": 2.5281125999754295e-05, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30979827089337175, |
|
"grad_norm": 2.2847747802734375, |
|
"learning_rate": 4.999851500573209e-08, |
|
"logits/chosen": -1.497286081314087, |
|
"logits/rejected": -1.4975759983062744, |
|
"logps/chosen": -46.069549560546875, |
|
"logps/rejected": -46.195926666259766, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00028530642157420516, |
|
"rewards/margins": 5.241289545665495e-05, |
|
"rewards/rejected": 0.00023289353703148663, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3170028818443804, |
|
"grad_norm": 2.459535598754883, |
|
"learning_rate": 4.999535180235972e-08, |
|
"logits/chosen": -1.4981845617294312, |
|
"logits/rejected": -1.4901264905929565, |
|
"logps/chosen": -46.00641632080078, |
|
"logps/rejected": -49.42933654785156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00040736678056418896, |
|
"rewards/margins": 0.00017191791266668588, |
|
"rewards/rejected": 0.00023544885334558785, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3242074927953891, |
|
"grad_norm": 3.167510747909546, |
|
"learning_rate": 4.9990431528966836e-08, |
|
"logits/chosen": -1.5115673542022705, |
|
"logits/rejected": -1.4903723001480103, |
|
"logps/chosen": -53.19926071166992, |
|
"logps/rejected": -51.2760124206543, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00023766886442899704, |
|
"rewards/margins": 7.205204019555822e-05, |
|
"rewards/rejected": 0.00016561683150939643, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3314121037463977, |
|
"grad_norm": 3.773123264312744, |
|
"learning_rate": 4.9983754531428326e-08, |
|
"logits/chosen": -1.5162955522537231, |
|
"logits/rejected": -1.4986896514892578, |
|
"logps/chosen": -53.790428161621094, |
|
"logps/rejected": -55.66992950439453, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0004971676971763372, |
|
"rewards/margins": 0.0004848612588830292, |
|
"rewards/rejected": 1.2306480130064301e-05, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33861671469740634, |
|
"grad_norm": 3.7350404262542725, |
|
"learning_rate": 4.997532127910954e-08, |
|
"logits/chosen": -1.5784637928009033, |
|
"logits/rejected": -1.5491013526916504, |
|
"logps/chosen": -52.63544464111328, |
|
"logps/rejected": -53.17797088623047, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.00048227087245322764, |
|
"rewards/margins": 0.00017049835878424346, |
|
"rewards/rejected": 0.00031177254277281463, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 3.6733524799346924, |
|
"learning_rate": 4.996513236483331e-08, |
|
"logits/chosen": -1.6473217010498047, |
|
"logits/rejected": -1.633281946182251, |
|
"logps/chosen": -42.51477813720703, |
|
"logps/rejected": -45.43607711791992, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.00038208425394259393, |
|
"rewards/margins": 0.000435996160376817, |
|
"rewards/rejected": -5.39118773303926e-05, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3530259365994236, |
|
"grad_norm": 4.256612777709961, |
|
"learning_rate": 4.9953188504838225e-08, |
|
"logits/chosen": -1.5243208408355713, |
|
"logits/rejected": -1.5126068592071533, |
|
"logps/chosen": -46.43769836425781, |
|
"logps/rejected": -49.511756896972656, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0005211577517911792, |
|
"rewards/margins": 0.00022547971457242966, |
|
"rewards/rejected": 0.0002956780372187495, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.36023054755043227, |
|
"grad_norm": 2.8351917266845703, |
|
"learning_rate": 4.993949053872834e-08, |
|
"logits/chosen": -1.5284124612808228, |
|
"logits/rejected": -1.5051486492156982, |
|
"logps/chosen": -42.646305084228516, |
|
"logps/rejected": -45.89906311035156, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0006723391707055271, |
|
"rewards/margins": 0.0005468688905239105, |
|
"rewards/rejected": 0.00012547028018161654, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36743515850144093, |
|
"grad_norm": 2.8914971351623535, |
|
"learning_rate": 4.9924039429414086e-08, |
|
"logits/chosen": -1.6392762660980225, |
|
"logits/rejected": -1.6216766834259033, |
|
"logps/chosen": -46.00267791748047, |
|
"logps/rejected": -47.987403869628906, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0007155792554840446, |
|
"rewards/margins": 0.0006390741327777505, |
|
"rewards/rejected": 7.650510087842122e-05, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3746397694524496, |
|
"grad_norm": 3.5407791137695312, |
|
"learning_rate": 4.990683626304467e-08, |
|
"logits/chosen": -1.534824013710022, |
|
"logits/rejected": -1.5296251773834229, |
|
"logps/chosen": -53.9051399230957, |
|
"logps/rejected": -56.04487991333008, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.000572909542825073, |
|
"rewards/margins": 0.00040510791586712003, |
|
"rewards/rejected": 0.0001678016851656139, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3818443804034582, |
|
"grad_norm": 3.2243497371673584, |
|
"learning_rate": 4.9887882248931646e-08, |
|
"logits/chosen": -1.458703637123108, |
|
"logits/rejected": -1.4379961490631104, |
|
"logps/chosen": -46.455928802490234, |
|
"logps/rejected": -47.554115295410156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0005672247498296201, |
|
"rewards/margins": 0.00012306116695981473, |
|
"rewards/rejected": 0.00044416356831789017, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38904899135446686, |
|
"grad_norm": 3.160867929458618, |
|
"learning_rate": 4.986717871946393e-08, |
|
"logits/chosen": -1.485264778137207, |
|
"logits/rejected": -1.4642503261566162, |
|
"logps/chosen": -45.888916015625, |
|
"logps/rejected": -47.80854797363281, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0008015796774998307, |
|
"rewards/margins": 0.0005912004271522164, |
|
"rewards/rejected": 0.0002103792503476143, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3962536023054755, |
|
"grad_norm": 3.078800916671753, |
|
"learning_rate": 4.984472713001416e-08, |
|
"logits/chosen": -1.4299088716506958, |
|
"logits/rejected": -1.4215964078903198, |
|
"logps/chosen": -48.37324905395508, |
|
"logps/rejected": -48.35413360595703, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0007138814544305205, |
|
"rewards/margins": 0.0003910651430487633, |
|
"rewards/rejected": 0.0003228162822779268, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 3.2446839809417725, |
|
"learning_rate": 4.982052905883637e-08, |
|
"logits/chosen": -1.5735007524490356, |
|
"logits/rejected": -1.563230276107788, |
|
"logps/chosen": -48.497169494628906, |
|
"logps/rejected": -49.916038513183594, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0005788643029518425, |
|
"rewards/margins": 0.00021954579278826714, |
|
"rewards/rejected": 0.0003593183937482536, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4106628242074928, |
|
"grad_norm": 2.9157779216766357, |
|
"learning_rate": 4.979458620695505e-08, |
|
"logits/chosen": -1.5526814460754395, |
|
"logits/rejected": -1.523316740989685, |
|
"logps/chosen": -52.486785888671875, |
|
"logps/rejected": -54.491912841796875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0009498519939370453, |
|
"rewards/margins": 0.000704078353010118, |
|
"rewards/rejected": 0.00024577361182309687, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41786743515850144, |
|
"grad_norm": 3.174314022064209, |
|
"learning_rate": 4.976690039804555e-08, |
|
"logits/chosen": -1.5769068002700806, |
|
"logits/rejected": -1.5633710622787476, |
|
"logps/chosen": -42.617549896240234, |
|
"logps/rejected": -44.070777893066406, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0007929207058623433, |
|
"rewards/margins": 0.0005030486499890685, |
|
"rewards/rejected": 0.0002898719976656139, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4250720461095101, |
|
"grad_norm": 2.7369155883789062, |
|
"learning_rate": 4.973747357830592e-08, |
|
"logits/chosen": -1.5272729396820068, |
|
"logits/rejected": -1.5258095264434814, |
|
"logps/chosen": -47.52259063720703, |
|
"logps/rejected": -53.1329345703125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0009988851379603148, |
|
"rewards/margins": 0.0007000741316005588, |
|
"rewards/rejected": 0.00029881083173677325, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4322766570605187, |
|
"grad_norm": 2.792400360107422, |
|
"learning_rate": 4.970630781632009e-08, |
|
"logits/chosen": -1.6299186944961548, |
|
"logits/rejected": -1.6194143295288086, |
|
"logps/chosen": -45.39707565307617, |
|
"logps/rejected": -49.081199645996094, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.001002423232421279, |
|
"rewards/margins": 0.0009681530063971877, |
|
"rewards/rejected": 3.427025876590051e-05, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43948126801152737, |
|
"grad_norm": 3.9282000064849854, |
|
"learning_rate": 4.967340530291242e-08, |
|
"logits/chosen": -1.5342658758163452, |
|
"logits/rejected": -1.5172860622406006, |
|
"logps/chosen": -50.43634796142578, |
|
"logps/rejected": -51.06001281738281, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0008850853191688657, |
|
"rewards/margins": 0.00033199749304912984, |
|
"rewards/rejected": 0.0005530878552235663, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44668587896253603, |
|
"grad_norm": 2.692594289779663, |
|
"learning_rate": 4.9638768350993755e-08, |
|
"logits/chosen": -1.5681965351104736, |
|
"logits/rejected": -1.5538604259490967, |
|
"logps/chosen": -42.41255569458008, |
|
"logps/rejected": -44.433387756347656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0007358186412602663, |
|
"rewards/margins": 9.383581345900893e-05, |
|
"rewards/rejected": 0.0006419828278012574, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4538904899135447, |
|
"grad_norm": 2.3626010417938232, |
|
"learning_rate": 4.9602399395398786e-08, |
|
"logits/chosen": -1.571014642715454, |
|
"logits/rejected": -1.5637868642807007, |
|
"logps/chosen": -43.03638458251953, |
|
"logps/rejected": -46.532352447509766, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0012007402256131172, |
|
"rewards/margins": 0.000781078590080142, |
|
"rewards/rejected": 0.00041966172284446657, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 2.919173002243042, |
|
"learning_rate": 4.9564300992714914e-08, |
|
"logits/chosen": -1.4291613101959229, |
|
"logits/rejected": -1.4245188236236572, |
|
"logps/chosen": -45.36858367919922, |
|
"logps/rejected": -48.030757904052734, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.001236187876202166, |
|
"rewards/margins": 0.0011594726238399744, |
|
"rewards/rejected": 7.671533967368305e-05, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46829971181556196, |
|
"grad_norm": 3.464174747467041, |
|
"learning_rate": 4.952447582110253e-08, |
|
"logits/chosen": -1.6131465435028076, |
|
"logits/rejected": -1.584380865097046, |
|
"logps/chosen": -45.44336700439453, |
|
"logps/rejected": -45.41891860961914, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0014582508010789752, |
|
"rewards/margins": 0.0009963499614968896, |
|
"rewards/rejected": 0.0004619006940629333, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4755043227665706, |
|
"grad_norm": 3.4227089881896973, |
|
"learning_rate": 4.948292668010676e-08, |
|
"logits/chosen": -1.5425691604614258, |
|
"logits/rejected": -1.5405323505401611, |
|
"logps/chosen": -47.156578063964844, |
|
"logps/rejected": -50.03830337524414, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0010936754988506436, |
|
"rewards/margins": 0.0011404131073504686, |
|
"rewards/rejected": -4.6737539378227666e-05, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4827089337175792, |
|
"grad_norm": 3.4312925338745117, |
|
"learning_rate": 4.943965649046064e-08, |
|
"logits/chosen": -1.5018900632858276, |
|
"logits/rejected": -1.4744553565979004, |
|
"logps/chosen": -49.83372116088867, |
|
"logps/rejected": -51.13875198364258, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0009614540031179786, |
|
"rewards/margins": 0.0003561762277968228, |
|
"rewards/rejected": 0.0006052777171134949, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4899135446685879, |
|
"grad_norm": 4.607568740844727, |
|
"learning_rate": 4.9394668293879835e-08, |
|
"logits/chosen": -1.4445058107376099, |
|
"logits/rejected": -1.431208848953247, |
|
"logps/chosen": -49.71855926513672, |
|
"logps/rejected": -49.596534729003906, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0016611143946647644, |
|
"rewards/margins": 0.001147769158706069, |
|
"rewards/rejected": 0.0005133452359586954, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.49711815561959655, |
|
"grad_norm": 3.329970598220825, |
|
"learning_rate": 4.93479652528488e-08, |
|
"logits/chosen": -1.5312420129776, |
|
"logits/rejected": -1.5205342769622803, |
|
"logps/chosen": -47.82133102416992, |
|
"logps/rejected": -50.619808197021484, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0012991353869438171, |
|
"rewards/margins": 0.000877738930284977, |
|
"rewards/rejected": 0.00042139639845117927, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5043227665706052, |
|
"grad_norm": 2.7837576866149902, |
|
"learning_rate": 4.929955065039848e-08, |
|
"logits/chosen": -1.54481840133667, |
|
"logits/rejected": -1.5314353704452515, |
|
"logps/chosen": -46.46809005737305, |
|
"logps/rejected": -49.25910949707031, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0013074753805994987, |
|
"rewards/margins": 0.0009138353052549064, |
|
"rewards/rejected": 0.00039364007534459233, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5115273775216138, |
|
"grad_norm": 2.817760705947876, |
|
"learning_rate": 4.92494278898755e-08, |
|
"logits/chosen": -1.5252554416656494, |
|
"logits/rejected": -1.509037733078003, |
|
"logps/chosen": -41.35044860839844, |
|
"logps/rejected": -43.40355682373047, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0012825509766116738, |
|
"rewards/margins": 0.000999027630314231, |
|
"rewards/rejected": 0.00028352331719361246, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 3.3338074684143066, |
|
"learning_rate": 4.9197600494702955e-08, |
|
"logits/chosen": -1.4963575601577759, |
|
"logits/rejected": -1.4812816381454468, |
|
"logps/chosen": -49.3184700012207, |
|
"logps/rejected": -52.44392013549805, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0011441984679549932, |
|
"rewards/margins": 0.000659407174680382, |
|
"rewards/rejected": 0.00048479135148227215, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5259365994236311, |
|
"grad_norm": 2.8599021434783936, |
|
"learning_rate": 4.9144072108132725e-08, |
|
"logits/chosen": -1.5103414058685303, |
|
"logits/rejected": -1.4909043312072754, |
|
"logps/chosen": -48.92387771606445, |
|
"logps/rejected": -51.066978454589844, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0015517466235905886, |
|
"rewards/margins": 0.0009926140774041414, |
|
"rewards/rejected": 0.0005591326043941081, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5331412103746398, |
|
"grad_norm": 2.899857521057129, |
|
"learning_rate": 4.908884649298937e-08, |
|
"logits/chosen": -1.5037367343902588, |
|
"logits/rejected": -1.5001060962677002, |
|
"logps/chosen": -46.71796417236328, |
|
"logps/rejected": -46.27198791503906, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0012326440773904324, |
|
"rewards/margins": 0.0005613928660750389, |
|
"rewards/rejected": 0.0006712513277307153, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5403458213256485, |
|
"grad_norm": 2.8896918296813965, |
|
"learning_rate": 4.903192753140557e-08, |
|
"logits/chosen": -1.5269062519073486, |
|
"logits/rejected": -1.510358214378357, |
|
"logps/chosen": -48.91157150268555, |
|
"logps/rejected": -50.0924072265625, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.001482138060964644, |
|
"rewards/margins": 0.001297360984608531, |
|
"rewards/rejected": 0.0001847770472522825, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.547550432276657, |
|
"grad_norm": 3.3254764080047607, |
|
"learning_rate": 4.897331922454931e-08, |
|
"logits/chosen": -1.4530668258666992, |
|
"logits/rejected": -1.451474666595459, |
|
"logps/chosen": -45.5156135559082, |
|
"logps/rejected": -48.615623474121094, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.001580315874889493, |
|
"rewards/margins": 0.001087161828763783, |
|
"rewards/rejected": 0.0004931538132950664, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5547550432276657, |
|
"grad_norm": 3.2673330307006836, |
|
"learning_rate": 4.891302569234256e-08, |
|
"logits/chosen": -1.47372305393219, |
|
"logits/rejected": -1.4675675630569458, |
|
"logps/chosen": -43.22402572631836, |
|
"logps/rejected": -45.916168212890625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0018752291798591614, |
|
"rewards/margins": 0.0017130204942077398, |
|
"rewards/rejected": 0.00016220868565142155, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5619596541786743, |
|
"grad_norm": 2.8122148513793945, |
|
"learning_rate": 4.8851051173171656e-08, |
|
"logits/chosen": -1.49901282787323, |
|
"logits/rejected": -1.4895904064178467, |
|
"logps/chosen": -48.40452575683594, |
|
"logps/rejected": -50.17125701904297, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0016933809965848923, |
|
"rewards/margins": 0.0010203216224908829, |
|
"rewards/rejected": 0.0006730594905093312, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.569164265129683, |
|
"grad_norm": 2.8876476287841797, |
|
"learning_rate": 4.87874000235894e-08, |
|
"logits/chosen": -1.5472230911254883, |
|
"logits/rejected": -1.537274956703186, |
|
"logps/chosen": -49.910091400146484, |
|
"logps/rejected": -53.432830810546875, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.001698293723165989, |
|
"rewards/margins": 0.0011685159988701344, |
|
"rewards/rejected": 0.00052977807354182, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 3.3339130878448486, |
|
"learning_rate": 4.872207671800876e-08, |
|
"logits/chosen": -1.5249817371368408, |
|
"logits/rejected": -1.5136332511901855, |
|
"logps/chosen": -46.88755416870117, |
|
"logps/rejected": -47.896827697753906, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0017408020794391632, |
|
"rewards/margins": 0.0012650018325075507, |
|
"rewards/rejected": 0.00047580021782778203, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5835734870317003, |
|
"grad_norm": 2.6908926963806152, |
|
"learning_rate": 4.865508584838841e-08, |
|
"logits/chosen": -1.5177758932113647, |
|
"logits/rejected": -1.5210834741592407, |
|
"logps/chosen": -44.743553161621094, |
|
"logps/rejected": -47.87196731567383, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.001408976735547185, |
|
"rewards/margins": 0.0010177220683544874, |
|
"rewards/rejected": 0.00039125472540035844, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.590778097982709, |
|
"grad_norm": 2.710764169692993, |
|
"learning_rate": 4.858643212390985e-08, |
|
"logits/chosen": -1.5524133443832397, |
|
"logits/rejected": -1.5305286645889282, |
|
"logps/chosen": -46.923805236816406, |
|
"logps/rejected": -47.56487274169922, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0020391198340803385, |
|
"rewards/margins": 0.001677272142842412, |
|
"rewards/rejected": 0.00036184763303026557, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5979827089337176, |
|
"grad_norm": 2.6352500915527344, |
|
"learning_rate": 4.851612037064643e-08, |
|
"logits/chosen": -1.5101532936096191, |
|
"logits/rejected": -1.5033133029937744, |
|
"logps/chosen": -41.79671859741211, |
|
"logps/rejected": -44.642845153808594, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0022669571917504072, |
|
"rewards/margins": 0.0015860407147556543, |
|
"rewards/rejected": 0.0006809166516177356, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6051873198847262, |
|
"grad_norm": 2.259105682373047, |
|
"learning_rate": 4.8444155531224065e-08, |
|
"logits/chosen": -1.5194116830825806, |
|
"logits/rejected": -1.5120502710342407, |
|
"logps/chosen": -47.2054328918457, |
|
"logps/rejected": -47.48673629760742, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0022509084083139896, |
|
"rewards/margins": 0.0016614611959084868, |
|
"rewards/rejected": 0.0005894473288208246, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6123919308357348, |
|
"grad_norm": 3.8317296504974365, |
|
"learning_rate": 4.8370542664473805e-08, |
|
"logits/chosen": -1.5284236669540405, |
|
"logits/rejected": -1.517364263534546, |
|
"logps/chosen": -47.17852783203125, |
|
"logps/rejected": -50.44862747192383, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.002234196290373802, |
|
"rewards/margins": 0.001899405149742961, |
|
"rewards/rejected": 0.00033479099511168897, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6195965417867435, |
|
"grad_norm": 2.7945921421051025, |
|
"learning_rate": 4.829528694507624e-08, |
|
"logits/chosen": -1.5346529483795166, |
|
"logits/rejected": -1.5194079875946045, |
|
"logps/chosen": -56.83978271484375, |
|
"logps/rejected": -56.75861358642578, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0023796563036739826, |
|
"rewards/margins": 0.0018080968875437975, |
|
"rewards/rejected": 0.0005715594161301851, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6268011527377522, |
|
"grad_norm": 3.107956647872925, |
|
"learning_rate": 4.821839366319768e-08, |
|
"logits/chosen": -1.5742651224136353, |
|
"logits/rejected": -1.5632001161575317, |
|
"logps/chosen": -47.60409927368164, |
|
"logps/rejected": -50.62064743041992, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.001870837644673884, |
|
"rewards/margins": 0.0018480487633496523, |
|
"rewards/rejected": 2.2788974092691205e-05, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 3.052370309829712, |
|
"learning_rate": 4.813986822411833e-08, |
|
"logits/chosen": -1.5950231552124023, |
|
"logits/rejected": -1.5870921611785889, |
|
"logps/chosen": -46.46802520751953, |
|
"logps/rejected": -47.61067581176758, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0018212845316156745, |
|
"rewards/margins": 0.0015648396220058203, |
|
"rewards/rejected": 0.0002564448514021933, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6412103746397695, |
|
"grad_norm": 2.974740505218506, |
|
"learning_rate": 4.805971614785231e-08, |
|
"logits/chosen": -1.5934207439422607, |
|
"logits/rejected": -1.5831716060638428, |
|
"logps/chosen": -44.20450973510742, |
|
"logps/rejected": -45.84693145751953, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.002194278407841921, |
|
"rewards/margins": 0.0017778873443603516, |
|
"rewards/rejected": 0.000416390917962417, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6484149855907781, |
|
"grad_norm": 3.176281690597534, |
|
"learning_rate": 4.797794306875963e-08, |
|
"logits/chosen": -1.4426735639572144, |
|
"logits/rejected": -1.4458198547363281, |
|
"logps/chosen": -52.887428283691406, |
|
"logps/rejected": -56.02144241333008, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0018950074445456266, |
|
"rewards/margins": 0.0015232457080855966, |
|
"rewards/rejected": 0.0003717617364600301, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6556195965417867, |
|
"grad_norm": 3.130652904510498, |
|
"learning_rate": 4.7894554735150076e-08, |
|
"logits/chosen": -1.4940052032470703, |
|
"logits/rejected": -1.4864368438720703, |
|
"logps/chosen": -50.41922378540039, |
|
"logps/rejected": -51.92753982543945, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0017981244018301368, |
|
"rewards/margins": 0.0012389495968818665, |
|
"rewards/rejected": 0.0005591747467406094, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6628242074927954, |
|
"grad_norm": 2.4986073970794678, |
|
"learning_rate": 4.7809557008879185e-08, |
|
"logits/chosen": -1.5263350009918213, |
|
"logits/rejected": -1.5145517587661743, |
|
"logps/chosen": -42.057559967041016, |
|
"logps/rejected": -43.96914291381836, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0024824230931699276, |
|
"rewards/margins": 0.0021964963525533676, |
|
"rewards/rejected": 0.00028592668240889907, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.670028818443804, |
|
"grad_norm": 3.264617443084717, |
|
"learning_rate": 4.772295586493613e-08, |
|
"logits/chosen": -1.5924278497695923, |
|
"logits/rejected": -1.5789930820465088, |
|
"logps/chosen": -46.364784240722656, |
|
"logps/rejected": -48.81365203857422, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0025308418553322554, |
|
"rewards/margins": 0.001982019515708089, |
|
"rewards/rejected": 0.0005488225724548101, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6772334293948127, |
|
"grad_norm": 2.300140619277954, |
|
"learning_rate": 4.763475739102374e-08, |
|
"logits/chosen": -1.473141074180603, |
|
"logits/rejected": -1.468505620956421, |
|
"logps/chosen": -54.870941162109375, |
|
"logps/rejected": -55.6484489440918, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.002576081547886133, |
|
"rewards/margins": 0.0023079104721546173, |
|
"rewards/rejected": 0.0002681712794583291, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6844380403458213, |
|
"grad_norm": 2.9154794216156006, |
|
"learning_rate": 4.754496778713054e-08, |
|
"logits/chosen": -1.4293240308761597, |
|
"logits/rejected": -1.4433194398880005, |
|
"logps/chosen": -46.24222183227539, |
|
"logps/rejected": -50.87912368774414, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0029074286576360464, |
|
"rewards/margins": 0.0015344502171501517, |
|
"rewards/rejected": 0.0013729783240705729, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 2.978912830352783, |
|
"learning_rate": 4.7453593365094926e-08, |
|
"logits/chosen": -1.5649144649505615, |
|
"logits/rejected": -1.5566378831863403, |
|
"logps/chosen": -48.9169807434082, |
|
"logps/rejected": -51.32488250732422, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0020612571388483047, |
|
"rewards/margins": 0.0017103856662288308, |
|
"rewards/rejected": 0.00035087167634628713, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6988472622478387, |
|
"grad_norm": 3.66062068939209, |
|
"learning_rate": 4.736064054816145e-08, |
|
"logits/chosen": -1.579488754272461, |
|
"logits/rejected": -1.5711945295333862, |
|
"logps/chosen": -44.430999755859375, |
|
"logps/rejected": -47.700523376464844, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.0031583986710757017, |
|
"rewards/margins": 0.0025061373598873615, |
|
"rewards/rejected": 0.0006522616604343057, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7060518731988472, |
|
"grad_norm": 2.772946357727051, |
|
"learning_rate": 4.726611587052933e-08, |
|
"logits/chosen": -1.4304895401000977, |
|
"logits/rejected": -1.4331814050674438, |
|
"logps/chosen": -50.69477462768555, |
|
"logps/rejected": -55.800628662109375, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0024115366395562887, |
|
"rewards/margins": 0.0015366144943982363, |
|
"rewards/rejected": 0.0008749221451580524, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7132564841498559, |
|
"grad_norm": 3.8805480003356934, |
|
"learning_rate": 4.71700259768931e-08, |
|
"logits/chosen": -1.5389564037322998, |
|
"logits/rejected": -1.5328103303909302, |
|
"logps/chosen": -50.487335205078125, |
|
"logps/rejected": -51.9775390625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.002420094795525074, |
|
"rewards/margins": 0.001630530459806323, |
|
"rewards/rejected": 0.0007895643939264119, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7204610951008645, |
|
"grad_norm": 2.787208080291748, |
|
"learning_rate": 4.707237762197549e-08, |
|
"logits/chosen": -1.5227080583572388, |
|
"logits/rejected": -1.511089563369751, |
|
"logps/chosen": -47.02164840698242, |
|
"logps/rejected": -49.01847839355469, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0033734552562236786, |
|
"rewards/margins": 0.0018900551367551088, |
|
"rewards/rejected": 0.0014834003522992134, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7276657060518732, |
|
"grad_norm": 3.9273159503936768, |
|
"learning_rate": 4.697317767005265e-08, |
|
"logits/chosen": -1.530651330947876, |
|
"logits/rejected": -1.519158959388733, |
|
"logps/chosen": -43.01877212524414, |
|
"logps/rejected": -44.823909759521484, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0028608546126633883, |
|
"rewards/margins": 0.002323360648006201, |
|
"rewards/rejected": 0.0005374938482418656, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7348703170028819, |
|
"grad_norm": 2.8428092002868652, |
|
"learning_rate": 4.6872433094471577e-08, |
|
"logits/chosen": -1.5484195947647095, |
|
"logits/rejected": -1.5358097553253174, |
|
"logps/chosen": -46.577796936035156, |
|
"logps/rejected": -48.426883697509766, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0024873907677829266, |
|
"rewards/margins": 0.0014864472905173898, |
|
"rewards/rejected": 0.0010009428951889277, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7420749279538905, |
|
"grad_norm": 2.6796295642852783, |
|
"learning_rate": 4.677015097715994e-08, |
|
"logits/chosen": -1.4803837537765503, |
|
"logits/rejected": -1.4727258682250977, |
|
"logps/chosen": -43.49967575073242, |
|
"logps/rejected": -46.801334381103516, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.003408711403608322, |
|
"rewards/margins": 0.0028357082046568394, |
|
"rewards/rejected": 0.0005730032571591437, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 2.420775890350342, |
|
"learning_rate": 4.666633850812825e-08, |
|
"logits/chosen": -1.523738145828247, |
|
"logits/rejected": -1.507525086402893, |
|
"logps/chosen": -46.27619552612305, |
|
"logps/rejected": -48.205955505371094, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.002326422370970249, |
|
"rewards/margins": 0.0019570752047002316, |
|
"rewards/rejected": 0.00036934722447767854, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7564841498559077, |
|
"grad_norm": 2.361326217651367, |
|
"learning_rate": 4.656100298496439e-08, |
|
"logits/chosen": -1.4339568614959717, |
|
"logits/rejected": -1.4203391075134277, |
|
"logps/chosen": -41.24075698852539, |
|
"logps/rejected": -44.33374786376953, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0036104682367295027, |
|
"rewards/margins": 0.003143618581816554, |
|
"rewards/rejected": 0.0004668496549129486, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7636887608069164, |
|
"grad_norm": 3.083638906478882, |
|
"learning_rate": 4.6454151812320715e-08, |
|
"logits/chosen": -1.5105210542678833, |
|
"logits/rejected": -1.48483407497406, |
|
"logps/chosen": -47.20132064819336, |
|
"logps/rejected": -48.682518005371094, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0033471949864178896, |
|
"rewards/margins": 0.003038776572793722, |
|
"rewards/rejected": 0.0003084186464548111, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.770893371757925, |
|
"grad_norm": 3.846761465072632, |
|
"learning_rate": 4.6345792501393434e-08, |
|
"logits/chosen": -1.4992892742156982, |
|
"logits/rejected": -1.4937262535095215, |
|
"logps/chosen": -53.687469482421875, |
|
"logps/rejected": -57.72792434692383, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.003973551094532013, |
|
"rewards/margins": 0.003104776842519641, |
|
"rewards/rejected": 0.0008687739027664065, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7780979827089337, |
|
"grad_norm": 3.0516538619995117, |
|
"learning_rate": 4.6235932669394676e-08, |
|
"logits/chosen": -1.5073257684707642, |
|
"logits/rejected": -1.4990612268447876, |
|
"logps/chosen": -48.099449157714844, |
|
"logps/rejected": -51.08936309814453, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.004019264131784439, |
|
"rewards/margins": 0.003163648769259453, |
|
"rewards/rejected": 0.0008556157117709517, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7853025936599424, |
|
"grad_norm": 3.5501997470855713, |
|
"learning_rate": 4.612458003901698e-08, |
|
"logits/chosen": -1.5299046039581299, |
|
"logits/rejected": -1.5243635177612305, |
|
"logps/chosen": -52.48954391479492, |
|
"logps/rejected": -56.07282257080078, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0027964431792497635, |
|
"rewards/margins": 0.0034413684625178576, |
|
"rewards/rejected": -0.0006449251086451113, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.792507204610951, |
|
"grad_norm": 3.1133222579956055, |
|
"learning_rate": 4.6011742437890476e-08, |
|
"logits/chosen": -1.5386149883270264, |
|
"logits/rejected": -1.515916347503662, |
|
"logps/chosen": -47.325008392333984, |
|
"logps/rejected": -48.78601837158203, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.002987902145832777, |
|
"rewards/margins": 0.0024031363427639008, |
|
"rewards/rejected": 0.0005847656866535544, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7997118155619597, |
|
"grad_norm": 2.2972168922424316, |
|
"learning_rate": 4.589742779803259e-08, |
|
"logits/chosen": -1.547487735748291, |
|
"logits/rejected": -1.5349434614181519, |
|
"logps/chosen": -46.49794387817383, |
|
"logps/rejected": -48.665802001953125, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0025873552076518536, |
|
"rewards/margins": 0.0025315138045698404, |
|
"rewards/rejected": 5.5841472203610465e-05, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 2.634446620941162, |
|
"learning_rate": 4.5781644155290486e-08, |
|
"logits/chosen": -1.4874818325042725, |
|
"logits/rejected": -1.4774929285049438, |
|
"logps/chosen": -45.514652252197266, |
|
"logps/rejected": -46.51914596557617, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.004184176214039326, |
|
"rewards/margins": 0.0034972827415913343, |
|
"rewards/rejected": 0.000686893705278635, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8141210374639769, |
|
"grad_norm": 2.841136932373047, |
|
"learning_rate": 4.566439964877613e-08, |
|
"logits/chosen": -1.522335410118103, |
|
"logits/rejected": -1.5161679983139038, |
|
"logps/chosen": -43.42735290527344, |
|
"logps/rejected": -45.222328186035156, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.002366938628256321, |
|
"rewards/margins": 0.0020665768533945084, |
|
"rewards/rejected": 0.00030036186217330396, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8213256484149856, |
|
"grad_norm": 2.926987409591675, |
|
"learning_rate": 4.554570252029421e-08, |
|
"logits/chosen": -1.5708011388778687, |
|
"logits/rejected": -1.5619392395019531, |
|
"logps/chosen": -46.80385971069336, |
|
"logps/rejected": -49.117591857910156, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.004434407688677311, |
|
"rewards/margins": 0.004192848689854145, |
|
"rewards/rejected": 0.00024155872233677655, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8285302593659942, |
|
"grad_norm": 2.6681439876556396, |
|
"learning_rate": 4.542556111376274e-08, |
|
"logits/chosen": -1.5655062198638916, |
|
"logits/rejected": -1.5541120767593384, |
|
"logps/chosen": -48.73239517211914, |
|
"logps/rejected": -50.90290069580078, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.003030436811968684, |
|
"rewards/margins": 0.0029130682814866304, |
|
"rewards/rejected": 0.00011736871965695173, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8357348703170029, |
|
"grad_norm": 3.0899651050567627, |
|
"learning_rate": 4.5303983874626506e-08, |
|
"logits/chosen": -1.5412328243255615, |
|
"logits/rejected": -1.5294244289398193, |
|
"logps/chosen": -50.64223098754883, |
|
"logps/rejected": -51.3464469909668, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.002868660492822528, |
|
"rewards/margins": 0.002512868959456682, |
|
"rewards/rejected": 0.00035579182440415025, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8429394812680115, |
|
"grad_norm": 3.553450345993042, |
|
"learning_rate": 4.518097934926339e-08, |
|
"logits/chosen": -1.4599460363388062, |
|
"logits/rejected": -1.4345753192901611, |
|
"logps/chosen": -46.791717529296875, |
|
"logps/rejected": -46.883766174316406, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.003114109393209219, |
|
"rewards/margins": 0.0031202633399516344, |
|
"rewards/rejected": -6.154028142191237e-06, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8501440922190202, |
|
"grad_norm": 3.8637311458587646, |
|
"learning_rate": 4.505655618438363e-08, |
|
"logits/chosen": -1.4245116710662842, |
|
"logits/rejected": -1.4106541872024536, |
|
"logps/chosen": -48.68230438232422, |
|
"logps/rejected": -49.66895294189453, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.002959758508950472, |
|
"rewards/margins": 0.0026471882592886686, |
|
"rewards/rejected": 0.0003125699586234987, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8573487031700289, |
|
"grad_norm": 2.923635244369507, |
|
"learning_rate": 4.4930723126421945e-08, |
|
"logits/chosen": -1.5897529125213623, |
|
"logits/rejected": -1.5666835308074951, |
|
"logps/chosen": -49.047271728515625, |
|
"logps/rejected": -50.365779876708984, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.003261597827076912, |
|
"rewards/margins": 0.0032364025246351957, |
|
"rewards/rejected": 2.519541885703802e-05, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 3.259798049926758, |
|
"learning_rate": 4.48034890209227e-08, |
|
"logits/chosen": -1.4653351306915283, |
|
"logits/rejected": -1.4455980062484741, |
|
"logps/chosen": -51.658111572265625, |
|
"logps/rejected": -53.60918426513672, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.002871064469218254, |
|
"rewards/margins": 0.0033232111018151045, |
|
"rewards/rejected": -0.0004521465743891895, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8717579250720461, |
|
"grad_norm": 2.646080493927002, |
|
"learning_rate": 4.4674862811918155e-08, |
|
"logits/chosen": -1.4465787410736084, |
|
"logits/rejected": -1.4438260793685913, |
|
"logps/chosen": -43.387939453125, |
|
"logps/rejected": -46.49829864501953, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0034131291322410107, |
|
"rewards/margins": 0.00300223333761096, |
|
"rewards/rejected": 0.0004108964931219816, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8789625360230547, |
|
"grad_norm": 3.445535659790039, |
|
"learning_rate": 4.454485354129966e-08, |
|
"logits/chosen": -1.4952054023742676, |
|
"logits/rejected": -1.489473819732666, |
|
"logps/chosen": -46.59187698364258, |
|
"logps/rejected": -50.117591857910156, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0038944887928664684, |
|
"rewards/margins": 0.0038705854676663876, |
|
"rewards/rejected": 2.390358167758677e-05, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8861671469740634, |
|
"grad_norm": 2.979490041732788, |
|
"learning_rate": 4.4413470348182124e-08, |
|
"logits/chosen": -1.4463694095611572, |
|
"logits/rejected": -1.4229583740234375, |
|
"logps/chosen": -48.736202239990234, |
|
"logps/rejected": -50.797584533691406, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0038722821045666933, |
|
"rewards/margins": 0.0038511925376951694, |
|
"rewards/rejected": 2.1089776055305265e-05, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8933717579250721, |
|
"grad_norm": 3.6062870025634766, |
|
"learning_rate": 4.42807224682615e-08, |
|
"logits/chosen": -1.503015160560608, |
|
"logits/rejected": -1.490422248840332, |
|
"logps/chosen": -42.87591552734375, |
|
"logps/rejected": -45.96302032470703, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0038226053584367037, |
|
"rewards/margins": 0.004490352235734463, |
|
"rewards/rejected": -0.0006677471101284027, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9005763688760807, |
|
"grad_norm": 2.5306854248046875, |
|
"learning_rate": 4.4146619233165604e-08, |
|
"logits/chosen": -1.551155686378479, |
|
"logits/rejected": -1.5466840267181396, |
|
"logps/chosen": -50.65498733520508, |
|
"logps/rejected": -54.01371383666992, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.002975714858621359, |
|
"rewards/margins": 0.0028749865014106035, |
|
"rewards/rejected": 0.00010072816803585738, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9077809798270894, |
|
"grad_norm": 3.129319906234741, |
|
"learning_rate": 4.4011170069798126e-08, |
|
"logits/chosen": -1.5055732727050781, |
|
"logits/rejected": -1.5217446088790894, |
|
"logps/chosen": -46.49773025512695, |
|
"logps/rejected": -53.42799758911133, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0027830186299979687, |
|
"rewards/margins": 0.00282860454171896, |
|
"rewards/rejected": -4.558537693810649e-05, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9149855907780979, |
|
"grad_norm": 3.1904845237731934, |
|
"learning_rate": 4.387438449967594e-08, |
|
"logits/chosen": -1.454516053199768, |
|
"logits/rejected": -1.4410918951034546, |
|
"logps/chosen": -45.38161087036133, |
|
"logps/rejected": -47.975074768066406, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.004763299599289894, |
|
"rewards/margins": 0.00504193315282464, |
|
"rewards/rejected": -0.00027863297145813704, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 3.450941324234009, |
|
"learning_rate": 4.373627213825983e-08, |
|
"logits/chosen": -1.6091142892837524, |
|
"logits/rejected": -1.5993342399597168, |
|
"logps/chosen": -46.181339263916016, |
|
"logps/rejected": -49.70648193359375, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0049495515413582325, |
|
"rewards/margins": 0.005530401133000851, |
|
"rewards/rejected": -0.0005808495916426182, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9293948126801153, |
|
"grad_norm": 2.4806911945343018, |
|
"learning_rate": 4.359684269427848e-08, |
|
"logits/chosen": -1.5663807392120361, |
|
"logits/rejected": -1.563518762588501, |
|
"logps/chosen": -45.587608337402344, |
|
"logps/rejected": -49.11951446533203, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.004525287542492151, |
|
"rewards/margins": 0.004190221894532442, |
|
"rewards/rejected": 0.0003350655024405569, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9365994236311239, |
|
"grad_norm": 3.0177292823791504, |
|
"learning_rate": 4.34561059690461e-08, |
|
"logits/chosen": -1.609378457069397, |
|
"logits/rejected": -1.6081126928329468, |
|
"logps/chosen": -47.325008392333984, |
|
"logps/rejected": -48.884395599365234, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0018209271365776658, |
|
"rewards/margins": 0.0013891037087887526, |
|
"rewards/rejected": 0.0004318233986850828, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9438040345821326, |
|
"grad_norm": 2.676215171813965, |
|
"learning_rate": 4.3314071855773314e-08, |
|
"logits/chosen": -1.5698845386505127, |
|
"logits/rejected": -1.5718752145767212, |
|
"logps/chosen": -41.882728576660156, |
|
"logps/rejected": -45.07906723022461, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0035904725082218647, |
|
"rewards/margins": 0.0036595568526536226, |
|
"rewards/rejected": -6.908467912580818e-05, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9510086455331412, |
|
"grad_norm": 3.0929179191589355, |
|
"learning_rate": 4.3170750338871806e-08, |
|
"logits/chosen": -1.5062233209609985, |
|
"logits/rejected": -1.491188406944275, |
|
"logps/chosen": -46.517478942871094, |
|
"logps/rejected": -49.685264587402344, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.004791845567524433, |
|
"rewards/margins": 0.004565626382827759, |
|
"rewards/rejected": 0.00022621969401370734, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9582132564841499, |
|
"grad_norm": 3.0005862712860107, |
|
"learning_rate": 4.3026151493252414e-08, |
|
"logits/chosen": -1.5496270656585693, |
|
"logits/rejected": -1.5283520221710205, |
|
"logps/chosen": -51.4772834777832, |
|
"logps/rejected": -52.89916229248047, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004191336687654257, |
|
"rewards/margins": 0.0047586942091584206, |
|
"rewards/rejected": -0.0005673574050888419, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9654178674351584, |
|
"grad_norm": 3.5196878910064697, |
|
"learning_rate": 4.2880285483616895e-08, |
|
"logits/chosen": -1.5336337089538574, |
|
"logits/rejected": -1.530709981918335, |
|
"logps/chosen": -45.714988708496094, |
|
"logps/rejected": -49.015140533447266, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.003844538237899542, |
|
"rewards/margins": 0.0039407783187925816, |
|
"rewards/rejected": -9.623957157600671e-05, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9726224783861671, |
|
"grad_norm": 2.7437596321105957, |
|
"learning_rate": 4.273316256374342e-08, |
|
"logits/chosen": -1.4035046100616455, |
|
"logits/rejected": -1.398699164390564, |
|
"logps/chosen": -52.25505447387695, |
|
"logps/rejected": -53.14892578125, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.004184540826827288, |
|
"rewards/margins": 0.0038869075942784548, |
|
"rewards/rejected": 0.0002976330870296806, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 3.3687148094177246, |
|
"learning_rate": 4.258479307576576e-08, |
|
"logits/chosen": -1.5007444620132446, |
|
"logits/rejected": -1.4955756664276123, |
|
"logps/chosen": -43.784767150878906, |
|
"logps/rejected": -45.702003479003906, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.005375358276069164, |
|
"rewards/margins": 0.005684197880327702, |
|
"rewards/rejected": -0.00030883969157002866, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9870317002881844, |
|
"grad_norm": 2.7493855953216553, |
|
"learning_rate": 4.243518744944626e-08, |
|
"logits/chosen": -1.5043120384216309, |
|
"logits/rejected": -1.5005708932876587, |
|
"logps/chosen": -43.27696228027344, |
|
"logps/rejected": -47.130455017089844, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.003993698861449957, |
|
"rewards/margins": 0.004580583423376083, |
|
"rewards/rejected": -0.0005868846783414483, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9942363112391931, |
|
"grad_norm": 3.398181915283203, |
|
"learning_rate": 4.22843562014427e-08, |
|
"logits/chosen": -1.449882984161377, |
|
"logits/rejected": -1.4400885105133057, |
|
"logps/chosen": -46.8693962097168, |
|
"logps/rejected": -49.02799987792969, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004130179528146982, |
|
"rewards/margins": 0.0028191660530865192, |
|
"rewards/rejected": 0.001311013475060463, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0014409221902016, |
|
"grad_norm": 3.157179832458496, |
|
"learning_rate": 4.2132309934569e-08, |
|
"logits/chosen": -1.5672855377197266, |
|
"logits/rejected": -1.562207579612732, |
|
"logps/chosen": -43.753822326660156, |
|
"logps/rejected": -46.12738800048828, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0052392082288861275, |
|
"rewards/margins": 0.0038300647865980864, |
|
"rewards/rejected": 0.0014091433258727193, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0086455331412103, |
|
"grad_norm": 2.479654312133789, |
|
"learning_rate": 4.197905933704989e-08, |
|
"logits/chosen": -1.4313812255859375, |
|
"logits/rejected": -1.4214115142822266, |
|
"logps/chosen": -47.267311096191406, |
|
"logps/rejected": -49.97428512573242, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.005089783109724522, |
|
"rewards/margins": 0.007234425283968449, |
|
"rewards/rejected": -0.002144642174243927, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.015850144092219, |
|
"grad_norm": 2.7182703018188477, |
|
"learning_rate": 4.1824615181769577e-08, |
|
"logits/chosen": -1.486061453819275, |
|
"logits/rejected": -1.492903232574463, |
|
"logps/chosen": -43.82571792602539, |
|
"logps/rejected": -47.75173568725586, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004842250142246485, |
|
"rewards/margins": 0.005944275297224522, |
|
"rewards/rejected": -0.0011020256206393242, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0230547550432276, |
|
"grad_norm": 3.1151888370513916, |
|
"learning_rate": 4.1668988325514434e-08, |
|
"logits/chosen": -1.5238953828811646, |
|
"logits/rejected": -1.5135290622711182, |
|
"logps/chosen": -49.40985107421875, |
|
"logps/rejected": -52.1383171081543, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.005019115749746561, |
|
"rewards/margins": 0.006819140166044235, |
|
"rewards/rejected": -0.0018000241834670305, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0302593659942363, |
|
"grad_norm": 3.011781930923462, |
|
"learning_rate": 4.1512189708209844e-08, |
|
"logits/chosen": -1.573925256729126, |
|
"logits/rejected": -1.5644539594650269, |
|
"logps/chosen": -38.25946807861328, |
|
"logps/rejected": -39.431949615478516, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.004985497798770666, |
|
"rewards/margins": 0.0047707511112093925, |
|
"rewards/rejected": 0.0002147464983863756, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 3.615877866744995, |
|
"learning_rate": 4.1354230352151143e-08, |
|
"logits/chosen": -1.504152536392212, |
|
"logits/rejected": -1.4912039041519165, |
|
"logps/chosen": -56.4444694519043, |
|
"logps/rejected": -56.67643356323242, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004153282381594181, |
|
"rewards/margins": 0.005227471701800823, |
|
"rewards/rejected": -0.0010741890873759985, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0446685878962536, |
|
"grad_norm": 2.5767173767089844, |
|
"learning_rate": 4.119512136122882e-08, |
|
"logits/chosen": -1.6086022853851318, |
|
"logits/rejected": -1.6209228038787842, |
|
"logps/chosen": -42.2785530090332, |
|
"logps/rejected": -48.44477081298828, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.004324558191001415, |
|
"rewards/margins": 0.006945625878870487, |
|
"rewards/rejected": -0.002621066989377141, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0518731988472623, |
|
"grad_norm": 3.475733757019043, |
|
"learning_rate": 4.103487392014795e-08, |
|
"logits/chosen": -1.475531816482544, |
|
"logits/rejected": -1.4568547010421753, |
|
"logps/chosen": -46.373329162597656, |
|
"logps/rejected": -51.0313835144043, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.005704123992472887, |
|
"rewards/margins": 0.008804896846413612, |
|
"rewards/rejected": -0.0031007737852633, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.059077809798271, |
|
"grad_norm": 2.9147305488586426, |
|
"learning_rate": 4.087349929364192e-08, |
|
"logits/chosen": -1.5652912855148315, |
|
"logits/rejected": -1.544355869293213, |
|
"logps/chosen": -42.559722900390625, |
|
"logps/rejected": -45.870784759521484, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.004617183469235897, |
|
"rewards/margins": 0.0069281430914998055, |
|
"rewards/rejected": -0.002310959156602621, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0662824207492796, |
|
"grad_norm": 2.4540820121765137, |
|
"learning_rate": 4.0711008825680645e-08, |
|
"logits/chosen": -1.5043457746505737, |
|
"logits/rejected": -1.4851245880126953, |
|
"logps/chosen": -47.31243896484375, |
|
"logps/rejected": -50.1568489074707, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004572049714624882, |
|
"rewards/margins": 0.005410096608102322, |
|
"rewards/rejected": -0.0008380465442314744, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0734870317002883, |
|
"grad_norm": 3.5471794605255127, |
|
"learning_rate": 4.054741393867306e-08, |
|
"logits/chosen": -1.4756460189819336, |
|
"logits/rejected": -1.4643748998641968, |
|
"logps/chosen": -54.061676025390625, |
|
"logps/rejected": -55.685035705566406, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004487854428589344, |
|
"rewards/margins": 0.005812914576381445, |
|
"rewards/rejected": -0.0013250606134533882, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.080691642651297, |
|
"grad_norm": 2.9531631469726562, |
|
"learning_rate": 4.038272613266419e-08, |
|
"logits/chosen": -1.5454641580581665, |
|
"logits/rejected": -1.5215752124786377, |
|
"logps/chosen": -44.894840240478516, |
|
"logps/rejected": -47.37709045410156, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.004894919227808714, |
|
"rewards/margins": 0.005796266719698906, |
|
"rewards/rejected": -0.000901346851605922, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0878962536023056, |
|
"grad_norm": 3.38706111907959, |
|
"learning_rate": 4.0216956984526784e-08, |
|
"logits/chosen": -1.5542356967926025, |
|
"logits/rejected": -1.550040364265442, |
|
"logps/chosen": -42.899436950683594, |
|
"logps/rejected": -45.57080078125, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.005091445986181498, |
|
"rewards/margins": 0.0070404186844825745, |
|
"rewards/rejected": -0.0019489733967930079, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 3.101982831954956, |
|
"learning_rate": 4.0050118147147446e-08, |
|
"logits/chosen": -1.5179659128189087, |
|
"logits/rejected": -1.509966254234314, |
|
"logps/chosen": -53.40665817260742, |
|
"logps/rejected": -52.11634063720703, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0031676869839429855, |
|
"rewards/margins": 0.0020833946764469147, |
|
"rewards/rejected": 0.0010842925403267145, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1023054755043227, |
|
"grad_norm": 3.091132640838623, |
|
"learning_rate": 3.988222134860755e-08, |
|
"logits/chosen": -1.563659906387329, |
|
"logits/rejected": -1.5506489276885986, |
|
"logps/chosen": -47.28032684326172, |
|
"logps/rejected": -51.66279983520508, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.005120830610394478, |
|
"rewards/margins": 0.0066344826482236385, |
|
"rewards/rejected": -0.0015136522706598043, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.1095100864553313, |
|
"grad_norm": 3.1105594635009766, |
|
"learning_rate": 3.9713278391358724e-08, |
|
"logits/chosen": -1.5746492147445679, |
|
"logits/rejected": -1.5628042221069336, |
|
"logps/chosen": -45.93760681152344, |
|
"logps/rejected": -49.18937301635742, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.005176537670195103, |
|
"rewards/margins": 0.005757451057434082, |
|
"rewards/rejected": -0.0005809130962006748, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.11671469740634, |
|
"grad_norm": 2.4670963287353516, |
|
"learning_rate": 3.954330115139328e-08, |
|
"logits/chosen": -1.5431309938430786, |
|
"logits/rejected": -1.5327889919281006, |
|
"logps/chosen": -46.76984405517578, |
|
"logps/rejected": -48.848941802978516, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.004580738488584757, |
|
"rewards/margins": 0.006636142730712891, |
|
"rewards/rejected": -0.0020554042421281338, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1239193083573487, |
|
"grad_norm": 4.051297187805176, |
|
"learning_rate": 3.937230157740931e-08, |
|
"logits/chosen": -1.5918155908584595, |
|
"logits/rejected": -1.5730479955673218, |
|
"logps/chosen": -47.90183639526367, |
|
"logps/rejected": -51.474891662597656, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.006330148316919804, |
|
"rewards/margins": 0.007905492559075356, |
|
"rewards/rejected": -0.0015753433108329773, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1311239193083573, |
|
"grad_norm": 2.364239454269409, |
|
"learning_rate": 3.920029168997077e-08, |
|
"logits/chosen": -1.5559653043746948, |
|
"logits/rejected": -1.5427926778793335, |
|
"logps/chosen": -48.723899841308594, |
|
"logps/rejected": -51.545448303222656, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.004865005612373352, |
|
"rewards/margins": 0.005895170383155346, |
|
"rewards/rejected": -0.001030164072290063, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.138328530259366, |
|
"grad_norm": 3.9678614139556885, |
|
"learning_rate": 3.9027283580662476e-08, |
|
"logits/chosen": -1.5198521614074707, |
|
"logits/rejected": -1.5075480937957764, |
|
"logps/chosen": -49.6241340637207, |
|
"logps/rejected": -52.78132247924805, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.005017532967031002, |
|
"rewards/margins": 0.008875850588083267, |
|
"rewards/rejected": -0.003858317853882909, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.1455331412103746, |
|
"grad_norm": 3.9845945835113525, |
|
"learning_rate": 3.885328941124014e-08, |
|
"logits/chosen": -1.5015017986297607, |
|
"logits/rejected": -1.488577127456665, |
|
"logps/chosen": -45.922607421875, |
|
"logps/rejected": -50.63169860839844, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.005352140404284, |
|
"rewards/margins": 0.007262455765157938, |
|
"rewards/rejected": -0.0019103146623820066, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 3.0348169803619385, |
|
"learning_rate": 3.867832141277539e-08, |
|
"logits/chosen": -1.5485866069793701, |
|
"logits/rejected": -1.5293524265289307, |
|
"logps/chosen": -49.10395812988281, |
|
"logps/rejected": -51.272361755371094, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.004336564801633358, |
|
"rewards/margins": 0.006302339024841785, |
|
"rewards/rejected": -0.0019657742232084274, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.159942363112392, |
|
"grad_norm": 3.4307823181152344, |
|
"learning_rate": 3.850239188479606e-08, |
|
"logits/chosen": -1.4621508121490479, |
|
"logits/rejected": -1.4584615230560303, |
|
"logps/chosen": -46.74929428100586, |
|
"logps/rejected": -49.1221809387207, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004755138885229826, |
|
"rewards/margins": 0.006490757223218679, |
|
"rewards/rejected": -0.0017356185708194971, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1671469740634006, |
|
"grad_norm": 3.5623254776000977, |
|
"learning_rate": 3.832551319442151e-08, |
|
"logits/chosen": -1.5857195854187012, |
|
"logits/rejected": -1.5824106931686401, |
|
"logps/chosen": -49.597537994384766, |
|
"logps/rejected": -53.8158073425293, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.00521459337323904, |
|
"rewards/margins": 0.006661839783191681, |
|
"rewards/rejected": -0.0014472461771219969, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1743515850144093, |
|
"grad_norm": 4.247158527374268, |
|
"learning_rate": 3.81476977754933e-08, |
|
"logits/chosen": -1.3996719121932983, |
|
"logits/rejected": -1.3850539922714233, |
|
"logps/chosen": -51.30238723754883, |
|
"logps/rejected": -50.659263610839844, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.004295485559850931, |
|
"rewards/margins": 0.006129544228315353, |
|
"rewards/rejected": -0.0018340591341257095, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.181556195965418, |
|
"grad_norm": 2.404221773147583, |
|
"learning_rate": 3.796895812770114e-08, |
|
"logits/chosen": -1.5022691488265991, |
|
"logits/rejected": -1.4923193454742432, |
|
"logps/chosen": -45.77785110473633, |
|
"logps/rejected": -47.2498664855957, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005898059345781803, |
|
"rewards/margins": 0.007563650608062744, |
|
"rewards/rejected": -0.00166559056378901, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1887608069164266, |
|
"grad_norm": 3.0766351222991943, |
|
"learning_rate": 3.7789306815704216e-08, |
|
"logits/chosen": -1.5271815061569214, |
|
"logits/rejected": -1.5167516469955444, |
|
"logps/chosen": -40.93082046508789, |
|
"logps/rejected": -42.0340461730957, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0035404022783041, |
|
"rewards/margins": 0.004842911381274462, |
|
"rewards/rejected": -0.0013025096850469708, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.195965417867435, |
|
"grad_norm": 2.643650531768799, |
|
"learning_rate": 3.760875646824795e-08, |
|
"logits/chosen": -1.395875334739685, |
|
"logits/rejected": -1.3972784280776978, |
|
"logps/chosen": -46.130271911621094, |
|
"logps/rejected": -48.43110656738281, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.003639611881226301, |
|
"rewards/margins": 0.007139836438000202, |
|
"rewards/rejected": -0.00350022385828197, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2031700288184437, |
|
"grad_norm": 3.6421399116516113, |
|
"learning_rate": 3.742731977727623e-08, |
|
"logits/chosen": -1.5392658710479736, |
|
"logits/rejected": -1.5320463180541992, |
|
"logps/chosen": -45.195159912109375, |
|
"logps/rejected": -49.122718811035156, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.006233454681932926, |
|
"rewards/margins": 0.007511253468692303, |
|
"rewards/rejected": -0.0012777980882674456, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 3.6778624057769775, |
|
"learning_rate": 3.7245009497039244e-08, |
|
"logits/chosen": -1.4356926679611206, |
|
"logits/rejected": -1.4204113483428955, |
|
"logps/chosen": -45.431312561035156, |
|
"logps/rejected": -49.4974479675293, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0038647849578410387, |
|
"rewards/margins": 0.007974617183208466, |
|
"rewards/rejected": -0.0041098324581980705, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.217579250720461, |
|
"grad_norm": 2.6512179374694824, |
|
"learning_rate": 3.7061838443196886e-08, |
|
"logits/chosen": -1.5117601156234741, |
|
"logits/rejected": -1.5023605823516846, |
|
"logps/chosen": -50.03330612182617, |
|
"logps/rejected": -52.160560607910156, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006138836033642292, |
|
"rewards/margins": 0.009467920288443565, |
|
"rewards/rejected": -0.003329083789139986, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2247838616714697, |
|
"grad_norm": 3.0043179988861084, |
|
"learning_rate": 3.68778194919179e-08, |
|
"logits/chosen": -1.4723100662231445, |
|
"logits/rejected": -1.4666262865066528, |
|
"logps/chosen": -50.07857894897461, |
|
"logps/rejected": -53.3220100402832, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.008099446073174477, |
|
"rewards/margins": 0.010907831601798534, |
|
"rewards/rejected": -0.0028083862271159887, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2319884726224783, |
|
"grad_norm": 3.6292684078216553, |
|
"learning_rate": 3.66929655789747e-08, |
|
"logits/chosen": -1.571274995803833, |
|
"logits/rejected": -1.5532617568969727, |
|
"logps/chosen": -41.95909118652344, |
|
"logps/rejected": -46.51947784423828, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.005592281464487314, |
|
"rewards/margins": 0.008521712385118008, |
|
"rewards/rejected": -0.002929429989308119, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.239193083573487, |
|
"grad_norm": 2.337404727935791, |
|
"learning_rate": 3.6507289698834064e-08, |
|
"logits/chosen": -1.471616506576538, |
|
"logits/rejected": -1.4550542831420898, |
|
"logps/chosen": -43.53514862060547, |
|
"logps/rejected": -46.06405258178711, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.005335145629942417, |
|
"rewards/margins": 0.008476309478282928, |
|
"rewards/rejected": -0.0031411624513566494, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2463976945244957, |
|
"grad_norm": 4.029110431671143, |
|
"learning_rate": 3.6320804903743684e-08, |
|
"logits/chosen": -1.5162017345428467, |
|
"logits/rejected": -1.5111761093139648, |
|
"logps/chosen": -45.39423370361328, |
|
"logps/rejected": -49.17229080200195, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0036242641508579254, |
|
"rewards/margins": 0.009098870679736137, |
|
"rewards/rejected": -0.005474607460200787, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.2536023054755043, |
|
"grad_norm": 2.6486778259277344, |
|
"learning_rate": 3.61335243028146e-08, |
|
"logits/chosen": -1.495854377746582, |
|
"logits/rejected": -1.4898358583450317, |
|
"logps/chosen": -48.8822135925293, |
|
"logps/rejected": -51.64506912231445, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.004486840683966875, |
|
"rewards/margins": 0.00841403380036354, |
|
"rewards/rejected": -0.003927193582057953, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.260806916426513, |
|
"grad_norm": 3.2701268196105957, |
|
"learning_rate": 3.5945461061099736e-08, |
|
"logits/chosen": -1.4389398097991943, |
|
"logits/rejected": -1.4077644348144531, |
|
"logps/chosen": -50.76543426513672, |
|
"logps/rejected": -49.68498992919922, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.006598909851163626, |
|
"rewards/margins": 0.012235610745847225, |
|
"rewards/rejected": -0.0056367008946835995, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 2.968621253967285, |
|
"learning_rate": 3.5756628398668446e-08, |
|
"logits/chosen": -1.5589392185211182, |
|
"logits/rejected": -1.5580534934997559, |
|
"logps/chosen": -51.238853454589844, |
|
"logps/rejected": -53.6939697265625, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.003833180759102106, |
|
"rewards/margins": 0.009422466158866882, |
|
"rewards/rejected": -0.005589285399764776, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2752161383285303, |
|
"grad_norm": 2.652916193008423, |
|
"learning_rate": 3.556703958967716e-08, |
|
"logits/chosen": -1.557422399520874, |
|
"logits/rejected": -1.543720006942749, |
|
"logps/chosen": -44.34943771362305, |
|
"logps/rejected": -47.97236251831055, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0036465474404394627, |
|
"rewards/margins": 0.007039936725050211, |
|
"rewards/rejected": -0.003393388818949461, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.282420749279539, |
|
"grad_norm": 4.006654739379883, |
|
"learning_rate": 3.5376707961436297e-08, |
|
"logits/chosen": -1.5315957069396973, |
|
"logits/rejected": -1.5152517557144165, |
|
"logps/chosen": -53.33189010620117, |
|
"logps/rejected": -53.46404266357422, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.004476548638194799, |
|
"rewards/margins": 0.005307585000991821, |
|
"rewards/rejected": -0.0008310364792123437, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2896253602305476, |
|
"grad_norm": 2.423352003097534, |
|
"learning_rate": 3.51856468934734e-08, |
|
"logits/chosen": -1.4921244382858276, |
|
"logits/rejected": -1.4953742027282715, |
|
"logps/chosen": -46.34716033935547, |
|
"logps/rejected": -48.64600372314453, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.004633346572518349, |
|
"rewards/margins": 0.00402105925604701, |
|
"rewards/rejected": 0.0006122869672253728, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2968299711815563, |
|
"grad_norm": 3.358710289001465, |
|
"learning_rate": 3.499386981659262e-08, |
|
"logits/chosen": -1.5787866115570068, |
|
"logits/rejected": -1.570294737815857, |
|
"logps/chosen": -45.49131774902344, |
|
"logps/rejected": -51.6963996887207, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.006135477218776941, |
|
"rewards/margins": 0.008345494046807289, |
|
"rewards/rejected": -0.002210016595199704, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.304034582132565, |
|
"grad_norm": 2.5438549518585205, |
|
"learning_rate": 3.480139021193057e-08, |
|
"logits/chosen": -1.4628260135650635, |
|
"logits/rejected": -1.4626216888427734, |
|
"logps/chosen": -46.477264404296875, |
|
"logps/rejected": -49.938323974609375, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004061608109623194, |
|
"rewards/margins": 0.007341681979596615, |
|
"rewards/rejected": -0.003280073869973421, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3112391930835736, |
|
"grad_norm": 4.097665786743164, |
|
"learning_rate": 3.4608221610008666e-08, |
|
"logits/chosen": -1.554359793663025, |
|
"logits/rejected": -1.5441957712173462, |
|
"logps/chosen": -40.67582321166992, |
|
"logps/rejected": -45.34405517578125, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.005710783414542675, |
|
"rewards/margins": 0.011185348965227604, |
|
"rewards/rejected": -0.005474564619362354, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.318443804034582, |
|
"grad_norm": 2.3061835765838623, |
|
"learning_rate": 3.4414377589782e-08, |
|
"logits/chosen": -1.489579439163208, |
|
"logits/rejected": -1.4888752698898315, |
|
"logps/chosen": -44.27809143066406, |
|
"logps/rejected": -46.678611755371094, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.003133392659947276, |
|
"rewards/margins": 0.008380794897675514, |
|
"rewards/rejected": -0.0052474020048975945, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 2.2423462867736816, |
|
"learning_rate": 3.4219871777684745e-08, |
|
"logits/chosen": -1.5047807693481445, |
|
"logits/rejected": -1.4803636074066162, |
|
"logps/chosen": -48.244483947753906, |
|
"logps/rejected": -49.67208480834961, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.004430696833878756, |
|
"rewards/margins": 0.008876333944499493, |
|
"rewards/rejected": -0.004445637576282024, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.3328530259365994, |
|
"grad_norm": 3.133010149002075, |
|
"learning_rate": 3.4024717846672364e-08, |
|
"logits/chosen": -1.55430006980896, |
|
"logits/rejected": -1.5407884120941162, |
|
"logps/chosen": -43.84967803955078, |
|
"logps/rejected": -47.088844299316406, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.003224353538826108, |
|
"rewards/margins": 0.009443853981792927, |
|
"rewards/rejected": -0.0062195006757974625, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.340057636887608, |
|
"grad_norm": 3.2332677841186523, |
|
"learning_rate": 3.382892951526036e-08, |
|
"logits/chosen": -1.5088350772857666, |
|
"logits/rejected": -1.4985536336898804, |
|
"logps/chosen": -48.5511589050293, |
|
"logps/rejected": -53.52519607543945, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004791603423655033, |
|
"rewards/margins": 0.01051848940551281, |
|
"rewards/rejected": -0.005726885981857777, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.3472622478386167, |
|
"grad_norm": 3.0702571868896484, |
|
"learning_rate": 3.3632520546559974e-08, |
|
"logits/chosen": -1.477571964263916, |
|
"logits/rejected": -1.4503300189971924, |
|
"logps/chosen": -42.1833610534668, |
|
"logps/rejected": -46.2810173034668, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.005192961078137159, |
|
"rewards/margins": 0.0098468828946352, |
|
"rewards/rejected": -0.004653920419514179, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.3544668587896254, |
|
"grad_norm": 3.413985013961792, |
|
"learning_rate": 3.34355047473107e-08, |
|
"logits/chosen": -1.509441614151001, |
|
"logits/rejected": -1.4929606914520264, |
|
"logps/chosen": -49.14087677001953, |
|
"logps/rejected": -50.32634353637695, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.003523029386997223, |
|
"rewards/margins": 0.00808628834784031, |
|
"rewards/rejected": -0.004563258029520512, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.361671469740634, |
|
"grad_norm": 3.261613607406616, |
|
"learning_rate": 3.323789596690971e-08, |
|
"logits/chosen": -1.4438692331314087, |
|
"logits/rejected": -1.4406040906906128, |
|
"logps/chosen": -46.01984405517578, |
|
"logps/rejected": -50.35694122314453, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.004411002155393362, |
|
"rewards/margins": 0.009367231279611588, |
|
"rewards/rejected": -0.0049562300555408, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3688760806916427, |
|
"grad_norm": 2.0379562377929688, |
|
"learning_rate": 3.303970809643828e-08, |
|
"logits/chosen": -1.5255157947540283, |
|
"logits/rejected": -1.5278053283691406, |
|
"logps/chosen": -45.327415466308594, |
|
"logps/rejected": -49.04973220825195, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.005483666434884071, |
|
"rewards/margins": 0.008489950560033321, |
|
"rewards/rejected": -0.0030062845908105373, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3760806916426513, |
|
"grad_norm": 2.9973742961883545, |
|
"learning_rate": 3.2840955067685356e-08, |
|
"logits/chosen": -1.563256859779358, |
|
"logits/rejected": -1.5630435943603516, |
|
"logps/chosen": -46.008609771728516, |
|
"logps/rejected": -50.46318054199219, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0052226148545742035, |
|
"rewards/margins": 0.011490847915410995, |
|
"rewards/rejected": -0.006268233992159367, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 2.7613437175750732, |
|
"learning_rate": 3.264165085216817e-08, |
|
"logits/chosen": -1.580041766166687, |
|
"logits/rejected": -1.5727280378341675, |
|
"logps/chosen": -38.53578186035156, |
|
"logps/rejected": -43.83596420288086, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0044464850798249245, |
|
"rewards/margins": 0.009077567607164383, |
|
"rewards/rejected": -0.004631082061678171, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3904899135446687, |
|
"grad_norm": 4.180810451507568, |
|
"learning_rate": 3.244180946015008e-08, |
|
"logits/chosen": -1.4442589282989502, |
|
"logits/rejected": -1.435718297958374, |
|
"logps/chosen": -52.15314483642578, |
|
"logps/rejected": -53.91064453125, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.004610007628798485, |
|
"rewards/margins": 0.007277486380189657, |
|
"rewards/rejected": -0.0026674787513911724, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.397694524495677, |
|
"grad_norm": 2.520510196685791, |
|
"learning_rate": 3.224144493965578e-08, |
|
"logits/chosen": -1.5796912908554077, |
|
"logits/rejected": -1.5778270959854126, |
|
"logps/chosen": -43.635459899902344, |
|
"logps/rejected": -45.74983215332031, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.003754237201064825, |
|
"rewards/margins": 0.008241266012191772, |
|
"rewards/rejected": -0.00448702834546566, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.4048991354466858, |
|
"grad_norm": 2.8245351314544678, |
|
"learning_rate": 3.204057137548371e-08, |
|
"logits/chosen": -1.5312103033065796, |
|
"logits/rejected": -1.5232504606246948, |
|
"logps/chosen": -43.69184112548828, |
|
"logps/rejected": -47.32173156738281, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.004152680281549692, |
|
"rewards/margins": 0.011306528933346272, |
|
"rewards/rejected": -0.007153847720474005, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4121037463976944, |
|
"grad_norm": 3.71762752532959, |
|
"learning_rate": 3.183920288821597e-08, |
|
"logits/chosen": -1.4898974895477295, |
|
"logits/rejected": -1.4814989566802979, |
|
"logps/chosen": -45.30360412597656, |
|
"logps/rejected": -49.977867126464844, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.004725744016468525, |
|
"rewards/margins": 0.012062130495905876, |
|
"rewards/rejected": -0.0073363869450986385, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.419308357348703, |
|
"grad_norm": 3.813885450363159, |
|
"learning_rate": 3.1637353633225735e-08, |
|
"logits/chosen": -1.5400512218475342, |
|
"logits/rejected": -1.5293104648590088, |
|
"logps/chosen": -41.25631332397461, |
|
"logps/rejected": -45.72794723510742, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.003288590582087636, |
|
"rewards/margins": 0.012727511115372181, |
|
"rewards/rejected": -0.009438920766115189, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4265129682997117, |
|
"grad_norm": 3.278331995010376, |
|
"learning_rate": 3.143503779968213e-08, |
|
"logits/chosen": -1.50669264793396, |
|
"logits/rejected": -1.506869912147522, |
|
"logps/chosen": -45.4344367980957, |
|
"logps/rejected": -49.771766662597656, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.001246375497430563, |
|
"rewards/margins": 0.00855537410825491, |
|
"rewards/rejected": -0.0073089986108243465, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4337175792507204, |
|
"grad_norm": 3.326122760772705, |
|
"learning_rate": 3.1232269609552875e-08, |
|
"logits/chosen": -1.5182191133499146, |
|
"logits/rejected": -1.5079318284988403, |
|
"logps/chosen": -43.68280792236328, |
|
"logps/rejected": -46.17193603515625, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.004092085175216198, |
|
"rewards/margins": 0.009019319899380207, |
|
"rewards/rejected": -0.004927235189825296, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 2.173727035522461, |
|
"learning_rate": 3.102906331660444e-08, |
|
"logits/chosen": -1.5566580295562744, |
|
"logits/rejected": -1.542860507965088, |
|
"logps/chosen": -41.94898223876953, |
|
"logps/rejected": -48.24005126953125, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.005175874102860689, |
|
"rewards/margins": 0.012963724322617054, |
|
"rewards/rejected": -0.007787850685417652, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4481268011527377, |
|
"grad_norm": 3.172349214553833, |
|
"learning_rate": 3.082543320540015e-08, |
|
"logits/chosen": -1.4700515270233154, |
|
"logits/rejected": -1.4550468921661377, |
|
"logps/chosen": -43.87895965576172, |
|
"logps/rejected": -47.5235710144043, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0023733568377792835, |
|
"rewards/margins": 0.009878008626401424, |
|
"rewards/rejected": -0.007504651788622141, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4553314121037464, |
|
"grad_norm": 4.150721549987793, |
|
"learning_rate": 3.062139359029599e-08, |
|
"logits/chosen": -1.557370901107788, |
|
"logits/rejected": -1.553252100944519, |
|
"logps/chosen": -46.466400146484375, |
|
"logps/rejected": -48.92326736450195, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.003435027552768588, |
|
"rewards/margins": 0.010258909314870834, |
|
"rewards/rejected": -0.006823881529271603, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.462536023054755, |
|
"grad_norm": 3.4097900390625, |
|
"learning_rate": 3.041695881443437e-08, |
|
"logits/chosen": -1.5758157968521118, |
|
"logits/rejected": -1.5673719644546509, |
|
"logps/chosen": -46.34037399291992, |
|
"logps/rejected": -50.32112121582031, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0035826407838612795, |
|
"rewards/margins": 0.0064225392416119576, |
|
"rewards/rejected": -0.0028398979920893908, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4697406340057637, |
|
"grad_norm": 4.047444820404053, |
|
"learning_rate": 3.0212143248735886e-08, |
|
"logits/chosen": -1.531328797340393, |
|
"logits/rejected": -1.5289762020111084, |
|
"logps/chosen": -49.777374267578125, |
|
"logps/rejected": -54.45976638793945, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0041795228607952595, |
|
"rewards/margins": 0.01155170239508152, |
|
"rewards/rejected": -0.007372179068624973, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4769452449567724, |
|
"grad_norm": 3.1559255123138428, |
|
"learning_rate": 3.0006961290889077e-08, |
|
"logits/chosen": -1.5212230682373047, |
|
"logits/rejected": -1.4961992502212524, |
|
"logps/chosen": -50.666038513183594, |
|
"logps/rejected": -53.179840087890625, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.005002585239708424, |
|
"rewards/margins": 0.011495384387671947, |
|
"rewards/rejected": -0.006492799613624811, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.484149855907781, |
|
"grad_norm": 2.672394037246704, |
|
"learning_rate": 2.980142736433833e-08, |
|
"logits/chosen": -1.5468004941940308, |
|
"logits/rejected": -1.522716760635376, |
|
"logps/chosen": -44.2849235534668, |
|
"logps/rejected": -44.554542541503906, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0016432057600468397, |
|
"rewards/margins": 0.009843757376074791, |
|
"rewards/rejected": -0.008200552314519882, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4913544668587897, |
|
"grad_norm": 4.110817909240723, |
|
"learning_rate": 2.9595555917269997e-08, |
|
"logits/chosen": -1.556030035018921, |
|
"logits/rejected": -1.5290006399154663, |
|
"logps/chosen": -51.58045196533203, |
|
"logps/rejected": -53.32768630981445, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0018507679924368858, |
|
"rewards/margins": 0.00991340633481741, |
|
"rewards/rejected": -0.008062639273703098, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 3.1407599449157715, |
|
"learning_rate": 2.9389361421596725e-08, |
|
"logits/chosen": -1.4347405433654785, |
|
"logits/rejected": -1.4316017627716064, |
|
"logps/chosen": -49.19403839111328, |
|
"logps/rejected": -53.545204162597656, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0042905667796730995, |
|
"rewards/margins": 0.012316063046455383, |
|
"rewards/rejected": -0.00802549533545971, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.505763688760807, |
|
"grad_norm": 2.570767879486084, |
|
"learning_rate": 2.9182858371940126e-08, |
|
"logits/chosen": -1.5318742990493774, |
|
"logits/rejected": -1.5175909996032715, |
|
"logps/chosen": -42.606483459472656, |
|
"logps/rejected": -46.17628860473633, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0029234064277261496, |
|
"rewards/margins": 0.013527562841773033, |
|
"rewards/rejected": -0.01060415618121624, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.5129682997118157, |
|
"grad_norm": 3.642561674118042, |
|
"learning_rate": 2.8976061284611908e-08, |
|
"logits/chosen": -1.4698221683502197, |
|
"logits/rejected": -1.4809458255767822, |
|
"logps/chosen": -41.60303497314453, |
|
"logps/rejected": -45.31582260131836, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0049819787964224815, |
|
"rewards/margins": 0.011425621807575226, |
|
"rewards/rejected": -0.006443643011152744, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.5201729106628243, |
|
"grad_norm": 3.2771670818328857, |
|
"learning_rate": 2.8768984696593384e-08, |
|
"logits/chosen": -1.4798085689544678, |
|
"logits/rejected": -1.4632951021194458, |
|
"logps/chosen": -44.77037811279297, |
|
"logps/rejected": -47.931278228759766, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.005007242318242788, |
|
"rewards/margins": 0.013035112991929054, |
|
"rewards/rejected": -0.008027870208024979, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.527377521613833, |
|
"grad_norm": 3.2423009872436523, |
|
"learning_rate": 2.8561643164513637e-08, |
|
"logits/chosen": -1.334378957748413, |
|
"logits/rejected": -1.318719744682312, |
|
"logps/chosen": -51.95469284057617, |
|
"logps/rejected": -54.27800369262695, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.003995453007519245, |
|
"rewards/margins": 0.008951379917562008, |
|
"rewards/rejected": -0.00495592737570405, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5345821325648417, |
|
"grad_norm": 3.3206255435943604, |
|
"learning_rate": 2.8354051263626227e-08, |
|
"logits/chosen": -1.46604323387146, |
|
"logits/rejected": -1.466449499130249, |
|
"logps/chosen": -50.30503463745117, |
|
"logps/rejected": -52.849761962890625, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0029683273751288652, |
|
"rewards/margins": 0.00942517165094614, |
|
"rewards/rejected": -0.006456844508647919, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.54178674351585, |
|
"grad_norm": 4.676600456237793, |
|
"learning_rate": 2.8146223586784573e-08, |
|
"logits/chosen": -1.4574966430664062, |
|
"logits/rejected": -1.4443973302841187, |
|
"logps/chosen": -52.07725143432617, |
|
"logps/rejected": -55.014495849609375, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.003784618806093931, |
|
"rewards/margins": 0.012685844674706459, |
|
"rewards/rejected": -0.00890122540295124, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.5489913544668588, |
|
"grad_norm": 3.3971612453460693, |
|
"learning_rate": 2.7938174743416205e-08, |
|
"logits/chosen": -1.3624401092529297, |
|
"logits/rejected": -1.3555347919464111, |
|
"logps/chosen": -51.51061248779297, |
|
"logps/rejected": -55.449851989746094, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0027002261485904455, |
|
"rewards/margins": 0.0111739132553339, |
|
"rewards/rejected": -0.008473685942590237, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 3.0555315017700195, |
|
"learning_rate": 2.7729919358495728e-08, |
|
"logits/chosen": -1.5037503242492676, |
|
"logits/rejected": -1.4941872358322144, |
|
"logps/chosen": -52.352867126464844, |
|
"logps/rejected": -53.544029235839844, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0034458369482308626, |
|
"rewards/margins": 0.012528976425528526, |
|
"rewards/rejected": -0.009083138778805733, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.563400576368876, |
|
"grad_norm": 3.8352720737457275, |
|
"learning_rate": 2.7521472071516772e-08, |
|
"logits/chosen": -1.4726879596710205, |
|
"logits/rejected": -1.4662566184997559, |
|
"logps/chosen": -43.68360900878906, |
|
"logps/rejected": -47.45799255371094, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.005658530630171299, |
|
"rewards/margins": 0.0100140031427145, |
|
"rewards/rejected": -0.004355472978204489, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5706051873198847, |
|
"grad_norm": 3.8918683528900146, |
|
"learning_rate": 2.731284753546289e-08, |
|
"logits/chosen": -1.480924367904663, |
|
"logits/rejected": -1.4743003845214844, |
|
"logps/chosen": -53.0228157043457, |
|
"logps/rejected": -56.795387268066406, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0002322282234672457, |
|
"rewards/margins": 0.007790303323417902, |
|
"rewards/rejected": -0.008022531867027283, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5778097982708934, |
|
"grad_norm": 4.045823097229004, |
|
"learning_rate": 2.710406041577751e-08, |
|
"logits/chosen": -1.5512325763702393, |
|
"logits/rejected": -1.5479477643966675, |
|
"logps/chosen": -47.99006271362305, |
|
"logps/rejected": -53.79365921020508, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.00434616394340992, |
|
"rewards/margins": 0.010301290079951286, |
|
"rewards/rejected": -0.005955126136541367, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.585014409221902, |
|
"grad_norm": 3.306387424468994, |
|
"learning_rate": 2.6895125389333017e-08, |
|
"logits/chosen": -1.5372812747955322, |
|
"logits/rejected": -1.5224703550338745, |
|
"logps/chosen": -48.444435119628906, |
|
"logps/rejected": -52.62398147583008, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.007179601583629847, |
|
"rewards/margins": 0.016431204974651337, |
|
"rewards/rejected": -0.009251603856682777, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5922190201729105, |
|
"grad_norm": 3.1701338291168213, |
|
"learning_rate": 2.6686057143399028e-08, |
|
"logits/chosen": -1.5057289600372314, |
|
"logits/rejected": -1.4977985620498657, |
|
"logps/chosen": -48.5191764831543, |
|
"logps/rejected": -50.016353607177734, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.004845013376325369, |
|
"rewards/margins": 0.010277556255459785, |
|
"rewards/rejected": -0.005432543810456991, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5994236311239192, |
|
"grad_norm": 3.6945786476135254, |
|
"learning_rate": 2.647687037460996e-08, |
|
"logits/chosen": -1.4846514463424683, |
|
"logits/rejected": -1.4771755933761597, |
|
"logps/chosen": -52.859474182128906, |
|
"logps/rejected": -58.45153045654297, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.006427395157516003, |
|
"rewards/margins": 0.01427508145570755, |
|
"rewards/rejected": -0.007847686298191547, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.6066282420749278, |
|
"grad_norm": 3.2228472232818604, |
|
"learning_rate": 2.626757978793187e-08, |
|
"logits/chosen": -1.50605046749115, |
|
"logits/rejected": -1.4987943172454834, |
|
"logps/chosen": -48.89452362060547, |
|
"logps/rejected": -52.482139587402344, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0004428673128131777, |
|
"rewards/margins": 0.008724686689674854, |
|
"rewards/rejected": -0.009167554788291454, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 2.9732460975646973, |
|
"learning_rate": 2.6058200095628797e-08, |
|
"logits/chosen": -1.5061275959014893, |
|
"logits/rejected": -1.5063308477401733, |
|
"logps/chosen": -40.85835266113281, |
|
"logps/rejected": -46.76417541503906, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.00457608001306653, |
|
"rewards/margins": 0.016967391595244408, |
|
"rewards/rejected": -0.012391313910484314, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.6210374639769451, |
|
"grad_norm": 3.2725095748901367, |
|
"learning_rate": 2.584874601622854e-08, |
|
"logits/chosen": -1.5664986371994019, |
|
"logits/rejected": -1.5493196249008179, |
|
"logps/chosen": -49.408843994140625, |
|
"logps/rejected": -53.2461051940918, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.002165078418329358, |
|
"rewards/margins": 0.00868967454880476, |
|
"rewards/rejected": -0.00652459729462862, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6282420749279538, |
|
"grad_norm": 3.0531673431396484, |
|
"learning_rate": 2.5639232273487993e-08, |
|
"logits/chosen": -1.4604244232177734, |
|
"logits/rejected": -1.4406094551086426, |
|
"logps/chosen": -44.316959381103516, |
|
"logps/rejected": -47.68523025512695, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004358319565653801, |
|
"rewards/margins": 0.01151906605809927, |
|
"rewards/rejected": -0.007160746958106756, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6354466858789625, |
|
"grad_norm": 3.623836040496826, |
|
"learning_rate": 2.5429673595358142e-08, |
|
"logits/chosen": -1.5242135524749756, |
|
"logits/rejected": -1.5093494653701782, |
|
"logps/chosen": -45.795074462890625, |
|
"logps/rejected": -48.57317352294922, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.001714804908260703, |
|
"rewards/margins": 0.0111106988042593, |
|
"rewards/rejected": -0.009395892731845379, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6426512968299711, |
|
"grad_norm": 3.297855854034424, |
|
"learning_rate": 2.5220084712948764e-08, |
|
"logits/chosen": -1.4578922986984253, |
|
"logits/rejected": -1.445966362953186, |
|
"logps/chosen": -52.103614807128906, |
|
"logps/rejected": -55.21147918701172, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0028083932120352983, |
|
"rewards/margins": 0.006879979278892279, |
|
"rewards/rejected": -0.004071586299687624, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.6498559077809798, |
|
"grad_norm": 3.762510061264038, |
|
"learning_rate": 2.5010480359492838e-08, |
|
"logits/chosen": -1.463905692100525, |
|
"logits/rejected": -1.4516466856002808, |
|
"logps/chosen": -49.459896087646484, |
|
"logps/rejected": -49.4564094543457, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0037110510747879744, |
|
"rewards/margins": 0.014671266078948975, |
|
"rewards/rejected": -0.010960215702652931, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.6570605187319885, |
|
"grad_norm": 2.9843027591705322, |
|
"learning_rate": 2.480087526931091e-08, |
|
"logits/chosen": -1.5036156177520752, |
|
"logits/rejected": -1.4841887950897217, |
|
"logps/chosen": -43.37828826904297, |
|
"logps/rejected": -45.4171142578125, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.003712053643539548, |
|
"rewards/margins": 0.01433002669364214, |
|
"rewards/rejected": -0.010617973282933235, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.6642651296829971, |
|
"grad_norm": 3.441652297973633, |
|
"learning_rate": 2.4591284176775326e-08, |
|
"logits/chosen": -1.4469178915023804, |
|
"logits/rejected": -1.4350392818450928, |
|
"logps/chosen": -55.11457061767578, |
|
"logps/rejected": -56.50977325439453, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.003367173718288541, |
|
"rewards/margins": 0.008350704796612263, |
|
"rewards/rejected": -0.004983530845493078, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 2.8864128589630127, |
|
"learning_rate": 2.4381721815274443e-08, |
|
"logits/chosen": -1.5199902057647705, |
|
"logits/rejected": -1.5132654905319214, |
|
"logps/chosen": -43.241661071777344, |
|
"logps/rejected": -46.4239616394043, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0014256946742534637, |
|
"rewards/margins": 0.012737279757857323, |
|
"rewards/rejected": -0.011311585083603859, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6786743515850144, |
|
"grad_norm": 3.1540913581848145, |
|
"learning_rate": 2.4172202916176936e-08, |
|
"logits/chosen": -1.5634369850158691, |
|
"logits/rejected": -1.5546420812606812, |
|
"logps/chosen": -43.037837982177734, |
|
"logps/rejected": -47.856117248535156, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0006109640235081315, |
|
"rewards/margins": 0.014766094274818897, |
|
"rewards/rejected": -0.0141551299020648, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.685878962536023, |
|
"grad_norm": 3.647859573364258, |
|
"learning_rate": 2.3962742207796268e-08, |
|
"logits/chosen": -1.4479600191116333, |
|
"logits/rejected": -1.4379609823226929, |
|
"logps/chosen": -41.66529083251953, |
|
"logps/rejected": -45.63011932373047, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.004694860894232988, |
|
"rewards/margins": 0.016697410494089127, |
|
"rewards/rejected": -0.012002546340227127, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6930835734870318, |
|
"grad_norm": 3.6262919902801514, |
|
"learning_rate": 2.3753354414355334e-08, |
|
"logits/chosen": -1.4229185581207275, |
|
"logits/rejected": -1.400246024131775, |
|
"logps/chosen": -53.5576171875, |
|
"logps/rejected": -55.23441696166992, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0002725389786064625, |
|
"rewards/margins": 0.012355529703199863, |
|
"rewards/rejected": -0.012082991190254688, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.7002881844380404, |
|
"grad_norm": 3.3609025478363037, |
|
"learning_rate": 2.3544054254951408e-08, |
|
"logits/chosen": -1.4647839069366455, |
|
"logits/rejected": -1.4447661638259888, |
|
"logps/chosen": -42.85536193847656, |
|
"logps/rejected": -48.42116165161133, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.00428318977355957, |
|
"rewards/margins": 0.018426140770316124, |
|
"rewards/rejected": -0.014142952859401703, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.707492795389049, |
|
"grad_norm": 3.3678860664367676, |
|
"learning_rate": 2.3334856442521435e-08, |
|
"logits/chosen": -1.5585176944732666, |
|
"logits/rejected": -1.5401690006256104, |
|
"logps/chosen": -51.256141662597656, |
|
"logps/rejected": -51.39955520629883, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0024927393533289433, |
|
"rewards/margins": 0.010601336136460304, |
|
"rewards/rejected": -0.008108596317470074, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.7146974063400577, |
|
"grad_norm": 3.430454730987549, |
|
"learning_rate": 2.3125775682807826e-08, |
|
"logits/chosen": -1.5538517236709595, |
|
"logits/rejected": -1.552392601966858, |
|
"logps/chosen": -49.91531753540039, |
|
"logps/rejected": -53.7569465637207, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0030004787258803844, |
|
"rewards/margins": 0.01481213141232729, |
|
"rewards/rejected": -0.011811653152108192, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.7219020172910664, |
|
"grad_norm": 2.7333407402038574, |
|
"learning_rate": 2.291682667332464e-08, |
|
"logits/chosen": -1.612623929977417, |
|
"logits/rejected": -1.5989640951156616, |
|
"logps/chosen": -46.485713958740234, |
|
"logps/rejected": -49.609825134277344, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.000499680289067328, |
|
"rewards/margins": 0.008998895063996315, |
|
"rewards/rejected": -0.008499214425683022, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 2.9348888397216797, |
|
"learning_rate": 2.2708024102324454e-08, |
|
"logits/chosen": -1.5304839611053467, |
|
"logits/rejected": -1.5251142978668213, |
|
"logps/chosen": -46.70942306518555, |
|
"logps/rejected": -51.74241256713867, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.00332896551117301, |
|
"rewards/margins": 0.016565581783652306, |
|
"rewards/rejected": -0.013236616738140583, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7363112391930837, |
|
"grad_norm": 3.6855247020721436, |
|
"learning_rate": 2.2499382647765797e-08, |
|
"logits/chosen": -1.4964635372161865, |
|
"logits/rejected": -1.4977600574493408, |
|
"logps/chosen": -48.41642379760742, |
|
"logps/rejected": -52.0125617980957, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 3.586681486922316e-05, |
|
"rewards/margins": 0.011704354546964169, |
|
"rewards/rejected": -0.011668487451970577, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7435158501440924, |
|
"grad_norm": 2.8829994201660156, |
|
"learning_rate": 2.2290916976281427e-08, |
|
"logits/chosen": -1.4776675701141357, |
|
"logits/rejected": -1.4631723165512085, |
|
"logps/chosen": -43.69971466064453, |
|
"logps/rejected": -46.09686279296875, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.00013883263454772532, |
|
"rewards/margins": 0.014544370584189892, |
|
"rewards/rejected": -0.014683201909065247, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7507204610951008, |
|
"grad_norm": 3.6001625061035156, |
|
"learning_rate": 2.2082641742147238e-08, |
|
"logits/chosen": -1.4721145629882812, |
|
"logits/rejected": -1.4632716178894043, |
|
"logps/chosen": -45.69694519042969, |
|
"logps/rejected": -51.60881805419922, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.000785698473919183, |
|
"rewards/margins": 0.013660689815878868, |
|
"rewards/rejected": -0.012874990701675415, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7579250720461095, |
|
"grad_norm": 3.089428424835205, |
|
"learning_rate": 2.1874571586252177e-08, |
|
"logits/chosen": -1.5453965663909912, |
|
"logits/rejected": -1.5336331129074097, |
|
"logps/chosen": -45.585872650146484, |
|
"logps/rejected": -48.34822082519531, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0007279099081642926, |
|
"rewards/margins": 0.01115675363689661, |
|
"rewards/rejected": -0.010428843088448048, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.7651296829971181, |
|
"grad_norm": 2.425853967666626, |
|
"learning_rate": 2.1666721135069037e-08, |
|
"logits/chosen": -1.5158154964447021, |
|
"logits/rejected": -1.502139687538147, |
|
"logps/chosen": -49.875404357910156, |
|
"logps/rejected": -51.33538055419922, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.001414857804775238, |
|
"rewards/margins": 0.011725572869181633, |
|
"rewards/rejected": -0.010310716927051544, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7723342939481268, |
|
"grad_norm": 2.6500022411346436, |
|
"learning_rate": 2.145910499962628e-08, |
|
"logits/chosen": -1.5762279033660889, |
|
"logits/rejected": -1.5551462173461914, |
|
"logps/chosen": -44.01633834838867, |
|
"logps/rejected": -46.197105407714844, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0036026693414896727, |
|
"rewards/margins": 0.017717977985739708, |
|
"rewards/rejected": -0.014115308411419392, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7795389048991355, |
|
"grad_norm": 3.8977699279785156, |
|
"learning_rate": 2.1251737774480915e-08, |
|
"logits/chosen": -1.5487562417984009, |
|
"logits/rejected": -1.5394015312194824, |
|
"logps/chosen": -53.25288772583008, |
|
"logps/rejected": -55.375213623046875, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0014735452132299542, |
|
"rewards/margins": 0.01222173310816288, |
|
"rewards/rejected": -0.010748187080025673, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 2.550679922103882, |
|
"learning_rate": 2.104463403669264e-08, |
|
"logits/chosen": -1.47690749168396, |
|
"logits/rejected": -1.4564216136932373, |
|
"logps/chosen": -49.03275680541992, |
|
"logps/rejected": -51.274871826171875, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0006415749667212367, |
|
"rewards/margins": 0.01415687520056963, |
|
"rewards/rejected": -0.013515301048755646, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7939481268011528, |
|
"grad_norm": 2.6638097763061523, |
|
"learning_rate": 2.0837808344799028e-08, |
|
"logits/chosen": -1.4521634578704834, |
|
"logits/rejected": -1.4361908435821533, |
|
"logps/chosen": -43.8492431640625, |
|
"logps/rejected": -47.552284240722656, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.006942611187696457, |
|
"rewards/margins": 0.019251275807619095, |
|
"rewards/rejected": -0.012308661825954914, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8011527377521612, |
|
"grad_norm": 3.212559700012207, |
|
"learning_rate": 2.063127523779219e-08, |
|
"logits/chosen": -1.4297634363174438, |
|
"logits/rejected": -1.4289541244506836, |
|
"logps/chosen": -44.858970642089844, |
|
"logps/rejected": -51.319175720214844, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.002955908887088299, |
|
"rewards/margins": 0.019690891727805138, |
|
"rewards/rejected": -0.016734981909394264, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8083573487031699, |
|
"grad_norm": 3.859661102294922, |
|
"learning_rate": 2.0425049234096737e-08, |
|
"logits/chosen": -1.485381841659546, |
|
"logits/rejected": -1.4706742763519287, |
|
"logps/chosen": -49.1226921081543, |
|
"logps/rejected": -51.82140350341797, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 9.506577771389857e-05, |
|
"rewards/margins": 0.014713233336806297, |
|
"rewards/rejected": -0.014618167653679848, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.8155619596541785, |
|
"grad_norm": 2.6423633098602295, |
|
"learning_rate": 2.0219144830549163e-08, |
|
"logits/chosen": -1.4601681232452393, |
|
"logits/rejected": -1.4504835605621338, |
|
"logps/chosen": -49.000816345214844, |
|
"logps/rejected": -52.69274139404297, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0008226001518778503, |
|
"rewards/margins": 0.017380500212311745, |
|
"rewards/rejected": -0.016557898372411728, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.8227665706051872, |
|
"grad_norm": 2.8962371349334717, |
|
"learning_rate": 2.0013576501378823e-08, |
|
"logits/chosen": -1.4357713460922241, |
|
"logits/rejected": -1.4260128736495972, |
|
"logps/chosen": -44.66715621948242, |
|
"logps/rejected": -48.878177642822266, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.00762571394443512, |
|
"rewards/margins": 0.02372068539261818, |
|
"rewards/rejected": -0.01609497331082821, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8299711815561959, |
|
"grad_norm": 3.5678348541259766, |
|
"learning_rate": 1.9808358697190426e-08, |
|
"logits/chosen": -1.4616868495941162, |
|
"logits/rejected": -1.4613463878631592, |
|
"logps/chosen": -40.0428466796875, |
|
"logps/rejected": -45.382728576660156, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0007027705432847142, |
|
"rewards/margins": 0.017785217612981796, |
|
"rewards/rejected": -0.01848798617720604, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8371757925072045, |
|
"grad_norm": 3.052267551422119, |
|
"learning_rate": 1.9603505843948214e-08, |
|
"logits/chosen": -1.4895626306533813, |
|
"logits/rejected": -1.4689642190933228, |
|
"logps/chosen": -41.04018020629883, |
|
"logps/rejected": -46.34733200073242, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0010787765495479107, |
|
"rewards/margins": 0.014137683436274529, |
|
"rewards/rejected": -0.01305890642106533, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 3.034510374069214, |
|
"learning_rate": 1.9399032341961886e-08, |
|
"logits/chosen": -1.4604467153549194, |
|
"logits/rejected": -1.4406030178070068, |
|
"logps/chosen": -44.063720703125, |
|
"logps/rejected": -45.94016647338867, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0032738572917878628, |
|
"rewards/margins": 0.013115392997860909, |
|
"rewards/rejected": -0.009841536171734333, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.8515850144092219, |
|
"grad_norm": 3.6909120082855225, |
|
"learning_rate": 1.9194952564874323e-08, |
|
"logits/chosen": -1.4899951219558716, |
|
"logits/rejected": -1.4772499799728394, |
|
"logps/chosen": -49.38003158569336, |
|
"logps/rejected": -52.764427185058594, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0014510613400489092, |
|
"rewards/margins": 0.015302592888474464, |
|
"rewards/rejected": -0.013851528987288475, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.8587896253602305, |
|
"grad_norm": 2.995894432067871, |
|
"learning_rate": 1.8991280858651157e-08, |
|
"logits/chosen": -1.465537190437317, |
|
"logits/rejected": -1.4445680379867554, |
|
"logps/chosen": -48.079158782958984, |
|
"logps/rejected": -49.74862289428711, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0006449748761951923, |
|
"rewards/margins": 0.014429867267608643, |
|
"rewards/rejected": -0.013784890063107014, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8659942363112392, |
|
"grad_norm": 3.794341802597046, |
|
"learning_rate": 1.8788031540572327e-08, |
|
"logits/chosen": -1.4323540925979614, |
|
"logits/rejected": -1.4187084436416626, |
|
"logps/chosen": -43.358890533447266, |
|
"logps/rejected": -47.263160705566406, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0011780399363487959, |
|
"rewards/margins": 0.017769720405340195, |
|
"rewards/rejected": -0.01659167930483818, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8731988472622478, |
|
"grad_norm": 3.5173821449279785, |
|
"learning_rate": 1.858521889822565e-08, |
|
"logits/chosen": -1.4809000492095947, |
|
"logits/rejected": -1.4711607694625854, |
|
"logps/chosen": -44.77101135253906, |
|
"logps/rejected": -47.36913299560547, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00217301188968122, |
|
"rewards/margins": 0.012151877395808697, |
|
"rewards/rejected": -0.009978866204619408, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.8804034582132565, |
|
"grad_norm": 3.0861990451812744, |
|
"learning_rate": 1.8382857188502422e-08, |
|
"logits/chosen": -1.479575514793396, |
|
"logits/rejected": -1.4646644592285156, |
|
"logps/chosen": -43.409400939941406, |
|
"logps/rejected": -46.272499084472656, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0008953431388363242, |
|
"rewards/margins": 0.015901099890470505, |
|
"rewards/rejected": -0.015005757100880146, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8876080691642652, |
|
"grad_norm": 3.0519216060638428, |
|
"learning_rate": 1.8180960636595234e-08, |
|
"logits/chosen": -1.434323787689209, |
|
"logits/rejected": -1.4235074520111084, |
|
"logps/chosen": -45.47016525268555, |
|
"logps/rejected": -48.84101104736328, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0007277448894456029, |
|
"rewards/margins": 0.018448855727910995, |
|
"rewards/rejected": -0.017721110954880714, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8948126801152738, |
|
"grad_norm": 2.6402575969696045, |
|
"learning_rate": 1.7979543434998015e-08, |
|
"logits/chosen": -1.5166761875152588, |
|
"logits/rejected": -1.5122129917144775, |
|
"logps/chosen": -54.10516357421875, |
|
"logps/rejected": -55.752830505371094, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.002384190447628498, |
|
"rewards/margins": 0.009313153102993965, |
|
"rewards/rejected": -0.011697344481945038, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 3.219054698944092, |
|
"learning_rate": 1.7778619742508345e-08, |
|
"logits/chosen": -1.4986519813537598, |
|
"logits/rejected": -1.4785531759262085, |
|
"logps/chosen": -48.8413200378418, |
|
"logps/rejected": -50.63405990600586, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.001953819999471307, |
|
"rewards/margins": 0.014699439518153667, |
|
"rewards/rejected": -0.016653258353471756, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.9092219020172911, |
|
"grad_norm": 5.369049549102783, |
|
"learning_rate": 1.757820368323213e-08, |
|
"logits/chosen": -1.4477155208587646, |
|
"logits/rejected": -1.431592345237732, |
|
"logps/chosen": -55.639549255371094, |
|
"logps/rejected": -60.893531799316406, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.00035453608143143356, |
|
"rewards/margins": 0.01644885540008545, |
|
"rewards/rejected": -0.01680339314043522, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.9164265129682998, |
|
"grad_norm": 2.708446741104126, |
|
"learning_rate": 1.7378309345590803e-08, |
|
"logits/chosen": -1.518417477607727, |
|
"logits/rejected": -1.5215131044387817, |
|
"logps/chosen": -48.15247344970703, |
|
"logps/rejected": -51.795204162597656, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0003538710006978363, |
|
"rewards/margins": 0.01514039933681488, |
|
"rewards/rejected": -0.01478652935475111, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.9236311239193085, |
|
"grad_norm": 3.0533721446990967, |
|
"learning_rate": 1.717895078133088e-08, |
|
"logits/chosen": -1.5372436046600342, |
|
"logits/rejected": -1.5273748636245728, |
|
"logps/chosen": -45.74188995361328, |
|
"logps/rejected": -50.93372344970703, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.001131549128331244, |
|
"rewards/margins": 0.018048815429210663, |
|
"rewards/rejected": -0.016917267814278603, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.9308357348703171, |
|
"grad_norm": 2.965850830078125, |
|
"learning_rate": 1.698014200453624e-08, |
|
"logits/chosen": -1.5121673345565796, |
|
"logits/rejected": -1.5155454874038696, |
|
"logps/chosen": -48.55657196044922, |
|
"logps/rejected": -53.24010467529297, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -5.956319910183083e-06, |
|
"rewards/margins": 0.00807130616158247, |
|
"rewards/rejected": -0.008077261969447136, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9380403458213258, |
|
"grad_norm": 3.131131410598755, |
|
"learning_rate": 1.6781896990642964e-08, |
|
"logits/chosen": -1.4162065982818604, |
|
"logits/rejected": -1.4067705869674683, |
|
"logps/chosen": -53.68306350708008, |
|
"logps/rejected": -55.64463424682617, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0006439717253670096, |
|
"rewards/margins": 0.012254132889211178, |
|
"rewards/rejected": -0.011610162444412708, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9452449567723344, |
|
"grad_norm": 3.7046432495117188, |
|
"learning_rate": 1.658422967545693e-08, |
|
"logits/chosen": -1.5447876453399658, |
|
"logits/rejected": -1.5232436656951904, |
|
"logps/chosen": -46.61565017700195, |
|
"logps/rejected": -48.878089904785156, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.002516025211662054, |
|
"rewards/margins": 0.013911202549934387, |
|
"rewards/rejected": -0.016427230089902878, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.952449567723343, |
|
"grad_norm": 3.3615024089813232, |
|
"learning_rate": 1.638715395417418e-08, |
|
"logits/chosen": -1.515842318534851, |
|
"logits/rejected": -1.4996858835220337, |
|
"logps/chosen": -47.76423645019531, |
|
"logps/rejected": -50.23698806762695, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0017537068342790008, |
|
"rewards/margins": 0.012197253294289112, |
|
"rewards/rejected": -0.013950960710644722, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 3.4271042346954346, |
|
"learning_rate": 1.619068368040416e-08, |
|
"logits/chosen": -1.5035583972930908, |
|
"logits/rejected": -1.4936153888702393, |
|
"logps/chosen": -42.36717987060547, |
|
"logps/rejected": -48.00276565551758, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0006282638642005622, |
|
"rewards/margins": 0.017301367595791817, |
|
"rewards/rejected": -0.016673101112246513, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.9668587896253602, |
|
"grad_norm": 3.3180654048919678, |
|
"learning_rate": 1.5994832665195853e-08, |
|
"logits/chosen": -1.4341150522232056, |
|
"logits/rejected": -1.4278676509857178, |
|
"logps/chosen": -46.42858123779297, |
|
"logps/rejected": -48.82233428955078, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.00017052926705218852, |
|
"rewards/margins": 0.01208487804979086, |
|
"rewards/rejected": -0.011914348229765892, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9740634005763689, |
|
"grad_norm": 3.3026318550109863, |
|
"learning_rate": 1.5799614676066906e-08, |
|
"logits/chosen": -1.5610630512237549, |
|
"logits/rejected": -1.5564082860946655, |
|
"logps/chosen": -42.62709045410156, |
|
"logps/rejected": -47.11577606201172, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.001482530147768557, |
|
"rewards/margins": 0.01645783707499504, |
|
"rewards/rejected": -0.017940368503332138, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.9812680115273775, |
|
"grad_norm": 2.802278518676758, |
|
"learning_rate": 1.560504343603587e-08, |
|
"logits/chosen": -1.4570392370224, |
|
"logits/rejected": -1.4598525762557983, |
|
"logps/chosen": -47.614715576171875, |
|
"logps/rejected": -53.1636962890625, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0013699980918318033, |
|
"rewards/margins": 0.014168500900268555, |
|
"rewards/rejected": -0.012798503041267395, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9884726224783862, |
|
"grad_norm": 2.706557512283325, |
|
"learning_rate": 1.541113262265748e-08, |
|
"logits/chosen": -1.5580412149429321, |
|
"logits/rejected": -1.5532734394073486, |
|
"logps/chosen": -47.86179733276367, |
|
"logps/rejected": -52.08073043823242, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0002923453284893185, |
|
"rewards/margins": 0.015893833711743355, |
|
"rewards/rejected": -0.015601487830281258, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.9956772334293948, |
|
"grad_norm": 2.82700777053833, |
|
"learning_rate": 1.5217895867061227e-08, |
|
"logits/chosen": -1.4797217845916748, |
|
"logits/rejected": -1.46820068359375, |
|
"logps/chosen": -49.087162017822266, |
|
"logps/rejected": -51.762550354003906, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0006480285665020347, |
|
"rewards/margins": 0.015934782102704048, |
|
"rewards/rejected": -0.016582807525992393, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0028818443804033, |
|
"grad_norm": 3.225586414337158, |
|
"learning_rate": 1.5025346752993098e-08, |
|
"logits/chosen": -1.473181128501892, |
|
"logits/rejected": -1.4776010513305664, |
|
"logps/chosen": -47.254737854003906, |
|
"logps/rejected": -51.428993225097656, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0032721899915486574, |
|
"rewards/margins": 0.009254529140889645, |
|
"rewards/rejected": -0.012526720762252808, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.010086455331412, |
|
"grad_norm": 3.1906368732452393, |
|
"learning_rate": 1.4833498815860756e-08, |
|
"logits/chosen": -1.6033878326416016, |
|
"logits/rejected": -1.594612717628479, |
|
"logps/chosen": -44.750770568847656, |
|
"logps/rejected": -49.424808502197266, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.000528767064679414, |
|
"rewards/margins": 0.019820012152194977, |
|
"rewards/rejected": -0.019291242584586143, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.0172910662824206, |
|
"grad_norm": 3.4794318675994873, |
|
"learning_rate": 1.4642365541781993e-08, |
|
"logits/chosen": -1.4186664819717407, |
|
"logits/rejected": -1.4020098447799683, |
|
"logps/chosen": -46.425323486328125, |
|
"logps/rejected": -51.33763885498047, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.002099757781252265, |
|
"rewards/margins": 0.01701737754046917, |
|
"rewards/rejected": -0.019117135554552078, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.0244956772334293, |
|
"grad_norm": 3.546409845352173, |
|
"learning_rate": 1.4451960366636745e-08, |
|
"logits/chosen": -1.5050753355026245, |
|
"logits/rejected": -1.509311318397522, |
|
"logps/chosen": -50.25437545776367, |
|
"logps/rejected": -54.8748779296875, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.9070675736875273e-05, |
|
"rewards/margins": 0.014723686501383781, |
|
"rewards/rejected": -0.014752757735550404, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.031700288184438, |
|
"grad_norm": 2.9926185607910156, |
|
"learning_rate": 1.4262296675122592e-08, |
|
"logits/chosen": -1.505336880683899, |
|
"logits/rejected": -1.4901078939437866, |
|
"logps/chosen": -43.937278747558594, |
|
"logps/rejected": -48.619163513183594, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0006590264965780079, |
|
"rewards/margins": 0.016862262040376663, |
|
"rewards/rejected": -0.017521290108561516, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.0389048991354466, |
|
"grad_norm": 3.474058151245117, |
|
"learning_rate": 1.407338779981389e-08, |
|
"logits/chosen": -1.4738878011703491, |
|
"logits/rejected": -1.4622704982757568, |
|
"logps/chosen": -41.43745040893555, |
|
"logps/rejected": -46.45849609375, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.001070915604941547, |
|
"rewards/margins": 0.020852236077189445, |
|
"rewards/rejected": -0.02192315086722374, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.0461095100864553, |
|
"grad_norm": 3.1651461124420166, |
|
"learning_rate": 1.3885247020224534e-08, |
|
"logits/chosen": -1.4730961322784424, |
|
"logits/rejected": -1.4625325202941895, |
|
"logps/chosen": -40.98516082763672, |
|
"logps/rejected": -44.32816696166992, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0006273157196119428, |
|
"rewards/margins": 0.020260414108633995, |
|
"rewards/rejected": -0.0196330975741148, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.053314121037464, |
|
"grad_norm": 2.788933277130127, |
|
"learning_rate": 1.369788756187445e-08, |
|
"logits/chosen": -1.5226420164108276, |
|
"logits/rejected": -1.5108869075775146, |
|
"logps/chosen": -46.892662048339844, |
|
"logps/rejected": -48.15214920043945, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0032084626145660877, |
|
"rewards/margins": 0.011149941943585873, |
|
"rewards/rejected": -0.014358404092490673, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.0605187319884726, |
|
"grad_norm": 3.175995111465454, |
|
"learning_rate": 1.3511322595359925e-08, |
|
"logits/chosen": -1.5318832397460938, |
|
"logits/rejected": -1.5205862522125244, |
|
"logps/chosen": -43.323387145996094, |
|
"logps/rejected": -49.04631042480469, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0012531958054751158, |
|
"rewards/margins": 0.019172416999936104, |
|
"rewards/rejected": -0.02042561024427414, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.0677233429394812, |
|
"grad_norm": 3.237109422683716, |
|
"learning_rate": 1.3325565235427716e-08, |
|
"logits/chosen": -1.5520436763763428, |
|
"logits/rejected": -1.5436103343963623, |
|
"logps/chosen": -45.29417037963867, |
|
"logps/rejected": -49.32756423950195, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0010891546262428164, |
|
"rewards/margins": 0.01812928169965744, |
|
"rewards/rejected": -0.019218437373638153, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.07492795389049, |
|
"grad_norm": 3.6626226902008057, |
|
"learning_rate": 1.3140628540053218e-08, |
|
"logits/chosen": -1.4575971364974976, |
|
"logits/rejected": -1.4558467864990234, |
|
"logps/chosen": -45.92155838012695, |
|
"logps/rejected": -49.4353141784668, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0020993829239159822, |
|
"rewards/margins": 0.01558808796107769, |
|
"rewards/rejected": -0.0134887071326375, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.0821325648414986, |
|
"grad_norm": 3.993279218673706, |
|
"learning_rate": 1.2956525509522451e-08, |
|
"logits/chosen": -1.434325933456421, |
|
"logits/rejected": -1.43949556350708, |
|
"logps/chosen": -47.83916473388672, |
|
"logps/rejected": -51.43572998046875, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.001653014332987368, |
|
"rewards/margins": 0.01248462125658989, |
|
"rewards/rejected": -0.01083160750567913, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.089337175792507, |
|
"grad_norm": 3.8572018146514893, |
|
"learning_rate": 1.2773269085518267e-08, |
|
"logits/chosen": -1.5160229206085205, |
|
"logits/rejected": -1.5104598999023438, |
|
"logps/chosen": -52.54634475708008, |
|
"logps/rejected": -56.1825065612793, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0702252641058294e-06, |
|
"rewards/margins": 0.013869213871657848, |
|
"rewards/rejected": -0.013871285133063793, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.096541786743516, |
|
"grad_norm": 2.667027235031128, |
|
"learning_rate": 1.2590872150210574e-08, |
|
"logits/chosen": -1.5915756225585938, |
|
"logits/rejected": -1.575210452079773, |
|
"logps/chosen": -45.68871307373047, |
|
"logps/rejected": -47.84299087524414, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.004803013987839222, |
|
"rewards/margins": 0.019728917628526688, |
|
"rewards/rejected": -0.024531930685043335, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.1037463976945245, |
|
"grad_norm": 2.844864845275879, |
|
"learning_rate": 1.2409347525350775e-08, |
|
"logits/chosen": -1.4988892078399658, |
|
"logits/rejected": -1.480571985244751, |
|
"logps/chosen": -47.45149230957031, |
|
"logps/rejected": -51.376197814941406, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0007760589360259473, |
|
"rewards/margins": 0.020668352022767067, |
|
"rewards/rejected": -0.019892293959856033, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.110951008645533, |
|
"grad_norm": 3.4284279346466064, |
|
"learning_rate": 1.2228707971370421e-08, |
|
"logits/chosen": -1.4963008165359497, |
|
"logits/rejected": -1.477461576461792, |
|
"logps/chosen": -42.0821647644043, |
|
"logps/rejected": -44.58640670776367, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0020004522521048784, |
|
"rewards/margins": 0.01911274902522564, |
|
"rewards/rejected": -0.01711229607462883, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.118155619596542, |
|
"grad_norm": 4.411471843719482, |
|
"learning_rate": 1.2048966186484282e-08, |
|
"logits/chosen": -1.525394320487976, |
|
"logits/rejected": -1.49559485912323, |
|
"logps/chosen": -52.1498908996582, |
|
"logps/rejected": -55.0552864074707, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0017288762610405684, |
|
"rewards/margins": 0.014565639197826385, |
|
"rewards/rejected": -0.016294512897729874, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.1253602305475505, |
|
"grad_norm": 3.3227975368499756, |
|
"learning_rate": 1.187013480579762e-08, |
|
"logits/chosen": -1.489044189453125, |
|
"logits/rejected": -1.4827308654785156, |
|
"logps/chosen": -45.42039489746094, |
|
"logps/rejected": -49.41254425048828, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0047860899940133095, |
|
"rewards/margins": 0.019023966044187546, |
|
"rewards/rejected": -0.02381005696952343, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.132564841498559, |
|
"grad_norm": 4.395474433898926, |
|
"learning_rate": 1.1692226400418073e-08, |
|
"logits/chosen": -1.4131033420562744, |
|
"logits/rejected": -1.4049489498138428, |
|
"logps/chosen": -49.05443572998047, |
|
"logps/rejected": -52.0718994140625, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.004469198640435934, |
|
"rewards/margins": 0.016008742153644562, |
|
"rewards/rejected": -0.020477941259741783, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.139769452449568, |
|
"grad_norm": 2.495917558670044, |
|
"learning_rate": 1.1515253476571923e-08, |
|
"logits/chosen": -1.4480403661727905, |
|
"logits/rejected": -1.4422080516815186, |
|
"logps/chosen": -44.43804931640625, |
|
"logps/rejected": -51.04991149902344, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.003528184024617076, |
|
"rewards/margins": 0.018853966146707535, |
|
"rewards/rejected": -0.02238215133547783, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.1469740634005765, |
|
"grad_norm": 3.3481624126434326, |
|
"learning_rate": 1.133922847472496e-08, |
|
"logits/chosen": -1.4908777475357056, |
|
"logits/rejected": -1.4869946241378784, |
|
"logps/chosen": -52.52874755859375, |
|
"logps/rejected": -55.038414001464844, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0005473472410812974, |
|
"rewards/margins": 0.017001759260892868, |
|
"rewards/rejected": -0.01754910498857498, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.154178674351585, |
|
"grad_norm": 3.3193447589874268, |
|
"learning_rate": 1.1164163768707952e-08, |
|
"logits/chosen": -1.4650285243988037, |
|
"logits/rejected": -1.4538644552230835, |
|
"logps/chosen": -47.43016052246094, |
|
"logps/rejected": -51.636924743652344, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0007126646814867854, |
|
"rewards/margins": 0.02234521321952343, |
|
"rewards/rejected": -0.023057879880070686, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.161383285302594, |
|
"grad_norm": 3.359654426574707, |
|
"learning_rate": 1.0990071664846861e-08, |
|
"logits/chosen": -1.4393339157104492, |
|
"logits/rejected": -1.429203748703003, |
|
"logps/chosen": -48.76907730102539, |
|
"logps/rejected": -53.98026657104492, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.00010694740194594488, |
|
"rewards/margins": 0.023278547450900078, |
|
"rewards/rejected": -0.023171598091721535, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.1685878962536025, |
|
"grad_norm": 3.054330348968506, |
|
"learning_rate": 1.0816964401097739e-08, |
|
"logits/chosen": -1.4826459884643555, |
|
"logits/rejected": -1.472663164138794, |
|
"logps/chosen": -43.025970458984375, |
|
"logps/rejected": -45.86864471435547, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0005824376130476594, |
|
"rewards/margins": 0.016256345435976982, |
|
"rewards/rejected": -0.016838783398270607, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.175792507204611, |
|
"grad_norm": 3.929755926132202, |
|
"learning_rate": 1.0644854146186406e-08, |
|
"logits/chosen": -1.5146058797836304, |
|
"logits/rejected": -1.49686598777771, |
|
"logps/chosen": -48.23189926147461, |
|
"logps/rejected": -52.9786491394043, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0024410493206232786, |
|
"rewards/margins": 0.021744880825281143, |
|
"rewards/rejected": -0.02418592758476734, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.18299711815562, |
|
"grad_norm": 3.215834856033325, |
|
"learning_rate": 1.0473752998753114e-08, |
|
"logits/chosen": -1.4946410655975342, |
|
"logits/rejected": -1.4729467630386353, |
|
"logps/chosen": -48.56220626831055, |
|
"logps/rejected": -51.715721130371094, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0010552393505349755, |
|
"rewards/margins": 0.022361181676387787, |
|
"rewards/rejected": -0.02130594104528427, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.1902017291066285, |
|
"grad_norm": 2.973313093185425, |
|
"learning_rate": 1.030367298650201e-08, |
|
"logits/chosen": -1.4930821657180786, |
|
"logits/rejected": -1.492546796798706, |
|
"logps/chosen": -48.8010368347168, |
|
"logps/rejected": -53.67090606689453, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.004332934506237507, |
|
"rewards/margins": 0.012475891038775444, |
|
"rewards/rejected": -0.016808826476335526, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.1974063400576367, |
|
"grad_norm": 3.869215726852417, |
|
"learning_rate": 1.0134626065355675e-08, |
|
"logits/chosen": -1.5941343307495117, |
|
"logits/rejected": -1.5830258131027222, |
|
"logps/chosen": -49.36521530151367, |
|
"logps/rejected": -52.964378356933594, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0014757805038243532, |
|
"rewards/margins": 0.02204863727092743, |
|
"rewards/rejected": -0.020572859793901443, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.2046109510086453, |
|
"grad_norm": 3.5000784397125244, |
|
"learning_rate": 9.966624118614611e-09, |
|
"logits/chosen": -1.491275668144226, |
|
"logits/rejected": -1.471986174583435, |
|
"logps/chosen": -52.319740295410156, |
|
"logps/rejected": -55.521949768066406, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.002091693691909313, |
|
"rewards/margins": 0.019565019756555557, |
|
"rewards/rejected": -0.01747332513332367, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.211815561959654, |
|
"grad_norm": 2.422412395477295, |
|
"learning_rate": 9.799678956121976e-09, |
|
"logits/chosen": -1.436295747756958, |
|
"logits/rejected": -1.4197183847427368, |
|
"logps/chosen": -45.91994094848633, |
|
"logps/rejected": -48.53052520751953, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.002334743272513151, |
|
"rewards/margins": 0.012487743981182575, |
|
"rewards/rejected": -0.014822488650679588, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.2190201729106627, |
|
"grad_norm": 3.5557055473327637, |
|
"learning_rate": 9.633802313433314e-09, |
|
"logits/chosen": -1.4133331775665283, |
|
"logits/rejected": -1.4094794988632202, |
|
"logps/chosen": -48.400875091552734, |
|
"logps/rejected": -50.9952278137207, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0015882644802331924, |
|
"rewards/margins": 0.016813434660434723, |
|
"rewards/rejected": -0.018401699140667915, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.2262247838616713, |
|
"grad_norm": 2.77536940574646, |
|
"learning_rate": 9.469005850991705e-09, |
|
"logits/chosen": -1.4840987920761108, |
|
"logits/rejected": -1.4713189601898193, |
|
"logps/chosen": -47.20307159423828, |
|
"logps/rejected": -48.701622009277344, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0018709308933466673, |
|
"rewards/margins": 0.01850222796201706, |
|
"rewards/rejected": -0.020373158156871796, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.23342939481268, |
|
"grad_norm": 3.2006888389587402, |
|
"learning_rate": 9.305301153307949e-09, |
|
"logits/chosen": -1.4952600002288818, |
|
"logits/rejected": -1.499306321144104, |
|
"logps/chosen": -40.018882751464844, |
|
"logps/rejected": -44.07697296142578, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.005154062993824482, |
|
"rewards/margins": 0.020358018577098846, |
|
"rewards/rejected": -0.025512080639600754, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.2406340057636887, |
|
"grad_norm": 2.7660207748413086, |
|
"learning_rate": 9.142699728146336e-09, |
|
"logits/chosen": -1.4331061840057373, |
|
"logits/rejected": -1.4245890378952026, |
|
"logps/chosen": -46.127166748046875, |
|
"logps/rejected": -51.11559295654297, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0028220864478498697, |
|
"rewards/margins": 0.018410906195640564, |
|
"rewards/rejected": -0.02123299241065979, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.2478386167146973, |
|
"grad_norm": 2.9993467330932617, |
|
"learning_rate": 8.981213005715627e-09, |
|
"logits/chosen": -1.501518726348877, |
|
"logits/rejected": -1.5010533332824707, |
|
"logps/chosen": -44.233001708984375, |
|
"logps/rejected": -49.084808349609375, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0025021065957844257, |
|
"rewards/margins": 0.01857968047261238, |
|
"rewards/rejected": -0.021081790328025818, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.255043227665706, |
|
"grad_norm": 3.6337778568267822, |
|
"learning_rate": 8.820852337865611e-09, |
|
"logits/chosen": -1.5516728162765503, |
|
"logits/rejected": -1.536139965057373, |
|
"logps/chosen": -45.085365295410156, |
|
"logps/rejected": -48.6732063293457, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0014631979865953326, |
|
"rewards/margins": 0.01818043366074562, |
|
"rewards/rejected": -0.0196436308324337, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.2622478386167146, |
|
"grad_norm": 2.8269283771514893, |
|
"learning_rate": 8.661628997289044e-09, |
|
"logits/chosen": -1.4337918758392334, |
|
"logits/rejected": -1.420588731765747, |
|
"logps/chosen": -45.37371826171875, |
|
"logps/rejected": -49.878807067871094, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.001038391375914216, |
|
"rewards/margins": 0.019226137548685074, |
|
"rewards/rejected": -0.020264528691768646, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.2694524495677233, |
|
"grad_norm": 2.8211851119995117, |
|
"learning_rate": 8.503554176729341e-09, |
|
"logits/chosen": -1.4207173585891724, |
|
"logits/rejected": -1.4155914783477783, |
|
"logps/chosen": -45.44511032104492, |
|
"logps/rejected": -49.06669998168945, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0009460471337661147, |
|
"rewards/margins": 0.0205953698605299, |
|
"rewards/rejected": -0.01964932307600975, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.276657060518732, |
|
"grad_norm": 3.823362350463867, |
|
"learning_rate": 8.346638988193636e-09, |
|
"logits/chosen": -1.4726402759552002, |
|
"logits/rejected": -1.4704288244247437, |
|
"logps/chosen": -40.57404708862305, |
|
"logps/rejected": -46.3819580078125, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0009540968458168209, |
|
"rewards/margins": 0.02004236914217472, |
|
"rewards/rejected": -0.020996464416384697, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.2838616714697406, |
|
"grad_norm": 4.203202247619629, |
|
"learning_rate": 8.19089446217176e-09, |
|
"logits/chosen": -1.4289653301239014, |
|
"logits/rejected": -1.4073545932769775, |
|
"logps/chosen": -45.77039337158203, |
|
"logps/rejected": -51.22684860229492, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.002206823555752635, |
|
"rewards/margins": 0.02788766846060753, |
|
"rewards/rejected": -0.025680843740701675, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.2910662824207493, |
|
"grad_norm": 3.061116933822632, |
|
"learning_rate": 8.036331546860777e-09, |
|
"logits/chosen": -1.4556572437286377, |
|
"logits/rejected": -1.453061819076538, |
|
"logps/chosen": -45.42163848876953, |
|
"logps/rejected": -48.2816276550293, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0038195946253836155, |
|
"rewards/margins": 0.012059660628437996, |
|
"rewards/rejected": -0.015879254788160324, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.298270893371758, |
|
"grad_norm": 3.667120933532715, |
|
"learning_rate": 7.882961107395416e-09, |
|
"logits/chosen": -1.4970252513885498, |
|
"logits/rejected": -1.4873993396759033, |
|
"logps/chosen": -52.32844924926758, |
|
"logps/rejected": -52.70637893676758, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.006024113390594721, |
|
"rewards/margins": 0.013099935837090015, |
|
"rewards/rejected": -0.019124049693346024, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.3054755043227666, |
|
"grad_norm": 4.6425042152404785, |
|
"learning_rate": 7.73079392508428e-09, |
|
"logits/chosen": -1.421281099319458, |
|
"logits/rejected": -1.4252192974090576, |
|
"logps/chosen": -49.780357360839844, |
|
"logps/rejected": -56.728782653808594, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.004081489983946085, |
|
"rewards/margins": 0.02246311493217945, |
|
"rewards/rejected": -0.026544606313109398, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.3126801152737753, |
|
"grad_norm": 3.641294479370117, |
|
"learning_rate": 7.579840696651938e-09, |
|
"logits/chosen": -1.5132240056991577, |
|
"logits/rejected": -1.5067884922027588, |
|
"logps/chosen": -42.3286247253418, |
|
"logps/rejected": -45.735172271728516, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.004373098723590374, |
|
"rewards/margins": 0.01880384422838688, |
|
"rewards/rejected": -0.02317694202065468, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.319884726224784, |
|
"grad_norm": 4.251941680908203, |
|
"learning_rate": 7.43011203348704e-09, |
|
"logits/chosen": -1.3565527200698853, |
|
"logits/rejected": -1.3517991304397583, |
|
"logps/chosen": -53.13508224487305, |
|
"logps/rejected": -53.89665603637695, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.005780863109976053, |
|
"rewards/margins": 0.015468914993107319, |
|
"rewards/rejected": -0.02124977670609951, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.3270893371757926, |
|
"grad_norm": 3.2468185424804688, |
|
"learning_rate": 7.281618460896344e-09, |
|
"logits/chosen": -1.4836031198501587, |
|
"logits/rejected": -1.4732134342193604, |
|
"logps/chosen": -46.293495178222656, |
|
"logps/rejected": -50.772377014160156, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0019498964538797736, |
|
"rewards/margins": 0.018127668648958206, |
|
"rewards/rejected": -0.02007756568491459, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.3342939481268012, |
|
"grad_norm": 2.987872362136841, |
|
"learning_rate": 7.134370417364849e-09, |
|
"logits/chosen": -1.431056022644043, |
|
"logits/rejected": -1.4228883981704712, |
|
"logps/chosen": -45.278480529785156, |
|
"logps/rejected": -48.09427261352539, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.007163041736930609, |
|
"rewards/margins": 0.013337318785488605, |
|
"rewards/rejected": -0.020500360056757927, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.34149855907781, |
|
"grad_norm": 4.016878604888916, |
|
"learning_rate": 6.988378253821981e-09, |
|
"logits/chosen": -1.4581267833709717, |
|
"logits/rejected": -1.4503519535064697, |
|
"logps/chosen": -51.406394958496094, |
|
"logps/rejected": -54.95398712158203, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 8.269222598755732e-05, |
|
"rewards/margins": 0.013455493375658989, |
|
"rewards/rejected": -0.013372799381613731, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.3487031700288186, |
|
"grad_norm": 3.061265468597412, |
|
"learning_rate": 6.8436522329140186e-09, |
|
"logits/chosen": -1.4396600723266602, |
|
"logits/rejected": -1.445936918258667, |
|
"logps/chosen": -46.99631881713867, |
|
"logps/rejected": -50.84511184692383, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0030071113724261522, |
|
"rewards/margins": 0.016658034175634384, |
|
"rewards/rejected": -0.019665146246552467, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.3559077809798272, |
|
"grad_norm": 3.53556752204895, |
|
"learning_rate": 6.700202528282603e-09, |
|
"logits/chosen": -1.4168142080307007, |
|
"logits/rejected": -1.3971381187438965, |
|
"logps/chosen": -48.588462829589844, |
|
"logps/rejected": -51.615745544433594, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.00601479597389698, |
|
"rewards/margins": 0.020779959857463837, |
|
"rewards/rejected": -0.026794757694005966, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.363112391930836, |
|
"grad_norm": 3.7148492336273193, |
|
"learning_rate": 6.558039223849668e-09, |
|
"logits/chosen": -1.5100008249282837, |
|
"logits/rejected": -1.4907362461090088, |
|
"logps/chosen": -46.292266845703125, |
|
"logps/rejected": -52.69337844848633, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0020173946395516396, |
|
"rewards/margins": 0.024240782484412193, |
|
"rewards/rejected": -0.026258179917931557, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.3703170028818445, |
|
"grad_norm": 2.8915648460388184, |
|
"learning_rate": 6.417172313108471e-09, |
|
"logits/chosen": -1.4239482879638672, |
|
"logits/rejected": -1.4123411178588867, |
|
"logps/chosen": -44.13205337524414, |
|
"logps/rejected": -47.372459411621094, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.008008310571312904, |
|
"rewards/margins": 0.016741162165999413, |
|
"rewards/rejected": -0.024749474599957466, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.377521613832853, |
|
"grad_norm": 2.96709942817688, |
|
"learning_rate": 6.277611698421179e-09, |
|
"logits/chosen": -1.5538231134414673, |
|
"logits/rejected": -1.5337212085723877, |
|
"logps/chosen": -39.08244323730469, |
|
"logps/rejected": -45.03202438354492, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.003737329039722681, |
|
"rewards/margins": 0.02486787550151348, |
|
"rewards/rejected": -0.028605204075574875, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.3847262247838614, |
|
"grad_norm": 4.765800476074219, |
|
"learning_rate": 6.139367190322714e-09, |
|
"logits/chosen": -1.4922102689743042, |
|
"logits/rejected": -1.4921929836273193, |
|
"logps/chosen": -52.74082565307617, |
|
"logps/rejected": -58.16496658325195, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0037182599771767855, |
|
"rewards/margins": 0.01609138958156109, |
|
"rewards/rejected": -0.019809648394584656, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.39193083573487, |
|
"grad_norm": 2.5080461502075195, |
|
"learning_rate": 6.002448506831171e-09, |
|
"logits/chosen": -1.4787399768829346, |
|
"logits/rejected": -1.474265217781067, |
|
"logps/chosen": -44.079654693603516, |
|
"logps/rejected": -49.27398681640625, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.003509215312078595, |
|
"rewards/margins": 0.0185092780739069, |
|
"rewards/rejected": -0.022018492221832275, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.3991354466858787, |
|
"grad_norm": 3.059837818145752, |
|
"learning_rate": 5.866865272764607e-09, |
|
"logits/chosen": -1.4946706295013428, |
|
"logits/rejected": -1.4880635738372803, |
|
"logps/chosen": -46.43208694458008, |
|
"logps/rejected": -50.517845153808594, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0064870258793234825, |
|
"rewards/margins": 0.016077209264039993, |
|
"rewards/rejected": -0.0225642379373312, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.4063400576368874, |
|
"grad_norm": 4.633550643920898, |
|
"learning_rate": 5.7326270190645595e-09, |
|
"logits/chosen": -1.327772855758667, |
|
"logits/rejected": -1.3225994110107422, |
|
"logps/chosen": -49.93898391723633, |
|
"logps/rejected": -52.00776290893555, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0053224144503474236, |
|
"rewards/margins": 0.017836768180131912, |
|
"rewards/rejected": -0.02315918542444706, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.413544668587896, |
|
"grad_norm": 3.6820759773254395, |
|
"learning_rate": 5.599743182125938e-09, |
|
"logits/chosen": -1.537332534790039, |
|
"logits/rejected": -1.536694049835205, |
|
"logps/chosen": -48.791236877441406, |
|
"logps/rejected": -54.156349182128906, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0013721368741244078, |
|
"rewards/margins": 0.01742737926542759, |
|
"rewards/rejected": -0.018799515441060066, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.4207492795389047, |
|
"grad_norm": 3.4776570796966553, |
|
"learning_rate": 5.46822310313379e-09, |
|
"logits/chosen": -1.5602588653564453, |
|
"logits/rejected": -1.5649378299713135, |
|
"logps/chosen": -49.44075012207031, |
|
"logps/rejected": -52.79970169067383, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0048035006038844585, |
|
"rewards/margins": 0.012090938165783882, |
|
"rewards/rejected": -0.01689443737268448, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.4279538904899134, |
|
"grad_norm": 3.6628270149230957, |
|
"learning_rate": 5.33807602740658e-09, |
|
"logits/chosen": -1.5563075542449951, |
|
"logits/rejected": -1.5405874252319336, |
|
"logps/chosen": -41.89576721191406, |
|
"logps/rejected": -47.50288009643555, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0014247519429773092, |
|
"rewards/margins": 0.027519360184669495, |
|
"rewards/rejected": -0.0289441104978323, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.435158501440922, |
|
"grad_norm": 3.8053536415100098, |
|
"learning_rate": 5.209311103746334e-09, |
|
"logits/chosen": -1.475118637084961, |
|
"logits/rejected": -1.4708943367004395, |
|
"logps/chosen": -47.132999420166016, |
|
"logps/rejected": -52.47126007080078, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.003361554117873311, |
|
"rewards/margins": 0.019802602007985115, |
|
"rewards/rejected": -0.023164156824350357, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.4423631123919307, |
|
"grad_norm": 4.187764644622803, |
|
"learning_rate": 5.081937383795484e-09, |
|
"logits/chosen": -1.4638832807540894, |
|
"logits/rejected": -1.45353102684021, |
|
"logps/chosen": -44.22564697265625, |
|
"logps/rejected": -48.906394958496094, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0011400593211874366, |
|
"rewards/margins": 0.022827504202723503, |
|
"rewards/rejected": -0.02396756038069725, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.4495677233429394, |
|
"grad_norm": 3.6951913833618164, |
|
"learning_rate": 4.955963821400599e-09, |
|
"logits/chosen": -1.524279236793518, |
|
"logits/rejected": -1.506208062171936, |
|
"logps/chosen": -46.91497802734375, |
|
"logps/rejected": -49.75968933105469, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.002611330011859536, |
|
"rewards/margins": 0.0218039583414793, |
|
"rewards/rejected": -0.024415289983153343, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.456772334293948, |
|
"grad_norm": 2.797675371170044, |
|
"learning_rate": 4.831399271982928e-09, |
|
"logits/chosen": -1.3963180780410767, |
|
"logits/rejected": -1.380124568939209, |
|
"logps/chosen": -49.73040008544922, |
|
"logps/rejected": -52.8338737487793, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0018096374114975333, |
|
"rewards/margins": 0.02105477824807167, |
|
"rewards/rejected": -0.022864414379000664, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.4639769452449567, |
|
"grad_norm": 3.9951834678649902, |
|
"learning_rate": 4.708252491915951e-09, |
|
"logits/chosen": -1.4993171691894531, |
|
"logits/rejected": -1.4892971515655518, |
|
"logps/chosen": -47.080963134765625, |
|
"logps/rejected": -51.67246627807617, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.003982001915574074, |
|
"rewards/margins": 0.021508801728487015, |
|
"rewards/rejected": -0.02549080178141594, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.4711815561959654, |
|
"grad_norm": 2.9166736602783203, |
|
"learning_rate": 4.58653213790981e-09, |
|
"logits/chosen": -1.4970018863677979, |
|
"logits/rejected": -1.4785791635513306, |
|
"logps/chosen": -47.441192626953125, |
|
"logps/rejected": -52.0790901184082, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.002383728977292776, |
|
"rewards/margins": 0.019747789949178696, |
|
"rewards/rejected": -0.02213152125477791, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.478386167146974, |
|
"grad_norm": 3.378357172012329, |
|
"learning_rate": 4.466246766402773e-09, |
|
"logits/chosen": -1.4705661535263062, |
|
"logits/rejected": -1.4513781070709229, |
|
"logps/chosen": -48.759010314941406, |
|
"logps/rejected": -52.48942947387695, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0021346856374293566, |
|
"rewards/margins": 0.023418936878442764, |
|
"rewards/rejected": -0.025553623214364052, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.4855907780979827, |
|
"grad_norm": 3.730109453201294, |
|
"learning_rate": 4.347404832959775e-09, |
|
"logits/chosen": -1.5252504348754883, |
|
"logits/rejected": -1.5139144659042358, |
|
"logps/chosen": -44.665199279785156, |
|
"logps/rejected": -48.89989471435547, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.004804068244993687, |
|
"rewards/margins": 0.019236544147133827, |
|
"rewards/rejected": -0.02404061332345009, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.4927953890489913, |
|
"grad_norm": 3.4961752891540527, |
|
"learning_rate": 4.230014691678016e-09, |
|
"logits/chosen": -1.4771640300750732, |
|
"logits/rejected": -1.4784762859344482, |
|
"logps/chosen": -49.46748352050781, |
|
"logps/rejected": -51.16899871826172, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.006382669322192669, |
|
"rewards/margins": 0.013793786987662315, |
|
"rewards/rejected": -0.020176459103822708, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 3.152644395828247, |
|
"learning_rate": 4.114084594599707e-09, |
|
"logits/chosen": -1.4625837802886963, |
|
"logits/rejected": -1.4395456314086914, |
|
"logps/chosen": -45.60460662841797, |
|
"logps/rejected": -51.585113525390625, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.0029743132181465626, |
|
"rewards/margins": 0.02330465242266655, |
|
"rewards/rejected": -0.026278968900442123, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.5072046109510087, |
|
"grad_norm": 3.1570560932159424, |
|
"learning_rate": 3.9996226911319546e-09, |
|
"logits/chosen": -1.4798153638839722, |
|
"logits/rejected": -1.457363486289978, |
|
"logps/chosen": -45.63786315917969, |
|
"logps/rejected": -48.74847412109375, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0032687117345631123, |
|
"rewards/margins": 0.01941683515906334, |
|
"rewards/rejected": -0.022685546427965164, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.5144092219020173, |
|
"grad_norm": 3.3458943367004395, |
|
"learning_rate": 3.886637027473949e-09, |
|
"logits/chosen": -1.512085199356079, |
|
"logits/rejected": -1.5080697536468506, |
|
"logps/chosen": -47.53780746459961, |
|
"logps/rejected": -51.640533447265625, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0049394266679883, |
|
"rewards/margins": 0.01969726011157036, |
|
"rewards/rejected": -0.024636687710881233, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.521613832853026, |
|
"grad_norm": 3.2042598724365234, |
|
"learning_rate": 3.775135546051295e-09, |
|
"logits/chosen": -1.4048144817352295, |
|
"logits/rejected": -1.4050971269607544, |
|
"logps/chosen": -46.04724884033203, |
|
"logps/rejected": -50.54692077636719, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.004532016348093748, |
|
"rewards/margins": 0.023406367748975754, |
|
"rewards/rejected": -0.02793838456273079, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.5288184438040346, |
|
"grad_norm": 3.420597553253174, |
|
"learning_rate": 3.665126084957723e-09, |
|
"logits/chosen": -1.469005823135376, |
|
"logits/rejected": -1.4607031345367432, |
|
"logps/chosen": -50.867950439453125, |
|
"logps/rejected": -51.225799560546875, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.005590965040028095, |
|
"rewards/margins": 0.01840902492403984, |
|
"rewards/rejected": -0.023999987170100212, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.5360230547550433, |
|
"grad_norm": 3.1464619636535645, |
|
"learning_rate": 3.556616377404101e-09, |
|
"logits/chosen": -1.5020486116409302, |
|
"logits/rejected": -1.4901654720306396, |
|
"logps/chosen": -51.85783004760742, |
|
"logps/rejected": -55.8673210144043, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0066805132664740086, |
|
"rewards/margins": 0.02360719069838524, |
|
"rewards/rejected": -0.03028770722448826, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.543227665706052, |
|
"grad_norm": 3.3954811096191406, |
|
"learning_rate": 3.4496140511748125e-09, |
|
"logits/chosen": -1.485480546951294, |
|
"logits/rejected": -1.4669150114059448, |
|
"logps/chosen": -48.15165710449219, |
|
"logps/rejected": -51.049583435058594, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.007361284457147121, |
|
"rewards/margins": 0.02040119096636772, |
|
"rewards/rejected": -0.027762476354837418, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.5504322766570606, |
|
"grad_norm": 3.955996513366699, |
|
"learning_rate": 3.3441266280915427e-09, |
|
"logits/chosen": -1.4491957426071167, |
|
"logits/rejected": -1.446547269821167, |
|
"logps/chosen": -53.73142623901367, |
|
"logps/rejected": -57.25432205200195, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0023315693251788616, |
|
"rewards/margins": 0.01659976877272129, |
|
"rewards/rejected": -0.01893133856356144, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.5576368876080693, |
|
"grad_norm": 3.521127462387085, |
|
"learning_rate": 3.2401615234845693e-09, |
|
"logits/chosen": -1.492701530456543, |
|
"logits/rejected": -1.475007176399231, |
|
"logps/chosen": -54.01853561401367, |
|
"logps/rejected": -57.351951599121094, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0070812939666211605, |
|
"rewards/margins": 0.023319412022829056, |
|
"rewards/rejected": -0.030400704592466354, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.564841498559078, |
|
"grad_norm": 3.1193737983703613, |
|
"learning_rate": 3.1377260456714375e-09, |
|
"logits/chosen": -1.3230210542678833, |
|
"logits/rejected": -1.3108537197113037, |
|
"logps/chosen": -49.07139587402344, |
|
"logps/rejected": -54.1447868347168, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.007112964056432247, |
|
"rewards/margins": 0.020381804555654526, |
|
"rewards/rejected": -0.027494769543409348, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.5720461095100866, |
|
"grad_norm": 3.66926908493042, |
|
"learning_rate": 3.0368273954432698e-09, |
|
"logits/chosen": -1.5296647548675537, |
|
"logits/rejected": -1.5019731521606445, |
|
"logps/chosen": -51.096221923828125, |
|
"logps/rejected": -53.320556640625, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.006328393705189228, |
|
"rewards/margins": 0.018091212958097458, |
|
"rewards/rejected": -0.02441960759460926, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.5792507204610953, |
|
"grad_norm": 3.0272419452667236, |
|
"learning_rate": 2.937472665558541e-09, |
|
"logits/chosen": -1.5538597106933594, |
|
"logits/rejected": -1.5464608669281006, |
|
"logps/chosen": -45.452919006347656, |
|
"logps/rejected": -47.75053024291992, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.008346345275640488, |
|
"rewards/margins": 0.02381196618080139, |
|
"rewards/rejected": -0.03215831145644188, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.586455331412104, |
|
"grad_norm": 4.039034843444824, |
|
"learning_rate": 2.8396688402445053e-09, |
|
"logits/chosen": -1.574406385421753, |
|
"logits/rejected": -1.557455062866211, |
|
"logps/chosen": -45.44925308227539, |
|
"logps/rejected": -51.695777893066406, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.010429826565086842, |
|
"rewards/margins": 0.024167021736502647, |
|
"rewards/rejected": -0.03459685295820236, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.5936599423631126, |
|
"grad_norm": 4.070069313049316, |
|
"learning_rate": 2.7434227947062324e-09, |
|
"logits/chosen": -1.526296854019165, |
|
"logits/rejected": -1.5150690078735352, |
|
"logps/chosen": -53.84295654296875, |
|
"logps/rejected": -57.382408142089844, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.005254354793578386, |
|
"rewards/margins": 0.015645707026124, |
|
"rewards/rejected": -0.02090005949139595, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.6008645533141213, |
|
"grad_norm": 3.0419559478759766, |
|
"learning_rate": 2.6487412946432976e-09, |
|
"logits/chosen": -1.4455441236495972, |
|
"logits/rejected": -1.4326177835464478, |
|
"logps/chosen": -49.550575256347656, |
|
"logps/rejected": -52.355995178222656, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.013003657571971416, |
|
"rewards/margins": 0.02292976900935173, |
|
"rewards/rejected": -0.03593342751264572, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.60806916426513, |
|
"grad_norm": 3.4363460540771484, |
|
"learning_rate": 2.5556309957742024e-09, |
|
"logits/chosen": -1.4442135095596313, |
|
"logits/rejected": -1.4347007274627686, |
|
"logps/chosen": -44.96186447143555, |
|
"logps/rejected": -52.17518997192383, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0013826603535562754, |
|
"rewards/margins": 0.02670016512274742, |
|
"rewards/rejected": -0.02531750500202179, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.6152737752161386, |
|
"grad_norm": 3.359717607498169, |
|
"learning_rate": 2.4640984433684758e-09, |
|
"logits/chosen": -1.5575648546218872, |
|
"logits/rejected": -1.543250560760498, |
|
"logps/chosen": -50.973533630371094, |
|
"logps/rejected": -53.1517448425293, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.004719638731330633, |
|
"rewards/margins": 0.019542286172509193, |
|
"rewards/rejected": -0.024261925369501114, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.6224783861671472, |
|
"grad_norm": 3.6610288619995117, |
|
"learning_rate": 2.3741500717865987e-09, |
|
"logits/chosen": -1.4447298049926758, |
|
"logits/rejected": -1.456498384475708, |
|
"logps/chosen": -47.38273620605469, |
|
"logps/rejected": -52.238502502441406, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0018823295831680298, |
|
"rewards/margins": 0.01906423084437847, |
|
"rewards/rejected": -0.0209465604275465, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.629682997118156, |
|
"grad_norm": 3.1195054054260254, |
|
"learning_rate": 2.285792204027678e-09, |
|
"logits/chosen": -1.4207738637924194, |
|
"logits/rejected": -1.4102163314819336, |
|
"logps/chosen": -47.51162338256836, |
|
"logps/rejected": -54.73249053955078, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.004845681134611368, |
|
"rewards/margins": 0.02257709763944149, |
|
"rewards/rejected": -0.02742278017103672, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.636887608069164, |
|
"grad_norm": 3.726067066192627, |
|
"learning_rate": 2.199031051284972e-09, |
|
"logits/chosen": -1.4994523525238037, |
|
"logits/rejected": -1.4987690448760986, |
|
"logps/chosen": -48.3768196105957, |
|
"logps/rejected": -52.434532165527344, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.004970946349203587, |
|
"rewards/margins": 0.020046500489115715, |
|
"rewards/rejected": -0.025017445906996727, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.6440922190201728, |
|
"grad_norm": 3.8218469619750977, |
|
"learning_rate": 2.113872712509254e-09, |
|
"logits/chosen": -1.4067411422729492, |
|
"logits/rejected": -1.3974635601043701, |
|
"logps/chosen": -56.22896194458008, |
|
"logps/rejected": -59.406829833984375, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.009745483286678791, |
|
"rewards/margins": 0.020852208137512207, |
|
"rewards/rejected": -0.030597690492868423, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.6512968299711814, |
|
"grad_norm": 3.4928946495056152, |
|
"learning_rate": 2.0303231739801143e-09, |
|
"logits/chosen": -1.410017490386963, |
|
"logits/rejected": -1.3968251943588257, |
|
"logps/chosen": -50.764427185058594, |
|
"logps/rejected": -55.013816833496094, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.009240074083209038, |
|
"rewards/margins": 0.01817367412149906, |
|
"rewards/rejected": -0.02741374634206295, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.65850144092219, |
|
"grad_norm": 3.9155476093292236, |
|
"learning_rate": 1.948388308885102e-09, |
|
"logits/chosen": -1.573972225189209, |
|
"logits/rejected": -1.558861255645752, |
|
"logps/chosen": -50.125404357910156, |
|
"logps/rejected": -53.09186553955078, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.003737266408279538, |
|
"rewards/margins": 0.01738080568611622, |
|
"rewards/rejected": -0.02111807093024254, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.6657060518731988, |
|
"grad_norm": 3.190427780151367, |
|
"learning_rate": 1.86807387690692e-09, |
|
"logits/chosen": -1.5523760318756104, |
|
"logits/rejected": -1.5453031063079834, |
|
"logps/chosen": -50.241458892822266, |
|
"logps/rejected": -57.71672821044922, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0018135461723431945, |
|
"rewards/margins": 0.03013269044458866, |
|
"rewards/rejected": -0.03194623440504074, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.6729106628242074, |
|
"grad_norm": 3.522392511367798, |
|
"learning_rate": 1.789385523818493e-09, |
|
"logits/chosen": -1.4759515523910522, |
|
"logits/rejected": -1.4781670570373535, |
|
"logps/chosen": -45.24992370605469, |
|
"logps/rejected": -51.2096061706543, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0035810978151857853, |
|
"rewards/margins": 0.022562062367796898, |
|
"rewards/rejected": -0.026143159717321396, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.680115273775216, |
|
"grad_norm": 3.612794876098633, |
|
"learning_rate": 1.712328781086131e-09, |
|
"logits/chosen": -1.5478874444961548, |
|
"logits/rejected": -1.531994104385376, |
|
"logps/chosen": -51.03803634643555, |
|
"logps/rejected": -53.24555206298828, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.007410322315990925, |
|
"rewards/margins": 0.014138393104076385, |
|
"rewards/rejected": -0.021548714488744736, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.6873198847262247, |
|
"grad_norm": 3.423640727996826, |
|
"learning_rate": 1.6369090654806543e-09, |
|
"logits/chosen": -1.5726535320281982, |
|
"logits/rejected": -1.5603439807891846, |
|
"logps/chosen": -46.89988708496094, |
|
"logps/rejected": -51.72241973876953, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.008618971332907677, |
|
"rewards/margins": 0.017804089933633804, |
|
"rewards/rejected": -0.026423057541251183, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.6945244956772334, |
|
"grad_norm": 3.223950147628784, |
|
"learning_rate": 1.5631316786966498e-09, |
|
"logits/chosen": -1.4826900959014893, |
|
"logits/rejected": -1.4667797088623047, |
|
"logps/chosen": -45.12305450439453, |
|
"logps/rejected": -48.53042984008789, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.006777583155781031, |
|
"rewards/margins": 0.01729729399085045, |
|
"rewards/rejected": -0.024074876680970192, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.701729106628242, |
|
"grad_norm": 4.128338813781738, |
|
"learning_rate": 1.491001806979772e-09, |
|
"logits/chosen": -1.5129797458648682, |
|
"logits/rejected": -1.498471736907959, |
|
"logps/chosen": -50.177757263183594, |
|
"logps/rejected": -54.36769485473633, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0021677894983440638, |
|
"rewards/margins": 0.019533688202500343, |
|
"rewards/rejected": -0.021701481193304062, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.7089337175792507, |
|
"grad_norm": 3.762078285217285, |
|
"learning_rate": 1.4205245207621508e-09, |
|
"logits/chosen": -1.4353492259979248, |
|
"logits/rejected": -1.4193631410598755, |
|
"logps/chosen": -52.8912239074707, |
|
"logps/rejected": -55.724334716796875, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0020478595979511738, |
|
"rewards/margins": 0.023334335535764694, |
|
"rewards/rejected": -0.02538219466805458, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.7161383285302594, |
|
"grad_norm": 3.871379852294922, |
|
"learning_rate": 1.3517047743059978e-09, |
|
"logits/chosen": -1.5186526775360107, |
|
"logits/rejected": -1.5207148790359497, |
|
"logps/chosen": -49.53696060180664, |
|
"logps/rejected": -55.4676513671875, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0054653664119541645, |
|
"rewards/margins": 0.01927504874765873, |
|
"rewards/rejected": -0.024740414693951607, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.723342939481268, |
|
"grad_norm": 3.295902729034424, |
|
"learning_rate": 1.2845474053553156e-09, |
|
"logits/chosen": -1.5167324542999268, |
|
"logits/rejected": -1.5084768533706665, |
|
"logps/chosen": -43.50970458984375, |
|
"logps/rejected": -47.10810089111328, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.007235602475702763, |
|
"rewards/margins": 0.01710323989391327, |
|
"rewards/rejected": -0.024338845163583755, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.7305475504322767, |
|
"grad_norm": 2.8286335468292236, |
|
"learning_rate": 1.2190571347958422e-09, |
|
"logits/chosen": -1.5425716638565063, |
|
"logits/rejected": -1.5455642938613892, |
|
"logps/chosen": -43.28416061401367, |
|
"logps/rejected": -50.13677978515625, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0002638758742250502, |
|
"rewards/margins": 0.018405336886644363, |
|
"rewards/rejected": -0.018669212237000465, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.7377521613832854, |
|
"grad_norm": 2.9479782581329346, |
|
"learning_rate": 1.1552385663231634e-09, |
|
"logits/chosen": -1.4791892766952515, |
|
"logits/rejected": -1.457729458808899, |
|
"logps/chosen": -48.17145538330078, |
|
"logps/rejected": -50.11743927001953, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.005142406094819307, |
|
"rewards/margins": 0.016895312815904617, |
|
"rewards/rejected": -0.022037718445062637, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.744956772334294, |
|
"grad_norm": 3.1124942302703857, |
|
"learning_rate": 1.0930961861191302e-09, |
|
"logits/chosen": -1.4413386583328247, |
|
"logits/rejected": -1.4406673908233643, |
|
"logps/chosen": -46.42212677001953, |
|
"logps/rejected": -49.962947845458984, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.007161378860473633, |
|
"rewards/margins": 0.014661896042525768, |
|
"rewards/rejected": -0.021823275834321976, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.7521613832853027, |
|
"grad_norm": 3.0346012115478516, |
|
"learning_rate": 1.0326343625364608e-09, |
|
"logits/chosen": -1.4345784187316895, |
|
"logits/rejected": -1.4189555644989014, |
|
"logps/chosen": -47.113319396972656, |
|
"logps/rejected": -52.565155029296875, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.005450129974633455, |
|
"rewards/margins": 0.02591022290289402, |
|
"rewards/rejected": -0.03136035054922104, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.7593659942363113, |
|
"grad_norm": 2.6539506912231445, |
|
"learning_rate": 9.738573457917066e-10, |
|
"logits/chosen": -1.5480695962905884, |
|
"logits/rejected": -1.5419654846191406, |
|
"logps/chosen": -41.202308654785156, |
|
"logps/rejected": -47.30192184448242, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.005542878992855549, |
|
"rewards/margins": 0.022122148424386978, |
|
"rewards/rejected": -0.0276650283485651, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.76657060518732, |
|
"grad_norm": 3.010223865509033, |
|
"learning_rate": 9.16769267666434e-10, |
|
"logits/chosen": -1.466994047164917, |
|
"logits/rejected": -1.4610474109649658, |
|
"logps/chosen": -46.274566650390625, |
|
"logps/rejected": -48.29325866699219, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.006465951446443796, |
|
"rewards/margins": 0.009437872096896172, |
|
"rewards/rejected": -0.01590382307767868, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.7737752161383287, |
|
"grad_norm": 3.2893083095550537, |
|
"learning_rate": 8.613741412168113e-10, |
|
"logits/chosen": -1.4859510660171509, |
|
"logits/rejected": -1.4807151556015015, |
|
"logps/chosen": -54.31746292114258, |
|
"logps/rejected": -58.5187873840332, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.003899764269590378, |
|
"rewards/margins": 0.02083163894712925, |
|
"rewards/rejected": -0.024731403216719627, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.7809798270893373, |
|
"grad_norm": 3.3477933406829834, |
|
"learning_rate": 8.076758604914802e-10, |
|
"logits/chosen": -1.4456332921981812, |
|
"logits/rejected": -1.4328854084014893, |
|
"logps/chosen": -43.15898895263672, |
|
"logps/rejected": -46.71881866455078, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0022692340426146984, |
|
"rewards/margins": 0.018418530002236366, |
|
"rewards/rejected": -0.020687762647867203, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.7881844380403455, |
|
"grad_norm": 4.6873016357421875, |
|
"learning_rate": 7.55678200257856e-10, |
|
"logits/chosen": -1.442856788635254, |
|
"logits/rejected": -1.4301444292068481, |
|
"logps/chosen": -50.084381103515625, |
|
"logps/rejected": -55.53865432739258, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.006987369619309902, |
|
"rewards/margins": 0.02188916876912117, |
|
"rewards/rejected": -0.028876539319753647, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.795389048991354, |
|
"grad_norm": 3.2509396076202393, |
|
"learning_rate": 7.053848157367315e-10, |
|
"logits/chosen": -1.4659183025360107, |
|
"logits/rejected": -1.4521931409835815, |
|
"logps/chosen": -48.176673889160156, |
|
"logps/rejected": -53.21508026123047, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.001605423167347908, |
|
"rewards/margins": 0.021672086790204048, |
|
"rewards/rejected": -0.023277509957551956, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.802593659942363, |
|
"grad_norm": 2.586207389831543, |
|
"learning_rate": 6.567992423453794e-10, |
|
"logits/chosen": -1.4940398931503296, |
|
"logits/rejected": -1.4875811338424683, |
|
"logps/chosen": -43.39032745361328, |
|
"logps/rejected": -46.69108581542969, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.004483362659811974, |
|
"rewards/margins": 0.019497577100992203, |
|
"rewards/rejected": -0.023980939760804176, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.8097982708933715, |
|
"grad_norm": 3.191453695297241, |
|
"learning_rate": 6.099248954489794e-10, |
|
"logits/chosen": -1.4086828231811523, |
|
"logits/rejected": -1.4071089029312134, |
|
"logps/chosen": -47.94374465942383, |
|
"logps/rejected": -53.171348571777344, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.007214091718196869, |
|
"rewards/margins": 0.020340237766504288, |
|
"rewards/rejected": -0.027554329484701157, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.81700288184438, |
|
"grad_norm": 3.6769521236419678, |
|
"learning_rate": 5.647650701205653e-10, |
|
"logits/chosen": -1.5014244318008423, |
|
"logits/rejected": -1.4815281629562378, |
|
"logps/chosen": -54.42211151123047, |
|
"logps/rejected": -58.34537887573242, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.00018626952078193426, |
|
"rewards/margins": 0.026389459148049355, |
|
"rewards/rejected": -0.026575729250907898, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.824207492795389, |
|
"grad_norm": 3.143864631652832, |
|
"learning_rate": 5.213229409093856e-10, |
|
"logits/chosen": -1.5347833633422852, |
|
"logits/rejected": -1.5241453647613525, |
|
"logps/chosen": -52.7486572265625, |
|
"logps/rejected": -57.841835021972656, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.004956572316586971, |
|
"rewards/margins": 0.02456669509410858, |
|
"rewards/rejected": -0.029523268342018127, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.8314121037463975, |
|
"grad_norm": 4.357008457183838, |
|
"learning_rate": 4.796015616177401e-10, |
|
"logits/chosen": -1.4576940536499023, |
|
"logits/rejected": -1.4457906484603882, |
|
"logps/chosen": -51.851417541503906, |
|
"logps/rejected": -55.67144012451172, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.007598734460771084, |
|
"rewards/margins": 0.016234243288636208, |
|
"rewards/rejected": -0.023832976818084717, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.838616714697406, |
|
"grad_norm": 3.335747480392456, |
|
"learning_rate": 4.3960386508631595e-10, |
|
"logits/chosen": -1.3839704990386963, |
|
"logits/rejected": -1.383461356163025, |
|
"logps/chosen": -42.68264389038086, |
|
"logps/rejected": -46.677146911621094, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.007006730884313583, |
|
"rewards/margins": 0.016936389729380608, |
|
"rewards/rejected": -0.02394312247633934, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.845821325648415, |
|
"grad_norm": 4.871637344360352, |
|
"learning_rate": 4.013326629880243e-10, |
|
"logits/chosen": -1.4295424222946167, |
|
"logits/rejected": -1.4122328758239746, |
|
"logps/chosen": -50.189979553222656, |
|
"logps/rejected": -54.05989456176758, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.007883711718022823, |
|
"rewards/margins": 0.022242117673158646, |
|
"rewards/rejected": -0.030125826597213745, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.8530259365994235, |
|
"grad_norm": 3.459960460662842, |
|
"learning_rate": 3.64790645630339e-10, |
|
"logits/chosen": -1.3913426399230957, |
|
"logits/rejected": -1.3863009214401245, |
|
"logps/chosen": -53.34258270263672, |
|
"logps/rejected": -55.69043731689453, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0019184326520189643, |
|
"rewards/margins": 0.012400278821587563, |
|
"rewards/rejected": -0.014318712055683136, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.860230547550432, |
|
"grad_norm": 4.9613542556762695, |
|
"learning_rate": 3.2998038176619e-10, |
|
"logits/chosen": -1.4524660110473633, |
|
"logits/rejected": -1.4362642765045166, |
|
"logps/chosen": -51.490867614746094, |
|
"logps/rejected": -54.9944953918457, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.007123004645109177, |
|
"rewards/margins": 0.01731189154088497, |
|
"rewards/rejected": -0.024434898048639297, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.867435158501441, |
|
"grad_norm": 3.5082051753997803, |
|
"learning_rate": 2.969043184133907e-10, |
|
"logits/chosen": -1.5575920343399048, |
|
"logits/rejected": -1.5562455654144287, |
|
"logps/chosen": -45.0047721862793, |
|
"logps/rejected": -53.420326232910156, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.000279056781437248, |
|
"rewards/margins": 0.02413121983408928, |
|
"rewards/rejected": -0.024410273879766464, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.8746397694524495, |
|
"grad_norm": 3.8809545040130615, |
|
"learning_rate": 2.6556478068261447e-10, |
|
"logits/chosen": -1.4493497610092163, |
|
"logits/rejected": -1.4356361627578735, |
|
"logps/chosen": -44.4965934753418, |
|
"logps/rejected": -48.013553619384766, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0006090818787924945, |
|
"rewards/margins": 0.028188396245241165, |
|
"rewards/rejected": -0.02757931686937809, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.881844380403458, |
|
"grad_norm": 3.4924869537353516, |
|
"learning_rate": 2.3596397161395607e-10, |
|
"logits/chosen": -1.558559775352478, |
|
"logits/rejected": -1.5367920398712158, |
|
"logps/chosen": -49.601959228515625, |
|
"logps/rejected": -54.7277946472168, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.0002869130694307387, |
|
"rewards/margins": 0.025376971811056137, |
|
"rewards/rejected": -0.025090059265494347, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.889048991354467, |
|
"grad_norm": 4.832521915435791, |
|
"learning_rate": 2.0810397202206399e-10, |
|
"logits/chosen": -1.4147446155548096, |
|
"logits/rejected": -1.4100733995437622, |
|
"logps/chosen": -49.89708709716797, |
|
"logps/rejected": -53.32837677001953, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0006277470965869725, |
|
"rewards/margins": 0.01780160702764988, |
|
"rewards/rejected": -0.01717386022210121, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.8962536023054755, |
|
"grad_norm": 3.2794227600097656, |
|
"learning_rate": 1.819867403498737e-10, |
|
"logits/chosen": -1.5649325847625732, |
|
"logits/rejected": -1.5557774305343628, |
|
"logps/chosen": -47.87010955810547, |
|
"logps/rejected": -51.79607391357422, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.00826673861593008, |
|
"rewards/margins": 0.020225917920470238, |
|
"rewards/rejected": -0.028492655605077744, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.903458213256484, |
|
"grad_norm": 3.472938299179077, |
|
"learning_rate": 1.5761411253092382e-10, |
|
"logits/chosen": -1.430612325668335, |
|
"logits/rejected": -1.4091012477874756, |
|
"logps/chosen": -46.053260803222656, |
|
"logps/rejected": -48.203346252441406, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.006684750318527222, |
|
"rewards/margins": 0.01832910254597664, |
|
"rewards/rejected": -0.02501385286450386, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.910662824207493, |
|
"grad_norm": 3.624494791030884, |
|
"learning_rate": 1.3498780186031455e-10, |
|
"logits/chosen": -1.4939110279083252, |
|
"logits/rejected": -1.4849644899368286, |
|
"logps/chosen": -53.622901916503906, |
|
"logps/rejected": -57.3227653503418, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.006433118134737015, |
|
"rewards/margins": 0.018010510131716728, |
|
"rewards/rejected": -0.024443628266453743, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.9178674351585014, |
|
"grad_norm": 3.310802936553955, |
|
"learning_rate": 1.1410939887425141e-10, |
|
"logits/chosen": -1.49782133102417, |
|
"logits/rejected": -1.4898258447647095, |
|
"logps/chosen": -47.12517166137695, |
|
"logps/rejected": -49.63444137573242, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.010405481792986393, |
|
"rewards/margins": 0.014516057446599007, |
|
"rewards/rejected": -0.024921538308262825, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.92507204610951, |
|
"grad_norm": 2.949910879135132, |
|
"learning_rate": 9.498037123825686e-11, |
|
"logits/chosen": -1.5098581314086914, |
|
"logits/rejected": -1.498915433883667, |
|
"logps/chosen": -45.20121383666992, |
|
"logps/rejected": -49.48499298095703, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.004668011330068111, |
|
"rewards/margins": 0.0204728152602911, |
|
"rewards/rejected": -0.025140831246972084, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.9322766570605188, |
|
"grad_norm": 3.276839256286621, |
|
"learning_rate": 7.760206364398614e-11, |
|
"logits/chosen": -1.5862383842468262, |
|
"logits/rejected": -1.5651108026504517, |
|
"logps/chosen": -49.96502685546875, |
|
"logps/rejected": -53.223594665527344, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.009547281078994274, |
|
"rewards/margins": 0.019979404285550117, |
|
"rewards/rejected": -0.029526684433221817, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.9394812680115274, |
|
"grad_norm": 3.859468698501587, |
|
"learning_rate": 6.19756977147029e-11, |
|
"logits/chosen": -1.4423165321350098, |
|
"logits/rejected": -1.4334721565246582, |
|
"logps/chosen": -47.239994049072266, |
|
"logps/rejected": -54.47491455078125, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.009559379890561104, |
|
"rewards/margins": 0.021419430151581764, |
|
"rewards/rejected": -0.03097880817949772, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.946685878962536, |
|
"grad_norm": 2.8501765727996826, |
|
"learning_rate": 4.810237191940625e-11, |
|
"logits/chosen": -1.4424220323562622, |
|
"logits/rejected": -1.4332430362701416, |
|
"logps/chosen": -46.96299362182617, |
|
"logps/rejected": -49.96550369262695, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.008839382790029049, |
|
"rewards/margins": 0.01596887595951557, |
|
"rewards/rejected": -0.024808257818222046, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.9538904899135447, |
|
"grad_norm": 3.3934149742126465, |
|
"learning_rate": 3.5983061495617476e-11, |
|
"logits/chosen": -1.5262759923934937, |
|
"logits/rejected": -1.5253779888153076, |
|
"logps/chosen": -51.821449279785156, |
|
"logps/rejected": -57.49546432495117, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.005071837455034256, |
|
"rewards/margins": 0.019206812605261803, |
|
"rewards/rejected": -0.02427865006029606, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.9610951008645534, |
|
"grad_norm": 3.1180672645568848, |
|
"learning_rate": 2.5618618380812694e-11, |
|
"logits/chosen": -1.520560622215271, |
|
"logits/rejected": -1.5057575702667236, |
|
"logps/chosen": -42.085235595703125, |
|
"logps/rejected": -47.49565887451172, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0040741474367678165, |
|
"rewards/margins": 0.02492835372686386, |
|
"rewards/rejected": -0.029002506285905838, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.968299711815562, |
|
"grad_norm": 3.4017624855041504, |
|
"learning_rate": 1.700977115254576e-11, |
|
"logits/chosen": -1.4638285636901855, |
|
"logits/rejected": -1.454246163368225, |
|
"logps/chosen": -46.245887756347656, |
|
"logps/rejected": -51.503753662109375, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.007428421638906002, |
|
"rewards/margins": 0.02083088457584381, |
|
"rewards/rejected": -0.028259307146072388, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.9755043227665707, |
|
"grad_norm": 2.984016180038452, |
|
"learning_rate": 1.0157124977230868e-11, |
|
"logits/chosen": -1.4343056678771973, |
|
"logits/rejected": -1.4250215291976929, |
|
"logps/chosen": -43.65608596801758, |
|
"logps/rejected": -47.83146667480469, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0026060601230710745, |
|
"rewards/margins": 0.01812109351158142, |
|
"rewards/rejected": -0.02072715200483799, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.9827089337175794, |
|
"grad_norm": 3.555567741394043, |
|
"learning_rate": 5.061161567596061e-12, |
|
"logits/chosen": -1.4683890342712402, |
|
"logits/rejected": -1.4556920528411865, |
|
"logps/chosen": -47.792022705078125, |
|
"logps/rejected": -50.41429901123047, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0011950184125453234, |
|
"rewards/margins": 0.018146729096770287, |
|
"rewards/rejected": -0.01934174820780754, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.989913544668588, |
|
"grad_norm": 3.3906075954437256, |
|
"learning_rate": 1.7222391488297406e-12, |
|
"logits/chosen": -1.5164161920547485, |
|
"logits/rejected": -1.5046594142913818, |
|
"logps/chosen": -53.657630920410156, |
|
"logps/rejected": -58.71654510498047, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.003981665708124638, |
|
"rewards/margins": 0.029033973813056946, |
|
"rewards/rejected": -0.03301564231514931, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.9971181556195967, |
|
"grad_norm": 3.9542465209960938, |
|
"learning_rate": 1.4059243338693238e-13, |
|
"logits/chosen": -1.4423478841781616, |
|
"logits/rejected": -1.4315330982208252, |
|
"logps/chosen": -48.669158935546875, |
|
"logps/rejected": -53.47356033325195, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0017929436871781945, |
|
"rewards/margins": 0.02205517329275608, |
|
"rewards/rejected": -0.023848116397857666, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4164, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6880504754617968, |
|
"train_runtime": 7518.2292, |
|
"train_samples_per_second": 8.859, |
|
"train_steps_per_second": 0.554 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4164, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|