|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9968602825745683, |
|
"eval_steps": 100, |
|
"global_step": 954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": 0.952304482460022, |
|
"logits/rejected": 0.5888463854789734, |
|
"logps/chosen": -223.79486083984375, |
|
"logps/rejected": -209.482666015625, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": 0.8366600871086121, |
|
"logits/rejected": 0.8544472455978394, |
|
"logps/chosen": -236.2534942626953, |
|
"logps/rejected": -221.8985137939453, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 9.055635746335611e-05, |
|
"rewards/margins": -0.00016146278358064592, |
|
"rewards/rejected": 0.00025201926473528147, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": 0.8331616520881653, |
|
"logits/rejected": 0.9283801317214966, |
|
"logps/chosen": -254.7387237548828, |
|
"logps/rejected": -247.924560546875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0002719077165238559, |
|
"rewards/margins": 0.000702966412063688, |
|
"rewards/rejected": -0.0009748738375492394, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": 0.8597829937934875, |
|
"logits/rejected": 0.9174444079399109, |
|
"logps/chosen": -260.46356201171875, |
|
"logps/rejected": -232.0428924560547, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0010247982572764158, |
|
"rewards/margins": 0.0011705085635185242, |
|
"rewards/rejected": -0.0001457103790016845, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": 0.8120288848876953, |
|
"logits/rejected": 0.9034429788589478, |
|
"logps/chosen": -280.2599792480469, |
|
"logps/rejected": -228.59304809570312, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0004804997588507831, |
|
"rewards/margins": -2.5537923647789285e-05, |
|
"rewards/rejected": -0.00045496178790926933, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.604166666666667e-06, |
|
"logits/chosen": 0.856873631477356, |
|
"logits/rejected": 0.936148464679718, |
|
"logps/chosen": -257.70074462890625, |
|
"logps/rejected": -219.18563842773438, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.7128717192681506e-05, |
|
"rewards/margins": 0.0010264910524711013, |
|
"rewards/rejected": -0.0010536197805777192, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": 0.8756526708602905, |
|
"logits/rejected": 0.9026013612747192, |
|
"logps/chosen": -237.04653930664062, |
|
"logps/rejected": -237.41769409179688, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.00045221406617201865, |
|
"rewards/margins": -0.0005772784352302551, |
|
"rewards/rejected": 0.0001250644854735583, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.6458333333333333e-06, |
|
"logits/chosen": 0.8502365946769714, |
|
"logits/rejected": 0.878851592540741, |
|
"logps/chosen": -260.7863464355469, |
|
"logps/rejected": -227.58700561523438, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.000642338243778795, |
|
"rewards/margins": -0.0005514883669093251, |
|
"rewards/rejected": -9.084997873287648e-05, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": 0.8864189982414246, |
|
"logits/rejected": 0.9194203615188599, |
|
"logps/chosen": -251.07644653320312, |
|
"logps/rejected": -231.24453735351562, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 8.48618583404459e-05, |
|
"rewards/margins": 0.00046581291826441884, |
|
"rewards/rejected": -0.00038095106719993055, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": 0.8617275953292847, |
|
"logits/rejected": 0.9164407849311829, |
|
"logps/chosen": -225.30056762695312, |
|
"logps/rejected": -241.3493194580078, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.00032968426239676774, |
|
"rewards/margins": 0.0004738254356198013, |
|
"rewards/rejected": -0.0008035098435357213, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999731868769027e-06, |
|
"logits/chosen": 0.9239044189453125, |
|
"logits/rejected": 0.9236629605293274, |
|
"logps/chosen": -242.187744140625, |
|
"logps/rejected": -221.4308624267578, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0005193214165046811, |
|
"rewards/margins": 0.00021600276522804052, |
|
"rewards/rejected": -0.0007353241671808064, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": 0.8150850534439087, |
|
"eval_logits/rejected": 0.917453408241272, |
|
"eval_logps/chosen": -260.2373046875, |
|
"eval_logps/rejected": -231.48963928222656, |
|
"eval_loss": 0.00248239329084754, |
|
"eval_rewards/accuracies": 0.5080000162124634, |
|
"eval_rewards/chosen": 6.857867265352979e-05, |
|
"eval_rewards/margins": 0.0006035350379534066, |
|
"eval_rewards/rejected": -0.0005349563434720039, |
|
"eval_runtime": 327.6723, |
|
"eval_samples_per_second": 6.104, |
|
"eval_steps_per_second": 0.381, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.996716052911017e-06, |
|
"logits/chosen": 0.8340710401535034, |
|
"logits/rejected": 0.8745189905166626, |
|
"logps/chosen": -264.0061950683594, |
|
"logps/rejected": -219.55416870117188, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00015970889944583178, |
|
"rewards/margins": 0.0009235168108716607, |
|
"rewards/rejected": -0.000763807853218168, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9903533134293035e-06, |
|
"logits/chosen": 0.8607433438301086, |
|
"logits/rejected": 0.9696208238601685, |
|
"logps/chosen": -254.9775390625, |
|
"logps/rejected": -219.08029174804688, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.00045476845116354525, |
|
"rewards/margins": 0.0009122647461481392, |
|
"rewards/rejected": -0.001367033226415515, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9806521797692184e-06, |
|
"logits/chosen": 0.8772906064987183, |
|
"logits/rejected": 0.8781763911247253, |
|
"logps/chosen": -264.6737365722656, |
|
"logps/rejected": -246.98770141601562, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.00038236891850829124, |
|
"rewards/margins": 0.0011148005723953247, |
|
"rewards/rejected": -0.0014971692580729723, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.967625656594782e-06, |
|
"logits/chosen": 0.8523051142692566, |
|
"logits/rejected": 0.9305570721626282, |
|
"logps/chosen": -222.01806640625, |
|
"logps/rejected": -232.17919921875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0008891393663361669, |
|
"rewards/margins": 0.0006222378578968346, |
|
"rewards/rejected": -0.0015113770496100187, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.95129120635556e-06, |
|
"logits/chosen": 0.8730077743530273, |
|
"logits/rejected": 0.9034315943717957, |
|
"logps/chosen": -258.2915954589844, |
|
"logps/rejected": -215.7738037109375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0006225308170542121, |
|
"rewards/margins": 0.002022756729274988, |
|
"rewards/rejected": -0.0026452874299138784, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.93167072587771e-06, |
|
"logits/chosen": 0.7767001390457153, |
|
"logits/rejected": 0.845874011516571, |
|
"logps/chosen": -257.7080383300781, |
|
"logps/rejected": -250.625732421875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0005334648885764182, |
|
"rewards/margins": 0.0022061350755393505, |
|
"rewards/rejected": -0.0027396001387387514, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.908790517010637e-06, |
|
"logits/chosen": 0.9272924661636353, |
|
"logits/rejected": 0.9679857492446899, |
|
"logps/chosen": -239.0647735595703, |
|
"logps/rejected": -252.8076934814453, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.002121446654200554, |
|
"rewards/margins": 0.001315777888521552, |
|
"rewards/rejected": -0.0034372243098914623, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.882681251368549e-06, |
|
"logits/chosen": 0.8513296246528625, |
|
"logits/rejected": 0.8445970416069031, |
|
"logps/chosen": -270.7970886230469, |
|
"logps/rejected": -256.8065185546875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.00029190973145887256, |
|
"rewards/margins": 0.0030645509250462055, |
|
"rewards/rejected": -0.0033564604818820953, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.853377929214243e-06, |
|
"logits/chosen": 0.7836719751358032, |
|
"logits/rejected": 0.8565704226493835, |
|
"logps/chosen": -252.17233276367188, |
|
"logps/rejected": -244.7781219482422, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.001070442027412355, |
|
"rewards/margins": 0.0032107688020914793, |
|
"rewards/rejected": -0.004281210713088512, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8209198325401815e-06, |
|
"logits/chosen": 0.872645378112793, |
|
"logits/rejected": 0.8895421028137207, |
|
"logps/chosen": -237.6038055419922, |
|
"logps/rejected": -233.2046661376953, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0021511532831937075, |
|
"rewards/margins": 0.002740217139944434, |
|
"rewards/rejected": -0.004891370423138142, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 0.8092045783996582, |
|
"eval_logits/rejected": 0.9120355844497681, |
|
"eval_logps/chosen": -260.39324951171875, |
|
"eval_logps/rejected": -232.11520385742188, |
|
"eval_loss": 0.0022759963758289814, |
|
"eval_rewards/accuracies": 0.656000018119812, |
|
"eval_rewards/chosen": -0.0014907378936186433, |
|
"eval_rewards/margins": 0.005300111137330532, |
|
"eval_rewards/rejected": -0.006790849845856428, |
|
"eval_runtime": 327.2093, |
|
"eval_samples_per_second": 6.112, |
|
"eval_steps_per_second": 0.382, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.785350472409792e-06, |
|
"logits/chosen": 0.9010735750198364, |
|
"logits/rejected": 0.9061653017997742, |
|
"logps/chosen": -232.94741821289062, |
|
"logps/rejected": -228.1055145263672, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.003469156799837947, |
|
"rewards/margins": 0.003427647752687335, |
|
"rewards/rejected": -0.006896805018186569, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.746717530629565e-06, |
|
"logits/chosen": 0.8498672246932983, |
|
"logits/rejected": 0.8839853405952454, |
|
"logps/chosen": -259.6465148925781, |
|
"logps/rejected": -238.8597869873047, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.002919531427323818, |
|
"rewards/margins": 0.00543471472337842, |
|
"rewards/rejected": -0.00835424568504095, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.7050727958301505e-06, |
|
"logits/chosen": 0.8881880044937134, |
|
"logits/rejected": 0.8712642788887024, |
|
"logps/chosen": -244.35507202148438, |
|
"logps/rejected": -229.05136108398438, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.004195825196802616, |
|
"rewards/margins": 0.004726833663880825, |
|
"rewards/rejected": -0.008922659792006016, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.660472094042121e-06, |
|
"logits/chosen": 0.8501306772232056, |
|
"logits/rejected": 0.8583809733390808, |
|
"logps/chosen": -281.69854736328125, |
|
"logps/rejected": -234.6123504638672, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.004014792386442423, |
|
"rewards/margins": 0.0071690999902784824, |
|
"rewards/rejected": -0.011183892376720905, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.612975213859487e-06, |
|
"logits/chosen": 0.8360323905944824, |
|
"logits/rejected": 0.9075163006782532, |
|
"logps/chosen": -268.0133361816406, |
|
"logps/rejected": -241.88339233398438, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0047610728070139885, |
|
"rewards/margins": 0.0054484582506120205, |
|
"rewards/rejected": -0.010209531523287296, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5626458262912745e-06, |
|
"logits/chosen": 0.8462463617324829, |
|
"logits/rejected": 0.8862007260322571, |
|
"logps/chosen": -274.3870544433594, |
|
"logps/rejected": -260.1866149902344, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.002204451011493802, |
|
"rewards/margins": 0.007608965039253235, |
|
"rewards/rejected": -0.009813414886593819, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.509551399408598e-06, |
|
"logits/chosen": 0.9279536008834839, |
|
"logits/rejected": 0.9327741861343384, |
|
"logps/chosen": -252.70175170898438, |
|
"logps/rejected": -209.066650390625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.004289106000214815, |
|
"rewards/margins": 0.007459082640707493, |
|
"rewards/rejected": -0.01174818817526102, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.453763107901676e-06, |
|
"logits/chosen": 0.9238850474357605, |
|
"logits/rejected": 0.8847238421440125, |
|
"logps/chosen": -246.0789031982422, |
|
"logps/rejected": -253.26327514648438, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.007073036395013332, |
|
"rewards/margins": 0.0031865164637565613, |
|
"rewards/rejected": -0.010259552858769894, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.3953557376679856e-06, |
|
"logits/chosen": 0.8482117652893066, |
|
"logits/rejected": 0.8521090745925903, |
|
"logps/chosen": -260.2223815917969, |
|
"logps/rejected": -255.7244873046875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.006550622172653675, |
|
"rewards/margins": 0.004290700424462557, |
|
"rewards/rejected": -0.01084132306277752, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.33440758555951e-06, |
|
"logits/chosen": 0.8257355690002441, |
|
"logits/rejected": 0.9032806158065796, |
|
"logps/chosen": -249.4975128173828, |
|
"logps/rejected": -243.69088745117188, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.004273009952157736, |
|
"rewards/margins": 0.008262387476861477, |
|
"rewards/rejected": -0.0125353978946805, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 0.7992061972618103, |
|
"eval_logits/rejected": 0.9022300243377686, |
|
"eval_logps/chosen": -260.91790771484375, |
|
"eval_logps/rejected": -232.84466552734375, |
|
"eval_loss": 0.002206910401582718, |
|
"eval_rewards/accuracies": 0.6700000166893005, |
|
"eval_rewards/chosen": -0.006737456191331148, |
|
"eval_rewards/margins": 0.007348266430199146, |
|
"eval_rewards/rejected": -0.014085723087191582, |
|
"eval_runtime": 327.3645, |
|
"eval_samples_per_second": 6.109, |
|
"eval_steps_per_second": 0.382, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.2710003544234255e-06, |
|
"logits/chosen": 0.851446270942688, |
|
"logits/rejected": 0.8612054586410522, |
|
"logps/chosen": -235.674072265625, |
|
"logps/rejected": -225.989990234375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.007650519721210003, |
|
"rewards/margins": 0.006462027784436941, |
|
"rewards/rejected": -0.014112548902630806, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.205219043576955e-06, |
|
"logits/chosen": 0.859449565410614, |
|
"logits/rejected": 0.8806228637695312, |
|
"logps/chosen": -224.012451171875, |
|
"logps/rejected": -215.5794677734375, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.008152343332767487, |
|
"rewards/margins": 0.006258256733417511, |
|
"rewards/rejected": -0.014410600066184998, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.137151834863213e-06, |
|
"logits/chosen": 0.8416824340820312, |
|
"logits/rejected": 0.8180407285690308, |
|
"logps/chosen": -253.04629516601562, |
|
"logps/rejected": -258.2658386230469, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.007537019904702902, |
|
"rewards/margins": 0.008035494945943356, |
|
"rewards/rejected": -0.015572515316307545, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.066889974440757e-06, |
|
"logits/chosen": 0.8714286088943481, |
|
"logits/rejected": 0.870419979095459, |
|
"logps/chosen": -266.2498474121094, |
|
"logps/rejected": -248.62814331054688, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.006631535477936268, |
|
"rewards/margins": 0.007086685858666897, |
|
"rewards/rejected": -0.01371822226792574, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.994527650465352e-06, |
|
"logits/chosen": 0.8660160899162292, |
|
"logits/rejected": 0.8818486928939819, |
|
"logps/chosen": -222.5798797607422, |
|
"logps/rejected": -204.8966522216797, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.009565680287778378, |
|
"rewards/margins": 0.006333778612315655, |
|
"rewards/rejected": -0.015899458900094032, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.92016186682789e-06, |
|
"logits/chosen": 0.7974532842636108, |
|
"logits/rejected": 0.802233874797821, |
|
"logps/chosen": -208.30282592773438, |
|
"logps/rejected": -235.695556640625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.008728450164198875, |
|
"rewards/margins": 0.008383492939174175, |
|
"rewards/rejected": -0.017111944034695625, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.843892313117724e-06, |
|
"logits/chosen": 0.8825100064277649, |
|
"logits/rejected": 0.9040519595146179, |
|
"logps/chosen": -268.5409851074219, |
|
"logps/rejected": -241.93862915039062, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.008966553956270218, |
|
"rewards/margins": 0.00720653822645545, |
|
"rewards/rejected": -0.016173092648386955, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7658212309857576e-06, |
|
"logits/chosen": 0.8589727282524109, |
|
"logits/rejected": 0.8763955235481262, |
|
"logps/chosen": -238.5310516357422, |
|
"logps/rejected": -214.51651000976562, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.008340245112776756, |
|
"rewards/margins": 0.009361723437905312, |
|
"rewards/rejected": -0.017701968550682068, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.686053277086401e-06, |
|
"logits/chosen": 0.812663197517395, |
|
"logits/rejected": 0.8946850895881653, |
|
"logps/chosen": -261.9408874511719, |
|
"logps/rejected": -236.6110382080078, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.008398517966270447, |
|
"rewards/margins": 0.008632157929241657, |
|
"rewards/rejected": -0.01703067496418953, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.604695382782159e-06, |
|
"logits/chosen": 0.8278132677078247, |
|
"logits/rejected": 0.8401390314102173, |
|
"logps/chosen": -277.306396484375, |
|
"logps/rejected": -252.70474243164062, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.008408455178141594, |
|
"rewards/margins": 0.007572343107312918, |
|
"rewards/rejected": -0.0159807987511158, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 0.7884067893028259, |
|
"eval_logits/rejected": 0.8913614153862, |
|
"eval_logps/chosen": -261.16204833984375, |
|
"eval_logps/rejected": -233.21571350097656, |
|
"eval_loss": 0.0021709667053073645, |
|
"eval_rewards/accuracies": 0.6639999747276306, |
|
"eval_rewards/chosen": -0.009178930893540382, |
|
"eval_rewards/margins": 0.008617207407951355, |
|
"eval_rewards/rejected": -0.017796138301491737, |
|
"eval_runtime": 327.7654, |
|
"eval_samples_per_second": 6.102, |
|
"eval_steps_per_second": 0.381, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5218566107988872e-06, |
|
"logits/chosen": 0.8511005640029907, |
|
"logits/rejected": 0.8689740300178528, |
|
"logps/chosen": -269.23675537109375, |
|
"logps/rejected": -234.9521484375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.008150833658874035, |
|
"rewards/margins": 0.009497146122157574, |
|
"rewards/rejected": -0.01764797978103161, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.437648009023905e-06, |
|
"logits/chosen": 0.796908974647522, |
|
"logits/rejected": 0.8615992665290833, |
|
"logps/chosen": -213.23800659179688, |
|
"logps/rejected": -206.04647827148438, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.009716427884995937, |
|
"rewards/margins": 0.006270779762417078, |
|
"rewards/rejected": -0.015987208113074303, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.352182461642929e-06, |
|
"logits/chosen": 0.8233186602592468, |
|
"logits/rejected": 0.8778685331344604, |
|
"logps/chosen": -235.12954711914062, |
|
"logps/rejected": -218.67416381835938, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.008281409740447998, |
|
"rewards/margins": 0.008449633605778217, |
|
"rewards/rejected": -0.01673104241490364, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.265574537815398e-06, |
|
"logits/chosen": 0.8454925417900085, |
|
"logits/rejected": 0.8766401410102844, |
|
"logps/chosen": -280.82562255859375, |
|
"logps/rejected": -242.23635864257812, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.006689480505883694, |
|
"rewards/margins": 0.010621527209877968, |
|
"rewards/rejected": -0.017311008647084236, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.177940338091043e-06, |
|
"logits/chosen": 0.8534622192382812, |
|
"logits/rejected": 0.9055653810501099, |
|
"logps/chosen": -252.76089477539062, |
|
"logps/rejected": -221.62551879882812, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.007692619226872921, |
|
"rewards/margins": 0.009279204532504082, |
|
"rewards/rejected": -0.016971822828054428, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.089397338773569e-06, |
|
"logits/chosen": 0.8498457074165344, |
|
"logits/rejected": 0.8722270131111145, |
|
"logps/chosen": -258.4722595214844, |
|
"logps/rejected": -225.99447631835938, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.009471247904002666, |
|
"rewards/margins": 0.008289327844977379, |
|
"rewards/rejected": -0.01776057854294777, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.0000642344401115e-06, |
|
"logits/chosen": 0.8206149935722351, |
|
"logits/rejected": 0.8616384267807007, |
|
"logps/chosen": -239.18264770507812, |
|
"logps/rejected": -224.2777862548828, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.009907958097755909, |
|
"rewards/margins": 0.007560922298580408, |
|
"rewards/rejected": -0.017468880861997604, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.9100607788275547e-06, |
|
"logits/chosen": 0.8556537628173828, |
|
"logits/rejected": 0.9059454202651978, |
|
"logps/chosen": -243.97933959960938, |
|
"logps/rejected": -235.90817260742188, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.009879620745778084, |
|
"rewards/margins": 0.007370662875473499, |
|
"rewards/rejected": -0.01725028082728386, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.8195076242990124e-06, |
|
"logits/chosen": 0.837064266204834, |
|
"logits/rejected": 0.8774217367172241, |
|
"logps/chosen": -239.0427703857422, |
|
"logps/rejected": -215.9508819580078, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.010479142889380455, |
|
"rewards/margins": 0.008913186378777027, |
|
"rewards/rejected": -0.019392330199480057, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.72852616010567e-06, |
|
"logits/chosen": 0.8183882832527161, |
|
"logits/rejected": 0.8531386256217957, |
|
"logps/chosen": -253.97683715820312, |
|
"logps/rejected": -234.94265747070312, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.008641371503472328, |
|
"rewards/margins": 0.009928500279784203, |
|
"rewards/rejected": -0.01856987178325653, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_logits/chosen": 0.7821087837219238, |
|
"eval_logits/rejected": 0.8852795362472534, |
|
"eval_logps/chosen": -261.1851501464844, |
|
"eval_logps/rejected": -233.3613739013672, |
|
"eval_loss": 0.0021211737766861916, |
|
"eval_rewards/accuracies": 0.7099999785423279, |
|
"eval_rewards/chosen": -0.009409956634044647, |
|
"eval_rewards/margins": 0.00984267145395279, |
|
"eval_rewards/rejected": -0.019252628087997437, |
|
"eval_runtime": 325.4617, |
|
"eval_samples_per_second": 6.145, |
|
"eval_steps_per_second": 0.384, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.637238349660819e-06, |
|
"logits/chosen": 0.8250406384468079, |
|
"logits/rejected": 0.9090532064437866, |
|
"logps/chosen": -236.7153778076172, |
|
"logps/rejected": -197.7652587890625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.010482882149517536, |
|
"rewards/margins": 0.00821294542402029, |
|
"rewards/rejected": -0.018695827573537827, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.5457665670441937e-06, |
|
"logits/chosen": 0.8841145634651184, |
|
"logits/rejected": 0.891588568687439, |
|
"logps/chosen": -250.2413330078125, |
|
"logps/rejected": -226.20846557617188, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.008791481144726276, |
|
"rewards/margins": 0.008898518979549408, |
|
"rewards/rejected": -0.01768999919295311, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.4542334329558075e-06, |
|
"logits/chosen": 0.8093138933181763, |
|
"logits/rejected": 0.8301302790641785, |
|
"logps/chosen": -242.32766723632812, |
|
"logps/rejected": -228.98446655273438, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.008460971526801586, |
|
"rewards/margins": 0.009179492481052876, |
|
"rewards/rejected": -0.017640462145209312, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.3627616503391813e-06, |
|
"logits/chosen": 0.8050084114074707, |
|
"logits/rejected": 0.8223572969436646, |
|
"logps/chosen": -259.1438903808594, |
|
"logps/rejected": -216.0526885986328, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.009418713860213757, |
|
"rewards/margins": 0.0077890073880553246, |
|
"rewards/rejected": -0.01720772124826908, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.271473839894331e-06, |
|
"logits/chosen": 0.8079057931900024, |
|
"logits/rejected": 0.8310044407844543, |
|
"logps/chosen": -267.4736328125, |
|
"logps/rejected": -249.3809356689453, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.008644058369100094, |
|
"rewards/margins": 0.008870486170053482, |
|
"rewards/rejected": -0.01751454547047615, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1804923757009885e-06, |
|
"logits/chosen": 0.7828265428543091, |
|
"logits/rejected": 0.810738205909729, |
|
"logps/chosen": -251.98782348632812, |
|
"logps/rejected": -226.2450408935547, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.008495638146996498, |
|
"rewards/margins": 0.009752650745213032, |
|
"rewards/rejected": -0.018248289823532104, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.089939221172446e-06, |
|
"logits/chosen": 0.8058856129646301, |
|
"logits/rejected": 0.8096176385879517, |
|
"logps/chosen": -270.29681396484375, |
|
"logps/rejected": -230.7064666748047, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.010026035830378532, |
|
"rewards/margins": 0.009994433261454105, |
|
"rewards/rejected": -0.020020468160510063, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.9999357655598894e-06, |
|
"logits/chosen": 0.8202878832817078, |
|
"logits/rejected": 0.7905790209770203, |
|
"logps/chosen": -240.6394500732422, |
|
"logps/rejected": -227.01150512695312, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.007834648713469505, |
|
"rewards/margins": 0.007603611797094345, |
|
"rewards/rejected": -0.01543826051056385, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.9106026612264316e-06, |
|
"logits/chosen": 0.8630008697509766, |
|
"logits/rejected": 0.9225195050239563, |
|
"logps/chosen": -218.4501953125, |
|
"logps/rejected": -214.31399536132812, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.008840398862957954, |
|
"rewards/margins": 0.009108386933803558, |
|
"rewards/rejected": -0.017948785796761513, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.8220596619089576e-06, |
|
"logits/chosen": 0.8304165601730347, |
|
"logits/rejected": 0.7959300875663757, |
|
"logps/chosen": -262.4768981933594, |
|
"logps/rejected": -230.4768524169922, |
|
"loss": 0.002, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.008658383972942829, |
|
"rewards/margins": 0.008863124065101147, |
|
"rewards/rejected": -0.017521508038043976, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_logits/chosen": 0.7814888954162598, |
|
"eval_logits/rejected": 0.8839987516403198, |
|
"eval_logps/chosen": -261.1206970214844, |
|
"eval_logps/rejected": -233.28433227539062, |
|
"eval_loss": 0.002118554199114442, |
|
"eval_rewards/accuracies": 0.6940000057220459, |
|
"eval_rewards/chosen": -0.008765296079218388, |
|
"eval_rewards/margins": 0.009716734290122986, |
|
"eval_rewards/rejected": -0.0184820294380188, |
|
"eval_runtime": 325.1283, |
|
"eval_samples_per_second": 6.151, |
|
"eval_steps_per_second": 0.384, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7344254621846018e-06, |
|
"logits/chosen": 0.7952737212181091, |
|
"logits/rejected": 0.8380396962165833, |
|
"logps/chosen": -264.8700256347656, |
|
"logps/rejected": -251.73214721679688, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.007588304579257965, |
|
"rewards/margins": 0.011967618018388748, |
|
"rewards/rejected": -0.019555922597646713, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.647817538357072e-06, |
|
"logits/chosen": 0.8249796032905579, |
|
"logits/rejected": 0.8937106132507324, |
|
"logps/chosen": -247.3903350830078, |
|
"logps/rejected": -229.36093139648438, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.010081231594085693, |
|
"rewards/margins": 0.006175318732857704, |
|
"rewards/rejected": -0.016256550326943398, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.5623519909760953e-06, |
|
"logits/chosen": 0.8032970428466797, |
|
"logits/rejected": 0.8601589202880859, |
|
"logps/chosen": -242.910888671875, |
|
"logps/rejected": -238.8945770263672, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.010011060163378716, |
|
"rewards/margins": 0.007496376521885395, |
|
"rewards/rejected": -0.017507437616586685, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.4781433892011132e-06, |
|
"logits/chosen": 0.8834539651870728, |
|
"logits/rejected": 0.8997892141342163, |
|
"logps/chosen": -242.17578125, |
|
"logps/rejected": -259.06671142578125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.008281581103801727, |
|
"rewards/margins": 0.008412448689341545, |
|
"rewards/rejected": -0.016694029793143272, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.3953046172178413e-06, |
|
"logits/chosen": 0.8225077390670776, |
|
"logits/rejected": 0.8323785662651062, |
|
"logps/chosen": -253.43905639648438, |
|
"logps/rejected": -245.92721557617188, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.010358546860516071, |
|
"rewards/margins": 0.009153308346867561, |
|
"rewards/rejected": -0.019511854276061058, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.3139467229135999e-06, |
|
"logits/chosen": 0.7964144945144653, |
|
"logits/rejected": 0.8561304807662964, |
|
"logps/chosen": -220.56430053710938, |
|
"logps/rejected": -246.96731567382812, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.008580346591770649, |
|
"rewards/margins": 0.00769965723156929, |
|
"rewards/rejected": -0.016280004754662514, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.2341787690142436e-06, |
|
"logits/chosen": 0.7782607078552246, |
|
"logits/rejected": 0.8575235605239868, |
|
"logps/chosen": -300.5128173828125, |
|
"logps/rejected": -240.5801239013672, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.007187344133853912, |
|
"rewards/margins": 0.01008409820497036, |
|
"rewards/rejected": -0.017271442338824272, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1561076868822756e-06, |
|
"logits/chosen": 0.8010842204093933, |
|
"logits/rejected": 0.8400290608406067, |
|
"logps/chosen": -243.8752899169922, |
|
"logps/rejected": -216.2671661376953, |
|
"loss": 0.002, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.007206754293292761, |
|
"rewards/margins": 0.009658637456595898, |
|
"rewards/rejected": -0.01686539314687252, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.079838133172111e-06, |
|
"logits/chosen": 0.8610042333602905, |
|
"logits/rejected": 0.8555776476860046, |
|
"logps/chosen": -258.38946533203125, |
|
"logps/rejected": -238.4031982421875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.008076001890003681, |
|
"rewards/margins": 0.0069586001336574554, |
|
"rewards/rejected": -0.015034601092338562, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0054723495346484e-06, |
|
"logits/chosen": 0.7388730049133301, |
|
"logits/rejected": 0.8165189027786255, |
|
"logps/chosen": -259.45965576171875, |
|
"logps/rejected": -227.28750610351562, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.00880814902484417, |
|
"rewards/margins": 0.007600386627018452, |
|
"rewards/rejected": -0.016408536583185196, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_logits/chosen": 0.7790006995201111, |
|
"eval_logits/rejected": 0.8815683722496033, |
|
"eval_logps/chosen": -261.0787658691406, |
|
"eval_logps/rejected": -233.25596618652344, |
|
"eval_loss": 0.0021041170693933964, |
|
"eval_rewards/accuracies": 0.699999988079071, |
|
"eval_rewards/chosen": -0.00834597460925579, |
|
"eval_rewards/margins": 0.009852716699242592, |
|
"eval_rewards/rejected": -0.018198693171143532, |
|
"eval_runtime": 325.1201, |
|
"eval_samples_per_second": 6.152, |
|
"eval_steps_per_second": 0.384, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.331100255592437e-07, |
|
"logits/chosen": 0.8419380187988281, |
|
"logits/rejected": 0.8946343660354614, |
|
"logps/chosen": -279.4914245605469, |
|
"logps/rejected": -229.0105438232422, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0075461543165147305, |
|
"rewards/margins": 0.010306203737854958, |
|
"rewards/rejected": -0.017852356657385826, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.628481651367876e-07, |
|
"logits/chosen": 0.8257007598876953, |
|
"logits/rejected": 0.8564590215682983, |
|
"logps/chosen": -264.4073486328125, |
|
"logps/rejected": -244.4358673095703, |
|
"loss": 0.002, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.007830760441720486, |
|
"rewards/margins": 0.009762524627149105, |
|
"rewards/rejected": -0.01759328506886959, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.947809564230446e-07, |
|
"logits/chosen": 0.8498528599739075, |
|
"logits/rejected": 0.8689123392105103, |
|
"logps/chosen": -265.91986083984375, |
|
"logps/rejected": -252.3328094482422, |
|
"loss": 0.002, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.007306605577468872, |
|
"rewards/margins": 0.00969378836452961, |
|
"rewards/rejected": -0.01700039580464363, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.289996455765749e-07, |
|
"logits/chosen": 0.8058244585990906, |
|
"logits/rejected": 0.8203352093696594, |
|
"logps/chosen": -276.05224609375, |
|
"logps/rejected": -252.39712524414062, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.00796983577311039, |
|
"rewards/margins": 0.007479208521544933, |
|
"rewards/rejected": -0.015449045225977898, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.655924144404907e-07, |
|
"logits/chosen": 0.7636196613311768, |
|
"logits/rejected": 0.8203707933425903, |
|
"logps/chosen": -260.9599304199219, |
|
"logps/rejected": -236.13442993164062, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0055490517988801, |
|
"rewards/margins": 0.010982049629092216, |
|
"rewards/rejected": -0.01653110235929489, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.046442623320145e-07, |
|
"logits/chosen": 0.8590337634086609, |
|
"logits/rejected": 0.889240562915802, |
|
"logps/chosen": -256.20843505859375, |
|
"logps/rejected": -218.8590087890625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.009814934805035591, |
|
"rewards/margins": 0.007059067487716675, |
|
"rewards/rejected": -0.016874000430107117, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.462368920983249e-07, |
|
"logits/chosen": 0.8207941055297852, |
|
"logits/rejected": 0.8683902025222778, |
|
"logps/chosen": -241.982666015625, |
|
"logps/rejected": -217.79141235351562, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.009196323342621326, |
|
"rewards/margins": 0.006100159604102373, |
|
"rewards/rejected": -0.015296483412384987, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.904486005914027e-07, |
|
"logits/chosen": 0.8281243443489075, |
|
"logits/rejected": 0.8692743182182312, |
|
"logps/chosen": -284.8575744628906, |
|
"logps/rejected": -258.57464599609375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0074394033290445805, |
|
"rewards/margins": 0.010337688960134983, |
|
"rewards/rejected": -0.01777709275484085, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.373541737087264e-07, |
|
"logits/chosen": 0.8097678422927856, |
|
"logits/rejected": 0.9018150568008423, |
|
"logps/chosen": -265.8307189941406, |
|
"logps/rejected": -224.81103515625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.007008875720202923, |
|
"rewards/margins": 0.009901894256472588, |
|
"rewards/rejected": -0.016910770907998085, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.8702478614051353e-07, |
|
"logits/chosen": 0.7832424640655518, |
|
"logits/rejected": 0.7852426767349243, |
|
"logps/chosen": -238.4669952392578, |
|
"logps/rejected": -229.850830078125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.007307013962417841, |
|
"rewards/margins": 0.011516671627759933, |
|
"rewards/rejected": -0.018823683261871338, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": 0.7780749797821045, |
|
"eval_logits/rejected": 0.8810694217681885, |
|
"eval_logps/chosen": -261.0643310546875, |
|
"eval_logps/rejected": -233.27398681640625, |
|
"eval_loss": 0.002105255611240864, |
|
"eval_rewards/accuracies": 0.6940000057220459, |
|
"eval_rewards/chosen": -0.008201568387448788, |
|
"eval_rewards/margins": 0.010177312418818474, |
|
"eval_rewards/rejected": -0.018378881737589836, |
|
"eval_runtime": 324.9688, |
|
"eval_samples_per_second": 6.154, |
|
"eval_steps_per_second": 0.385, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.3952790595787986e-07, |
|
"logits/chosen": 0.9047285914421082, |
|
"logits/rejected": 0.8632117509841919, |
|
"logps/chosen": -249.9727020263672, |
|
"logps/rejected": -233.0714569091797, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.008487801998853683, |
|
"rewards/margins": 0.007260690443217754, |
|
"rewards/rejected": -0.015748491510748863, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.9492720416985004e-07, |
|
"logits/chosen": 0.745966374874115, |
|
"logits/rejected": 0.7877084016799927, |
|
"logps/chosen": -275.1504211425781, |
|
"logps/rejected": -252.06570434570312, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.006994744297116995, |
|
"rewards/margins": 0.008207054808735847, |
|
"rewards/rejected": -0.01520179957151413, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.5328246937043526e-07, |
|
"logits/chosen": 0.8438940048217773, |
|
"logits/rejected": 0.8585928082466125, |
|
"logps/chosen": -237.1392059326172, |
|
"logps/rejected": -217.2599334716797, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.009680895134806633, |
|
"rewards/margins": 0.008195875212550163, |
|
"rewards/rejected": -0.017876770347356796, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1464952759020857e-07, |
|
"logits/chosen": 0.835778534412384, |
|
"logits/rejected": 0.8605103492736816, |
|
"logps/chosen": -250.32455444335938, |
|
"logps/rejected": -247.2939453125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.006872941739857197, |
|
"rewards/margins": 0.00862927082926035, |
|
"rewards/rejected": -0.015502211637794971, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.790801674598186e-07, |
|
"logits/chosen": 0.8059646487236023, |
|
"logits/rejected": 0.8204299807548523, |
|
"logps/chosen": -269.0479736328125, |
|
"logps/rejected": -236.54345703125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.007251231465488672, |
|
"rewards/margins": 0.010516216047108173, |
|
"rewards/rejected": -0.017767447978258133, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.4662207078575685e-07, |
|
"logits/chosen": 0.8005737066268921, |
|
"logits/rejected": 0.865730881690979, |
|
"logps/chosen": -232.56271362304688, |
|
"logps/rejected": -240.54428100585938, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.008310760371387005, |
|
"rewards/margins": 0.010277243331074715, |
|
"rewards/rejected": -0.018588004633784294, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1731874863145143e-07, |
|
"logits/chosen": 0.8402239680290222, |
|
"logits/rejected": 0.8874310255050659, |
|
"logps/chosen": -262.51519775390625, |
|
"logps/rejected": -224.810302734375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.006995867937803268, |
|
"rewards/margins": 0.009674609638750553, |
|
"rewards/rejected": -0.016670476645231247, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.120948298936422e-08, |
|
"logits/chosen": 0.8549894094467163, |
|
"logits/rejected": 0.8683481216430664, |
|
"logps/chosen": -250.1100311279297, |
|
"logps/rejected": -225.083740234375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.007406042423099279, |
|
"rewards/margins": 0.009078353643417358, |
|
"rewards/rejected": -0.016484394669532776, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.832927412229017e-08, |
|
"logits/chosen": 0.785990834236145, |
|
"logits/rejected": 0.9018427729606628, |
|
"logps/chosen": -262.3821716308594, |
|
"logps/rejected": -228.41357421875, |
|
"loss": 0.002, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.006989480461925268, |
|
"rewards/margins": 0.010014806874096394, |
|
"rewards/rejected": -0.017004288733005524, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.870879364444109e-08, |
|
"logits/chosen": 0.8013142347335815, |
|
"logits/rejected": 0.8696510195732117, |
|
"logps/chosen": -252.10507202148438, |
|
"logps/rejected": -206.7936553955078, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.007737092673778534, |
|
"rewards/margins": 0.01070366334170103, |
|
"rewards/rejected": -0.01844075694680214, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_logits/chosen": 0.7806341648101807, |
|
"eval_logits/rejected": 0.8832870721817017, |
|
"eval_logps/chosen": -261.0921936035156, |
|
"eval_logps/rejected": -233.21180725097656, |
|
"eval_loss": 0.002123194048181176, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": -0.008480267599225044, |
|
"eval_rewards/margins": 0.009276580065488815, |
|
"eval_rewards/rejected": -0.01775684952735901, |
|
"eval_runtime": 325.0149, |
|
"eval_samples_per_second": 6.154, |
|
"eval_steps_per_second": 0.385, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.237434340521789e-08, |
|
"logits/chosen": 0.7978643774986267, |
|
"logits/rejected": 0.8687127828598022, |
|
"logps/chosen": -263.38275146484375, |
|
"logps/rejected": -247.8026123046875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.007231117691844702, |
|
"rewards/margins": 0.009652243927121162, |
|
"rewards/rejected": -0.016883360221982002, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.93478202307823e-08, |
|
"logits/chosen": 0.7963850498199463, |
|
"logits/rejected": 0.8160678148269653, |
|
"logps/chosen": -242.1365966796875, |
|
"logps/rejected": -246.0305938720703, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0077395932748913765, |
|
"rewards/margins": 0.007968437857925892, |
|
"rewards/rejected": -0.01570803113281727, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.646686570697062e-09, |
|
"logits/chosen": 0.862303614616394, |
|
"logits/rejected": 0.8678015470504761, |
|
"logps/chosen": -257.33099365234375, |
|
"logps/rejected": -249.9061737060547, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.007601047400385141, |
|
"rewards/margins": 0.009036187082529068, |
|
"rewards/rejected": -0.01663723587989807, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.283947088983663e-09, |
|
"logits/chosen": 0.8371657133102417, |
|
"logits/rejected": 0.8322643041610718, |
|
"logps/chosen": -238.14657592773438, |
|
"logps/rejected": -243.3525390625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.009062298573553562, |
|
"rewards/margins": 0.008633644320070744, |
|
"rewards/rejected": -0.017695942893624306, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.681312309735229e-10, |
|
"logits/chosen": 0.8020931482315063, |
|
"logits/rejected": 0.9026565551757812, |
|
"logps/chosen": -231.6744842529297, |
|
"logps/rejected": -229.53726196289062, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.007398143410682678, |
|
"rewards/margins": 0.009477959014475346, |
|
"rewards/rejected": -0.0168761033564806, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 954, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0021909422920118682, |
|
"train_runtime": 18127.9992, |
|
"train_samples_per_second": 3.372, |
|
"train_steps_per_second": 0.053 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 954, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |