|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 2.617801047120419e-08, |
|
"logits/chosen": -2.4349141120910645, |
|
"logits/rejected": -2.305828332901001, |
|
"logps/chosen": -259.81884765625, |
|
"logps/rejected": -293.43365478515625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 2.617801047120419e-07, |
|
"logits/chosen": -2.403059959411621, |
|
"logits/rejected": -2.388718843460083, |
|
"logps/chosen": -235.5987091064453, |
|
"logps/rejected": -222.26573181152344, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": 0.0011952732456848025, |
|
"rewards/margins": 0.0005173576646484435, |
|
"rewards/rejected": 0.0006779157556593418, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.703125, |
|
"learning_rate": 5.235602094240838e-07, |
|
"logits/chosen": -2.439892292022705, |
|
"logits/rejected": -2.4342308044433594, |
|
"logps/chosen": -253.2171173095703, |
|
"logps/rejected": -252.88656616210938, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.007272551767528057, |
|
"rewards/margins": 0.0015017122495919466, |
|
"rewards/rejected": 0.005770839285105467, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.671875, |
|
"learning_rate": 7.853403141361258e-07, |
|
"logits/chosen": -2.433501720428467, |
|
"logits/rejected": -2.4373536109924316, |
|
"logps/chosen": -257.414306640625, |
|
"logps/rejected": -245.83837890625, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.018377887085080147, |
|
"rewards/margins": 0.0048246318474411964, |
|
"rewards/rejected": 0.013553252443671227, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.0471204188481676e-06, |
|
"logits/chosen": -2.4690449237823486, |
|
"logits/rejected": -2.4557018280029297, |
|
"logps/chosen": -251.83187866210938, |
|
"logps/rejected": -219.1208038330078, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.03281437233090401, |
|
"rewards/margins": 0.008931155316531658, |
|
"rewards/rejected": 0.023883214220404625, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 1.3089005235602096e-06, |
|
"logits/chosen": -2.4257969856262207, |
|
"logits/rejected": -2.50382661819458, |
|
"logps/chosen": -251.3686981201172, |
|
"logps/rejected": -234.69204711914062, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.04148787260055542, |
|
"rewards/margins": 0.013229536823928356, |
|
"rewards/rejected": 0.02825833484530449, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 1.5706806282722515e-06, |
|
"logits/chosen": -2.389047622680664, |
|
"logits/rejected": -2.4084537029266357, |
|
"logps/chosen": -237.94482421875, |
|
"logps/rejected": -256.0442199707031, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.04300086200237274, |
|
"rewards/margins": 0.017871979624032974, |
|
"rewards/rejected": 0.025128880515694618, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 1.8324607329842933e-06, |
|
"logits/chosen": -2.5107932090759277, |
|
"logits/rejected": -2.441624402999878, |
|
"logps/chosen": -270.041259765625, |
|
"logps/rejected": -262.65423583984375, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.05541977286338806, |
|
"rewards/margins": 0.03250492364168167, |
|
"rewards/rejected": 0.02291484735906124, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 2.094240837696335e-06, |
|
"logits/chosen": -2.5124049186706543, |
|
"logits/rejected": -2.4392712116241455, |
|
"logps/chosen": -271.6557312011719, |
|
"logps/rejected": -247.5332794189453, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.048886366188526154, |
|
"rewards/margins": 0.030922260135412216, |
|
"rewards/rejected": 0.01796409860253334, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 2.356020942408377e-06, |
|
"logits/chosen": -2.437180995941162, |
|
"logits/rejected": -2.4321937561035156, |
|
"logps/chosen": -233.2532501220703, |
|
"logps/rejected": -220.14111328125, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04492487758398056, |
|
"rewards/margins": 0.04827447608113289, |
|
"rewards/rejected": -0.003349601523950696, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.7734375, |
|
"learning_rate": 2.617801047120419e-06, |
|
"logits/chosen": -2.4792046546936035, |
|
"logits/rejected": -2.489912748336792, |
|
"logps/chosen": -261.0204772949219, |
|
"logps/rejected": -243.6798858642578, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.042254410684108734, |
|
"rewards/margins": 0.06775885075330734, |
|
"rewards/rejected": -0.025504430755972862, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -2.324150562286377, |
|
"eval_logits/rejected": -2.297302484512329, |
|
"eval_logps/chosen": -264.0424499511719, |
|
"eval_logps/rejected": -247.6175537109375, |
|
"eval_loss": 0.6637194156646729, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.012637840583920479, |
|
"eval_rewards/margins": 0.07619453966617584, |
|
"eval_rewards/rejected": -0.06355669349431992, |
|
"eval_runtime": 451.4766, |
|
"eval_samples_per_second": 4.43, |
|
"eval_steps_per_second": 0.277, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 2.8795811518324613e-06, |
|
"logits/chosen": -2.4320034980773926, |
|
"logits/rejected": -2.367145538330078, |
|
"logps/chosen": -238.0926513671875, |
|
"logps/rejected": -240.7525634765625, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.00833176914602518, |
|
"rewards/margins": 0.0710856094956398, |
|
"rewards/rejected": -0.07941737025976181, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.0, |
|
"learning_rate": 3.141361256544503e-06, |
|
"logits/chosen": -2.33853816986084, |
|
"logits/rejected": -2.331645965576172, |
|
"logps/chosen": -263.82879638671875, |
|
"logps/rejected": -240.96682739257812, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.019238661974668503, |
|
"rewards/margins": 0.10400726646184921, |
|
"rewards/rejected": -0.08476860821247101, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.5, |
|
"learning_rate": 3.403141361256545e-06, |
|
"logits/chosen": -2.3021273612976074, |
|
"logits/rejected": -2.2405173778533936, |
|
"logps/chosen": -265.228515625, |
|
"logps/rejected": -267.389892578125, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.15649668872356415, |
|
"rewards/margins": 0.10490702092647552, |
|
"rewards/rejected": -0.2614037096500397, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 3.6649214659685865e-06, |
|
"logits/chosen": -2.269350290298462, |
|
"logits/rejected": -2.1657040119171143, |
|
"logps/chosen": -266.2239685058594, |
|
"logps/rejected": -277.36688232421875, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1390775889158249, |
|
"rewards/margins": 0.1098862886428833, |
|
"rewards/rejected": -0.2489638775587082, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 3.926701570680629e-06, |
|
"logits/chosen": -2.291912317276001, |
|
"logits/rejected": -2.2098731994628906, |
|
"logps/chosen": -268.5712585449219, |
|
"logps/rejected": -270.6773681640625, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11082954704761505, |
|
"rewards/margins": 0.17063280940055847, |
|
"rewards/rejected": -0.2814623713493347, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 4.18848167539267e-06, |
|
"logits/chosen": -2.117079496383667, |
|
"logits/rejected": -2.1376585960388184, |
|
"logps/chosen": -268.0408020019531, |
|
"logps/rejected": -271.73858642578125, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.18969736993312836, |
|
"rewards/margins": 0.20424196124076843, |
|
"rewards/rejected": -0.39393937587738037, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 4.450261780104713e-06, |
|
"logits/chosen": -2.225733995437622, |
|
"logits/rejected": -2.188385248184204, |
|
"logps/chosen": -291.46209716796875, |
|
"logps/rejected": -291.213134765625, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.29510316252708435, |
|
"rewards/margins": 0.19445903599262238, |
|
"rewards/rejected": -0.48956218361854553, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 4.712041884816754e-06, |
|
"logits/chosen": -2.145782947540283, |
|
"logits/rejected": -2.1198437213897705, |
|
"logps/chosen": -309.0302429199219, |
|
"logps/rejected": -312.53302001953125, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2840114235877991, |
|
"rewards/margins": 0.23209214210510254, |
|
"rewards/rejected": -0.5161035656929016, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.28125, |
|
"learning_rate": 4.9738219895287965e-06, |
|
"logits/chosen": -2.164933204650879, |
|
"logits/rejected": -1.9833053350448608, |
|
"logps/chosen": -289.17510986328125, |
|
"logps/rejected": -285.3663024902344, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.20175080001354218, |
|
"rewards/margins": 0.22905221581459045, |
|
"rewards/rejected": -0.43080300092697144, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.1875, |
|
"learning_rate": 4.999661831436499e-06, |
|
"logits/chosen": -2.1845479011535645, |
|
"logits/rejected": -2.1678760051727295, |
|
"logps/chosen": -355.00604248046875, |
|
"logps/rejected": -341.51348876953125, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5404707789421082, |
|
"rewards/margins": 0.2492518424987793, |
|
"rewards/rejected": -0.7897226214408875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -2.064443349838257, |
|
"eval_logits/rejected": -1.9985278844833374, |
|
"eval_logps/chosen": -319.2918395996094, |
|
"eval_logps/rejected": -322.12091064453125, |
|
"eval_loss": 0.6174576282501221, |
|
"eval_rewards/accuracies": 0.671999990940094, |
|
"eval_rewards/chosen": -0.5398561954498291, |
|
"eval_rewards/margins": 0.2687341868877411, |
|
"eval_rewards/rejected": -0.8085903525352478, |
|
"eval_runtime": 449.2939, |
|
"eval_samples_per_second": 4.451, |
|
"eval_steps_per_second": 0.278, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.96875, |
|
"learning_rate": 4.9984929711403395e-06, |
|
"logits/chosen": -2.1534504890441895, |
|
"logits/rejected": -2.1233341693878174, |
|
"logps/chosen": -343.18817138671875, |
|
"logps/rejected": -340.7259216308594, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5040772557258606, |
|
"rewards/margins": 0.2695787847042084, |
|
"rewards/rejected": -0.7736560702323914, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 4.996489634487865e-06, |
|
"logits/chosen": -2.0994200706481934, |
|
"logits/rejected": -1.9647928476333618, |
|
"logps/chosen": -295.4856262207031, |
|
"logps/rejected": -336.73419189453125, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5017188787460327, |
|
"rewards/margins": 0.269430547952652, |
|
"rewards/rejected": -0.7711495161056519, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.0, |
|
"learning_rate": 4.9936524905772466e-06, |
|
"logits/chosen": -2.008256435394287, |
|
"logits/rejected": -2.0621442794799805, |
|
"logps/chosen": -281.52276611328125, |
|
"logps/rejected": -306.1183166503906, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3256304860115051, |
|
"rewards/margins": 0.3065679669380188, |
|
"rewards/rejected": -0.6321984529495239, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.90625, |
|
"learning_rate": 4.9899824869915e-06, |
|
"logits/chosen": -1.9556798934936523, |
|
"logits/rejected": -1.8381097316741943, |
|
"logps/chosen": -316.3447265625, |
|
"logps/rejected": -323.200439453125, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3776395618915558, |
|
"rewards/margins": 0.2974005341529846, |
|
"rewards/rejected": -0.675040066242218, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 10.25, |
|
"learning_rate": 4.985480849482012e-06, |
|
"logits/chosen": -1.5747647285461426, |
|
"logits/rejected": -1.4650704860687256, |
|
"logps/chosen": -333.0284118652344, |
|
"logps/rejected": -334.76806640625, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6769969463348389, |
|
"rewards/margins": 0.387776643037796, |
|
"rewards/rejected": -1.064773678779602, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 4.980149081559142e-06, |
|
"logits/chosen": -1.3563178777694702, |
|
"logits/rejected": -1.1254017353057861, |
|
"logps/chosen": -384.9044189453125, |
|
"logps/rejected": -386.17791748046875, |
|
"loss": 0.562, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8524494171142578, |
|
"rewards/margins": 0.5679513216018677, |
|
"rewards/rejected": -1.420400857925415, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 6.5, |
|
"learning_rate": 4.9739889639900655e-06, |
|
"logits/chosen": -0.7365055084228516, |
|
"logits/rejected": -0.5389373302459717, |
|
"logps/chosen": -318.93756103515625, |
|
"logps/rejected": -336.3540954589844, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6196847558021545, |
|
"rewards/margins": 0.42324098944664, |
|
"rewards/rejected": -1.0429257154464722, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 4.967002554204009e-06, |
|
"logits/chosen": -0.35692664980888367, |
|
"logits/rejected": -0.4370260238647461, |
|
"logps/chosen": -348.50592041015625, |
|
"logps/rejected": -375.6952209472656, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9018071889877319, |
|
"rewards/margins": 0.5076521635055542, |
|
"rewards/rejected": -1.4094593524932861, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 4.959192185605089e-06, |
|
"logits/chosen": -0.597706139087677, |
|
"logits/rejected": -0.4843805432319641, |
|
"logps/chosen": -354.8358154296875, |
|
"logps/rejected": -397.0480041503906, |
|
"loss": 0.5257, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7842257022857666, |
|
"rewards/margins": 0.6368502378463745, |
|
"rewards/rejected": -1.4210759401321411, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 4.950560466792969e-06, |
|
"logits/chosen": -0.4317222237586975, |
|
"logits/rejected": -0.48575448989868164, |
|
"logps/chosen": -346.1421203613281, |
|
"logps/rejected": -373.5080871582031, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8795005083084106, |
|
"rewards/margins": 0.45564961433410645, |
|
"rewards/rejected": -1.3351500034332275, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": 0.11951165646314621, |
|
"eval_logits/rejected": 0.21957716345787048, |
|
"eval_logps/chosen": -349.1536865234375, |
|
"eval_logps/rejected": -377.4862976074219, |
|
"eval_loss": 0.5706533193588257, |
|
"eval_rewards/accuracies": 0.6930000185966492, |
|
"eval_rewards/chosen": -0.8384745717048645, |
|
"eval_rewards/margins": 0.523769736289978, |
|
"eval_rewards/rejected": -1.3622443675994873, |
|
"eval_runtime": 449.2894, |
|
"eval_samples_per_second": 4.451, |
|
"eval_steps_per_second": 0.278, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 4.9411102806916185e-06, |
|
"logits/chosen": -0.18695969879627228, |
|
"logits/rejected": -0.09409158676862717, |
|
"logps/chosen": -356.5097961425781, |
|
"logps/rejected": -384.3717956542969, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.946184515953064, |
|
"rewards/margins": 0.5263864398002625, |
|
"rewards/rejected": -1.4725710153579712, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 4.930844783586424e-06, |
|
"logits/chosen": -0.17142558097839355, |
|
"logits/rejected": -0.16491857171058655, |
|
"logps/chosen": -350.50146484375, |
|
"logps/rejected": -370.40521240234375, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9111695289611816, |
|
"rewards/margins": 0.42720574140548706, |
|
"rewards/rejected": -1.3383753299713135, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 4.919767404070033e-06, |
|
"logits/chosen": -0.34910041093826294, |
|
"logits/rejected": -0.32271599769592285, |
|
"logps/chosen": -377.28887939453125, |
|
"logps/rejected": -369.8608703613281, |
|
"loss": 0.5684, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7913080453872681, |
|
"rewards/margins": 0.46660441160202026, |
|
"rewards/rejected": -1.2579123973846436, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 4.907881841897216e-06, |
|
"logits/chosen": -0.2663159966468811, |
|
"logits/rejected": -0.3360903859138489, |
|
"logps/chosen": -312.9281311035156, |
|
"logps/rejected": -343.1236877441406, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6925094723701477, |
|
"rewards/margins": 0.4805082380771637, |
|
"rewards/rejected": -1.1730177402496338, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 4.89519206674919e-06, |
|
"logits/chosen": -0.0721643716096878, |
|
"logits/rejected": -0.15388105809688568, |
|
"logps/chosen": -367.50555419921875, |
|
"logps/rejected": -385.8136291503906, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8829039335250854, |
|
"rewards/margins": 0.5089597702026367, |
|
"rewards/rejected": -1.3918637037277222, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 4.881702316907769e-06, |
|
"logits/chosen": -0.3347160518169403, |
|
"logits/rejected": -0.02113138698041439, |
|
"logps/chosen": -392.6307678222656, |
|
"logps/rejected": -411.7140197753906, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.851973831653595, |
|
"rewards/margins": 0.5162093043327332, |
|
"rewards/rejected": -1.3681831359863281, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 4.86741709783982e-06, |
|
"logits/chosen": 0.3518471121788025, |
|
"logits/rejected": 0.3347684442996979, |
|
"logps/chosen": -370.5345764160156, |
|
"logps/rejected": -374.44964599609375, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9835584759712219, |
|
"rewards/margins": 0.5378425717353821, |
|
"rewards/rejected": -1.521401047706604, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 4.852341180692471e-06, |
|
"logits/chosen": 0.45614609122276306, |
|
"logits/rejected": 0.686242938041687, |
|
"logps/chosen": -351.5860290527344, |
|
"logps/rejected": -388.1460266113281, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1243712902069092, |
|
"rewards/margins": 0.5316659212112427, |
|
"rewards/rejected": -1.6560373306274414, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 8.625, |
|
"learning_rate": 4.836479600699579e-06, |
|
"logits/chosen": 0.25028011202812195, |
|
"logits/rejected": 0.527495265007019, |
|
"logps/chosen": -387.6080322265625, |
|
"logps/rejected": -400.1957092285156, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1668118238449097, |
|
"rewards/margins": 0.5952257513999939, |
|
"rewards/rejected": -1.7620376348495483, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.819837655500014e-06, |
|
"logits/chosen": 0.5255290269851685, |
|
"logits/rejected": 0.35975736379623413, |
|
"logps/chosen": -298.29302978515625, |
|
"logps/rejected": -343.5097351074219, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.8005388379096985, |
|
"rewards/margins": 0.5582879781723022, |
|
"rewards/rejected": -1.3588266372680664, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": 0.7207584381103516, |
|
"eval_logits/rejected": 0.8423263430595398, |
|
"eval_logps/chosen": -346.00152587890625, |
|
"eval_logps/rejected": -382.4471435546875, |
|
"eval_loss": 0.5535975098609924, |
|
"eval_rewards/accuracies": 0.7229999899864197, |
|
"eval_rewards/chosen": -0.8069528937339783, |
|
"eval_rewards/margins": 0.6048997640609741, |
|
"eval_rewards/rejected": -1.4118527173995972, |
|
"eval_runtime": 450.2977, |
|
"eval_samples_per_second": 4.442, |
|
"eval_steps_per_second": 0.278, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 4.802420903368286e-06, |
|
"logits/chosen": 0.4230332374572754, |
|
"logits/rejected": 0.2597886025905609, |
|
"logps/chosen": -347.322509765625, |
|
"logps/rejected": -403.64508056640625, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9662551879882812, |
|
"rewards/margins": 0.6025245785713196, |
|
"rewards/rejected": -1.568779706954956, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 4.784235161358124e-06, |
|
"logits/chosen": 0.7549188733100891, |
|
"logits/rejected": 0.3922800123691559, |
|
"logps/chosen": -349.7222595214844, |
|
"logps/rejected": -404.5429992675781, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0141912698745728, |
|
"rewards/margins": 0.6538187265396118, |
|
"rewards/rejected": -1.6680099964141846, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 5.875, |
|
"learning_rate": 4.765286503359632e-06, |
|
"logits/chosen": 0.8512343168258667, |
|
"logits/rejected": 0.7955237627029419, |
|
"logps/chosen": -374.3222351074219, |
|
"logps/rejected": -425.25408935546875, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1482092142105103, |
|
"rewards/margins": 0.606701135635376, |
|
"rewards/rejected": -1.7549104690551758, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 7.21875, |
|
"learning_rate": 4.745581258070654e-06, |
|
"logits/chosen": 0.484804630279541, |
|
"logits/rejected": 0.20197534561157227, |
|
"logps/chosen": -325.0903625488281, |
|
"logps/rejected": -374.3199157714844, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.72711580991745, |
|
"rewards/margins": 0.6210989952087402, |
|
"rewards/rejected": -1.3482147455215454, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 4.725126006883047e-06, |
|
"logits/chosen": 0.9636757969856262, |
|
"logits/rejected": 0.9192026257514954, |
|
"logps/chosen": -341.41998291015625, |
|
"logps/rejected": -403.25836181640625, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9017482995986938, |
|
"rewards/margins": 0.7085460424423218, |
|
"rewards/rejected": -1.6102945804595947, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 4.70392758168454e-06, |
|
"logits/chosen": 1.3140493631362915, |
|
"logits/rejected": 0.9456102252006531, |
|
"logps/chosen": -385.99493408203125, |
|
"logps/rejected": -433.5835876464844, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.943748950958252, |
|
"rewards/margins": 0.8300511240959167, |
|
"rewards/rejected": -1.7738001346588135, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 7.5, |
|
"learning_rate": 4.68199306257695e-06, |
|
"logits/chosen": 1.4614379405975342, |
|
"logits/rejected": 1.2364531755447388, |
|
"logps/chosen": -397.2190856933594, |
|
"logps/rejected": -452.3384704589844, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2018821239471436, |
|
"rewards/margins": 0.7513980865478516, |
|
"rewards/rejected": -1.9532800912857056, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 6.625, |
|
"learning_rate": 4.659329775511478e-06, |
|
"logits/chosen": 1.343225121498108, |
|
"logits/rejected": 1.212619423866272, |
|
"logps/chosen": -399.1175537109375, |
|
"logps/rejected": -448.16473388671875, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.349334716796875, |
|
"rewards/margins": 0.7152506709098816, |
|
"rewards/rejected": -2.0645856857299805, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 4.635945289841902e-06, |
|
"logits/chosen": 0.8150955438613892, |
|
"logits/rejected": 0.598046600818634, |
|
"logps/chosen": -360.7403869628906, |
|
"logps/rejected": -376.80316162109375, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.086352825164795, |
|
"rewards/margins": 0.5136397480964661, |
|
"rewards/rejected": -1.5999925136566162, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 4.611847415796476e-06, |
|
"logits/chosen": 0.45536771416664124, |
|
"logits/rejected": 0.9996572732925415, |
|
"logps/chosen": -347.04986572265625, |
|
"logps/rejected": -361.86285400390625, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7578734159469604, |
|
"rewards/margins": 0.41181692481040955, |
|
"rewards/rejected": -1.1696903705596924, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 1.0708205699920654, |
|
"eval_logits/rejected": 1.2557843923568726, |
|
"eval_logps/chosen": -332.0845947265625, |
|
"eval_logps/rejected": -359.5694580078125, |
|
"eval_loss": 0.5575395226478577, |
|
"eval_rewards/accuracies": 0.7110000252723694, |
|
"eval_rewards/chosen": -0.6677836179733276, |
|
"eval_rewards/margins": 0.515292227268219, |
|
"eval_rewards/rejected": -1.1830756664276123, |
|
"eval_runtime": 450.3565, |
|
"eval_samples_per_second": 4.441, |
|
"eval_steps_per_second": 0.278, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 4.587044201869378e-06, |
|
"logits/chosen": 1.2414617538452148, |
|
"logits/rejected": 1.1634864807128906, |
|
"logps/chosen": -321.0514831542969, |
|
"logps/rejected": -368.89849853515625, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7744086384773254, |
|
"rewards/margins": 0.5061289668083191, |
|
"rewards/rejected": -1.280537724494934, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.875, |
|
"learning_rate": 4.561543932132574e-06, |
|
"logits/chosen": 1.5618733167648315, |
|
"logits/rejected": 1.069901466369629, |
|
"logps/chosen": -353.8107604980469, |
|
"logps/rejected": -423.22064208984375, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0526001453399658, |
|
"rewards/margins": 0.6813434362411499, |
|
"rewards/rejected": -1.7339435815811157, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 4.535355123469009e-06, |
|
"logits/chosen": 1.3146389722824097, |
|
"logits/rejected": 1.3446049690246582, |
|
"logps/chosen": -429.43133544921875, |
|
"logps/rejected": -472.72540283203125, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.510373830795288, |
|
"rewards/margins": 0.7111620903015137, |
|
"rewards/rejected": -2.2215359210968018, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 4.508486522728037e-06, |
|
"logits/chosen": 0.9349297285079956, |
|
"logits/rejected": 1.2060667276382446, |
|
"logps/chosen": -431.19757080078125, |
|
"logps/rejected": -456.5882263183594, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5400089025497437, |
|
"rewards/margins": 0.7873141169548035, |
|
"rewards/rejected": -2.3273231983184814, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 4.480947103804044e-06, |
|
"logits/chosen": 1.4042693376541138, |
|
"logits/rejected": 1.3253014087677002, |
|
"logps/chosen": -383.0726318359375, |
|
"logps/rejected": -433.75079345703125, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2447248697280884, |
|
"rewards/margins": 0.7737690806388855, |
|
"rewards/rejected": -2.018493890762329, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 4.75, |
|
"learning_rate": 4.452746064639239e-06, |
|
"logits/chosen": 1.8145596981048584, |
|
"logits/rejected": 1.7395979166030884, |
|
"logps/chosen": -346.5498962402344, |
|
"logps/rejected": -437.8565979003906, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9916859865188599, |
|
"rewards/margins": 0.6973880529403687, |
|
"rewards/rejected": -1.689074158668518, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 4.423892824151617e-06, |
|
"logits/chosen": 2.161806344985962, |
|
"logits/rejected": 2.135643243789673, |
|
"logps/chosen": -366.948974609375, |
|
"logps/rejected": -431.3831481933594, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.1190184354782104, |
|
"rewards/margins": 0.7804428935050964, |
|
"rewards/rejected": -1.8994615077972412, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 8.375, |
|
"learning_rate": 4.3943970190891164e-06, |
|
"logits/chosen": 1.9935728311538696, |
|
"logits/rejected": 1.705012321472168, |
|
"logps/chosen": -370.86553955078125, |
|
"logps/rejected": -437.19647216796875, |
|
"loss": 0.5531, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0637662410736084, |
|
"rewards/margins": 0.7619091272354126, |
|
"rewards/rejected": -1.825675368309021, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 4.364268500811025e-06, |
|
"logits/chosen": 2.146028995513916, |
|
"logits/rejected": 2.347506046295166, |
|
"logps/chosen": -367.67535400390625, |
|
"logps/rejected": -429.10919189453125, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3295910358428955, |
|
"rewards/margins": 0.7955694794654846, |
|
"rewards/rejected": -2.1251604557037354, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 4.333517331997704e-06, |
|
"logits/chosen": 1.7965142726898193, |
|
"logits/rejected": 1.3090850114822388, |
|
"logps/chosen": -423.20477294921875, |
|
"logps/rejected": -467.678955078125, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.322430968284607, |
|
"rewards/margins": 0.8503682017326355, |
|
"rewards/rejected": -2.1727993488311768, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": 2.7062325477600098, |
|
"eval_logits/rejected": 2.842660427093506, |
|
"eval_logps/chosen": -400.8144836425781, |
|
"eval_logps/rejected": -454.5939025878906, |
|
"eval_loss": 0.5359104871749878, |
|
"eval_rewards/accuracies": 0.7310000061988831, |
|
"eval_rewards/chosen": -1.3550822734832764, |
|
"eval_rewards/margins": 0.778237521648407, |
|
"eval_rewards/rejected": -2.133319854736328, |
|
"eval_runtime": 449.3131, |
|
"eval_samples_per_second": 4.451, |
|
"eval_steps_per_second": 0.278, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 4.302153783289737e-06, |
|
"logits/chosen": 1.6601619720458984, |
|
"logits/rejected": 2.115678310394287, |
|
"logps/chosen": -421.45556640625, |
|
"logps/rejected": -427.3929138183594, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3307194709777832, |
|
"rewards/margins": 0.6118677258491516, |
|
"rewards/rejected": -1.94258713722229, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 4.270188329857613e-06, |
|
"logits/chosen": 1.341740608215332, |
|
"logits/rejected": 1.3879600763320923, |
|
"logps/chosen": -374.7286071777344, |
|
"logps/rejected": -436.7850646972656, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9597892761230469, |
|
"rewards/margins": 0.8522161245346069, |
|
"rewards/rejected": -1.8120054006576538, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 8.125, |
|
"learning_rate": 4.237631647903115e-06, |
|
"logits/chosen": 2.013131856918335, |
|
"logits/rejected": 1.7528884410858154, |
|
"logps/chosen": -368.9488830566406, |
|
"logps/rejected": -439.6729431152344, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0934112071990967, |
|
"rewards/margins": 0.8812692761421204, |
|
"rewards/rejected": -1.9746805429458618, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 4.204494611093548e-06, |
|
"logits/chosen": 1.9838985204696655, |
|
"logits/rejected": 2.020540714263916, |
|
"logps/chosen": -377.3255920410156, |
|
"logps/rejected": -431.0025329589844, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2972571849822998, |
|
"rewards/margins": 0.6514188051223755, |
|
"rewards/rejected": -1.9486758708953857, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 4.170788286930024e-06, |
|
"logits/chosen": 1.998337984085083, |
|
"logits/rejected": 2.040975332260132, |
|
"logps/chosen": -384.91455078125, |
|
"logps/rejected": -467.4063415527344, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4080098867416382, |
|
"rewards/margins": 0.744787335395813, |
|
"rewards/rejected": -2.152797222137451, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 4.136523933051005e-06, |
|
"logits/chosen": 2.7154483795166016, |
|
"logits/rejected": 2.629507541656494, |
|
"logps/chosen": -394.7875061035156, |
|
"logps/rejected": -470.86932373046875, |
|
"loss": 0.5028, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6549485921859741, |
|
"rewards/margins": 0.7828361392021179, |
|
"rewards/rejected": -2.4377849102020264, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 4.101712993472348e-06, |
|
"logits/chosen": 2.115504026412964, |
|
"logits/rejected": 2.440556049346924, |
|
"logps/chosen": -407.92193603515625, |
|
"logps/rejected": -464.93341064453125, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.5494651794433594, |
|
"rewards/margins": 0.6849027872085571, |
|
"rewards/rejected": -2.234367847442627, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 4.066367094765091e-06, |
|
"logits/chosen": 1.77793288230896, |
|
"logits/rejected": 1.8030977249145508, |
|
"logps/chosen": -431.06451416015625, |
|
"logps/rejected": -489.83087158203125, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1827547550201416, |
|
"rewards/margins": 0.8239189386367798, |
|
"rewards/rejected": -2.006673812866211, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 4.030498042172277e-06, |
|
"logits/chosen": 1.8615095615386963, |
|
"logits/rejected": 2.4596309661865234, |
|
"logps/chosen": -379.59381103515625, |
|
"logps/rejected": -427.30242919921875, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1986615657806396, |
|
"rewards/margins": 0.7092850208282471, |
|
"rewards/rejected": -1.9079463481903076, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.5, |
|
"learning_rate": 3.994117815666095e-06, |
|
"logits/chosen": 1.8667644262313843, |
|
"logits/rejected": 2.414881944656372, |
|
"logps/chosen": -415.29058837890625, |
|
"logps/rejected": -435.1332092285156, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3635553121566772, |
|
"rewards/margins": 0.7062723636627197, |
|
"rewards/rejected": -2.0698277950286865, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": 2.9904465675354004, |
|
"eval_logits/rejected": 3.1370763778686523, |
|
"eval_logps/chosen": -394.3450622558594, |
|
"eval_logps/rejected": -445.326904296875, |
|
"eval_loss": 0.531741201877594, |
|
"eval_rewards/accuracies": 0.7260000109672546, |
|
"eval_rewards/chosen": -1.2903878688812256, |
|
"eval_rewards/margins": 0.7502626776695251, |
|
"eval_rewards/rejected": -2.0406503677368164, |
|
"eval_runtime": 449.2183, |
|
"eval_samples_per_second": 4.452, |
|
"eval_steps_per_second": 0.278, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 3.957238565946672e-06, |
|
"logits/chosen": 2.365293502807617, |
|
"logits/rejected": 2.3199546337127686, |
|
"logps/chosen": -411.216552734375, |
|
"logps/rejected": -493.239501953125, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5279957056045532, |
|
"rewards/margins": 0.8807098269462585, |
|
"rewards/rejected": -2.408705472946167, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 3.919872610383831e-06, |
|
"logits/chosen": 2.7599196434020996, |
|
"logits/rejected": 2.716507911682129, |
|
"logps/chosen": -426.38134765625, |
|
"logps/rejected": -493.5303649902344, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8960431814193726, |
|
"rewards/margins": 0.8603929281234741, |
|
"rewards/rejected": -2.756436347961426, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 3.882032428903195e-06, |
|
"logits/chosen": 2.7069733142852783, |
|
"logits/rejected": 2.9215176105499268, |
|
"logps/chosen": -451.400390625, |
|
"logps/rejected": -510.039306640625, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9560165405273438, |
|
"rewards/margins": 0.7693789005279541, |
|
"rewards/rejected": -2.7253952026367188, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 3.84373065981799e-06, |
|
"logits/chosen": 2.783442258834839, |
|
"logits/rejected": 3.478522777557373, |
|
"logps/chosen": -454.78399658203125, |
|
"logps/rejected": -508.5516052246094, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5752308368682861, |
|
"rewards/margins": 0.9094041585922241, |
|
"rewards/rejected": -2.4846348762512207, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 3.8049800956079552e-06, |
|
"logits/chosen": 2.476569890975952, |
|
"logits/rejected": 2.471484661102295, |
|
"logps/chosen": -381.59808349609375, |
|
"logps/rejected": -432.3984375, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1650186777114868, |
|
"rewards/margins": 0.6638428568840027, |
|
"rewards/rejected": -1.8288615942001343, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 7.15625, |
|
"learning_rate": 3.765793678646753e-06, |
|
"logits/chosen": 2.8157763481140137, |
|
"logits/rejected": 2.5621135234832764, |
|
"logps/chosen": -388.00347900390625, |
|
"logps/rejected": -389.6156005859375, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2750461101531982, |
|
"rewards/margins": 0.4696625769138336, |
|
"rewards/rejected": -1.7447086572647095, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 3.726184496879323e-06, |
|
"logits/chosen": 2.844412088394165, |
|
"logits/rejected": 3.3043785095214844, |
|
"logps/chosen": -435.10986328125, |
|
"logps/rejected": -469.0890197753906, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.465742588043213, |
|
"rewards/margins": 0.7117661237716675, |
|
"rewards/rejected": -2.17750883102417, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 5.625, |
|
"learning_rate": 3.686165779450619e-06, |
|
"logits/chosen": 3.148547887802124, |
|
"logits/rejected": 3.126634120941162, |
|
"logps/chosen": -391.44647216796875, |
|
"logps/rejected": -427.7322692871094, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3907300233840942, |
|
"rewards/margins": 0.693662703037262, |
|
"rewards/rejected": -2.084392786026001, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 8.5, |
|
"learning_rate": 3.645750892287178e-06, |
|
"logits/chosen": 3.375934600830078, |
|
"logits/rejected": 2.9477076530456543, |
|
"logps/chosen": -400.77655029296875, |
|
"logps/rejected": -481.9225158691406, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5433567762374878, |
|
"rewards/margins": 0.8481906056404114, |
|
"rewards/rejected": -2.391547441482544, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 3.604953333633009e-06, |
|
"logits/chosen": 3.302586317062378, |
|
"logits/rejected": 2.6571342945098877, |
|
"logps/chosen": -453.5899353027344, |
|
"logps/rejected": -497.45440673828125, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8011194467544556, |
|
"rewards/margins": 0.7497151494026184, |
|
"rewards/rejected": -2.5508346557617188, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 3.5383141040802, |
|
"eval_logits/rejected": 3.714007616043091, |
|
"eval_logps/chosen": -425.8876647949219, |
|
"eval_logps/rejected": -488.1441650390625, |
|
"eval_loss": 0.5149358510971069, |
|
"eval_rewards/accuracies": 0.7450000047683716, |
|
"eval_rewards/chosen": -1.6058142185211182, |
|
"eval_rewards/margins": 0.8630084991455078, |
|
"eval_rewards/rejected": -2.468822717666626, |
|
"eval_runtime": 450.9184, |
|
"eval_samples_per_second": 4.435, |
|
"eval_steps_per_second": 0.277, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 3.56378672954129e-06, |
|
"logits/chosen": 2.6567466259002686, |
|
"logits/rejected": 2.880585193634033, |
|
"logps/chosen": -404.9405822753906, |
|
"logps/rejected": -479.71783447265625, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5296947956085205, |
|
"rewards/margins": 0.8995970487594604, |
|
"rewards/rejected": -2.4292919635772705, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 11.625, |
|
"learning_rate": 3.5222648293233806e-06, |
|
"logits/chosen": 2.2279410362243652, |
|
"logits/rejected": 2.345160961151123, |
|
"logps/chosen": -459.04241943359375, |
|
"logps/rejected": -514.1052856445312, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7574459314346313, |
|
"rewards/margins": 0.675226092338562, |
|
"rewards/rejected": -2.4326720237731934, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 3.4804015009566573e-06, |
|
"logits/chosen": 1.985507607460022, |
|
"logits/rejected": 2.084001302719116, |
|
"logps/chosen": -481.2820739746094, |
|
"logps/rejected": -523.9012451171875, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8201816082000732, |
|
"rewards/margins": 0.7969433069229126, |
|
"rewards/rejected": -2.6171250343322754, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 3.4382107264527244e-06, |
|
"logits/chosen": 2.884337902069092, |
|
"logits/rejected": 2.2890899181365967, |
|
"logps/chosen": -427.71044921875, |
|
"logps/rejected": -492.051513671875, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8584165573120117, |
|
"rewards/margins": 0.7980149984359741, |
|
"rewards/rejected": -2.6564316749572754, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 3.3957065971875387e-06, |
|
"logits/chosen": 2.7502715587615967, |
|
"logits/rejected": 3.0479960441589355, |
|
"logps/chosen": -470.54010009765625, |
|
"logps/rejected": -530.3944091796875, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.191692352294922, |
|
"rewards/margins": 0.729444146156311, |
|
"rewards/rejected": -2.9211363792419434, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 4.875, |
|
"learning_rate": 3.352903309194999e-06, |
|
"logits/chosen": 2.7881550788879395, |
|
"logits/rejected": 2.5179460048675537, |
|
"logps/chosen": -482.68731689453125, |
|
"logps/rejected": -551.6927490234375, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.1960439682006836, |
|
"rewards/margins": 0.8961966633796692, |
|
"rewards/rejected": -3.092240810394287, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 8.25, |
|
"learning_rate": 3.309815158425591e-06, |
|
"logits/chosen": 2.5673789978027344, |
|
"logits/rejected": 2.7013182640075684, |
|
"logps/chosen": -454.56280517578125, |
|
"logps/rejected": -510.7538146972656, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.938982367515564, |
|
"rewards/margins": 0.9660106897354126, |
|
"rewards/rejected": -2.9049932956695557, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 12.6875, |
|
"learning_rate": 3.266456535971654e-06, |
|
"logits/chosen": 2.3261733055114746, |
|
"logits/rejected": 2.8744044303894043, |
|
"logps/chosen": -450.94287109375, |
|
"logps/rejected": -544.7605590820312, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6397998332977295, |
|
"rewards/margins": 1.0769449472427368, |
|
"rewards/rejected": -2.716744899749756, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 3.2228419232608692e-06, |
|
"logits/chosen": 3.20257568359375, |
|
"logits/rejected": 3.1647896766662598, |
|
"logps/chosen": -459.48199462890625, |
|
"logps/rejected": -522.6142578125, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.9324430227279663, |
|
"rewards/margins": 0.9745299220085144, |
|
"rewards/rejected": -2.906972885131836, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.125, |
|
"learning_rate": 3.1789858872195888e-06, |
|
"logits/chosen": 2.845961809158325, |
|
"logits/rejected": 3.169747829437256, |
|
"logps/chosen": -498.2582092285156, |
|
"logps/rejected": -551.0, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3102707862854004, |
|
"rewards/margins": 0.9176058769226074, |
|
"rewards/rejected": -3.227876663208008, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": 4.20645809173584, |
|
"eval_logits/rejected": 4.417923450469971, |
|
"eval_logps/chosen": -522.4096069335938, |
|
"eval_logps/rejected": -595.375244140625, |
|
"eval_loss": 0.5125031471252441, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -2.571033239364624, |
|
"eval_rewards/margins": 0.9701002240180969, |
|
"eval_rewards/rejected": -3.541133403778076, |
|
"eval_runtime": 449.2414, |
|
"eval_samples_per_second": 4.452, |
|
"eval_steps_per_second": 0.278, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 3.1349030754075945e-06, |
|
"logits/chosen": 3.1661219596862793, |
|
"logits/rejected": 3.1258158683776855, |
|
"logps/chosen": -524.2811889648438, |
|
"logps/rejected": -597.2448120117188, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.592829704284668, |
|
"rewards/margins": 0.9196772575378418, |
|
"rewards/rejected": -3.512507200241089, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 5.875, |
|
"learning_rate": 3.0906082111259313e-06, |
|
"logits/chosen": 2.6309409141540527, |
|
"logits/rejected": 2.872328758239746, |
|
"logps/chosen": -511.0491638183594, |
|
"logps/rejected": -565.0892333984375, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.4000844955444336, |
|
"rewards/margins": 1.0670232772827148, |
|
"rewards/rejected": -3.4671077728271484, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 3.046116088499449e-06, |
|
"logits/chosen": 2.71614408493042, |
|
"logits/rejected": 2.5246307849884033, |
|
"logps/chosen": -510.1669921875, |
|
"logps/rejected": -567.8058471679688, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.43577241897583, |
|
"rewards/margins": 0.7605811953544617, |
|
"rewards/rejected": -3.1963534355163574, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.9375, |
|
"learning_rate": 3.0014415675356813e-06, |
|
"logits/chosen": 2.3216631412506104, |
|
"logits/rejected": 2.393738031387329, |
|
"logps/chosen": -490.48681640625, |
|
"logps/rejected": -565.7747802734375, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.194924831390381, |
|
"rewards/margins": 1.1211020946502686, |
|
"rewards/rejected": -3.3160271644592285, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 2.9565995691617242e-06, |
|
"logits/chosen": 2.1706225872039795, |
|
"logits/rejected": 2.3366177082061768, |
|
"logps/chosen": -508.671630859375, |
|
"logps/rejected": -567.91552734375, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.3915352821350098, |
|
"rewards/margins": 0.8179728388786316, |
|
"rewards/rejected": -3.209507703781128, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 6.625, |
|
"learning_rate": 2.9116050702407706e-06, |
|
"logits/chosen": 2.606666088104248, |
|
"logits/rejected": 2.1300926208496094, |
|
"logps/chosen": -512.1990966796875, |
|
"logps/rejected": -589.0401611328125, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.3908963203430176, |
|
"rewards/margins": 0.9071556925773621, |
|
"rewards/rejected": -3.2980518341064453, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 2.8664730985699537e-06, |
|
"logits/chosen": 2.07898211479187, |
|
"logits/rejected": 2.355185031890869, |
|
"logps/chosen": -526.248046875, |
|
"logps/rejected": -580.1257934570312, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.5468173027038574, |
|
"rewards/margins": 0.7905376553535461, |
|
"rewards/rejected": -3.337355136871338, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 2.8212187278611907e-06, |
|
"logits/chosen": 1.8555338382720947, |
|
"logits/rejected": 1.950961709022522, |
|
"logps/chosen": -461.7571716308594, |
|
"logps/rejected": -560.3824462890625, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.2809879779815674, |
|
"rewards/margins": 0.9975978136062622, |
|
"rewards/rejected": -3.278585910797119, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 2.7758570727066843e-06, |
|
"logits/chosen": 2.0223584175109863, |
|
"logits/rejected": 1.8026624917984009, |
|
"logps/chosen": -522.4490966796875, |
|
"logps/rejected": -621.7756958007812, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.564455509185791, |
|
"rewards/margins": 1.1646798849105835, |
|
"rewards/rejected": -3.729135036468506, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 2.730403283530767e-06, |
|
"logits/chosen": 2.3971285820007324, |
|
"logits/rejected": 2.2188148498535156, |
|
"logps/chosen": -491.7401428222656, |
|
"logps/rejected": -557.7470703125, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.730563163757324, |
|
"rewards/margins": 0.6435677409172058, |
|
"rewards/rejected": -3.374130964279175, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 2.4408442974090576, |
|
"eval_logits/rejected": 2.651709794998169, |
|
"eval_logps/chosen": -527.5897827148438, |
|
"eval_logps/rejected": -608.1039428710938, |
|
"eval_loss": 0.503455638885498, |
|
"eval_rewards/accuracies": 0.7369999885559082, |
|
"eval_rewards/chosen": -2.622835874557495, |
|
"eval_rewards/margins": 1.0455843210220337, |
|
"eval_rewards/rejected": -3.6684203147888184, |
|
"eval_runtime": 449.3106, |
|
"eval_samples_per_second": 4.451, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 2.6848725415297888e-06, |
|
"logits/chosen": 1.7300176620483398, |
|
"logits/rejected": 2.1867687702178955, |
|
"logps/chosen": -528.530517578125, |
|
"logps/rejected": -599.1966552734375, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.534254550933838, |
|
"rewards/margins": 0.9947689771652222, |
|
"rewards/rejected": -3.5290236473083496, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.75, |
|
"learning_rate": 2.639280053601719e-06, |
|
"logits/chosen": 1.3660762310028076, |
|
"logits/rejected": 1.5705347061157227, |
|
"logps/chosen": -476.1238708496094, |
|
"logps/rejected": -563.6986083984375, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1599488258361816, |
|
"rewards/margins": 1.0557085275650024, |
|
"rewards/rejected": -3.2156574726104736, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.6875, |
|
"learning_rate": 2.59364104726716e-06, |
|
"logits/chosen": 1.2051117420196533, |
|
"logits/rejected": 1.490389347076416, |
|
"logps/chosen": -445.2794494628906, |
|
"logps/rejected": -525.7783813476562, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.794106125831604, |
|
"rewards/margins": 1.1164175271987915, |
|
"rewards/rejected": -2.9105236530303955, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 6.0, |
|
"learning_rate": 2.547970765583491e-06, |
|
"logits/chosen": 1.5332322120666504, |
|
"logits/rejected": 1.4781954288482666, |
|
"logps/chosen": -453.91705322265625, |
|
"logps/rejected": -533.1343994140625, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.072399854660034, |
|
"rewards/margins": 1.0115078687667847, |
|
"rewards/rejected": -3.0839076042175293, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 2.502284462053799e-06, |
|
"logits/chosen": 1.4764400720596313, |
|
"logits/rejected": 1.6051571369171143, |
|
"logps/chosen": -488.93017578125, |
|
"logps/rejected": -568.1224365234375, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2974724769592285, |
|
"rewards/margins": 0.9695172309875488, |
|
"rewards/rejected": -3.2669894695281982, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 2.456597395532338e-06, |
|
"logits/chosen": 1.6312462091445923, |
|
"logits/rejected": 1.5368585586547852, |
|
"logps/chosen": -527.2152099609375, |
|
"logps/rejected": -628.8519897460938, |
|
"loss": 0.459, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.6452157497406006, |
|
"rewards/margins": 1.1508594751358032, |
|
"rewards/rejected": -3.7960751056671143, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 8.125, |
|
"learning_rate": 2.4109248251281953e-06, |
|
"logits/chosen": 1.7708513736724854, |
|
"logits/rejected": 2.067249298095703, |
|
"logps/chosen": -618.783203125, |
|
"logps/rejected": -723.3242797851562, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.3241093158721924, |
|
"rewards/margins": 1.1731927394866943, |
|
"rewards/rejected": -4.497302055358887, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 2.365282005108875e-06, |
|
"logits/chosen": 2.138941526412964, |
|
"logits/rejected": 2.0456833839416504, |
|
"logps/chosen": -570.5878295898438, |
|
"logps/rejected": -641.3853759765625, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.1022305488586426, |
|
"rewards/margins": 0.8927729725837708, |
|
"rewards/rejected": -3.9950034618377686, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 2.319684179805491e-06, |
|
"logits/chosen": 1.5040104389190674, |
|
"logits/rejected": 1.8883628845214844, |
|
"logps/chosen": -540.3502197265625, |
|
"logps/rejected": -629.1265869140625, |
|
"loss": 0.4734, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.862849235534668, |
|
"rewards/margins": 1.0459764003753662, |
|
"rewards/rejected": -3.908825635910034, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.5, |
|
"learning_rate": 2.2741465785212905e-06, |
|
"logits/chosen": 1.5514931678771973, |
|
"logits/rejected": 1.7437639236450195, |
|
"logps/chosen": -565.4725341796875, |
|
"logps/rejected": -635.3278198242188, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.8363194465637207, |
|
"rewards/margins": 1.0234676599502563, |
|
"rewards/rejected": -3.8597874641418457, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": 2.0694196224212646, |
|
"eval_logits/rejected": 2.2637150287628174, |
|
"eval_logps/chosen": -528.3989868164062, |
|
"eval_logps/rejected": -612.6806030273438, |
|
"eval_loss": 0.5027692914009094, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -2.6309285163879395, |
|
"eval_rewards/margins": 1.0832594633102417, |
|
"eval_rewards/rejected": -3.7141873836517334, |
|
"eval_runtime": 449.2444, |
|
"eval_samples_per_second": 4.452, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 2.2286844104451848e-06, |
|
"logits/chosen": 1.3312523365020752, |
|
"logits/rejected": 1.3924553394317627, |
|
"logps/chosen": -527.3834228515625, |
|
"logps/rejected": -586.1485595703125, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.468048334121704, |
|
"rewards/margins": 0.9593726992607117, |
|
"rewards/rejected": -3.4274210929870605, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 5.875, |
|
"learning_rate": 2.183312859572008e-06, |
|
"logits/chosen": 1.220323920249939, |
|
"logits/rejected": 1.4167616367340088, |
|
"logps/chosen": -527.3272705078125, |
|
"logps/rejected": -584.5949096679688, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.417142868041992, |
|
"rewards/margins": 0.9714761972427368, |
|
"rewards/rejected": -3.3886189460754395, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 9.5, |
|
"learning_rate": 2.1380470796311843e-06, |
|
"logits/chosen": 1.8727235794067383, |
|
"logits/rejected": 1.7719242572784424, |
|
"logps/chosen": -512.21484375, |
|
"logps/rejected": -586.517333984375, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.514179229736328, |
|
"rewards/margins": 1.0654077529907227, |
|
"rewards/rejected": -3.5795867443084717, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 8.5, |
|
"learning_rate": 2.092902189025507e-06, |
|
"logits/chosen": 2.006112813949585, |
|
"logits/rejected": 1.8822578191757202, |
|
"logps/chosen": -504.0730895996094, |
|
"logps/rejected": -584.8082885742188, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.6244325637817383, |
|
"rewards/margins": 0.8445955514907837, |
|
"rewards/rejected": -3.4690279960632324, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 7.0, |
|
"learning_rate": 2.0478932657817105e-06, |
|
"logits/chosen": 1.5363643169403076, |
|
"logits/rejected": 1.6541494131088257, |
|
"logps/chosen": -516.8738403320312, |
|
"logps/rejected": -631.0724487304688, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3589491844177246, |
|
"rewards/margins": 0.9803365468978882, |
|
"rewards/rejected": -3.3392856121063232, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 6.5, |
|
"learning_rate": 2.0030353425145376e-06, |
|
"logits/chosen": 1.454318642616272, |
|
"logits/rejected": 1.3277199268341064, |
|
"logps/chosen": -472.8223571777344, |
|
"logps/rejected": -549.0642700195312, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.163424015045166, |
|
"rewards/margins": 0.8882259130477905, |
|
"rewards/rejected": -3.051649808883667, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 10.0, |
|
"learning_rate": 1.958343401405964e-06, |
|
"logits/chosen": 1.2178795337677002, |
|
"logits/rejected": 1.890763521194458, |
|
"logps/chosen": -465.97210693359375, |
|
"logps/rejected": -528.733154296875, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.076791286468506, |
|
"rewards/margins": 0.8051401972770691, |
|
"rewards/rejected": -2.881931781768799, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 6.25, |
|
"learning_rate": 1.9138323692012734e-06, |
|
"logits/chosen": 1.706913709640503, |
|
"logits/rejected": 1.0627291202545166, |
|
"logps/chosen": -456.10162353515625, |
|
"logps/rejected": -510.69854736328125, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9897725582122803, |
|
"rewards/margins": 0.8308976888656616, |
|
"rewards/rejected": -2.8206703662872314, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 1.8695171122236443e-06, |
|
"logits/chosen": 1.7154747247695923, |
|
"logits/rejected": 1.9419562816619873, |
|
"logps/chosen": -424.7981872558594, |
|
"logps/rejected": -494.462158203125, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0186305046081543, |
|
"rewards/margins": 0.8082389831542969, |
|
"rewards/rejected": -2.826869487762451, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 9.25, |
|
"learning_rate": 1.8254124314089225e-06, |
|
"logits/chosen": 1.758643388748169, |
|
"logits/rejected": 1.7241748571395874, |
|
"logps/chosen": -456.5091247558594, |
|
"logps/rejected": -547.2735595703125, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.281468152999878, |
|
"rewards/margins": 0.8465303182601929, |
|
"rewards/rejected": -3.1279983520507812, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 2.2262675762176514, |
|
"eval_logits/rejected": 2.4042489528656006, |
|
"eval_logps/chosen": -509.4260559082031, |
|
"eval_logps/rejected": -582.6142578125, |
|
"eval_loss": 0.4964662492275238, |
|
"eval_rewards/accuracies": 0.753000020980835, |
|
"eval_rewards/chosen": -2.4411978721618652, |
|
"eval_rewards/margins": 0.9723253846168518, |
|
"eval_rewards/rejected": -3.4135231971740723, |
|
"eval_runtime": 450.7796, |
|
"eval_samples_per_second": 4.437, |
|
"eval_steps_per_second": 0.277, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 1.781533057362221e-06, |
|
"logits/chosen": 2.2214999198913574, |
|
"logits/rejected": 1.9609037637710571, |
|
"logps/chosen": -481.3634338378906, |
|
"logps/rejected": -556.6956176757812, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4214730262756348, |
|
"rewards/margins": 1.1080448627471924, |
|
"rewards/rejected": -3.5295181274414062, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 1.7378936454380277e-06, |
|
"logits/chosen": 2.2386937141418457, |
|
"logits/rejected": 1.7253150939941406, |
|
"logps/chosen": -516.8323364257812, |
|
"logps/rejected": -622.4188232421875, |
|
"loss": 0.4432, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.6058599948883057, |
|
"rewards/margins": 1.1149176359176636, |
|
"rewards/rejected": -3.7207775115966797, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 6.1875, |
|
"learning_rate": 1.6945087708454273e-06, |
|
"logits/chosen": 2.12276554107666, |
|
"logits/rejected": 1.9039256572723389, |
|
"logps/chosen": -525.0025634765625, |
|
"logps/rejected": -638.1419067382812, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.7907423973083496, |
|
"rewards/margins": 1.0106900930404663, |
|
"rewards/rejected": -3.8014328479766846, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 1.651392923780105e-06, |
|
"logits/chosen": 1.8808460235595703, |
|
"logits/rejected": 1.99996817111969, |
|
"logps/chosen": -550.5272216796875, |
|
"logps/rejected": -645.2981567382812, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.9035916328430176, |
|
"rewards/margins": 1.0486520528793335, |
|
"rewards/rejected": -3.9522433280944824, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 1.608560504584737e-06, |
|
"logits/chosen": 2.11899733543396, |
|
"logits/rejected": 1.9603424072265625, |
|
"logps/chosen": -535.086181640625, |
|
"logps/rejected": -645.9200439453125, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.8361852169036865, |
|
"rewards/margins": 1.132416009902954, |
|
"rewards/rejected": -3.9686012268066406, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 1.5660258189393945e-06, |
|
"logits/chosen": 1.9444576501846313, |
|
"logits/rejected": 1.9254415035247803, |
|
"logps/chosen": -544.8070068359375, |
|
"logps/rejected": -617.1060791015625, |
|
"loss": 0.4611, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.795332431793213, |
|
"rewards/margins": 1.0758298635482788, |
|
"rewards/rejected": -3.871161937713623, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 1.5238030730835578e-06, |
|
"logits/chosen": 1.9484163522720337, |
|
"logits/rejected": 1.8690261840820312, |
|
"logps/chosen": -509.67449951171875, |
|
"logps/rejected": -591.234375, |
|
"loss": 0.4893, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.839465618133545, |
|
"rewards/margins": 0.8926876187324524, |
|
"rewards/rejected": -3.7321534156799316, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 1.4819063690713565e-06, |
|
"logits/chosen": 2.2301783561706543, |
|
"logits/rejected": 2.2617290019989014, |
|
"logps/chosen": -532.890380859375, |
|
"logps/rejected": -599.6547241210938, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.8411078453063965, |
|
"rewards/margins": 0.9625542759895325, |
|
"rewards/rejected": -3.8036625385284424, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 1.4403497000615885e-06, |
|
"logits/chosen": 1.2555204629898071, |
|
"logits/rejected": 1.2910771369934082, |
|
"logps/chosen": -531.36328125, |
|
"logps/rejected": -612.5070190429688, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5224602222442627, |
|
"rewards/margins": 1.1016714572906494, |
|
"rewards/rejected": -3.624131679534912, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 1.3991469456441273e-06, |
|
"logits/chosen": 1.42030668258667, |
|
"logits/rejected": 1.6860134601593018, |
|
"logps/chosen": -482.36065673828125, |
|
"logps/rejected": -559.9107666015625, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.3926775455474854, |
|
"rewards/margins": 0.9813091158866882, |
|
"rewards/rejected": -3.3739867210388184, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": 2.0121309757232666, |
|
"eval_logits/rejected": 2.206488609313965, |
|
"eval_logps/chosen": -492.31475830078125, |
|
"eval_logps/rejected": -570.6591186523438, |
|
"eval_loss": 0.4941355586051941, |
|
"eval_rewards/accuracies": 0.7480000257492065, |
|
"eval_rewards/chosen": -2.270085096359253, |
|
"eval_rewards/margins": 1.023887276649475, |
|
"eval_rewards/rejected": -3.2939727306365967, |
|
"eval_runtime": 449.2395, |
|
"eval_samples_per_second": 4.452, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 5.625, |
|
"learning_rate": 1.3583118672042441e-06, |
|
"logits/chosen": 1.6364719867706299, |
|
"logits/rejected": 1.8250305652618408, |
|
"logps/chosen": -467.3515625, |
|
"logps/rejected": -567.787841796875, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.2239139080047607, |
|
"rewards/margins": 1.1083593368530273, |
|
"rewards/rejected": -3.332273483276367, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 8.5, |
|
"learning_rate": 1.3178581033264218e-06, |
|
"logits/chosen": 1.3394782543182373, |
|
"logits/rejected": 1.563076138496399, |
|
"logps/chosen": -541.9149169921875, |
|
"logps/rejected": -596.292236328125, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.4323418140411377, |
|
"rewards/margins": 0.9971737861633301, |
|
"rewards/rejected": -3.4295153617858887, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 1.2777991652391757e-06, |
|
"logits/chosen": 1.7200143337249756, |
|
"logits/rejected": 1.6109498739242554, |
|
"logps/chosen": -539.2000732421875, |
|
"logps/rejected": -615.1435546875, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6219089031219482, |
|
"rewards/margins": 0.9337452054023743, |
|
"rewards/rejected": -3.5556540489196777, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 10.0, |
|
"learning_rate": 1.2381484323024178e-06, |
|
"logits/chosen": 1.7607828378677368, |
|
"logits/rejected": 1.8438133001327515, |
|
"logps/chosen": -535.6881713867188, |
|
"logps/rejected": -615.4736938476562, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7043251991271973, |
|
"rewards/margins": 1.0787712335586548, |
|
"rewards/rejected": -3.7830963134765625, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 1.1989191475388518e-06, |
|
"logits/chosen": 2.0806539058685303, |
|
"logits/rejected": 1.8068488836288452, |
|
"logps/chosen": -544.23291015625, |
|
"logps/rejected": -627.465087890625, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6438353061676025, |
|
"rewards/margins": 0.9974311590194702, |
|
"rewards/rejected": -3.641266345977783, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 1.160124413210918e-06, |
|
"logits/chosen": 1.998169183731079, |
|
"logits/rejected": 1.928439736366272, |
|
"logps/chosen": -522.8230590820312, |
|
"logps/rejected": -579.4949340820312, |
|
"loss": 0.4773, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.640355110168457, |
|
"rewards/margins": 1.0438846349716187, |
|
"rewards/rejected": -3.6842403411865234, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 1.1217771864447396e-06, |
|
"logits/chosen": 1.564211368560791, |
|
"logits/rejected": 1.8740813732147217, |
|
"logps/chosen": -525.1043090820312, |
|
"logps/rejected": -598.16259765625, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6339075565338135, |
|
"rewards/margins": 1.0636084079742432, |
|
"rewards/rejected": -3.6975159645080566, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 1.08389027490255e-06, |
|
"logits/chosen": 1.5220708847045898, |
|
"logits/rejected": 1.471215844154358, |
|
"logps/chosen": -577.1598510742188, |
|
"logps/rejected": -613.5559692382812, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.865147829055786, |
|
"rewards/margins": 0.7697300910949707, |
|
"rewards/rejected": -3.6348776817321777, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 1.046476332505036e-06, |
|
"logits/chosen": 1.776266098022461, |
|
"logits/rejected": 1.908911943435669, |
|
"logps/chosen": -504.511962890625, |
|
"logps/rejected": -586.9796142578125, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.7093136310577393, |
|
"rewards/margins": 0.8418199419975281, |
|
"rewards/rejected": -3.551133632659912, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 1.0095478552050348e-06, |
|
"logits/chosen": 1.7001237869262695, |
|
"logits/rejected": 1.5393736362457275, |
|
"logps/chosen": -551.8426513671875, |
|
"logps/rejected": -619.8112182617188, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7164037227630615, |
|
"rewards/margins": 1.046526312828064, |
|
"rewards/rejected": -3.762930393218994, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": 2.27836275100708, |
|
"eval_logits/rejected": 2.481665849685669, |
|
"eval_logps/chosen": -527.249267578125, |
|
"eval_logps/rejected": -611.9570922851562, |
|
"eval_loss": 0.4924592673778534, |
|
"eval_rewards/accuracies": 0.7540000081062317, |
|
"eval_rewards/chosen": -2.6194300651550293, |
|
"eval_rewards/margins": 1.0875214338302612, |
|
"eval_rewards/rejected": -3.70695161819458, |
|
"eval_runtime": 449.2776, |
|
"eval_samples_per_second": 4.452, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 9.731171768139808e-07, |
|
"logits/chosen": 1.8738467693328857, |
|
"logits/rejected": 1.6231794357299805, |
|
"logps/chosen": -510.99755859375, |
|
"logps/rejected": -603.8427124023438, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.6101596355438232, |
|
"rewards/margins": 1.1430681943893433, |
|
"rewards/rejected": -3.753227710723877, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 9.371964648825221e-07, |
|
"logits/chosen": 2.0445821285247803, |
|
"logits/rejected": 2.0226082801818848, |
|
"logps/chosen": -511.77435302734375, |
|
"logps/rejected": -593.7333984375, |
|
"loss": 0.4627, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.699057102203369, |
|
"rewards/margins": 1.1369215250015259, |
|
"rewards/rejected": -3.8359782695770264, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 5.875, |
|
"learning_rate": 9.017977166366445e-07, |
|
"logits/chosen": 1.337143063545227, |
|
"logits/rejected": 1.4125709533691406, |
|
"logps/chosen": -547.0819091796875, |
|
"logps/rejected": -636.6948852539062, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.638824939727783, |
|
"rewards/margins": 0.9935441017150879, |
|
"rewards/rejected": -3.632368803024292, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 8.669327549707096e-07, |
|
"logits/chosen": 2.07295298576355, |
|
"logits/rejected": 1.8528960943222046, |
|
"logps/chosen": -519.5410766601562, |
|
"logps/rejected": -614.3350830078125, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.67018461227417, |
|
"rewards/margins": 1.1164127588272095, |
|
"rewards/rejected": -3.7865970134735107, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 8.326132244986932e-07, |
|
"logits/chosen": 2.217745065689087, |
|
"logits/rejected": 2.1313698291778564, |
|
"logps/chosen": -498.91455078125, |
|
"logps/rejected": -612.3049926757812, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.5271456241607666, |
|
"rewards/margins": 1.1923385858535767, |
|
"rewards/rejected": -3.7194838523864746, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 7.988505876649863e-07, |
|
"logits/chosen": 1.5606577396392822, |
|
"logits/rejected": 1.8046897649765015, |
|
"logps/chosen": -571.3505859375, |
|
"logps/rejected": -620.4075927734375, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.810910701751709, |
|
"rewards/margins": 0.8456093668937683, |
|
"rewards/rejected": -3.656520128250122, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 7.656561209160248e-07, |
|
"logits/chosen": 1.7562596797943115, |
|
"logits/rejected": 1.6450916528701782, |
|
"logps/chosen": -557.3902587890625, |
|
"logps/rejected": -651.6214599609375, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.7291982173919678, |
|
"rewards/margins": 1.1322641372680664, |
|
"rewards/rejected": -3.861462354660034, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 7.330409109340563e-07, |
|
"logits/chosen": 1.6876871585845947, |
|
"logits/rejected": 1.8287250995635986, |
|
"logps/chosen": -548.6978759765625, |
|
"logps/rejected": -624.533203125, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.747478723526001, |
|
"rewards/margins": 1.0232455730438232, |
|
"rewards/rejected": -3.770724058151245, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 7.4375, |
|
"learning_rate": 7.010158509342682e-07, |
|
"logits/chosen": 1.8295252323150635, |
|
"logits/rejected": 1.6652030944824219, |
|
"logps/chosen": -538.7413330078125, |
|
"logps/rejected": -626.6610717773438, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6121177673339844, |
|
"rewards/margins": 1.0102406740188599, |
|
"rewards/rejected": -3.622358798980713, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 5.375, |
|
"learning_rate": 6.695916370265529e-07, |
|
"logits/chosen": 1.9342527389526367, |
|
"logits/rejected": 1.8059526681900024, |
|
"logps/chosen": -494.00677490234375, |
|
"logps/rejected": -591.4176025390625, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.608290195465088, |
|
"rewards/margins": 1.0327186584472656, |
|
"rewards/rejected": -3.6410088539123535, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": 2.373870372772217, |
|
"eval_logits/rejected": 2.584756851196289, |
|
"eval_logps/chosen": -527.5073852539062, |
|
"eval_logps/rejected": -612.5421142578125, |
|
"eval_loss": 0.49223849177360535, |
|
"eval_rewards/accuracies": 0.7540000081062317, |
|
"eval_rewards/chosen": -2.622011661529541, |
|
"eval_rewards/margins": 1.09079110622406, |
|
"eval_rewards/rejected": -3.7128028869628906, |
|
"eval_runtime": 450.8468, |
|
"eval_samples_per_second": 4.436, |
|
"eval_steps_per_second": 0.277, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 10.0, |
|
"learning_rate": 6.387787646430854e-07, |
|
"logits/chosen": 1.389383316040039, |
|
"logits/rejected": 1.57669198513031, |
|
"logps/chosen": -528.4805297851562, |
|
"logps/rejected": -606.1107177734375, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.586696147918701, |
|
"rewards/margins": 1.0107383728027344, |
|
"rewards/rejected": -3.5974345207214355, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 8.875, |
|
"learning_rate": 6.085875250329401e-07, |
|
"logits/chosen": 1.4683406352996826, |
|
"logits/rejected": 1.9843635559082031, |
|
"logps/chosen": -538.0323486328125, |
|
"logps/rejected": -633.2237548828125, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.590121269226074, |
|
"rewards/margins": 1.0882453918457031, |
|
"rewards/rejected": -3.6783668994903564, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 5.79028001824894e-07, |
|
"logits/chosen": 2.1134185791015625, |
|
"logits/rejected": 2.00264310836792, |
|
"logps/chosen": -518.1842651367188, |
|
"logps/rejected": -585.181640625, |
|
"loss": 0.5058, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.550919532775879, |
|
"rewards/margins": 1.059746503829956, |
|
"rewards/rejected": -3.610666275024414, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 5.501100676595761e-07, |
|
"logits/chosen": 1.5053377151489258, |
|
"logits/rejected": 1.8114296197891235, |
|
"logps/chosen": -503.01202392578125, |
|
"logps/rejected": -604.1715087890625, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.5767312049865723, |
|
"rewards/margins": 1.0214576721191406, |
|
"rewards/rejected": -3.598188877105713, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 5.218433808920884e-07, |
|
"logits/chosen": 1.5970503091812134, |
|
"logits/rejected": 2.1210780143737793, |
|
"logps/chosen": -524.966796875, |
|
"logps/rejected": -603.1199951171875, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6611809730529785, |
|
"rewards/margins": 1.0476148128509521, |
|
"rewards/rejected": -3.7087955474853516, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 4.942373823661928e-07, |
|
"logits/chosen": 1.453554391860962, |
|
"logits/rejected": 1.9258426427841187, |
|
"logps/chosen": -492.60064697265625, |
|
"logps/rejected": -562.98095703125, |
|
"loss": 0.4492, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.3589653968811035, |
|
"rewards/margins": 1.078311562538147, |
|
"rewards/rejected": -3.437276840209961, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 4.6730129226114363e-07, |
|
"logits/chosen": 1.5613019466400146, |
|
"logits/rejected": 2.030231237411499, |
|
"logps/chosen": -481.99029541015625, |
|
"logps/rejected": -551.4531860351562, |
|
"loss": 0.4448, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.3367221355438232, |
|
"rewards/margins": 1.1135777235031128, |
|
"rewards/rejected": -3.4502997398376465, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 4.4104410701222703e-07, |
|
"logits/chosen": 1.846142053604126, |
|
"logits/rejected": 2.0114002227783203, |
|
"logps/chosen": -539.5140380859375, |
|
"logps/rejected": -590.5936279296875, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5811474323272705, |
|
"rewards/margins": 1.007107138633728, |
|
"rewards/rejected": -3.588254451751709, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 4.125, |
|
"learning_rate": 4.154745963060197e-07, |
|
"logits/chosen": 1.6992515325546265, |
|
"logits/rejected": 1.6688053607940674, |
|
"logps/chosen": -487.58135986328125, |
|
"logps/rejected": -601.6525268554688, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.4629733562469482, |
|
"rewards/margins": 1.129070520401001, |
|
"rewards/rejected": -3.59204363822937, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 8.625, |
|
"learning_rate": 3.9060130015138863e-07, |
|
"logits/chosen": 1.7125355005264282, |
|
"logits/rejected": 1.963112235069275, |
|
"logps/chosen": -518.4107055664062, |
|
"logps/rejected": -589.79541015625, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.61944580078125, |
|
"rewards/margins": 0.8665763735771179, |
|
"rewards/rejected": -3.4860222339630127, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 2.2803421020507812, |
|
"eval_logits/rejected": 2.4955389499664307, |
|
"eval_logps/chosen": -516.6763305664062, |
|
"eval_logps/rejected": -600.9805297851562, |
|
"eval_loss": 0.4925038516521454, |
|
"eval_rewards/accuracies": 0.7509999871253967, |
|
"eval_rewards/chosen": -2.5137012004852295, |
|
"eval_rewards/margins": 1.0834852457046509, |
|
"eval_rewards/rejected": -3.597186326980591, |
|
"eval_runtime": 449.4345, |
|
"eval_samples_per_second": 4.45, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 3.664325260271953e-07, |
|
"logits/chosen": 1.7167911529541016, |
|
"logits/rejected": 1.6851142644882202, |
|
"logps/chosen": -496.5397033691406, |
|
"logps/rejected": -586.4037475585938, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.6186347007751465, |
|
"rewards/margins": 0.9449771046638489, |
|
"rewards/rejected": -3.5636115074157715, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 9.75, |
|
"learning_rate": 3.429763461076677e-07, |
|
"logits/chosen": 1.7546532154083252, |
|
"logits/rejected": 1.7746025323867798, |
|
"logps/chosen": -486.4024353027344, |
|
"logps/rejected": -575.5360717773438, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5622782707214355, |
|
"rewards/margins": 0.9538448452949524, |
|
"rewards/rejected": -3.5161232948303223, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 12.375, |
|
"learning_rate": 3.202405945663556e-07, |
|
"logits/chosen": 1.7500112056732178, |
|
"logits/rejected": 1.6894385814666748, |
|
"logps/chosen": -503.01727294921875, |
|
"logps/rejected": -568.9468994140625, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.571544647216797, |
|
"rewards/margins": 0.7718718647956848, |
|
"rewards/rejected": -3.343416690826416, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 2.982328649595856e-07, |
|
"logits/chosen": 1.3868858814239502, |
|
"logits/rejected": 1.7116378545761108, |
|
"logps/chosen": -516.5772705078125, |
|
"logps/rejected": -601.1525268554688, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.530223846435547, |
|
"rewards/margins": 0.9710138440132141, |
|
"rewards/rejected": -3.5012378692626953, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 2.7696050769026954e-07, |
|
"logits/chosen": 1.8670718669891357, |
|
"logits/rejected": 2.1231729984283447, |
|
"logps/chosen": -528.6818237304688, |
|
"logps/rejected": -598.3047485351562, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.472653388977051, |
|
"rewards/margins": 1.0345518589019775, |
|
"rewards/rejected": -3.5072052478790283, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 2.564306275529341e-07, |
|
"logits/chosen": 1.5151128768920898, |
|
"logits/rejected": 1.9168331623077393, |
|
"logps/chosen": -532.9383544921875, |
|
"logps/rejected": -606.9699096679688, |
|
"loss": 0.4644, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.455781936645508, |
|
"rewards/margins": 1.086609125137329, |
|
"rewards/rejected": -3.542390823364258, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 6.625, |
|
"learning_rate": 2.3665008136077332e-07, |
|
"logits/chosen": 1.9394729137420654, |
|
"logits/rejected": 1.6315431594848633, |
|
"logps/chosen": -506.2244567871094, |
|
"logps/rejected": -598.549072265625, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4361801147460938, |
|
"rewards/margins": 0.9389766454696655, |
|
"rewards/rejected": -3.3751564025878906, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 2.1762547565553293e-07, |
|
"logits/chosen": 1.3666921854019165, |
|
"logits/rejected": 1.5760104656219482, |
|
"logps/chosen": -513.62646484375, |
|
"logps/rejected": -581.5886840820312, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.499276638031006, |
|
"rewards/margins": 0.931717574596405, |
|
"rewards/rejected": -3.4309940338134766, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 1.993631645009747e-07, |
|
"logits/chosen": 1.6720809936523438, |
|
"logits/rejected": 1.689178228378296, |
|
"logps/chosen": -539.5501098632812, |
|
"logps/rejected": -618.399169921875, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.4603431224823, |
|
"rewards/margins": 1.1464570760726929, |
|
"rewards/rejected": -3.6068005561828613, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 1.818692473606748e-07, |
|
"logits/chosen": 1.6739721298217773, |
|
"logits/rejected": 1.4370070695877075, |
|
"logps/chosen": -520.3096923828125, |
|
"logps/rejected": -609.7615966796875, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.5945487022399902, |
|
"rewards/margins": 1.0985443592071533, |
|
"rewards/rejected": -3.6930930614471436, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": 2.3031065464019775, |
|
"eval_logits/rejected": 2.5159618854522705, |
|
"eval_logps/chosen": -515.6121826171875, |
|
"eval_logps/rejected": -599.6626586914062, |
|
"eval_loss": 0.49199017882347107, |
|
"eval_rewards/accuracies": 0.7549999952316284, |
|
"eval_rewards/chosen": -2.5030598640441895, |
|
"eval_rewards/margins": 1.0809484720230103, |
|
"eval_rewards/rejected": -3.58400821685791, |
|
"eval_runtime": 449.2811, |
|
"eval_samples_per_second": 4.452, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 1.6514956706084885e-07, |
|
"logits/chosen": 1.7647104263305664, |
|
"logits/rejected": 1.7470661401748657, |
|
"logps/chosen": -510.3075256347656, |
|
"logps/rejected": -584.53076171875, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.5620687007904053, |
|
"rewards/margins": 0.9733829498291016, |
|
"rewards/rejected": -3.5354514122009277, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 1.4920970783889737e-07, |
|
"logits/chosen": 2.056183338165283, |
|
"logits/rejected": 1.862571358680725, |
|
"logps/chosen": -492.4064025878906, |
|
"logps/rejected": -592.354248046875, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4460253715515137, |
|
"rewards/margins": 1.100265622138977, |
|
"rewards/rejected": -3.5462913513183594, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 6.125, |
|
"learning_rate": 1.340549934783164e-07, |
|
"logits/chosen": 2.063292980194092, |
|
"logits/rejected": 1.9309425354003906, |
|
"logps/chosen": -481.56402587890625, |
|
"logps/rejected": -577.6758422851562, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.4606540203094482, |
|
"rewards/margins": 1.0394421815872192, |
|
"rewards/rejected": -3.500096082687378, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 1.196904855305961e-07, |
|
"logits/chosen": 1.4203405380249023, |
|
"logits/rejected": 2.2476806640625, |
|
"logps/chosen": -489.14306640625, |
|
"logps/rejected": -624.822265625, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6157355308532715, |
|
"rewards/margins": 1.1121528148651123, |
|
"rewards/rejected": -3.7278881072998047, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 6.25, |
|
"learning_rate": 1.0612098162470302e-07, |
|
"logits/chosen": 2.0200281143188477, |
|
"logits/rejected": 2.246073007583618, |
|
"logps/chosen": -531.7589111328125, |
|
"logps/rejected": -600.7255859375, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6462159156799316, |
|
"rewards/margins": 1.0438668727874756, |
|
"rewards/rejected": -3.6900830268859863, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 9.335101386471285e-08, |
|
"logits/chosen": 1.8874080181121826, |
|
"logits/rejected": 1.6664152145385742, |
|
"logps/chosen": -526.5037231445312, |
|
"logps/rejected": -639.0104370117188, |
|
"loss": 0.4508, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.4748384952545166, |
|
"rewards/margins": 1.4183952808380127, |
|
"rewards/rejected": -3.89323353767395, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 8.625, |
|
"learning_rate": 8.138484731612273e-08, |
|
"logits/chosen": 1.7972297668457031, |
|
"logits/rejected": 1.9597629308700562, |
|
"logps/chosen": -512.8606567382812, |
|
"logps/rejected": -580.591796875, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.616697311401367, |
|
"rewards/margins": 0.8005778193473816, |
|
"rewards/rejected": -3.4172751903533936, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 7.022647858135501e-08, |
|
"logits/chosen": 1.7437160015106201, |
|
"logits/rejected": 1.9117034673690796, |
|
"logps/chosen": -505.4112243652344, |
|
"logps/rejected": -593.5841674804688, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5735456943511963, |
|
"rewards/margins": 1.0469127893447876, |
|
"rewards/rejected": -3.6204586029052734, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 5.987963446492384e-08, |
|
"logits/chosen": 1.6604769229888916, |
|
"logits/rejected": 1.3407343626022339, |
|
"logps/chosen": -500.91851806640625, |
|
"logps/rejected": -603.554931640625, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.477205514907837, |
|
"rewards/margins": 1.0765531063079834, |
|
"rewards/rejected": -3.553758144378662, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 5.034777072871394e-08, |
|
"logits/chosen": 1.9356000423431396, |
|
"logits/rejected": 1.387791395187378, |
|
"logps/chosen": -526.5845947265625, |
|
"logps/rejected": -615.2680053710938, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5586600303649902, |
|
"rewards/margins": 1.0848100185394287, |
|
"rewards/rejected": -3.643470048904419, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": 2.298227548599243, |
|
"eval_logits/rejected": 2.511512517929077, |
|
"eval_logps/chosen": -516.33203125, |
|
"eval_logps/rejected": -600.2826538085938, |
|
"eval_loss": 0.49213770031929016, |
|
"eval_rewards/accuracies": 0.7549999952316284, |
|
"eval_rewards/chosen": -2.510258197784424, |
|
"eval_rewards/margins": 1.0799494981765747, |
|
"eval_rewards/rejected": -3.590207576751709, |
|
"eval_runtime": 449.189, |
|
"eval_samples_per_second": 4.452, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 4.163407093778243e-08, |
|
"logits/chosen": 1.7006988525390625, |
|
"logits/rejected": 1.82578444480896, |
|
"logps/chosen": -488.0613708496094, |
|
"logps/rejected": -576.953857421875, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.424391984939575, |
|
"rewards/margins": 1.1633926630020142, |
|
"rewards/rejected": -3.5877845287323, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 3.37414453970758e-08, |
|
"logits/chosen": 1.462934136390686, |
|
"logits/rejected": 1.820743203163147, |
|
"logps/chosen": -547.2937622070312, |
|
"logps/rejected": -571.7078857421875, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.642662286758423, |
|
"rewards/margins": 0.6700358986854553, |
|
"rewards/rejected": -3.3126978874206543, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 2.6672530179410183e-08, |
|
"logits/chosen": 1.7907625436782837, |
|
"logits/rejected": 2.133568048477173, |
|
"logps/chosen": -494.36236572265625, |
|
"logps/rejected": -579.52587890625, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.377741813659668, |
|
"rewards/margins": 1.148186445236206, |
|
"rewards/rejected": -3.525928020477295, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 2.04296862450451e-08, |
|
"logits/chosen": 1.5523009300231934, |
|
"logits/rejected": 1.7978283166885376, |
|
"logps/chosen": -545.3399658203125, |
|
"logps/rejected": -616.2483520507812, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.570333957672119, |
|
"rewards/margins": 1.0563600063323975, |
|
"rewards/rejected": -3.6266937255859375, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 1.501499865314171e-08, |
|
"logits/chosen": 1.7202790975570679, |
|
"logits/rejected": 1.7891725301742554, |
|
"logps/chosen": -553.4903564453125, |
|
"logps/rejected": -618.0564575195312, |
|
"loss": 0.4786, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.56032395362854, |
|
"rewards/margins": 1.0462162494659424, |
|
"rewards/rejected": -3.6065402030944824, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 1.0430275865371265e-08, |
|
"logits/chosen": 1.5807321071624756, |
|
"logits/rejected": 1.6664190292358398, |
|
"logps/chosen": -529.1962890625, |
|
"logps/rejected": -612.3154907226562, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.5308566093444824, |
|
"rewards/margins": 1.0107314586639404, |
|
"rewards/rejected": -3.541588306427002, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 6.677049141901315e-09, |
|
"logits/chosen": 1.7959083318710327, |
|
"logits/rejected": 1.607792854309082, |
|
"logps/chosen": -529.5757446289062, |
|
"logps/rejected": -606.5616455078125, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.5840096473693848, |
|
"rewards/margins": 0.845914363861084, |
|
"rewards/rejected": -3.4299240112304688, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 3.756572029968708e-09, |
|
"logits/chosen": 1.670596718788147, |
|
"logits/rejected": 1.6237586736679077, |
|
"logps/chosen": -534.3974609375, |
|
"logps/rejected": -604.6273803710938, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.6218812465667725, |
|
"rewards/margins": 0.8884350061416626, |
|
"rewards/rejected": -3.5103163719177246, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 6.5, |
|
"learning_rate": 1.6698199452053199e-09, |
|
"logits/chosen": 1.671463966369629, |
|
"logits/rejected": 1.5730221271514893, |
|
"logps/chosen": -530.3741455078125, |
|
"logps/rejected": -586.624755859375, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.395474672317505, |
|
"rewards/margins": 1.0592764616012573, |
|
"rewards/rejected": -3.4547507762908936, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 4.1748984585560094e-10, |
|
"logits/chosen": 1.872854232788086, |
|
"logits/rejected": 1.524829626083374, |
|
"logps/chosen": -533.9439697265625, |
|
"logps/rejected": -585.8349609375, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.592789649963379, |
|
"rewards/margins": 0.8945242762565613, |
|
"rewards/rejected": -3.487313747406006, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": 2.297112464904785, |
|
"eval_logits/rejected": 2.5098061561584473, |
|
"eval_logps/chosen": -516.2830810546875, |
|
"eval_logps/rejected": -600.2637939453125, |
|
"eval_loss": 0.49207010865211487, |
|
"eval_rewards/accuracies": 0.7549999952316284, |
|
"eval_rewards/chosen": -2.509768486022949, |
|
"eval_rewards/margins": 1.0802510976791382, |
|
"eval_rewards/rejected": -3.590019464492798, |
|
"eval_runtime": 451.0621, |
|
"eval_samples_per_second": 4.434, |
|
"eval_steps_per_second": 0.277, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 1.4962704181671143, |
|
"logits/rejected": 1.806806206703186, |
|
"logps/chosen": -554.9898681640625, |
|
"logps/rejected": -627.3148803710938, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.5435588359832764, |
|
"rewards/margins": 1.075156569480896, |
|
"rewards/rejected": -3.618715763092041, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.528951391499704, |
|
"train_runtime": 37787.7404, |
|
"train_samples_per_second": 1.618, |
|
"train_steps_per_second": 0.051 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|