|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.986666666666667, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 2080.0, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": 1.6525596380233765, |
|
"log_odds_ratio": -11.146315574645996, |
|
"logps/chosen": -21.964563369750977, |
|
"logps/rejected": -23.61678695678711, |
|
"loss": 723.9557, |
|
"nll_loss": 8.641968727111816, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -4.392912864685059, |
|
"rewards/margins": 0.33044466376304626, |
|
"rewards/rejected": -4.723357677459717, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 816.0, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": 1.0747610330581665, |
|
"log_odds_ratio": -8.308441162109375, |
|
"logps/chosen": -19.65680503845215, |
|
"logps/rejected": -20.730966567993164, |
|
"loss": 646.4148, |
|
"nll_loss": 7.853402614593506, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -3.931361436843872, |
|
"rewards/margins": 0.21483144164085388, |
|
"rewards/rejected": -4.14619255065918, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 1408.0, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": 4.371163368225098, |
|
"log_odds_ratio": -6.787731170654297, |
|
"logps/chosen": -17.956409454345703, |
|
"logps/rejected": -22.32614517211914, |
|
"loss": 504.665, |
|
"nll_loss": 8.193029403686523, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -3.591282367706299, |
|
"rewards/margins": 0.8739474415779114, |
|
"rewards/rejected": -4.465229511260986, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 10112.0, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": 0.567130446434021, |
|
"log_odds_ratio": -6.8672003746032715, |
|
"logps/chosen": -15.55059814453125, |
|
"logps/rejected": -16.119054794311523, |
|
"loss": 520.7164, |
|
"nll_loss": 7.535510063171387, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -3.110119581222534, |
|
"rewards/margins": 0.11369138956069946, |
|
"rewards/rejected": -3.223810911178589, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 960.0, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": 1.732701063156128, |
|
"log_odds_ratio": -2.4201536178588867, |
|
"logps/chosen": -6.048478603363037, |
|
"logps/rejected": -7.7686004638671875, |
|
"loss": 145.2979, |
|
"nll_loss": 3.8266708850860596, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.2096956968307495, |
|
"rewards/margins": 0.34402433037757874, |
|
"rewards/rejected": -1.553720235824585, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 266.0, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.1403735727071762, |
|
"log_odds_ratio": -0.8303444981575012, |
|
"logps/chosen": -1.8173532485961914, |
|
"logps/rejected": -1.9406824111938477, |
|
"loss": 58.133, |
|
"nll_loss": 2.613788604736328, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.3634707033634186, |
|
"rewards/margins": 0.024665823206305504, |
|
"rewards/rejected": -0.38813653588294983, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 208.0, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": 0.187073215842247, |
|
"log_odds_ratio": -0.7795676589012146, |
|
"logps/chosen": -1.5736640691757202, |
|
"logps/rejected": -1.744511604309082, |
|
"loss": 50.3278, |
|
"nll_loss": 2.258103370666504, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3147328495979309, |
|
"rewards/margins": 0.0341695174574852, |
|
"rewards/rejected": -0.3489023745059967, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 178.0, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 0.2348749339580536, |
|
"log_odds_ratio": -0.7139513492584229, |
|
"logps/chosen": -1.419406533241272, |
|
"logps/rejected": -1.6171458959579468, |
|
"loss": 45.351, |
|
"nll_loss": 2.1722681522369385, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.2838813066482544, |
|
"rewards/margins": 0.039547890424728394, |
|
"rewards/rejected": -0.3234291970729828, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 202.0, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": 0.14743809401988983, |
|
"log_odds_ratio": -0.763064980506897, |
|
"logps/chosen": -1.3785412311553955, |
|
"logps/rejected": -1.4916046857833862, |
|
"loss": 44.0902, |
|
"nll_loss": 2.1061487197875977, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.27570822834968567, |
|
"rewards/margins": 0.02261272445321083, |
|
"rewards/rejected": -0.2983209490776062, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 278.0, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 0.20397412776947021, |
|
"log_odds_ratio": -0.7111414670944214, |
|
"logps/chosen": -1.3394975662231445, |
|
"logps/rejected": -1.5087169408798218, |
|
"loss": 42.8501, |
|
"nll_loss": 2.211090564727783, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.2678995132446289, |
|
"rewards/margins": 0.03384386748075485, |
|
"rewards/rejected": -0.30174335837364197, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 92.0, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 0.1584581434726715, |
|
"log_odds_ratio": -0.7220534682273865, |
|
"logps/chosen": -1.243898868560791, |
|
"logps/rejected": -1.3692307472229004, |
|
"loss": 39.7973, |
|
"nll_loss": 2.0482659339904785, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.24877974390983582, |
|
"rewards/margins": 0.02506640926003456, |
|
"rewards/rejected": -0.2738461494445801, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 113.0, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 0.22705142199993134, |
|
"log_odds_ratio": -0.6849401593208313, |
|
"logps/chosen": -1.1941479444503784, |
|
"logps/rejected": -1.3716175556182861, |
|
"loss": 38.2246, |
|
"nll_loss": 2.0026328563690186, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.2388295829296112, |
|
"rewards/margins": 0.03549391031265259, |
|
"rewards/rejected": -0.2743235230445862, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 96.5, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 0.2287750244140625, |
|
"log_odds_ratio": -0.6806913614273071, |
|
"logps/chosen": -1.1876500844955444, |
|
"logps/rejected": -1.3565670251846313, |
|
"loss": 37.995, |
|
"nll_loss": 1.997527837753296, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.23752999305725098, |
|
"rewards/margins": 0.033783383667469025, |
|
"rewards/rejected": -0.2713133692741394, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 138.0, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 0.25641298294067383, |
|
"log_odds_ratio": -0.6760451197624207, |
|
"logps/chosen": -1.146054983139038, |
|
"logps/rejected": -1.3470946550369263, |
|
"loss": 36.6471, |
|
"nll_loss": 2.0368056297302246, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.2292109727859497, |
|
"rewards/margins": 0.040207941085100174, |
|
"rewards/rejected": -0.2694189250469208, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 207.0, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 0.27950650453567505, |
|
"log_odds_ratio": -0.6581438779830933, |
|
"logps/chosen": -1.084201693534851, |
|
"logps/rejected": -1.2829262018203735, |
|
"loss": 34.6792, |
|
"nll_loss": 1.9775358438491821, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.21684034168720245, |
|
"rewards/margins": 0.03974488750100136, |
|
"rewards/rejected": -0.2565852105617523, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 117.0, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 0.21956244111061096, |
|
"log_odds_ratio": -0.6832794547080994, |
|
"logps/chosen": -1.1284596920013428, |
|
"logps/rejected": -1.2831165790557861, |
|
"loss": 36.0978, |
|
"nll_loss": 1.9256585836410522, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.22569191455841064, |
|
"rewards/margins": 0.03093140758574009, |
|
"rewards/rejected": -0.2566233277320862, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 136.0, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 0.28352534770965576, |
|
"log_odds_ratio": -0.6638063192367554, |
|
"logps/chosen": -1.0836379528045654, |
|
"logps/rejected": -1.290060043334961, |
|
"loss": 34.6599, |
|
"nll_loss": 1.8572845458984375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.216727614402771, |
|
"rewards/margins": 0.041284408420324326, |
|
"rewards/rejected": -0.25801199674606323, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 225.0, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 0.24633805453777313, |
|
"log_odds_ratio": -0.6877402067184448, |
|
"logps/chosen": -1.1121197938919067, |
|
"logps/rejected": -1.294413447380066, |
|
"loss": 35.4938, |
|
"nll_loss": 2.044349431991577, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.22242394089698792, |
|
"rewards/margins": 0.03645877167582512, |
|
"rewards/rejected": -0.2588827311992645, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 70.5, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 0.22184424102306366, |
|
"log_odds_ratio": -0.673058271408081, |
|
"logps/chosen": -1.054860234260559, |
|
"logps/rejected": -1.2138173580169678, |
|
"loss": 33.7487, |
|
"nll_loss": 1.8830925226211548, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.21097204089164734, |
|
"rewards/margins": 0.03179146721959114, |
|
"rewards/rejected": -0.24276351928710938, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 117.0, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 0.1458374559879303, |
|
"log_odds_ratio": -0.7220640182495117, |
|
"logps/chosen": -1.08286714553833, |
|
"logps/rejected": -1.2099539041519165, |
|
"loss": 34.64, |
|
"nll_loss": 1.8595941066741943, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.21657343208789825, |
|
"rewards/margins": 0.025417348369956017, |
|
"rewards/rejected": -0.24199077486991882, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 116.0, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 0.21829000115394592, |
|
"log_odds_ratio": -0.665600597858429, |
|
"logps/chosen": -1.0344531536102295, |
|
"logps/rejected": -1.1940683126449585, |
|
"loss": 33.0967, |
|
"nll_loss": 1.769559621810913, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20689065754413605, |
|
"rewards/margins": 0.03192301467061043, |
|
"rewards/rejected": -0.23881368339061737, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.0429629629629629, |
|
"grad_norm": 69.5, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 0.536843478679657, |
|
"log_odds_ratio": -0.5678494572639465, |
|
"logps/chosen": -0.9168744087219238, |
|
"logps/rejected": -1.270845890045166, |
|
"loss": 29.3055, |
|
"nll_loss": 1.7102380990982056, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.18337486684322357, |
|
"rewards/margins": 0.07079432904720306, |
|
"rewards/rejected": -0.25416916608810425, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0903703703703704, |
|
"grad_norm": 69.0, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 0.6579151153564453, |
|
"log_odds_ratio": -0.5232337117195129, |
|
"logps/chosen": -0.8788490295410156, |
|
"logps/rejected": -1.3019859790802002, |
|
"loss": 28.063, |
|
"nll_loss": 1.8311777114868164, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.17576980590820312, |
|
"rewards/margins": 0.08462737500667572, |
|
"rewards/rejected": -0.26039719581604004, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1377777777777778, |
|
"grad_norm": 78.5, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 0.6495614051818848, |
|
"log_odds_ratio": -0.5208151340484619, |
|
"logps/chosen": -0.870018482208252, |
|
"logps/rejected": -1.280542254447937, |
|
"loss": 27.8228, |
|
"nll_loss": 2.2432708740234375, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.1740036904811859, |
|
"rewards/margins": 0.08210476487874985, |
|
"rewards/rejected": -0.25610846281051636, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1851851851851851, |
|
"grad_norm": 52.0, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 0.7151899933815002, |
|
"log_odds_ratio": -0.5032771229743958, |
|
"logps/chosen": -0.8410484194755554, |
|
"logps/rejected": -1.3060276508331299, |
|
"loss": 26.8671, |
|
"nll_loss": 2.1479263305664062, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16820970177650452, |
|
"rewards/margins": 0.09299584478139877, |
|
"rewards/rejected": -0.2612055540084839, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2325925925925927, |
|
"grad_norm": 103.5, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 0.6010386347770691, |
|
"log_odds_ratio": -0.5377737283706665, |
|
"logps/chosen": -0.8195842504501343, |
|
"logps/rejected": -1.166416049003601, |
|
"loss": 26.2153, |
|
"nll_loss": 1.8921180963516235, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.16391684114933014, |
|
"rewards/margins": 0.06936636567115784, |
|
"rewards/rejected": -0.23328320682048798, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 47.75, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": 0.7158576846122742, |
|
"log_odds_ratio": -0.4928209185600281, |
|
"logps/chosen": -0.83746337890625, |
|
"logps/rejected": -1.288947343826294, |
|
"loss": 26.7787, |
|
"nll_loss": 1.8713912963867188, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.16749267280101776, |
|
"rewards/margins": 0.09029679000377655, |
|
"rewards/rejected": -0.2577894330024719, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3274074074074074, |
|
"grad_norm": 306.0, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": 0.5670473575592041, |
|
"log_odds_ratio": -0.5594123005867004, |
|
"logps/chosen": -0.8860000371932983, |
|
"logps/rejected": -1.2509949207305908, |
|
"loss": 28.3339, |
|
"nll_loss": 1.914190649986267, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.17720001935958862, |
|
"rewards/margins": 0.07299896329641342, |
|
"rewards/rejected": -0.25019896030426025, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.374814814814815, |
|
"grad_norm": 51.75, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 0.7294235229492188, |
|
"log_odds_ratio": -0.4973994195461273, |
|
"logps/chosen": -0.8396116495132446, |
|
"logps/rejected": -1.3140604496002197, |
|
"loss": 26.8217, |
|
"nll_loss": 1.7945051193237305, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16792233288288116, |
|
"rewards/margins": 0.09488976746797562, |
|
"rewards/rejected": -0.262812077999115, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 117.5, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": 0.547488808631897, |
|
"log_odds_ratio": -0.5519485473632812, |
|
"logps/chosen": -0.810029149055481, |
|
"logps/rejected": -1.1416094303131104, |
|
"loss": 25.9107, |
|
"nll_loss": 1.7234855890274048, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.16200582683086395, |
|
"rewards/margins": 0.06631605327129364, |
|
"rewards/rejected": -0.2283218801021576, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4696296296296296, |
|
"grad_norm": 95.5, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 0.6218387484550476, |
|
"log_odds_ratio": -0.5375393629074097, |
|
"logps/chosen": -0.8497333526611328, |
|
"logps/rejected": -1.2327052354812622, |
|
"loss": 27.165, |
|
"nll_loss": 1.761959433555603, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16994668543338776, |
|
"rewards/margins": 0.07659433782100677, |
|
"rewards/rejected": -0.24654105305671692, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.5170370370370372, |
|
"grad_norm": 87.5, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 0.6225556135177612, |
|
"log_odds_ratio": -0.521335244178772, |
|
"logps/chosen": -0.8453457951545715, |
|
"logps/rejected": -1.211041808128357, |
|
"loss": 27.0398, |
|
"nll_loss": 1.7928802967071533, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16906917095184326, |
|
"rewards/margins": 0.07313917577266693, |
|
"rewards/rejected": -0.242208331823349, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5644444444444443, |
|
"grad_norm": 53.25, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 0.7242995500564575, |
|
"log_odds_ratio": -0.5051379799842834, |
|
"logps/chosen": -0.8265692591667175, |
|
"logps/rejected": -1.2654168605804443, |
|
"loss": 26.4304, |
|
"nll_loss": 1.7467893362045288, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.16531386971473694, |
|
"rewards/margins": 0.0877695307135582, |
|
"rewards/rejected": -0.25308337807655334, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6118518518518519, |
|
"grad_norm": 60.25, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 1.2396129369735718, |
|
"log_odds_ratio": -0.5153141021728516, |
|
"logps/chosen": -0.8173978924751282, |
|
"logps/rejected": -1.8096736669540405, |
|
"loss": 17.6018, |
|
"nll_loss": 2.2086150646209717, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.16347956657409668, |
|
"rewards/margins": 0.19845522940158844, |
|
"rewards/rejected": -0.3619347810745239, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6592592592592592, |
|
"grad_norm": 93.0, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 0.6574938893318176, |
|
"log_odds_ratio": -0.5215548872947693, |
|
"logps/chosen": -0.8096542358398438, |
|
"logps/rejected": -1.212336778640747, |
|
"loss": 25.8867, |
|
"nll_loss": 1.7505098581314087, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.1619308441877365, |
|
"rewards/margins": 0.08053652942180634, |
|
"rewards/rejected": -0.24246735870838165, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 107.5, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": 0.5455656051635742, |
|
"log_odds_ratio": -0.5534049272537231, |
|
"logps/chosen": -0.8489178419113159, |
|
"logps/rejected": -1.1742994785308838, |
|
"loss": 27.1545, |
|
"nll_loss": 1.7829153537750244, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.1697835773229599, |
|
"rewards/margins": 0.0650763288140297, |
|
"rewards/rejected": -0.234859898686409, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.7540740740740741, |
|
"grad_norm": 55.0, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 0.6557157039642334, |
|
"log_odds_ratio": -0.517691969871521, |
|
"logps/chosen": -0.8089399337768555, |
|
"logps/rejected": -1.222130298614502, |
|
"loss": 25.8522, |
|
"nll_loss": 1.716506004333496, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1617879867553711, |
|
"rewards/margins": 0.08263807743787766, |
|
"rewards/rejected": -0.24442608654499054, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.8014814814814815, |
|
"grad_norm": 48.5, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": 0.597545862197876, |
|
"log_odds_ratio": -0.5755712985992432, |
|
"logps/chosen": -0.9348888397216797, |
|
"logps/rejected": -1.3086684942245483, |
|
"loss": 29.841, |
|
"nll_loss": 1.7821638584136963, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.18697777390480042, |
|
"rewards/margins": 0.07475592195987701, |
|
"rewards/rejected": -0.2617337107658386, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8488888888888888, |
|
"grad_norm": 69.0, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 0.6737447381019592, |
|
"log_odds_ratio": -0.5273191332817078, |
|
"logps/chosen": -0.8384972810745239, |
|
"logps/rejected": -1.252629041671753, |
|
"loss": 26.7925, |
|
"nll_loss": 1.7104995250701904, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1676994413137436, |
|
"rewards/margins": 0.08282637596130371, |
|
"rewards/rejected": -0.2505258023738861, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8962962962962964, |
|
"grad_norm": 83.0, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": 0.6040568351745605, |
|
"log_odds_ratio": -0.5628523230552673, |
|
"logps/chosen": -0.8518487215042114, |
|
"logps/rejected": -1.216110348701477, |
|
"loss": 27.2338, |
|
"nll_loss": 1.7693039178848267, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.17036975920200348, |
|
"rewards/margins": 0.07285232841968536, |
|
"rewards/rejected": -0.24322207272052765, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9437037037037037, |
|
"grad_norm": 63.0, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 0.5323644876480103, |
|
"log_odds_ratio": -0.5567813515663147, |
|
"logps/chosen": -0.8089507818222046, |
|
"logps/rejected": -1.1334668397903442, |
|
"loss": 25.8316, |
|
"nll_loss": 1.6225645542144775, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.1617901623249054, |
|
"rewards/margins": 0.06490319967269897, |
|
"rewards/rejected": -0.22669336199760437, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.991111111111111, |
|
"grad_norm": 44.25, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": 0.667856752872467, |
|
"log_odds_ratio": -0.5315747261047363, |
|
"logps/chosen": -0.9115372896194458, |
|
"logps/rejected": -1.3377487659454346, |
|
"loss": 28.3513, |
|
"nll_loss": 1.766261100769043, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.18230745196342468, |
|
"rewards/margins": 0.08524232357740402, |
|
"rewards/rejected": -0.2675497531890869, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0385185185185186, |
|
"grad_norm": 37.5, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": 0.9186259508132935, |
|
"log_odds_ratio": -0.4572061598300934, |
|
"logps/chosen": -0.7931280136108398, |
|
"logps/rejected": -1.2958561182022095, |
|
"loss": 25.3378, |
|
"nll_loss": 1.6770355701446533, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.15862558782100677, |
|
"rewards/margins": 0.10054562240839005, |
|
"rewards/rejected": -0.2591712176799774, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.0859259259259257, |
|
"grad_norm": 55.0, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 1.4065123796463013, |
|
"log_odds_ratio": -0.3432285785675049, |
|
"logps/chosen": -0.5906342267990112, |
|
"logps/rejected": -1.3782751560211182, |
|
"loss": 18.7957, |
|
"nll_loss": 1.6941601037979126, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.11812685430049896, |
|
"rewards/margins": 0.15752817690372467, |
|
"rewards/rejected": -0.275655061006546, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 42.5, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": 1.176323652267456, |
|
"log_odds_ratio": -0.37496405839920044, |
|
"logps/chosen": -0.6447511911392212, |
|
"logps/rejected": -1.292754054069519, |
|
"loss": 20.5752, |
|
"nll_loss": 1.6440188884735107, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.12895023822784424, |
|
"rewards/margins": 0.12960059940814972, |
|
"rewards/rejected": -0.25855082273483276, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.180740740740741, |
|
"grad_norm": 44.75, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 1.1741787195205688, |
|
"log_odds_ratio": -0.37807196378707886, |
|
"logps/chosen": -0.6166914105415344, |
|
"logps/rejected": -1.2579196691513062, |
|
"loss": 19.7048, |
|
"nll_loss": 1.6379148960113525, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.12333826720714569, |
|
"rewards/margins": 0.12824568152427673, |
|
"rewards/rejected": -0.2515839636325836, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.228148148148148, |
|
"grad_norm": 48.0, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": 1.133759617805481, |
|
"log_odds_ratio": -0.3960319459438324, |
|
"logps/chosen": -0.6456653475761414, |
|
"logps/rejected": -1.2446954250335693, |
|
"loss": 20.6347, |
|
"nll_loss": 1.6851946115493774, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.12913307547569275, |
|
"rewards/margins": 0.11980599164962769, |
|
"rewards/rejected": -0.24893908202648163, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2755555555555556, |
|
"grad_norm": 43.75, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": 1.1987093687057495, |
|
"log_odds_ratio": -0.3707556426525116, |
|
"logps/chosen": -0.6308638453483582, |
|
"logps/rejected": -1.2778990268707275, |
|
"loss": 20.1137, |
|
"nll_loss": 1.6404365301132202, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.1261727660894394, |
|
"rewards/margins": 0.12940703332424164, |
|
"rewards/rejected": -0.25557979941368103, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.322962962962963, |
|
"grad_norm": 39.75, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": 1.2112338542938232, |
|
"log_odds_ratio": -0.36770302057266235, |
|
"logps/chosen": -0.6155804395675659, |
|
"logps/rejected": -1.273896336555481, |
|
"loss": 19.6548, |
|
"nll_loss": 1.6040033102035522, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12311609834432602, |
|
"rewards/margins": 0.13166317343711853, |
|
"rewards/rejected": -0.25477927923202515, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3703703703703702, |
|
"grad_norm": 47.5, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 1.3808326721191406, |
|
"log_odds_ratio": -0.33250147104263306, |
|
"logps/chosen": -0.5851857662200928, |
|
"logps/rejected": -1.349930763244629, |
|
"loss": 18.6798, |
|
"nll_loss": 1.6684551239013672, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.11703716218471527, |
|
"rewards/margins": 0.1529490053653717, |
|
"rewards/rejected": -0.2699861526489258, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.417777777777778, |
|
"grad_norm": 39.0, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": 1.3011033535003662, |
|
"log_odds_ratio": -0.3556652069091797, |
|
"logps/chosen": -0.5785160064697266, |
|
"logps/rejected": -1.3025860786437988, |
|
"loss": 18.4302, |
|
"nll_loss": 1.6226260662078857, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11570320278406143, |
|
"rewards/margins": 0.14481402933597565, |
|
"rewards/rejected": -0.2605172097682953, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.4651851851851854, |
|
"grad_norm": 45.0, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": 1.2299237251281738, |
|
"log_odds_ratio": -0.37351006269454956, |
|
"logps/chosen": -0.616021454334259, |
|
"logps/rejected": -1.2757210731506348, |
|
"loss": 19.6774, |
|
"nll_loss": 1.6261924505233765, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.1232042908668518, |
|
"rewards/margins": 0.13193991780281067, |
|
"rewards/rejected": -0.25514426827430725, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5125925925925925, |
|
"grad_norm": 49.0, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": 1.2174187898635864, |
|
"log_odds_ratio": -0.38570135831832886, |
|
"logps/chosen": -0.6393834948539734, |
|
"logps/rejected": -1.3296916484832764, |
|
"loss": 20.402, |
|
"nll_loss": 1.623984932899475, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.12787671387195587, |
|
"rewards/margins": 0.13806161284446716, |
|
"rewards/rejected": -0.26593831181526184, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 54.0, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 1.3414523601531982, |
|
"log_odds_ratio": -0.3324301540851593, |
|
"logps/chosen": -0.5961578488349915, |
|
"logps/rejected": -1.3138689994812012, |
|
"loss": 19.0308, |
|
"nll_loss": 1.646997094154358, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11923156678676605, |
|
"rewards/margins": 0.14354220032691956, |
|
"rewards/rejected": -0.2627738118171692, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.6074074074074076, |
|
"grad_norm": 53.0, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": 1.2440980672836304, |
|
"log_odds_ratio": -0.3758939504623413, |
|
"logps/chosen": -0.6397903561592102, |
|
"logps/rejected": -1.3174664974212646, |
|
"loss": 20.3568, |
|
"nll_loss": 1.6285263299942017, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.12795805931091309, |
|
"rewards/margins": 0.13553521037101746, |
|
"rewards/rejected": -0.26349326968193054, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.6548148148148147, |
|
"grad_norm": 45.25, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": 1.3440355062484741, |
|
"log_odds_ratio": -0.3526305556297302, |
|
"logps/chosen": -0.5672257542610168, |
|
"logps/rejected": -1.2784881591796875, |
|
"loss": 18.1008, |
|
"nll_loss": 1.6513664722442627, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.11344514042139053, |
|
"rewards/margins": 0.14225247502326965, |
|
"rewards/rejected": -0.255697637796402, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.7022222222222223, |
|
"grad_norm": 95.0, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": 1.297435998916626, |
|
"log_odds_ratio": -0.3628445267677307, |
|
"logps/chosen": -0.6076027154922485, |
|
"logps/rejected": -1.3181374073028564, |
|
"loss": 19.2798, |
|
"nll_loss": 1.665822982788086, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12152054160833359, |
|
"rewards/margins": 0.14210695028305054, |
|
"rewards/rejected": -0.26362746953964233, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.74962962962963, |
|
"grad_norm": 52.5, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": 1.2773778438568115, |
|
"log_odds_ratio": -0.35668572783470154, |
|
"logps/chosen": -0.6447194218635559, |
|
"logps/rejected": -1.3343534469604492, |
|
"loss": 20.5351, |
|
"nll_loss": 1.7252442836761475, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.12894389033317566, |
|
"rewards/margins": 0.13792680203914642, |
|
"rewards/rejected": -0.2668706774711609, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.797037037037037, |
|
"grad_norm": 45.5, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": 1.2958707809448242, |
|
"log_odds_ratio": -0.39990872144699097, |
|
"logps/chosen": -0.6433326005935669, |
|
"logps/rejected": -1.3645284175872803, |
|
"loss": 20.4586, |
|
"nll_loss": 1.6844007968902588, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.12866653501987457, |
|
"rewards/margins": 0.1442391574382782, |
|
"rewards/rejected": -0.2729056477546692, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 46.75, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 1.3666318655014038, |
|
"log_odds_ratio": -0.3610309064388275, |
|
"logps/chosen": -0.606144368648529, |
|
"logps/rejected": -1.3657869100570679, |
|
"loss": 19.2733, |
|
"nll_loss": 1.696372628211975, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.121228888630867, |
|
"rewards/margins": 0.15192851424217224, |
|
"rewards/rejected": -0.27315741777420044, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.891851851851852, |
|
"grad_norm": 43.0, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 1.3116978406906128, |
|
"log_odds_ratio": -0.3458430767059326, |
|
"logps/chosen": -0.6200293302536011, |
|
"logps/rejected": -1.3047711849212646, |
|
"loss": 19.7929, |
|
"nll_loss": 1.6241958141326904, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.12400586903095245, |
|
"rewards/margins": 0.1369483768939972, |
|
"rewards/rejected": -0.26095423102378845, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9392592592592592, |
|
"grad_norm": 97.5, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 1.3029296398162842, |
|
"log_odds_ratio": -0.3378421366214752, |
|
"logps/chosen": -0.6278452277183533, |
|
"logps/rejected": -1.3267650604248047, |
|
"loss": 20.0527, |
|
"nll_loss": 1.697008728981018, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.12556904554367065, |
|
"rewards/margins": 0.13978396356105804, |
|
"rewards/rejected": -0.2653530240058899, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"grad_norm": 45.75, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 1.3681397438049316, |
|
"log_odds_ratio": -0.33266884088516235, |
|
"logps/chosen": -0.5873227119445801, |
|
"logps/rejected": -1.3235623836517334, |
|
"loss": 18.7285, |
|
"nll_loss": 1.646396279335022, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.11746455729007721, |
|
"rewards/margins": 0.14724794030189514, |
|
"rewards/rejected": -0.26471248269081116, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 65.96502537803045, |
|
"train_runtime": 7151.0417, |
|
"train_samples_per_second": 2.832, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|