gemma-7b-borpo-noisy-5e-5-02-v4 / trainer_state.json
silviasapora's picture
Model save
d966aea verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.986666666666667,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 2080.0,
"learning_rate": 7.8125e-06,
"log_odds_chosen": 1.6525596380233765,
"log_odds_ratio": -11.146315574645996,
"logps/chosen": -21.964563369750977,
"logps/rejected": -23.61678695678711,
"loss": 723.9557,
"nll_loss": 8.641968727111816,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -4.392912864685059,
"rewards/margins": 0.33044466376304626,
"rewards/rejected": -4.723357677459717,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 816.0,
"learning_rate": 1.5625e-05,
"log_odds_chosen": 1.0747610330581665,
"log_odds_ratio": -8.308441162109375,
"logps/chosen": -19.65680503845215,
"logps/rejected": -20.730966567993164,
"loss": 646.4148,
"nll_loss": 7.853402614593506,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": -3.931361436843872,
"rewards/margins": 0.21483144164085388,
"rewards/rejected": -4.14619255065918,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 1408.0,
"learning_rate": 2.34375e-05,
"log_odds_chosen": 4.371163368225098,
"log_odds_ratio": -6.787731170654297,
"logps/chosen": -17.956409454345703,
"logps/rejected": -22.32614517211914,
"loss": 504.665,
"nll_loss": 8.193029403686523,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -3.591282367706299,
"rewards/margins": 0.8739474415779114,
"rewards/rejected": -4.465229511260986,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 10112.0,
"learning_rate": 3.125e-05,
"log_odds_chosen": 0.567130446434021,
"log_odds_ratio": -6.8672003746032715,
"logps/chosen": -15.55059814453125,
"logps/rejected": -16.119054794311523,
"loss": 520.7164,
"nll_loss": 7.535510063171387,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": -3.110119581222534,
"rewards/margins": 0.11369138956069946,
"rewards/rejected": -3.223810911178589,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 960.0,
"learning_rate": 3.90625e-05,
"log_odds_chosen": 1.732701063156128,
"log_odds_ratio": -2.4201536178588867,
"logps/chosen": -6.048478603363037,
"logps/rejected": -7.7686004638671875,
"loss": 145.2979,
"nll_loss": 3.8266708850860596,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -1.2096956968307495,
"rewards/margins": 0.34402433037757874,
"rewards/rejected": -1.553720235824585,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 266.0,
"learning_rate": 4.6875e-05,
"log_odds_chosen": 0.1403735727071762,
"log_odds_ratio": -0.8303444981575012,
"logps/chosen": -1.8173532485961914,
"logps/rejected": -1.9406824111938477,
"loss": 58.133,
"nll_loss": 2.613788604736328,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.3634707033634186,
"rewards/margins": 0.024665823206305504,
"rewards/rejected": -0.38813653588294983,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 208.0,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 0.187073215842247,
"log_odds_ratio": -0.7795676589012146,
"logps/chosen": -1.5736640691757202,
"logps/rejected": -1.744511604309082,
"loss": 50.3278,
"nll_loss": 2.258103370666504,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.3147328495979309,
"rewards/margins": 0.0341695174574852,
"rewards/rejected": -0.3489023745059967,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 178.0,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 0.2348749339580536,
"log_odds_ratio": -0.7139513492584229,
"logps/chosen": -1.419406533241272,
"logps/rejected": -1.6171458959579468,
"loss": 45.351,
"nll_loss": 2.1722681522369385,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.2838813066482544,
"rewards/margins": 0.039547890424728394,
"rewards/rejected": -0.3234291970729828,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 202.0,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 0.14743809401988983,
"log_odds_ratio": -0.763064980506897,
"logps/chosen": -1.3785412311553955,
"logps/rejected": -1.4916046857833862,
"loss": 44.0902,
"nll_loss": 2.1061487197875977,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.27570822834968567,
"rewards/margins": 0.02261272445321083,
"rewards/rejected": -0.2983209490776062,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 278.0,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 0.20397412776947021,
"log_odds_ratio": -0.7111414670944214,
"logps/chosen": -1.3394975662231445,
"logps/rejected": -1.5087169408798218,
"loss": 42.8501,
"nll_loss": 2.211090564727783,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.2678995132446289,
"rewards/margins": 0.03384386748075485,
"rewards/rejected": -0.30174335837364197,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 92.0,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 0.1584581434726715,
"log_odds_ratio": -0.7220534682273865,
"logps/chosen": -1.243898868560791,
"logps/rejected": -1.3692307472229004,
"loss": 39.7973,
"nll_loss": 2.0482659339904785,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.24877974390983582,
"rewards/margins": 0.02506640926003456,
"rewards/rejected": -0.2738461494445801,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 113.0,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 0.22705142199993134,
"log_odds_ratio": -0.6849401593208313,
"logps/chosen": -1.1941479444503784,
"logps/rejected": -1.3716175556182861,
"loss": 38.2246,
"nll_loss": 2.0026328563690186,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.2388295829296112,
"rewards/margins": 0.03549391031265259,
"rewards/rejected": -0.2743235230445862,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 96.5,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 0.2287750244140625,
"log_odds_ratio": -0.6806913614273071,
"logps/chosen": -1.1876500844955444,
"logps/rejected": -1.3565670251846313,
"loss": 37.995,
"nll_loss": 1.997527837753296,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.23752999305725098,
"rewards/margins": 0.033783383667469025,
"rewards/rejected": -0.2713133692741394,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 138.0,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 0.25641298294067383,
"log_odds_ratio": -0.6760451197624207,
"logps/chosen": -1.146054983139038,
"logps/rejected": -1.3470946550369263,
"loss": 36.6471,
"nll_loss": 2.0368056297302246,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.2292109727859497,
"rewards/margins": 0.040207941085100174,
"rewards/rejected": -0.2694189250469208,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 207.0,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 0.27950650453567505,
"log_odds_ratio": -0.6581438779830933,
"logps/chosen": -1.084201693534851,
"logps/rejected": -1.2829262018203735,
"loss": 34.6792,
"nll_loss": 1.9775358438491821,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.21684034168720245,
"rewards/margins": 0.03974488750100136,
"rewards/rejected": -0.2565852105617523,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 117.0,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 0.21956244111061096,
"log_odds_ratio": -0.6832794547080994,
"logps/chosen": -1.1284596920013428,
"logps/rejected": -1.2831165790557861,
"loss": 36.0978,
"nll_loss": 1.9256585836410522,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -0.22569191455841064,
"rewards/margins": 0.03093140758574009,
"rewards/rejected": -0.2566233277320862,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 136.0,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 0.28352534770965576,
"log_odds_ratio": -0.6638063192367554,
"logps/chosen": -1.0836379528045654,
"logps/rejected": -1.290060043334961,
"loss": 34.6599,
"nll_loss": 1.8572845458984375,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.216727614402771,
"rewards/margins": 0.041284408420324326,
"rewards/rejected": -0.25801199674606323,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 225.0,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 0.24633805453777313,
"log_odds_ratio": -0.6877402067184448,
"logps/chosen": -1.1121197938919067,
"logps/rejected": -1.294413447380066,
"loss": 35.4938,
"nll_loss": 2.044349431991577,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.22242394089698792,
"rewards/margins": 0.03645877167582512,
"rewards/rejected": -0.2588827311992645,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 70.5,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 0.22184424102306366,
"log_odds_ratio": -0.673058271408081,
"logps/chosen": -1.054860234260559,
"logps/rejected": -1.2138173580169678,
"loss": 33.7487,
"nll_loss": 1.8830925226211548,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -0.21097204089164734,
"rewards/margins": 0.03179146721959114,
"rewards/rejected": -0.24276351928710938,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 117.0,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 0.1458374559879303,
"log_odds_ratio": -0.7220640182495117,
"logps/chosen": -1.08286714553833,
"logps/rejected": -1.2099539041519165,
"loss": 34.64,
"nll_loss": 1.8595941066741943,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.21657343208789825,
"rewards/margins": 0.025417348369956017,
"rewards/rejected": -0.24199077486991882,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 116.0,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 0.21829000115394592,
"log_odds_ratio": -0.665600597858429,
"logps/chosen": -1.0344531536102295,
"logps/rejected": -1.1940683126449585,
"loss": 33.0967,
"nll_loss": 1.769559621810913,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.20689065754413605,
"rewards/margins": 0.03192301467061043,
"rewards/rejected": -0.23881368339061737,
"step": 105
},
{
"epoch": 1.0429629629629629,
"grad_norm": 69.5,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 0.536843478679657,
"log_odds_ratio": -0.5678494572639465,
"logps/chosen": -0.9168744087219238,
"logps/rejected": -1.270845890045166,
"loss": 29.3055,
"nll_loss": 1.7102380990982056,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.18337486684322357,
"rewards/margins": 0.07079432904720306,
"rewards/rejected": -0.25416916608810425,
"step": 110
},
{
"epoch": 1.0903703703703704,
"grad_norm": 69.0,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 0.6579151153564453,
"log_odds_ratio": -0.5232337117195129,
"logps/chosen": -0.8788490295410156,
"logps/rejected": -1.3019859790802002,
"loss": 28.063,
"nll_loss": 1.8311777114868164,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.17576980590820312,
"rewards/margins": 0.08462737500667572,
"rewards/rejected": -0.26039719581604004,
"step": 115
},
{
"epoch": 1.1377777777777778,
"grad_norm": 78.5,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 0.6495614051818848,
"log_odds_ratio": -0.5208151340484619,
"logps/chosen": -0.870018482208252,
"logps/rejected": -1.280542254447937,
"loss": 27.8228,
"nll_loss": 2.2432708740234375,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.1740036904811859,
"rewards/margins": 0.08210476487874985,
"rewards/rejected": -0.25610846281051636,
"step": 120
},
{
"epoch": 1.1851851851851851,
"grad_norm": 52.0,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 0.7151899933815002,
"log_odds_ratio": -0.5032771229743958,
"logps/chosen": -0.8410484194755554,
"logps/rejected": -1.3060276508331299,
"loss": 26.8671,
"nll_loss": 2.1479263305664062,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.16820970177650452,
"rewards/margins": 0.09299584478139877,
"rewards/rejected": -0.2612055540084839,
"step": 125
},
{
"epoch": 1.2325925925925927,
"grad_norm": 103.5,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 0.6010386347770691,
"log_odds_ratio": -0.5377737283706665,
"logps/chosen": -0.8195842504501343,
"logps/rejected": -1.166416049003601,
"loss": 26.2153,
"nll_loss": 1.8921180963516235,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.16391684114933014,
"rewards/margins": 0.06936636567115784,
"rewards/rejected": -0.23328320682048798,
"step": 130
},
{
"epoch": 1.28,
"grad_norm": 47.75,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 0.7158576846122742,
"log_odds_ratio": -0.4928209185600281,
"logps/chosen": -0.83746337890625,
"logps/rejected": -1.288947343826294,
"loss": 26.7787,
"nll_loss": 1.8713912963867188,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": -0.16749267280101776,
"rewards/margins": 0.09029679000377655,
"rewards/rejected": -0.2577894330024719,
"step": 135
},
{
"epoch": 1.3274074074074074,
"grad_norm": 306.0,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": 0.5670473575592041,
"log_odds_ratio": -0.5594123005867004,
"logps/chosen": -0.8860000371932983,
"logps/rejected": -1.2509949207305908,
"loss": 28.3339,
"nll_loss": 1.914190649986267,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.17720001935958862,
"rewards/margins": 0.07299896329641342,
"rewards/rejected": -0.25019896030426025,
"step": 140
},
{
"epoch": 1.374814814814815,
"grad_norm": 51.75,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 0.7294235229492188,
"log_odds_ratio": -0.4973994195461273,
"logps/chosen": -0.8396116495132446,
"logps/rejected": -1.3140604496002197,
"loss": 26.8217,
"nll_loss": 1.7945051193237305,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.16792233288288116,
"rewards/margins": 0.09488976746797562,
"rewards/rejected": -0.262812077999115,
"step": 145
},
{
"epoch": 1.4222222222222223,
"grad_norm": 117.5,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 0.547488808631897,
"log_odds_ratio": -0.5519485473632812,
"logps/chosen": -0.810029149055481,
"logps/rejected": -1.1416094303131104,
"loss": 25.9107,
"nll_loss": 1.7234855890274048,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.16200582683086395,
"rewards/margins": 0.06631605327129364,
"rewards/rejected": -0.2283218801021576,
"step": 150
},
{
"epoch": 1.4696296296296296,
"grad_norm": 95.5,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 0.6218387484550476,
"log_odds_ratio": -0.5375393629074097,
"logps/chosen": -0.8497333526611328,
"logps/rejected": -1.2327052354812622,
"loss": 27.165,
"nll_loss": 1.761959433555603,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.16994668543338776,
"rewards/margins": 0.07659433782100677,
"rewards/rejected": -0.24654105305671692,
"step": 155
},
{
"epoch": 1.5170370370370372,
"grad_norm": 87.5,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 0.6225556135177612,
"log_odds_ratio": -0.521335244178772,
"logps/chosen": -0.8453457951545715,
"logps/rejected": -1.211041808128357,
"loss": 27.0398,
"nll_loss": 1.7928802967071533,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.16906917095184326,
"rewards/margins": 0.07313917577266693,
"rewards/rejected": -0.242208331823349,
"step": 160
},
{
"epoch": 1.5644444444444443,
"grad_norm": 53.25,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 0.7242995500564575,
"log_odds_ratio": -0.5051379799842834,
"logps/chosen": -0.8265692591667175,
"logps/rejected": -1.2654168605804443,
"loss": 26.4304,
"nll_loss": 1.7467893362045288,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": -0.16531386971473694,
"rewards/margins": 0.0877695307135582,
"rewards/rejected": -0.25308337807655334,
"step": 165
},
{
"epoch": 1.6118518518518519,
"grad_norm": 60.25,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 1.2396129369735718,
"log_odds_ratio": -0.5153141021728516,
"logps/chosen": -0.8173978924751282,
"logps/rejected": -1.8096736669540405,
"loss": 17.6018,
"nll_loss": 2.2086150646209717,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.16347956657409668,
"rewards/margins": 0.19845522940158844,
"rewards/rejected": -0.3619347810745239,
"step": 170
},
{
"epoch": 1.6592592592592592,
"grad_norm": 93.0,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 0.6574938893318176,
"log_odds_ratio": -0.5215548872947693,
"logps/chosen": -0.8096542358398438,
"logps/rejected": -1.212336778640747,
"loss": 25.8867,
"nll_loss": 1.7505098581314087,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.1619308441877365,
"rewards/margins": 0.08053652942180634,
"rewards/rejected": -0.24246735870838165,
"step": 175
},
{
"epoch": 1.7066666666666666,
"grad_norm": 107.5,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 0.5455656051635742,
"log_odds_ratio": -0.5534049272537231,
"logps/chosen": -0.8489178419113159,
"logps/rejected": -1.1742994785308838,
"loss": 27.1545,
"nll_loss": 1.7829153537750244,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.1697835773229599,
"rewards/margins": 0.0650763288140297,
"rewards/rejected": -0.234859898686409,
"step": 180
},
{
"epoch": 1.7540740740740741,
"grad_norm": 55.0,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 0.6557157039642334,
"log_odds_ratio": -0.517691969871521,
"logps/chosen": -0.8089399337768555,
"logps/rejected": -1.222130298614502,
"loss": 25.8522,
"nll_loss": 1.716506004333496,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.1617879867553711,
"rewards/margins": 0.08263807743787766,
"rewards/rejected": -0.24442608654499054,
"step": 185
},
{
"epoch": 1.8014814814814815,
"grad_norm": 48.5,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 0.597545862197876,
"log_odds_ratio": -0.5755712985992432,
"logps/chosen": -0.9348888397216797,
"logps/rejected": -1.3086684942245483,
"loss": 29.841,
"nll_loss": 1.7821638584136963,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.18697777390480042,
"rewards/margins": 0.07475592195987701,
"rewards/rejected": -0.2617337107658386,
"step": 190
},
{
"epoch": 1.8488888888888888,
"grad_norm": 69.0,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 0.6737447381019592,
"log_odds_ratio": -0.5273191332817078,
"logps/chosen": -0.8384972810745239,
"logps/rejected": -1.252629041671753,
"loss": 26.7925,
"nll_loss": 1.7104995250701904,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.1676994413137436,
"rewards/margins": 0.08282637596130371,
"rewards/rejected": -0.2505258023738861,
"step": 195
},
{
"epoch": 1.8962962962962964,
"grad_norm": 83.0,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 0.6040568351745605,
"log_odds_ratio": -0.5628523230552673,
"logps/chosen": -0.8518487215042114,
"logps/rejected": -1.216110348701477,
"loss": 27.2338,
"nll_loss": 1.7693039178848267,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.17036975920200348,
"rewards/margins": 0.07285232841968536,
"rewards/rejected": -0.24322207272052765,
"step": 200
},
{
"epoch": 1.9437037037037037,
"grad_norm": 63.0,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 0.5323644876480103,
"log_odds_ratio": -0.5567813515663147,
"logps/chosen": -0.8089507818222046,
"logps/rejected": -1.1334668397903442,
"loss": 25.8316,
"nll_loss": 1.6225645542144775,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": -0.1617901623249054,
"rewards/margins": 0.06490319967269897,
"rewards/rejected": -0.22669336199760437,
"step": 205
},
{
"epoch": 1.991111111111111,
"grad_norm": 44.25,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 0.667856752872467,
"log_odds_ratio": -0.5315747261047363,
"logps/chosen": -0.9115372896194458,
"logps/rejected": -1.3377487659454346,
"loss": 28.3513,
"nll_loss": 1.766261100769043,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.18230745196342468,
"rewards/margins": 0.08524232357740402,
"rewards/rejected": -0.2675497531890869,
"step": 210
},
{
"epoch": 2.0385185185185186,
"grad_norm": 37.5,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 0.9186259508132935,
"log_odds_ratio": -0.4572061598300934,
"logps/chosen": -0.7931280136108398,
"logps/rejected": -1.2958561182022095,
"loss": 25.3378,
"nll_loss": 1.6770355701446533,
"rewards/accuracies": 0.784375011920929,
"rewards/chosen": -0.15862558782100677,
"rewards/margins": 0.10054562240839005,
"rewards/rejected": -0.2591712176799774,
"step": 215
},
{
"epoch": 2.0859259259259257,
"grad_norm": 55.0,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 1.4065123796463013,
"log_odds_ratio": -0.3432285785675049,
"logps/chosen": -0.5906342267990112,
"logps/rejected": -1.3782751560211182,
"loss": 18.7957,
"nll_loss": 1.6941601037979126,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.11812685430049896,
"rewards/margins": 0.15752817690372467,
"rewards/rejected": -0.275655061006546,
"step": 220
},
{
"epoch": 2.1333333333333333,
"grad_norm": 42.5,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 1.176323652267456,
"log_odds_ratio": -0.37496405839920044,
"logps/chosen": -0.6447511911392212,
"logps/rejected": -1.292754054069519,
"loss": 20.5752,
"nll_loss": 1.6440188884735107,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.12895023822784424,
"rewards/margins": 0.12960059940814972,
"rewards/rejected": -0.25855082273483276,
"step": 225
},
{
"epoch": 2.180740740740741,
"grad_norm": 44.75,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 1.1741787195205688,
"log_odds_ratio": -0.37807196378707886,
"logps/chosen": -0.6166914105415344,
"logps/rejected": -1.2579196691513062,
"loss": 19.7048,
"nll_loss": 1.6379148960113525,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.12333826720714569,
"rewards/margins": 0.12824568152427673,
"rewards/rejected": -0.2515839636325836,
"step": 230
},
{
"epoch": 2.228148148148148,
"grad_norm": 48.0,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 1.133759617805481,
"log_odds_ratio": -0.3960319459438324,
"logps/chosen": -0.6456653475761414,
"logps/rejected": -1.2446954250335693,
"loss": 20.6347,
"nll_loss": 1.6851946115493774,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.12913307547569275,
"rewards/margins": 0.11980599164962769,
"rewards/rejected": -0.24893908202648163,
"step": 235
},
{
"epoch": 2.2755555555555556,
"grad_norm": 43.75,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 1.1987093687057495,
"log_odds_ratio": -0.3707556426525116,
"logps/chosen": -0.6308638453483582,
"logps/rejected": -1.2778990268707275,
"loss": 20.1137,
"nll_loss": 1.6404365301132202,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.1261727660894394,
"rewards/margins": 0.12940703332424164,
"rewards/rejected": -0.25557979941368103,
"step": 240
},
{
"epoch": 2.322962962962963,
"grad_norm": 39.75,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 1.2112338542938232,
"log_odds_ratio": -0.36770302057266235,
"logps/chosen": -0.6155804395675659,
"logps/rejected": -1.273896336555481,
"loss": 19.6548,
"nll_loss": 1.6040033102035522,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.12311609834432602,
"rewards/margins": 0.13166317343711853,
"rewards/rejected": -0.25477927923202515,
"step": 245
},
{
"epoch": 2.3703703703703702,
"grad_norm": 47.5,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 1.3808326721191406,
"log_odds_ratio": -0.33250147104263306,
"logps/chosen": -0.5851857662200928,
"logps/rejected": -1.349930763244629,
"loss": 18.6798,
"nll_loss": 1.6684551239013672,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.11703716218471527,
"rewards/margins": 0.1529490053653717,
"rewards/rejected": -0.2699861526489258,
"step": 250
},
{
"epoch": 2.417777777777778,
"grad_norm": 39.0,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 1.3011033535003662,
"log_odds_ratio": -0.3556652069091797,
"logps/chosen": -0.5785160064697266,
"logps/rejected": -1.3025860786437988,
"loss": 18.4302,
"nll_loss": 1.6226260662078857,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.11570320278406143,
"rewards/margins": 0.14481402933597565,
"rewards/rejected": -0.2605172097682953,
"step": 255
},
{
"epoch": 2.4651851851851854,
"grad_norm": 45.0,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 1.2299237251281738,
"log_odds_ratio": -0.37351006269454956,
"logps/chosen": -0.616021454334259,
"logps/rejected": -1.2757210731506348,
"loss": 19.6774,
"nll_loss": 1.6261924505233765,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.1232042908668518,
"rewards/margins": 0.13193991780281067,
"rewards/rejected": -0.25514426827430725,
"step": 260
},
{
"epoch": 2.5125925925925925,
"grad_norm": 49.0,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 1.2174187898635864,
"log_odds_ratio": -0.38570135831832886,
"logps/chosen": -0.6393834948539734,
"logps/rejected": -1.3296916484832764,
"loss": 20.402,
"nll_loss": 1.623984932899475,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.12787671387195587,
"rewards/margins": 0.13806161284446716,
"rewards/rejected": -0.26593831181526184,
"step": 265
},
{
"epoch": 2.56,
"grad_norm": 54.0,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 1.3414523601531982,
"log_odds_ratio": -0.3324301540851593,
"logps/chosen": -0.5961578488349915,
"logps/rejected": -1.3138689994812012,
"loss": 19.0308,
"nll_loss": 1.646997094154358,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.11923156678676605,
"rewards/margins": 0.14354220032691956,
"rewards/rejected": -0.2627738118171692,
"step": 270
},
{
"epoch": 2.6074074074074076,
"grad_norm": 53.0,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 1.2440980672836304,
"log_odds_ratio": -0.3758939504623413,
"logps/chosen": -0.6397903561592102,
"logps/rejected": -1.3174664974212646,
"loss": 20.3568,
"nll_loss": 1.6285263299942017,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.12795805931091309,
"rewards/margins": 0.13553521037101746,
"rewards/rejected": -0.26349326968193054,
"step": 275
},
{
"epoch": 2.6548148148148147,
"grad_norm": 45.25,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 1.3440355062484741,
"log_odds_ratio": -0.3526305556297302,
"logps/chosen": -0.5672257542610168,
"logps/rejected": -1.2784881591796875,
"loss": 18.1008,
"nll_loss": 1.6513664722442627,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.11344514042139053,
"rewards/margins": 0.14225247502326965,
"rewards/rejected": -0.255697637796402,
"step": 280
},
{
"epoch": 2.7022222222222223,
"grad_norm": 95.0,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 1.297435998916626,
"log_odds_ratio": -0.3628445267677307,
"logps/chosen": -0.6076027154922485,
"logps/rejected": -1.3181374073028564,
"loss": 19.2798,
"nll_loss": 1.665822982788086,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.12152054160833359,
"rewards/margins": 0.14210695028305054,
"rewards/rejected": -0.26362746953964233,
"step": 285
},
{
"epoch": 2.74962962962963,
"grad_norm": 52.5,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 1.2773778438568115,
"log_odds_ratio": -0.35668572783470154,
"logps/chosen": -0.6447194218635559,
"logps/rejected": -1.3343534469604492,
"loss": 20.5351,
"nll_loss": 1.7252442836761475,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.12894389033317566,
"rewards/margins": 0.13792680203914642,
"rewards/rejected": -0.2668706774711609,
"step": 290
},
{
"epoch": 2.797037037037037,
"grad_norm": 45.5,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 1.2958707809448242,
"log_odds_ratio": -0.39990872144699097,
"logps/chosen": -0.6433326005935669,
"logps/rejected": -1.3645284175872803,
"loss": 20.4586,
"nll_loss": 1.6844007968902588,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.12866653501987457,
"rewards/margins": 0.1442391574382782,
"rewards/rejected": -0.2729056477546692,
"step": 295
},
{
"epoch": 2.8444444444444446,
"grad_norm": 46.75,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 1.3666318655014038,
"log_odds_ratio": -0.3610309064388275,
"logps/chosen": -0.606144368648529,
"logps/rejected": -1.3657869100570679,
"loss": 19.2733,
"nll_loss": 1.696372628211975,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.121228888630867,
"rewards/margins": 0.15192851424217224,
"rewards/rejected": -0.27315741777420044,
"step": 300
},
{
"epoch": 2.891851851851852,
"grad_norm": 43.0,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 1.3116978406906128,
"log_odds_ratio": -0.3458430767059326,
"logps/chosen": -0.6200293302536011,
"logps/rejected": -1.3047711849212646,
"loss": 19.7929,
"nll_loss": 1.6241958141326904,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": -0.12400586903095245,
"rewards/margins": 0.1369483768939972,
"rewards/rejected": -0.26095423102378845,
"step": 305
},
{
"epoch": 2.9392592592592592,
"grad_norm": 97.5,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 1.3029296398162842,
"log_odds_ratio": -0.3378421366214752,
"logps/chosen": -0.6278452277183533,
"logps/rejected": -1.3267650604248047,
"loss": 20.0527,
"nll_loss": 1.697008728981018,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.12556904554367065,
"rewards/margins": 0.13978396356105804,
"rewards/rejected": -0.2653530240058899,
"step": 310
},
{
"epoch": 2.986666666666667,
"grad_norm": 45.75,
"learning_rate": 0.0,
"log_odds_chosen": 1.3681397438049316,
"log_odds_ratio": -0.33266884088516235,
"logps/chosen": -0.5873227119445801,
"logps/rejected": -1.3235623836517334,
"loss": 18.7285,
"nll_loss": 1.646396279335022,
"rewards/accuracies": 0.890625,
"rewards/chosen": -0.11746455729007721,
"rewards/margins": 0.14724794030189514,
"rewards/rejected": -0.26471248269081116,
"step": 315
},
{
"epoch": 2.986666666666667,
"step": 315,
"total_flos": 0.0,
"train_loss": 65.96502537803045,
"train_runtime": 7151.0417,
"train_samples_per_second": 2.832,
"train_steps_per_second": 0.044
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}