gemma-7b-borpo-basic-5e-5-02-v5 / trainer_state.json
silviasapora's picture
Model save
48d95c7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9765925925925925,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 1899.5159912109375,
"learning_rate": 7.8125e-06,
"log_odds_chosen": -1.0597587823867798,
"log_odds_ratio": -11.004037857055664,
"logps/chosen": -22.70572280883789,
"logps/rejected": -21.646194458007812,
"loss": 526.7425,
"nll_loss": 9.840391159057617,
"po_loss": 6.620314121246338,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -4.541144847869873,
"rewards/margins": -0.21190576255321503,
"rewards/rejected": -4.3292388916015625,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 558.7034912109375,
"learning_rate": 1.5625e-05,
"log_odds_chosen": -3.1177897453308105,
"log_odds_ratio": -11.049609184265137,
"logps/chosen": -22.010894775390625,
"logps/rejected": -18.89345359802246,
"loss": 510.6521,
"nll_loss": 8.767520904541016,
"po_loss": 7.1903533935546875,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -4.402178764343262,
"rewards/margins": -0.6234878897666931,
"rewards/rejected": -3.778690814971924,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 728.9421997070312,
"learning_rate": 2.34375e-05,
"log_odds_chosen": -1.2930371761322021,
"log_odds_ratio": -11.378229141235352,
"logps/chosen": -22.08417320251465,
"logps/rejected": -20.78976821899414,
"loss": 515.0328,
"nll_loss": 8.051271438598633,
"po_loss": 8.043503761291504,
"rewards/accuracies": 0.515625,
"rewards/chosen": -4.416834831237793,
"rewards/margins": -0.25888124108314514,
"rewards/rejected": -4.157953262329102,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 9066.4345703125,
"learning_rate": 3.125e-05,
"log_odds_chosen": -4.506316184997559,
"log_odds_ratio": -9.819967269897461,
"logps/chosen": -17.74567222595215,
"logps/rejected": -13.241083145141602,
"loss": 425.3085,
"nll_loss": 6.465292453765869,
"po_loss": 6.825601100921631,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -3.5491347312927246,
"rewards/margins": -0.9009180068969727,
"rewards/rejected": -2.648216724395752,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 602.6565551757812,
"learning_rate": 3.90625e-05,
"log_odds_chosen": -0.16372856497764587,
"log_odds_ratio": -2.0679056644439697,
"logps/chosen": -4.469531536102295,
"logps/rejected": -4.290602684020996,
"loss": 123.0824,
"nll_loss": 2.8301875591278076,
"po_loss": 1.0161364078521729,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.8939064145088196,
"rewards/margins": -0.03578581288456917,
"rewards/rejected": -0.8581206202507019,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 375.20941162109375,
"learning_rate": 4.6875e-05,
"log_odds_chosen": 0.037730950862169266,
"log_odds_ratio": -0.9092292785644531,
"logps/chosen": -1.8757789134979248,
"logps/rejected": -1.9118578433990479,
"loss": 66.581,
"nll_loss": 2.074733018875122,
"po_loss": 0.00592245627194643,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.3751557767391205,
"rewards/margins": 0.007215849123895168,
"rewards/rejected": -0.38237160444259644,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 288.1863098144531,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 0.2057056725025177,
"log_odds_ratio": -0.8370769619941711,
"logps/chosen": -1.7295191287994385,
"logps/rejected": -1.9166914224624634,
"loss": 61.7867,
"nll_loss": 1.9286344051361084,
"po_loss": 0.002200313610956073,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.34590381383895874,
"rewards/margins": 0.03743448108434677,
"rewards/rejected": -0.3833382725715637,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 85.25788116455078,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 0.2474113404750824,
"log_odds_ratio": -0.7440091967582703,
"logps/chosen": -1.5026391744613647,
"logps/rejected": -1.7087732553482056,
"loss": 57.7528,
"nll_loss": 1.8044459819793701,
"po_loss": 0.0003297007642686367,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.3005278706550598,
"rewards/margins": 0.04122680425643921,
"rewards/rejected": -0.34175464510917664,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 101.17169189453125,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 0.20201453566551208,
"log_odds_ratio": -0.7342582941055298,
"logps/chosen": -1.3815350532531738,
"logps/rejected": -1.5576066970825195,
"loss": 53.7768,
"nll_loss": 1.680175542831421,
"po_loss": 0.00034963697544299066,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.27630701661109924,
"rewards/margins": 0.03521432727575302,
"rewards/rejected": -0.31152135133743286,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 69.27424621582031,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 0.24245789647102356,
"log_odds_ratio": -0.700467586517334,
"logps/chosen": -1.2549350261688232,
"logps/rejected": -1.4581347703933716,
"loss": 49.7115,
"nll_loss": 1.5532619953155518,
"po_loss": 0.00022162115783430636,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.2509870231151581,
"rewards/margins": 0.040639933198690414,
"rewards/rejected": -0.2916269600391388,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 59.69867706298828,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 0.2079828977584839,
"log_odds_ratio": -0.728143036365509,
"logps/chosen": -1.2705906629562378,
"logps/rejected": -1.4351913928985596,
"loss": 50.3722,
"nll_loss": 1.5738521814346313,
"po_loss": 0.0002790199941955507,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.2541181445121765,
"rewards/margins": 0.03292013332247734,
"rewards/rejected": -0.28703826665878296,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 164.81796264648438,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 0.1613532155752182,
"log_odds_ratio": -0.7294767498970032,
"logps/chosen": -1.2667067050933838,
"logps/rejected": -1.40398108959198,
"loss": 50.8378,
"nll_loss": 1.5884637832641602,
"po_loss": 0.00021803542040288448,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.25334134697914124,
"rewards/margins": 0.027454886585474014,
"rewards/rejected": -0.28079622983932495,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 178.92111206054688,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 0.21860842406749725,
"log_odds_ratio": -0.7070624232292175,
"logps/chosen": -1.2478413581848145,
"logps/rejected": -1.4164764881134033,
"loss": 48.7673,
"nll_loss": 1.5237150192260742,
"po_loss": 0.00026398617774248123,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.24956829845905304,
"rewards/margins": 0.033726997673511505,
"rewards/rejected": -0.28329527378082275,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 69.5670166015625,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 0.31238844990730286,
"log_odds_ratio": -0.6625837087631226,
"logps/chosen": -1.1966934204101562,
"logps/rejected": -1.4487974643707275,
"loss": 48.3884,
"nll_loss": 1.5121018886566162,
"po_loss": 3.584091246011667e-05,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.239338681101799,
"rewards/margins": 0.050420790910720825,
"rewards/rejected": -0.28975948691368103,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 129.68734741210938,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 0.262991726398468,
"log_odds_ratio": -0.6946216225624084,
"logps/chosen": -1.1699544191360474,
"logps/rejected": -1.3726341724395752,
"loss": 47.6136,
"nll_loss": 1.487747311592102,
"po_loss": 0.0001775487035047263,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.23399090766906738,
"rewards/margins": 0.04053593799471855,
"rewards/rejected": -0.27452683448791504,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 111.74764251708984,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 0.17025665938854218,
"log_odds_ratio": -0.7084556818008423,
"logps/chosen": -1.1360405683517456,
"logps/rejected": -1.2656519412994385,
"loss": 45.6908,
"nll_loss": 1.4278182983398438,
"po_loss": 1.782720755727496e-05,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.22720813751220703,
"rewards/margins": 0.0259222574532032,
"rewards/rejected": -0.25313037633895874,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 79.55937957763672,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 0.1675080806016922,
"log_odds_ratio": -0.6960164904594421,
"logps/chosen": -1.117205262184143,
"logps/rejected": -1.2469722032546997,
"loss": 46.5408,
"nll_loss": 1.454362154006958,
"po_loss": 3.7405981856863946e-05,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.22344104945659637,
"rewards/margins": 0.025953417643904686,
"rewards/rejected": -0.2493944615125656,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 62.937400817871094,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 0.28352928161621094,
"log_odds_ratio": -0.655947744846344,
"logps/chosen": -1.0310251712799072,
"logps/rejected": -1.2295501232147217,
"loss": 44.1915,
"nll_loss": 1.3809893131256104,
"po_loss": -6.17957175563788e-06,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.20620505511760712,
"rewards/margins": 0.03970498591661453,
"rewards/rejected": -0.24591004848480225,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 206.94097900390625,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 0.1304076611995697,
"log_odds_ratio": -0.7256454229354858,
"logps/chosen": -1.1198256015777588,
"logps/rejected": -1.2201364040374756,
"loss": 46.0187,
"nll_loss": 1.4380216598510742,
"po_loss": 6.208533159224316e-05,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.223965123295784,
"rewards/margins": 0.020062167197465897,
"rewards/rejected": -0.2440272867679596,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 120.00648498535156,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 0.20910552144050598,
"log_odds_ratio": -0.6791337132453918,
"logps/chosen": -1.0920485258102417,
"logps/rejected": -1.2438485622406006,
"loss": 44.6104,
"nll_loss": 1.3940393924713135,
"po_loss": 3.593241126509383e-05,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.2184097319841385,
"rewards/margins": 0.030360007658600807,
"rewards/rejected": -0.24876973032951355,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 68.66105651855469,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 0.2536667287349701,
"log_odds_ratio": -0.6884052157402039,
"logps/chosen": -1.0762526988983154,
"logps/rejected": -1.289530873298645,
"loss": 45.6209,
"nll_loss": 1.4256471395492554,
"po_loss": 6.704734005325008e-06,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.21525056660175323,
"rewards/margins": 0.042655594646930695,
"rewards/rejected": -0.25790613889694214,
"step": 105
},
{
"epoch": 1.037925925925926,
"grad_norm": 43.638755798339844,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 0.4813327491283417,
"log_odds_ratio": -0.571753203868866,
"logps/chosen": -0.9467726349830627,
"logps/rejected": -1.2524032592773438,
"loss": 34.5558,
"nll_loss": 1.2083364725112915,
"po_loss": -9.182094072457403e-05,
"rewards/accuracies": 0.6958041787147522,
"rewards/chosen": -0.1893545240163803,
"rewards/margins": 0.06112610921263695,
"rewards/rejected": -0.25048065185546875,
"step": 110
},
{
"epoch": 1.0853333333333333,
"grad_norm": 66.49714660644531,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 0.6769129037857056,
"log_odds_ratio": -0.5279095768928528,
"logps/chosen": -0.8538272976875305,
"logps/rejected": -1.2733014822006226,
"loss": 36.8982,
"nll_loss": 1.1533045768737793,
"po_loss": -0.00023453465837519616,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1707654595375061,
"rewards/margins": 0.08389485627412796,
"rewards/rejected": -0.25466030836105347,
"step": 115
},
{
"epoch": 1.1327407407407408,
"grad_norm": 63.25371551513672,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 0.5953308343887329,
"log_odds_ratio": -0.5498973727226257,
"logps/chosen": -0.8752245903015137,
"logps/rejected": -1.2550878524780273,
"loss": 37.9873,
"nll_loss": 1.1874502897262573,
"po_loss": -0.000346492015523836,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.17504490911960602,
"rewards/margins": 0.07597266137599945,
"rewards/rejected": -0.25101757049560547,
"step": 120
},
{
"epoch": 1.1801481481481482,
"grad_norm": 55.93903732299805,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 0.576469361782074,
"log_odds_ratio": -0.5385292768478394,
"logps/chosen": -0.9115549921989441,
"logps/rejected": -1.2783465385437012,
"loss": 37.1419,
"nll_loss": 1.161022424697876,
"po_loss": -0.00033691470162011683,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.18231101334095,
"rewards/margins": 0.07335831224918365,
"rewards/rejected": -0.2556692957878113,
"step": 125
},
{
"epoch": 1.2275555555555555,
"grad_norm": 48.382877349853516,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 0.6674849390983582,
"log_odds_ratio": -0.5325134992599487,
"logps/chosen": -0.8701179623603821,
"logps/rejected": -1.2853254079818726,
"loss": 37.2066,
"nll_loss": 1.1643040180206299,
"po_loss": -0.0015986410435289145,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.1740236133337021,
"rewards/margins": 0.0830414891242981,
"rewards/rejected": -0.2570651173591614,
"step": 130
},
{
"epoch": 1.274962962962963,
"grad_norm": 52.023677825927734,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 0.6190892457962036,
"log_odds_ratio": -0.5239226222038269,
"logps/chosen": -0.8530964851379395,
"logps/rejected": -1.225818395614624,
"loss": 36.3779,
"nll_loss": 1.1369173526763916,
"po_loss": -0.0001083976385416463,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.17061929404735565,
"rewards/margins": 0.07454435527324677,
"rewards/rejected": -0.24516364932060242,
"step": 135
},
{
"epoch": 1.3223703703703704,
"grad_norm": 66.0684585571289,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": 0.6582817435264587,
"log_odds_ratio": -0.535703718662262,
"logps/chosen": -0.884809672832489,
"logps/rejected": -1.3024781942367554,
"loss": 37.8405,
"nll_loss": 1.1836440563201904,
"po_loss": -0.0011288595851510763,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.17696192860603333,
"rewards/margins": 0.08353371918201447,
"rewards/rejected": -0.2604956328868866,
"step": 140
},
{
"epoch": 1.3697777777777778,
"grad_norm": 94.56107330322266,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 0.5688942670822144,
"log_odds_ratio": -0.5570982694625854,
"logps/chosen": -0.8914452791213989,
"logps/rejected": -1.2537453174591064,
"loss": 38.2471,
"nll_loss": 1.1957530975341797,
"po_loss": -0.0005301563069224358,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.17828908562660217,
"rewards/margins": 0.07246001064777374,
"rewards/rejected": -0.25074905157089233,
"step": 145
},
{
"epoch": 1.417185185185185,
"grad_norm": 58.81022262573242,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 0.6168826222419739,
"log_odds_ratio": -0.534443736076355,
"logps/chosen": -0.8365335464477539,
"logps/rejected": -1.2264480590820312,
"loss": 35.7093,
"nll_loss": 1.1160484552383423,
"po_loss": -0.00013402634067460895,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.16730670630931854,
"rewards/margins": 0.07798293977975845,
"rewards/rejected": -0.245289608836174,
"step": 150
},
{
"epoch": 1.4645925925925927,
"grad_norm": 46.942298889160156,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 0.6257598400115967,
"log_odds_ratio": -0.5429662466049194,
"logps/chosen": -0.8669716119766235,
"logps/rejected": -1.270719289779663,
"loss": 36.8711,
"nll_loss": 1.152681589126587,
"po_loss": -0.0004589153977576643,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.1733943372964859,
"rewards/margins": 0.0807495266199112,
"rewards/rejected": -0.2541438639163971,
"step": 155
},
{
"epoch": 1.512,
"grad_norm": 48.96784591674805,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 0.564893364906311,
"log_odds_ratio": -0.5655848383903503,
"logps/chosen": -0.8240112066268921,
"logps/rejected": -1.1826220750808716,
"loss": 35.9479,
"nll_loss": 1.1237273216247559,
"po_loss": -0.0003545045619830489,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": -0.16480223834514618,
"rewards/margins": 0.07172217220067978,
"rewards/rejected": -0.23652443289756775,
"step": 160
},
{
"epoch": 1.5594074074074074,
"grad_norm": 43.50642013549805,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 0.6325685977935791,
"log_odds_ratio": -0.5345262885093689,
"logps/chosen": -0.8210509419441223,
"logps/rejected": -1.206971526145935,
"loss": 36.8652,
"nll_loss": 1.1524779796600342,
"po_loss": -0.00043968428508378565,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.16421017050743103,
"rewards/margins": 0.07718412578105927,
"rewards/rejected": -0.2413943111896515,
"step": 165
},
{
"epoch": 1.6068148148148147,
"grad_norm": 51.71367645263672,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 0.6004430055618286,
"log_odds_ratio": -0.535349428653717,
"logps/chosen": -0.8440070152282715,
"logps/rejected": -1.230797290802002,
"loss": 36.4736,
"nll_loss": 1.1400222778320312,
"po_loss": -0.0002236413274658844,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.16880139708518982,
"rewards/margins": 0.07735804468393326,
"rewards/rejected": -0.24615943431854248,
"step": 170
},
{
"epoch": 1.6542222222222223,
"grad_norm": 48.140621185302734,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 0.6727933287620544,
"log_odds_ratio": -0.5192316174507141,
"logps/chosen": -0.83983314037323,
"logps/rejected": -1.2668081521987915,
"loss": 35.4802,
"nll_loss": 1.1090071201324463,
"po_loss": -0.00025099312188103795,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.16796663403511047,
"rewards/margins": 0.08539502322673798,
"rewards/rejected": -0.25336164236068726,
"step": 175
},
{
"epoch": 1.7016296296296296,
"grad_norm": 42.03474044799805,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 0.5798546671867371,
"log_odds_ratio": -0.5439001321792603,
"logps/chosen": -0.8370186686515808,
"logps/rejected": -1.1941139698028564,
"loss": 36.0393,
"nll_loss": 1.1264227628707886,
"po_loss": -0.000195543936570175,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.16740374267101288,
"rewards/margins": 0.07141904532909393,
"rewards/rejected": -0.2388227880001068,
"step": 180
},
{
"epoch": 1.749037037037037,
"grad_norm": 38.88856887817383,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 0.49664992094039917,
"log_odds_ratio": -0.5907926559448242,
"logps/chosen": -0.8930248022079468,
"logps/rejected": -1.2256276607513428,
"loss": 36.1086,
"nll_loss": 1.1285018920898438,
"po_loss": -0.00010755965195130557,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.17860497534275055,
"rewards/margins": 0.0665205791592598,
"rewards/rejected": -0.24512553215026855,
"step": 185
},
{
"epoch": 1.7964444444444445,
"grad_norm": 46.19615936279297,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 0.6304856538772583,
"log_odds_ratio": -0.5355208516120911,
"logps/chosen": -0.8472241163253784,
"logps/rejected": -1.229255199432373,
"loss": 36.4206,
"nll_loss": 1.1382454633712769,
"po_loss": -0.00010128335270565003,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.16944481432437897,
"rewards/margins": 0.07640622556209564,
"rewards/rejected": -0.2458510398864746,
"step": 190
},
{
"epoch": 1.8438518518518519,
"grad_norm": 73.05342102050781,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 0.6382489204406738,
"log_odds_ratio": -0.5185028910636902,
"logps/chosen": -0.8499706983566284,
"logps/rejected": -1.2512637376785278,
"loss": 35.7376,
"nll_loss": 1.1169432401657104,
"po_loss": -0.00014365585229825228,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.16999416053295135,
"rewards/margins": 0.08025859296321869,
"rewards/rejected": -0.25025275349617004,
"step": 195
},
{
"epoch": 1.8912592592592592,
"grad_norm": 47.978912353515625,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 0.6197646260261536,
"log_odds_ratio": -0.5345317721366882,
"logps/chosen": -0.8643702268600464,
"logps/rejected": -1.2556793689727783,
"loss": 36.9782,
"nll_loss": 1.1558464765548706,
"po_loss": -0.0002769582497421652,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.1728740632534027,
"rewards/margins": 0.0782618373632431,
"rewards/rejected": -0.2511358857154846,
"step": 200
},
{
"epoch": 1.9386666666666668,
"grad_norm": 48.29682159423828,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 0.6153634190559387,
"log_odds_ratio": -0.5462952852249146,
"logps/chosen": -0.8348654508590698,
"logps/rejected": -1.204040765762329,
"loss": 35.4368,
"nll_loss": 1.107660174369812,
"po_loss": -0.0002589077048469335,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.16697311401367188,
"rewards/margins": 0.07383506745100021,
"rewards/rejected": -0.2408081740140915,
"step": 205
},
{
"epoch": 1.986074074074074,
"grad_norm": 37.39937973022461,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 0.46801671385765076,
"log_odds_ratio": -0.5808194875717163,
"logps/chosen": -0.8600652813911438,
"logps/rejected": -1.1320184469223022,
"loss": 37.1,
"nll_loss": 1.1594690084457397,
"po_loss": -9.401176066603512e-05,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.1720130741596222,
"rewards/margins": 0.05439060926437378,
"rewards/rejected": -0.22640366852283478,
"step": 210
},
{
"epoch": 2.0284444444444443,
"grad_norm": 60.70783233642578,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 0.8869153261184692,
"log_odds_ratio": -0.44785308837890625,
"logps/chosen": -0.6780868768692017,
"logps/rejected": -1.1526471376419067,
"loss": 26.1816,
"nll_loss": 0.9157966375350952,
"po_loss": -0.00035746488720178604,
"rewards/accuracies": 0.7902097702026367,
"rewards/chosen": -0.13561737537384033,
"rewards/margins": 0.09491205215454102,
"rewards/rejected": -0.23052944242954254,
"step": 215
},
{
"epoch": 2.075851851851852,
"grad_norm": 63.7738037109375,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 1.119178056716919,
"log_odds_ratio": -0.4020889699459076,
"logps/chosen": -0.6872093677520752,
"logps/rejected": -1.3119704723358154,
"loss": 28.6045,
"nll_loss": 0.8954778909683228,
"po_loss": -0.0015872030053287745,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.13744190335273743,
"rewards/margins": 0.12495221942663193,
"rewards/rejected": -0.26239413022994995,
"step": 220
},
{
"epoch": 2.1232592592592594,
"grad_norm": 51.26667022705078,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 1.191803216934204,
"log_odds_ratio": -0.3746866285800934,
"logps/chosen": -0.6712603569030762,
"logps/rejected": -1.3162505626678467,
"loss": 27.9395,
"nll_loss": 0.873916506767273,
"po_loss": -0.0008082756539806724,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -0.13425207138061523,
"rewards/margins": 0.1289980709552765,
"rewards/rejected": -0.26325011253356934,
"step": 225
},
{
"epoch": 2.1706666666666665,
"grad_norm": 50.845035552978516,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 1.267938494682312,
"log_odds_ratio": -0.37167197465896606,
"logps/chosen": -0.646305501461029,
"logps/rejected": -1.347239375114441,
"loss": 27.6319,
"nll_loss": 0.8649285435676575,
"po_loss": -0.0014302517520263791,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1292611062526703,
"rewards/margins": 0.14018678665161133,
"rewards/rejected": -0.2694478929042816,
"step": 230
},
{
"epoch": 2.218074074074074,
"grad_norm": 46.15715789794922,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 1.2005326747894287,
"log_odds_ratio": -0.37836360931396484,
"logps/chosen": -0.63874351978302,
"logps/rejected": -1.281273365020752,
"loss": 28.6655,
"nll_loss": 0.8964711427688599,
"po_loss": -0.0006741798715665936,
"rewards/accuracies": 0.871874988079071,
"rewards/chosen": -0.12774871289730072,
"rewards/margins": 0.12850596010684967,
"rewards/rejected": -0.2562546730041504,
"step": 235
},
{
"epoch": 2.2654814814814817,
"grad_norm": 41.55079650878906,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 1.3142430782318115,
"log_odds_ratio": -0.351533979177475,
"logps/chosen": -0.6246355772018433,
"logps/rejected": -1.360852599143982,
"loss": 26.9269,
"nll_loss": 0.8446337580680847,
"po_loss": -0.00316769746132195,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.12492714077234268,
"rewards/margins": 0.14724338054656982,
"rewards/rejected": -0.2721705138683319,
"step": 240
},
{
"epoch": 2.3128888888888888,
"grad_norm": 41.21688461303711,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 1.2483962774276733,
"log_odds_ratio": -0.35364586114883423,
"logps/chosen": -0.6280895471572876,
"logps/rejected": -1.3165141344070435,
"loss": 27.6397,
"nll_loss": 0.8649471402168274,
"po_loss": -0.0012058170977979898,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.12561790645122528,
"rewards/margins": 0.1376849114894867,
"rewards/rejected": -0.26330283284187317,
"step": 245
},
{
"epoch": 2.3602962962962963,
"grad_norm": 45.04114532470703,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 1.2950201034545898,
"log_odds_ratio": -0.34702420234680176,
"logps/chosen": -0.5998716354370117,
"logps/rejected": -1.2858121395111084,
"loss": 27.8242,
"nll_loss": 0.8703107833862305,
"po_loss": -0.0008051077020354569,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.11997435241937637,
"rewards/margins": 0.13718809187412262,
"rewards/rejected": -0.2571624219417572,
"step": 250
},
{
"epoch": 2.407703703703704,
"grad_norm": 42.57611083984375,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 1.4084466695785522,
"log_odds_ratio": -0.34137439727783203,
"logps/chosen": -0.6155047416687012,
"logps/rejected": -1.416010856628418,
"loss": 27.1897,
"nll_loss": 0.8556439280509949,
"po_loss": -0.005965453572571278,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.12310095131397247,
"rewards/margins": 0.16010123491287231,
"rewards/rejected": -0.2832021713256836,
"step": 255
},
{
"epoch": 2.455111111111111,
"grad_norm": 37.19707107543945,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 1.2491247653961182,
"log_odds_ratio": -0.35060206055641174,
"logps/chosen": -0.6111522912979126,
"logps/rejected": -1.2990696430206299,
"loss": 27.3026,
"nll_loss": 0.8542858958244324,
"po_loss": -0.001079150359146297,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.12223044782876968,
"rewards/margins": 0.13758344948291779,
"rewards/rejected": -0.25981390476226807,
"step": 260
},
{
"epoch": 2.5025185185185186,
"grad_norm": 46.44743347167969,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 1.178978681564331,
"log_odds_ratio": -0.3793484568595886,
"logps/chosen": -0.6664374470710754,
"logps/rejected": -1.3323941230773926,
"loss": 27.6656,
"nll_loss": 0.8664777874946594,
"po_loss": -0.0019290826749056578,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -0.1332874894142151,
"rewards/margins": 0.1331913322210312,
"rewards/rejected": -0.26647883653640747,
"step": 265
},
{
"epoch": 2.549925925925926,
"grad_norm": 44.04457473754883,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 1.312464952468872,
"log_odds_ratio": -0.3540407717227936,
"logps/chosen": -0.626907467842102,
"logps/rejected": -1.3609790802001953,
"loss": 26.6912,
"nll_loss": 0.8371696472167969,
"po_loss": -0.003068637801334262,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.1253814995288849,
"rewards/margins": 0.14681431651115417,
"rewards/rejected": -0.2721957862377167,
"step": 270
},
{
"epoch": 2.5973333333333333,
"grad_norm": 48.056907653808594,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 1.2681269645690918,
"log_odds_ratio": -0.35387295484542847,
"logps/chosen": -0.6462396383285522,
"logps/rejected": -1.3675159215927124,
"loss": 26.9575,
"nll_loss": 0.8445149660110474,
"po_loss": -0.0020923474803566933,
"rewards/accuracies": 0.890625,
"rewards/chosen": -0.12924793362617493,
"rewards/margins": 0.14425526559352875,
"rewards/rejected": -0.27350321412086487,
"step": 275
},
{
"epoch": 2.644740740740741,
"grad_norm": 44.935523986816406,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 1.1880494356155396,
"log_odds_ratio": -0.3788948953151703,
"logps/chosen": -0.626964271068573,
"logps/rejected": -1.2733209133148193,
"loss": 27.3931,
"nll_loss": 0.8574727177619934,
"po_loss": -0.0014386550756171346,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -0.1253928691148758,
"rewards/margins": 0.12927132844924927,
"rewards/rejected": -0.25466424226760864,
"step": 280
},
{
"epoch": 2.6921481481481484,
"grad_norm": 54.157283782958984,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 1.1967148780822754,
"log_odds_ratio": -0.3934488296508789,
"logps/chosen": -0.6908615231513977,
"logps/rejected": -1.3779170513153076,
"loss": 28.7151,
"nll_loss": 0.9004608392715454,
"po_loss": -0.003114379709586501,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -0.13817231357097626,
"rewards/margins": 0.13741108775138855,
"rewards/rejected": -0.2755833864212036,
"step": 285
},
{
"epoch": 2.7395555555555555,
"grad_norm": 40.38901901245117,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 1.2478935718536377,
"log_odds_ratio": -0.3714280128479004,
"logps/chosen": -0.6672550439834595,
"logps/rejected": -1.3506200313568115,
"loss": 27.6346,
"nll_loss": 0.8646379709243774,
"po_loss": -0.0010563342366367579,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.13345098495483398,
"rewards/margins": 0.1366730034351349,
"rewards/rejected": -0.27012401819229126,
"step": 290
},
{
"epoch": 2.786962962962963,
"grad_norm": 40.278404235839844,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 1.1103378534317017,
"log_odds_ratio": -0.39963197708129883,
"logps/chosen": -0.6754161715507507,
"logps/rejected": -1.2891783714294434,
"loss": 28.7924,
"nll_loss": 0.9006980061531067,
"po_loss": -0.0009352812776342034,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.13508322834968567,
"rewards/margins": 0.12275244295597076,
"rewards/rejected": -0.2578356862068176,
"step": 295
},
{
"epoch": 2.83437037037037,
"grad_norm": 43.64297866821289,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 1.26936936378479,
"log_odds_ratio": -0.37458792328834534,
"logps/chosen": -0.625272274017334,
"logps/rejected": -1.3025916814804077,
"loss": 27.2818,
"nll_loss": 0.8537141680717468,
"po_loss": -0.0011581950820982456,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.12505444884300232,
"rewards/margins": 0.1354638934135437,
"rewards/rejected": -0.2605183720588684,
"step": 300
},
{
"epoch": 2.8817777777777778,
"grad_norm": 49.83470153808594,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 1.3354085683822632,
"log_odds_ratio": -0.3548244833946228,
"logps/chosen": -0.6030293703079224,
"logps/rejected": -1.3301148414611816,
"loss": 26.8635,
"nll_loss": 0.8420251607894897,
"po_loss": -0.002539848443120718,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.12060587108135223,
"rewards/margins": 0.14541709423065186,
"rewards/rejected": -0.2660229504108429,
"step": 305
},
{
"epoch": 2.9291851851851853,
"grad_norm": 41.83040237426758,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 1.4439231157302856,
"log_odds_ratio": -0.3062170743942261,
"logps/chosen": -0.5867719650268555,
"logps/rejected": -1.3927805423736572,
"loss": 27.0062,
"nll_loss": 0.845768928527832,
"po_loss": -0.001825585961341858,
"rewards/accuracies": 0.909375011920929,
"rewards/chosen": -0.1173543930053711,
"rewards/margins": 0.16120170056819916,
"rewards/rejected": -0.27855610847473145,
"step": 310
},
{
"epoch": 2.9765925925925925,
"grad_norm": 56.19439697265625,
"learning_rate": 0.0,
"log_odds_chosen": 1.2505788803100586,
"log_odds_ratio": -0.35724979639053345,
"logps/chosen": -0.5848164558410645,
"logps/rejected": -1.2180840969085693,
"loss": 25.9054,
"nll_loss": 0.8101914525032043,
"po_loss": -0.0006477964925579727,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.11696330457925797,
"rewards/margins": 0.1266535222530365,
"rewards/rejected": -0.24361681938171387,
"step": 315
},
{
"epoch": 2.9765925925925925,
"step": 315,
"total_flos": 0.0,
"train_loss": 67.51294788178943,
"train_runtime": 9693.4577,
"train_samples_per_second": 2.089,
"train_steps_per_second": 0.032
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}