{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9765925925925925, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.047407407407407405, "grad_norm": 792.38525390625, "learning_rate": 7.8125e-06, "log_odds_chosen": -1.0633289813995361, "log_odds_ratio": -11.008821487426758, "logps/chosen": -22.71021270751953, "logps/rejected": -21.647119522094727, "loss": 313.0364, "nll_loss": 9.83913516998291, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.1355106830596924, "rewards/margins": -0.053154636174440384, "rewards/rejected": -1.0823559761047363, "step": 5 }, { "epoch": 0.09481481481481481, "grad_norm": 428.2666015625, "learning_rate": 1.5625e-05, "log_odds_chosen": -3.141080141067505, "log_odds_ratio": -11.092788696289062, "logps/chosen": -22.066173553466797, "logps/rejected": -18.925434112548828, "loss": 302.7381, "nll_loss": 8.76375961303711, "rewards/accuracies": 0.44062501192092896, "rewards/chosen": -1.1033086776733398, "rewards/margins": -0.1570369303226471, "rewards/rejected": -0.9462717175483704, "step": 10 }, { "epoch": 0.14222222222222222, "grad_norm": 328.1651611328125, "learning_rate": 2.34375e-05, "log_odds_chosen": -1.2912936210632324, "log_odds_ratio": -11.451318740844727, "logps/chosen": -22.174365997314453, "logps/rejected": -20.881633758544922, "loss": 242.2958, "nll_loss": 8.035699844360352, "rewards/accuracies": 0.515625, "rewards/chosen": -1.1087182760238647, "rewards/margins": -0.06463650614023209, "rewards/rejected": -1.0440818071365356, "step": 15 }, { "epoch": 0.18962962962962962, "grad_norm": 1597.0350341796875, "learning_rate": 3.125e-05, "log_odds_chosen": -4.8496503829956055, "log_odds_ratio": -10.526864051818848, "logps/chosen": -18.7955322265625, "logps/rejected": -13.947853088378906, "loss": 242.3466, "nll_loss": 6.613438606262207, "rewards/accuracies": 0.43437498807907104, "rewards/chosen": -0.9397767782211304, "rewards/margins": -0.24238400161266327, "rewards/rejected": -0.6973927021026611, "step": 20 }, { "epoch": 0.23703703703703705, "grad_norm": 696.0065307617188, "learning_rate": 3.90625e-05, "log_odds_chosen": -0.22220773994922638, "log_odds_ratio": -2.4632389545440674, "logps/chosen": -5.1920647621154785, "logps/rejected": -4.958856582641602, "loss": 94.1083, "nll_loss": 2.8615896701812744, "rewards/accuracies": 0.53125, "rewards/chosen": -0.25960323214530945, "rewards/margins": -0.011660419404506683, "rewards/rejected": -0.24794280529022217, "step": 25 }, { "epoch": 0.28444444444444444, "grad_norm": 305.71484375, "learning_rate": 4.6875e-05, "log_odds_chosen": 0.019041310995817184, "log_odds_ratio": -0.9295223355293274, "logps/chosen": -1.8664500713348389, "logps/rejected": -1.887586236000061, "loss": 64.8289, "nll_loss": 2.025871753692627, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.0933225229382515, "rewards/margins": 0.0010567905846983194, "rewards/rejected": -0.09437931329011917, "step": 30 }, { "epoch": 0.33185185185185184, "grad_norm": 324.5325622558594, "learning_rate": 4.998613757348784e-05, "log_odds_chosen": 0.1986609697341919, "log_odds_ratio": -0.8607286214828491, "logps/chosen": -1.753347635269165, "logps/rejected": -1.936212182044983, "loss": 61.1388, "nll_loss": 1.9106839895248413, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.08766736835241318, "rewards/margins": 0.00914324913173914, "rewards/rejected": -0.09681062400341034, "step": 35 }, { "epoch": 0.37925925925925924, "grad_norm": 89.67798614501953, "learning_rate": 4.990147841143462e-05, "log_odds_chosen": 0.2364165335893631, "log_odds_ratio": -0.7539523243904114, "logps/chosen": -1.4954068660736084, "logps/rejected": -1.6909542083740234, "loss": 57.2273, "nll_loss": 1.7883838415145874, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.07477033883333206, "rewards/margins": 0.009777378290891647, "rewards/rejected": -0.08454772084951401, "step": 40 }, { "epoch": 0.4266666666666667, "grad_norm": 317.7296142578125, "learning_rate": 4.97401218720448e-05, "log_odds_chosen": 0.18531295657157898, "log_odds_ratio": -0.748820424079895, "logps/chosen": -1.391809105873108, "logps/rejected": -1.5561792850494385, "loss": 53.7507, "nll_loss": 1.6797250509262085, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06959046423435211, "rewards/margins": 0.008218500763177872, "rewards/rejected": -0.07780896872282028, "step": 45 }, { "epoch": 0.4740740740740741, "grad_norm": 63.952606201171875, "learning_rate": 4.9502564938797946e-05, "log_odds_chosen": 0.2320287525653839, "log_odds_ratio": -0.712664008140564, "logps/chosen": -1.2638026475906372, "logps/rejected": -1.4603520631790161, "loss": 49.7536, "nll_loss": 1.5548087358474731, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.06319012492895126, "rewards/margins": 0.009827475063502789, "rewards/rejected": -0.07301759719848633, "step": 50 }, { "epoch": 0.5214814814814814, "grad_norm": 91.46247863769531, "learning_rate": 4.918953929490768e-05, "log_odds_chosen": 0.20604506134986877, "log_odds_ratio": -0.7306782007217407, "logps/chosen": -1.2653909921646118, "logps/rejected": -1.4292025566101074, "loss": 50.2519, "nll_loss": 1.5703781843185425, "rewards/accuracies": 0.546875, "rewards/chosen": -0.06326955556869507, "rewards/margins": 0.008190581575036049, "rewards/rejected": -0.07146013528108597, "step": 55 }, { "epoch": 0.5688888888888889, "grad_norm": 75.0418930053711, "learning_rate": 4.88020090697132e-05, "log_odds_chosen": 0.1546681523323059, "log_odds_ratio": -0.7336845397949219, "logps/chosen": -1.2726503610610962, "logps/rejected": -1.4038974046707153, "loss": 50.9159, "nll_loss": 1.5911293029785156, "rewards/accuracies": 0.515625, "rewards/chosen": -0.0636325255036354, "rewards/margins": 0.006562354508787394, "rewards/rejected": -0.07019487768411636, "step": 60 }, { "epoch": 0.6162962962962963, "grad_norm": 68.39701080322266, "learning_rate": 4.834116786912897e-05, "log_odds_chosen": 0.20255205035209656, "log_odds_ratio": -0.6995530128479004, "logps/chosen": -1.2141984701156616, "logps/rejected": -1.3691837787628174, "loss": 48.2158, "nll_loss": 1.5067507028579712, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.06070992350578308, "rewards/margins": 0.0077492655254900455, "rewards/rejected": -0.06845919787883759, "step": 65 }, { "epoch": 0.6637037037037037, "grad_norm": 72.79645538330078, "learning_rate": 4.7808435099299045e-05, "log_odds_chosen": 0.2950156629085541, "log_odds_ratio": -0.6627887487411499, "logps/chosen": -1.174963355064392, "logps/rejected": -1.4096721410751343, "loss": 47.7687, "nll_loss": 1.4927794933319092, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.05874817445874214, "rewards/margins": 0.011735435575246811, "rewards/rejected": -0.07048360258340836, "step": 70 }, { "epoch": 0.7111111111111111, "grad_norm": 130.2486572265625, "learning_rate": 4.720545159477922e-05, "log_odds_chosen": 0.28017815947532654, "log_odds_ratio": -0.6927684545516968, "logps/chosen": -1.1511961221694946, "logps/rejected": -1.3688184022903442, "loss": 46.775, "nll_loss": 1.4617594480514526, "rewards/accuracies": 0.625, "rewards/chosen": -0.05755980685353279, "rewards/margins": 0.01088111661374569, "rewards/rejected": -0.06844092905521393, "step": 75 }, { "epoch": 0.7585185185185185, "grad_norm": 197.74847412109375, "learning_rate": 4.653407456471222e-05, "log_odds_chosen": 0.17523232102394104, "log_odds_ratio": -0.7097013592720032, "logps/chosen": -1.1325418949127197, "logps/rejected": -1.2717969417572021, "loss": 45.6116, "nll_loss": 1.4253653287887573, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.056627094745635986, "rewards/margins": 0.006962755229324102, "rewards/rejected": -0.06358985602855682, "step": 80 }, { "epoch": 0.8059259259259259, "grad_norm": 188.2830047607422, "learning_rate": 4.579637187256222e-05, "log_odds_chosen": 0.16755765676498413, "log_odds_ratio": -0.6960936188697815, "logps/chosen": -1.1235644817352295, "logps/rejected": -1.252456784248352, "loss": 46.597, "nll_loss": 1.4561591148376465, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.056178223341703415, "rewards/margins": 0.006444619502872229, "rewards/rejected": -0.06262283772230148, "step": 85 }, { "epoch": 0.8533333333333334, "grad_norm": 44.02568817138672, "learning_rate": 4.499461566702685e-05, "log_odds_chosen": 0.27673009037971497, "log_odds_ratio": -0.6601535677909851, "logps/chosen": -1.0445119142532349, "logps/rejected": -1.2381047010421753, "loss": 44.3561, "nll_loss": 1.3861300945281982, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.05222559720277786, "rewards/margins": 0.0096796415746212, "rewards/rejected": -0.06190522760152817, "step": 90 }, { "epoch": 0.9007407407407407, "grad_norm": 122.10076904296875, "learning_rate": 4.413127538374411e-05, "log_odds_chosen": 0.1390208899974823, "log_odds_ratio": -0.7249562740325928, "logps/chosen": -1.1228805780410767, "logps/rejected": -1.2305368185043335, "loss": 46.1332, "nll_loss": 1.4416650533676147, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.05614402890205383, "rewards/margins": 0.00538281537592411, "rewards/rejected": -0.06152684614062309, "step": 95 }, { "epoch": 0.9481481481481482, "grad_norm": 53.44792556762695, "learning_rate": 4.320901013934887e-05, "log_odds_chosen": 0.21734675765037537, "log_odds_ratio": -0.6752195358276367, "logps/chosen": -1.0842350721359253, "logps/rejected": -1.2408678531646729, "loss": 44.4538, "nll_loss": 1.3891844749450684, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.05421174690127373, "rewards/margins": 0.007831638678908348, "rewards/rejected": -0.062043387442827225, "step": 100 }, { "epoch": 0.9955555555555555, "grad_norm": 40.07997131347656, "learning_rate": 4.223066054130568e-05, "log_odds_chosen": 0.24603669345378876, "log_odds_ratio": -0.6888378262519836, "logps/chosen": -1.0828354358673096, "logps/rejected": -1.2879139184951782, "loss": 45.6011, "nll_loss": 1.4250457286834717, "rewards/accuracies": 0.578125, "rewards/chosen": -0.05414177104830742, "rewards/margins": 0.010253915563225746, "rewards/rejected": -0.06439568847417831, "step": 105 }, { "epoch": 1.037925925925926, "grad_norm": 80.022216796875, "learning_rate": 4.1199239938743797e-05, "log_odds_chosen": 0.4648367166519165, "log_odds_ratio": -0.5783078074455261, "logps/chosen": -0.9484525918960571, "logps/rejected": -1.244549036026001, "loss": 34.6927, "nll_loss": 1.2130365371704102, "rewards/accuracies": 0.688811182975769, "rewards/chosen": -0.047422636300325394, "rewards/margins": 0.014804825186729431, "rewards/rejected": -0.06222745403647423, "step": 110 }, { "epoch": 1.0853333333333333, "grad_norm": 70.07905578613281, "learning_rate": 4.0117925141242174e-05, "log_odds_chosen": 0.659235954284668, "log_odds_ratio": -0.5348842740058899, "logps/chosen": -0.8617693185806274, "logps/rejected": -1.2707138061523438, "loss": 37.0131, "nll_loss": 1.156667947769165, "rewards/accuracies": 0.703125, "rewards/chosen": -0.04308847337961197, "rewards/margins": 0.020447226241230965, "rewards/rejected": -0.06353570520877838, "step": 115 }, { "epoch": 1.1327407407407408, "grad_norm": 47.251686096191406, "learning_rate": 3.899004663415084e-05, "log_odds_chosen": 0.5908142328262329, "log_odds_ratio": -0.5543237924575806, "logps/chosen": -0.878613293170929, "logps/rejected": -1.2573318481445312, "loss": 37.8893, "nll_loss": 1.1840603351593018, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.043930668383836746, "rewards/margins": 0.018935926258563995, "rewards/rejected": -0.06286659836769104, "step": 120 }, { "epoch": 1.1801481481481482, "grad_norm": 56.60850524902344, "learning_rate": 3.781907832058587e-05, "log_odds_chosen": 0.5747486352920532, "log_odds_ratio": -0.5479683876037598, "logps/chosen": -0.9593020677566528, "logps/rejected": -1.3153735399246216, "loss": 37.489, "nll_loss": 1.1715312004089355, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.04796510189771652, "rewards/margins": 0.017803579568862915, "rewards/rejected": -0.06576868146657944, "step": 125 }, { "epoch": 1.2275555555555555, "grad_norm": 54.802001953125, "learning_rate": 3.660862682169282e-05, "log_odds_chosen": 0.6732016205787659, "log_odds_ratio": -0.5319503545761108, "logps/chosen": -0.8832129240036011, "logps/rejected": -1.3126264810562134, "loss": 37.4911, "nll_loss": 1.1716387271881104, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.04416064918041229, "rewards/margins": 0.021470673382282257, "rewards/rejected": -0.06563132256269455, "step": 130 }, { "epoch": 1.274962962962963, "grad_norm": 69.01300811767578, "learning_rate": 3.5362420368134356e-05, "log_odds_chosen": 0.6136461496353149, "log_odds_ratio": -0.5238311290740967, "logps/chosen": -0.8545753359794617, "logps/rejected": -1.222575306892395, "loss": 36.2153, "nll_loss": 1.1317381858825684, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.042728766798973083, "rewards/margins": 0.018400002270936966, "rewards/rejected": -0.06112876534461975, "step": 135 }, { "epoch": 1.3223703703703704, "grad_norm": 54.12861251831055, "learning_rate": 3.408429731701635e-05, "log_odds_chosen": 0.6623863577842712, "log_odds_ratio": -0.5327891111373901, "logps/chosen": -0.8905497789382935, "logps/rejected": -1.3153434991836548, "loss": 37.7196, "nll_loss": 1.1787680387496948, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.04452748969197273, "rewards/margins": 0.021239688619971275, "rewards/rejected": -0.06576718389987946, "step": 140 }, { "epoch": 1.3697777777777778, "grad_norm": 46.213619232177734, "learning_rate": 3.2778194329621104e-05, "log_odds_chosen": 0.591569185256958, "log_odds_ratio": -0.5503125190734863, "logps/chosen": -0.8838316798210144, "logps/rejected": -1.2629492282867432, "loss": 37.9637, "nll_loss": 1.1863839626312256, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.04419158771634102, "rewards/margins": 0.018955877050757408, "rewards/rejected": -0.06314746290445328, "step": 145 }, { "epoch": 1.417185185185185, "grad_norm": 72.21317291259766, "learning_rate": 3.144813424636031e-05, "log_odds_chosen": 0.6160995960235596, "log_odds_ratio": -0.5348357558250427, "logps/chosen": -0.8305962681770325, "logps/rejected": -1.2168922424316406, "loss": 35.6404, "nll_loss": 1.113783359527588, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.041529811918735504, "rewards/margins": 0.01931479573249817, "rewards/rejected": -0.06084461882710457, "step": 150 }, { "epoch": 1.4645925925925927, "grad_norm": 52.31929397583008, "learning_rate": 3.0098213696293542e-05, "log_odds_chosen": 0.6030026078224182, "log_odds_ratio": -0.5483356714248657, "logps/chosen": -0.8655644655227661, "logps/rejected": -1.2535353899002075, "loss": 36.7876, "nll_loss": 1.1496236324310303, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.043278224766254425, "rewards/margins": 0.01939854398369789, "rewards/rejected": -0.06267677247524261, "step": 155 }, { "epoch": 1.512, "grad_norm": 46.16996383666992, "learning_rate": 2.8732590479375165e-05, "log_odds_chosen": 0.5530454516410828, "log_odds_ratio": -0.571170449256897, "logps/chosen": -0.8337518572807312, "logps/rejected": -1.1828572750091553, "loss": 35.8735, "nll_loss": 1.1210591793060303, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04168759286403656, "rewards/margins": 0.017455268651247025, "rewards/rejected": -0.05914286524057388, "step": 160 }, { "epoch": 1.5594074074074074, "grad_norm": 49.967220306396484, "learning_rate": 2.7355470760292956e-05, "log_odds_chosen": 0.6309081315994263, "log_odds_ratio": -0.5347853899002075, "logps/chosen": -0.8237282037734985, "logps/rejected": -1.2079499959945679, "loss": 36.6764, "nll_loss": 1.146148920059204, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.041186410933732986, "rewards/margins": 0.019211089238524437, "rewards/rejected": -0.06039750576019287, "step": 165 }, { "epoch": 1.6068148148148147, "grad_norm": 48.000816345214844, "learning_rate": 2.597109611334169e-05, "log_odds_chosen": 0.6030667424201965, "log_odds_ratio": -0.5413313508033752, "logps/chosen": -0.8475804328918457, "logps/rejected": -1.2353817224502563, "loss": 36.4891, "nll_loss": 1.1402943134307861, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.042379021644592285, "rewards/margins": 0.019390063360333443, "rewards/rejected": -0.06176908686757088, "step": 170 }, { "epoch": 1.6542222222222223, "grad_norm": 44.61423873901367, "learning_rate": 2.458373045823404e-05, "log_odds_chosen": 0.686916172504425, "log_odds_ratio": -0.5163211822509766, "logps/chosen": -0.8324688076972961, "logps/rejected": -1.262216329574585, "loss": 35.1998, "nll_loss": 1.1000096797943115, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.041623443365097046, "rewards/margins": 0.02148738130927086, "rewards/rejected": -0.06311081349849701, "step": 175 }, { "epoch": 1.7016296296296296, "grad_norm": 45.969329833984375, "learning_rate": 2.3197646927086697e-05, "log_odds_chosen": 0.5815957188606262, "log_odds_ratio": -0.545112133026123, "logps/chosen": -0.8381115794181824, "logps/rejected": -1.1991239786148071, "loss": 35.9405, "nll_loss": 1.1231504678726196, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.04190558195114136, "rewards/margins": 0.01805061474442482, "rewards/rejected": -0.05995619297027588, "step": 180 }, { "epoch": 1.749037037037037, "grad_norm": 41.58064651489258, "learning_rate": 2.1817114703032176e-05, "log_odds_chosen": 0.4975903630256653, "log_odds_ratio": -0.5937719345092773, "logps/chosen": -0.8929702639579773, "logps/rejected": -1.2248613834381104, "loss": 36.0836, "nll_loss": 1.1276171207427979, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.044648513197898865, "rewards/margins": 0.01659456267952919, "rewards/rejected": -0.06124307960271835, "step": 185 }, { "epoch": 1.7964444444444445, "grad_norm": 60.331520080566406, "learning_rate": 2.0446385870993467e-05, "log_odds_chosen": 0.614252507686615, "log_odds_ratio": -0.5398948788642883, "logps/chosen": -0.8563615679740906, "logps/rejected": -1.2327303886413574, "loss": 36.4939, "nll_loss": 1.1404454708099365, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.04281807690858841, "rewards/margins": 0.01881844364106655, "rewards/rejected": -0.06163651868700981, "step": 190 }, { "epoch": 1.8438518518518519, "grad_norm": 75.5445327758789, "learning_rate": 1.9089682321121834e-05, "log_odds_chosen": 0.6081385612487793, "log_odds_ratio": -0.5249617099761963, "logps/chosen": -0.8585560917854309, "logps/rejected": -1.2459484338760376, "loss": 35.8636, "nll_loss": 1.1207501888275146, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.042927805334329605, "rewards/margins": 0.019369617104530334, "rewards/rejected": -0.06229741498827934, "step": 195 }, { "epoch": 1.8912592592592592, "grad_norm": 44.663265228271484, "learning_rate": 1.775118274523545e-05, "log_odds_chosen": 0.6004494428634644, "log_odds_ratio": -0.5396888852119446, "logps/chosen": -0.8678308725357056, "logps/rejected": -1.2445733547210693, "loss": 36.9004, "nll_loss": 1.1531460285186768, "rewards/accuracies": 0.71875, "rewards/chosen": -0.04339154437184334, "rewards/margins": 0.01883712410926819, "rewards/rejected": -0.062228668481111526, "step": 200 }, { "epoch": 1.9386666666666668, "grad_norm": 43.89218521118164, "learning_rate": 1.643500976631037e-05, "log_odds_chosen": 0.6114708185195923, "log_odds_ratio": -0.5463000535964966, "logps/chosen": -0.837258517742157, "logps/rejected": -1.203930139541626, "loss": 35.4146, "nll_loss": 1.106716275215149, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.04186292737722397, "rewards/margins": 0.01833358407020569, "rewards/rejected": -0.06019651144742966, "step": 205 }, { "epoch": 1.986074074074074, "grad_norm": 42.78997039794922, "learning_rate": 1.514521724066537e-05, "log_odds_chosen": 0.4906364381313324, "log_odds_ratio": -0.5703830718994141, "logps/chosen": -0.847213625907898, "logps/rejected": -1.1322624683380127, "loss": 36.6807, "nll_loss": 1.146274209022522, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.042360685765743256, "rewards/margins": 0.014252434484660625, "rewards/rejected": -0.05661311745643616, "step": 210 }, { "epoch": 2.0284444444444443, "grad_norm": 103.51607513427734, "learning_rate": 1.3885777771950348e-05, "log_odds_chosen": 0.9088935256004333, "log_odds_ratio": -0.4468950927257538, "logps/chosen": -0.6783780455589294, "logps/rejected": -1.1631968021392822, "loss": 26.0866, "nll_loss": 0.9121373295783997, "rewards/accuracies": 0.7797203063964844, "rewards/chosen": -0.03391890227794647, "rewards/margins": 0.024240940809249878, "rewards/rejected": -0.05815984681248665, "step": 215 }, { "epoch": 2.075851851851852, "grad_norm": 66.26596069335938, "learning_rate": 1.2660570475395683e-05, "log_odds_chosen": 1.0980430841445923, "log_odds_ratio": -0.40422552824020386, "logps/chosen": -0.688414454460144, "logps/rejected": -1.2952059507369995, "loss": 28.453, "nll_loss": 0.889173686504364, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.03442072123289108, "rewards/margins": 0.03033958002924919, "rewards/rejected": -0.06476030498743057, "step": 220 }, { "epoch": 2.1232592592592594, "grad_norm": 49.28996658325195, "learning_rate": 1.1473369030008974e-05, "log_odds_chosen": 1.1917976140975952, "log_odds_ratio": -0.3736167252063751, "logps/chosen": -0.6676262617111206, "logps/rejected": -1.322096586227417, "loss": 27.8484, "nll_loss": 0.8702806234359741, "rewards/accuracies": 0.84375, "rewards/chosen": -0.03338130936026573, "rewards/margins": 0.032723523676395416, "rewards/rejected": -0.06610482931137085, "step": 225 }, { "epoch": 2.1706666666666665, "grad_norm": 51.01750183105469, "learning_rate": 1.0327830055518842e-05, "log_odds_chosen": 1.2477470636367798, "log_odds_ratio": -0.37411513924598694, "logps/chosen": -0.6474167108535767, "logps/rejected": -1.3315131664276123, "loss": 27.5889, "nll_loss": 0.8621736764907837, "rewards/accuracies": 0.809374988079071, "rewards/chosen": -0.03237083554267883, "rewards/margins": 0.03420482575893402, "rewards/rejected": -0.06657566130161285, "step": 230 }, { "epoch": 2.218074074074074, "grad_norm": 44.26323699951172, "learning_rate": 9.227481849865235e-06, "log_odds_chosen": 1.2007415294647217, "log_odds_ratio": -0.38011908531188965, "logps/chosen": -0.6438184976577759, "logps/rejected": -1.2822532653808594, "loss": 28.5744, "nll_loss": 0.8929685354232788, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.03219092637300491, "rewards/margins": 0.031921736896038055, "rewards/rejected": -0.06411266326904297, "step": 235 }, { "epoch": 2.2654814814814817, "grad_norm": 40.39424514770508, "learning_rate": 8.175713521924978e-06, "log_odds_chosen": 1.2823920249938965, "log_odds_ratio": -0.35539510846138, "logps/chosen": -0.6238277554512024, "logps/rejected": -1.3376963138580322, "loss": 27.0898, "nll_loss": 0.846601665019989, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.03119138814508915, "rewards/margins": 0.03569342941045761, "rewards/rejected": -0.06688482314348221, "step": 240 }, { "epoch": 2.3128888888888888, "grad_norm": 39.28328323364258, "learning_rate": 7.1757645529443665e-06, "log_odds_chosen": 1.199920892715454, "log_odds_ratio": -0.36062198877334595, "logps/chosen": -0.6361005902290344, "logps/rejected": -1.2936389446258545, "loss": 27.7325, "nll_loss": 0.8666588664054871, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.03180502727627754, "rewards/margins": 0.03287691995501518, "rewards/rejected": -0.06468195468187332, "step": 245 }, { "epoch": 2.3602962962962963, "grad_norm": 53.49985885620117, "learning_rate": 6.230714818829733e-06, "log_odds_chosen": 1.2467153072357178, "log_odds_ratio": -0.35257259011268616, "logps/chosen": -0.6029318571090698, "logps/rejected": -1.2543420791625977, "loss": 27.8856, "nll_loss": 0.8714394569396973, "rewards/accuracies": 0.859375, "rewards/chosen": -0.03014659322798252, "rewards/margins": 0.03257050737738609, "rewards/rejected": -0.06271710246801376, "step": 250 }, { "epoch": 2.407703703703704, "grad_norm": 37.84031295776367, "learning_rate": 5.343475104027743e-06, "log_odds_chosen": 1.3476929664611816, "log_odds_ratio": -0.34865802526474, "logps/chosen": -0.6192291975021362, "logps/rejected": -1.3746944665908813, "loss": 27.4652, "nll_loss": 0.8583399057388306, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.030961457639932632, "rewards/margins": 0.037773266434669495, "rewards/rejected": -0.06873472034931183, "step": 255 }, { "epoch": 2.455111111111111, "grad_norm": 38.391910552978516, "learning_rate": 4.516778136213037e-06, "log_odds_chosen": 1.2296994924545288, "log_odds_ratio": -0.3520565629005432, "logps/chosen": -0.6081915497779846, "logps/rejected": -1.2778961658477783, "loss": 27.1321, "nll_loss": 0.8478931188583374, "rewards/accuracies": 0.871874988079071, "rewards/chosen": -0.03040957823395729, "rewards/margins": 0.03348522633314133, "rewards/rejected": -0.06389480084180832, "step": 260 }, { "epoch": 2.5025185185185186, "grad_norm": 45.24284362792969, "learning_rate": 3.7531701693965554e-06, "log_odds_chosen": 1.1500391960144043, "log_odds_ratio": -0.3759767413139343, "logps/chosen": -0.6576448082923889, "logps/rejected": -1.2901287078857422, "loss": 27.4943, "nll_loss": 0.8592169880867004, "rewards/accuracies": 0.859375, "rewards/chosen": -0.032882239669561386, "rewards/margins": 0.0316241979598999, "rewards/rejected": -0.06450643390417099, "step": 265 }, { "epoch": 2.549925925925926, "grad_norm": 39.35430908203125, "learning_rate": 3.055003141378948e-06, "log_odds_chosen": 1.2462385892868042, "log_odds_ratio": -0.36026865243911743, "logps/chosen": -0.634640097618103, "logps/rejected": -1.319314956665039, "loss": 27.0453, "nll_loss": 0.8451893925666809, "rewards/accuracies": 0.878125011920929, "rewards/chosen": -0.03173200413584709, "rewards/margins": 0.03423374146223068, "rewards/rejected": -0.06596574932336807, "step": 270 }, { "epoch": 2.5973333333333333, "grad_norm": 42.9500846862793, "learning_rate": 2.424427429704365e-06, "log_odds_chosen": 1.219395399093628, "log_odds_ratio": -0.36237066984176636, "logps/chosen": -0.651611328125, "logps/rejected": -1.3371680974960327, "loss": 26.9673, "nll_loss": 0.8427650332450867, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.03258056566119194, "rewards/margins": 0.03427784517407417, "rewards/rejected": -0.06685841083526611, "step": 275 }, { "epoch": 2.644740740740741, "grad_norm": 44.0099983215332, "learning_rate": 1.8633852284264508e-06, "log_odds_chosen": 1.150255799293518, "log_odds_ratio": -0.38557443022727966, "logps/chosen": -0.6256917715072632, "logps/rejected": -1.2452447414398193, "loss": 27.3044, "nll_loss": 0.8532875180244446, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.03128458559513092, "rewards/margins": 0.030977647751569748, "rewards/rejected": -0.062262244522571564, "step": 280 }, { "epoch": 2.6921481481481484, "grad_norm": 47.087276458740234, "learning_rate": 1.3736045660864034e-06, "log_odds_chosen": 1.1619895696640015, "log_odds_ratio": -0.3987194895744324, "logps/chosen": -0.6959345936775208, "logps/rejected": -1.3641746044158936, "loss": 28.8061, "nll_loss": 0.9002305865287781, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.03479672595858574, "rewards/margins": 0.03341200202703476, "rewards/rejected": -0.0682087391614914, "step": 285 }, { "epoch": 2.7395555555555555, "grad_norm": 44.92427444458008, "learning_rate": 9.565939833279192e-07, "log_odds_chosen": 1.2260239124298096, "log_odds_ratio": -0.37591180205345154, "logps/chosen": -0.6699368357658386, "logps/rejected": -1.3378124237060547, "loss": 27.6595, "nll_loss": 0.8643766641616821, "rewards/accuracies": 0.84375, "rewards/chosen": -0.03349684178829193, "rewards/margins": 0.033393777906894684, "rewards/rejected": -0.06689061224460602, "step": 290 }, { "epoch": 2.786962962962963, "grad_norm": 37.753028869628906, "learning_rate": 6.136378865420872e-07, "log_odds_chosen": 1.0957053899765015, "log_odds_ratio": -0.40423646569252014, "logps/chosen": -0.6795234084129333, "logps/rejected": -1.2915849685668945, "loss": 28.8872, "nll_loss": 0.9027553796768188, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.03397617116570473, "rewards/margins": 0.030603080987930298, "rewards/rejected": -0.06457925587892532, "step": 295 }, { "epoch": 2.83437037037037, "grad_norm": 45.159854888916016, "learning_rate": 3.45792591853214e-07, "log_odds_chosen": 1.23939049243927, "log_odds_ratio": -0.377020925283432, "logps/chosen": -0.6346758604049683, "logps/rejected": -1.291849970817566, "loss": 27.6474, "nll_loss": 0.864010214805603, "rewards/accuracies": 0.8218749761581421, "rewards/chosen": -0.03173379227519035, "rewards/margins": 0.0328587107360363, "rewards/rejected": -0.06459251046180725, "step": 300 }, { "epoch": 2.8817777777777778, "grad_norm": 49.441314697265625, "learning_rate": 1.538830716302092e-07, "log_odds_chosen": 1.286255121231079, "log_odds_ratio": -0.3574092984199524, "logps/chosen": -0.599238395690918, "logps/rejected": -1.2815700769424438, "loss": 26.8262, "nll_loss": 0.8383495211601257, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.029961923137307167, "rewards/margins": 0.034116584807634354, "rewards/rejected": -0.06407850980758667, "step": 305 }, { "epoch": 2.9291851851851853, "grad_norm": 41.87241744995117, "learning_rate": 3.8500413544415025e-08, "log_odds_chosen": 1.366567611694336, "log_odds_ratio": -0.3198963701725006, "logps/chosen": -0.6018384099006653, "logps/rejected": -1.3400609493255615, "loss": 26.929, "nll_loss": 0.8415498733520508, "rewards/accuracies": 0.903124988079071, "rewards/chosen": -0.030091922730207443, "rewards/margins": 0.036911122500896454, "rewards/rejected": -0.0670030489563942, "step": 310 }, { "epoch": 2.9765925925925925, "grad_norm": 52.49939727783203, "learning_rate": 0.0, "log_odds_chosen": 1.2142304182052612, "log_odds_ratio": -0.3629140257835388, "logps/chosen": -0.5843847990036011, "logps/rejected": -1.1980851888656616, "loss": 25.8561, "nll_loss": 0.8080168962478638, "rewards/accuracies": 0.859375, "rewards/chosen": -0.029219243675470352, "rewards/margins": 0.030685018748044968, "rewards/rejected": -0.05990426614880562, "step": 315 }, { "epoch": 2.9765925925925925, "step": 315, "total_flos": 0.0, "train_loss": 53.04288567437066, "train_runtime": 9724.3197, "train_samples_per_second": 2.082, "train_steps_per_second": 0.032 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }