{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9987943737441393, "eval_steps": 400, "global_step": 466, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010716677829872739, "grad_norm": 38.81959429763923, "learning_rate": 6.382978723404255e-08, "logits/chosen": -2.397952079772949, "logits/rejected": -2.391846179962158, "logps/chosen": -0.5666699409484863, "logps/rejected": -0.5553711652755737, "loss": 1.5469, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5666699409484863, "rewards/margins": -0.01129874400794506, "rewards/rejected": -0.5553711652755737, "step": 5 }, { "epoch": 0.021433355659745478, "grad_norm": 17.957819802244767, "learning_rate": 1.276595744680851e-07, "logits/chosen": -2.402738571166992, "logits/rejected": -2.3730971813201904, "logps/chosen": -0.5517541766166687, "logps/rejected": -0.5785264372825623, "loss": 1.5538, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5517541766166687, "rewards/margins": 0.026772266253829002, "rewards/rejected": -0.5785264372825623, "step": 10 }, { "epoch": 0.032150033489618215, "grad_norm": 17.06492283094742, "learning_rate": 1.9148936170212767e-07, "logits/chosen": -2.4437928199768066, "logits/rejected": -2.449697732925415, "logps/chosen": -0.5636163353919983, "logps/rejected": -0.5669411420822144, "loss": 1.5619, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5636163353919983, "rewards/margins": 0.00332476943731308, "rewards/rejected": -0.5669411420822144, "step": 15 }, { "epoch": 0.042866711319490956, "grad_norm": 17.478232600769196, "learning_rate": 2.553191489361702e-07, "logits/chosen": -2.383941650390625, "logits/rejected": -2.3943183422088623, "logps/chosen": -0.5459321737289429, "logps/rejected": -0.5427771806716919, "loss": 1.5322, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5459321737289429, "rewards/margins": -0.00315500283613801, "rewards/rejected": -0.5427771806716919, "step": 20 }, { "epoch": 0.0535833891493637, "grad_norm": 14.134950451452564, "learning_rate": 3.1914893617021275e-07, "logits/chosen": -2.2786340713500977, "logits/rejected": -2.2805464267730713, "logps/chosen": -0.5260549783706665, "logps/rejected": -0.5430394411087036, "loss": 1.5298, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5260549783706665, "rewards/margins": 0.016984451562166214, "rewards/rejected": -0.5430394411087036, "step": 25 }, { "epoch": 0.06430006697923643, "grad_norm": 19.57863908597214, "learning_rate": 3.8297872340425535e-07, "logits/chosen": -2.3897128105163574, "logits/rejected": -2.4030909538269043, "logps/chosen": -0.5465933680534363, "logps/rejected": -0.5372768640518188, "loss": 1.5509, "rewards/accuracies": 0.46875, "rewards/chosen": -0.5465933680534363, "rewards/margins": -0.009316539391875267, "rewards/rejected": -0.5372768640518188, "step": 30 }, { "epoch": 0.07501674480910918, "grad_norm": 24.218016837268095, "learning_rate": 4.4680851063829783e-07, "logits/chosen": -2.453273296356201, "logits/rejected": -2.424668788909912, "logps/chosen": -0.5341351628303528, "logps/rejected": -0.5890725255012512, "loss": 1.5479, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5341351628303528, "rewards/margins": 0.054937295615673065, "rewards/rejected": -0.5890725255012512, "step": 35 }, { "epoch": 0.08573342263898191, "grad_norm": 20.81509422651472, "learning_rate": 5.106382978723404e-07, "logits/chosen": -2.3677382469177246, "logits/rejected": -2.3493103981018066, "logps/chosen": -0.565592885017395, "logps/rejected": -0.5375810861587524, "loss": 1.5573, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": -0.565592885017395, "rewards/margins": -0.02801181748509407, "rewards/rejected": -0.5375810861587524, "step": 40 }, { "epoch": 0.09645010046885466, "grad_norm": 17.50647386551691, "learning_rate": 5.74468085106383e-07, "logits/chosen": -2.3343653678894043, "logits/rejected": -2.32906436920166, "logps/chosen": -0.5402032732963562, "logps/rejected": -0.5591766238212585, "loss": 1.5421, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.5402032732963562, "rewards/margins": 0.018973344936966896, "rewards/rejected": -0.5591766238212585, "step": 45 }, { "epoch": 0.1071667782987274, "grad_norm": 16.375037037224466, "learning_rate": 5.999241095449976e-07, "logits/chosen": -2.3641974925994873, "logits/rejected": -2.3596482276916504, "logps/chosen": -0.5401940941810608, "logps/rejected": -0.525315523147583, "loss": 1.5284, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.5401940941810608, "rewards/margins": -0.01487857848405838, "rewards/rejected": -0.525315523147583, "step": 50 }, { "epoch": 0.11788345612860013, "grad_norm": 26.218018133925373, "learning_rate": 5.994604735812144e-07, "logits/chosen": -2.4210665225982666, "logits/rejected": -2.424318790435791, "logps/chosen": -0.565641462802887, "logps/rejected": -0.5864871740341187, "loss": 1.5392, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.565641462802887, "rewards/margins": 0.020845741033554077, "rewards/rejected": -0.5864871740341187, "step": 55 }, { "epoch": 0.12860013395847286, "grad_norm": 42.6456644243847, "learning_rate": 5.985760137627685e-07, "logits/chosen": -2.325913906097412, "logits/rejected": -2.3350510597229004, "logps/chosen": -0.4915548861026764, "logps/rejected": -0.5130532383918762, "loss": 1.5405, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.4915548861026764, "rewards/margins": 0.02149834856390953, "rewards/rejected": -0.5130532383918762, "step": 60 }, { "epoch": 0.13931681178834562, "grad_norm": 17.49728999516173, "learning_rate": 5.972719729975655e-07, "logits/chosen": -2.3687386512756348, "logits/rejected": -2.3732752799987793, "logps/chosen": -0.5264291167259216, "logps/rejected": -0.5606903433799744, "loss": 1.5441, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5264291167259216, "rewards/margins": 0.03426120802760124, "rewards/rejected": -0.5606903433799744, "step": 65 }, { "epoch": 0.15003348961821836, "grad_norm": 19.04975638080615, "learning_rate": 5.955501838194784e-07, "logits/chosen": -2.2692012786865234, "logits/rejected": -2.2734649181365967, "logps/chosen": -0.5329629778862, "logps/rejected": -0.585782527923584, "loss": 1.5335, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5329629778862, "rewards/margins": 0.05281956121325493, "rewards/rejected": -0.585782527923584, "step": 70 }, { "epoch": 0.1607501674480911, "grad_norm": 16.46150164067359, "learning_rate": 5.934130658131361e-07, "logits/chosen": -2.3084473609924316, "logits/rejected": -2.303145408630371, "logps/chosen": -0.4908691346645355, "logps/rejected": -0.5239783525466919, "loss": 1.5327, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4908691346645355, "rewards/margins": 0.03310922160744667, "rewards/rejected": -0.5239783525466919, "step": 75 }, { "epoch": 0.17146684527796383, "grad_norm": 21.613971984342516, "learning_rate": 5.908636222137454e-07, "logits/chosen": -2.291396141052246, "logits/rejected": -2.3133578300476074, "logps/chosen": -0.48883646726608276, "logps/rejected": -0.5628662109375, "loss": 1.5301, "rewards/accuracies": 0.625, "rewards/chosen": -0.48883646726608276, "rewards/margins": 0.07402969151735306, "rewards/rejected": -0.5628662109375, "step": 80 }, { "epoch": 0.18218352310783656, "grad_norm": 22.2008339670987, "learning_rate": 5.879054356867243e-07, "logits/chosen": -2.328059673309326, "logits/rejected": -2.3216350078582764, "logps/chosen": -0.5081610679626465, "logps/rejected": -0.5643308162689209, "loss": 1.5335, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5081610679626465, "rewards/margins": 0.05616975575685501, "rewards/rejected": -0.5643308162689209, "step": 85 }, { "epoch": 0.19290020093770932, "grad_norm": 19.64419890416597, "learning_rate": 5.84542663293077e-07, "logits/chosen": -2.272433280944824, "logits/rejected": -2.2766337394714355, "logps/chosen": -0.5117042660713196, "logps/rejected": -0.558184802532196, "loss": 1.5348, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5117042660713196, "rewards/margins": 0.046480584889650345, "rewards/rejected": -0.558184802532196, "step": 90 }, { "epoch": 0.20361687876758205, "grad_norm": 17.22227696113398, "learning_rate": 5.807800306475876e-07, "logits/chosen": -2.3275113105773926, "logits/rejected": -2.3395214080810547, "logps/chosen": -0.6069667935371399, "logps/rejected": -0.6348728537559509, "loss": 1.5217, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.6069667935371399, "rewards/margins": 0.027906125411391258, "rewards/rejected": -0.6348728537559509, "step": 95 }, { "epoch": 0.2143335565974548, "grad_norm": 24.396288528469587, "learning_rate": 5.766228252780373e-07, "logits/chosen": -2.368147373199463, "logits/rejected": -2.377194881439209, "logps/chosen": -0.5941327214241028, "logps/rejected": -0.6152836680412292, "loss": 1.5435, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.5941327214241028, "rewards/margins": 0.0211509857326746, "rewards/rejected": -0.6152836680412292, "step": 100 }, { "epoch": 0.22505023442732752, "grad_norm": 16.23048110894587, "learning_rate": 5.720768891947834e-07, "logits/chosen": -2.3831636905670166, "logits/rejected": -2.383808135986328, "logps/chosen": -0.5236924886703491, "logps/rejected": -0.5740348100662231, "loss": 1.517, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5236924886703491, "rewards/margins": 0.05034228041768074, "rewards/rejected": -0.5740348100662231, "step": 105 }, { "epoch": 0.23576691225720026, "grad_norm": 22.95053096013821, "learning_rate": 5.671486106811365e-07, "logits/chosen": -2.4293274879455566, "logits/rejected": -2.4386584758758545, "logps/chosen": -0.5232604146003723, "logps/rejected": -0.5748019218444824, "loss": 1.54, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.5232604146003723, "rewards/margins": 0.051541589200496674, "rewards/rejected": -0.5748019218444824, "step": 110 }, { "epoch": 0.24648359008707302, "grad_norm": 30.43281833851228, "learning_rate": 5.618449153160763e-07, "logits/chosen": -2.521904945373535, "logits/rejected": -2.5165414810180664, "logps/chosen": -0.5098231434822083, "logps/rejected": -0.5272140502929688, "loss": 1.5652, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.5098231434822083, "rewards/margins": 0.01739095151424408, "rewards/rejected": -0.5272140502929688, "step": 115 }, { "epoch": 0.2572002679169457, "grad_norm": 18.218447276406668, "learning_rate": 5.56173256241918e-07, "logits/chosen": -2.6067259311676025, "logits/rejected": -2.594320774078369, "logps/chosen": -0.5535318851470947, "logps/rejected": -0.5515246987342834, "loss": 1.553, "rewards/accuracies": 0.5, "rewards/chosen": -0.5535318851470947, "rewards/margins": -0.002007170347496867, "rewards/rejected": -0.5515246987342834, "step": 120 }, { "epoch": 0.2679169457468185, "grad_norm": 19.047294066362046, "learning_rate": 5.501416036906106e-07, "logits/chosen": -2.599743366241455, "logits/rejected": -2.602095603942871, "logps/chosen": -0.5660097599029541, "logps/rejected": -0.5823434591293335, "loss": 1.5462, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.5660097599029541, "rewards/margins": 0.01633365824818611, "rewards/rejected": -0.5823434591293335, "step": 125 }, { "epoch": 0.27863362357669125, "grad_norm": 25.91707976400477, "learning_rate": 5.437584337833803e-07, "logits/chosen": -2.6727051734924316, "logits/rejected": -2.6547203063964844, "logps/chosen": -0.5484704375267029, "logps/rejected": -0.5791813731193542, "loss": 1.5217, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.5484704375267029, "rewards/margins": 0.030710989609360695, "rewards/rejected": -0.5791813731193542, "step": 130 }, { "epoch": 0.289350301406564, "grad_norm": 24.930436449789845, "learning_rate": 5.370327166194635e-07, "logits/chosen": -2.6759390830993652, "logits/rejected": -2.688563346862793, "logps/chosen": -0.5326634049415588, "logps/rejected": -0.5721167325973511, "loss": 1.5358, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5326634049415588, "rewards/margins": 0.03945332020521164, "rewards/rejected": -0.5721167325973511, "step": 135 }, { "epoch": 0.3000669792364367, "grad_norm": 25.343122236521907, "learning_rate": 5.299739036706635e-07, "logits/chosen": -2.6917319297790527, "logits/rejected": -2.6787917613983154, "logps/chosen": -0.5328460931777954, "logps/rejected": -0.5668941736221313, "loss": 1.5162, "rewards/accuracies": 0.53125, "rewards/chosen": -0.5328460931777954, "rewards/margins": 0.034048013389110565, "rewards/rejected": -0.5668941736221313, "step": 140 }, { "epoch": 0.31078365706630945, "grad_norm": 20.469732566491704, "learning_rate": 5.225919144994487e-07, "logits/chosen": -2.7843101024627686, "logits/rejected": -2.7595150470733643, "logps/chosen": -0.5282408595085144, "logps/rejected": -0.5839791893959045, "loss": 1.5106, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5282408595085144, "rewards/margins": 0.05573834106326103, "rewards/rejected": -0.5839791893959045, "step": 145 }, { "epoch": 0.3215003348961822, "grad_norm": 22.910568604755916, "learning_rate": 5.148971228192543e-07, "logits/chosen": -2.769007921218872, "logits/rejected": -2.7604851722717285, "logps/chosen": -0.5120500326156616, "logps/rejected": -0.5518966913223267, "loss": 1.5387, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5120500326156616, "rewards/margins": 0.03984668105840683, "rewards/rejected": -0.5518966913223267, "step": 150 }, { "epoch": 0.3322170127260549, "grad_norm": 21.1001003359098, "learning_rate": 5.069003419165781e-07, "logits/chosen": -2.8498682975769043, "logits/rejected": -2.8591020107269287, "logps/chosen": -0.5589969754219055, "logps/rejected": -0.5984258651733398, "loss": 1.5176, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.5589969754219055, "rewards/margins": 0.039428871124982834, "rewards/rejected": -0.5984258651733398, "step": 155 }, { "epoch": 0.34293369055592765, "grad_norm": 26.259271435648458, "learning_rate": 4.986128094553569e-07, "logits/chosen": -2.8450496196746826, "logits/rejected": -2.8269691467285156, "logps/chosen": -0.5768808126449585, "logps/rejected": -0.6496576070785522, "loss": 1.5296, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.5768808126449585, "rewards/margins": 0.07277677953243256, "rewards/rejected": -0.6496576070785522, "step": 160 }, { "epoch": 0.3536503683858004, "grad_norm": 22.95727659422549, "learning_rate": 4.900461716849745e-07, "logits/chosen": -2.9189038276672363, "logits/rejected": -2.8916220664978027, "logps/chosen": -0.5302075147628784, "logps/rejected": -0.5729304552078247, "loss": 1.5205, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5302075147628784, "rewards/margins": 0.042722832411527634, "rewards/rejected": -0.5729304552078247, "step": 165 }, { "epoch": 0.3643670462156731, "grad_norm": 17.76635841368691, "learning_rate": 4.812124670740974e-07, "logits/chosen": -2.9066505432128906, "logits/rejected": -2.910203456878662, "logps/chosen": -0.5274362564086914, "logps/rejected": -0.6013033986091614, "loss": 1.5096, "rewards/accuracies": 0.59375, "rewards/chosen": -0.5274362564086914, "rewards/margins": 0.0738670751452446, "rewards/rejected": -0.6013033986091614, "step": 170 }, { "epoch": 0.3750837240455459, "grad_norm": 22.708920984772448, "learning_rate": 4.7212410939333393e-07, "logits/chosen": -2.9743309020996094, "logits/rejected": -2.9548959732055664, "logps/chosen": -0.5641797780990601, "logps/rejected": -0.594096302986145, "loss": 1.5387, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5641797780990601, "rewards/margins": 0.029916446655988693, "rewards/rejected": -0.594096302986145, "step": 175 }, { "epoch": 0.38580040187541864, "grad_norm": 22.445712735196388, "learning_rate": 4.6279387027049207e-07, "logits/chosen": -3.0900559425354004, "logits/rejected": -3.091893434524536, "logps/chosen": -0.5932881236076355, "logps/rejected": -0.6334934234619141, "loss": 1.5474, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.5932881236076355, "rewards/margins": 0.040205273777246475, "rewards/rejected": -0.6334934234619141, "step": 180 }, { "epoch": 0.3965170797052914, "grad_norm": 30.045592149378802, "learning_rate": 4.5323486124294974e-07, "logits/chosen": -3.1286568641662598, "logits/rejected": -3.1514105796813965, "logps/chosen": -0.5893043279647827, "logps/rejected": -0.6178286075592041, "loss": 1.52, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5893043279647827, "rewards/margins": 0.02852421998977661, "rewards/rejected": -0.6178286075592041, "step": 185 }, { "epoch": 0.4072337575351641, "grad_norm": 19.66079670307951, "learning_rate": 4.434605153323596e-07, "logits/chosen": -3.0138182640075684, "logits/rejected": -3.027487277984619, "logps/chosen": -0.5490652322769165, "logps/rejected": -0.7248018383979797, "loss": 1.5211, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5490652322769165, "rewards/margins": 0.17573660612106323, "rewards/rejected": -0.7248018383979797, "step": 190 }, { "epoch": 0.41795043536503684, "grad_norm": 23.76319559347926, "learning_rate": 4.334845681675802e-07, "logits/chosen": -3.263240098953247, "logits/rejected": -3.234492063522339, "logps/chosen": -0.523744523525238, "logps/rejected": -0.5552490949630737, "loss": 1.5291, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.523744523525238, "rewards/margins": 0.03150450438261032, "rewards/rejected": -0.5552490949630737, "step": 195 }, { "epoch": 0.4286671131949096, "grad_norm": 22.67820361436796, "learning_rate": 4.233210386823613e-07, "logits/chosen": -3.1373372077941895, "logits/rejected": -3.1572506427764893, "logps/chosen": -0.49712926149368286, "logps/rejected": -0.5306284427642822, "loss": 1.5161, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.49712926149368286, "rewards/margins": 0.033499158918857574, "rewards/rejected": -0.5306284427642822, "step": 200 }, { "epoch": 0.4393837910247823, "grad_norm": 26.07083589291384, "learning_rate": 4.129842094149083e-07, "logits/chosen": -3.277681827545166, "logits/rejected": -3.2762560844421387, "logps/chosen": -0.4885168969631195, "logps/rejected": -0.5218795537948608, "loss": 1.5331, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4885168969631195, "rewards/margins": 0.03336270898580551, "rewards/rejected": -0.5218795537948608, "step": 205 }, { "epoch": 0.45010046885465504, "grad_norm": 21.312251857603005, "learning_rate": 4.024886064370107e-07, "logits/chosen": -3.2972412109375, "logits/rejected": -3.2872118949890137, "logps/chosen": -0.509524941444397, "logps/rejected": -0.5547453761100769, "loss": 1.5062, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.509524941444397, "rewards/margins": 0.04522045701742172, "rewards/rejected": -0.5547453761100769, "step": 210 }, { "epoch": 0.4608171466845278, "grad_norm": 22.199858140310962, "learning_rate": 3.9184897894093836e-07, "logits/chosen": -3.3184287548065186, "logits/rejected": -3.3149967193603516, "logps/chosen": -0.5457042455673218, "logps/rejected": -0.5951209664344788, "loss": 1.4947, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.5457042455673218, "rewards/margins": 0.049416683614254, "rewards/rejected": -0.5951209664344788, "step": 215 }, { "epoch": 0.4715338245144005, "grad_norm": 26.637536554337164, "learning_rate": 3.8108027851279425e-07, "logits/chosen": -3.3705334663391113, "logits/rejected": -3.348128080368042, "logps/chosen": -0.5251081585884094, "logps/rejected": -0.6058255434036255, "loss": 1.533, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5251081585884094, "rewards/margins": 0.08071742951869965, "rewards/rejected": -0.6058255434036255, "step": 220 }, { "epoch": 0.4822505023442733, "grad_norm": 20.985181328903547, "learning_rate": 3.701976381214462e-07, "logits/chosen": -3.441849946975708, "logits/rejected": -3.436166286468506, "logps/chosen": -0.5670623183250427, "logps/rejected": -0.6455426812171936, "loss": 1.5056, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.5670623183250427, "rewards/margins": 0.0784803032875061, "rewards/rejected": -0.6455426812171936, "step": 225 }, { "epoch": 0.49296718017414604, "grad_norm": 26.37006096745831, "learning_rate": 3.5921635085256784e-07, "logits/chosen": -3.3313636779785156, "logits/rejected": -3.316943407058716, "logps/chosen": -0.5642744898796082, "logps/rejected": -0.6056590676307678, "loss": 1.4952, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.5642744898796082, "rewards/margins": 0.041384514421224594, "rewards/rejected": -0.6056590676307678, "step": 230 }, { "epoch": 0.5036838580040187, "grad_norm": 21.351561473671772, "learning_rate": 3.4815184841767167e-07, "logits/chosen": -3.3021767139434814, "logits/rejected": -3.2890796661376953, "logps/chosen": -0.5324856042861938, "logps/rejected": -0.6350933313369751, "loss": 1.5016, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5324856042861938, "rewards/margins": 0.10260789096355438, "rewards/rejected": -0.6350933313369751, "step": 235 }, { "epoch": 0.5144005358338914, "grad_norm": 25.25074061630777, "learning_rate": 3.3701967946833387e-07, "logits/chosen": -3.2254951000213623, "logits/rejected": -3.232588529586792, "logps/chosen": -0.5533393621444702, "logps/rejected": -0.6351491212844849, "loss": 1.5163, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5533393621444702, "rewards/margins": 0.08180973678827286, "rewards/rejected": -0.6351491212844849, "step": 240 }, { "epoch": 0.5251172136637642, "grad_norm": 25.884047164441235, "learning_rate": 3.258354877460875e-07, "logits/chosen": -3.2459404468536377, "logits/rejected": -3.2240214347839355, "logps/chosen": -0.5927519202232361, "logps/rejected": -0.6273369193077087, "loss": 1.5018, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5927519202232361, "rewards/margins": 0.03458496928215027, "rewards/rejected": -0.6273369193077087, "step": 245 }, { "epoch": 0.535833891493637, "grad_norm": 24.128535335441807, "learning_rate": 3.1461499009868705e-07, "logits/chosen": -3.229731321334839, "logits/rejected": -3.2193870544433594, "logps/chosen": -0.6217538118362427, "logps/rejected": -0.6148039102554321, "loss": 1.5303, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.6217538118362427, "rewards/margins": -0.006949885282665491, "rewards/rejected": -0.6148039102554321, "step": 250 }, { "epoch": 0.5465505693235098, "grad_norm": 22.132762654520068, "learning_rate": 3.033739543936404e-07, "logits/chosen": -3.251239776611328, "logits/rejected": -3.251615047454834, "logps/chosen": -0.5319584608078003, "logps/rejected": -0.6038156747817993, "loss": 1.4956, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5319584608078003, "rewards/margins": 0.0718572586774826, "rewards/rejected": -0.6038156747817993, "step": 255 }, { "epoch": 0.5572672471533825, "grad_norm": 20.467314286083344, "learning_rate": 2.921281773600424e-07, "logits/chosen": -3.172785520553589, "logits/rejected": -3.191011905670166, "logps/chosen": -0.5374451875686646, "logps/rejected": -0.6392644047737122, "loss": 1.5245, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5374451875686646, "rewards/margins": 0.10181926190853119, "rewards/rejected": -0.6392644047737122, "step": 260 }, { "epoch": 0.5679839249832552, "grad_norm": 21.60647847703085, "learning_rate": 2.808934623898511e-07, "logits/chosen": -3.1863551139831543, "logits/rejected": -3.1833932399749756, "logps/chosen": -0.5692937970161438, "logps/rejected": -0.6735215783119202, "loss": 1.4938, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.5692937970161438, "rewards/margins": 0.10422778129577637, "rewards/rejected": -0.6735215783119202, "step": 265 }, { "epoch": 0.578700602813128, "grad_norm": 25.07152854176611, "learning_rate": 2.696855973298007e-07, "logits/chosen": -3.1901869773864746, "logits/rejected": -3.183385133743286, "logps/chosen": -0.536370575428009, "logps/rejected": -0.5907222628593445, "loss": 1.5085, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.536370575428009, "rewards/margins": 0.05435168743133545, "rewards/rejected": -0.5907222628593445, "step": 270 }, { "epoch": 0.5894172806430007, "grad_norm": 18.910155061928734, "learning_rate": 2.585203322951589e-07, "logits/chosen": -3.274017810821533, "logits/rejected": -3.2836010456085205, "logps/chosen": -0.4976142942905426, "logps/rejected": -0.5548876523971558, "loss": 1.5118, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4976142942905426, "rewards/margins": 0.05727345868945122, "rewards/rejected": -0.5548876523971558, "step": 275 }, { "epoch": 0.6001339584728734, "grad_norm": 21.74570057366724, "learning_rate": 2.47413357536509e-07, "logits/chosen": -3.2025809288024902, "logits/rejected": -3.1752185821533203, "logps/chosen": -0.569342851638794, "logps/rejected": -0.623855471611023, "loss": 1.5032, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.569342851638794, "rewards/margins": 0.054512638598680496, "rewards/rejected": -0.623855471611023, "step": 280 }, { "epoch": 0.6108506363027462, "grad_norm": 25.233010095872693, "learning_rate": 2.3638028139065624e-07, "logits/chosen": -3.230616331100464, "logits/rejected": -3.2365059852600098, "logps/chosen": -0.5778087973594666, "logps/rejected": -0.5905576944351196, "loss": 1.521, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.5778087973594666, "rewards/margins": 0.012748857028782368, "rewards/rejected": -0.5905576944351196, "step": 285 }, { "epoch": 0.6215673141326189, "grad_norm": 52.99096570461333, "learning_rate": 2.2543660834664724e-07, "logits/chosen": -3.3016669750213623, "logits/rejected": -3.28556489944458, "logps/chosen": -0.5023082494735718, "logps/rejected": -0.5799704790115356, "loss": 1.496, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5023082494735718, "rewards/margins": 0.07766219973564148, "rewards/rejected": -0.5799704790115356, "step": 290 }, { "epoch": 0.6322839919624916, "grad_norm": 22.86414304018135, "learning_rate": 2.1459771725772267e-07, "logits/chosen": -3.2204766273498535, "logits/rejected": -3.243717908859253, "logps/chosen": -0.5589109063148499, "logps/rejected": -0.6346697807312012, "loss": 1.4948, "rewards/accuracies": 0.59375, "rewards/chosen": -0.5589109063148499, "rewards/margins": 0.07575888931751251, "rewards/rejected": -0.6346697807312012, "step": 295 }, { "epoch": 0.6430006697923644, "grad_norm": 24.245750925813, "learning_rate": 2.0387883972982259e-07, "logits/chosen": -3.3650691509246826, "logits/rejected": -3.367690324783325, "logps/chosen": -0.5385848879814148, "logps/rejected": -0.6212387681007385, "loss": 1.4876, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.5385848879814148, "rewards/margins": 0.08265385776758194, "rewards/rejected": -0.6212387681007385, "step": 300 }, { "epoch": 0.6537173476222371, "grad_norm": 23.847848751632885, "learning_rate": 1.9329503871701592e-07, "logits/chosen": -3.309741973876953, "logits/rejected": -3.281573534011841, "logps/chosen": -0.5459524989128113, "logps/rejected": -0.5997665524482727, "loss": 1.497, "rewards/accuracies": 0.625, "rewards/chosen": -0.5459524989128113, "rewards/margins": 0.05381406098604202, "rewards/rejected": -0.5997665524482727, "step": 305 }, { "epoch": 0.6644340254521098, "grad_norm": 27.809403889861738, "learning_rate": 1.8286118735393015e-07, "logits/chosen": -3.3402085304260254, "logits/rejected": -3.3407912254333496, "logps/chosen": -0.5383692979812622, "logps/rejected": -0.5971530079841614, "loss": 1.4859, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.5383692979812622, "rewards/margins": 0.058783747255802155, "rewards/rejected": -0.5971530079841614, "step": 310 }, { "epoch": 0.6751507032819826, "grad_norm": 20.70533413800174, "learning_rate": 1.7259194805493042e-07, "logits/chosen": -3.2731971740722656, "logits/rejected": -3.2823867797851562, "logps/chosen": -0.5446811318397522, "logps/rejected": -0.606468915939331, "loss": 1.4954, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.5446811318397522, "rewards/margins": 0.06178779527544975, "rewards/rejected": -0.606468915939331, "step": 315 }, { "epoch": 0.6858673811118553, "grad_norm": 23.184720457564282, "learning_rate": 1.6250175190941725e-07, "logits/chosen": -3.3182265758514404, "logits/rejected": -3.2908051013946533, "logps/chosen": -0.5561486482620239, "logps/rejected": -0.5695281028747559, "loss": 1.5159, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.5561486482620239, "rewards/margins": 0.0133795365691185, "rewards/rejected": -0.5695281028747559, "step": 320 }, { "epoch": 0.696584058941728, "grad_norm": 27.421134529600376, "learning_rate": 1.5260477840220057e-07, "logits/chosen": -3.309216260910034, "logits/rejected": -3.318588972091675, "logps/chosen": -0.5043013095855713, "logps/rejected": -0.5974953770637512, "loss": 1.4727, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.5043013095855713, "rewards/margins": 0.09319403767585754, "rewards/rejected": -0.5974953770637512, "step": 325 }, { "epoch": 0.7073007367716008, "grad_norm": 24.907218358327214, "learning_rate": 1.4291493548744542e-07, "logits/chosen": -3.2981224060058594, "logits/rejected": -3.269383668899536, "logps/chosen": -0.5354940295219421, "logps/rejected": -0.5932218432426453, "loss": 1.492, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.5354940295219421, "rewards/margins": 0.05772777646780014, "rewards/rejected": -0.5932218432426453, "step": 330 }, { "epoch": 0.7180174146014735, "grad_norm": 40.51550261895159, "learning_rate": 1.334458400441933e-07, "logits/chosen": -3.3821797370910645, "logits/rejected": -3.373931884765625, "logps/chosen": -0.5368712544441223, "logps/rejected": -0.6348738670349121, "loss": 1.5027, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5368712544441223, "rewards/margins": 0.09800264984369278, "rewards/rejected": -0.6348738670349121, "step": 335 }, { "epoch": 0.7287340924313462, "grad_norm": 25.305322827743687, "learning_rate": 1.2421079874092336e-07, "logits/chosen": -3.311006546020508, "logits/rejected": -3.2969226837158203, "logps/chosen": -0.5727181434631348, "logps/rejected": -0.6479278802871704, "loss": 1.516, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5727181434631348, "rewards/margins": 0.07520972192287445, "rewards/rejected": -0.6479278802871704, "step": 340 }, { "epoch": 0.739450770261219, "grad_norm": 22.209687488331966, "learning_rate": 1.1522278933604484e-07, "logits/chosen": -3.3381361961364746, "logits/rejected": -3.3484432697296143, "logps/chosen": -0.5813694596290588, "logps/rejected": -0.6797500848770142, "loss": 1.4932, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.5813694596290588, "rewards/margins": 0.09838052839040756, "rewards/rejected": -0.6797500848770142, "step": 345 }, { "epoch": 0.7501674480910918, "grad_norm": 22.960003013518822, "learning_rate": 1.0649444244059717e-07, "logits/chosen": -3.32041597366333, "logits/rejected": -3.3459019660949707, "logps/chosen": -0.5482354164123535, "logps/rejected": -0.6205809712409973, "loss": 1.4932, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5482354164123535, "rewards/margins": 0.07234560698270798, "rewards/rejected": -0.6205809712409973, "step": 350 }, { "epoch": 0.7608841259209645, "grad_norm": 25.349782143414405, "learning_rate": 9.803802376878795e-08, "logits/chosen": -3.3137733936309814, "logits/rejected": -3.3020172119140625, "logps/chosen": -0.5893365740776062, "logps/rejected": -0.6166855096817017, "loss": 1.4957, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.5893365740776062, "rewards/margins": 0.02734885737299919, "rewards/rejected": -0.6166855096817017, "step": 355 }, { "epoch": 0.7716008037508373, "grad_norm": 22.556111949917664, "learning_rate": 8.98654169013098e-08, "logits/chosen": -3.3032424449920654, "logits/rejected": -3.288992404937744, "logps/chosen": -0.521614670753479, "logps/rejected": -0.5875999331474304, "loss": 1.5004, "rewards/accuracies": 0.59375, "rewards/chosen": -0.521614670753479, "rewards/margins": 0.06598522514104843, "rewards/rejected": -0.5875999331474304, "step": 360 }, { "epoch": 0.78231748158071, "grad_norm": 23.52734286974739, "learning_rate": 8.198810658566058e-08, "logits/chosen": -3.3537094593048096, "logits/rejected": -3.348142147064209, "logps/chosen": -0.5639868974685669, "logps/rejected": -0.6276763677597046, "loss": 1.5179, "rewards/accuracies": 0.625, "rewards/chosen": -0.5639868974685669, "rewards/margins": 0.06368952244520187, "rewards/rejected": -0.6276763677597046, "step": 365 }, { "epoch": 0.7930341594105828, "grad_norm": 35.138524177158146, "learning_rate": 7.441716259693182e-08, "logits/chosen": -3.3430557250976562, "logits/rejected": -3.365880250930786, "logps/chosen": -0.5710283517837524, "logps/rejected": -0.6515873670578003, "loss": 1.5075, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5710283517837524, "rewards/margins": 0.08055897057056427, "rewards/rejected": -0.6515873670578003, "step": 370 }, { "epoch": 0.8037508372404555, "grad_norm": 23.46216038728244, "learning_rate": 6.716322418174835e-08, "logits/chosen": -3.309415102005005, "logits/rejected": -3.2904000282287598, "logps/chosen": -0.6181533336639404, "logps/rejected": -0.7165501713752747, "loss": 1.4916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6181533336639404, "rewards/margins": 0.09839687496423721, "rewards/rejected": -0.7165501713752747, "step": 375 }, { "epoch": 0.8144675150703282, "grad_norm": 26.38220306063447, "learning_rate": 6.023648510721696e-08, "logits/chosen": -3.391897678375244, "logits/rejected": -3.358309268951416, "logps/chosen": -0.5505380630493164, "logps/rejected": -0.6369754076004028, "loss": 1.5034, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.5505380630493164, "rewards/margins": 0.08643738180398941, "rewards/rejected": -0.6369754076004028, "step": 380 }, { "epoch": 0.825184192900201, "grad_norm": 24.084433663112158, "learning_rate": 5.364667933589596e-08, "logits/chosen": -3.2913315296173096, "logits/rejected": -3.3047938346862793, "logps/chosen": -0.5729898810386658, "logps/rejected": -0.6550789475440979, "loss": 1.4915, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5729898810386658, "rewards/margins": 0.08208902180194855, "rewards/rejected": -0.6550789475440979, "step": 385 }, { "epoch": 0.8359008707300737, "grad_norm": 25.406153364539314, "learning_rate": 4.74030673469165e-08, "logits/chosen": -3.3330624103546143, "logits/rejected": -3.293489933013916, "logps/chosen": -0.613795280456543, "logps/rejected": -0.6435045003890991, "loss": 1.5266, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.613795280456543, "rewards/margins": 0.029709184542298317, "rewards/rejected": -0.6435045003890991, "step": 390 }, { "epoch": 0.8466175485599464, "grad_norm": 24.427651638617984, "learning_rate": 4.1514423122476606e-08, "logits/chosen": -3.3390536308288574, "logits/rejected": -3.3218231201171875, "logps/chosen": -0.5534718632698059, "logps/rejected": -0.600857138633728, "loss": 1.507, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.5534718632698059, "rewards/margins": 0.04738527163863182, "rewards/rejected": -0.600857138633728, "step": 395 }, { "epoch": 0.8573342263898192, "grad_norm": 26.673472503173954, "learning_rate": 3.598902181799717e-08, "logits/chosen": -3.298213481903076, "logits/rejected": -3.2662785053253174, "logps/chosen": -0.5154682397842407, "logps/rejected": -0.6383693814277649, "loss": 1.4828, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5154682397842407, "rewards/margins": 0.12290115654468536, "rewards/rejected": -0.6383693814277649, "step": 400 }, { "epoch": 0.8573342263898192, "eval_logits/chosen": -3.409546136856079, "eval_logits/rejected": -3.403578758239746, "eval_logps/chosen": -0.5692862868309021, "eval_logps/rejected": -0.6257904171943665, "eval_loss": 1.5201373100280762, "eval_rewards/accuracies": 0.5691489577293396, "eval_rewards/chosen": -0.5692862868309021, "eval_rewards/margins": 0.05650414153933525, "eval_rewards/rejected": -0.6257904171943665, "eval_runtime": 432.4468, "eval_samples_per_second": 6.923, "eval_steps_per_second": 0.435, "step": 400 }, { "epoch": 0.8680509042196919, "grad_norm": 24.460303851950272, "learning_rate": 3.0834628133265293e-08, "logits/chosen": -3.308946132659912, "logits/rejected": -3.293513536453247, "logps/chosen": -0.5684244632720947, "logps/rejected": -0.6289744973182678, "loss": 1.5056, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.5684244632720947, "rewards/margins": 0.06054999679327011, "rewards/rejected": -0.6289744973182678, "step": 405 }, { "epoch": 0.8787675820495646, "grad_norm": 22.9693279472293, "learning_rate": 2.6058485400908248e-08, "logits/chosen": -3.358743190765381, "logits/rejected": -3.3271114826202393, "logps/chosen": -0.5511162877082825, "logps/rejected": -0.5816215872764587, "loss": 1.4873, "rewards/accuracies": 0.53125, "rewards/chosen": -0.5511162877082825, "rewards/margins": 0.030505258589982986, "rewards/rejected": -0.5816215872764587, "step": 410 }, { "epoch": 0.8894842598794374, "grad_norm": 25.421672297355457, "learning_rate": 2.1667305407530255e-08, "logits/chosen": -3.2762393951416016, "logits/rejected": -3.2448742389678955, "logps/chosen": -0.548682689666748, "logps/rejected": -0.6257365942001343, "loss": 1.4819, "rewards/accuracies": 0.65625, "rewards/chosen": -0.548682689666748, "rewards/margins": 0.07705400884151459, "rewards/rejected": -0.6257365942001343, "step": 415 }, { "epoch": 0.9002009377093101, "grad_norm": 27.04902626819421, "learning_rate": 1.7667258961816723e-08, "logits/chosen": -3.2720954418182373, "logits/rejected": -3.2802345752716064, "logps/chosen": -0.5331937074661255, "logps/rejected": -0.573731541633606, "loss": 1.5172, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.5331937074661255, "rewards/margins": 0.04053787142038345, "rewards/rejected": -0.573731541633606, "step": 420 }, { "epoch": 0.9109176155391828, "grad_norm": 27.138507466839215, "learning_rate": 1.4063967222860872e-08, "logits/chosen": -3.280641555786133, "logits/rejected": -3.268662214279175, "logps/chosen": -0.5173559188842773, "logps/rejected": -0.6271434426307678, "loss": 1.4903, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5173559188842773, "rewards/margins": 0.10978756844997406, "rewards/rejected": -0.6271434426307678, "step": 425 }, { "epoch": 0.9216342933690556, "grad_norm": 36.01846846843038, "learning_rate": 1.086249380089782e-08, "logits/chosen": -3.33141827583313, "logits/rejected": -3.3434956073760986, "logps/chosen": -0.615702748298645, "logps/rejected": -0.7053866386413574, "loss": 1.4914, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.615702748298645, "rewards/margins": 0.0896839126944542, "rewards/rejected": -0.7053866386413574, "step": 430 }, { "epoch": 0.9323509711989283, "grad_norm": 29.36505146378418, "learning_rate": 8.067337641547777e-09, "logits/chosen": -3.3964333534240723, "logits/rejected": -3.4042282104492188, "logps/chosen": -0.5159146189689636, "logps/rejected": -0.6444130539894104, "loss": 1.4866, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5159146189689636, "rewards/margins": 0.12849843502044678, "rewards/rejected": -0.6444130539894104, "step": 435 }, { "epoch": 0.943067649028801, "grad_norm": 23.999063503219116, "learning_rate": 5.682426703567034e-09, "logits/chosen": -3.2346031665802, "logits/rejected": -3.2309417724609375, "logps/chosen": -0.52639240026474, "logps/rejected": -0.6208442449569702, "loss": 1.4676, "rewards/accuracies": 0.625, "rewards/chosen": -0.52639240026474, "rewards/margins": 0.09445185959339142, "rewards/rejected": -0.6208442449569702, "step": 440 }, { "epoch": 0.9537843268586738, "grad_norm": 28.590598764799935, "learning_rate": 3.7111124389918146e-09, "logits/chosen": -3.2654852867126465, "logits/rejected": -3.264702558517456, "logps/chosen": -0.5419159531593323, "logps/rejected": -0.6305166482925415, "loss": 1.4876, "rewards/accuracies": 0.59375, "rewards/chosen": -0.5419159531593323, "rewards/margins": 0.08860062062740326, "rewards/rejected": -0.6305166482925415, "step": 445 }, { "epoch": 0.9645010046885466, "grad_norm": 23.071429282207497, "learning_rate": 2.156165083431627e-09, "logits/chosen": -3.2962241172790527, "logits/rejected": -3.283967971801758, "logps/chosen": -0.5432751774787903, "logps/rejected": -0.6289895176887512, "loss": 1.4804, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.5432751774787903, "rewards/margins": 0.08571438491344452, "rewards/rejected": -0.6289895176887512, "step": 450 }, { "epoch": 0.9752176825184193, "grad_norm": 25.76239166835448, "learning_rate": 1.019769763130851e-09, "logits/chosen": -3.2721996307373047, "logits/rejected": -3.271106243133545, "logps/chosen": -0.5389679670333862, "logps/rejected": -0.6317923665046692, "loss": 1.4855, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5389679670333862, "rewards/margins": 0.09282433986663818, "rewards/rejected": -0.6317923665046692, "step": 455 }, { "epoch": 0.9859343603482921, "grad_norm": 29.67085990506561, "learning_rate": 3.0352342426868125e-10, "logits/chosen": -3.287473678588867, "logits/rejected": -3.29301381111145, "logps/chosen": -0.5480049252510071, "logps/rejected": -0.6534655690193176, "loss": 1.4972, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5480049252510071, "rewards/margins": 0.10546054691076279, "rewards/rejected": -0.6534655690193176, "step": 460 }, { "epoch": 0.9966510381781648, "grad_norm": 29.944439651933386, "learning_rate": 8.432588813089836e-12, "logits/chosen": -3.3211536407470703, "logits/rejected": -3.304069995880127, "logps/chosen": -0.583086371421814, "logps/rejected": -0.6369927525520325, "loss": 1.5004, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.583086371421814, "rewards/margins": 0.05390629172325134, "rewards/rejected": -0.6369927525520325, "step": 465 }, { "epoch": 0.9987943737441393, "step": 466, "total_flos": 0.0, "train_loss": 1.5151263257976253, "train_runtime": 19305.9847, "train_samples_per_second": 3.093, "train_steps_per_second": 0.024 } ], "logging_steps": 5, "max_steps": 466, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }