{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998691442030882, "eval_steps": 500, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02093692750588851, "grad_norm": 85.02439880371094, "learning_rate": 3.333333333333333e-07, "logits/chosen": -3.096651315689087, "logits/rejected": -3.0814244747161865, "logps/chosen": -295.3846130371094, "logps/rejected": -279.3940124511719, "loss": 0.692, "rewards/accuracies": 0.4281249940395355, "rewards/chosen": 0.002148410538211465, "rewards/margins": 0.004025185946375132, "rewards/rejected": -0.0018767757574096322, "step": 10 }, { "epoch": 0.04187385501177702, "grad_norm": 74.03569030761719, "learning_rate": 4.998555145953054e-07, "logits/chosen": -3.083890199661255, "logits/rejected": -3.068505048751831, "logps/chosen": -278.1134338378906, "logps/rejected": -266.706298828125, "loss": 0.6728, "rewards/accuracies": 0.628125011920929, "rewards/chosen": 0.011356602422893047, "rewards/margins": 0.07497048377990723, "rewards/rejected": -0.0636138841509819, "step": 20 }, { "epoch": 0.06281078251766553, "grad_norm": 67.47853088378906, "learning_rate": 4.98700633214251e-07, "logits/chosen": -3.0271506309509277, "logits/rejected": -3.0370867252349854, "logps/chosen": -246.0901336669922, "logps/rejected": -250.2740478515625, "loss": 0.6305, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": 0.018177634105086327, "rewards/margins": 0.28142982721328735, "rewards/rejected": -0.2632521986961365, "step": 30 }, { "epoch": 0.08374771002355404, "grad_norm": 75.60296630859375, "learning_rate": 4.963962085412632e-07, "logits/chosen": -3.030393123626709, "logits/rejected": -3.009413242340088, "logps/chosen": -298.85662841796875, "logps/rejected": -275.070068359375, "loss": 0.6267, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03324083238840103, "rewards/margins": 0.2483668327331543, "rewards/rejected": -0.28160765767097473, "step": 40 }, { "epoch": 0.10468463752944256, "grad_norm": 69.39188385009766, "learning_rate": 4.929528920808854e-07, "logits/chosen": -3.052746534347534, "logits/rejected": -3.066401720046997, "logps/chosen": -281.92706298828125, "logps/rejected": -246.51901245117188, "loss": 0.6084, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.020495222881436348, "rewards/margins": 0.40510186553001404, "rewards/rejected": -0.42559710144996643, "step": 50 }, { "epoch": 0.12562156503533106, "grad_norm": 83.05278015136719, "learning_rate": 4.883865995197318e-07, "logits/chosen": -3.035808563232422, "logits/rejected": -3.0392653942108154, "logps/chosen": -290.5362548828125, "logps/rejected": -272.5738830566406, "loss": 0.5792, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.049367621541023254, "rewards/margins": 0.44638770818710327, "rewards/rejected": -0.49575528502464294, "step": 60 }, { "epoch": 0.14655849254121958, "grad_norm": 68.99510955810547, "learning_rate": 4.82718437161051e-07, "logits/chosen": -3.0192034244537354, "logits/rejected": -3.006897449493408, "logps/chosen": -265.6653747558594, "logps/rejected": -260.2899169921875, "loss": 0.5846, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.11491725593805313, "rewards/margins": 0.38759148120880127, "rewards/rejected": -0.5025087594985962, "step": 70 }, { "epoch": 0.16749542004710807, "grad_norm": 63.006248474121094, "learning_rate": 4.7597460436723613e-07, "logits/chosen": -3.007894992828369, "logits/rejected": -2.984534740447998, "logps/chosen": -291.2572326660156, "logps/rejected": -261.5260009765625, "loss": 0.5843, "rewards/accuracies": 0.703125, "rewards/chosen": -0.1174750104546547, "rewards/margins": 0.4169933795928955, "rewards/rejected": -0.5344683527946472, "step": 80 }, { "epoch": 0.1884323475529966, "grad_norm": 69.54000854492188, "learning_rate": 4.68186272461214e-07, "logits/chosen": -3.0481808185577393, "logits/rejected": -3.036348819732666, "logps/chosen": -273.8735656738281, "logps/rejected": -258.81866455078125, "loss": 0.5849, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.09798178821802139, "rewards/margins": 0.40805816650390625, "rewards/rejected": -0.5060399770736694, "step": 90 }, { "epoch": 0.2093692750588851, "grad_norm": 75.06998443603516, "learning_rate": 4.593894406464536e-07, "logits/chosen": -3.038364887237549, "logits/rejected": -3.0354368686676025, "logps/chosen": -296.1470031738281, "logps/rejected": -286.38592529296875, "loss": 0.5834, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08966656774282455, "rewards/margins": 0.5078560709953308, "rewards/rejected": -0.5975226759910583, "step": 100 }, { "epoch": 0.23030620256477363, "grad_norm": 137.9207305908203, "learning_rate": 4.496247696115597e-07, "logits/chosen": -3.039151191711426, "logits/rejected": -3.0391647815704346, "logps/chosen": -303.8061828613281, "logps/rejected": -295.7118225097656, "loss": 0.5804, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.07505225390195847, "rewards/margins": 0.6039966344833374, "rewards/rejected": -0.6790488958358765, "step": 110 }, { "epoch": 0.2512431300706621, "grad_norm": 77.84745788574219, "learning_rate": 4.3893739358856455e-07, "logits/chosen": -3.008737087249756, "logits/rejected": -2.9903557300567627, "logps/chosen": -305.4298095703125, "logps/rejected": -278.39947509765625, "loss": 0.5582, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.14889295399188995, "rewards/margins": 0.5994052886962891, "rewards/rejected": -0.7482982277870178, "step": 120 }, { "epoch": 0.2721800575765506, "grad_norm": 67.5359115600586, "learning_rate": 4.273767117336217e-07, "logits/chosen": -3.0301320552825928, "logits/rejected": -3.012173891067505, "logps/chosen": -308.94891357421875, "logps/rejected": -295.3975524902344, "loss": 0.5478, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.14121344685554504, "rewards/margins": 0.6831844449043274, "rewards/rejected": -0.82439786195755, "step": 130 }, { "epoch": 0.29311698508243916, "grad_norm": 70.47966766357422, "learning_rate": 4.1499615979437983e-07, "logits/chosen": -2.9864563941955566, "logits/rejected": -2.9899039268493652, "logps/chosen": -279.08477783203125, "logps/rejected": -257.7115173339844, "loss": 0.5548, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09717626124620438, "rewards/margins": 0.624592661857605, "rewards/rejected": -0.7217689752578735, "step": 140 }, { "epoch": 0.31405391258832765, "grad_norm": 90.4140396118164, "learning_rate": 4.018529631194369e-07, "logits/chosen": -2.9848761558532715, "logits/rejected": -2.9709620475769043, "logps/chosen": -281.3067932128906, "logps/rejected": -271.0277099609375, "loss": 0.5703, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.2502523362636566, "rewards/margins": 0.6211402416229248, "rewards/rejected": -0.871392548084259, "step": 150 }, { "epoch": 0.33499084009421615, "grad_norm": 68.7781753540039, "learning_rate": 3.8800787215151164e-07, "logits/chosen": -3.032036066055298, "logits/rejected": -3.009941339492798, "logps/chosen": -321.748779296875, "logps/rejected": -281.04107666015625, "loss": 0.5392, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.12777641415596008, "rewards/margins": 0.6283574104309082, "rewards/rejected": -0.7561337947845459, "step": 160 }, { "epoch": 0.3559277676001047, "grad_norm": 66.1634292602539, "learning_rate": 3.7352488162693715e-07, "logits/chosen": -3.0462286472320557, "logits/rejected": -3.030794620513916, "logps/chosen": -274.5036926269531, "logps/rejected": -251.90499877929688, "loss": 0.5505, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.12955203652381897, "rewards/margins": 0.6119082570075989, "rewards/rejected": -0.7414603233337402, "step": 170 }, { "epoch": 0.3768646951059932, "grad_norm": 75.37867736816406, "learning_rate": 3.584709347793895e-07, "logits/chosen": -3.058922052383423, "logits/rejected": -3.0691912174224854, "logps/chosen": -301.69635009765625, "logps/rejected": -248.55593872070312, "loss": 0.5508, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2145983725786209, "rewards/margins": 0.5311049222946167, "rewards/rejected": -0.7457033395767212, "step": 180 }, { "epoch": 0.39780162261188173, "grad_norm": 75.07308959960938, "learning_rate": 3.4291561391508185e-07, "logits/chosen": -3.0233283042907715, "logits/rejected": -3.0086400508880615, "logps/chosen": -278.5184326171875, "logps/rejected": -270.7456970214844, "loss": 0.5632, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28689366579055786, "rewards/margins": 0.6087759733200073, "rewards/rejected": -0.8956696391105652, "step": 190 }, { "epoch": 0.4187385501177702, "grad_norm": 71.18640899658203, "learning_rate": 3.2693081878964544e-07, "logits/chosen": -3.0013060569763184, "logits/rejected": -3.005615472793579, "logps/chosen": -292.04852294921875, "logps/rejected": -276.50811767578125, "loss": 0.5475, "rewards/accuracies": 0.75, "rewards/chosen": -0.20541390776634216, "rewards/margins": 0.6916528940200806, "rewards/rejected": -0.8970667719841003, "step": 200 }, { "epoch": 0.4396754776236587, "grad_norm": 85.28279113769531, "learning_rate": 3.1059043427330314e-07, "logits/chosen": -2.9617443084716797, "logits/rejected": -2.9682388305664062, "logps/chosen": -261.1861572265625, "logps/rejected": -263.7696838378906, "loss": 0.533, "rewards/accuracies": 0.778124988079071, "rewards/chosen": -0.204990416765213, "rewards/margins": 0.7386445999145508, "rewards/rejected": -0.9436351656913757, "step": 210 }, { "epoch": 0.46061240512954726, "grad_norm": 70.95091247558594, "learning_rate": 2.9396998884045234e-07, "logits/chosen": -3.0342681407928467, "logits/rejected": -3.040320873260498, "logps/chosen": -300.98077392578125, "logps/rejected": -272.7954406738281, "loss": 0.5389, "rewards/accuracies": 0.734375, "rewards/chosen": -0.25737327337265015, "rewards/margins": 0.695563018321991, "rewards/rejected": -0.9529362916946411, "step": 220 }, { "epoch": 0.48154933263543576, "grad_norm": 64.26698303222656, "learning_rate": 2.7714630546218634e-07, "logits/chosen": -3.1135382652282715, "logits/rejected": -3.1126351356506348, "logps/chosen": -326.8101806640625, "logps/rejected": -296.044921875, "loss": 0.5438, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.2218112051486969, "rewards/margins": 0.7040417790412903, "rewards/rejected": -0.9258529543876648, "step": 230 }, { "epoch": 0.5024862601413242, "grad_norm": 85.34664154052734, "learning_rate": 2.6019714651539645e-07, "logits/chosen": -3.0325405597686768, "logits/rejected": -3.017796516418457, "logps/chosen": -297.9241638183594, "logps/rejected": -286.4637756347656, "loss": 0.5647, "rewards/accuracies": 0.734375, "rewards/chosen": -0.274208128452301, "rewards/margins": 0.7521761655807495, "rewards/rejected": -1.0263843536376953, "step": 240 }, { "epoch": 0.5234231876472127, "grad_norm": 70.70326232910156, "learning_rate": 2.4320085434975556e-07, "logits/chosen": -3.0199804306030273, "logits/rejected": -3.01350736618042, "logps/chosen": -284.5586853027344, "logps/rejected": -259.7466125488281, "loss": 0.56, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.2730976641178131, "rewards/margins": 0.7632043957710266, "rewards/rejected": -1.036302089691162, "step": 250 }, { "epoch": 0.5443601151531012, "grad_norm": 63.27799606323242, "learning_rate": 2.2623598917395436e-07, "logits/chosen": -2.9862048625946045, "logits/rejected": -3.020139217376709, "logps/chosen": -296.0469665527344, "logps/rejected": -276.1849365234375, "loss": 0.5463, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23287267982959747, "rewards/margins": 0.7090679407119751, "rewards/rejected": -0.9419406652450562, "step": 260 }, { "epoch": 0.5652970426589898, "grad_norm": 66.7594223022461, "learning_rate": 2.0938096593494853e-07, "logits/chosen": -3.041605234146118, "logits/rejected": -3.052452325820923, "logps/chosen": -286.18707275390625, "logps/rejected": -260.3746032714844, "loss": 0.5256, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.11122454702854156, "rewards/margins": 0.802563488483429, "rewards/rejected": -0.9137881398200989, "step": 270 }, { "epoch": 0.5862339701648783, "grad_norm": 88.30416107177734, "learning_rate": 1.9271369186863618e-07, "logits/chosen": -3.0525062084198, "logits/rejected": -3.0589468479156494, "logps/chosen": -284.6452941894531, "logps/rejected": -277.75067138671875, "loss": 0.5551, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22388038039207458, "rewards/margins": 0.6198626756668091, "rewards/rejected": -0.8437430262565613, "step": 280 }, { "epoch": 0.6071708976707668, "grad_norm": 65.08110809326172, "learning_rate": 1.763112063972739e-07, "logits/chosen": -3.044279098510742, "logits/rejected": -3.0555179119110107, "logps/chosen": -285.0969543457031, "logps/rejected": -259.02142333984375, "loss": 0.5278, "rewards/accuracies": 0.7718750238418579, "rewards/chosen": -0.16408179700374603, "rewards/margins": 0.8104633092880249, "rewards/rejected": -0.9745450019836426, "step": 290 }, { "epoch": 0.6281078251766553, "grad_norm": 87.96784210205078, "learning_rate": 1.602493250381003e-07, "logits/chosen": -3.0667061805725098, "logits/rejected": -3.064436435699463, "logps/chosen": -287.88372802734375, "logps/rejected": -248.08615112304688, "loss": 0.564, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27062320709228516, "rewards/margins": 0.6274420022964478, "rewards/rejected": -0.8980652093887329, "step": 300 }, { "epoch": 0.6490447526825438, "grad_norm": 67.1192398071289, "learning_rate": 1.446022889690875e-07, "logits/chosen": -3.0603392124176025, "logits/rejected": -3.0506479740142822, "logps/chosen": -275.33941650390625, "logps/rejected": -292.2793884277344, "loss": 0.5304, "rewards/accuracies": 0.734375, "rewards/chosen": -0.27399036288261414, "rewards/margins": 0.7174574136734009, "rewards/rejected": -0.9914478063583374, "step": 310 }, { "epoch": 0.6699816801884323, "grad_norm": 68.73091125488281, "learning_rate": 1.2944242187160015e-07, "logits/chosen": -3.0304224491119385, "logits/rejected": -3.0630006790161133, "logps/chosen": -265.5944519042969, "logps/rejected": -270.86041259765625, "loss": 0.5819, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.20603282749652863, "rewards/margins": 0.8553716540336609, "rewards/rejected": -1.0614043474197388, "step": 320 }, { "epoch": 0.6909186076943209, "grad_norm": 78.73789978027344, "learning_rate": 1.1483979563610069e-07, "logits/chosen": -3.044661045074463, "logits/rejected": -3.035492181777954, "logps/chosen": -274.28204345703125, "logps/rejected": -274.99151611328125, "loss": 0.5374, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.1646738052368164, "rewards/margins": 0.8839667439460754, "rewards/rejected": -1.048640489578247, "step": 330 }, { "epoch": 0.7118555352002094, "grad_norm": 70.24629211425781, "learning_rate": 1.0086190647607529e-07, "logits/chosen": -3.0631115436553955, "logits/rejected": -3.089351177215576, "logps/chosen": -287.9900817871094, "logps/rejected": -272.482421875, "loss": 0.5607, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.11857350915670395, "rewards/margins": 0.8544532060623169, "rewards/rejected": -0.9730268716812134, "step": 340 }, { "epoch": 0.7327924627060979, "grad_norm": 96.91629791259766, "learning_rate": 8.757336294724687e-08, "logits/chosen": -3.068084239959717, "logits/rejected": -3.0875658988952637, "logps/chosen": -291.7541198730469, "logps/rejected": -258.79132080078125, "loss": 0.5348, "rewards/accuracies": 0.703125, "rewards/chosen": -0.15175102651119232, "rewards/margins": 0.8772052526473999, "rewards/rejected": -1.028956413269043, "step": 350 }, { "epoch": 0.7537293902119864, "grad_norm": 69.54812622070312, "learning_rate": 7.503558731410958e-08, "logits/chosen": -3.07660174369812, "logits/rejected": -3.0733513832092285, "logps/chosen": -252.8855438232422, "logps/rejected": -264.5438232421875, "loss": 0.5477, "rewards/accuracies": 0.71875, "rewards/chosen": -0.31728893518447876, "rewards/margins": 0.6826174259185791, "rewards/rejected": -0.9999063611030579, "step": 360 }, { "epoch": 0.7746663177178749, "grad_norm": 68.41463470458984, "learning_rate": 6.330653164412908e-08, "logits/chosen": -3.0837528705596924, "logits/rejected": -3.074859619140625, "logps/chosen": -292.6845703125, "logps/rejected": -274.19189453125, "loss": 0.5639, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.18258486688137054, "rewards/margins": 0.7360013723373413, "rewards/rejected": -0.9185863733291626, "step": 370 }, { "epoch": 0.7956032452237635, "grad_norm": 73.8513412475586, "learning_rate": 5.2440409941877456e-08, "logits/chosen": -3.080451250076294, "logits/rejected": -3.1014645099639893, "logps/chosen": -282.2720642089844, "logps/rejected": -274.5783996582031, "loss": 0.5627, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.17349520325660706, "rewards/margins": 0.7617751359939575, "rewards/rejected": -0.9352704286575317, "step": 380 }, { "epoch": 0.816540172729652, "grad_norm": 62.425689697265625, "learning_rate": 4.248744756122985e-08, "logits/chosen": -3.1146225929260254, "logits/rejected": -3.1159985065460205, "logps/chosen": -284.4311828613281, "logps/rejected": -270.375244140625, "loss": 0.5397, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1737302988767624, "rewards/margins": 0.7495090365409851, "rewards/rejected": -0.9232394099235535, "step": 390 }, { "epoch": 0.8374771002355405, "grad_norm": 67.75579833984375, "learning_rate": 3.349364905389032e-08, "logits/chosen": -3.039133071899414, "logits/rejected": -3.0417704582214355, "logps/chosen": -289.43792724609375, "logps/rejected": -279.08123779296875, "loss": 0.5557, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.19276252388954163, "rewards/margins": 0.713485062122345, "rewards/rejected": -0.906247615814209, "step": 400 }, { "epoch": 0.8584140277414289, "grad_norm": 60.96617126464844, "learning_rate": 2.550058552729639e-08, "logits/chosen": -3.0589489936828613, "logits/rejected": -3.0491528511047363, "logps/chosen": -298.5786437988281, "logps/rejected": -275.2989807128906, "loss": 0.5378, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.22146447002887726, "rewards/margins": 0.7704640626907349, "rewards/rejected": -0.9919285774230957, "step": 410 }, { "epoch": 0.8793509552473174, "grad_norm": 57.156639099121094, "learning_rate": 1.854520249477551e-08, "logits/chosen": -3.0775399208068848, "logits/rejected": -3.0917420387268066, "logps/chosen": -281.49053955078125, "logps/rejected": -252.451416015625, "loss": 0.5338, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.14828899502754211, "rewards/margins": 0.7465869188308716, "rewards/rejected": -0.8948760032653809, "step": 420 }, { "epoch": 0.9002878827532059, "grad_norm": 80.24808502197266, "learning_rate": 1.265964910610884e-08, "logits/chosen": -3.1026782989501953, "logits/rejected": -3.111166477203369, "logps/chosen": -285.04193115234375, "logps/rejected": -284.14410400390625, "loss": 0.5455, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1942686289548874, "rewards/margins": 0.8707529306411743, "rewards/rejected": -1.0650215148925781, "step": 430 }, { "epoch": 0.9212248102590945, "grad_norm": 61.17852020263672, "learning_rate": 7.871129547831062e-09, "logits/chosen": -3.0820913314819336, "logits/rejected": -3.0653717517852783, "logps/chosen": -278.7796325683594, "logps/rejected": -235.0684814453125, "loss": 0.5408, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.23389343917369843, "rewards/margins": 0.6883670091629028, "rewards/rejected": -0.9222604632377625, "step": 440 }, { "epoch": 0.942161737764983, "grad_norm": 85.3263168334961, "learning_rate": 4.201777300124249e-09, "logits/chosen": -3.0574049949645996, "logits/rejected": -3.0575528144836426, "logps/chosen": -273.01531982421875, "logps/rejected": -243.1544189453125, "loss": 0.5495, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.13399073481559753, "rewards/margins": 0.6954258680343628, "rewards/rejected": -0.8294164538383484, "step": 450 }, { "epoch": 0.9630986652708715, "grad_norm": 67.3755874633789, "learning_rate": 1.6685528315146802e-09, "logits/chosen": -3.0953588485717773, "logits/rejected": -3.0970802307128906, "logps/chosen": -282.9346618652344, "logps/rejected": -261.16497802734375, "loss": 0.5443, "rewards/accuracies": 0.71875, "rewards/chosen": -0.24288193881511688, "rewards/margins": 0.7198012471199036, "rewards/rejected": -0.9626832008361816, "step": 460 }, { "epoch": 0.98403559277676, "grad_norm": 61.79122543334961, "learning_rate": 2.831652042480093e-10, "logits/chosen": -3.086475372314453, "logits/rejected": -3.0854830741882324, "logps/chosen": -301.7154235839844, "logps/rejected": -291.1816101074219, "loss": 0.5439, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.232115238904953, "rewards/margins": 0.7247028350830078, "rewards/rejected": -0.9568179845809937, "step": 470 }, { "epoch": 0.998691442030882, "step": 477, "total_flos": 5.005717235969294e+18, "train_loss": 0.5631812908364542, "train_runtime": 18694.5367, "train_samples_per_second": 3.27, "train_steps_per_second": 0.026 } ], "logging_steps": 10, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 256, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.005717235969294e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }