{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.16410929679166325,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016410929679166325,
      "grad_norm": 0.1563968062400818,
      "learning_rate": 9.999992612842675e-06,
      "loss": 0.6605,
      "step": 5
    },
    {
      "epoch": 0.003282185935833265,
      "grad_norm": 0.15270280838012695,
      "learning_rate": 9.999970451392527e-06,
      "loss": 0.6491,
      "step": 10
    },
    {
      "epoch": 0.0049232789037498975,
      "grad_norm": 0.17153096199035645,
      "learning_rate": 9.999933515715042e-06,
      "loss": 0.6383,
      "step": 15
    },
    {
      "epoch": 0.00656437187166653,
      "grad_norm": 0.172921285033226,
      "learning_rate": 9.999881805919356e-06,
      "loss": 0.6506,
      "step": 20
    },
    {
      "epoch": 0.008205464839583163,
      "grad_norm": 0.16361959278583527,
      "learning_rate": 9.999815322158266e-06,
      "loss": 0.6067,
      "step": 25
    },
    {
      "epoch": 0.009846557807499795,
      "grad_norm": 0.12810567021369934,
      "learning_rate": 9.999734064628224e-06,
      "loss": 0.6157,
      "step": 30
    },
    {
      "epoch": 0.011487650775416428,
      "grad_norm": 0.13882791996002197,
      "learning_rate": 9.999638033569334e-06,
      "loss": 0.6163,
      "step": 35
    },
    {
      "epoch": 0.01312874374333306,
      "grad_norm": 0.08581311255693436,
      "learning_rate": 9.999527229265353e-06,
      "loss": 0.5795,
      "step": 40
    },
    {
      "epoch": 0.014769836711249693,
      "grad_norm": 0.09464729577302933,
      "learning_rate": 9.999401652043697e-06,
      "loss": 0.5908,
      "step": 45
    },
    {
      "epoch": 0.016410929679166325,
      "grad_norm": 0.08246736973524094,
      "learning_rate": 9.999261302275424e-06,
      "loss": 0.5681,
      "step": 50
    },
    {
      "epoch": 0.018052022647082958,
      "grad_norm": 0.07881084084510803,
      "learning_rate": 9.999106180375251e-06,
      "loss": 0.5566,
      "step": 55
    },
    {
      "epoch": 0.01969311561499959,
      "grad_norm": 0.07179544121026993,
      "learning_rate": 9.998936286801541e-06,
      "loss": 0.5806,
      "step": 60
    },
    {
      "epoch": 0.021334208582916223,
      "grad_norm": 0.11372455954551697,
      "learning_rate": 9.99875162205631e-06,
      "loss": 0.541,
      "step": 65
    },
    {
      "epoch": 0.022975301550832855,
      "grad_norm": 0.07621748745441437,
      "learning_rate": 9.998552186685211e-06,
      "loss": 0.5421,
      "step": 70
    },
    {
      "epoch": 0.024616394518749488,
      "grad_norm": 0.07502977550029755,
      "learning_rate": 9.998337981277552e-06,
      "loss": 0.5634,
      "step": 75
    },
    {
      "epoch": 0.02625748748666612,
      "grad_norm": 0.06710907071828842,
      "learning_rate": 9.998109006466281e-06,
      "loss": 0.5322,
      "step": 80
    },
    {
      "epoch": 0.027898580454582753,
      "grad_norm": 0.066213458776474,
      "learning_rate": 9.997865262927984e-06,
      "loss": 0.5474,
      "step": 85
    },
    {
      "epoch": 0.029539673422499385,
      "grad_norm": 0.0674639567732811,
      "learning_rate": 9.997606751382894e-06,
      "loss": 0.5525,
      "step": 90
    },
    {
      "epoch": 0.031180766390416018,
      "grad_norm": 0.07476690411567688,
      "learning_rate": 9.997333472594872e-06,
      "loss": 0.526,
      "step": 95
    },
    {
      "epoch": 0.03282185935833265,
      "grad_norm": 0.05499599874019623,
      "learning_rate": 9.997045427371423e-06,
      "loss": 0.5262,
      "step": 100
    },
    {
      "epoch": 0.03446295232624928,
      "grad_norm": 0.06396327167749405,
      "learning_rate": 9.996742616563682e-06,
      "loss": 0.5141,
      "step": 105
    },
    {
      "epoch": 0.036104045294165915,
      "grad_norm": 0.06143304333090782,
      "learning_rate": 9.99642504106641e-06,
      "loss": 0.5107,
      "step": 110
    },
    {
      "epoch": 0.03774513826208255,
      "grad_norm": 0.0630095973610878,
      "learning_rate": 9.996092701818004e-06,
      "loss": 0.5177,
      "step": 115
    },
    {
      "epoch": 0.03938623122999918,
      "grad_norm": 0.059215761721134186,
      "learning_rate": 9.995745599800476e-06,
      "loss": 0.5098,
      "step": 120
    },
    {
      "epoch": 0.04102732419791581,
      "grad_norm": 0.05862729996442795,
      "learning_rate": 9.995383736039465e-06,
      "loss": 0.5027,
      "step": 125
    },
    {
      "epoch": 0.042668417165832445,
      "grad_norm": 0.05443592369556427,
      "learning_rate": 9.995007111604232e-06,
      "loss": 0.4688,
      "step": 130
    },
    {
      "epoch": 0.04430951013374908,
      "grad_norm": 0.05443759262561798,
      "learning_rate": 9.994615727607648e-06,
      "loss": 0.4988,
      "step": 135
    },
    {
      "epoch": 0.04595060310166571,
      "grad_norm": 0.05553797259926796,
      "learning_rate": 9.994209585206201e-06,
      "loss": 0.4864,
      "step": 140
    },
    {
      "epoch": 0.04759169606958234,
      "grad_norm": 0.055965058505535126,
      "learning_rate": 9.993788685599985e-06,
      "loss": 0.4768,
      "step": 145
    },
    {
      "epoch": 0.049232789037498975,
      "grad_norm": 0.06474044173955917,
      "learning_rate": 9.993353030032701e-06,
      "loss": 0.4942,
      "step": 150
    },
    {
      "epoch": 0.05087388200541561,
      "grad_norm": 0.0566246323287487,
      "learning_rate": 9.992902619791652e-06,
      "loss": 0.4888,
      "step": 155
    },
    {
      "epoch": 0.05251497497333224,
      "grad_norm": 0.0555800199508667,
      "learning_rate": 9.992437456207738e-06,
      "loss": 0.4687,
      "step": 160
    },
    {
      "epoch": 0.05415606794124887,
      "grad_norm": 0.0563640259206295,
      "learning_rate": 9.991957540655453e-06,
      "loss": 0.4923,
      "step": 165
    },
    {
      "epoch": 0.055797160909165505,
      "grad_norm": 0.05194167420268059,
      "learning_rate": 9.991462874552882e-06,
      "loss": 0.4811,
      "step": 170
    },
    {
      "epoch": 0.05743825387708214,
      "grad_norm": 0.1205214262008667,
      "learning_rate": 9.990953459361696e-06,
      "loss": 0.4696,
      "step": 175
    },
    {
      "epoch": 0.05907934684499877,
      "grad_norm": 0.0542314276099205,
      "learning_rate": 9.990429296587148e-06,
      "loss": 0.4547,
      "step": 180
    },
    {
      "epoch": 0.0607204398129154,
      "grad_norm": 0.06125911697745323,
      "learning_rate": 9.989890387778065e-06,
      "loss": 0.4817,
      "step": 185
    },
    {
      "epoch": 0.062361532780832035,
      "grad_norm": 0.06071058660745621,
      "learning_rate": 9.98933673452685e-06,
      "loss": 0.4553,
      "step": 190
    },
    {
      "epoch": 0.06400262574874867,
      "grad_norm": 0.05769752338528633,
      "learning_rate": 9.98876833846947e-06,
      "loss": 0.4632,
      "step": 195
    },
    {
      "epoch": 0.0656437187166653,
      "grad_norm": 0.05761849135160446,
      "learning_rate": 9.988185201285461e-06,
      "loss": 0.4439,
      "step": 200
    },
    {
      "epoch": 0.06728481168458193,
      "grad_norm": 0.05703369528055191,
      "learning_rate": 9.987587324697912e-06,
      "loss": 0.4506,
      "step": 205
    },
    {
      "epoch": 0.06892590465249857,
      "grad_norm": 0.05599252134561539,
      "learning_rate": 9.986974710473467e-06,
      "loss": 0.4573,
      "step": 210
    },
    {
      "epoch": 0.0705669976204152,
      "grad_norm": 0.05374148488044739,
      "learning_rate": 9.986347360422316e-06,
      "loss": 0.4555,
      "step": 215
    },
    {
      "epoch": 0.07220809058833183,
      "grad_norm": 0.054009810090065,
      "learning_rate": 9.985705276398193e-06,
      "loss": 0.4436,
      "step": 220
    },
    {
      "epoch": 0.07384918355624846,
      "grad_norm": 0.07604236155748367,
      "learning_rate": 9.985048460298367e-06,
      "loss": 0.4583,
      "step": 225
    },
    {
      "epoch": 0.0754902765241651,
      "grad_norm": 0.052760086953639984,
      "learning_rate": 9.984376914063643e-06,
      "loss": 0.4409,
      "step": 230
    },
    {
      "epoch": 0.07713136949208173,
      "grad_norm": 0.06065182387828827,
      "learning_rate": 9.983690639678343e-06,
      "loss": 0.4637,
      "step": 235
    },
    {
      "epoch": 0.07877246245999836,
      "grad_norm": 0.06539740413427353,
      "learning_rate": 9.982989639170319e-06,
      "loss": 0.4636,
      "step": 240
    },
    {
      "epoch": 0.08041355542791499,
      "grad_norm": 0.06656944006681442,
      "learning_rate": 9.982273914610927e-06,
      "loss": 0.4487,
      "step": 245
    },
    {
      "epoch": 0.08205464839583163,
      "grad_norm": 0.05745495483279228,
      "learning_rate": 9.981543468115039e-06,
      "loss": 0.4393,
      "step": 250
    },
    {
      "epoch": 0.08369574136374826,
      "grad_norm": 0.06525252759456635,
      "learning_rate": 9.98079830184102e-06,
      "loss": 0.4713,
      "step": 255
    },
    {
      "epoch": 0.08533683433166489,
      "grad_norm": 0.0555146224796772,
      "learning_rate": 9.980038417990736e-06,
      "loss": 0.4278,
      "step": 260
    },
    {
      "epoch": 0.08697792729958152,
      "grad_norm": 0.0773826315999031,
      "learning_rate": 9.979263818809542e-06,
      "loss": 0.4527,
      "step": 265
    },
    {
      "epoch": 0.08861902026749816,
      "grad_norm": 0.06476614624261856,
      "learning_rate": 9.978474506586269e-06,
      "loss": 0.4404,
      "step": 270
    },
    {
      "epoch": 0.09026011323541479,
      "grad_norm": 0.06019666790962219,
      "learning_rate": 9.977670483653228e-06,
      "loss": 0.4414,
      "step": 275
    },
    {
      "epoch": 0.09190120620333142,
      "grad_norm": 0.06592460721731186,
      "learning_rate": 9.976851752386196e-06,
      "loss": 0.4313,
      "step": 280
    },
    {
      "epoch": 0.09354229917124805,
      "grad_norm": 0.06817147135734558,
      "learning_rate": 9.976018315204412e-06,
      "loss": 0.4357,
      "step": 285
    },
    {
      "epoch": 0.09518339213916469,
      "grad_norm": 0.06738044321537018,
      "learning_rate": 9.97517017457057e-06,
      "loss": 0.4511,
      "step": 290
    },
    {
      "epoch": 0.09682448510708132,
      "grad_norm": 0.07049284130334854,
      "learning_rate": 9.974307332990806e-06,
      "loss": 0.447,
      "step": 295
    },
    {
      "epoch": 0.09846557807499795,
      "grad_norm": 0.06609766185283661,
      "learning_rate": 9.973429793014703e-06,
      "loss": 0.4576,
      "step": 300
    },
    {
      "epoch": 0.10010667104291458,
      "grad_norm": 0.06007273495197296,
      "learning_rate": 9.972537557235267e-06,
      "loss": 0.4397,
      "step": 305
    },
    {
      "epoch": 0.10174776401083122,
      "grad_norm": 0.0659220889210701,
      "learning_rate": 9.971630628288935e-06,
      "loss": 0.4538,
      "step": 310
    },
    {
      "epoch": 0.10338885697874785,
      "grad_norm": 0.08680638670921326,
      "learning_rate": 9.970709008855557e-06,
      "loss": 0.442,
      "step": 315
    },
    {
      "epoch": 0.10502994994666448,
      "grad_norm": 0.06552395224571228,
      "learning_rate": 9.969772701658393e-06,
      "loss": 0.4195,
      "step": 320
    },
    {
      "epoch": 0.10667104291458111,
      "grad_norm": 0.06147119030356407,
      "learning_rate": 9.968821709464101e-06,
      "loss": 0.4419,
      "step": 325
    },
    {
      "epoch": 0.10831213588249775,
      "grad_norm": 0.06841282546520233,
      "learning_rate": 9.967856035082732e-06,
      "loss": 0.4313,
      "step": 330
    },
    {
      "epoch": 0.10995322885041438,
      "grad_norm": 0.07704174518585205,
      "learning_rate": 9.966875681367724e-06,
      "loss": 0.4373,
      "step": 335
    },
    {
      "epoch": 0.11159432181833101,
      "grad_norm": 0.06586287170648575,
      "learning_rate": 9.965880651215885e-06,
      "loss": 0.4475,
      "step": 340
    },
    {
      "epoch": 0.11323541478624764,
      "grad_norm": 0.13131344318389893,
      "learning_rate": 9.964870947567396e-06,
      "loss": 0.42,
      "step": 345
    },
    {
      "epoch": 0.11487650775416428,
      "grad_norm": 0.06351525336503983,
      "learning_rate": 9.963846573405791e-06,
      "loss": 0.4247,
      "step": 350
    },
    {
      "epoch": 0.11651760072208091,
      "grad_norm": 0.06860467791557312,
      "learning_rate": 9.962807531757955e-06,
      "loss": 0.432,
      "step": 355
    },
    {
      "epoch": 0.11815869368999754,
      "grad_norm": 0.06938762962818146,
      "learning_rate": 9.961753825694112e-06,
      "loss": 0.4324,
      "step": 360
    },
    {
      "epoch": 0.11979978665791417,
      "grad_norm": 0.06990928202867508,
      "learning_rate": 9.960685458327824e-06,
      "loss": 0.4136,
      "step": 365
    },
    {
      "epoch": 0.1214408796258308,
      "grad_norm": 0.06884902715682983,
      "learning_rate": 9.959602432815964e-06,
      "loss": 0.4301,
      "step": 370
    },
    {
      "epoch": 0.12308197259374744,
      "grad_norm": 0.06610533595085144,
      "learning_rate": 9.958504752358729e-06,
      "loss": 0.418,
      "step": 375
    },
    {
      "epoch": 0.12472306556166407,
      "grad_norm": 0.07162132859230042,
      "learning_rate": 9.957392420199612e-06,
      "loss": 0.4335,
      "step": 380
    },
    {
      "epoch": 0.1263641585295807,
      "grad_norm": 0.07637803256511688,
      "learning_rate": 9.956265439625401e-06,
      "loss": 0.4508,
      "step": 385
    },
    {
      "epoch": 0.12800525149749734,
      "grad_norm": 0.06776853650808334,
      "learning_rate": 9.955123813966172e-06,
      "loss": 0.4235,
      "step": 390
    },
    {
      "epoch": 0.12964634446541395,
      "grad_norm": 0.06781169027090073,
      "learning_rate": 9.953967546595272e-06,
      "loss": 0.4296,
      "step": 395
    },
    {
      "epoch": 0.1312874374333306,
      "grad_norm": 0.06995800137519836,
      "learning_rate": 9.952796640929309e-06,
      "loss": 0.4328,
      "step": 400
    },
    {
      "epoch": 0.13292853040124722,
      "grad_norm": 0.06544926762580872,
      "learning_rate": 9.951611100428151e-06,
      "loss": 0.4235,
      "step": 405
    },
    {
      "epoch": 0.13456962336916387,
      "grad_norm": 0.07403396815061569,
      "learning_rate": 9.95041092859491e-06,
      "loss": 0.4362,
      "step": 410
    },
    {
      "epoch": 0.13621071633708048,
      "grad_norm": 0.06964828819036484,
      "learning_rate": 9.949196128975925e-06,
      "loss": 0.4134,
      "step": 415
    },
    {
      "epoch": 0.13785180930499713,
      "grad_norm": 0.07269076257944107,
      "learning_rate": 9.947966705160765e-06,
      "loss": 0.4288,
      "step": 420
    },
    {
      "epoch": 0.13949290227291375,
      "grad_norm": 0.0716971680521965,
      "learning_rate": 9.946722660782209e-06,
      "loss": 0.4113,
      "step": 425
    },
    {
      "epoch": 0.1411339952408304,
      "grad_norm": 0.06757480651140213,
      "learning_rate": 9.945463999516236e-06,
      "loss": 0.4311,
      "step": 430
    },
    {
      "epoch": 0.14277508820874701,
      "grad_norm": 0.07381222397089005,
      "learning_rate": 9.944190725082019e-06,
      "loss": 0.4313,
      "step": 435
    },
    {
      "epoch": 0.14441618117666366,
      "grad_norm": 0.07273319363594055,
      "learning_rate": 9.94290284124191e-06,
      "loss": 0.4195,
      "step": 440
    },
    {
      "epoch": 0.14605727414458028,
      "grad_norm": 0.07356058806180954,
      "learning_rate": 9.941600351801426e-06,
      "loss": 0.425,
      "step": 445
    },
    {
      "epoch": 0.14769836711249693,
      "grad_norm": 0.07552187144756317,
      "learning_rate": 9.940283260609248e-06,
      "loss": 0.4295,
      "step": 450
    },
    {
      "epoch": 0.14933946008041354,
      "grad_norm": 0.0747319757938385,
      "learning_rate": 9.938951571557198e-06,
      "loss": 0.4426,
      "step": 455
    },
    {
      "epoch": 0.1509805530483302,
      "grad_norm": 0.06968298554420471,
      "learning_rate": 9.937605288580237e-06,
      "loss": 0.4244,
      "step": 460
    },
    {
      "epoch": 0.1526216460162468,
      "grad_norm": 0.07281242311000824,
      "learning_rate": 9.936244415656443e-06,
      "loss": 0.4263,
      "step": 465
    },
    {
      "epoch": 0.15426273898416346,
      "grad_norm": 0.07637212425470352,
      "learning_rate": 9.934868956807012e-06,
      "loss": 0.4217,
      "step": 470
    },
    {
      "epoch": 0.15590383195208007,
      "grad_norm": 0.07728656381368637,
      "learning_rate": 9.933478916096235e-06,
      "loss": 0.4214,
      "step": 475
    },
    {
      "epoch": 0.15754492491999672,
      "grad_norm": 0.09080182015895844,
      "learning_rate": 9.932074297631494e-06,
      "loss": 0.4143,
      "step": 480
    },
    {
      "epoch": 0.15918601788791334,
      "grad_norm": 0.07812851667404175,
      "learning_rate": 9.930655105563241e-06,
      "loss": 0.4307,
      "step": 485
    },
    {
      "epoch": 0.16082711085582999,
      "grad_norm": 0.0779787003993988,
      "learning_rate": 9.929221344084994e-06,
      "loss": 0.433,
      "step": 490
    },
    {
      "epoch": 0.1624682038237466,
      "grad_norm": 0.0747016966342926,
      "learning_rate": 9.927773017433325e-06,
      "loss": 0.4026,
      "step": 495
    },
    {
      "epoch": 0.16410929679166325,
      "grad_norm": 0.07794748246669769,
      "learning_rate": 9.926310129887836e-06,
      "loss": 0.4331,
      "step": 500
    }
  ],
  "logging_steps": 5,
  "max_steps": 9138,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.57326968815616e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}