{ "best_metric": 1.1340564489364624, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.5621925509486999, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0028109627547434997, "grad_norm": 0.14595530927181244, "learning_rate": 1e-05, "loss": 1.615, "step": 1 }, { "epoch": 0.0028109627547434997, "eval_loss": 1.2795743942260742, "eval_runtime": 84.3689, "eval_samples_per_second": 7.1, "eval_steps_per_second": 1.778, "step": 1 }, { "epoch": 0.005621925509486999, "grad_norm": 0.13871502876281738, "learning_rate": 2e-05, "loss": 1.4278, "step": 2 }, { "epoch": 0.008432888264230498, "grad_norm": 0.13428433239459991, "learning_rate": 3e-05, "loss": 1.3684, "step": 3 }, { "epoch": 0.011243851018973999, "grad_norm": 0.13588163256645203, "learning_rate": 4e-05, "loss": 1.4611, "step": 4 }, { "epoch": 0.014054813773717497, "grad_norm": 0.13623295724391937, "learning_rate": 5e-05, "loss": 1.3713, "step": 5 }, { "epoch": 0.016865776528460996, "grad_norm": 0.14679689705371857, "learning_rate": 6e-05, "loss": 1.6388, "step": 6 }, { "epoch": 0.019676739283204497, "grad_norm": 0.13014540076255798, "learning_rate": 7e-05, "loss": 1.2928, "step": 7 }, { "epoch": 0.022487702037947997, "grad_norm": 0.14622603356838226, "learning_rate": 8e-05, "loss": 1.4067, "step": 8 }, { "epoch": 0.025298664792691498, "grad_norm": 0.1453775018453598, "learning_rate": 9e-05, "loss": 1.4303, "step": 9 }, { "epoch": 0.028109627547434995, "grad_norm": 0.14096730947494507, "learning_rate": 0.0001, "loss": 1.4121, "step": 10 }, { "epoch": 0.030920590302178495, "grad_norm": 0.1445743590593338, "learning_rate": 9.999316524962345e-05, "loss": 1.3271, "step": 11 }, { "epoch": 0.03373155305692199, "grad_norm": 0.16167715191841125, "learning_rate": 9.997266286704631e-05, "loss": 1.5619, "step": 12 }, { "epoch": 0.036542515811665496, "grad_norm": 0.15229864418506622, "learning_rate": 9.993849845741524e-05, "loss": 1.4586, "step": 13 }, { "epoch": 0.03935347856640899, "grad_norm": 0.1474287509918213, "learning_rate": 9.989068136093873e-05, "loss": 1.5177, "step": 14 }, { "epoch": 0.0421644413211525, "grad_norm": 0.1248803585767746, "learning_rate": 9.98292246503335e-05, "loss": 1.3532, "step": 15 }, { "epoch": 0.044975404075895994, "grad_norm": 0.14056053757667542, "learning_rate": 9.975414512725057e-05, "loss": 1.5619, "step": 16 }, { "epoch": 0.04778636683063949, "grad_norm": 0.1264151781797409, "learning_rate": 9.966546331768191e-05, "loss": 1.3178, "step": 17 }, { "epoch": 0.050597329585382995, "grad_norm": 0.12755592167377472, "learning_rate": 9.956320346634876e-05, "loss": 1.4588, "step": 18 }, { "epoch": 0.05340829234012649, "grad_norm": 0.13023661077022552, "learning_rate": 9.944739353007344e-05, "loss": 1.5323, "step": 19 }, { "epoch": 0.05621925509486999, "grad_norm": 0.12780460715293884, "learning_rate": 9.931806517013612e-05, "loss": 1.4957, "step": 20 }, { "epoch": 0.059030217849613494, "grad_norm": 0.12764661014080048, "learning_rate": 9.917525374361912e-05, "loss": 0.9073, "step": 21 }, { "epoch": 0.06184118060435699, "grad_norm": 0.12089043855667114, "learning_rate": 9.901899829374047e-05, "loss": 0.8821, "step": 22 }, { "epoch": 0.0646521433591005, "grad_norm": 0.11314330995082855, "learning_rate": 9.884934153917997e-05, "loss": 0.8205, "step": 23 }, { "epoch": 0.06746310611384398, "grad_norm": 0.128267303109169, "learning_rate": 9.86663298624003e-05, "loss": 0.8299, "step": 24 }, { "epoch": 0.07027406886858749, "grad_norm": 0.13578976690769196, "learning_rate": 9.847001329696653e-05, "loss": 0.9728, "step": 25 }, { "epoch": 0.07308503162333099, "grad_norm": 0.11824839562177658, "learning_rate": 9.826044551386744e-05, "loss": 0.7145, "step": 26 }, { "epoch": 0.0758959943780745, "grad_norm": 0.13589346408843994, "learning_rate": 9.803768380684242e-05, "loss": 0.9693, "step": 27 }, { "epoch": 0.07870695713281799, "grad_norm": 0.13335402309894562, "learning_rate": 9.780178907671789e-05, "loss": 0.8476, "step": 28 }, { "epoch": 0.08151791988756149, "grad_norm": 0.14058586955070496, "learning_rate": 9.755282581475769e-05, "loss": 0.9686, "step": 29 }, { "epoch": 0.084328882642305, "grad_norm": 0.14462514221668243, "learning_rate": 9.729086208503174e-05, "loss": 0.8823, "step": 30 }, { "epoch": 0.08713984539704848, "grad_norm": 0.1474841982126236, "learning_rate": 9.701596950580806e-05, "loss": 0.9784, "step": 31 }, { "epoch": 0.08995080815179199, "grad_norm": 0.158616304397583, "learning_rate": 9.672822322997305e-05, "loss": 0.9347, "step": 32 }, { "epoch": 0.09276177090653549, "grad_norm": 0.1760544329881668, "learning_rate": 9.642770192448536e-05, "loss": 1.0842, "step": 33 }, { "epoch": 0.09557273366127898, "grad_norm": 0.1678563505411148, "learning_rate": 9.611448774886924e-05, "loss": 0.948, "step": 34 }, { "epoch": 0.09838369641602249, "grad_norm": 0.17710591852664948, "learning_rate": 9.578866633275288e-05, "loss": 1.0793, "step": 35 }, { "epoch": 0.10119465917076599, "grad_norm": 0.1860406994819641, "learning_rate": 9.545032675245813e-05, "loss": 0.8863, "step": 36 }, { "epoch": 0.10400562192550948, "grad_norm": 0.1834695041179657, "learning_rate": 9.509956150664796e-05, "loss": 1.1723, "step": 37 }, { "epoch": 0.10681658468025299, "grad_norm": 0.19306811690330505, "learning_rate": 9.473646649103818e-05, "loss": 0.9949, "step": 38 }, { "epoch": 0.10962754743499649, "grad_norm": 0.17946985363960266, "learning_rate": 9.43611409721806e-05, "loss": 0.833, "step": 39 }, { "epoch": 0.11243851018973998, "grad_norm": 0.20696531236171722, "learning_rate": 9.397368756032445e-05, "loss": 1.2203, "step": 40 }, { "epoch": 0.11524947294448348, "grad_norm": 0.20919673144817352, "learning_rate": 9.357421218136386e-05, "loss": 1.1978, "step": 41 }, { "epoch": 0.11806043569922699, "grad_norm": 0.21747088432312012, "learning_rate": 9.316282404787871e-05, "loss": 1.0537, "step": 42 }, { "epoch": 0.12087139845397049, "grad_norm": 0.21661460399627686, "learning_rate": 9.273963562927695e-05, "loss": 1.1814, "step": 43 }, { "epoch": 0.12368236120871398, "grad_norm": 0.21449671685695648, "learning_rate": 9.230476262104677e-05, "loss": 0.9883, "step": 44 }, { "epoch": 0.12649332396345747, "grad_norm": 0.257315069437027, "learning_rate": 9.185832391312644e-05, "loss": 1.2025, "step": 45 }, { "epoch": 0.129304286718201, "grad_norm": 0.21534892916679382, "learning_rate": 9.140044155740101e-05, "loss": 0.8393, "step": 46 }, { "epoch": 0.13211524947294448, "grad_norm": 0.2745459973812103, "learning_rate": 9.093124073433463e-05, "loss": 1.0887, "step": 47 }, { "epoch": 0.13492621222768797, "grad_norm": 0.24269066751003265, "learning_rate": 9.045084971874738e-05, "loss": 0.8241, "step": 48 }, { "epoch": 0.1377371749824315, "grad_norm": 0.4028018116950989, "learning_rate": 8.995939984474624e-05, "loss": 1.03, "step": 49 }, { "epoch": 0.14054813773717498, "grad_norm": 0.3536010980606079, "learning_rate": 8.945702546981969e-05, "loss": 0.5716, "step": 50 }, { "epoch": 0.14054813773717498, "eval_loss": 1.2065433263778687, "eval_runtime": 85.3934, "eval_samples_per_second": 7.015, "eval_steps_per_second": 1.757, "step": 50 }, { "epoch": 0.1433591004919185, "grad_norm": 4.709478378295898, "learning_rate": 8.894386393810563e-05, "loss": 1.5611, "step": 51 }, { "epoch": 0.14617006324666199, "grad_norm": 0.30073219537734985, "learning_rate": 8.842005554284296e-05, "loss": 1.2047, "step": 52 }, { "epoch": 0.14898102600140548, "grad_norm": 0.1945256143808365, "learning_rate": 8.788574348801675e-05, "loss": 1.1813, "step": 53 }, { "epoch": 0.151791988756149, "grad_norm": 0.18454934656620026, "learning_rate": 8.73410738492077e-05, "loss": 1.5916, "step": 54 }, { "epoch": 0.15460295151089248, "grad_norm": 0.1515914499759674, "learning_rate": 8.678619553365659e-05, "loss": 1.4081, "step": 55 }, { "epoch": 0.15741391426563597, "grad_norm": 0.13022002577781677, "learning_rate": 8.622126023955446e-05, "loss": 1.2496, "step": 56 }, { "epoch": 0.1602248770203795, "grad_norm": 0.13143523037433624, "learning_rate": 8.564642241456986e-05, "loss": 1.3947, "step": 57 }, { "epoch": 0.16303583977512298, "grad_norm": 0.12690874934196472, "learning_rate": 8.506183921362443e-05, "loss": 1.2596, "step": 58 }, { "epoch": 0.16584680252986647, "grad_norm": 0.12422709912061691, "learning_rate": 8.44676704559283e-05, "loss": 1.343, "step": 59 }, { "epoch": 0.16865776528461, "grad_norm": 0.12969334423542023, "learning_rate": 8.386407858128706e-05, "loss": 1.392, "step": 60 }, { "epoch": 0.17146872803935348, "grad_norm": 0.12702302634716034, "learning_rate": 8.32512286056924e-05, "loss": 1.3217, "step": 61 }, { "epoch": 0.17427969079409697, "grad_norm": 0.1328938603401184, "learning_rate": 8.262928807620843e-05, "loss": 1.4824, "step": 62 }, { "epoch": 0.1770906535488405, "grad_norm": 0.1313563585281372, "learning_rate": 8.199842702516583e-05, "loss": 1.523, "step": 63 }, { "epoch": 0.17990161630358398, "grad_norm": 0.12059923261404037, "learning_rate": 8.135881792367686e-05, "loss": 1.2967, "step": 64 }, { "epoch": 0.18271257905832747, "grad_norm": 0.12688899040222168, "learning_rate": 8.07106356344834e-05, "loss": 1.4306, "step": 65 }, { "epoch": 0.18552354181307099, "grad_norm": 0.12246695905923843, "learning_rate": 8.005405736415126e-05, "loss": 1.3235, "step": 66 }, { "epoch": 0.18833450456781448, "grad_norm": 0.1242348924279213, "learning_rate": 7.938926261462366e-05, "loss": 1.4104, "step": 67 }, { "epoch": 0.19114546732255797, "grad_norm": 0.13645027577877045, "learning_rate": 7.871643313414718e-05, "loss": 1.4968, "step": 68 }, { "epoch": 0.19395643007730148, "grad_norm": 0.13051196932792664, "learning_rate": 7.803575286758364e-05, "loss": 1.5518, "step": 69 }, { "epoch": 0.19676739283204497, "grad_norm": 0.13176923990249634, "learning_rate": 7.734740790612136e-05, "loss": 1.4094, "step": 70 }, { "epoch": 0.19957835558678846, "grad_norm": 0.11671514809131622, "learning_rate": 7.66515864363997e-05, "loss": 0.8881, "step": 71 }, { "epoch": 0.20238931834153198, "grad_norm": 0.10778027772903442, "learning_rate": 7.594847868906076e-05, "loss": 0.8403, "step": 72 }, { "epoch": 0.20520028109627547, "grad_norm": 0.116300567984581, "learning_rate": 7.52382768867422e-05, "loss": 0.9927, "step": 73 }, { "epoch": 0.20801124385101896, "grad_norm": 0.11594687402248383, "learning_rate": 7.452117519152542e-05, "loss": 0.8545, "step": 74 }, { "epoch": 0.21082220660576248, "grad_norm": 0.11859302967786789, "learning_rate": 7.379736965185368e-05, "loss": 0.8622, "step": 75 }, { "epoch": 0.21363316936050597, "grad_norm": 0.12879763543605804, "learning_rate": 7.30670581489344e-05, "loss": 1.0428, "step": 76 }, { "epoch": 0.21644413211524946, "grad_norm": 0.1271115094423294, "learning_rate": 7.233044034264034e-05, "loss": 0.8829, "step": 77 }, { "epoch": 0.21925509486999298, "grad_norm": 0.13602392375469208, "learning_rate": 7.158771761692464e-05, "loss": 0.8915, "step": 78 }, { "epoch": 0.22206605762473647, "grad_norm": 0.13157889246940613, "learning_rate": 7.083909302476453e-05, "loss": 0.9382, "step": 79 }, { "epoch": 0.22487702037947996, "grad_norm": 0.13888955116271973, "learning_rate": 7.008477123264848e-05, "loss": 0.9532, "step": 80 }, { "epoch": 0.22768798313422348, "grad_norm": 0.13553336262702942, "learning_rate": 6.932495846462261e-05, "loss": 0.9201, "step": 81 }, { "epoch": 0.23049894588896697, "grad_norm": 0.12914668023586273, "learning_rate": 6.855986244591104e-05, "loss": 0.7391, "step": 82 }, { "epoch": 0.23330990864371048, "grad_norm": 0.14608892798423767, "learning_rate": 6.778969234612584e-05, "loss": 1.0011, "step": 83 }, { "epoch": 0.23612087139845397, "grad_norm": 0.1511840671300888, "learning_rate": 6.701465872208216e-05, "loss": 0.8691, "step": 84 }, { "epoch": 0.23893183415319746, "grad_norm": 0.15799711644649506, "learning_rate": 6.623497346023418e-05, "loss": 0.8982, "step": 85 }, { "epoch": 0.24174279690794098, "grad_norm": 0.166089728474617, "learning_rate": 6.545084971874738e-05, "loss": 1.057, "step": 86 }, { "epoch": 0.24455375966268447, "grad_norm": 0.1631898581981659, "learning_rate": 6.466250186922325e-05, "loss": 1.0667, "step": 87 }, { "epoch": 0.24736472241742796, "grad_norm": 0.17566239833831787, "learning_rate": 6.387014543809223e-05, "loss": 1.0864, "step": 88 }, { "epoch": 0.2501756851721715, "grad_norm": 0.17442883551120758, "learning_rate": 6.307399704769099e-05, "loss": 0.8744, "step": 89 }, { "epoch": 0.25298664792691494, "grad_norm": 0.19199849665164948, "learning_rate": 6.227427435703997e-05, "loss": 1.2237, "step": 90 }, { "epoch": 0.25579761068165846, "grad_norm": 0.19623346626758575, "learning_rate": 6.147119600233758e-05, "loss": 1.0817, "step": 91 }, { "epoch": 0.258608573436402, "grad_norm": 0.21419471502304077, "learning_rate": 6.066498153718735e-05, "loss": 1.1695, "step": 92 }, { "epoch": 0.26141953619114544, "grad_norm": 0.21495421230793, "learning_rate": 5.985585137257401e-05, "loss": 1.1842, "step": 93 }, { "epoch": 0.26423049894588896, "grad_norm": 0.21394377946853638, "learning_rate": 5.90440267166055e-05, "loss": 0.9014, "step": 94 }, { "epoch": 0.2670414617006325, "grad_norm": 0.2387458235025406, "learning_rate": 5.8229729514036705e-05, "loss": 1.1359, "step": 95 }, { "epoch": 0.26985242445537594, "grad_norm": 0.21522080898284912, "learning_rate": 5.74131823855921e-05, "loss": 1.0008, "step": 96 }, { "epoch": 0.27266338721011946, "grad_norm": 0.27252939343452454, "learning_rate": 5.6594608567103456e-05, "loss": 1.1734, "step": 97 }, { "epoch": 0.275474349964863, "grad_norm": 0.26594337821006775, "learning_rate": 5.577423184847932e-05, "loss": 1.0443, "step": 98 }, { "epoch": 0.2782853127196065, "grad_norm": 0.2911270558834076, "learning_rate": 5.495227651252315e-05, "loss": 0.8558, "step": 99 }, { "epoch": 0.28109627547434995, "grad_norm": 0.2107650637626648, "learning_rate": 5.4128967273616625e-05, "loss": 0.2094, "step": 100 }, { "epoch": 0.28109627547434995, "eval_loss": 1.1504157781600952, "eval_runtime": 85.2851, "eval_samples_per_second": 7.024, "eval_steps_per_second": 1.759, "step": 100 }, { "epoch": 0.2839072382290935, "grad_norm": 0.19906841218471527, "learning_rate": 5.330452921628497e-05, "loss": 1.3064, "step": 101 }, { "epoch": 0.286718200983837, "grad_norm": 0.17362119257450104, "learning_rate": 5.247918773366112e-05, "loss": 1.3952, "step": 102 }, { "epoch": 0.28952916373858045, "grad_norm": 0.1592879295349121, "learning_rate": 5.165316846586541e-05, "loss": 1.2231, "step": 103 }, { "epoch": 0.29234012649332397, "grad_norm": 0.14367029070854187, "learning_rate": 5.0826697238317935e-05, "loss": 1.0226, "step": 104 }, { "epoch": 0.2951510892480675, "grad_norm": 0.13938987255096436, "learning_rate": 5e-05, "loss": 1.3075, "step": 105 }, { "epoch": 0.29796205200281095, "grad_norm": 0.14711101353168488, "learning_rate": 4.917330276168208e-05, "loss": 1.4146, "step": 106 }, { "epoch": 0.30077301475755447, "grad_norm": 0.12315893918275833, "learning_rate": 4.834683153413459e-05, "loss": 1.2684, "step": 107 }, { "epoch": 0.303583977512298, "grad_norm": 0.13632048666477203, "learning_rate": 4.7520812266338885e-05, "loss": 1.4743, "step": 108 }, { "epoch": 0.30639494026704145, "grad_norm": 0.11947023868560791, "learning_rate": 4.669547078371504e-05, "loss": 1.3492, "step": 109 }, { "epoch": 0.30920590302178497, "grad_norm": 0.11925946921110153, "learning_rate": 4.5871032726383386e-05, "loss": 1.4018, "step": 110 }, { "epoch": 0.3120168657765285, "grad_norm": 0.11408793926239014, "learning_rate": 4.504772348747687e-05, "loss": 1.1567, "step": 111 }, { "epoch": 0.31482782853127195, "grad_norm": 0.10827047377824783, "learning_rate": 4.4225768151520694e-05, "loss": 1.1115, "step": 112 }, { "epoch": 0.31763879128601546, "grad_norm": 0.11432784795761108, "learning_rate": 4.3405391432896555e-05, "loss": 1.2277, "step": 113 }, { "epoch": 0.320449754040759, "grad_norm": 0.12372433394193649, "learning_rate": 4.2586817614407895e-05, "loss": 1.3614, "step": 114 }, { "epoch": 0.32326071679550245, "grad_norm": 0.12038455903530121, "learning_rate": 4.17702704859633e-05, "loss": 1.37, "step": 115 }, { "epoch": 0.32607167955024596, "grad_norm": 0.12178530544042587, "learning_rate": 4.095597328339452e-05, "loss": 1.4235, "step": 116 }, { "epoch": 0.3288826423049895, "grad_norm": 0.122628353536129, "learning_rate": 4.0144148627425993e-05, "loss": 1.4401, "step": 117 }, { "epoch": 0.33169360505973294, "grad_norm": 0.13086412847042084, "learning_rate": 3.933501846281267e-05, "loss": 1.6775, "step": 118 }, { "epoch": 0.33450456781447646, "grad_norm": 0.1191410943865776, "learning_rate": 3.852880399766243e-05, "loss": 1.403, "step": 119 }, { "epoch": 0.33731553056922, "grad_norm": 0.12145199626684189, "learning_rate": 3.772572564296005e-05, "loss": 1.3555, "step": 120 }, { "epoch": 0.34012649332396344, "grad_norm": 0.10900752246379852, "learning_rate": 3.6926002952309016e-05, "loss": 1.1942, "step": 121 }, { "epoch": 0.34293745607870696, "grad_norm": 0.10437808185815811, "learning_rate": 3.612985456190778e-05, "loss": 0.8917, "step": 122 }, { "epoch": 0.3457484188334505, "grad_norm": 0.10854747891426086, "learning_rate": 3.533749813077677e-05, "loss": 0.9686, "step": 123 }, { "epoch": 0.34855938158819394, "grad_norm": 0.10949317365884781, "learning_rate": 3.4549150281252636e-05, "loss": 0.809, "step": 124 }, { "epoch": 0.35137034434293746, "grad_norm": 0.10705723613500595, "learning_rate": 3.3765026539765834e-05, "loss": 0.8198, "step": 125 }, { "epoch": 0.354181307097681, "grad_norm": 0.11876116693019867, "learning_rate": 3.298534127791785e-05, "loss": 0.9964, "step": 126 }, { "epoch": 0.35699226985242444, "grad_norm": 0.11575191468000412, "learning_rate": 3.221030765387417e-05, "loss": 0.9049, "step": 127 }, { "epoch": 0.35980323260716796, "grad_norm": 0.11619889736175537, "learning_rate": 3.144013755408895e-05, "loss": 0.8849, "step": 128 }, { "epoch": 0.3626141953619115, "grad_norm": 0.13133642077445984, "learning_rate": 3.0675041535377405e-05, "loss": 0.8731, "step": 129 }, { "epoch": 0.36542515811665494, "grad_norm": 0.12494848668575287, "learning_rate": 2.991522876735154e-05, "loss": 0.9458, "step": 130 }, { "epoch": 0.36823612087139845, "grad_norm": 0.1314287930727005, "learning_rate": 2.916090697523549e-05, "loss": 1.0028, "step": 131 }, { "epoch": 0.37104708362614197, "grad_norm": 0.12119800597429276, "learning_rate": 2.8412282383075363e-05, "loss": 0.785, "step": 132 }, { "epoch": 0.37385804638088543, "grad_norm": 0.13152635097503662, "learning_rate": 2.766955965735968e-05, "loss": 0.8102, "step": 133 }, { "epoch": 0.37666900913562895, "grad_norm": 0.13744279742240906, "learning_rate": 2.693294185106562e-05, "loss": 0.9912, "step": 134 }, { "epoch": 0.37947997189037247, "grad_norm": 0.1392669528722763, "learning_rate": 2.6202630348146324e-05, "loss": 0.9372, "step": 135 }, { "epoch": 0.38229093464511593, "grad_norm": 0.15417630970478058, "learning_rate": 2.547882480847461e-05, "loss": 1.0982, "step": 136 }, { "epoch": 0.38510189739985945, "grad_norm": 0.14992496371269226, "learning_rate": 2.476172311325783e-05, "loss": 0.9687, "step": 137 }, { "epoch": 0.38791286015460297, "grad_norm": 0.1466035395860672, "learning_rate": 2.405152131093926e-05, "loss": 0.89, "step": 138 }, { "epoch": 0.39072382290934643, "grad_norm": 0.16423414647579193, "learning_rate": 2.3348413563600325e-05, "loss": 1.0407, "step": 139 }, { "epoch": 0.39353478566408995, "grad_norm": 0.16661831736564636, "learning_rate": 2.2652592093878666e-05, "loss": 1.1279, "step": 140 }, { "epoch": 0.39634574841883347, "grad_norm": 0.19206121563911438, "learning_rate": 2.196424713241637e-05, "loss": 1.0473, "step": 141 }, { "epoch": 0.39915671117357693, "grad_norm": 0.2245406061410904, "learning_rate": 2.128356686585282e-05, "loss": 1.1602, "step": 142 }, { "epoch": 0.40196767392832045, "grad_norm": 0.19214637577533722, "learning_rate": 2.061073738537635e-05, "loss": 1.0542, "step": 143 }, { "epoch": 0.40477863668306396, "grad_norm": 0.19177591800689697, "learning_rate": 1.9945942635848748e-05, "loss": 0.9993, "step": 144 }, { "epoch": 0.4075895994378074, "grad_norm": 0.21935953199863434, "learning_rate": 1.928936436551661e-05, "loss": 1.2079, "step": 145 }, { "epoch": 0.41040056219255094, "grad_norm": 0.23886896669864655, "learning_rate": 1.8641182076323148e-05, "loss": 1.327, "step": 146 }, { "epoch": 0.41321152494729446, "grad_norm": 0.20663587749004364, "learning_rate": 1.800157297483417e-05, "loss": 0.8727, "step": 147 }, { "epoch": 0.4160224877020379, "grad_norm": 0.23780639469623566, "learning_rate": 1.7370711923791567e-05, "loss": 0.9077, "step": 148 }, { "epoch": 0.41883345045678144, "grad_norm": 0.23140735924243927, "learning_rate": 1.6748771394307585e-05, "loss": 0.6117, "step": 149 }, { "epoch": 0.42164441321152496, "grad_norm": 0.3450438976287842, "learning_rate": 1.6135921418712956e-05, "loss": 0.7388, "step": 150 }, { "epoch": 0.42164441321152496, "eval_loss": 1.135201334953308, "eval_runtime": 85.3695, "eval_samples_per_second": 7.017, "eval_steps_per_second": 1.757, "step": 150 }, { "epoch": 0.4244553759662684, "grad_norm": 0.1362743079662323, "learning_rate": 1.553232954407171e-05, "loss": 1.3902, "step": 151 }, { "epoch": 0.42726633872101194, "grad_norm": 0.13644061982631683, "learning_rate": 1.4938160786375572e-05, "loss": 1.515, "step": 152 }, { "epoch": 0.43007730147575546, "grad_norm": 0.13651560246944427, "learning_rate": 1.435357758543015e-05, "loss": 1.4548, "step": 153 }, { "epoch": 0.4328882642304989, "grad_norm": 0.13482840359210968, "learning_rate": 1.3778739760445552e-05, "loss": 1.4043, "step": 154 }, { "epoch": 0.43569922698524244, "grad_norm": 0.13234271109104156, "learning_rate": 1.3213804466343421e-05, "loss": 1.3855, "step": 155 }, { "epoch": 0.43851018973998596, "grad_norm": 0.12307897955179214, "learning_rate": 1.2658926150792322e-05, "loss": 1.2278, "step": 156 }, { "epoch": 0.4413211524947294, "grad_norm": 0.124372698366642, "learning_rate": 1.2114256511983274e-05, "loss": 1.2112, "step": 157 }, { "epoch": 0.44413211524947294, "grad_norm": 0.12010175734758377, "learning_rate": 1.157994445715706e-05, "loss": 1.3216, "step": 158 }, { "epoch": 0.44694307800421645, "grad_norm": 0.12309254705905914, "learning_rate": 1.1056136061894384e-05, "loss": 1.3443, "step": 159 }, { "epoch": 0.4497540407589599, "grad_norm": 0.12732581794261932, "learning_rate": 1.0542974530180327e-05, "loss": 1.4406, "step": 160 }, { "epoch": 0.45256500351370343, "grad_norm": 0.13361938297748566, "learning_rate": 1.0040600155253765e-05, "loss": 1.5874, "step": 161 }, { "epoch": 0.45537596626844695, "grad_norm": 0.12167298048734665, "learning_rate": 9.549150281252633e-06, "loss": 1.3533, "step": 162 }, { "epoch": 0.45818692902319047, "grad_norm": 0.12136907130479813, "learning_rate": 9.068759265665384e-06, "loss": 1.3793, "step": 163 }, { "epoch": 0.46099789177793393, "grad_norm": 0.12473531812429428, "learning_rate": 8.599558442598998e-06, "loss": 1.4031, "step": 164 }, { "epoch": 0.46380885453267745, "grad_norm": 0.12949693202972412, "learning_rate": 8.141676086873572e-06, "loss": 1.5296, "step": 165 }, { "epoch": 0.46661981728742097, "grad_norm": 0.12189978361129761, "learning_rate": 7.695237378953223e-06, "loss": 1.3504, "step": 166 }, { "epoch": 0.46943078004216443, "grad_norm": 0.11518795788288116, "learning_rate": 7.260364370723044e-06, "loss": 1.1917, "step": 167 }, { "epoch": 0.47224174279690795, "grad_norm": 0.12326858192682266, "learning_rate": 6.837175952121306e-06, "loss": 1.3341, "step": 168 }, { "epoch": 0.47505270555165147, "grad_norm": 0.12631583213806152, "learning_rate": 6.425787818636131e-06, "loss": 1.3899, "step": 169 }, { "epoch": 0.47786366830639493, "grad_norm": 0.12838424742221832, "learning_rate": 6.026312439675552e-06, "loss": 1.4159, "step": 170 }, { "epoch": 0.48067463106113845, "grad_norm": 0.11090139299631119, "learning_rate": 5.6388590278194096e-06, "loss": 1.0122, "step": 171 }, { "epoch": 0.48348559381588196, "grad_norm": 0.11023272573947906, "learning_rate": 5.263533508961827e-06, "loss": 0.8288, "step": 172 }, { "epoch": 0.4862965565706254, "grad_norm": 0.12130605429410934, "learning_rate": 4.900438493352055e-06, "loss": 0.9378, "step": 173 }, { "epoch": 0.48910751932536894, "grad_norm": 0.11250211298465729, "learning_rate": 4.549673247541875e-06, "loss": 0.8911, "step": 174 }, { "epoch": 0.49191848208011246, "grad_norm": 0.1276254802942276, "learning_rate": 4.2113336672471245e-06, "loss": 0.8033, "step": 175 }, { "epoch": 0.4947294448348559, "grad_norm": 0.1163821816444397, "learning_rate": 3.885512251130763e-06, "loss": 0.7885, "step": 176 }, { "epoch": 0.49754040758959944, "grad_norm": 0.12443026900291443, "learning_rate": 3.5722980755146517e-06, "loss": 0.9391, "step": 177 }, { "epoch": 0.500351370344343, "grad_norm": 0.12332401424646378, "learning_rate": 3.271776770026963e-06, "loss": 0.9616, "step": 178 }, { "epoch": 0.5031623330990864, "grad_norm": 0.1353314071893692, "learning_rate": 2.9840304941919415e-06, "loss": 0.9899, "step": 179 }, { "epoch": 0.5059732958538299, "grad_norm": 0.12577523291110992, "learning_rate": 2.7091379149682685e-06, "loss": 0.9051, "step": 180 }, { "epoch": 0.5087842586085735, "grad_norm": 0.13708637654781342, "learning_rate": 2.4471741852423237e-06, "loss": 0.9972, "step": 181 }, { "epoch": 0.5115952213633169, "grad_norm": 0.14662538468837738, "learning_rate": 2.1982109232821178e-06, "loss": 0.9101, "step": 182 }, { "epoch": 0.5144061841180604, "grad_norm": 0.13014757633209229, "learning_rate": 1.962316193157593e-06, "loss": 0.847, "step": 183 }, { "epoch": 0.517217146872804, "grad_norm": 0.14786508679389954, "learning_rate": 1.7395544861325718e-06, "loss": 0.9398, "step": 184 }, { "epoch": 0.5200281096275474, "grad_norm": 0.15716198086738586, "learning_rate": 1.5299867030334814e-06, "loss": 0.985, "step": 185 }, { "epoch": 0.5228390723822909, "grad_norm": 0.1636255532503128, "learning_rate": 1.333670137599713e-06, "loss": 0.9809, "step": 186 }, { "epoch": 0.5256500351370345, "grad_norm": 0.1572836935520172, "learning_rate": 1.1506584608200367e-06, "loss": 0.9595, "step": 187 }, { "epoch": 0.5284609978917779, "grad_norm": 0.17926397919654846, "learning_rate": 9.810017062595322e-07, "loss": 1.1744, "step": 188 }, { "epoch": 0.5312719606465214, "grad_norm": 0.18034204840660095, "learning_rate": 8.247462563808817e-07, "loss": 1.1004, "step": 189 }, { "epoch": 0.534082923401265, "grad_norm": 0.18945005536079407, "learning_rate": 6.819348298638839e-07, "loss": 1.2035, "step": 190 }, { "epoch": 0.5368938861560084, "grad_norm": 0.16576136648654938, "learning_rate": 5.526064699265753e-07, "loss": 0.7945, "step": 191 }, { "epoch": 0.5397048489107519, "grad_norm": 0.19269107282161713, "learning_rate": 4.367965336512403e-07, "loss": 1.0795, "step": 192 }, { "epoch": 0.5425158116654955, "grad_norm": 0.20619481801986694, "learning_rate": 3.3453668231809286e-07, "loss": 1.0719, "step": 193 }, { "epoch": 0.5453267744202389, "grad_norm": 0.22908565402030945, "learning_rate": 2.458548727494292e-07, "loss": 1.3424, "step": 194 }, { "epoch": 0.5481377371749825, "grad_norm": 0.2186325490474701, "learning_rate": 1.7077534966650766e-07, "loss": 1.0559, "step": 195 }, { "epoch": 0.550948699929726, "grad_norm": 0.20421196520328522, "learning_rate": 1.0931863906127327e-07, "loss": 0.8532, "step": 196 }, { "epoch": 0.5537596626844694, "grad_norm": 0.20993749797344208, "learning_rate": 6.150154258476315e-08, "loss": 0.8476, "step": 197 }, { "epoch": 0.556570625439213, "grad_norm": 0.2661491632461548, "learning_rate": 2.7337132953697554e-08, "loss": 1.0219, "step": 198 }, { "epoch": 0.5593815881939564, "grad_norm": 0.24732057750225067, "learning_rate": 6.834750376549792e-09, "loss": 0.6943, "step": 199 }, { "epoch": 0.5621925509486999, "grad_norm": 0.25222060084342957, "learning_rate": 0.0, "loss": 0.5718, "step": 200 }, { "epoch": 0.5621925509486999, "eval_loss": 1.1340564489364624, "eval_runtime": 85.4353, "eval_samples_per_second": 7.011, "eval_steps_per_second": 1.756, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.6804452979441664e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }