{ "best_metric": 0.19376881420612335, "best_model_checkpoint": "d:\\\\whisper-large-v3-pt-3000h-4\\checkpoint-5529", "epoch": 10.0, "eval_steps": 500, "global_step": 55290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004521613311629589, "grad_norm": 2.3342323303222656, "learning_rate": 2.5e-08, "loss": 0.0451, "step": 25 }, { "epoch": 0.009043226623259178, "grad_norm": 2.0830228328704834, "learning_rate": 5e-08, "loss": 0.0393, "step": 50 }, { "epoch": 0.013564839934888768, "grad_norm": 3.438735008239746, "learning_rate": 7.5e-08, "loss": 0.0355, "step": 75 }, { "epoch": 0.018086453246518357, "grad_norm": 14.74426555633545, "learning_rate": 1e-07, "loss": 0.0359, "step": 100 }, { "epoch": 0.022608066558147948, "grad_norm": 2.5502216815948486, "learning_rate": 1.25e-07, "loss": 0.028, "step": 125 }, { "epoch": 0.027129679869777535, "grad_norm": 6.838224411010742, "learning_rate": 1.5e-07, "loss": 0.0214, "step": 150 }, { "epoch": 0.03165129318140713, "grad_norm": 6.352050304412842, "learning_rate": 1.75e-07, "loss": 0.0336, "step": 175 }, { "epoch": 0.03617290649303671, "grad_norm": 8.222951889038086, "learning_rate": 2e-07, "loss": 0.028, "step": 200 }, { "epoch": 0.040694519804666304, "grad_norm": 5.666439533233643, "learning_rate": 2.25e-07, "loss": 0.0295, "step": 225 }, { "epoch": 0.045216133116295895, "grad_norm": 0.2596105635166168, "learning_rate": 2.5e-07, "loss": 0.0338, "step": 250 }, { "epoch": 0.049737746427925486, "grad_norm": 4.480187892913818, "learning_rate": 2.75e-07, "loss": 0.0365, "step": 275 }, { "epoch": 0.05425935973955507, "grad_norm": 0.934718132019043, "learning_rate": 3e-07, "loss": 0.0284, "step": 300 }, { "epoch": 0.05878097305118466, "grad_norm": 1.4461249113082886, "learning_rate": 3.25e-07, "loss": 0.0199, "step": 325 }, { "epoch": 0.06330258636281426, "grad_norm": 3.453277587890625, "learning_rate": 3.5e-07, "loss": 0.0401, "step": 350 }, { "epoch": 
0.06782419967444384, "grad_norm": 6.596461772918701, "learning_rate": 3.75e-07, "loss": 0.0522, "step": 375 }, { "epoch": 0.07234581298607343, "grad_norm": 0.7677063345909119, "learning_rate": 3.99e-07, "loss": 0.1004, "step": 400 }, { "epoch": 0.07686742629770302, "grad_norm": 7.2375264167785645, "learning_rate": 4.24e-07, "loss": 0.0313, "step": 425 }, { "epoch": 0.08138903960933261, "grad_norm": 4.461888790130615, "learning_rate": 4.49e-07, "loss": 0.0358, "step": 450 }, { "epoch": 0.0859106529209622, "grad_norm": 2.9905307292938232, "learning_rate": 4.7399999999999993e-07, "loss": 0.035, "step": 475 }, { "epoch": 0.09043226623259179, "grad_norm": 2.32279896736145, "learning_rate": 4.99e-07, "loss": 0.0296, "step": 500 }, { "epoch": 0.09495387954422138, "grad_norm": 8.56678581237793, "learning_rate": 5.24e-07, "loss": 0.0357, "step": 525 }, { "epoch": 0.09947549285585097, "grad_norm": 2.7547428607940674, "learning_rate": 5.490000000000001e-07, "loss": 0.0378, "step": 550 }, { "epoch": 0.10399710616748056, "grad_norm": 1.2645742893218994, "learning_rate": 5.739999999999999e-07, "loss": 0.0434, "step": 575 }, { "epoch": 0.10851871947911014, "grad_norm": 2.233053684234619, "learning_rate": 5.989999999999999e-07, "loss": 0.042, "step": 600 }, { "epoch": 0.11304033279073973, "grad_norm": 13.10155200958252, "learning_rate": 6.24e-07, "loss": 0.0454, "step": 625 }, { "epoch": 0.11756194610236932, "grad_norm": 9.450150489807129, "learning_rate": 6.49e-07, "loss": 0.0331, "step": 650 }, { "epoch": 0.12208355941399891, "grad_norm": 0.5919239521026611, "learning_rate": 6.74e-07, "loss": 0.033, "step": 675 }, { "epoch": 0.12660517272562852, "grad_norm": 24.587112426757812, "learning_rate": 6.989999999999999e-07, "loss": 0.0311, "step": 700 }, { "epoch": 0.13112678603725808, "grad_norm": 19.61304473876953, "learning_rate": 7.24e-07, "loss": 0.0359, "step": 725 }, { "epoch": 0.13564839934888767, "grad_norm": 20.30430030822754, "learning_rate": 7.489999999999999e-07, "loss": 
0.0531, "step": 750 }, { "epoch": 0.14017001266051726, "grad_norm": 16.841527938842773, "learning_rate": 7.74e-07, "loss": 0.0761, "step": 775 }, { "epoch": 0.14469162597214685, "grad_norm": 18.845434188842773, "learning_rate": 7.99e-07, "loss": 0.0871, "step": 800 }, { "epoch": 0.14921323928377644, "grad_norm": 14.67888355255127, "learning_rate": 8.24e-07, "loss": 0.0414, "step": 825 }, { "epoch": 0.15373485259540604, "grad_norm": 6.383974075317383, "learning_rate": 8.489999999999999e-07, "loss": 0.0344, "step": 850 }, { "epoch": 0.15825646590703563, "grad_norm": 7.403707504272461, "learning_rate": 8.739999999999999e-07, "loss": 0.0317, "step": 875 }, { "epoch": 0.16277807921866522, "grad_norm": 0.7672566771507263, "learning_rate": 8.99e-07, "loss": 0.0319, "step": 900 }, { "epoch": 0.1672996925302948, "grad_norm": 3.935624599456787, "learning_rate": 9.24e-07, "loss": 0.0364, "step": 925 }, { "epoch": 0.1718213058419244, "grad_norm": 6.021194934844971, "learning_rate": 9.489999999999999e-07, "loss": 0.0378, "step": 950 }, { "epoch": 0.176342919153554, "grad_norm": 0.7235254645347595, "learning_rate": 9.74e-07, "loss": 0.0386, "step": 975 }, { "epoch": 0.18086453246518358, "grad_norm": 5.369808673858643, "learning_rate": 9.989999999999999e-07, "loss": 0.0307, "step": 1000 }, { "epoch": 0.18538614577681317, "grad_norm": 4.743622303009033, "learning_rate": 9.995579296371338e-07, "loss": 0.0555, "step": 1025 }, { "epoch": 0.18990775908844276, "grad_norm": 12.76374340057373, "learning_rate": 9.99097439675815e-07, "loss": 0.0478, "step": 1050 }, { "epoch": 0.19442937240007235, "grad_norm": 4.498536109924316, "learning_rate": 9.986369497144962e-07, "loss": 0.0443, "step": 1075 }, { "epoch": 0.19895098571170194, "grad_norm": 9.623854637145996, "learning_rate": 9.981764597531773e-07, "loss": 0.044, "step": 1100 }, { "epoch": 0.20347259902333154, "grad_norm": 1.4003485441207886, "learning_rate": 9.977159697918585e-07, "loss": 0.042, "step": 1125 }, { "epoch": 
0.20799421233496113, "grad_norm": 3.358299493789673, "learning_rate": 9.972554798305397e-07, "loss": 0.0422, "step": 1150 }, { "epoch": 0.21251582564659072, "grad_norm": 33.19310760498047, "learning_rate": 9.967949898692208e-07, "loss": 0.0443, "step": 1175 }, { "epoch": 0.21703743895822028, "grad_norm": 7.122214317321777, "learning_rate": 9.96334499907902e-07, "loss": 0.0962, "step": 1200 }, { "epoch": 0.22155905226984987, "grad_norm": 8.756924629211426, "learning_rate": 9.958740099465832e-07, "loss": 0.0399, "step": 1225 }, { "epoch": 0.22608066558147946, "grad_norm": 3.779580593109131, "learning_rate": 9.954135199852643e-07, "loss": 0.035, "step": 1250 }, { "epoch": 0.23060227889310905, "grad_norm": 15.683145523071289, "learning_rate": 9.949530300239455e-07, "loss": 0.042, "step": 1275 }, { "epoch": 0.23512389220473864, "grad_norm": 12.767990112304688, "learning_rate": 9.944925400626267e-07, "loss": 0.0517, "step": 1300 }, { "epoch": 0.23964550551636823, "grad_norm": 0.5977092981338501, "learning_rate": 9.940320501013078e-07, "loss": 0.0266, "step": 1325 }, { "epoch": 0.24416711882799783, "grad_norm": 2.613328218460083, "learning_rate": 9.935715601399888e-07, "loss": 0.0302, "step": 1350 }, { "epoch": 0.24868873213962742, "grad_norm": 3.43841290473938, "learning_rate": 9.9311107017867e-07, "loss": 0.0604, "step": 1375 }, { "epoch": 0.25321034545125704, "grad_norm": 1.4975024461746216, "learning_rate": 9.926505802173513e-07, "loss": 0.0524, "step": 1400 }, { "epoch": 0.25773195876288657, "grad_norm": 6.18322229385376, "learning_rate": 9.921900902560325e-07, "loss": 0.0318, "step": 1425 }, { "epoch": 0.26225357207451616, "grad_norm": 3.4327890872955322, "learning_rate": 9.917296002947134e-07, "loss": 0.0449, "step": 1450 }, { "epoch": 0.26677518538614575, "grad_norm": 19.299654006958008, "learning_rate": 9.912691103333946e-07, "loss": 0.0371, "step": 1475 }, { "epoch": 0.27129679869777534, "grad_norm": 17.62229347229004, "learning_rate": 9.908086203720758e-07, 
"loss": 0.046, "step": 1500 }, { "epoch": 0.27581841200940493, "grad_norm": 1.671424150466919, "learning_rate": 9.90348130410757e-07, "loss": 0.0391, "step": 1525 }, { "epoch": 0.2803400253210345, "grad_norm": 21.561243057250977, "learning_rate": 9.898876404494381e-07, "loss": 0.0729, "step": 1550 }, { "epoch": 0.2848616386326641, "grad_norm": 7.39694356918335, "learning_rate": 9.894271504881193e-07, "loss": 0.052, "step": 1575 }, { "epoch": 0.2893832519442937, "grad_norm": 1.9565777778625488, "learning_rate": 9.889850801252534e-07, "loss": 0.082, "step": 1600 }, { "epoch": 0.2939048652559233, "grad_norm": 1.8609459400177002, "learning_rate": 9.885245901639343e-07, "loss": 0.0446, "step": 1625 }, { "epoch": 0.2984264785675529, "grad_norm": 4.542464733123779, "learning_rate": 9.880641002026155e-07, "loss": 0.0353, "step": 1650 }, { "epoch": 0.3029480918791825, "grad_norm": 0.291640967130661, "learning_rate": 9.876036102412967e-07, "loss": 0.0359, "step": 1675 }, { "epoch": 0.30746970519081207, "grad_norm": 0.6001373529434204, "learning_rate": 9.871431202799778e-07, "loss": 0.031, "step": 1700 }, { "epoch": 0.31199131850244166, "grad_norm": 12.14961051940918, "learning_rate": 9.86682630318659e-07, "loss": 0.0447, "step": 1725 }, { "epoch": 0.31651293181407125, "grad_norm": 23.949186325073242, "learning_rate": 9.862221403573402e-07, "loss": 0.0479, "step": 1750 }, { "epoch": 0.32103454512570084, "grad_norm": 4.505954742431641, "learning_rate": 9.857616503960213e-07, "loss": 0.0481, "step": 1775 }, { "epoch": 0.32555615843733043, "grad_norm": 2.596393585205078, "learning_rate": 9.853011604347025e-07, "loss": 0.0482, "step": 1800 }, { "epoch": 0.33007777174896, "grad_norm": 8.913126945495605, "learning_rate": 9.848406704733837e-07, "loss": 0.0612, "step": 1825 }, { "epoch": 0.3345993850605896, "grad_norm": 5.877014636993408, "learning_rate": 9.843801805120648e-07, "loss": 0.0476, "step": 1850 }, { "epoch": 0.3391209983722192, "grad_norm": 22.622203826904297, 
"learning_rate": 9.83919690550746e-07, "loss": 0.0447, "step": 1875 }, { "epoch": 0.3436426116838488, "grad_norm": 18.984943389892578, "learning_rate": 9.834592005894271e-07, "loss": 0.0543, "step": 1900 }, { "epoch": 0.3481642249954784, "grad_norm": 13.041614532470703, "learning_rate": 9.829987106281083e-07, "loss": 0.0393, "step": 1925 }, { "epoch": 0.352685838307108, "grad_norm": 23.434789657592773, "learning_rate": 9.825382206667893e-07, "loss": 0.0488, "step": 1950 }, { "epoch": 0.35720745161873757, "grad_norm": 7.045641899108887, "learning_rate": 9.820777307054706e-07, "loss": 0.0557, "step": 1975 }, { "epoch": 0.36172906493036716, "grad_norm": 17.288244247436523, "learning_rate": 9.816172407441518e-07, "loss": 0.0821, "step": 2000 }, { "epoch": 0.36625067824199675, "grad_norm": 4.284151077270508, "learning_rate": 9.81156750782833e-07, "loss": 0.0526, "step": 2025 }, { "epoch": 0.37077229155362634, "grad_norm": 9.124343872070312, "learning_rate": 9.80696260821514e-07, "loss": 0.0309, "step": 2050 }, { "epoch": 0.37529390486525593, "grad_norm": 4.401906490325928, "learning_rate": 9.80235770860195e-07, "loss": 0.046, "step": 2075 }, { "epoch": 0.3798155181768855, "grad_norm": 0.29334700107574463, "learning_rate": 9.797752808988765e-07, "loss": 0.0349, "step": 2100 }, { "epoch": 0.3843371314885151, "grad_norm": 4.6845197677612305, "learning_rate": 9.793147909375574e-07, "loss": 0.0398, "step": 2125 }, { "epoch": 0.3888587448001447, "grad_norm": 10.254193305969238, "learning_rate": 9.788543009762386e-07, "loss": 0.0441, "step": 2150 }, { "epoch": 0.3933803581117743, "grad_norm": 9.839592933654785, "learning_rate": 9.783938110149198e-07, "loss": 0.0427, "step": 2175 }, { "epoch": 0.3979019714234039, "grad_norm": 7.145606517791748, "learning_rate": 9.77933321053601e-07, "loss": 0.0364, "step": 2200 }, { "epoch": 0.4024235847350335, "grad_norm": 13.468842506408691, "learning_rate": 9.77472831092282e-07, "loss": 0.0578, "step": 2225 }, { "epoch": 0.40694519804666307, 
"grad_norm": 5.4638519287109375, "learning_rate": 9.770123411309633e-07, "loss": 0.0596, "step": 2250 }, { "epoch": 0.41146681135829266, "grad_norm": 3.9997127056121826, "learning_rate": 9.765518511696444e-07, "loss": 0.0304, "step": 2275 }, { "epoch": 0.41598842466992225, "grad_norm": 1.2957385778427124, "learning_rate": 9.760913612083256e-07, "loss": 0.0308, "step": 2300 }, { "epoch": 0.42051003798155184, "grad_norm": 15.630707740783691, "learning_rate": 9.756308712470068e-07, "loss": 0.0538, "step": 2325 }, { "epoch": 0.42503165129318143, "grad_norm": 0.9495472311973572, "learning_rate": 9.75170381285688e-07, "loss": 0.0484, "step": 2350 }, { "epoch": 0.42955326460481097, "grad_norm": 8.299753189086914, "learning_rate": 9.74709891324369e-07, "loss": 0.0941, "step": 2375 }, { "epoch": 0.43407487791644056, "grad_norm": 2.055479049682617, "learning_rate": 9.742494013630503e-07, "loss": 0.0871, "step": 2400 }, { "epoch": 0.43859649122807015, "grad_norm": 2.5821428298950195, "learning_rate": 9.737889114017314e-07, "loss": 0.0319, "step": 2425 }, { "epoch": 0.44311810453969974, "grad_norm": 15.306851387023926, "learning_rate": 9.733284214404126e-07, "loss": 0.0397, "step": 2450 }, { "epoch": 0.44763971785132933, "grad_norm": 12.804640769958496, "learning_rate": 9.728679314790938e-07, "loss": 0.0397, "step": 2475 }, { "epoch": 0.4521613311629589, "grad_norm": 2.8858513832092285, "learning_rate": 9.72407441517775e-07, "loss": 0.0423, "step": 2500 }, { "epoch": 0.4566829444745885, "grad_norm": 5.125776767730713, "learning_rate": 9.71946951556456e-07, "loss": 0.0372, "step": 2525 }, { "epoch": 0.4612045577862181, "grad_norm": 3.445122718811035, "learning_rate": 9.71486461595137e-07, "loss": 0.0597, "step": 2550 }, { "epoch": 0.4657261710978477, "grad_norm": 1.8609939813613892, "learning_rate": 9.710259716338184e-07, "loss": 0.0356, "step": 2575 }, { "epoch": 0.4702477844094773, "grad_norm": 6.8225998878479, "learning_rate": 9.705654816724996e-07, "loss": 0.0341, "step": 
2600 }, { "epoch": 0.4747693977211069, "grad_norm": 20.26767349243164, "learning_rate": 9.701049917111808e-07, "loss": 0.049, "step": 2625 }, { "epoch": 0.47929101103273647, "grad_norm": 5.631345748901367, "learning_rate": 9.696445017498617e-07, "loss": 0.0616, "step": 2650 }, { "epoch": 0.48381262434436606, "grad_norm": 20.55868148803711, "learning_rate": 9.691840117885429e-07, "loss": 0.062, "step": 2675 }, { "epoch": 0.48833423765599565, "grad_norm": 32.73431396484375, "learning_rate": 9.687235218272243e-07, "loss": 0.0653, "step": 2700 }, { "epoch": 0.49285585096762524, "grad_norm": 1.6700994968414307, "learning_rate": 9.682630318659052e-07, "loss": 0.0486, "step": 2725 }, { "epoch": 0.49737746427925483, "grad_norm": 14.762917518615723, "learning_rate": 9.678025419045864e-07, "loss": 0.0641, "step": 2750 }, { "epoch": 0.5018990775908845, "grad_norm": 1.297577977180481, "learning_rate": 9.673420519432675e-07, "loss": 0.0625, "step": 2775 }, { "epoch": 0.5064206909025141, "grad_norm": 0.5399802327156067, "learning_rate": 9.668815619819487e-07, "loss": 0.0931, "step": 2800 }, { "epoch": 0.5109423042141437, "grad_norm": 7.880299091339111, "learning_rate": 9.664210720206299e-07, "loss": 0.0397, "step": 2825 }, { "epoch": 0.5154639175257731, "grad_norm": 5.446402549743652, "learning_rate": 9.65960582059311e-07, "loss": 0.0327, "step": 2850 }, { "epoch": 0.5199855308374027, "grad_norm": 10.012272834777832, "learning_rate": 9.655000920979922e-07, "loss": 0.0405, "step": 2875 }, { "epoch": 0.5245071441490323, "grad_norm": 4.505229949951172, "learning_rate": 9.650396021366734e-07, "loss": 0.0372, "step": 2900 }, { "epoch": 0.5290287574606619, "grad_norm": 0.4095817804336548, "learning_rate": 9.645791121753545e-07, "loss": 0.0447, "step": 2925 }, { "epoch": 0.5335503707722915, "grad_norm": 1.1292011737823486, "learning_rate": 9.641186222140357e-07, "loss": 0.0306, "step": 2950 }, { "epoch": 0.5380719840839211, "grad_norm": 2.467514991760254, "learning_rate": 
9.636581322527169e-07, "loss": 0.056, "step": 2975 }, { "epoch": 0.5425935973955507, "grad_norm": 2.929332971572876, "learning_rate": 9.63197642291398e-07, "loss": 0.0551, "step": 3000 }, { "epoch": 0.5471152107071803, "grad_norm": 4.477908134460449, "learning_rate": 9.627371523300792e-07, "loss": 0.0396, "step": 3025 }, { "epoch": 0.5516368240188099, "grad_norm": 2.9823644161224365, "learning_rate": 9.622766623687604e-07, "loss": 0.0394, "step": 3050 }, { "epoch": 0.5561584373304395, "grad_norm": 19.203933715820312, "learning_rate": 9.618161724074415e-07, "loss": 0.0474, "step": 3075 }, { "epoch": 0.560680050642069, "grad_norm": 3.774156093597412, "learning_rate": 9.613556824461227e-07, "loss": 0.0459, "step": 3100 }, { "epoch": 0.5652016639536986, "grad_norm": 8.178613662719727, "learning_rate": 9.608951924848039e-07, "loss": 0.0704, "step": 3125 }, { "epoch": 0.5697232772653282, "grad_norm": 21.61182403564453, "learning_rate": 9.604347025234848e-07, "loss": 0.0728, "step": 3150 }, { "epoch": 0.5742448905769578, "grad_norm": 19.002914428710938, "learning_rate": 9.599742125621662e-07, "loss": 0.0839, "step": 3175 }, { "epoch": 0.5787665038885874, "grad_norm": 4.789153099060059, "learning_rate": 9.595137226008474e-07, "loss": 0.0816, "step": 3200 }, { "epoch": 0.583288117200217, "grad_norm": 0.4933367669582367, "learning_rate": 9.590532326395283e-07, "loss": 0.0534, "step": 3225 }, { "epoch": 0.5878097305118466, "grad_norm": 1.4477636814117432, "learning_rate": 9.585927426782095e-07, "loss": 0.0479, "step": 3250 }, { "epoch": 0.5923313438234762, "grad_norm": 5.407076358795166, "learning_rate": 9.581322527168907e-07, "loss": 0.0328, "step": 3275 }, { "epoch": 0.5968529571351058, "grad_norm": 6.760631084442139, "learning_rate": 9.57671762755572e-07, "loss": 0.0421, "step": 3300 }, { "epoch": 0.6013745704467354, "grad_norm": 11.133447647094727, "learning_rate": 9.57211272794253e-07, "loss": 0.0315, "step": 3325 }, { "epoch": 0.605896183758365, "grad_norm": 
8.827392578125, "learning_rate": 9.567507828329342e-07, "loss": 0.0533, "step": 3350 }, { "epoch": 0.6104177970699945, "grad_norm": 4.74755334854126, "learning_rate": 9.562902928716153e-07, "loss": 0.0505, "step": 3375 }, { "epoch": 0.6149394103816241, "grad_norm": 6.094758987426758, "learning_rate": 9.558298029102965e-07, "loss": 0.0374, "step": 3400 }, { "epoch": 0.6194610236932537, "grad_norm": 7.404526710510254, "learning_rate": 9.553693129489777e-07, "loss": 0.0274, "step": 3425 }, { "epoch": 0.6239826370048833, "grad_norm": 2.237982749938965, "learning_rate": 9.549088229876588e-07, "loss": 0.0395, "step": 3450 }, { "epoch": 0.6285042503165129, "grad_norm": 20.463314056396484, "learning_rate": 9.5444833302634e-07, "loss": 0.0596, "step": 3475 }, { "epoch": 0.6330258636281425, "grad_norm": 12.629451751708984, "learning_rate": 9.539878430650212e-07, "loss": 0.0329, "step": 3500 }, { "epoch": 0.6375474769397721, "grad_norm": 19.752243041992188, "learning_rate": 9.535273531037022e-07, "loss": 0.0343, "step": 3525 }, { "epoch": 0.6420690902514017, "grad_norm": 7.476962089538574, "learning_rate": 9.530668631423835e-07, "loss": 0.0724, "step": 3550 }, { "epoch": 0.6465907035630313, "grad_norm": 31.683101654052734, "learning_rate": 9.526063731810647e-07, "loss": 0.07, "step": 3575 }, { "epoch": 0.6511123168746609, "grad_norm": 3.1800105571746826, "learning_rate": 9.521458832197457e-07, "loss": 0.1358, "step": 3600 }, { "epoch": 0.6556339301862905, "grad_norm": 7.005350589752197, "learning_rate": 9.516853932584269e-07, "loss": 0.0643, "step": 3625 }, { "epoch": 0.66015554349792, "grad_norm": 5.049868583679199, "learning_rate": 9.51224903297108e-07, "loss": 0.0469, "step": 3650 }, { "epoch": 0.6646771568095496, "grad_norm": 6.004695415496826, "learning_rate": 9.507644133357893e-07, "loss": 0.0377, "step": 3675 }, { "epoch": 0.6691987701211792, "grad_norm": 14.023488998413086, "learning_rate": 9.503039233744704e-07, "loss": 0.0457, "step": 3700 }, { "epoch": 
0.6737203834328088, "grad_norm": 0.5092989802360535, "learning_rate": 9.498434334131515e-07, "loss": 0.0467, "step": 3725 }, { "epoch": 0.6782419967444384, "grad_norm": 9.694477081298828, "learning_rate": 9.493829434518327e-07, "loss": 0.0307, "step": 3750 }, { "epoch": 0.682763610056068, "grad_norm": 8.313312530517578, "learning_rate": 9.489224534905139e-07, "loss": 0.0602, "step": 3775 }, { "epoch": 0.6872852233676976, "grad_norm": 9.547676086425781, "learning_rate": 9.48461963529195e-07, "loss": 0.051, "step": 3800 }, { "epoch": 0.6918068366793272, "grad_norm": 5.230764865875244, "learning_rate": 9.480014735678762e-07, "loss": 0.0463, "step": 3825 }, { "epoch": 0.6963284499909568, "grad_norm": 6.57362699508667, "learning_rate": 9.475409836065573e-07, "loss": 0.0564, "step": 3850 }, { "epoch": 0.7008500633025864, "grad_norm": 8.198570251464844, "learning_rate": 9.470804936452384e-07, "loss": 0.0345, "step": 3875 }, { "epoch": 0.705371676614216, "grad_norm": 12.034746170043945, "learning_rate": 9.466200036839197e-07, "loss": 0.0728, "step": 3900 }, { "epoch": 0.7098932899258455, "grad_norm": 14.528310775756836, "learning_rate": 9.461595137226009e-07, "loss": 0.0473, "step": 3925 }, { "epoch": 0.7144149032374751, "grad_norm": 1.0361833572387695, "learning_rate": 9.456990237612819e-07, "loss": 0.0566, "step": 3950 }, { "epoch": 0.7189365165491047, "grad_norm": 18.10868263244629, "learning_rate": 9.452385337999631e-07, "loss": 0.0647, "step": 3975 }, { "epoch": 0.7234581298607343, "grad_norm": 0.4723142981529236, "learning_rate": 9.447780438386443e-07, "loss": 0.0936, "step": 4000 }, { "epoch": 0.7279797431723639, "grad_norm": 2.1907668113708496, "learning_rate": 9.443175538773254e-07, "loss": 0.0548, "step": 4025 }, { "epoch": 0.7325013564839935, "grad_norm": 9.852395057678223, "learning_rate": 9.438570639160066e-07, "loss": 0.0568, "step": 4050 }, { "epoch": 0.7370229697956231, "grad_norm": 4.197534084320068, "learning_rate": 9.433965739546878e-07, "loss": 0.0393, 
"step": 4075 }, { "epoch": 0.7415445831072527, "grad_norm": 2.691697835922241, "learning_rate": 9.429360839933689e-07, "loss": 0.0486, "step": 4100 }, { "epoch": 0.7460661964188823, "grad_norm": 6.5368452072143555, "learning_rate": 9.4247559403205e-07, "loss": 0.0346, "step": 4125 }, { "epoch": 0.7505878097305119, "grad_norm": 5.945896148681641, "learning_rate": 9.420151040707313e-07, "loss": 0.0583, "step": 4150 }, { "epoch": 0.7551094230421415, "grad_norm": 0.8333092331886292, "learning_rate": 9.415546141094124e-07, "loss": 0.0532, "step": 4175 }, { "epoch": 0.759631036353771, "grad_norm": 6.2607879638671875, "learning_rate": 9.410941241480935e-07, "loss": 0.0341, "step": 4200 }, { "epoch": 0.7641526496654006, "grad_norm": 20.586143493652344, "learning_rate": 9.406336341867747e-07, "loss": 0.0403, "step": 4225 }, { "epoch": 0.7686742629770302, "grad_norm": 7.338830947875977, "learning_rate": 9.401731442254558e-07, "loss": 0.0727, "step": 4250 }, { "epoch": 0.7731958762886598, "grad_norm": 0.2722490429878235, "learning_rate": 9.39712654264137e-07, "loss": 0.0489, "step": 4275 }, { "epoch": 0.7777174896002894, "grad_norm": 0.15249581634998322, "learning_rate": 9.392521643028182e-07, "loss": 0.0519, "step": 4300 }, { "epoch": 0.782239102911919, "grad_norm": 2.320035457611084, "learning_rate": 9.38810093939952e-07, "loss": 0.1012, "step": 4325 }, { "epoch": 0.7867607162235486, "grad_norm": 21.31127166748047, "learning_rate": 9.383496039786333e-07, "loss": 0.0479, "step": 4350 }, { "epoch": 0.7912823295351782, "grad_norm": 0.8797614574432373, "learning_rate": 9.378891140173144e-07, "loss": 0.054, "step": 4375 }, { "epoch": 0.7958039428468078, "grad_norm": 12.751317977905273, "learning_rate": 9.374286240559955e-07, "loss": 0.1115, "step": 4400 }, { "epoch": 0.8003255561584374, "grad_norm": 3.1984500885009766, "learning_rate": 9.369681340946767e-07, "loss": 0.0431, "step": 4425 }, { "epoch": 0.804847169470067, "grad_norm": 4.108559608459473, "learning_rate": 
9.365076441333579e-07, "loss": 0.0326, "step": 4450 }, { "epoch": 0.8093687827816965, "grad_norm": 9.827606201171875, "learning_rate": 9.36047154172039e-07, "loss": 0.0454, "step": 4475 }, { "epoch": 0.8138903960933261, "grad_norm": 6.922112941741943, "learning_rate": 9.355866642107202e-07, "loss": 0.0472, "step": 4500 }, { "epoch": 0.8184120094049557, "grad_norm": 3.6166677474975586, "learning_rate": 9.351261742494014e-07, "loss": 0.0495, "step": 4525 }, { "epoch": 0.8229336227165853, "grad_norm": 6.462372303009033, "learning_rate": 9.346656842880824e-07, "loss": 0.0273, "step": 4550 }, { "epoch": 0.8274552360282149, "grad_norm": 6.296558856964111, "learning_rate": 9.342051943267636e-07, "loss": 0.0557, "step": 4575 }, { "epoch": 0.8319768493398445, "grad_norm": 4.71564245223999, "learning_rate": 9.337447043654449e-07, "loss": 0.0524, "step": 4600 }, { "epoch": 0.8364984626514741, "grad_norm": 5.095015048980713, "learning_rate": 9.332842144041259e-07, "loss": 0.0468, "step": 4625 }, { "epoch": 0.8410200759631037, "grad_norm": 1.5702929496765137, "learning_rate": 9.328237244428071e-07, "loss": 0.0524, "step": 4650 }, { "epoch": 0.8455416892747333, "grad_norm": 13.924525260925293, "learning_rate": 9.323632344814883e-07, "loss": 0.0681, "step": 4675 }, { "epoch": 0.8500633025863629, "grad_norm": 1.5610660314559937, "learning_rate": 9.319027445201694e-07, "loss": 0.0361, "step": 4700 }, { "epoch": 0.8545849158979925, "grad_norm": 15.312824249267578, "learning_rate": 9.314422545588506e-07, "loss": 0.0799, "step": 4725 }, { "epoch": 0.8591065292096219, "grad_norm": 12.912773132324219, "learning_rate": 9.309817645975318e-07, "loss": 0.0768, "step": 4750 }, { "epoch": 0.8636281425212515, "grad_norm": 0.2809099853038788, "learning_rate": 9.305212746362129e-07, "loss": 0.0832, "step": 4775 }, { "epoch": 0.8681497558328811, "grad_norm": 16.709447860717773, "learning_rate": 9.30060784674894e-07, "loss": 0.1101, "step": 4800 }, { "epoch": 0.8726713691445107, "grad_norm": 
3.765148401260376, "learning_rate": 9.296002947135751e-07, "loss": 0.0513, "step": 4825 }, { "epoch": 0.8771929824561403, "grad_norm": 5.886707782745361, "learning_rate": 9.291398047522564e-07, "loss": 0.0414, "step": 4850 }, { "epoch": 0.8817145957677699, "grad_norm": 5.642549991607666, "learning_rate": 9.286793147909375e-07, "loss": 0.0418, "step": 4875 }, { "epoch": 0.8862362090793995, "grad_norm": 2.7111074924468994, "learning_rate": 9.282188248296186e-07, "loss": 0.0352, "step": 4900 }, { "epoch": 0.8907578223910291, "grad_norm": 8.905896186828613, "learning_rate": 9.277583348682998e-07, "loss": 0.0417, "step": 4925 }, { "epoch": 0.8952794357026587, "grad_norm": 5.965810775756836, "learning_rate": 9.27297844906981e-07, "loss": 0.056, "step": 4950 }, { "epoch": 0.8998010490142883, "grad_norm": 14.578080177307129, "learning_rate": 9.268373549456621e-07, "loss": 0.054, "step": 4975 }, { "epoch": 0.9043226623259178, "grad_norm": 8.001260757446289, "learning_rate": 9.263768649843433e-07, "loss": 0.0379, "step": 5000 }, { "epoch": 0.9088442756375474, "grad_norm": 10.563271522521973, "learning_rate": 9.259163750230245e-07, "loss": 0.0492, "step": 5025 }, { "epoch": 0.913365888949177, "grad_norm": 6.896139621734619, "learning_rate": 9.254558850617055e-07, "loss": 0.0541, "step": 5050 }, { "epoch": 0.9178875022608066, "grad_norm": 0.6722992658615112, "learning_rate": 9.249953951003868e-07, "loss": 0.0523, "step": 5075 }, { "epoch": 0.9224091155724362, "grad_norm": 0.5435565710067749, "learning_rate": 9.24534905139068e-07, "loss": 0.0554, "step": 5100 }, { "epoch": 0.9269307288840658, "grad_norm": 17.15158462524414, "learning_rate": 9.240744151777491e-07, "loss": 0.1066, "step": 5125 }, { "epoch": 0.9314523421956954, "grad_norm": 4.256832599639893, "learning_rate": 9.236139252164302e-07, "loss": 0.0761, "step": 5150 }, { "epoch": 0.935973955507325, "grad_norm": 1.2044847011566162, "learning_rate": 9.231534352551114e-07, "loss": 0.05, "step": 5175 }, { "epoch": 
0.9404955688189546, "grad_norm": 36.5662956237793, "learning_rate": 9.226929452937926e-07, "loss": 0.1034, "step": 5200 }, { "epoch": 0.9450171821305842, "grad_norm": 6.578591823577881, "learning_rate": 9.222324553324737e-07, "loss": 0.0497, "step": 5225 }, { "epoch": 0.9495387954422138, "grad_norm": 0.6094868779182434, "learning_rate": 9.217719653711549e-07, "loss": 0.0396, "step": 5250 }, { "epoch": 0.9540604087538433, "grad_norm": 2.0846738815307617, "learning_rate": 9.21311475409836e-07, "loss": 0.0271, "step": 5275 }, { "epoch": 0.9585820220654729, "grad_norm": 2.4080617427825928, "learning_rate": 9.208509854485171e-07, "loss": 0.0568, "step": 5300 }, { "epoch": 0.9631036353771025, "grad_norm": 4.527153968811035, "learning_rate": 9.203904954871984e-07, "loss": 0.0346, "step": 5325 }, { "epoch": 0.9676252486887321, "grad_norm": 4.270484924316406, "learning_rate": 9.199300055258795e-07, "loss": 0.0649, "step": 5350 }, { "epoch": 0.9721468620003617, "grad_norm": 0.19271469116210938, "learning_rate": 9.194695155645607e-07, "loss": 0.0349, "step": 5375 }, { "epoch": 0.9766684753119913, "grad_norm": 3.624760389328003, "learning_rate": 9.190090256032418e-07, "loss": 0.0332, "step": 5400 }, { "epoch": 0.9811900886236209, "grad_norm": 12.83198070526123, "learning_rate": 9.185485356419229e-07, "loss": 0.0392, "step": 5425 }, { "epoch": 0.9857117019352505, "grad_norm": 2.507450580596924, "learning_rate": 9.180880456806042e-07, "loss": 0.0561, "step": 5450 }, { "epoch": 0.9902333152468801, "grad_norm": 3.1027333736419678, "learning_rate": 9.176275557192853e-07, "loss": 0.0411, "step": 5475 }, { "epoch": 0.9947549285585097, "grad_norm": 53.23537826538086, "learning_rate": 9.171670657579664e-07, "loss": 0.0892, "step": 5500 }, { "epoch": 0.9992765418701393, "grad_norm": 53.92917251586914, "learning_rate": 9.167065757966476e-07, "loss": 0.0849, "step": 5525 }, { "epoch": 1.0, "eval_loss": 0.19376881420612335, "eval_runtime": 8685.9066, "eval_samples_per_second": 1.093, 
"eval_steps_per_second": 0.137, "eval_wer": 0.10807174887892376, "step": 5529 }, { "epoch": 1.003798155181769, "grad_norm": 1.1583398580551147, "learning_rate": 9.162460858353286e-07, "loss": 0.0418, "step": 5550 }, { "epoch": 1.0083197684933984, "grad_norm": 3.819171190261841, "learning_rate": 9.157855958740099e-07, "loss": 0.0223, "step": 5575 }, { "epoch": 1.0128413818050281, "grad_norm": 12.470620155334473, "learning_rate": 9.153251059126911e-07, "loss": 0.0322, "step": 5600 }, { "epoch": 1.0173629951166576, "grad_norm": 3.7826218605041504, "learning_rate": 9.148646159513723e-07, "loss": 0.0159, "step": 5625 }, { "epoch": 1.0218846084282873, "grad_norm": 3.368657112121582, "learning_rate": 9.144041259900533e-07, "loss": 0.0354, "step": 5650 }, { "epoch": 1.0264062217399168, "grad_norm": 5.757375717163086, "learning_rate": 9.139436360287345e-07, "loss": 0.0212, "step": 5675 }, { "epoch": 1.0309278350515463, "grad_norm": 0.24591568112373352, "learning_rate": 9.134831460674158e-07, "loss": 0.0354, "step": 5700 }, { "epoch": 1.035449448363176, "grad_norm": 1.0728533267974854, "learning_rate": 9.130226561060968e-07, "loss": 0.0162, "step": 5725 }, { "epoch": 1.0399710616748055, "grad_norm": 4.130620956420898, "learning_rate": 9.12562166144778e-07, "loss": 0.0286, "step": 5750 }, { "epoch": 1.0444926749864352, "grad_norm": 19.77248764038086, "learning_rate": 9.121016761834591e-07, "loss": 0.0346, "step": 5775 }, { "epoch": 1.0490142882980646, "grad_norm": 1.552537202835083, "learning_rate": 9.116411862221404e-07, "loss": 0.0164, "step": 5800 }, { "epoch": 1.0535359016096943, "grad_norm": 0.2987182140350342, "learning_rate": 9.111806962608215e-07, "loss": 0.0423, "step": 5825 }, { "epoch": 1.0580575149213238, "grad_norm": 2.336726188659668, "learning_rate": 9.107202062995026e-07, "loss": 0.0431, "step": 5850 }, { "epoch": 1.0625791282329535, "grad_norm": 1.1774622201919556, "learning_rate": 9.102597163381838e-07, "loss": 0.0238, "step": 5875 }, { "epoch": 
1.067100741544583, "grad_norm": 1.2562298774719238, "learning_rate": 9.097992263768649e-07, "loss": 0.0502, "step": 5900 }, { "epoch": 1.0716223548562127, "grad_norm": 15.588605880737305, "learning_rate": 9.093387364155461e-07, "loss": 0.0622, "step": 5925 }, { "epoch": 1.0761439681678422, "grad_norm": 2.4600257873535156, "learning_rate": 9.088782464542273e-07, "loss": 0.0476, "step": 5950 }, { "epoch": 1.080665581479472, "grad_norm": 7.753191947937012, "learning_rate": 9.084177564929084e-07, "loss": 0.035, "step": 5975 }, { "epoch": 1.0851871947911014, "grad_norm": 6.821231365203857, "learning_rate": 9.079572665315895e-07, "loss": 0.0212, "step": 6000 }, { "epoch": 1.089708808102731, "grad_norm": 2.2424540519714355, "learning_rate": 9.074967765702707e-07, "loss": 0.0163, "step": 6025 }, { "epoch": 1.0942304214143606, "grad_norm": 23.894805908203125, "learning_rate": 9.07036286608952e-07, "loss": 0.0329, "step": 6050 }, { "epoch": 1.0987520347259903, "grad_norm": 0.27833691239356995, "learning_rate": 9.06575796647633e-07, "loss": 0.0378, "step": 6075 }, { "epoch": 1.1032736480376197, "grad_norm": 1.7873796224594116, "learning_rate": 9.061153066863142e-07, "loss": 0.035, "step": 6100 }, { "epoch": 1.1077952613492494, "grad_norm": 8.95418930053711, "learning_rate": 9.056548167249954e-07, "loss": 0.0346, "step": 6125 }, { "epoch": 1.112316874660879, "grad_norm": 0.6353895664215088, "learning_rate": 9.051943267636764e-07, "loss": 0.0225, "step": 6150 }, { "epoch": 1.1168384879725086, "grad_norm": 4.148350715637207, "learning_rate": 9.047338368023577e-07, "loss": 0.0375, "step": 6175 }, { "epoch": 1.121360101284138, "grad_norm": 3.327193021774292, "learning_rate": 9.042733468410389e-07, "loss": 0.0269, "step": 6200 }, { "epoch": 1.1258817145957678, "grad_norm": 4.31793212890625, "learning_rate": 9.038128568797199e-07, "loss": 0.0242, "step": 6225 }, { "epoch": 1.1304033279073973, "grad_norm": 20.431732177734375, "learning_rate": 9.033523669184011e-07, "loss": 0.0222, 
"step": 6250 }, { "epoch": 1.134924941219027, "grad_norm": 6.784418106079102, "learning_rate": 9.028918769570823e-07, "loss": 0.0488, "step": 6275 }, { "epoch": 1.1394465545306565, "grad_norm": 21.859533309936523, "learning_rate": 9.024313869957635e-07, "loss": 0.0518, "step": 6300 }, { "epoch": 1.1439681678422862, "grad_norm": 2.1316771507263184, "learning_rate": 9.019708970344446e-07, "loss": 0.0403, "step": 6325 }, { "epoch": 1.1484897811539156, "grad_norm": 2.723567247390747, "learning_rate": 9.015104070731258e-07, "loss": 0.0487, "step": 6350 }, { "epoch": 1.1530113944655453, "grad_norm": 4.712471961975098, "learning_rate": 9.010499171118069e-07, "loss": 0.0139, "step": 6375 }, { "epoch": 1.1575330077771748, "grad_norm": 2.7237842082977295, "learning_rate": 9.00589427150488e-07, "loss": 0.053, "step": 6400 }, { "epoch": 1.1620546210888045, "grad_norm": 3.783311605453491, "learning_rate": 9.001289371891693e-07, "loss": 0.0447, "step": 6425 }, { "epoch": 1.166576234400434, "grad_norm": 4.771798610687256, "learning_rate": 8.996684472278504e-07, "loss": 0.0306, "step": 6450 }, { "epoch": 1.1710978477120637, "grad_norm": 0.3322123885154724, "learning_rate": 8.992079572665316e-07, "loss": 0.024, "step": 6475 }, { "epoch": 1.1756194610236932, "grad_norm": 17.802349090576172, "learning_rate": 8.987658869036655e-07, "loss": 0.0317, "step": 6500 }, { "epoch": 1.180141074335323, "grad_norm": 0.25371697545051575, "learning_rate": 8.983053969423466e-07, "loss": 0.0405, "step": 6525 }, { "epoch": 1.1846626876469524, "grad_norm": 2.539607524871826, "learning_rate": 8.978449069810278e-07, "loss": 0.0199, "step": 6550 }, { "epoch": 1.189184300958582, "grad_norm": 2.3740954399108887, "learning_rate": 8.973844170197089e-07, "loss": 0.0252, "step": 6575 }, { "epoch": 1.1937059142702116, "grad_norm": 2.0089480876922607, "learning_rate": 8.9692392705839e-07, "loss": 0.0091, "step": 6600 }, { "epoch": 1.1982275275818413, "grad_norm": 0.539943516254425, "learning_rate": 
8.964634370970713e-07, "loss": 0.0479, "step": 6625 }, { "epoch": 1.2027491408934707, "grad_norm": 15.776597023010254, "learning_rate": 8.960029471357525e-07, "loss": 0.0337, "step": 6650 }, { "epoch": 1.2072707542051004, "grad_norm": 43.65888977050781, "learning_rate": 8.955424571744335e-07, "loss": 0.04, "step": 6675 }, { "epoch": 1.21179236751673, "grad_norm": 0.6316529512405396, "learning_rate": 8.950819672131147e-07, "loss": 0.0464, "step": 6700 }, { "epoch": 1.2163139808283596, "grad_norm": 0.06351311504840851, "learning_rate": 8.946214772517959e-07, "loss": 0.0596, "step": 6725 }, { "epoch": 1.220835594139989, "grad_norm": 9.572014808654785, "learning_rate": 8.94160987290477e-07, "loss": 0.0398, "step": 6750 }, { "epoch": 1.2253572074516188, "grad_norm": 9.598252296447754, "learning_rate": 8.937004973291582e-07, "loss": 0.0344, "step": 6775 }, { "epoch": 1.2298788207632483, "grad_norm": 1.5910439491271973, "learning_rate": 8.932400073678394e-07, "loss": 0.0265, "step": 6800 }, { "epoch": 1.234400434074878, "grad_norm": 10.684225082397461, "learning_rate": 8.927795174065205e-07, "loss": 0.0264, "step": 6825 }, { "epoch": 1.2389220473865075, "grad_norm": 11.20118236541748, "learning_rate": 8.923190274452016e-07, "loss": 0.0345, "step": 6850 }, { "epoch": 1.2434436606981372, "grad_norm": 12.899706840515137, "learning_rate": 8.918585374838829e-07, "loss": 0.0316, "step": 6875 }, { "epoch": 1.2479652740097666, "grad_norm": 18.632545471191406, "learning_rate": 8.91398047522564e-07, "loss": 0.033, "step": 6900 }, { "epoch": 1.2524868873213963, "grad_norm": 0.7847491502761841, "learning_rate": 8.909375575612451e-07, "loss": 0.0292, "step": 6925 }, { "epoch": 1.2570085006330258, "grad_norm": 1.0317540168762207, "learning_rate": 8.904770675999262e-07, "loss": 0.0286, "step": 6950 }, { "epoch": 1.2615301139446555, "grad_norm": 0.7965870499610901, "learning_rate": 8.900165776386075e-07, "loss": 0.0222, "step": 6975 }, { "epoch": 1.266051727256285, "grad_norm": 
12.810919761657715, "learning_rate": 8.895560876772886e-07, "loss": 0.0369, "step": 7000 }, { "epoch": 1.2705733405679147, "grad_norm": 12.809525489807129, "learning_rate": 8.890955977159697e-07, "loss": 0.0381, "step": 7025 }, { "epoch": 1.2750949538795442, "grad_norm": 4.802799224853516, "learning_rate": 8.886351077546509e-07, "loss": 0.0274, "step": 7050 }, { "epoch": 1.279616567191174, "grad_norm": 37.88527297973633, "learning_rate": 8.881746177933321e-07, "loss": 0.0266, "step": 7075 }, { "epoch": 1.2841381805028034, "grad_norm": 23.25122833251953, "learning_rate": 8.877141278320132e-07, "loss": 0.0487, "step": 7100 }, { "epoch": 1.2886597938144329, "grad_norm": 25.70662498474121, "learning_rate": 8.872536378706944e-07, "loss": 0.0652, "step": 7125 }, { "epoch": 1.2931814071260626, "grad_norm": 3.8632333278656006, "learning_rate": 8.867931479093756e-07, "loss": 0.0442, "step": 7150 }, { "epoch": 1.2977030204376923, "grad_norm": 1.3248151540756226, "learning_rate": 8.863326579480566e-07, "loss": 0.0229, "step": 7175 }, { "epoch": 1.3022246337493217, "grad_norm": 1.7153706550598145, "learning_rate": 8.858721679867378e-07, "loss": 0.0341, "step": 7200 }, { "epoch": 1.3067462470609512, "grad_norm": 8.373719215393066, "learning_rate": 8.854116780254191e-07, "loss": 0.0422, "step": 7225 }, { "epoch": 1.311267860372581, "grad_norm": 2.403066635131836, "learning_rate": 8.849511880641001e-07, "loss": 0.0287, "step": 7250 }, { "epoch": 1.3157894736842106, "grad_norm": 1.0376137495040894, "learning_rate": 8.844906981027813e-07, "loss": 0.0113, "step": 7275 }, { "epoch": 1.32031108699584, "grad_norm": 5.192943096160889, "learning_rate": 8.840302081414625e-07, "loss": 0.0301, "step": 7300 }, { "epoch": 1.3248327003074696, "grad_norm": 1.6459161043167114, "learning_rate": 8.835697181801436e-07, "loss": 0.044, "step": 7325 }, { "epoch": 1.3293543136190993, "grad_norm": 9.943346977233887, "learning_rate": 8.831092282188248e-07, "loss": 0.0132, "step": 7350 }, { "epoch": 
1.333875926930729, "grad_norm": 5.858924865722656, "learning_rate": 8.82648738257506e-07, "loss": 0.0192, "step": 7375 }, { "epoch": 1.3383975402423585, "grad_norm": 6.490448474884033, "learning_rate": 8.821882482961871e-07, "loss": 0.0239, "step": 7400 }, { "epoch": 1.342919153553988, "grad_norm": 0.2476533055305481, "learning_rate": 8.817277583348682e-07, "loss": 0.0463, "step": 7425 }, { "epoch": 1.3474407668656176, "grad_norm": 14.549626350402832, "learning_rate": 8.812672683735494e-07, "loss": 0.0507, "step": 7450 }, { "epoch": 1.3519623801772473, "grad_norm": 2.189336061477661, "learning_rate": 8.808067784122306e-07, "loss": 0.0331, "step": 7475 }, { "epoch": 1.3564839934888768, "grad_norm": 0.9585964679718018, "learning_rate": 8.803462884509118e-07, "loss": 0.0572, "step": 7500 }, { "epoch": 1.3610056068005063, "grad_norm": 0.9413008093833923, "learning_rate": 8.798857984895929e-07, "loss": 0.0847, "step": 7525 }, { "epoch": 1.365527220112136, "grad_norm": 1.1694642305374146, "learning_rate": 8.79425308528274e-07, "loss": 0.0355, "step": 7550 }, { "epoch": 1.3700488334237657, "grad_norm": 10.240534782409668, "learning_rate": 8.789648185669552e-07, "loss": 0.0264, "step": 7575 }, { "epoch": 1.3745704467353952, "grad_norm": 0.40524232387542725, "learning_rate": 8.785043286056364e-07, "loss": 0.0277, "step": 7600 }, { "epoch": 1.3790920600470247, "grad_norm": 0.6719425916671753, "learning_rate": 8.780438386443175e-07, "loss": 0.027, "step": 7625 }, { "epoch": 1.3836136733586544, "grad_norm": 3.0596706867218018, "learning_rate": 8.775833486829987e-07, "loss": 0.0251, "step": 7650 }, { "epoch": 1.388135286670284, "grad_norm": 3.454193592071533, "learning_rate": 8.771228587216797e-07, "loss": 0.0334, "step": 7675 }, { "epoch": 1.3926568999819136, "grad_norm": 3.9943997859954834, "learning_rate": 8.76662368760361e-07, "loss": 0.0312, "step": 7700 }, { "epoch": 1.397178513293543, "grad_norm": 8.968663215637207, "learning_rate": 8.762018787990422e-07, "loss": 0.027, 
"step": 7725 }, { "epoch": 1.4017001266051727, "grad_norm": 1.615293025970459, "learning_rate": 8.757413888377234e-07, "loss": 0.0246, "step": 7750 }, { "epoch": 1.4062217399168024, "grad_norm": 14.491230964660645, "learning_rate": 8.752808988764044e-07, "loss": 0.019, "step": 7775 }, { "epoch": 1.410743353228432, "grad_norm": 0.586275577545166, "learning_rate": 8.748204089150856e-07, "loss": 0.0315, "step": 7800 }, { "epoch": 1.4152649665400614, "grad_norm": 16.67391586303711, "learning_rate": 8.743599189537669e-07, "loss": 0.0468, "step": 7825 }, { "epoch": 1.419786579851691, "grad_norm": 7.967570781707764, "learning_rate": 8.738994289924479e-07, "loss": 0.0443, "step": 7850 }, { "epoch": 1.4243081931633208, "grad_norm": 3.4274497032165527, "learning_rate": 8.734389390311291e-07, "loss": 0.0464, "step": 7875 }, { "epoch": 1.4288298064749503, "grad_norm": 14.454466819763184, "learning_rate": 8.729784490698102e-07, "loss": 0.0594, "step": 7900 }, { "epoch": 1.4333514197865798, "grad_norm": 34.33506774902344, "learning_rate": 8.725179591084913e-07, "loss": 0.1098, "step": 7925 }, { "epoch": 1.4378730330982095, "grad_norm": 5.2832770347595215, "learning_rate": 8.720574691471726e-07, "loss": 0.035, "step": 7950 }, { "epoch": 1.4423946464098392, "grad_norm": 2.5055034160614014, "learning_rate": 8.715969791858537e-07, "loss": 0.0261, "step": 7975 }, { "epoch": 1.4469162597214686, "grad_norm": 2.449373483657837, "learning_rate": 8.711364892245349e-07, "loss": 0.024, "step": 8000 }, { "epoch": 1.4514378730330981, "grad_norm": 3.93390154838562, "learning_rate": 8.70675999263216e-07, "loss": 0.0248, "step": 8025 }, { "epoch": 1.4559594863447278, "grad_norm": 14.35305404663086, "learning_rate": 8.702155093018971e-07, "loss": 0.0237, "step": 8050 }, { "epoch": 1.4604810996563573, "grad_norm": 7.074910640716553, "learning_rate": 8.697550193405784e-07, "loss": 0.0284, "step": 8075 }, { "epoch": 1.465002712967987, "grad_norm": 21.563901901245117, "learning_rate": 
8.692945293792595e-07, "loss": 0.0421, "step": 8100 }, { "epoch": 1.4695243262796165, "grad_norm": 8.473495483398438, "learning_rate": 8.688340394179406e-07, "loss": 0.0194, "step": 8125 }, { "epoch": 1.4740459395912462, "grad_norm": 0.6098468899726868, "learning_rate": 8.683735494566218e-07, "loss": 0.0392, "step": 8150 }, { "epoch": 1.4785675529028757, "grad_norm": 29.32798957824707, "learning_rate": 8.679130594953029e-07, "loss": 0.0503, "step": 8175 }, { "epoch": 1.4830891662145054, "grad_norm": 7.364591121673584, "learning_rate": 8.674525695339841e-07, "loss": 0.0227, "step": 8200 }, { "epoch": 1.4876107795261349, "grad_norm": 8.35572338104248, "learning_rate": 8.669920795726653e-07, "loss": 0.0346, "step": 8225 }, { "epoch": 1.4921323928377646, "grad_norm": 9.818826675415039, "learning_rate": 8.665315896113465e-07, "loss": 0.0484, "step": 8250 }, { "epoch": 1.496654006149394, "grad_norm": 8.03248405456543, "learning_rate": 8.660710996500275e-07, "loss": 0.0605, "step": 8275 }, { "epoch": 1.5011756194610237, "grad_norm": 7.9050397872924805, "learning_rate": 8.656106096887087e-07, "loss": 0.0609, "step": 8300 }, { "epoch": 1.5056972327726532, "grad_norm": 0.5489145517349243, "learning_rate": 8.6515011972739e-07, "loss": 0.041, "step": 8325 }, { "epoch": 1.510218846084283, "grad_norm": 1.3398199081420898, "learning_rate": 8.64689629766071e-07, "loss": 0.0458, "step": 8350 }, { "epoch": 1.5147404593959126, "grad_norm": 23.95371437072754, "learning_rate": 8.642291398047522e-07, "loss": 0.0322, "step": 8375 }, { "epoch": 1.519262072707542, "grad_norm": 6.187991142272949, "learning_rate": 8.637686498434334e-07, "loss": 0.0186, "step": 8400 }, { "epoch": 1.5237836860191716, "grad_norm": 2.8521807193756104, "learning_rate": 8.633081598821146e-07, "loss": 0.0353, "step": 8425 }, { "epoch": 1.5283052993308013, "grad_norm": 17.128206253051758, "learning_rate": 8.628476699207957e-07, "loss": 0.0217, "step": 8450 }, { "epoch": 1.532826912642431, "grad_norm": 
1.29264235496521, "learning_rate": 8.623871799594769e-07, "loss": 0.02, "step": 8475 }, { "epoch": 1.5373485259540605, "grad_norm": 1.0612270832061768, "learning_rate": 8.61926689998158e-07, "loss": 0.0273, "step": 8500 }, { "epoch": 1.54187013926569, "grad_norm": 21.39075469970703, "learning_rate": 8.614662000368391e-07, "loss": 0.0326, "step": 8525 }, { "epoch": 1.5463917525773194, "grad_norm": 22.907485961914062, "learning_rate": 8.610057100755204e-07, "loss": 0.0493, "step": 8550 }, { "epoch": 1.5509133658889491, "grad_norm": 0.9095446467399597, "learning_rate": 8.605452201142015e-07, "loss": 0.0203, "step": 8575 }, { "epoch": 1.5554349792005788, "grad_norm": 19.75260353088379, "learning_rate": 8.600847301528826e-07, "loss": 0.021, "step": 8600 }, { "epoch": 1.5599565925122083, "grad_norm": 14.637717247009277, "learning_rate": 8.596242401915637e-07, "loss": 0.0392, "step": 8625 }, { "epoch": 1.5644782058238378, "grad_norm": 16.156036376953125, "learning_rate": 8.591821698286977e-07, "loss": 0.0496, "step": 8650 }, { "epoch": 1.5689998191354675, "grad_norm": 0.6795814037322998, "learning_rate": 8.587216798673789e-07, "loss": 0.031, "step": 8675 }, { "epoch": 1.5735214324470972, "grad_norm": 9.835176467895508, "learning_rate": 8.5826118990606e-07, "loss": 0.0413, "step": 8700 }, { "epoch": 1.5780430457587267, "grad_norm": 98.9009017944336, "learning_rate": 8.578006999447411e-07, "loss": 0.0764, "step": 8725 }, { "epoch": 1.5825646590703562, "grad_norm": 11.466421127319336, "learning_rate": 8.573402099834223e-07, "loss": 0.0362, "step": 8750 }, { "epoch": 1.5870862723819859, "grad_norm": 1.9184726476669312, "learning_rate": 8.568797200221036e-07, "loss": 0.0199, "step": 8775 }, { "epoch": 1.5916078856936156, "grad_norm": 1.2715715169906616, "learning_rate": 8.564192300607846e-07, "loss": 0.0273, "step": 8800 }, { "epoch": 1.596129499005245, "grad_norm": 0.3399398624897003, "learning_rate": 8.559587400994658e-07, "loss": 0.0271, "step": 8825 }, { "epoch": 
1.6006511123168745, "grad_norm": 3.984431028366089, "learning_rate": 8.55498250138147e-07, "loss": 0.0395, "step": 8850 }, { "epoch": 1.6051727256285042, "grad_norm": 0.13510115444660187, "learning_rate": 8.550377601768281e-07, "loss": 0.0264, "step": 8875 }, { "epoch": 1.609694338940134, "grad_norm": 1.5098395347595215, "learning_rate": 8.545772702155093e-07, "loss": 0.0365, "step": 8900 }, { "epoch": 1.6142159522517634, "grad_norm": 1.5568790435791016, "learning_rate": 8.541167802541905e-07, "loss": 0.0348, "step": 8925 }, { "epoch": 1.6187375655633929, "grad_norm": 15.226325988769531, "learning_rate": 8.536562902928715e-07, "loss": 0.032, "step": 8950 }, { "epoch": 1.6232591788750226, "grad_norm": 4.606542587280273, "learning_rate": 8.531958003315527e-07, "loss": 0.0397, "step": 8975 }, { "epoch": 1.6277807921866523, "grad_norm": 0.1431085765361786, "learning_rate": 8.52735310370234e-07, "loss": 0.04, "step": 9000 }, { "epoch": 1.6323024054982818, "grad_norm": 8.834503173828125, "learning_rate": 8.522748204089151e-07, "loss": 0.0199, "step": 9025 }, { "epoch": 1.6368240188099112, "grad_norm": 0.43929988145828247, "learning_rate": 8.518143304475962e-07, "loss": 0.0521, "step": 9050 }, { "epoch": 1.641345632121541, "grad_norm": 0.239268958568573, "learning_rate": 8.513538404862773e-07, "loss": 0.028, "step": 9075 }, { "epoch": 1.6458672454331706, "grad_norm": 7.775051593780518, "learning_rate": 8.508933505249585e-07, "loss": 0.0435, "step": 9100 }, { "epoch": 1.6503888587448001, "grad_norm": 1.0783268213272095, "learning_rate": 8.504328605636397e-07, "loss": 0.09, "step": 9125 }, { "epoch": 1.6549104720564296, "grad_norm": 6.303003787994385, "learning_rate": 8.499723706023208e-07, "loss": 0.0371, "step": 9150 }, { "epoch": 1.6594320853680593, "grad_norm": 0.0859726294875145, "learning_rate": 8.49511880641002e-07, "loss": 0.0309, "step": 9175 }, { "epoch": 1.663953698679689, "grad_norm": 7.407548427581787, "learning_rate": 8.490513906796831e-07, "loss": 0.0267, 
"step": 9200 }, { "epoch": 1.6684753119913185, "grad_norm": 0.10481081902980804, "learning_rate": 8.485909007183642e-07, "loss": 0.023, "step": 9225 }, { "epoch": 1.672996925302948, "grad_norm": 4.49674129486084, "learning_rate": 8.481304107570455e-07, "loss": 0.0226, "step": 9250 }, { "epoch": 1.6775185386145777, "grad_norm": 3.7796387672424316, "learning_rate": 8.476699207957267e-07, "loss": 0.0452, "step": 9275 }, { "epoch": 1.6820401519262074, "grad_norm": 4.558553218841553, "learning_rate": 8.472094308344077e-07, "loss": 0.0302, "step": 9300 }, { "epoch": 1.6865617652378369, "grad_norm": 1.0844203233718872, "learning_rate": 8.467489408730889e-07, "loss": 0.0213, "step": 9325 }, { "epoch": 1.6910833785494663, "grad_norm": 0.6152500510215759, "learning_rate": 8.462884509117701e-07, "loss": 0.028, "step": 9350 }, { "epoch": 1.695604991861096, "grad_norm": 0.83628249168396, "learning_rate": 8.458279609504512e-07, "loss": 0.0249, "step": 9375 }, { "epoch": 1.7001266051727257, "grad_norm": 4.9916157722473145, "learning_rate": 8.453674709891324e-07, "loss": 0.0326, "step": 9400 }, { "epoch": 1.7046482184843552, "grad_norm": 0.2095576971769333, "learning_rate": 8.449069810278136e-07, "loss": 0.0494, "step": 9425 }, { "epoch": 1.7091698317959847, "grad_norm": 16.670848846435547, "learning_rate": 8.444464910664947e-07, "loss": 0.0248, "step": 9450 }, { "epoch": 1.7136914451076144, "grad_norm": 1.0165280103683472, "learning_rate": 8.439860011051758e-07, "loss": 0.0371, "step": 9475 }, { "epoch": 1.718213058419244, "grad_norm": 19.8568115234375, "learning_rate": 8.435255111438571e-07, "loss": 0.03, "step": 9500 }, { "epoch": 1.7227346717308736, "grad_norm": 29.754867553710938, "learning_rate": 8.430650211825382e-07, "loss": 0.0836, "step": 9525 }, { "epoch": 1.727256285042503, "grad_norm": 10.585617065429688, "learning_rate": 8.426045312212193e-07, "loss": 0.0253, "step": 9550 }, { "epoch": 1.7317778983541328, "grad_norm": 0.41567254066467285, "learning_rate": 
8.421440412599005e-07, "loss": 0.0419, "step": 9575 }, { "epoch": 1.7362995116657625, "grad_norm": 14.9446439743042, "learning_rate": 8.416835512985817e-07, "loss": 0.0346, "step": 9600 }, { "epoch": 1.740821124977392, "grad_norm": 11.810590744018555, "learning_rate": 8.412230613372628e-07, "loss": 0.0399, "step": 9625 }, { "epoch": 1.7453427382890214, "grad_norm": 4.900815486907959, "learning_rate": 8.40762571375944e-07, "loss": 0.0328, "step": 9650 }, { "epoch": 1.7498643516006511, "grad_norm": 43.67582702636719, "learning_rate": 8.403020814146251e-07, "loss": 0.0329, "step": 9675 }, { "epoch": 1.7543859649122808, "grad_norm": 9.560347557067871, "learning_rate": 8.398415914533063e-07, "loss": 0.0455, "step": 9700 }, { "epoch": 1.7589075782239103, "grad_norm": 20.836200714111328, "learning_rate": 8.393811014919875e-07, "loss": 0.0347, "step": 9725 }, { "epoch": 1.7634291915355398, "grad_norm": 1.0109299421310425, "learning_rate": 8.389206115306686e-07, "loss": 0.0171, "step": 9750 }, { "epoch": 1.7679508048471695, "grad_norm": 15.896967887878418, "learning_rate": 8.384601215693498e-07, "loss": 0.0558, "step": 9775 }, { "epoch": 1.7724724181587992, "grad_norm": 8.316397666931152, "learning_rate": 8.379996316080308e-07, "loss": 0.042, "step": 9800 }, { "epoch": 1.7769940314704287, "grad_norm": 2.5963985919952393, "learning_rate": 8.37539141646712e-07, "loss": 0.051, "step": 9825 }, { "epoch": 1.7815156447820581, "grad_norm": 0.8551808595657349, "learning_rate": 8.370786516853933e-07, "loss": 0.0379, "step": 9850 }, { "epoch": 1.7860372580936879, "grad_norm": 7.102266311645508, "learning_rate": 8.366181617240745e-07, "loss": 0.0252, "step": 9875 }, { "epoch": 1.7905588714053176, "grad_norm": 1.0050630569458008, "learning_rate": 8.361576717627555e-07, "loss": 0.0523, "step": 9900 }, { "epoch": 1.795080484716947, "grad_norm": 0.23266096413135529, "learning_rate": 8.356971818014367e-07, "loss": 0.0771, "step": 9925 }, { "epoch": 1.7996020980285765, "grad_norm": 
2.397165298461914, "learning_rate": 8.352366918401178e-07, "loss": 0.0383, "step": 9950 }, { "epoch": 1.8041237113402062, "grad_norm": 3.823277711868286, "learning_rate": 8.34776201878799e-07, "loss": 0.0244, "step": 9975 }, { "epoch": 1.808645324651836, "grad_norm": 0.4726651906967163, "learning_rate": 8.343157119174802e-07, "loss": 0.0263, "step": 10000 }, { "epoch": 1.8131669379634654, "grad_norm": 1.6218931674957275, "learning_rate": 8.338552219561613e-07, "loss": 0.0141, "step": 10025 }, { "epoch": 1.8176885512750949, "grad_norm": 10.145578384399414, "learning_rate": 8.333947319948424e-07, "loss": 0.026, "step": 10050 }, { "epoch": 1.8222101645867246, "grad_norm": 10.49772834777832, "learning_rate": 8.329342420335236e-07, "loss": 0.0373, "step": 10075 }, { "epoch": 1.8267317778983543, "grad_norm": 25.12136459350586, "learning_rate": 8.324737520722048e-07, "loss": 0.0335, "step": 10100 }, { "epoch": 1.8312533912099838, "grad_norm": 0.4937836229801178, "learning_rate": 8.32013262110886e-07, "loss": 0.038, "step": 10125 }, { "epoch": 1.8357750045216132, "grad_norm": 0.8513910174369812, "learning_rate": 8.315527721495671e-07, "loss": 0.0258, "step": 10150 }, { "epoch": 1.840296617833243, "grad_norm": 16.558271408081055, "learning_rate": 8.310922821882482e-07, "loss": 0.0564, "step": 10175 }, { "epoch": 1.8448182311448726, "grad_norm": 4.88476037979126, "learning_rate": 8.306317922269294e-07, "loss": 0.0285, "step": 10200 }, { "epoch": 1.8493398444565021, "grad_norm": 4.210046291351318, "learning_rate": 8.301713022656106e-07, "loss": 0.0293, "step": 10225 }, { "epoch": 1.8538614577681316, "grad_norm": 11.491304397583008, "learning_rate": 8.297108123042917e-07, "loss": 0.0585, "step": 10250 }, { "epoch": 1.8583830710797613, "grad_norm": 11.61664867401123, "learning_rate": 8.292503223429729e-07, "loss": 0.0506, "step": 10275 }, { "epoch": 1.862904684391391, "grad_norm": 2.719242572784424, "learning_rate": 8.28789832381654e-07, "loss": 0.0484, "step": 10300 }, { 
"epoch": 1.8674262977030205, "grad_norm": 1.5411864519119263, "learning_rate": 8.283293424203352e-07, "loss": 0.0472, "step": 10325 }, { "epoch": 1.87194791101465, "grad_norm": 9.415594100952148, "learning_rate": 8.278688524590164e-07, "loss": 0.0792, "step": 10350 }, { "epoch": 1.8764695243262797, "grad_norm": 7.970459938049316, "learning_rate": 8.274083624976976e-07, "loss": 0.0345, "step": 10375 }, { "epoch": 1.8809911376379094, "grad_norm": 1.801161289215088, "learning_rate": 8.269478725363786e-07, "loss": 0.0373, "step": 10400 }, { "epoch": 1.8855127509495389, "grad_norm": 0.596969485282898, "learning_rate": 8.264873825750598e-07, "loss": 0.0399, "step": 10425 }, { "epoch": 1.8900343642611683, "grad_norm": 2.700634717941284, "learning_rate": 8.260268926137411e-07, "loss": 0.0269, "step": 10450 }, { "epoch": 1.8945559775727978, "grad_norm": 5.090729713439941, "learning_rate": 8.255664026524221e-07, "loss": 0.026, "step": 10475 }, { "epoch": 1.8990775908844275, "grad_norm": 2.83105206489563, "learning_rate": 8.251059126911033e-07, "loss": 0.0266, "step": 10500 }, { "epoch": 1.9035992041960572, "grad_norm": 0.7878080010414124, "learning_rate": 8.246454227297845e-07, "loss": 0.0449, "step": 10525 }, { "epoch": 1.9081208175076867, "grad_norm": 14.082945823669434, "learning_rate": 8.241849327684655e-07, "loss": 0.027, "step": 10550 }, { "epoch": 1.9126424308193162, "grad_norm": 0.5400319695472717, "learning_rate": 8.237244428071468e-07, "loss": 0.0546, "step": 10575 }, { "epoch": 1.9171640441309459, "grad_norm": 17.12287712097168, "learning_rate": 8.23263952845828e-07, "loss": 0.0318, "step": 10600 }, { "epoch": 1.9216856574425756, "grad_norm": 4.993497848510742, "learning_rate": 8.228034628845091e-07, "loss": 0.0428, "step": 10625 }, { "epoch": 1.926207270754205, "grad_norm": 31.576929092407227, "learning_rate": 8.223429729231902e-07, "loss": 0.0434, "step": 10650 }, { "epoch": 1.9307288840658345, "grad_norm": 1.9357125759124756, "learning_rate": 
8.218824829618713e-07, "loss": 0.033, "step": 10675 }, { "epoch": 1.9352504973774642, "grad_norm": 0.617363691329956, "learning_rate": 8.214219930005526e-07, "loss": 0.0639, "step": 10700 }, { "epoch": 1.939772110689094, "grad_norm": 0.753123939037323, "learning_rate": 8.209615030392337e-07, "loss": 0.0357, "step": 10725 }, { "epoch": 1.9442937240007234, "grad_norm": 0.2875419855117798, "learning_rate": 8.205010130779148e-07, "loss": 0.0286, "step": 10750 }, { "epoch": 1.948815337312353, "grad_norm": 1.6829754114151, "learning_rate": 8.20040523116596e-07, "loss": 0.0285, "step": 10775 }, { "epoch": 1.9533369506239826, "grad_norm": 1.1577789783477783, "learning_rate": 8.195800331552772e-07, "loss": 0.018, "step": 10800 }, { "epoch": 1.9578585639356123, "grad_norm": 9.512741088867188, "learning_rate": 8.191195431939583e-07, "loss": 0.031, "step": 10825 }, { "epoch": 1.9623801772472418, "grad_norm": 2.2050418853759766, "learning_rate": 8.186590532326395e-07, "loss": 0.0314, "step": 10850 }, { "epoch": 1.9669017905588713, "grad_norm": 15.566596984863281, "learning_rate": 8.181985632713207e-07, "loss": 0.0376, "step": 10875 }, { "epoch": 1.971423403870501, "grad_norm": 8.69605827331543, "learning_rate": 8.177380733100017e-07, "loss": 0.0321, "step": 10900 }, { "epoch": 1.9759450171821307, "grad_norm": 8.651535987854004, "learning_rate": 8.172775833486829e-07, "loss": 0.0212, "step": 10925 }, { "epoch": 1.9804666304937601, "grad_norm": 1.8405441045761108, "learning_rate": 8.168170933873642e-07, "loss": 0.032, "step": 10950 }, { "epoch": 1.9849882438053896, "grad_norm": 15.064764976501465, "learning_rate": 8.163566034260452e-07, "loss": 0.0133, "step": 10975 }, { "epoch": 1.9895098571170193, "grad_norm": 0.0899849534034729, "learning_rate": 8.158961134647264e-07, "loss": 0.0469, "step": 11000 }, { "epoch": 1.994031470428649, "grad_norm": 18.745182037353516, "learning_rate": 8.154540431018604e-07, "loss": 0.0688, "step": 11025 }, { "epoch": 1.9985530837402785, "grad_norm": 
0.08827279508113861, "learning_rate": 8.149935531405416e-07, "loss": 0.0788, "step": 11050 }, { "epoch": 2.0, "eval_loss": 0.22891011834144592, "eval_runtime": 8887.2881, "eval_samples_per_second": 1.068, "eval_steps_per_second": 0.134, "eval_wer": 0.10608584240871237, "step": 11058 }, { "epoch": 2.003074697051908, "grad_norm": 1.0333837270736694, "learning_rate": 8.145330631792226e-07, "loss": 0.0349, "step": 11075 }, { "epoch": 2.007596310363538, "grad_norm": 1.1579056978225708, "learning_rate": 8.140725732179038e-07, "loss": 0.0172, "step": 11100 }, { "epoch": 2.0121179236751674, "grad_norm": 0.37552839517593384, "learning_rate": 8.136120832565849e-07, "loss": 0.0151, "step": 11125 }, { "epoch": 2.016639536986797, "grad_norm": 23.273653030395508, "learning_rate": 8.131515932952662e-07, "loss": 0.0298, "step": 11150 }, { "epoch": 2.0211611502984264, "grad_norm": 8.169917106628418, "learning_rate": 8.126911033339473e-07, "loss": 0.0215, "step": 11175 }, { "epoch": 2.0256827636100563, "grad_norm": 1.1066598892211914, "learning_rate": 8.122306133726284e-07, "loss": 0.0222, "step": 11200 }, { "epoch": 2.0302043769216858, "grad_norm": 0.5222472548484802, "learning_rate": 8.117701234113096e-07, "loss": 0.0125, "step": 11225 }, { "epoch": 2.0347259902333152, "grad_norm": 0.707737922668457, "learning_rate": 8.113096334499907e-07, "loss": 0.0275, "step": 11250 }, { "epoch": 2.0392476035449447, "grad_norm": 2.295354127883911, "learning_rate": 8.108491434886719e-07, "loss": 0.023, "step": 11275 }, { "epoch": 2.0437692168565746, "grad_norm": 1.3817616701126099, "learning_rate": 8.103886535273531e-07, "loss": 0.0125, "step": 11300 }, { "epoch": 2.048290830168204, "grad_norm": 2.0756027698516846, "learning_rate": 8.099281635660342e-07, "loss": 0.0187, "step": 11325 }, { "epoch": 2.0528124434798336, "grad_norm": 0.05818900838494301, "learning_rate": 8.094676736047153e-07, "loss": 0.0278, "step": 11350 }, { "epoch": 2.057334056791463, "grad_norm": 6.99597692489624, 
"learning_rate": 8.090071836433965e-07, "loss": 0.0241, "step": 11375 }, { "epoch": 2.0618556701030926, "grad_norm": 0.15094341337680817, "learning_rate": 8.085466936820778e-07, "loss": 0.0223, "step": 11400 }, { "epoch": 2.0663772834147225, "grad_norm": 0.17221419513225555, "learning_rate": 8.080862037207588e-07, "loss": 0.0257, "step": 11425 }, { "epoch": 2.070898896726352, "grad_norm": 0.5105612277984619, "learning_rate": 8.0762571375944e-07, "loss": 0.0523, "step": 11450 }, { "epoch": 2.0754205100379814, "grad_norm": 1.4951982498168945, "learning_rate": 8.071652237981212e-07, "loss": 0.0247, "step": 11475 }, { "epoch": 2.079942123349611, "grad_norm": 5.619513988494873, "learning_rate": 8.067047338368023e-07, "loss": 0.0181, "step": 11500 }, { "epoch": 2.084463736661241, "grad_norm": 21.839784622192383, "learning_rate": 8.062442438754835e-07, "loss": 0.0221, "step": 11525 }, { "epoch": 2.0889853499728703, "grad_norm": 4.323668003082275, "learning_rate": 8.057837539141647e-07, "loss": 0.0296, "step": 11550 }, { "epoch": 2.0935069632845, "grad_norm": 0.17072859406471252, "learning_rate": 8.053232639528457e-07, "loss": 0.0196, "step": 11575 }, { "epoch": 2.0980285765961293, "grad_norm": 13.157007217407227, "learning_rate": 8.048627739915269e-07, "loss": 0.0238, "step": 11600 }, { "epoch": 2.102550189907759, "grad_norm": 0.13449828326702118, "learning_rate": 8.044022840302082e-07, "loss": 0.0157, "step": 11625 }, { "epoch": 2.1070718032193887, "grad_norm": 9.773667335510254, "learning_rate": 8.039417940688893e-07, "loss": 0.0167, "step": 11650 }, { "epoch": 2.111593416531018, "grad_norm": 0.01844405196607113, "learning_rate": 8.034813041075704e-07, "loss": 0.0062, "step": 11675 }, { "epoch": 2.1161150298426477, "grad_norm": 2.560438394546509, "learning_rate": 8.030208141462516e-07, "loss": 0.0193, "step": 11700 }, { "epoch": 2.1206366431542776, "grad_norm": 0.5435400605201721, "learning_rate": 8.025603241849327e-07, "loss": 0.0203, "step": 11725 }, { "epoch": 
2.125158256465907, "grad_norm": 63.197235107421875, "learning_rate": 8.020998342236139e-07, "loss": 0.0358, "step": 11750 }, { "epoch": 2.1296798697775365, "grad_norm": 1.3490138053894043, "learning_rate": 8.016393442622951e-07, "loss": 0.013, "step": 11775 }, { "epoch": 2.134201483089166, "grad_norm": 34.209476470947266, "learning_rate": 8.011788543009762e-07, "loss": 0.0351, "step": 11800 }, { "epoch": 2.138723096400796, "grad_norm": 0.7151913642883301, "learning_rate": 8.007183643396574e-07, "loss": 0.0258, "step": 11825 }, { "epoch": 2.1432447097124254, "grad_norm": 0.9076653718948364, "learning_rate": 8.002578743783384e-07, "loss": 0.0662, "step": 11850 }, { "epoch": 2.147766323024055, "grad_norm": 0.8418449759483337, "learning_rate": 7.997973844170197e-07, "loss": 0.0301, "step": 11875 }, { "epoch": 2.1522879363356844, "grad_norm": 0.5430082082748413, "learning_rate": 7.993368944557009e-07, "loss": 0.0166, "step": 11900 }, { "epoch": 2.1568095496473143, "grad_norm": 0.6853590607643127, "learning_rate": 7.988764044943819e-07, "loss": 0.0148, "step": 11925 }, { "epoch": 2.161331162958944, "grad_norm": 0.5972227454185486, "learning_rate": 7.984159145330631e-07, "loss": 0.0177, "step": 11950 }, { "epoch": 2.1658527762705733, "grad_norm": 2.1670873165130615, "learning_rate": 7.979554245717443e-07, "loss": 0.0274, "step": 11975 }, { "epoch": 2.1703743895822027, "grad_norm": 18.318012237548828, "learning_rate": 7.974949346104254e-07, "loss": 0.029, "step": 12000 }, { "epoch": 2.1748960028938327, "grad_norm": 0.31500276923179626, "learning_rate": 7.970344446491066e-07, "loss": 0.012, "step": 12025 }, { "epoch": 2.179417616205462, "grad_norm": 0.21801598370075226, "learning_rate": 7.965739546877878e-07, "loss": 0.0121, "step": 12050 }, { "epoch": 2.1839392295170916, "grad_norm": 1.37786865234375, "learning_rate": 7.961134647264689e-07, "loss": 0.0066, "step": 12075 }, { "epoch": 2.188460842828721, "grad_norm": 32.29001998901367, "learning_rate": 7.9565297476515e-07, 
"loss": 0.0341, "step": 12100 }, { "epoch": 2.192982456140351, "grad_norm": 2.302133083343506, "learning_rate": 7.951924848038313e-07, "loss": 0.0188, "step": 12125 }, { "epoch": 2.1975040694519805, "grad_norm": 14.034008979797363, "learning_rate": 7.947319948425124e-07, "loss": 0.0117, "step": 12150 }, { "epoch": 2.20202568276361, "grad_norm": 0.3767974078655243, "learning_rate": 7.942715048811935e-07, "loss": 0.0259, "step": 12175 }, { "epoch": 2.2065472960752395, "grad_norm": 0.26384684443473816, "learning_rate": 7.938110149198747e-07, "loss": 0.0371, "step": 12200 }, { "epoch": 2.2110689093868694, "grad_norm": 8.888740539550781, "learning_rate": 7.933505249585559e-07, "loss": 0.0336, "step": 12225 }, { "epoch": 2.215590522698499, "grad_norm": 0.19948595762252808, "learning_rate": 7.928900349972371e-07, "loss": 0.0474, "step": 12250 }, { "epoch": 2.2201121360101284, "grad_norm": 2.2132930755615234, "learning_rate": 7.924295450359182e-07, "loss": 0.0406, "step": 12275 }, { "epoch": 2.224633749321758, "grad_norm": 11.718713760375977, "learning_rate": 7.919690550745993e-07, "loss": 0.0195, "step": 12300 }, { "epoch": 2.2291553626333878, "grad_norm": 5.642462730407715, "learning_rate": 7.915085651132805e-07, "loss": 0.0187, "step": 12325 }, { "epoch": 2.2336769759450172, "grad_norm": 4.084228992462158, "learning_rate": 7.910480751519617e-07, "loss": 0.0126, "step": 12350 }, { "epoch": 2.2381985892566467, "grad_norm": 1.1005765199661255, "learning_rate": 7.905875851906428e-07, "loss": 0.0167, "step": 12375 }, { "epoch": 2.242720202568276, "grad_norm": 9.070086479187012, "learning_rate": 7.90127095229324e-07, "loss": 0.0182, "step": 12400 }, { "epoch": 2.247241815879906, "grad_norm": 15.3062162399292, "learning_rate": 7.896666052680051e-07, "loss": 0.0156, "step": 12425 }, { "epoch": 2.2517634291915356, "grad_norm": 0.09264446794986725, "learning_rate": 7.892061153066862e-07, "loss": 0.0237, "step": 12450 }, { "epoch": 2.256285042503165, "grad_norm": 
3.3248021602630615, "learning_rate": 7.887456253453675e-07, "loss": 0.0139, "step": 12475 }, { "epoch": 2.2608066558147946, "grad_norm": 3.0979135036468506, "learning_rate": 7.882851353840487e-07, "loss": 0.0063, "step": 12500 }, { "epoch": 2.2653282691264245, "grad_norm": 11.612130165100098, "learning_rate": 7.878246454227297e-07, "loss": 0.04, "step": 12525 }, { "epoch": 2.269849882438054, "grad_norm": 4.221678256988525, "learning_rate": 7.873641554614109e-07, "loss": 0.0393, "step": 12550 }, { "epoch": 2.2743714957496834, "grad_norm": 11.065829277038574, "learning_rate": 7.869036655000921e-07, "loss": 0.0286, "step": 12575 }, { "epoch": 2.278893109061313, "grad_norm": 1.366445779800415, "learning_rate": 7.864431755387732e-07, "loss": 0.028, "step": 12600 }, { "epoch": 2.283414722372943, "grad_norm": 0.7951880693435669, "learning_rate": 7.859826855774544e-07, "loss": 0.0661, "step": 12625 }, { "epoch": 2.2879363356845723, "grad_norm": 10.447066307067871, "learning_rate": 7.855221956161356e-07, "loss": 0.0484, "step": 12650 }, { "epoch": 2.292457948996202, "grad_norm": 0.32853183150291443, "learning_rate": 7.850617056548166e-07, "loss": 0.0239, "step": 12675 }, { "epoch": 2.2969795623078313, "grad_norm": 2.0133612155914307, "learning_rate": 7.846012156934978e-07, "loss": 0.0106, "step": 12700 }, { "epoch": 2.301501175619461, "grad_norm": 1.5879937410354614, "learning_rate": 7.841407257321791e-07, "loss": 0.0151, "step": 12725 }, { "epoch": 2.3060227889310907, "grad_norm": 3.2537899017333984, "learning_rate": 7.836802357708602e-07, "loss": 0.021, "step": 12750 }, { "epoch": 2.31054440224272, "grad_norm": 0.4321633577346802, "learning_rate": 7.832197458095413e-07, "loss": 0.0135, "step": 12775 }, { "epoch": 2.3150660155543497, "grad_norm": 17.50613021850586, "learning_rate": 7.827592558482224e-07, "loss": 0.024, "step": 12800 }, { "epoch": 2.319587628865979, "grad_norm": 10.587005615234375, "learning_rate": 7.822987658869036e-07, "loss": 0.0155, "step": 12825 }, { 
"epoch": 2.324109242177609, "grad_norm": 1.703659176826477, "learning_rate": 7.818382759255848e-07, "loss": 0.0204, "step": 12850 }, { "epoch": 2.3286308554892385, "grad_norm": 0.26982223987579346, "learning_rate": 7.813777859642659e-07, "loss": 0.0087, "step": 12875 }, { "epoch": 2.333152468800868, "grad_norm": 4.538456916809082, "learning_rate": 7.809172960029471e-07, "loss": 0.0177, "step": 12900 }, { "epoch": 2.337674082112498, "grad_norm": 5.056499481201172, "learning_rate": 7.804568060416282e-07, "loss": 0.0163, "step": 12925 }, { "epoch": 2.3421956954241274, "grad_norm": 0.5478576421737671, "learning_rate": 7.799963160803094e-07, "loss": 0.0149, "step": 12950 }, { "epoch": 2.346717308735757, "grad_norm": 1.6986396312713623, "learning_rate": 7.795358261189906e-07, "loss": 0.0341, "step": 12975 }, { "epoch": 2.3512389220473864, "grad_norm": 0.026538992300629616, "learning_rate": 7.790753361576718e-07, "loss": 0.0143, "step": 13000 }, { "epoch": 2.355760535359016, "grad_norm": 0.38667038083076477, "learning_rate": 7.786148461963528e-07, "loss": 0.0079, "step": 13025 }, { "epoch": 2.360282148670646, "grad_norm": 15.939850807189941, "learning_rate": 7.78154356235034e-07, "loss": 0.0372, "step": 13050 }, { "epoch": 2.3648037619822753, "grad_norm": 18.573139190673828, "learning_rate": 7.776938662737153e-07, "loss": 0.0441, "step": 13075 }, { "epoch": 2.3693253752939047, "grad_norm": 3.2550344467163086, "learning_rate": 7.772333763123963e-07, "loss": 0.027, "step": 13100 }, { "epoch": 2.3738469886055347, "grad_norm": 1.5440771579742432, "learning_rate": 7.767728863510775e-07, "loss": 0.0277, "step": 13125 }, { "epoch": 2.378368601917164, "grad_norm": 1.0547950267791748, "learning_rate": 7.763123963897587e-07, "loss": 0.0278, "step": 13150 }, { "epoch": 2.3828902152287936, "grad_norm": 0.08282533288002014, "learning_rate": 7.758519064284398e-07, "loss": 0.0101, "step": 13175 }, { "epoch": 2.387411828540423, "grad_norm": 0.24389539659023285, "learning_rate": 
7.75391416467121e-07, "loss": 0.0233, "step": 13200 }, { "epoch": 2.3919334418520526, "grad_norm": 9.251720428466797, "learning_rate": 7.749309265058022e-07, "loss": 0.047, "step": 13225 }, { "epoch": 2.3964550551636825, "grad_norm": 2.2844269275665283, "learning_rate": 7.744704365444833e-07, "loss": 0.0162, "step": 13250 }, { "epoch": 2.400976668475312, "grad_norm": 3.2137227058410645, "learning_rate": 7.740099465831644e-07, "loss": 0.0121, "step": 13275 }, { "epoch": 2.4054982817869415, "grad_norm": 11.308737754821777, "learning_rate": 7.735494566218456e-07, "loss": 0.0299, "step": 13300 }, { "epoch": 2.4100198950985714, "grad_norm": 3.3836469650268555, "learning_rate": 7.730889666605268e-07, "loss": 0.0133, "step": 13325 }, { "epoch": 2.414541508410201, "grad_norm": 0.13357259333133698, "learning_rate": 7.726284766992079e-07, "loss": 0.0223, "step": 13350 }, { "epoch": 2.4190631217218304, "grad_norm": 0.5216515064239502, "learning_rate": 7.721864063363418e-07, "loss": 0.0212, "step": 13375 }, { "epoch": 2.42358473503346, "grad_norm": 4.0334320068359375, "learning_rate": 7.71725916375023e-07, "loss": 0.0367, "step": 13400 }, { "epoch": 2.4281063483450893, "grad_norm": 8.48493766784668, "learning_rate": 7.712654264137042e-07, "loss": 0.0254, "step": 13425 }, { "epoch": 2.4326279616567192, "grad_norm": 1.6405227184295654, "learning_rate": 7.708049364523853e-07, "loss": 0.0331, "step": 13450 }, { "epoch": 2.4371495749683487, "grad_norm": 13.649563789367676, "learning_rate": 7.703444464910664e-07, "loss": 0.018, "step": 13475 }, { "epoch": 2.441671188279978, "grad_norm": 0.7964933514595032, "learning_rate": 7.698839565297476e-07, "loss": 0.0235, "step": 13500 }, { "epoch": 2.446192801591608, "grad_norm": 0.13087065517902374, "learning_rate": 7.694234665684289e-07, "loss": 0.0215, "step": 13525 }, { "epoch": 2.4507144149032376, "grad_norm": 5.35853385925293, "learning_rate": 7.689629766071099e-07, "loss": 0.0178, "step": 13550 }, { "epoch": 2.455236028214867, 
"grad_norm": 3.688849687576294, "learning_rate": 7.685024866457911e-07, "loss": 0.0099, "step": 13575 }, { "epoch": 2.4597576415264966, "grad_norm": 0.2602083384990692, "learning_rate": 7.680419966844723e-07, "loss": 0.0205, "step": 13600 }, { "epoch": 2.464279254838126, "grad_norm": 0.10222572088241577, "learning_rate": 7.675815067231533e-07, "loss": 0.012, "step": 13625 }, { "epoch": 2.468800868149756, "grad_norm": 1.9992151260375977, "learning_rate": 7.671210167618346e-07, "loss": 0.0295, "step": 13650 }, { "epoch": 2.4733224814613854, "grad_norm": 3.8986308574676514, "learning_rate": 7.666605268005158e-07, "loss": 0.0447, "step": 13675 }, { "epoch": 2.477844094773015, "grad_norm": 2.5787339210510254, "learning_rate": 7.662000368391968e-07, "loss": 0.0233, "step": 13700 }, { "epoch": 2.482365708084645, "grad_norm": 3.943392276763916, "learning_rate": 7.65739546877878e-07, "loss": 0.0214, "step": 13725 }, { "epoch": 2.4868873213962743, "grad_norm": 4.015535831451416, "learning_rate": 7.652790569165592e-07, "loss": 0.0146, "step": 13750 }, { "epoch": 2.491408934707904, "grad_norm": 1.5608233213424683, "learning_rate": 7.648185669552404e-07, "loss": 0.0171, "step": 13775 }, { "epoch": 2.4959305480195333, "grad_norm": 1.696368932723999, "learning_rate": 7.643580769939215e-07, "loss": 0.0122, "step": 13800 }, { "epoch": 2.5004521613311628, "grad_norm": 1.1217238903045654, "learning_rate": 7.638975870326027e-07, "loss": 0.0128, "step": 13825 }, { "epoch": 2.5049737746427927, "grad_norm": 40.497745513916016, "learning_rate": 7.634370970712838e-07, "loss": 0.0277, "step": 13850 }, { "epoch": 2.509495387954422, "grad_norm": 6.063665390014648, "learning_rate": 7.629766071099649e-07, "loss": 0.0118, "step": 13875 }, { "epoch": 2.5140170012660517, "grad_norm": 8.675702095031738, "learning_rate": 7.625161171486462e-07, "loss": 0.0259, "step": 13900 }, { "epoch": 2.5185386145776816, "grad_norm": 0.8335000872612, "learning_rate": 7.620556271873273e-07, "loss": 0.0147, "step": 
13925 }, { "epoch": 2.523060227889311, "grad_norm": 8.890750885009766, "learning_rate": 7.615951372260084e-07, "loss": 0.0226, "step": 13950 }, { "epoch": 2.5275818412009405, "grad_norm": 0.20721301436424255, "learning_rate": 7.611346472646895e-07, "loss": 0.0103, "step": 13975 }, { "epoch": 2.53210345451257, "grad_norm": 0.5705264806747437, "learning_rate": 7.606741573033707e-07, "loss": 0.0094, "step": 14000 }, { "epoch": 2.5366250678241995, "grad_norm": 0.07163272053003311, "learning_rate": 7.60213667342052e-07, "loss": 0.0177, "step": 14025 }, { "epoch": 2.5411466811358294, "grad_norm": 0.8082312345504761, "learning_rate": 7.59753177380733e-07, "loss": 0.0177, "step": 14050 }, { "epoch": 2.545668294447459, "grad_norm": 0.3273601830005646, "learning_rate": 7.592926874194142e-07, "loss": 0.0429, "step": 14075 }, { "epoch": 2.5501899077590884, "grad_norm": 1.8662065267562866, "learning_rate": 7.588321974580954e-07, "loss": 0.0163, "step": 14100 }, { "epoch": 2.5547115210707183, "grad_norm": 0.7974827289581299, "learning_rate": 7.583717074967764e-07, "loss": 0.0159, "step": 14125 }, { "epoch": 2.559233134382348, "grad_norm": 12.264116287231445, "learning_rate": 7.579112175354577e-07, "loss": 0.0329, "step": 14150 }, { "epoch": 2.5637547476939773, "grad_norm": 0.07791896164417267, "learning_rate": 7.574507275741389e-07, "loss": 0.0214, "step": 14175 }, { "epoch": 2.5682763610056067, "grad_norm": 0.7379089593887329, "learning_rate": 7.5699023761282e-07, "loss": 0.0366, "step": 14200 }, { "epoch": 2.5727979743172362, "grad_norm": 91.46224975585938, "learning_rate": 7.565297476515011e-07, "loss": 0.027, "step": 14225 }, { "epoch": 2.5773195876288657, "grad_norm": 14.806313514709473, "learning_rate": 7.560692576901824e-07, "loss": 0.0488, "step": 14250 }, { "epoch": 2.5818412009404956, "grad_norm": 2.083322286605835, "learning_rate": 7.556087677288635e-07, "loss": 0.031, "step": 14275 }, { "epoch": 2.586362814252125, "grad_norm": 4.879816055297852, "learning_rate": 
7.551482777675446e-07, "loss": 0.0153, "step": 14300 }, { "epoch": 2.5908844275637546, "grad_norm": 6.237574100494385, "learning_rate": 7.546877878062258e-07, "loss": 0.0141, "step": 14325 }, { "epoch": 2.5954060408753845, "grad_norm": 0.12708225846290588, "learning_rate": 7.542272978449069e-07, "loss": 0.0101, "step": 14350 }, { "epoch": 2.599927654187014, "grad_norm": 1.2891823053359985, "learning_rate": 7.537668078835881e-07, "loss": 0.0215, "step": 14375 }, { "epoch": 2.6044492674986435, "grad_norm": 0.09198635071516037, "learning_rate": 7.533063179222693e-07, "loss": 0.0202, "step": 14400 }, { "epoch": 2.608970880810273, "grad_norm": 0.42182183265686035, "learning_rate": 7.528458279609504e-07, "loss": 0.0115, "step": 14425 }, { "epoch": 2.6134924941219024, "grad_norm": 0.22351473569869995, "learning_rate": 7.523853379996316e-07, "loss": 0.0133, "step": 14450 }, { "epoch": 2.6180141074335324, "grad_norm": 6.165104389190674, "learning_rate": 7.519248480383127e-07, "loss": 0.0381, "step": 14475 }, { "epoch": 2.622535720745162, "grad_norm": 4.592835903167725, "learning_rate": 7.514643580769939e-07, "loss": 0.0118, "step": 14500 }, { "epoch": 2.6270573340567913, "grad_norm": 13.439335823059082, "learning_rate": 7.510038681156751e-07, "loss": 0.0324, "step": 14525 }, { "epoch": 2.6315789473684212, "grad_norm": 3.1381280422210693, "learning_rate": 7.505433781543562e-07, "loss": 0.0383, "step": 14550 }, { "epoch": 2.6361005606800507, "grad_norm": 3.8765087127685547, "learning_rate": 7.500828881930373e-07, "loss": 0.03, "step": 14575 }, { "epoch": 2.64062217399168, "grad_norm": 12.755854606628418, "learning_rate": 7.496223982317185e-07, "loss": 0.0356, "step": 14600 }, { "epoch": 2.6451437873033097, "grad_norm": 48.162261962890625, "learning_rate": 7.491619082703998e-07, "loss": 0.0244, "step": 14625 }, { "epoch": 2.649665400614939, "grad_norm": 55.78163528442383, "learning_rate": 7.487014183090808e-07, "loss": 0.0495, "step": 14650 }, { "epoch": 2.654187013926569, 
"grad_norm": 4.578949451446533, "learning_rate": 7.48240928347762e-07, "loss": 0.0292, "step": 14675 }, { "epoch": 2.6587086272381986, "grad_norm": 6.509306907653809, "learning_rate": 7.477804383864432e-07, "loss": 0.0193, "step": 14700 }, { "epoch": 2.663230240549828, "grad_norm": 4.97738790512085, "learning_rate": 7.473199484251242e-07, "loss": 0.0223, "step": 14725 }, { "epoch": 2.667751853861458, "grad_norm": 7.346843242645264, "learning_rate": 7.468594584638055e-07, "loss": 0.0244, "step": 14750 }, { "epoch": 2.6722734671730874, "grad_norm": 0.34532052278518677, "learning_rate": 7.463989685024867e-07, "loss": 0.0158, "step": 14775 }, { "epoch": 2.676795080484717, "grad_norm": 1.6974883079528809, "learning_rate": 7.459384785411677e-07, "loss": 0.0172, "step": 14800 }, { "epoch": 2.6813166937963464, "grad_norm": 4.6452507972717285, "learning_rate": 7.454779885798489e-07, "loss": 0.034, "step": 14825 }, { "epoch": 2.685838307107976, "grad_norm": 40.83687973022461, "learning_rate": 7.4501749861853e-07, "loss": 0.0238, "step": 14850 }, { "epoch": 2.690359920419606, "grad_norm": 0.27361202239990234, "learning_rate": 7.445570086572113e-07, "loss": 0.0217, "step": 14875 }, { "epoch": 2.6948815337312353, "grad_norm": 0.088538758456707, "learning_rate": 7.440965186958924e-07, "loss": 0.0216, "step": 14900 }, { "epoch": 2.6994031470428648, "grad_norm": 0.36908406019210815, "learning_rate": 7.436360287345735e-07, "loss": 0.0163, "step": 14925 }, { "epoch": 2.7039247603544947, "grad_norm": 0.3927018344402313, "learning_rate": 7.431755387732547e-07, "loss": 0.0272, "step": 14950 }, { "epoch": 2.708446373666124, "grad_norm": 0.08495494723320007, "learning_rate": 7.427150488119359e-07, "loss": 0.0321, "step": 14975 }, { "epoch": 2.7129679869777537, "grad_norm": 16.534454345703125, "learning_rate": 7.42254558850617e-07, "loss": 0.0346, "step": 15000 }, { "epoch": 2.717489600289383, "grad_norm": 10.199457168579102, "learning_rate": 7.417940688892982e-07, "loss": 0.0236, "step": 
15025 }, { "epoch": 2.7220112136010126, "grad_norm": 12.526575088500977, "learning_rate": 7.413335789279793e-07, "loss": 0.0344, "step": 15050 }, { "epoch": 2.7265328269126425, "grad_norm": 0.6113793253898621, "learning_rate": 7.408730889666604e-07, "loss": 0.0549, "step": 15075 }, { "epoch": 2.731054440224272, "grad_norm": 2.575866460800171, "learning_rate": 7.404125990053417e-07, "loss": 0.0188, "step": 15100 }, { "epoch": 2.7355760535359015, "grad_norm": 0.5728238821029663, "learning_rate": 7.399521090440229e-07, "loss": 0.0248, "step": 15125 }, { "epoch": 2.7400976668475314, "grad_norm": 5.137115478515625, "learning_rate": 7.394916190827039e-07, "loss": 0.0234, "step": 15150 }, { "epoch": 2.744619280159161, "grad_norm": 2.0585458278656006, "learning_rate": 7.390311291213851e-07, "loss": 0.0193, "step": 15175 }, { "epoch": 2.7491408934707904, "grad_norm": 2.6761465072631836, "learning_rate": 7.385706391600663e-07, "loss": 0.0242, "step": 15200 }, { "epoch": 2.75366250678242, "grad_norm": 0.3999291956424713, "learning_rate": 7.381101491987474e-07, "loss": 0.0193, "step": 15225 }, { "epoch": 2.7581841200940493, "grad_norm": 8.237870216369629, "learning_rate": 7.376496592374286e-07, "loss": 0.0278, "step": 15250 }, { "epoch": 2.7627057334056793, "grad_norm": 8.460784912109375, "learning_rate": 7.371891692761098e-07, "loss": 0.0194, "step": 15275 }, { "epoch": 2.7672273467173087, "grad_norm": 13.062602043151855, "learning_rate": 7.367286793147908e-07, "loss": 0.032, "step": 15300 }, { "epoch": 2.7717489600289382, "grad_norm": 7.502108097076416, "learning_rate": 7.36268189353472e-07, "loss": 0.0076, "step": 15325 }, { "epoch": 2.776270573340568, "grad_norm": 11.163969039916992, "learning_rate": 7.358076993921533e-07, "loss": 0.024, "step": 15350 }, { "epoch": 2.7807921866521976, "grad_norm": 1.2433866262435913, "learning_rate": 7.353472094308344e-07, "loss": 0.0276, "step": 15375 }, { "epoch": 2.785313799963827, "grad_norm": 0.21064484119415283, "learning_rate": 
7.348867194695155e-07, "loss": 0.0104, "step": 15400 }, { "epoch": 2.7898354132754566, "grad_norm": 39.60307312011719, "learning_rate": 7.344446491066495e-07, "loss": 0.0408, "step": 15425 }, { "epoch": 2.794357026587086, "grad_norm": 1.9793568849563599, "learning_rate": 7.339841591453306e-07, "loss": 0.059, "step": 15450 }, { "epoch": 2.798878639898716, "grad_norm": 0.8816681504249573, "learning_rate": 7.335236691840118e-07, "loss": 0.069, "step": 15475 }, { "epoch": 2.8034002532103455, "grad_norm": 1.217022180557251, "learning_rate": 7.330631792226929e-07, "loss": 0.0182, "step": 15500 }, { "epoch": 2.807921866521975, "grad_norm": 10.408825874328613, "learning_rate": 7.32602689261374e-07, "loss": 0.0192, "step": 15525 }, { "epoch": 2.812443479833605, "grad_norm": 0.13745278120040894, "learning_rate": 7.321421993000553e-07, "loss": 0.02, "step": 15550 }, { "epoch": 2.8169650931452344, "grad_norm": 4.462474346160889, "learning_rate": 7.316817093387364e-07, "loss": 0.0229, "step": 15575 }, { "epoch": 2.821486706456864, "grad_norm": 0.2782382369041443, "learning_rate": 7.312212193774175e-07, "loss": 0.0209, "step": 15600 }, { "epoch": 2.8260083197684933, "grad_norm": 9.691481590270996, "learning_rate": 7.307607294160987e-07, "loss": 0.0226, "step": 15625 }, { "epoch": 2.830529933080123, "grad_norm": 0.038128096610307693, "learning_rate": 7.303002394547798e-07, "loss": 0.0076, "step": 15650 }, { "epoch": 2.8350515463917527, "grad_norm": 3.7006304264068604, "learning_rate": 7.29839749493461e-07, "loss": 0.0116, "step": 15675 }, { "epoch": 2.839573159703382, "grad_norm": 0.9358986616134644, "learning_rate": 7.293792595321422e-07, "loss": 0.0176, "step": 15700 }, { "epoch": 2.8440947730150117, "grad_norm": 6.940121173858643, "learning_rate": 7.289187695708234e-07, "loss": 0.018, "step": 15725 }, { "epoch": 2.8486163863266416, "grad_norm": 0.16799919307231903, "learning_rate": 7.284582796095044e-07, "loss": 0.0191, "step": 15750 }, { "epoch": 2.853137999638271, 
"grad_norm": 8.835237503051758, "learning_rate": 7.279977896481856e-07, "loss": 0.023, "step": 15775 }, { "epoch": 2.8576596129499006, "grad_norm": 41.18638229370117, "learning_rate": 7.275372996868669e-07, "loss": 0.0451, "step": 15800 }, { "epoch": 2.86218122626153, "grad_norm": 7.924957752227783, "learning_rate": 7.270768097255479e-07, "loss": 0.017, "step": 15825 }, { "epoch": 2.8667028395731595, "grad_norm": 0.04123552888631821, "learning_rate": 7.266163197642291e-07, "loss": 0.0736, "step": 15850 }, { "epoch": 2.8712244528847894, "grad_norm": 0.38446855545043945, "learning_rate": 7.261558298029103e-07, "loss": 0.0227, "step": 15875 }, { "epoch": 2.875746066196419, "grad_norm": 2.7168097496032715, "learning_rate": 7.256953398415914e-07, "loss": 0.0294, "step": 15900 }, { "epoch": 2.8802676795080484, "grad_norm": 6.317012786865234, "learning_rate": 7.252348498802726e-07, "loss": 0.0313, "step": 15925 }, { "epoch": 2.8847892928196783, "grad_norm": 7.5447821617126465, "learning_rate": 7.247743599189538e-07, "loss": 0.02, "step": 15950 }, { "epoch": 2.889310906131308, "grad_norm": 0.06321001052856445, "learning_rate": 7.243138699576349e-07, "loss": 0.0197, "step": 15975 }, { "epoch": 2.8938325194429373, "grad_norm": 0.14826831221580505, "learning_rate": 7.23853379996316e-07, "loss": 0.0111, "step": 16000 }, { "epoch": 2.8983541327545668, "grad_norm": 10.19642448425293, "learning_rate": 7.233928900349971e-07, "loss": 0.0235, "step": 16025 }, { "epoch": 2.9028757460661962, "grad_norm": 0.09074613451957703, "learning_rate": 7.229324000736784e-07, "loss": 0.0177, "step": 16050 }, { "epoch": 2.907397359377826, "grad_norm": 50.30870819091797, "learning_rate": 7.224719101123595e-07, "loss": 0.0166, "step": 16075 }, { "epoch": 2.9119189726894557, "grad_norm": 0.3094925880432129, "learning_rate": 7.220114201510406e-07, "loss": 0.0154, "step": 16100 }, { "epoch": 2.916440586001085, "grad_norm": 0.837853193283081, "learning_rate": 7.215509301897218e-07, "loss": 0.0331, 
"step": 16125 }, { "epoch": 2.9209621993127146, "grad_norm": 1.2888524532318115, "learning_rate": 7.210904402284031e-07, "loss": 0.0331, "step": 16150 }, { "epoch": 2.9254838126243445, "grad_norm": 0.05001299828290939, "learning_rate": 7.206299502670841e-07, "loss": 0.021, "step": 16175 }, { "epoch": 2.930005425935974, "grad_norm": 4.152213096618652, "learning_rate": 7.201694603057653e-07, "loss": 0.0376, "step": 16200 }, { "epoch": 2.9345270392476035, "grad_norm": 3.798003911972046, "learning_rate": 7.197089703444465e-07, "loss": 0.0612, "step": 16225 }, { "epoch": 2.939048652559233, "grad_norm": 0.4344462454319, "learning_rate": 7.192484803831275e-07, "loss": 0.0331, "step": 16250 }, { "epoch": 2.9435702658708625, "grad_norm": 12.142309188842773, "learning_rate": 7.187879904218088e-07, "loss": 0.0369, "step": 16275 }, { "epoch": 2.9480918791824924, "grad_norm": 1.0713512897491455, "learning_rate": 7.1832750046049e-07, "loss": 0.0201, "step": 16300 }, { "epoch": 2.952613492494122, "grad_norm": 12.534268379211426, "learning_rate": 7.17867010499171e-07, "loss": 0.0232, "step": 16325 }, { "epoch": 2.9571351058057513, "grad_norm": 2.9991679191589355, "learning_rate": 7.174065205378522e-07, "loss": 0.0212, "step": 16350 }, { "epoch": 2.9616567191173813, "grad_norm": 0.7451911568641663, "learning_rate": 7.169460305765334e-07, "loss": 0.0128, "step": 16375 }, { "epoch": 2.9661783324290107, "grad_norm": 0.15179577469825745, "learning_rate": 7.164855406152146e-07, "loss": 0.0148, "step": 16400 }, { "epoch": 2.9706999457406402, "grad_norm": 0.2197951227426529, "learning_rate": 7.160250506538957e-07, "loss": 0.0202, "step": 16425 }, { "epoch": 2.9752215590522697, "grad_norm": 8.621418952941895, "learning_rate": 7.155645606925769e-07, "loss": 0.0297, "step": 16450 }, { "epoch": 2.979743172363899, "grad_norm": 1.0757030248641968, "learning_rate": 7.15104070731258e-07, "loss": 0.0164, "step": 16475 }, { "epoch": 2.984264785675529, "grad_norm": 7.468885898590088, 
"learning_rate": 7.146435807699391e-07, "loss": 0.0222, "step": 16500 }, { "epoch": 2.9887863989871586, "grad_norm": 22.351499557495117, "learning_rate": 7.141830908086204e-07, "loss": 0.032, "step": 16525 }, { "epoch": 2.993308012298788, "grad_norm": 1.1031124591827393, "learning_rate": 7.137226008473015e-07, "loss": 0.0178, "step": 16550 }, { "epoch": 2.997829625610418, "grad_norm": 0.33143824338912964, "learning_rate": 7.132621108859827e-07, "loss": 0.0183, "step": 16575 }, { "epoch": 3.0, "eval_loss": 0.28094977140426636, "eval_runtime": 8563.7182, "eval_samples_per_second": 1.109, "eval_steps_per_second": 0.139, "eval_wer": 0.10791159513132607, "step": 16587 }, { "epoch": 3.0023512389220475, "grad_norm": 2.0394110679626465, "learning_rate": 7.128016209246638e-07, "loss": 0.0198, "step": 16600 }, { "epoch": 3.006872852233677, "grad_norm": 0.34627845883369446, "learning_rate": 7.123411309633449e-07, "loss": 0.0163, "step": 16625 }, { "epoch": 3.0113944655453064, "grad_norm": 0.1096586138010025, "learning_rate": 7.118806410020262e-07, "loss": 0.012, "step": 16650 }, { "epoch": 3.0159160788569364, "grad_norm": 0.8496006727218628, "learning_rate": 7.114201510407073e-07, "loss": 0.0078, "step": 16675 }, { "epoch": 3.020437692168566, "grad_norm": 0.45903900265693665, "learning_rate": 7.109596610793884e-07, "loss": 0.0113, "step": 16700 }, { "epoch": 3.0249593054801953, "grad_norm": 0.18622107803821564, "learning_rate": 7.104991711180696e-07, "loss": 0.0177, "step": 16725 }, { "epoch": 3.029480918791825, "grad_norm": 0.07214221358299255, "learning_rate": 7.100386811567507e-07, "loss": 0.0167, "step": 16750 }, { "epoch": 3.0340025321034547, "grad_norm": 1.387891173362732, "learning_rate": 7.095781911954319e-07, "loss": 0.0085, "step": 16775 }, { "epoch": 3.038524145415084, "grad_norm": 79.67159271240234, "learning_rate": 7.091177012341131e-07, "loss": 0.0121, "step": 16800 }, { "epoch": 3.0430457587267137, "grad_norm": 11.743141174316406, "learning_rate": 
7.086572112727943e-07, "loss": 0.0093, "step": 16825 }, { "epoch": 3.047567372038343, "grad_norm": 18.43634605407715, "learning_rate": 7.081967213114753e-07, "loss": 0.0224, "step": 16850 }, { "epoch": 3.052088985349973, "grad_norm": 0.045197684317827225, "learning_rate": 7.077362313501566e-07, "loss": 0.0062, "step": 16875 }, { "epoch": 3.0566105986616026, "grad_norm": 0.12885162234306335, "learning_rate": 7.072757413888378e-07, "loss": 0.0214, "step": 16900 }, { "epoch": 3.061132211973232, "grad_norm": 0.09277495741844177, "learning_rate": 7.068152514275188e-07, "loss": 0.0275, "step": 16925 }, { "epoch": 3.0656538252848615, "grad_norm": 15.500121116638184, "learning_rate": 7.063547614662e-07, "loss": 0.0328, "step": 16950 }, { "epoch": 3.0701754385964914, "grad_norm": 21.585752487182617, "learning_rate": 7.058942715048811e-07, "loss": 0.0304, "step": 16975 }, { "epoch": 3.074697051908121, "grad_norm": 2.1149275302886963, "learning_rate": 7.054337815435624e-07, "loss": 0.0315, "step": 17000 }, { "epoch": 3.0792186652197504, "grad_norm": 0.16735130548477173, "learning_rate": 7.049732915822435e-07, "loss": 0.0136, "step": 17025 }, { "epoch": 3.08374027853138, "grad_norm": 0.7384225130081177, "learning_rate": 7.045128016209246e-07, "loss": 0.0059, "step": 17050 }, { "epoch": 3.0882618918430094, "grad_norm": 0.19367511570453644, "learning_rate": 7.040523116596058e-07, "loss": 0.01, "step": 17075 }, { "epoch": 3.0927835051546393, "grad_norm": 0.5572232604026794, "learning_rate": 7.035918216982869e-07, "loss": 0.0135, "step": 17100 }, { "epoch": 3.0973051184662688, "grad_norm": 0.05385562777519226, "learning_rate": 7.031313317369681e-07, "loss": 0.0184, "step": 17125 }, { "epoch": 3.1018267317778982, "grad_norm": 4.722483158111572, "learning_rate": 7.026708417756493e-07, "loss": 0.0039, "step": 17150 }, { "epoch": 3.1063483450895277, "grad_norm": 0.6491204500198364, "learning_rate": 7.022103518143304e-07, "loss": 0.0147, "step": 17175 }, { "epoch": 3.1108699584011577, 
"grad_norm": 0.014111626893281937, "learning_rate": 7.017498618530115e-07, "loss": 0.0119, "step": 17200 }, { "epoch": 3.115391571712787, "grad_norm": 5.42165470123291, "learning_rate": 7.012893718916927e-07, "loss": 0.0116, "step": 17225 }, { "epoch": 3.1199131850244166, "grad_norm": 7.389101982116699, "learning_rate": 7.00828881930374e-07, "loss": 0.0103, "step": 17250 }, { "epoch": 3.124434798336046, "grad_norm": 0.3708292841911316, "learning_rate": 7.00368391969055e-07, "loss": 0.0074, "step": 17275 }, { "epoch": 3.128956411647676, "grad_norm": 0.08553273230791092, "learning_rate": 6.999079020077362e-07, "loss": 0.0081, "step": 17300 }, { "epoch": 3.1334780249593055, "grad_norm": 5.075300216674805, "learning_rate": 6.994474120464174e-07, "loss": 0.0204, "step": 17325 }, { "epoch": 3.137999638270935, "grad_norm": 8.977065086364746, "learning_rate": 6.989869220850984e-07, "loss": 0.0095, "step": 17350 }, { "epoch": 3.1425212515825645, "grad_norm": 3.1058037281036377, "learning_rate": 6.985264321237797e-07, "loss": 0.0618, "step": 17375 }, { "epoch": 3.1470428648941944, "grad_norm": 4.081079959869385, "learning_rate": 6.980659421624609e-07, "loss": 0.0441, "step": 17400 }, { "epoch": 3.151564478205824, "grad_norm": 32.8135986328125, "learning_rate": 6.976054522011419e-07, "loss": 0.0182, "step": 17425 }, { "epoch": 3.1560860915174533, "grad_norm": 0.21175616979599, "learning_rate": 6.971449622398231e-07, "loss": 0.0125, "step": 17450 }, { "epoch": 3.160607704829083, "grad_norm": 0.16236978769302368, "learning_rate": 6.966844722785043e-07, "loss": 0.0121, "step": 17475 }, { "epoch": 3.1651293181407127, "grad_norm": 35.438865661621094, "learning_rate": 6.962239823171855e-07, "loss": 0.0111, "step": 17500 }, { "epoch": 3.1696509314523422, "grad_norm": 0.0932527482509613, "learning_rate": 6.957634923558666e-07, "loss": 0.0118, "step": 17525 }, { "epoch": 3.1741725447639717, "grad_norm": 9.384880065917969, "learning_rate": 6.953030023945478e-07, "loss": 0.0151, "step": 
17550 }, { "epoch": 3.178694158075601, "grad_norm": 0.24784910678863525, "learning_rate": 6.948425124332289e-07, "loss": 0.0106, "step": 17575 }, { "epoch": 3.183215771387231, "grad_norm": 0.9011788964271545, "learning_rate": 6.943820224719101e-07, "loss": 0.0109, "step": 17600 }, { "epoch": 3.1877373846988606, "grad_norm": 1.6620635986328125, "learning_rate": 6.939215325105913e-07, "loss": 0.0124, "step": 17625 }, { "epoch": 3.19225899801049, "grad_norm": 0.09768769890069962, "learning_rate": 6.934610425492724e-07, "loss": 0.0131, "step": 17650 }, { "epoch": 3.1967806113221195, "grad_norm": 3.7721073627471924, "learning_rate": 6.930005525879535e-07, "loss": 0.0315, "step": 17675 }, { "epoch": 3.2013022246337495, "grad_norm": 4.030418872833252, "learning_rate": 6.925400626266346e-07, "loss": 0.0216, "step": 17700 }, { "epoch": 3.205823837945379, "grad_norm": 16.64514923095703, "learning_rate": 6.920795726653159e-07, "loss": 0.0309, "step": 17725 }, { "epoch": 3.2103454512570084, "grad_norm": 0.3508654236793518, "learning_rate": 6.916190827039971e-07, "loss": 0.0075, "step": 17750 }, { "epoch": 3.214867064568638, "grad_norm": 0.19037003815174103, "learning_rate": 6.911585927426781e-07, "loss": 0.0547, "step": 17775 }, { "epoch": 3.219388677880268, "grad_norm": 0.2579357624053955, "learning_rate": 6.906981027813593e-07, "loss": 0.0483, "step": 17800 }, { "epoch": 3.2239102911918973, "grad_norm": 0.3658471405506134, "learning_rate": 6.902376128200405e-07, "loss": 0.0135, "step": 17825 }, { "epoch": 3.228431904503527, "grad_norm": 7.055116176605225, "learning_rate": 6.897771228587216e-07, "loss": 0.0192, "step": 17850 }, { "epoch": 3.2329535178151563, "grad_norm": 0.09685884416103363, "learning_rate": 6.893166328974028e-07, "loss": 0.0104, "step": 17875 }, { "epoch": 3.237475131126786, "grad_norm": 0.2775970697402954, "learning_rate": 6.88856142936084e-07, "loss": 0.0112, "step": 17900 }, { "epoch": 3.2419967444384157, "grad_norm": 0.04564272239804268, "learning_rate": 
6.883956529747651e-07, "loss": 0.0167, "step": 17925 }, { "epoch": 3.246518357750045, "grad_norm": 0.1531757116317749, "learning_rate": 6.879351630134462e-07, "loss": 0.0034, "step": 17950 }, { "epoch": 3.2510399710616746, "grad_norm": 0.12003475427627563, "learning_rate": 6.874746730521275e-07, "loss": 0.0287, "step": 17975 }, { "epoch": 3.2555615843733046, "grad_norm": 0.1408698409795761, "learning_rate": 6.870141830908086e-07, "loss": 0.0058, "step": 18000 }, { "epoch": 3.260083197684934, "grad_norm": 0.21101155877113342, "learning_rate": 6.865536931294897e-07, "loss": 0.0224, "step": 18025 }, { "epoch": 3.2646048109965635, "grad_norm": 3.5580930709838867, "learning_rate": 6.861116227666237e-07, "loss": 0.0336, "step": 18050 }, { "epoch": 3.269126424308193, "grad_norm": 3.6865243911743164, "learning_rate": 6.856511328053049e-07, "loss": 0.0253, "step": 18075 }, { "epoch": 3.273648037619823, "grad_norm": 0.7238378524780273, "learning_rate": 6.85190642843986e-07, "loss": 0.0028, "step": 18100 }, { "epoch": 3.2781696509314524, "grad_norm": 0.032546211034059525, "learning_rate": 6.847301528826671e-07, "loss": 0.011, "step": 18125 }, { "epoch": 3.282691264243082, "grad_norm": 0.2295057624578476, "learning_rate": 6.842696629213482e-07, "loss": 0.0241, "step": 18150 }, { "epoch": 3.2872128775547114, "grad_norm": 0.37478190660476685, "learning_rate": 6.838091729600295e-07, "loss": 0.0487, "step": 18175 }, { "epoch": 3.2917344908663413, "grad_norm": 2.0155792236328125, "learning_rate": 6.833486829987106e-07, "loss": 0.0273, "step": 18200 }, { "epoch": 3.2962561041779708, "grad_norm": 1.1389836072921753, "learning_rate": 6.828881930373917e-07, "loss": 0.0132, "step": 18225 }, { "epoch": 3.3007777174896002, "grad_norm": 7.253364086151123, "learning_rate": 6.824277030760729e-07, "loss": 0.0085, "step": 18250 }, { "epoch": 3.3052993308012297, "grad_norm": 17.829906463623047, "learning_rate": 6.819672131147541e-07, "loss": 0.0189, "step": 18275 }, { "epoch": 
3.3098209441128597, "grad_norm": 3.49690842628479, "learning_rate": 6.815067231534352e-07, "loss": 0.01, "step": 18300 }, { "epoch": 3.314342557424489, "grad_norm": 0.6538553237915039, "learning_rate": 6.810462331921164e-07, "loss": 0.0097, "step": 18325 }, { "epoch": 3.3188641707361186, "grad_norm": 29.16781234741211, "learning_rate": 6.805857432307976e-07, "loss": 0.0162, "step": 18350 }, { "epoch": 3.323385784047748, "grad_norm": 0.38442912697792053, "learning_rate": 6.801252532694786e-07, "loss": 0.013, "step": 18375 }, { "epoch": 3.327907397359378, "grad_norm": 16.521053314208984, "learning_rate": 6.796647633081598e-07, "loss": 0.0153, "step": 18400 }, { "epoch": 3.3324290106710075, "grad_norm": 1.947344422340393, "learning_rate": 6.792042733468411e-07, "loss": 0.0088, "step": 18425 }, { "epoch": 3.336950623982637, "grad_norm": 0.08833196014165878, "learning_rate": 6.787437833855221e-07, "loss": 0.0169, "step": 18450 }, { "epoch": 3.3414722372942665, "grad_norm": 0.04213396832346916, "learning_rate": 6.782832934242033e-07, "loss": 0.0048, "step": 18475 }, { "epoch": 3.345993850605896, "grad_norm": 14.339350700378418, "learning_rate": 6.778228034628845e-07, "loss": 0.0322, "step": 18500 }, { "epoch": 3.350515463917526, "grad_norm": 2.3550896644592285, "learning_rate": 6.773623135015656e-07, "loss": 0.0079, "step": 18525 }, { "epoch": 3.3550370772291553, "grad_norm": 0.04377694055438042, "learning_rate": 6.769018235402468e-07, "loss": 0.0408, "step": 18550 }, { "epoch": 3.359558690540785, "grad_norm": 66.76377868652344, "learning_rate": 6.76441333578928e-07, "loss": 0.0646, "step": 18575 }, { "epoch": 3.3640803038524147, "grad_norm": 24.474994659423828, "learning_rate": 6.759808436176091e-07, "loss": 0.0251, "step": 18600 }, { "epoch": 3.3686019171640442, "grad_norm": 4.777419090270996, "learning_rate": 6.755203536562902e-07, "loss": 0.0127, "step": 18625 }, { "epoch": 3.3731235304756737, "grad_norm": 1.3446674346923828, "learning_rate": 6.750598636949714e-07, 
"loss": 0.0246, "step": 18650 }, { "epoch": 3.377645143787303, "grad_norm": 5.482788562774658, "learning_rate": 6.745993737336526e-07, "loss": 0.0141, "step": 18675 }, { "epoch": 3.3821667570989327, "grad_norm": 1.5254578590393066, "learning_rate": 6.741388837723337e-07, "loss": 0.019, "step": 18700 }, { "epoch": 3.3866883704105626, "grad_norm": 2.620288372039795, "learning_rate": 6.736783938110149e-07, "loss": 0.0257, "step": 18725 }, { "epoch": 3.391209983722192, "grad_norm": 5.762913703918457, "learning_rate": 6.73217903849696e-07, "loss": 0.0033, "step": 18750 }, { "epoch": 3.3957315970338215, "grad_norm": 5.787083625793457, "learning_rate": 6.727574138883773e-07, "loss": 0.0093, "step": 18775 }, { "epoch": 3.4002532103454515, "grad_norm": 2.400695562362671, "learning_rate": 6.722969239270584e-07, "loss": 0.0103, "step": 18800 }, { "epoch": 3.404774823657081, "grad_norm": 0.05644823983311653, "learning_rate": 6.718364339657395e-07, "loss": 0.0058, "step": 18825 }, { "epoch": 3.4092964369687104, "grad_norm": 0.2891055941581726, "learning_rate": 6.713759440044207e-07, "loss": 0.0188, "step": 18850 }, { "epoch": 3.41381805028034, "grad_norm": 0.8349182605743408, "learning_rate": 6.709154540431017e-07, "loss": 0.0295, "step": 18875 }, { "epoch": 3.4183396635919694, "grad_norm": 0.06508205085992813, "learning_rate": 6.70454964081783e-07, "loss": 0.0079, "step": 18900 }, { "epoch": 3.4228612769035993, "grad_norm": 8.653030395507812, "learning_rate": 6.699944741204642e-07, "loss": 0.0152, "step": 18925 }, { "epoch": 3.427382890215229, "grad_norm": 0.2311927080154419, "learning_rate": 6.695339841591454e-07, "loss": 0.0414, "step": 18950 }, { "epoch": 3.4319045035268583, "grad_norm": 55.472957611083984, "learning_rate": 6.690734941978264e-07, "loss": 0.0386, "step": 18975 }, { "epoch": 3.436426116838488, "grad_norm": 0.4529309570789337, "learning_rate": 6.686130042365076e-07, "loss": 0.0494, "step": 19000 }, { "epoch": 3.4409477301501177, "grad_norm": 
1.5187815427780151, "learning_rate": 6.681525142751889e-07, "loss": 0.0166, "step": 19025 }, { "epoch": 3.445469343461747, "grad_norm": 0.964755654335022, "learning_rate": 6.676920243138699e-07, "loss": 0.0076, "step": 19050 }, { "epoch": 3.4499909567733766, "grad_norm": 22.352828979492188, "learning_rate": 6.672315343525511e-07, "loss": 0.019, "step": 19075 }, { "epoch": 3.454512570085006, "grad_norm": 0.08743809163570404, "learning_rate": 6.667710443912322e-07, "loss": 0.0132, "step": 19100 }, { "epoch": 3.459034183396636, "grad_norm": 0.2849852740764618, "learning_rate": 6.663105544299133e-07, "loss": 0.0206, "step": 19125 }, { "epoch": 3.4635557967082655, "grad_norm": 1.0750819444656372, "learning_rate": 6.658500644685946e-07, "loss": 0.0206, "step": 19150 }, { "epoch": 3.468077410019895, "grad_norm": 2.5783636569976807, "learning_rate": 6.653895745072757e-07, "loss": 0.0086, "step": 19175 }, { "epoch": 3.472599023331525, "grad_norm": 3.71582293510437, "learning_rate": 6.649290845459569e-07, "loss": 0.0055, "step": 19200 }, { "epoch": 3.4771206366431544, "grad_norm": 0.3580998182296753, "learning_rate": 6.64468594584638e-07, "loss": 0.0128, "step": 19225 }, { "epoch": 3.481642249954784, "grad_norm": 20.23494529724121, "learning_rate": 6.640081046233191e-07, "loss": 0.032, "step": 19250 }, { "epoch": 3.4861638632664134, "grad_norm": 0.634772539138794, "learning_rate": 6.635476146620004e-07, "loss": 0.0041, "step": 19275 }, { "epoch": 3.490685476578043, "grad_norm": 9.67199993133545, "learning_rate": 6.630871247006815e-07, "loss": 0.0264, "step": 19300 }, { "epoch": 3.4952070898896728, "grad_norm": 0.13094273209571838, "learning_rate": 6.626266347393626e-07, "loss": 0.0177, "step": 19325 }, { "epoch": 3.4997287032013022, "grad_norm": 57.88798141479492, "learning_rate": 6.621661447780438e-07, "loss": 0.0171, "step": 19350 }, { "epoch": 3.5042503165129317, "grad_norm": 0.32330283522605896, "learning_rate": 6.617056548167249e-07, "loss": 0.0333, "step": 19375 }, { 
"epoch": 3.5087719298245617, "grad_norm": 2.477999210357666, "learning_rate": 6.612451648554061e-07, "loss": 0.0257, "step": 19400 }, { "epoch": 3.513293543136191, "grad_norm": 4.913309097290039, "learning_rate": 6.607846748940873e-07, "loss": 0.014, "step": 19425 }, { "epoch": 3.5178151564478206, "grad_norm": 1.5322345495224, "learning_rate": 6.603241849327685e-07, "loss": 0.0214, "step": 19450 }, { "epoch": 3.52233676975945, "grad_norm": 0.07133228331804276, "learning_rate": 6.598636949714495e-07, "loss": 0.0148, "step": 19475 }, { "epoch": 3.5268583830710796, "grad_norm": 13.74171257019043, "learning_rate": 6.594032050101308e-07, "loss": 0.0121, "step": 19500 }, { "epoch": 3.5313799963827095, "grad_norm": 27.33115577697754, "learning_rate": 6.58942715048812e-07, "loss": 0.013, "step": 19525 }, { "epoch": 3.535901609694339, "grad_norm": 0.22666044533252716, "learning_rate": 6.58482225087493e-07, "loss": 0.0145, "step": 19550 }, { "epoch": 3.5404232230059685, "grad_norm": 13.335356712341309, "learning_rate": 6.580217351261742e-07, "loss": 0.0061, "step": 19575 }, { "epoch": 3.5449448363175984, "grad_norm": 4.350337505340576, "learning_rate": 6.575612451648554e-07, "loss": 0.0038, "step": 19600 }, { "epoch": 3.549466449629228, "grad_norm": 0.13537144660949707, "learning_rate": 6.571007552035366e-07, "loss": 0.0125, "step": 19625 }, { "epoch": 3.5539880629408573, "grad_norm": 56.16394805908203, "learning_rate": 6.566402652422177e-07, "loss": 0.0213, "step": 19650 }, { "epoch": 3.558509676252487, "grad_norm": 0.10288344323635101, "learning_rate": 6.561797752808989e-07, "loss": 0.0156, "step": 19675 }, { "epoch": 3.5630312895641163, "grad_norm": 3.5337467193603516, "learning_rate": 6.5571928531958e-07, "loss": 0.019, "step": 19700 }, { "epoch": 3.5675529028757462, "grad_norm": 0.20034463703632355, "learning_rate": 6.552587953582611e-07, "loss": 0.05, "step": 19725 }, { "epoch": 3.5720745161873757, "grad_norm": 0.22690874338150024, "learning_rate": 
6.547983053969424e-07, "loss": 0.0217, "step": 19750 }, { "epoch": 3.576596129499005, "grad_norm": 26.994596481323242, "learning_rate": 6.543378154356235e-07, "loss": 0.0343, "step": 19775 }, { "epoch": 3.581117742810635, "grad_norm": 5.310791492462158, "learning_rate": 6.538773254743046e-07, "loss": 0.0325, "step": 19800 }, { "epoch": 3.5856393561222646, "grad_norm": 1.536670207977295, "learning_rate": 6.534168355129857e-07, "loss": 0.0057, "step": 19825 }, { "epoch": 3.590160969433894, "grad_norm": 4.136430263519287, "learning_rate": 6.529563455516669e-07, "loss": 0.0292, "step": 19850 }, { "epoch": 3.5946825827455235, "grad_norm": 0.5038244724273682, "learning_rate": 6.524958555903482e-07, "loss": 0.0159, "step": 19875 }, { "epoch": 3.599204196057153, "grad_norm": 0.03355490043759346, "learning_rate": 6.520353656290292e-07, "loss": 0.0137, "step": 19900 }, { "epoch": 3.6037258093687825, "grad_norm": 1.1418379545211792, "learning_rate": 6.515748756677104e-07, "loss": 0.0043, "step": 19925 }, { "epoch": 3.6082474226804124, "grad_norm": 56.00255584716797, "learning_rate": 6.511143857063916e-07, "loss": 0.034, "step": 19950 }, { "epoch": 3.612769035992042, "grad_norm": 0.5989301204681396, "learning_rate": 6.506538957450726e-07, "loss": 0.0094, "step": 19975 }, { "epoch": 3.6172906493036714, "grad_norm": 5.834647178649902, "learning_rate": 6.501934057837539e-07, "loss": 0.0104, "step": 20000 }, { "epoch": 3.6218122626153013, "grad_norm": 0.1490633189678192, "learning_rate": 6.497329158224351e-07, "loss": 0.0167, "step": 20025 }, { "epoch": 3.626333875926931, "grad_norm": 18.232946395874023, "learning_rate": 6.492724258611161e-07, "loss": 0.0139, "step": 20050 }, { "epoch": 3.6308554892385603, "grad_norm": 0.08834528177976608, "learning_rate": 6.488119358997973e-07, "loss": 0.0253, "step": 20075 }, { "epoch": 3.6353771025501898, "grad_norm": 0.3102453351020813, "learning_rate": 6.483698655369313e-07, "loss": 0.0146, "step": 20100 }, { "epoch": 3.6398987158618192, 
"grad_norm": 0.26993054151535034, "learning_rate": 6.479093755756125e-07, "loss": 0.0187, "step": 20125 }, { "epoch": 3.644420329173449, "grad_norm": 0.2688346803188324, "learning_rate": 6.474488856142935e-07, "loss": 0.0304, "step": 20150 }, { "epoch": 3.6489419424850786, "grad_norm": 0.11477080732584, "learning_rate": 6.469883956529747e-07, "loss": 0.0515, "step": 20175 }, { "epoch": 3.653463555796708, "grad_norm": 2.2574214935302734, "learning_rate": 6.46527905691656e-07, "loss": 0.0248, "step": 20200 }, { "epoch": 3.657985169108338, "grad_norm": 0.9582886099815369, "learning_rate": 6.460674157303371e-07, "loss": 0.0128, "step": 20225 }, { "epoch": 3.6625067824199675, "grad_norm": 17.577070236206055, "learning_rate": 6.456069257690182e-07, "loss": 0.0189, "step": 20250 }, { "epoch": 3.667028395731597, "grad_norm": 8.872756004333496, "learning_rate": 6.451464358076993e-07, "loss": 0.0113, "step": 20275 }, { "epoch": 3.6715500090432265, "grad_norm": 0.408764123916626, "learning_rate": 6.446859458463805e-07, "loss": 0.0155, "step": 20300 }, { "epoch": 3.676071622354856, "grad_norm": 0.4208977520465851, "learning_rate": 6.442254558850617e-07, "loss": 0.0044, "step": 20325 }, { "epoch": 3.680593235666486, "grad_norm": 0.39012089371681213, "learning_rate": 6.437649659237428e-07, "loss": 0.0161, "step": 20350 }, { "epoch": 3.6851148489781154, "grad_norm": 0.0061714984476566315, "learning_rate": 6.43304475962424e-07, "loss": 0.0113, "step": 20375 }, { "epoch": 3.689636462289745, "grad_norm": 0.03614750877022743, "learning_rate": 6.428439860011051e-07, "loss": 0.0049, "step": 20400 }, { "epoch": 3.6941580756013748, "grad_norm": 0.015124999918043613, "learning_rate": 6.423834960397862e-07, "loss": 0.0111, "step": 20425 }, { "epoch": 3.6986796889130042, "grad_norm": 1.7233740091323853, "learning_rate": 6.419230060784675e-07, "loss": 0.0185, "step": 20450 }, { "epoch": 3.7032013022246337, "grad_norm": 1.9191359281539917, "learning_rate": 6.414625161171487e-07, "loss": 
0.0043, "step": 20475 }, { "epoch": 3.707722915536263, "grad_norm": 0.6458703875541687, "learning_rate": 6.410020261558297e-07, "loss": 0.0183, "step": 20500 }, { "epoch": 3.7122445288478927, "grad_norm": 0.0197773240506649, "learning_rate": 6.405415361945109e-07, "loss": 0.0244, "step": 20525 }, { "epoch": 3.7167661421595226, "grad_norm": 0.8223174214363098, "learning_rate": 6.400810462331921e-07, "loss": 0.0311, "step": 20550 }, { "epoch": 3.721287755471152, "grad_norm": 42.07274627685547, "learning_rate": 6.396205562718732e-07, "loss": 0.0578, "step": 20575 }, { "epoch": 3.7258093687827816, "grad_norm": 0.2349138706922531, "learning_rate": 6.391600663105544e-07, "loss": 0.0253, "step": 20600 }, { "epoch": 3.7303309820944115, "grad_norm": 0.18693946301937103, "learning_rate": 6.386995763492356e-07, "loss": 0.0104, "step": 20625 }, { "epoch": 3.734852595406041, "grad_norm": 3.6481404304504395, "learning_rate": 6.382390863879167e-07, "loss": 0.0067, "step": 20650 }, { "epoch": 3.7393742087176705, "grad_norm": 0.06991686671972275, "learning_rate": 6.377785964265979e-07, "loss": 0.0086, "step": 20675 }, { "epoch": 3.7438958220293, "grad_norm": 0.06903531402349472, "learning_rate": 6.373181064652791e-07, "loss": 0.012, "step": 20700 }, { "epoch": 3.7484174353409294, "grad_norm": 0.3118179142475128, "learning_rate": 6.368576165039602e-07, "loss": 0.0145, "step": 20725 }, { "epoch": 3.7529390486525593, "grad_norm": 0.13410675525665283, "learning_rate": 6.363971265426413e-07, "loss": 0.0116, "step": 20750 }, { "epoch": 3.757460661964189, "grad_norm": 6.128607273101807, "learning_rate": 6.359366365813225e-07, "loss": 0.0147, "step": 20775 }, { "epoch": 3.7619822752758183, "grad_norm": 16.329174041748047, "learning_rate": 6.354761466200037e-07, "loss": 0.0289, "step": 20800 }, { "epoch": 3.7665038885874482, "grad_norm": 0.022927312180399895, "learning_rate": 6.350156566586848e-07, "loss": 0.0194, "step": 20825 }, { "epoch": 3.7710255018990777, "grad_norm": 
0.08002068102359772, "learning_rate": 6.34555166697366e-07, "loss": 0.0276, "step": 20850 }, { "epoch": 3.775547115210707, "grad_norm": 6.620906352996826, "learning_rate": 6.340946767360471e-07, "loss": 0.0303, "step": 20875 }, { "epoch": 3.7800687285223367, "grad_norm": 20.082733154296875, "learning_rate": 6.336341867747283e-07, "loss": 0.0285, "step": 20900 }, { "epoch": 3.784590341833966, "grad_norm": 0.4795994758605957, "learning_rate": 6.331736968134095e-07, "loss": 0.0164, "step": 20925 }, { "epoch": 3.789111955145596, "grad_norm": 0.2975751757621765, "learning_rate": 6.327132068520906e-07, "loss": 0.0246, "step": 20950 }, { "epoch": 3.7936335684572255, "grad_norm": 1.9404077529907227, "learning_rate": 6.322527168907718e-07, "loss": 0.0274, "step": 20975 }, { "epoch": 3.798155181768855, "grad_norm": 1.5092577934265137, "learning_rate": 6.317922269294528e-07, "loss": 0.0286, "step": 21000 }, { "epoch": 3.802676795080485, "grad_norm": 3.025921106338501, "learning_rate": 6.31331736968134e-07, "loss": 0.0139, "step": 21025 }, { "epoch": 3.8071984083921144, "grad_norm": 0.11709149926900864, "learning_rate": 6.308712470068153e-07, "loss": 0.0067, "step": 21050 }, { "epoch": 3.811720021703744, "grad_norm": 3.3579108715057373, "learning_rate": 6.304107570454963e-07, "loss": 0.0077, "step": 21075 }, { "epoch": 3.8162416350153734, "grad_norm": 4.9033050537109375, "learning_rate": 6.299502670841775e-07, "loss": 0.0124, "step": 21100 }, { "epoch": 3.820763248327003, "grad_norm": 0.054667115211486816, "learning_rate": 6.294897771228587e-07, "loss": 0.0147, "step": 21125 }, { "epoch": 3.825284861638633, "grad_norm": 20.414718627929688, "learning_rate": 6.290292871615398e-07, "loss": 0.018, "step": 21150 }, { "epoch": 3.8298064749502623, "grad_norm": 0.12169066816568375, "learning_rate": 6.28568797200221e-07, "loss": 0.0266, "step": 21175 }, { "epoch": 3.8343280882618918, "grad_norm": 0.21765799820423126, "learning_rate": 6.281083072389022e-07, "loss": 0.0061, "step": 21200 
}, { "epoch": 3.8388497015735217, "grad_norm": 0.43716397881507874, "learning_rate": 6.276478172775833e-07, "loss": 0.0114, "step": 21225 }, { "epoch": 3.843371314885151, "grad_norm": 1.5071337223052979, "learning_rate": 6.271873273162644e-07, "loss": 0.0111, "step": 21250 }, { "epoch": 3.8478929281967806, "grad_norm": 1.3820738792419434, "learning_rate": 6.267268373549456e-07, "loss": 0.0088, "step": 21275 }, { "epoch": 3.85241454150841, "grad_norm": 18.006547927856445, "learning_rate": 6.262663473936268e-07, "loss": 0.0628, "step": 21300 }, { "epoch": 3.8569361548200396, "grad_norm": 0.09640432149171829, "learning_rate": 6.25805857432308e-07, "loss": 0.0324, "step": 21325 }, { "epoch": 3.8614577681316695, "grad_norm": 86.30760192871094, "learning_rate": 6.253453674709891e-07, "loss": 0.0348, "step": 21350 }, { "epoch": 3.865979381443299, "grad_norm": 20.232952117919922, "learning_rate": 6.248848775096702e-07, "loss": 0.0564, "step": 21375 }, { "epoch": 3.8705009947549285, "grad_norm": 0.07119341939687729, "learning_rate": 6.244243875483515e-07, "loss": 0.0311, "step": 21400 }, { "epoch": 3.8750226080665584, "grad_norm": 1.7042196989059448, "learning_rate": 6.239638975870326e-07, "loss": 0.0114, "step": 21425 }, { "epoch": 3.879544221378188, "grad_norm": 0.36282435059547424, "learning_rate": 6.235034076257137e-07, "loss": 0.0227, "step": 21450 }, { "epoch": 3.8840658346898174, "grad_norm": 0.15856370329856873, "learning_rate": 6.230429176643949e-07, "loss": 0.0134, "step": 21475 }, { "epoch": 3.888587448001447, "grad_norm": 22.399805068969727, "learning_rate": 6.22582427703076e-07, "loss": 0.0165, "step": 21500 }, { "epoch": 3.8931090613130763, "grad_norm": 1.0881842374801636, "learning_rate": 6.221219377417572e-07, "loss": 0.0081, "step": 21525 }, { "epoch": 3.8976306746247062, "grad_norm": 0.025745024904608727, "learning_rate": 6.216614477804384e-07, "loss": 0.012, "step": 21550 }, { "epoch": 3.9021522879363357, "grad_norm": 0.05021649971604347, "learning_rate": 
6.212009578191196e-07, "loss": 0.0035, "step": 21575 }, { "epoch": 3.906673901247965, "grad_norm": 0.08221199363470078, "learning_rate": 6.207404678578006e-07, "loss": 0.0131, "step": 21600 }, { "epoch": 3.911195514559595, "grad_norm": 0.9112662672996521, "learning_rate": 6.202799778964818e-07, "loss": 0.0087, "step": 21625 }, { "epoch": 3.9157171278712246, "grad_norm": 3.2704386711120605, "learning_rate": 6.198194879351631e-07, "loss": 0.0057, "step": 21650 }, { "epoch": 3.920238741182854, "grad_norm": 0.2193220853805542, "learning_rate": 6.193589979738441e-07, "loss": 0.0154, "step": 21675 }, { "epoch": 3.9247603544944836, "grad_norm": 1.7677456140518188, "learning_rate": 6.188985080125253e-07, "loss": 0.0338, "step": 21700 }, { "epoch": 3.929281967806113, "grad_norm": 4.569864273071289, "learning_rate": 6.184380180512065e-07, "loss": 0.045, "step": 21725 }, { "epoch": 3.933803581117743, "grad_norm": 1.5949956178665161, "learning_rate": 6.179775280898875e-07, "loss": 0.0126, "step": 21750 }, { "epoch": 3.9383251944293725, "grad_norm": 0.5718483924865723, "learning_rate": 6.175170381285688e-07, "loss": 0.0407, "step": 21775 }, { "epoch": 3.942846807741002, "grad_norm": 0.3438205122947693, "learning_rate": 6.1705654816725e-07, "loss": 0.0312, "step": 21800 }, { "epoch": 3.9473684210526314, "grad_norm": 0.23237770795822144, "learning_rate": 6.165960582059311e-07, "loss": 0.0075, "step": 21825 }, { "epoch": 3.9518900343642613, "grad_norm": 0.4722042381763458, "learning_rate": 6.161355682446122e-07, "loss": 0.0142, "step": 21850 }, { "epoch": 3.956411647675891, "grad_norm": 3.872162342071533, "learning_rate": 6.156750782832933e-07, "loss": 0.0061, "step": 21875 }, { "epoch": 3.9609332609875203, "grad_norm": 12.684093475341797, "learning_rate": 6.152145883219746e-07, "loss": 0.0123, "step": 21900 }, { "epoch": 3.96545487429915, "grad_norm": 1.105630874633789, "learning_rate": 6.147540983606557e-07, "loss": 0.0066, "step": 21925 }, { "epoch": 3.9699764876107793, 
"grad_norm": 0.050917476415634155, "learning_rate": 6.142936083993368e-07, "loss": 0.0095, "step": 21950 }, { "epoch": 3.974498100922409, "grad_norm": 6.288554668426514, "learning_rate": 6.13833118438018e-07, "loss": 0.0039, "step": 21975 }, { "epoch": 3.9790197142340387, "grad_norm": 0.09361649304628372, "learning_rate": 6.133726284766991e-07, "loss": 0.0174, "step": 22000 }, { "epoch": 3.983541327545668, "grad_norm": 0.36472177505493164, "learning_rate": 6.129121385153803e-07, "loss": 0.012, "step": 22025 }, { "epoch": 3.988062940857298, "grad_norm": 0.4556725025177002, "learning_rate": 6.124516485540615e-07, "loss": 0.012, "step": 22050 }, { "epoch": 3.9925845541689275, "grad_norm": 4.3723225593566895, "learning_rate": 6.119911585927427e-07, "loss": 0.0076, "step": 22075 }, { "epoch": 3.997106167480557, "grad_norm": 0.04663983732461929, "learning_rate": 6.115490882298766e-07, "loss": 0.0322, "step": 22100 }, { "epoch": 4.0, "eval_loss": 0.3087974786758423, "eval_runtime": 8497.8744, "eval_samples_per_second": 1.117, "eval_steps_per_second": 0.14, "eval_wer": 0.10576553491351698, "step": 22116 }, { "epoch": 4.0016277807921865, "grad_norm": 0.3466642498970032, "learning_rate": 6.110885982685577e-07, "loss": 0.0404, "step": 22125 }, { "epoch": 4.006149394103816, "grad_norm": 0.15252432227134705, "learning_rate": 6.106281083072389e-07, "loss": 0.0096, "step": 22150 }, { "epoch": 4.0106710074154455, "grad_norm": 19.444740295410156, "learning_rate": 6.101676183459201e-07, "loss": 0.0159, "step": 22175 }, { "epoch": 4.015192620727076, "grad_norm": 10.760107040405273, "learning_rate": 6.097071283846011e-07, "loss": 0.0068, "step": 22200 }, { "epoch": 4.019714234038705, "grad_norm": 0.07839302718639374, "learning_rate": 6.092466384232824e-07, "loss": 0.0026, "step": 22225 }, { "epoch": 4.024235847350335, "grad_norm": 0.3171479105949402, "learning_rate": 6.087861484619636e-07, "loss": 0.0058, "step": 22250 }, { "epoch": 4.028757460661964, "grad_norm": 0.2635548412799835, 
"learning_rate": 6.083256585006446e-07, "loss": 0.0104, "step": 22275 }, { "epoch": 4.033279073973594, "grad_norm": 0.7347187995910645, "learning_rate": 6.078651685393258e-07, "loss": 0.0041, "step": 22300 }, { "epoch": 4.037800687285223, "grad_norm": 0.03082045540213585, "learning_rate": 6.074046785780069e-07, "loss": 0.0023, "step": 22325 }, { "epoch": 4.042322300596853, "grad_norm": 0.030160456895828247, "learning_rate": 6.069441886166882e-07, "loss": 0.0023, "step": 22350 }, { "epoch": 4.046843913908482, "grad_norm": 22.204673767089844, "learning_rate": 6.064836986553693e-07, "loss": 0.0166, "step": 22375 }, { "epoch": 4.051365527220113, "grad_norm": 5.911830425262451, "learning_rate": 6.060232086940504e-07, "loss": 0.0026, "step": 22400 }, { "epoch": 4.055887140531742, "grad_norm": 0.5570561289787292, "learning_rate": 6.055627187327316e-07, "loss": 0.0142, "step": 22425 }, { "epoch": 4.0604087538433715, "grad_norm": 0.12296438962221146, "learning_rate": 6.051022287714127e-07, "loss": 0.0138, "step": 22450 }, { "epoch": 4.064930367155001, "grad_norm": 1.0237263441085815, "learning_rate": 6.046417388100939e-07, "loss": 0.0244, "step": 22475 }, { "epoch": 4.0694519804666305, "grad_norm": 15.541071891784668, "learning_rate": 6.041812488487751e-07, "loss": 0.0174, "step": 22500 }, { "epoch": 4.07397359377826, "grad_norm": 2.008063793182373, "learning_rate": 6.037207588874562e-07, "loss": 0.0325, "step": 22525 }, { "epoch": 4.078495207089889, "grad_norm": 1.934888243675232, "learning_rate": 6.032602689261373e-07, "loss": 0.0051, "step": 22550 }, { "epoch": 4.083016820401519, "grad_norm": 14.471735000610352, "learning_rate": 6.027997789648186e-07, "loss": 0.0077, "step": 22575 }, { "epoch": 4.087538433713149, "grad_norm": 0.05337606742978096, "learning_rate": 6.023392890034998e-07, "loss": 0.012, "step": 22600 }, { "epoch": 4.092060047024779, "grad_norm": 0.21558235585689545, "learning_rate": 6.018787990421808e-07, "loss": 0.0116, "step": 22625 }, { "epoch": 
4.096581660336408, "grad_norm": 11.160529136657715, "learning_rate": 6.01418309080862e-07, "loss": 0.0083, "step": 22650 }, { "epoch": 4.101103273648038, "grad_norm": 0.33503735065460205, "learning_rate": 6.009578191195432e-07, "loss": 0.0071, "step": 22675 }, { "epoch": 4.105624886959667, "grad_norm": 0.04343891143798828, "learning_rate": 6.004973291582243e-07, "loss": 0.0057, "step": 22700 }, { "epoch": 4.110146500271297, "grad_norm": 0.017316769808530807, "learning_rate": 6.000368391969055e-07, "loss": 0.0085, "step": 22725 }, { "epoch": 4.114668113582926, "grad_norm": 2.94707989692688, "learning_rate": 5.995763492355867e-07, "loss": 0.0085, "step": 22750 }, { "epoch": 4.119189726894556, "grad_norm": 0.5166690349578857, "learning_rate": 5.991158592742677e-07, "loss": 0.0085, "step": 22775 }, { "epoch": 4.123711340206185, "grad_norm": 0.27040883898735046, "learning_rate": 5.986553693129489e-07, "loss": 0.0115, "step": 22800 }, { "epoch": 4.1282329535178155, "grad_norm": 0.2504747807979584, "learning_rate": 5.981948793516302e-07, "loss": 0.0193, "step": 22825 }, { "epoch": 4.132754566829445, "grad_norm": 0.23941832780838013, "learning_rate": 5.977343893903113e-07, "loss": 0.0184, "step": 22850 }, { "epoch": 4.1372761801410745, "grad_norm": 0.060054145753383636, "learning_rate": 5.972738994289924e-07, "loss": 0.0327, "step": 22875 }, { "epoch": 4.141797793452704, "grad_norm": 2.8317267894744873, "learning_rate": 5.968134094676736e-07, "loss": 0.0261, "step": 22900 }, { "epoch": 4.146319406764333, "grad_norm": 0.7358568906784058, "learning_rate": 5.963529195063547e-07, "loss": 0.0361, "step": 22925 }, { "epoch": 4.150841020075963, "grad_norm": 0.17320603132247925, "learning_rate": 5.958924295450359e-07, "loss": 0.0153, "step": 22950 }, { "epoch": 4.155362633387592, "grad_norm": 7.578225612640381, "learning_rate": 5.954319395837171e-07, "loss": 0.0065, "step": 22975 }, { "epoch": 4.159884246699222, "grad_norm": 0.20573076605796814, "learning_rate": 
5.949714496223982e-07, "loss": 0.008, "step": 23000 }, { "epoch": 4.164405860010852, "grad_norm": 0.031806666404008865, "learning_rate": 5.945109596610793e-07, "loss": 0.0039, "step": 23025 }, { "epoch": 4.168927473322482, "grad_norm": 1.0935386419296265, "learning_rate": 5.940504696997604e-07, "loss": 0.0039, "step": 23050 }, { "epoch": 4.173449086634111, "grad_norm": 3.248675584793091, "learning_rate": 5.935899797384417e-07, "loss": 0.0084, "step": 23075 }, { "epoch": 4.177970699945741, "grad_norm": 0.0767347663640976, "learning_rate": 5.931294897771229e-07, "loss": 0.0062, "step": 23100 }, { "epoch": 4.18249231325737, "grad_norm": 0.03708457574248314, "learning_rate": 5.92668999815804e-07, "loss": 0.0085, "step": 23125 }, { "epoch": 4.187013926569, "grad_norm": 0.16320359706878662, "learning_rate": 5.922085098544851e-07, "loss": 0.0053, "step": 23150 }, { "epoch": 4.191535539880629, "grad_norm": 0.7727710008621216, "learning_rate": 5.917480198931663e-07, "loss": 0.0291, "step": 23175 }, { "epoch": 4.196057153192259, "grad_norm": 0.0653238445520401, "learning_rate": 5.912875299318474e-07, "loss": 0.0165, "step": 23200 }, { "epoch": 4.200578766503889, "grad_norm": 18.652076721191406, "learning_rate": 5.908270399705286e-07, "loss": 0.0119, "step": 23225 }, { "epoch": 4.205100379815518, "grad_norm": 1.3249595165252686, "learning_rate": 5.903665500092098e-07, "loss": 0.0127, "step": 23250 }, { "epoch": 4.209621993127148, "grad_norm": 0.1082233116030693, "learning_rate": 5.899060600478909e-07, "loss": 0.0312, "step": 23275 }, { "epoch": 4.214143606438777, "grad_norm": 0.7809641361236572, "learning_rate": 5.894455700865721e-07, "loss": 0.0206, "step": 23300 }, { "epoch": 4.218665219750407, "grad_norm": 0.12851662933826447, "learning_rate": 5.889850801252533e-07, "loss": 0.0284, "step": 23325 }, { "epoch": 4.223186833062036, "grad_norm": 1.1004611253738403, "learning_rate": 5.885245901639344e-07, "loss": 0.0192, "step": 23350 }, { "epoch": 4.227708446373666, 
"grad_norm": 22.368629455566406, "learning_rate": 5.880641002026155e-07, "loss": 0.0076, "step": 23375 }, { "epoch": 4.232230059685295, "grad_norm": 0.16481293737888336, "learning_rate": 5.876036102412967e-07, "loss": 0.0121, "step": 23400 }, { "epoch": 4.236751672996926, "grad_norm": 0.09105231612920761, "learning_rate": 5.871431202799779e-07, "loss": 0.0055, "step": 23425 }, { "epoch": 4.241273286308555, "grad_norm": 0.06610502302646637, "learning_rate": 5.86682630318659e-07, "loss": 0.0051, "step": 23450 }, { "epoch": 4.245794899620185, "grad_norm": 1.563685417175293, "learning_rate": 5.862221403573402e-07, "loss": 0.0034, "step": 23475 }, { "epoch": 4.250316512931814, "grad_norm": 0.07692436873912811, "learning_rate": 5.857616503960213e-07, "loss": 0.0049, "step": 23500 }, { "epoch": 4.254838126243444, "grad_norm": 0.42418307065963745, "learning_rate": 5.853011604347025e-07, "loss": 0.0021, "step": 23525 }, { "epoch": 4.259359739555073, "grad_norm": 0.6450216174125671, "learning_rate": 5.848406704733837e-07, "loss": 0.0117, "step": 23550 }, { "epoch": 4.263881352866703, "grad_norm": 3.0769999027252197, "learning_rate": 5.843801805120648e-07, "loss": 0.0021, "step": 23575 }, { "epoch": 4.268402966178332, "grad_norm": 0.060876231640577316, "learning_rate": 5.83919690550746e-07, "loss": 0.0241, "step": 23600 }, { "epoch": 4.272924579489962, "grad_norm": 10.53986930847168, "learning_rate": 5.834592005894271e-07, "loss": 0.0232, "step": 23625 }, { "epoch": 4.277446192801592, "grad_norm": 17.494129180908203, "learning_rate": 5.829987106281082e-07, "loss": 0.0222, "step": 23650 }, { "epoch": 4.281967806113221, "grad_norm": 0.1705417037010193, "learning_rate": 5.825382206667895e-07, "loss": 0.0187, "step": 23675 }, { "epoch": 4.286489419424851, "grad_norm": 2.707817316055298, "learning_rate": 5.820777307054707e-07, "loss": 0.0327, "step": 23700 }, { "epoch": 4.29101103273648, "grad_norm": 1.4121204614639282, "learning_rate": 5.816172407441517e-07, "loss": 0.0243, 
"step": 23725 }, { "epoch": 4.29553264604811, "grad_norm": 7.615264415740967, "learning_rate": 5.811567507828329e-07, "loss": 0.0081, "step": 23750 }, { "epoch": 4.300054259359739, "grad_norm": 0.06863299757242203, "learning_rate": 5.806962608215141e-07, "loss": 0.0112, "step": 23775 }, { "epoch": 4.304575872671369, "grad_norm": 0.018180107697844505, "learning_rate": 5.802357708601952e-07, "loss": 0.0046, "step": 23800 }, { "epoch": 4.309097485982999, "grad_norm": 0.43104514479637146, "learning_rate": 5.797752808988764e-07, "loss": 0.0055, "step": 23825 }, { "epoch": 4.313619099294629, "grad_norm": 0.0967748612165451, "learning_rate": 5.793147909375576e-07, "loss": 0.0046, "step": 23850 }, { "epoch": 4.318140712606258, "grad_norm": 0.15935564041137695, "learning_rate": 5.788543009762386e-07, "loss": 0.0086, "step": 23875 }, { "epoch": 4.322662325917888, "grad_norm": 0.141365185379982, "learning_rate": 5.783938110149198e-07, "loss": 0.0167, "step": 23900 }, { "epoch": 4.327183939229517, "grad_norm": 0.17591165006160736, "learning_rate": 5.779333210536011e-07, "loss": 0.0064, "step": 23925 }, { "epoch": 4.3317055525411465, "grad_norm": 6.465728759765625, "learning_rate": 5.774728310922822e-07, "loss": 0.004, "step": 23950 }, { "epoch": 4.336227165852776, "grad_norm": 0.12863588333129883, "learning_rate": 5.770123411309633e-07, "loss": 0.0163, "step": 23975 }, { "epoch": 4.3407487791644055, "grad_norm": 0.3128308355808258, "learning_rate": 5.765518511696444e-07, "loss": 0.0216, "step": 24000 }, { "epoch": 4.345270392476036, "grad_norm": 6.302773475646973, "learning_rate": 5.760913612083256e-07, "loss": 0.0115, "step": 24025 }, { "epoch": 4.349792005787665, "grad_norm": 0.05059755593538284, "learning_rate": 5.756308712470068e-07, "loss": 0.0064, "step": 24050 }, { "epoch": 4.354313619099295, "grad_norm": 0.18986685574054718, "learning_rate": 5.75170381285688e-07, "loss": 0.0277, "step": 24075 }, { "epoch": 4.358835232410924, "grad_norm": 9.643892288208008, 
"learning_rate": 5.747098913243691e-07, "loss": 0.0373, "step": 24100 }, { "epoch": 4.363356845722554, "grad_norm": 1.0030667781829834, "learning_rate": 5.742494013630502e-07, "loss": 0.0188, "step": 24125 }, { "epoch": 4.367878459034183, "grad_norm": 0.44967132806777954, "learning_rate": 5.737889114017314e-07, "loss": 0.0095, "step": 24150 }, { "epoch": 4.372400072345813, "grad_norm": 2.169846534729004, "learning_rate": 5.733284214404126e-07, "loss": 0.0168, "step": 24175 }, { "epoch": 4.376921685657442, "grad_norm": 0.01923939771950245, "learning_rate": 5.728679314790938e-07, "loss": 0.0095, "step": 24200 }, { "epoch": 4.381443298969073, "grad_norm": 9.444828033447266, "learning_rate": 5.724074415177748e-07, "loss": 0.0053, "step": 24225 }, { "epoch": 4.385964912280702, "grad_norm": 60.225868225097656, "learning_rate": 5.71946951556456e-07, "loss": 0.0052, "step": 24250 }, { "epoch": 4.3904865255923315, "grad_norm": 5.413934230804443, "learning_rate": 5.714864615951373e-07, "loss": 0.0125, "step": 24275 }, { "epoch": 4.395008138903961, "grad_norm": 0.14292512834072113, "learning_rate": 5.710259716338183e-07, "loss": 0.0211, "step": 24300 }, { "epoch": 4.3995297522155905, "grad_norm": 0.2194162905216217, "learning_rate": 5.705654816724995e-07, "loss": 0.0125, "step": 24325 }, { "epoch": 4.40405136552722, "grad_norm": 0.029263151809573174, "learning_rate": 5.701049917111807e-07, "loss": 0.0045, "step": 24350 }, { "epoch": 4.4085729788388495, "grad_norm": 0.2611495554447174, "learning_rate": 5.696445017498617e-07, "loss": 0.0031, "step": 24375 }, { "epoch": 4.413094592150479, "grad_norm": 0.025238435715436935, "learning_rate": 5.69184011788543e-07, "loss": 0.0155, "step": 24400 }, { "epoch": 4.417616205462108, "grad_norm": 0.043699052184820175, "learning_rate": 5.687235218272242e-07, "loss": 0.0156, "step": 24425 }, { "epoch": 4.422137818773739, "grad_norm": 0.04374052584171295, "learning_rate": 5.682630318659053e-07, "loss": 0.0133, "step": 24450 }, { "epoch": 
4.426659432085368, "grad_norm": 3.1334991455078125, "learning_rate": 5.678025419045864e-07, "loss": 0.0383, "step": 24475 }, { "epoch": 4.431181045396998, "grad_norm": 0.17290575802326202, "learning_rate": 5.673420519432676e-07, "loss": 0.0098, "step": 24500 }, { "epoch": 4.435702658708627, "grad_norm": 0.5227589011192322, "learning_rate": 5.668815619819488e-07, "loss": 0.0863, "step": 24525 }, { "epoch": 4.440224272020257, "grad_norm": 1.993817687034607, "learning_rate": 5.664210720206299e-07, "loss": 0.0161, "step": 24550 }, { "epoch": 4.444745885331886, "grad_norm": 0.021811481565237045, "learning_rate": 5.659605820593111e-07, "loss": 0.0101, "step": 24575 }, { "epoch": 4.449267498643516, "grad_norm": 0.055265914648771286, "learning_rate": 5.655000920979922e-07, "loss": 0.0117, "step": 24600 }, { "epoch": 4.453789111955146, "grad_norm": 0.027848972007632256, "learning_rate": 5.650396021366734e-07, "loss": 0.0046, "step": 24625 }, { "epoch": 4.4583107252667755, "grad_norm": 0.613524317741394, "learning_rate": 5.645791121753546e-07, "loss": 0.0079, "step": 24650 }, { "epoch": 4.462832338578405, "grad_norm": 0.329345166683197, "learning_rate": 5.641186222140357e-07, "loss": 0.0096, "step": 24675 }, { "epoch": 4.4673539518900345, "grad_norm": 0.07431361824274063, "learning_rate": 5.636581322527169e-07, "loss": 0.0033, "step": 24700 }, { "epoch": 4.471875565201664, "grad_norm": 0.01413232646882534, "learning_rate": 5.63197642291398e-07, "loss": 0.0145, "step": 24725 }, { "epoch": 4.476397178513293, "grad_norm": 0.03763017803430557, "learning_rate": 5.627371523300791e-07, "loss": 0.0093, "step": 24750 }, { "epoch": 4.480918791824923, "grad_norm": 0.8524808287620544, "learning_rate": 5.622766623687604e-07, "loss": 0.0118, "step": 24775 }, { "epoch": 4.485440405136552, "grad_norm": 0.10143906623125076, "learning_rate": 5.618161724074415e-07, "loss": 0.0203, "step": 24800 }, { "epoch": 4.489962018448182, "grad_norm": 3.6819851398468018, "learning_rate": 
5.613556824461226e-07, "loss": 0.0046, "step": 24825 }, { "epoch": 4.494483631759812, "grad_norm": 17.471813201904297, "learning_rate": 5.608951924848038e-07, "loss": 0.0092, "step": 24850 }, { "epoch": 4.499005245071442, "grad_norm": 0.03576793521642685, "learning_rate": 5.604347025234851e-07, "loss": 0.0171, "step": 24875 }, { "epoch": 4.503526858383071, "grad_norm": 1.3747385740280151, "learning_rate": 5.599742125621661e-07, "loss": 0.0433, "step": 24900 }, { "epoch": 4.508048471694701, "grad_norm": 0.658429741859436, "learning_rate": 5.595137226008473e-07, "loss": 0.0272, "step": 24925 }, { "epoch": 4.51257008500633, "grad_norm": 2.7316272258758545, "learning_rate": 5.590532326395284e-07, "loss": 0.0061, "step": 24950 }, { "epoch": 4.51709169831796, "grad_norm": 0.3981630206108093, "learning_rate": 5.585927426782095e-07, "loss": 0.0078, "step": 24975 }, { "epoch": 4.521613311629589, "grad_norm": 0.21992315351963043, "learning_rate": 5.581322527168908e-07, "loss": 0.0076, "step": 25000 }, { "epoch": 4.5261349249412195, "grad_norm": 7.3034796714782715, "learning_rate": 5.576717627555719e-07, "loss": 0.0073, "step": 25025 }, { "epoch": 4.530656538252849, "grad_norm": 0.10995008796453476, "learning_rate": 5.572112727942531e-07, "loss": 0.0064, "step": 25050 }, { "epoch": 4.5351781515644785, "grad_norm": 0.31085583567619324, "learning_rate": 5.567507828329342e-07, "loss": 0.0081, "step": 25075 }, { "epoch": 4.539699764876108, "grad_norm": 0.062460754066705704, "learning_rate": 5.562902928716153e-07, "loss": 0.0158, "step": 25100 }, { "epoch": 4.544221378187737, "grad_norm": 0.5231966376304626, "learning_rate": 5.558298029102966e-07, "loss": 0.0048, "step": 25125 }, { "epoch": 4.548742991499367, "grad_norm": 0.23824147880077362, "learning_rate": 5.553693129489777e-07, "loss": 0.0172, "step": 25150 }, { "epoch": 4.553264604810996, "grad_norm": 0.060738705098629, "learning_rate": 5.549088229876588e-07, "loss": 0.0127, "step": 25175 }, { "epoch": 4.557786218122626, 
"grad_norm": 0.017272014170885086, "learning_rate": 5.5444833302634e-07, "loss": 0.0124, "step": 25200 }, { "epoch": 4.562307831434255, "grad_norm": 0.1754794865846634, "learning_rate": 5.539878430650211e-07, "loss": 0.0148, "step": 25225 }, { "epoch": 4.566829444745886, "grad_norm": 0.2678035795688629, "learning_rate": 5.535273531037023e-07, "loss": 0.0127, "step": 25250 }, { "epoch": 4.571351058057515, "grad_norm": 10.760842323303223, "learning_rate": 5.530668631423835e-07, "loss": 0.0245, "step": 25275 }, { "epoch": 4.575872671369145, "grad_norm": 0.16525591909885406, "learning_rate": 5.526247927795174e-07, "loss": 0.0371, "step": 25300 }, { "epoch": 4.580394284680774, "grad_norm": 1.5673601627349854, "learning_rate": 5.521643028181985e-07, "loss": 0.0264, "step": 25325 }, { "epoch": 4.584915897992404, "grad_norm": 0.20957525074481964, "learning_rate": 5.517038128568797e-07, "loss": 0.0167, "step": 25350 }, { "epoch": 4.589437511304033, "grad_norm": 0.5806902050971985, "learning_rate": 5.512433228955609e-07, "loss": 0.0187, "step": 25375 }, { "epoch": 4.593959124615663, "grad_norm": 0.487657368183136, "learning_rate": 5.507828329342419e-07, "loss": 0.0049, "step": 25400 }, { "epoch": 4.598480737927293, "grad_norm": 0.2314232587814331, "learning_rate": 5.503223429729231e-07, "loss": 0.0065, "step": 25425 }, { "epoch": 4.603002351238922, "grad_norm": 0.11486486345529556, "learning_rate": 5.498618530116044e-07, "loss": 0.0032, "step": 25450 }, { "epoch": 4.607523964550552, "grad_norm": 0.23516380786895752, "learning_rate": 5.494013630502855e-07, "loss": 0.0088, "step": 25475 }, { "epoch": 4.612045577862181, "grad_norm": 0.26957616209983826, "learning_rate": 5.489408730889666e-07, "loss": 0.011, "step": 25500 }, { "epoch": 4.616567191173811, "grad_norm": 0.9121783375740051, "learning_rate": 5.484803831276478e-07, "loss": 0.0125, "step": 25525 }, { "epoch": 4.62108880448544, "grad_norm": 0.0874478742480278, "learning_rate": 5.480198931663289e-07, "loss": 0.0015, 
"step": 25550 }, { "epoch": 4.62561041779707, "grad_norm": 0.13613806664943695, "learning_rate": 5.475594032050101e-07, "loss": 0.005, "step": 25575 }, { "epoch": 4.630132031108699, "grad_norm": 12.003376960754395, "learning_rate": 5.470989132436913e-07, "loss": 0.0104, "step": 25600 }, { "epoch": 4.634653644420329, "grad_norm": 0.1077638640999794, "learning_rate": 5.466384232823724e-07, "loss": 0.0035, "step": 25625 }, { "epoch": 4.639175257731958, "grad_norm": 0.20498277246952057, "learning_rate": 5.461779333210536e-07, "loss": 0.0327, "step": 25650 }, { "epoch": 4.643696871043589, "grad_norm": 0.12293770909309387, "learning_rate": 5.457174433597347e-07, "loss": 0.0208, "step": 25675 }, { "epoch": 4.648218484355218, "grad_norm": 0.13977064192295074, "learning_rate": 5.452569533984159e-07, "loss": 0.0279, "step": 25700 }, { "epoch": 4.652740097666848, "grad_norm": 0.14199872314929962, "learning_rate": 5.447964634370971e-07, "loss": 0.0407, "step": 25725 }, { "epoch": 4.657261710978477, "grad_norm": 0.16517263650894165, "learning_rate": 5.443359734757782e-07, "loss": 0.0084, "step": 25750 }, { "epoch": 4.661783324290107, "grad_norm": 4.908987045288086, "learning_rate": 5.438754835144593e-07, "loss": 0.0096, "step": 25775 }, { "epoch": 4.666304937601736, "grad_norm": 2.0926215648651123, "learning_rate": 5.434149935531405e-07, "loss": 0.0069, "step": 25800 }, { "epoch": 4.6708265509133655, "grad_norm": 37.50233840942383, "learning_rate": 5.429545035918217e-07, "loss": 0.0127, "step": 25825 }, { "epoch": 4.675348164224996, "grad_norm": 3.6432156562805176, "learning_rate": 5.424940136305028e-07, "loss": 0.0037, "step": 25850 }, { "epoch": 4.679869777536625, "grad_norm": 0.005684335716068745, "learning_rate": 5.42033523669184e-07, "loss": 0.016, "step": 25875 }, { "epoch": 4.684391390848255, "grad_norm": 0.06579804420471191, "learning_rate": 5.415730337078652e-07, "loss": 0.0046, "step": 25900 }, { "epoch": 4.688913004159884, "grad_norm": 18.67939567565918, 
"learning_rate": 5.411125437465462e-07, "loss": 0.0067, "step": 25925 }, { "epoch": 4.693434617471514, "grad_norm": 0.035297270864248276, "learning_rate": 5.406520537852275e-07, "loss": 0.0092, "step": 25950 }, { "epoch": 4.697956230783143, "grad_norm": 0.08447632938623428, "learning_rate": 5.401915638239087e-07, "loss": 0.0152, "step": 25975 }, { "epoch": 4.702477844094773, "grad_norm": 0.2718825042247772, "learning_rate": 5.397310738625897e-07, "loss": 0.0267, "step": 26000 }, { "epoch": 4.706999457406402, "grad_norm": 0.02587636187672615, "learning_rate": 5.392705839012709e-07, "loss": 0.0032, "step": 26025 }, { "epoch": 4.711521070718032, "grad_norm": 0.295260488986969, "learning_rate": 5.388100939399522e-07, "loss": 0.0249, "step": 26050 }, { "epoch": 4.716042684029662, "grad_norm": 0.10036912560462952, "learning_rate": 5.383496039786333e-07, "loss": 0.0248, "step": 26075 }, { "epoch": 4.720564297341292, "grad_norm": 19.557443618774414, "learning_rate": 5.378891140173144e-07, "loss": 0.0269, "step": 26100 }, { "epoch": 4.725085910652921, "grad_norm": 13.175033569335938, "learning_rate": 5.374286240559955e-07, "loss": 0.0286, "step": 26125 }, { "epoch": 4.7296075239645505, "grad_norm": 28.173160552978516, "learning_rate": 5.369681340946767e-07, "loss": 0.0069, "step": 26150 }, { "epoch": 4.73412913727618, "grad_norm": 0.07096391171216965, "learning_rate": 5.365076441333579e-07, "loss": 0.0089, "step": 26175 }, { "epoch": 4.7386507505878095, "grad_norm": 0.07818924635648727, "learning_rate": 5.36047154172039e-07, "loss": 0.004, "step": 26200 }, { "epoch": 4.743172363899439, "grad_norm": 2.088768243789673, "learning_rate": 5.355866642107202e-07, "loss": 0.0118, "step": 26225 }, { "epoch": 4.747693977211069, "grad_norm": 0.3656996488571167, "learning_rate": 5.351261742494013e-07, "loss": 0.0029, "step": 26250 }, { "epoch": 4.752215590522699, "grad_norm": 0.016400739550590515, "learning_rate": 5.346656842880824e-07, "loss": 0.0068, "step": 26275 }, { "epoch": 
4.756737203834328, "grad_norm": 0.023505523800849915, "learning_rate": 5.342051943267637e-07, "loss": 0.0053, "step": 26300 }, { "epoch": 4.761258817145958, "grad_norm": 0.033994659781455994, "learning_rate": 5.337447043654449e-07, "loss": 0.0084, "step": 26325 }, { "epoch": 4.765780430457587, "grad_norm": 0.4282836318016052, "learning_rate": 5.332842144041259e-07, "loss": 0.0178, "step": 26350 }, { "epoch": 4.770302043769217, "grad_norm": 0.9596586227416992, "learning_rate": 5.328237244428071e-07, "loss": 0.0055, "step": 26375 }, { "epoch": 4.774823657080846, "grad_norm": 0.07100539654493332, "learning_rate": 5.323632344814883e-07, "loss": 0.0035, "step": 26400 }, { "epoch": 4.779345270392476, "grad_norm": 20.814708709716797, "learning_rate": 5.319027445201694e-07, "loss": 0.0018, "step": 26425 }, { "epoch": 4.783866883704105, "grad_norm": 0.1001884788274765, "learning_rate": 5.314422545588506e-07, "loss": 0.0169, "step": 26450 }, { "epoch": 4.7883884970157355, "grad_norm": 10.18614387512207, "learning_rate": 5.309817645975318e-07, "loss": 0.0833, "step": 26475 }, { "epoch": 4.792910110327365, "grad_norm": 5.15158224105835, "learning_rate": 5.305212746362128e-07, "loss": 0.0395, "step": 26500 }, { "epoch": 4.7974317236389945, "grad_norm": 0.10631446540355682, "learning_rate": 5.30060784674894e-07, "loss": 0.019, "step": 26525 }, { "epoch": 4.801953336950624, "grad_norm": 0.15538524091243744, "learning_rate": 5.296002947135753e-07, "loss": 0.0075, "step": 26550 }, { "epoch": 4.8064749502622535, "grad_norm": 1.2634289264678955, "learning_rate": 5.291398047522564e-07, "loss": 0.0111, "step": 26575 }, { "epoch": 4.810996563573883, "grad_norm": 0.11970185488462448, "learning_rate": 5.286793147909375e-07, "loss": 0.009, "step": 26600 }, { "epoch": 4.815518176885512, "grad_norm": 3.1821625232696533, "learning_rate": 5.282188248296187e-07, "loss": 0.008, "step": 26625 }, { "epoch": 4.820039790197143, "grad_norm": 1.2722951173782349, "learning_rate": 5.277583348682998e-07, 
"loss": 0.0083, "step": 26650 }, { "epoch": 4.824561403508772, "grad_norm": 0.16242074966430664, "learning_rate": 5.27297844906981e-07, "loss": 0.0093, "step": 26675 }, { "epoch": 4.829083016820402, "grad_norm": 0.08178609609603882, "learning_rate": 5.268373549456622e-07, "loss": 0.0163, "step": 26700 }, { "epoch": 4.833604630132031, "grad_norm": 0.05239911004900932, "learning_rate": 5.263768649843433e-07, "loss": 0.0047, "step": 26725 }, { "epoch": 4.838126243443661, "grad_norm": 0.11282465606927872, "learning_rate": 5.259163750230244e-07, "loss": 0.0057, "step": 26750 }, { "epoch": 4.84264785675529, "grad_norm": 0.11248558014631271, "learning_rate": 5.254558850617057e-07, "loss": 0.011, "step": 26775 }, { "epoch": 4.84716947006692, "grad_norm": 0.020478971302509308, "learning_rate": 5.249953951003868e-07, "loss": 0.013, "step": 26800 }, { "epoch": 4.851691083378549, "grad_norm": 0.0279947929084301, "learning_rate": 5.24534905139068e-07, "loss": 0.016, "step": 26825 }, { "epoch": 4.856212696690179, "grad_norm": 2.424943208694458, "learning_rate": 5.24074415177749e-07, "loss": 0.0214, "step": 26850 }, { "epoch": 4.860734310001809, "grad_norm": 0.05842322111129761, "learning_rate": 5.236139252164302e-07, "loss": 0.0067, "step": 26875 }, { "epoch": 4.8652559233134385, "grad_norm": 11.81139087677002, "learning_rate": 5.231534352551115e-07, "loss": 0.023, "step": 26900 }, { "epoch": 4.869777536625068, "grad_norm": 4.321495056152344, "learning_rate": 5.226929452937926e-07, "loss": 0.0276, "step": 26925 }, { "epoch": 4.874299149936697, "grad_norm": 26.13640022277832, "learning_rate": 5.222324553324737e-07, "loss": 0.0133, "step": 26950 }, { "epoch": 4.878820763248327, "grad_norm": 0.09919006377458572, "learning_rate": 5.217719653711549e-07, "loss": 0.01, "step": 26975 }, { "epoch": 4.883342376559956, "grad_norm": 2.89017653465271, "learning_rate": 5.21311475409836e-07, "loss": 0.0199, "step": 27000 }, { "epoch": 4.887863989871586, "grad_norm": 0.1923578828573227, 
"learning_rate": 5.208509854485172e-07, "loss": 0.0055, "step": 27025 }, { "epoch": 4.892385603183216, "grad_norm": 0.05905037745833397, "learning_rate": 5.203904954871984e-07, "loss": 0.007, "step": 27050 }, { "epoch": 4.896907216494846, "grad_norm": 4.7976837158203125, "learning_rate": 5.199300055258795e-07, "loss": 0.0095, "step": 27075 }, { "epoch": 4.901428829806475, "grad_norm": 0.5176482200622559, "learning_rate": 5.194695155645606e-07, "loss": 0.0068, "step": 27100 }, { "epoch": 4.905950443118105, "grad_norm": 0.15327677130699158, "learning_rate": 5.190090256032418e-07, "loss": 0.0016, "step": 27125 }, { "epoch": 4.910472056429734, "grad_norm": 23.123516082763672, "learning_rate": 5.18548535641923e-07, "loss": 0.0126, "step": 27150 }, { "epoch": 4.914993669741364, "grad_norm": 5.405014514923096, "learning_rate": 5.180880456806041e-07, "loss": 0.0063, "step": 27175 }, { "epoch": 4.919515283052993, "grad_norm": 0.03432038426399231, "learning_rate": 5.176275557192853e-07, "loss": 0.0085, "step": 27200 }, { "epoch": 4.924036896364623, "grad_norm": 0.012576217763125896, "learning_rate": 5.171670657579664e-07, "loss": 0.0089, "step": 27225 }, { "epoch": 4.928558509676252, "grad_norm": 0.03322821483016014, "learning_rate": 5.167065757966476e-07, "loss": 0.0117, "step": 27250 }, { "epoch": 4.9330801229878825, "grad_norm": 0.14308449625968933, "learning_rate": 5.162460858353288e-07, "loss": 0.0083, "step": 27275 }, { "epoch": 4.937601736299512, "grad_norm": 39.83473205566406, "learning_rate": 5.157855958740099e-07, "loss": 0.0412, "step": 27300 }, { "epoch": 4.942123349611141, "grad_norm": 0.15658709406852722, "learning_rate": 5.153251059126911e-07, "loss": 0.048, "step": 27325 }, { "epoch": 4.946644962922771, "grad_norm": 0.5169302821159363, "learning_rate": 5.148646159513722e-07, "loss": 0.0125, "step": 27350 }, { "epoch": 4.9511665762344, "grad_norm": 0.021764138713479042, "learning_rate": 5.144041259900533e-07, "loss": 0.0044, "step": 27375 }, { "epoch": 
4.95568818954603, "grad_norm": 0.2565416395664215, "learning_rate": 5.139436360287346e-07, "loss": 0.0134, "step": 27400 }, { "epoch": 4.960209802857659, "grad_norm": 3.5746593475341797, "learning_rate": 5.134831460674158e-07, "loss": 0.0144, "step": 27425 }, { "epoch": 4.96473141616929, "grad_norm": 1.1976155042648315, "learning_rate": 5.130226561060968e-07, "loss": 0.0015, "step": 27450 }, { "epoch": 4.969253029480919, "grad_norm": 1.560697078704834, "learning_rate": 5.12562166144778e-07, "loss": 0.0105, "step": 27475 }, { "epoch": 4.973774642792549, "grad_norm": 0.21413296461105347, "learning_rate": 5.121016761834593e-07, "loss": 0.0086, "step": 27500 }, { "epoch": 4.978296256104178, "grad_norm": 0.1948905736207962, "learning_rate": 5.116411862221403e-07, "loss": 0.0142, "step": 27525 }, { "epoch": 4.982817869415808, "grad_norm": 0.0875588059425354, "learning_rate": 5.111806962608215e-07, "loss": 0.0086, "step": 27550 }, { "epoch": 4.987339482727437, "grad_norm": 0.18821197748184204, "learning_rate": 5.107202062995027e-07, "loss": 0.0343, "step": 27575 }, { "epoch": 4.991861096039067, "grad_norm": 3.243891954421997, "learning_rate": 5.102597163381837e-07, "loss": 0.0066, "step": 27600 }, { "epoch": 4.996382709350696, "grad_norm": 0.04505661129951477, "learning_rate": 5.09799226376865e-07, "loss": 0.0273, "step": 27625 }, { "epoch": 5.0, "eval_loss": 0.32219141721725464, "eval_runtime": 8558.7304, "eval_samples_per_second": 1.109, "eval_steps_per_second": 0.139, "eval_wer": 0.1037636130685458, "step": 27645 }, { "epoch": 5.0009043226623255, "grad_norm": 3.0571129322052, "learning_rate": 5.093387364155462e-07, "loss": 0.0432, "step": 27650 }, { "epoch": 5.005425935973956, "grad_norm": 0.28156787157058716, "learning_rate": 5.088782464542273e-07, "loss": 0.015, "step": 27675 }, { "epoch": 5.009947549285585, "grad_norm": 1.9075210094451904, "learning_rate": 5.084177564929084e-07, "loss": 0.0056, "step": 27700 }, { "epoch": 5.014469162597215, "grad_norm": 
0.8747662305831909, "learning_rate": 5.079572665315896e-07, "loss": 0.0115, "step": 27725 }, { "epoch": 5.018990775908844, "grad_norm": 0.4982765316963196, "learning_rate": 5.074967765702708e-07, "loss": 0.0029, "step": 27750 }, { "epoch": 5.023512389220474, "grad_norm": 0.06921929121017456, "learning_rate": 5.070362866089519e-07, "loss": 0.0133, "step": 27775 }, { "epoch": 5.028034002532103, "grad_norm": 0.3169139623641968, "learning_rate": 5.06575796647633e-07, "loss": 0.0059, "step": 27800 }, { "epoch": 5.032555615843733, "grad_norm": 0.11565407365560532, "learning_rate": 5.061153066863142e-07, "loss": 0.0092, "step": 27825 }, { "epoch": 5.037077229155362, "grad_norm": 0.03337372466921806, "learning_rate": 5.056548167249953e-07, "loss": 0.0045, "step": 27850 }, { "epoch": 5.041598842466993, "grad_norm": 0.03493885695934296, "learning_rate": 5.051943267636765e-07, "loss": 0.0024, "step": 27875 }, { "epoch": 5.046120455778622, "grad_norm": 0.018299061805009842, "learning_rate": 5.047338368023577e-07, "loss": 0.0072, "step": 27900 }, { "epoch": 5.050642069090252, "grad_norm": 0.10773641616106033, "learning_rate": 5.042733468410389e-07, "loss": 0.0122, "step": 27925 }, { "epoch": 5.055163682401881, "grad_norm": 0.8263195157051086, "learning_rate": 5.038128568797199e-07, "loss": 0.0025, "step": 27950 }, { "epoch": 5.0596852957135106, "grad_norm": 0.03416213020682335, "learning_rate": 5.033523669184011e-07, "loss": 0.0017, "step": 27975 }, { "epoch": 5.06420690902514, "grad_norm": 0.05288705974817276, "learning_rate": 5.028918769570824e-07, "loss": 0.0196, "step": 28000 }, { "epoch": 5.0687285223367695, "grad_norm": 1.1040371656417847, "learning_rate": 5.024313869957634e-07, "loss": 0.0162, "step": 28025 }, { "epoch": 5.073250135648399, "grad_norm": 1.0380799770355225, "learning_rate": 5.019708970344446e-07, "loss": 0.0872, "step": 28050 }, { "epoch": 5.077771748960029, "grad_norm": 1.2944865226745605, "learning_rate": 5.015104070731258e-07, "loss": 0.0196, "step": 
28075 }, { "epoch": 5.082293362271659, "grad_norm": 0.10234280675649643, "learning_rate": 5.010499171118068e-07, "loss": 0.0039, "step": 28100 }, { "epoch": 5.086814975583288, "grad_norm": 0.2737126350402832, "learning_rate": 5.005894271504881e-07, "loss": 0.0138, "step": 28125 }, { "epoch": 5.091336588894918, "grad_norm": 0.04862267151474953, "learning_rate": 5.001289371891693e-07, "loss": 0.0084, "step": 28150 }, { "epoch": 5.095858202206547, "grad_norm": 0.12347660213708878, "learning_rate": 4.996684472278504e-07, "loss": 0.0039, "step": 28175 }, { "epoch": 5.100379815518177, "grad_norm": 1.5366371870040894, "learning_rate": 4.992079572665316e-07, "loss": 0.0024, "step": 28200 }, { "epoch": 5.104901428829806, "grad_norm": 0.11356142163276672, "learning_rate": 4.987474673052128e-07, "loss": 0.0069, "step": 28225 }, { "epoch": 5.109423042141436, "grad_norm": 0.019394779577851295, "learning_rate": 4.982869773438938e-07, "loss": 0.0026, "step": 28250 }, { "epoch": 5.113944655453066, "grad_norm": 12.068894386291504, "learning_rate": 4.97826487382575e-07, "loss": 0.0064, "step": 28275 }, { "epoch": 5.118466268764696, "grad_norm": 3.972506523132324, "learning_rate": 4.973659974212562e-07, "loss": 0.004, "step": 28300 }, { "epoch": 5.122987882076325, "grad_norm": 0.024081800132989883, "learning_rate": 4.969055074599373e-07, "loss": 0.0079, "step": 28325 }, { "epoch": 5.1275094953879545, "grad_norm": 0.2852739095687866, "learning_rate": 4.964450174986185e-07, "loss": 0.0111, "step": 28350 }, { "epoch": 5.132031108699584, "grad_norm": 0.1766853779554367, "learning_rate": 4.959845275372997e-07, "loss": 0.0042, "step": 28375 }, { "epoch": 5.1365527220112135, "grad_norm": 0.1434965282678604, "learning_rate": 4.955240375759808e-07, "loss": 0.0018, "step": 28400 }, { "epoch": 5.141074335322843, "grad_norm": 15.574809074401855, "learning_rate": 4.95063547614662e-07, "loss": 0.0219, "step": 28425 }, { "epoch": 5.1455959486344724, "grad_norm": 0.07658454775810242, 
"learning_rate": 4.946030576533432e-07, "loss": 0.0361, "step": 28450 }, { "epoch": 5.150117561946102, "grad_norm": 2.8722949028015137, "learning_rate": 4.941425676920243e-07, "loss": 0.0081, "step": 28475 }, { "epoch": 5.154639175257732, "grad_norm": 0.16390861570835114, "learning_rate": 4.936820777307055e-07, "loss": 0.0024, "step": 28500 }, { "epoch": 5.159160788569362, "grad_norm": 0.5398291945457458, "learning_rate": 4.932215877693866e-07, "loss": 0.0106, "step": 28525 }, { "epoch": 5.163682401880991, "grad_norm": 10.156366348266602, "learning_rate": 4.927610978080677e-07, "loss": 0.0137, "step": 28550 }, { "epoch": 5.168204015192621, "grad_norm": 0.013218329288065434, "learning_rate": 4.923006078467489e-07, "loss": 0.0104, "step": 28575 }, { "epoch": 5.17272562850425, "grad_norm": 8.923846244812012, "learning_rate": 4.9184011788543e-07, "loss": 0.0106, "step": 28600 }, { "epoch": 5.17724724181588, "grad_norm": 0.07841784507036209, "learning_rate": 4.913796279241112e-07, "loss": 0.0065, "step": 28625 }, { "epoch": 5.181768855127509, "grad_norm": 0.3685607314109802, "learning_rate": 4.909191379627924e-07, "loss": 0.0118, "step": 28650 }, { "epoch": 5.186290468439139, "grad_norm": 1.6632095575332642, "learning_rate": 4.904586480014736e-07, "loss": 0.0019, "step": 28675 }, { "epoch": 5.190812081750769, "grad_norm": 0.10080606490373611, "learning_rate": 4.899981580401547e-07, "loss": 0.0027, "step": 28700 }, { "epoch": 5.1953336950623985, "grad_norm": 4.444336891174316, "learning_rate": 4.895376680788359e-07, "loss": 0.0029, "step": 28725 }, { "epoch": 5.199855308374028, "grad_norm": 5.826324939727783, "learning_rate": 4.89077178117517e-07, "loss": 0.0058, "step": 28750 }, { "epoch": 5.2043769216856575, "grad_norm": 0.09290173649787903, "learning_rate": 4.886166881561982e-07, "loss": 0.0015, "step": 28775 }, { "epoch": 5.208898534997287, "grad_norm": 0.0847366601228714, "learning_rate": 4.881561981948794e-07, "loss": 0.0143, "step": 28800 }, { "epoch": 
5.213420148308916, "grad_norm": 0.4414973855018616, "learning_rate": 4.876957082335604e-07, "loss": 0.012, "step": 28825 }, { "epoch": 5.217941761620546, "grad_norm": 1.8146724700927734, "learning_rate": 4.872352182722416e-07, "loss": 0.0213, "step": 28850 }, { "epoch": 5.222463374932175, "grad_norm": 0.3021218478679657, "learning_rate": 4.867747283109228e-07, "loss": 0.0052, "step": 28875 }, { "epoch": 5.226984988243806, "grad_norm": 0.32385706901550293, "learning_rate": 4.863142383496039e-07, "loss": 0.006, "step": 28900 }, { "epoch": 5.231506601555435, "grad_norm": 0.3742549419403076, "learning_rate": 4.858537483882851e-07, "loss": 0.0088, "step": 28925 }, { "epoch": 5.236028214867065, "grad_norm": 0.1462940126657486, "learning_rate": 4.853932584269663e-07, "loss": 0.003, "step": 28950 }, { "epoch": 5.240549828178694, "grad_norm": 0.248609721660614, "learning_rate": 4.849327684656474e-07, "loss": 0.0124, "step": 28975 }, { "epoch": 5.245071441490324, "grad_norm": 0.13235850632190704, "learning_rate": 4.844722785043286e-07, "loss": 0.0111, "step": 29000 }, { "epoch": 5.249593054801953, "grad_norm": 0.1916670799255371, "learning_rate": 4.840117885430098e-07, "loss": 0.0034, "step": 29025 }, { "epoch": 5.254114668113583, "grad_norm": 0.0536821186542511, "learning_rate": 4.835512985816909e-07, "loss": 0.0025, "step": 29050 }, { "epoch": 5.258636281425212, "grad_norm": 0.03594660758972168, "learning_rate": 4.83090808620372e-07, "loss": 0.0005, "step": 29075 }, { "epoch": 5.2631578947368425, "grad_norm": 0.012173148803412914, "learning_rate": 4.826303186590533e-07, "loss": 0.0037, "step": 29100 }, { "epoch": 5.267679508048472, "grad_norm": 0.033210985362529755, "learning_rate": 4.821698286977343e-07, "loss": 0.0014, "step": 29125 }, { "epoch": 5.272201121360101, "grad_norm": 0.131244495511055, "learning_rate": 4.817093387364155e-07, "loss": 0.0023, "step": 29150 }, { "epoch": 5.276722734671731, "grad_norm": 6.246963977813721, "learning_rate": 4.812488487750967e-07, 
"loss": 0.002, "step": 29175 }, { "epoch": 5.28124434798336, "grad_norm": 0.4974225163459778, "learning_rate": 4.807883588137778e-07, "loss": 0.0056, "step": 29200 }, { "epoch": 5.28576596129499, "grad_norm": 16.300535202026367, "learning_rate": 4.80327868852459e-07, "loss": 0.0446, "step": 29225 }, { "epoch": 5.290287574606619, "grad_norm": 2.680767059326172, "learning_rate": 4.798673788911402e-07, "loss": 0.0483, "step": 29250 }, { "epoch": 5.294809187918249, "grad_norm": 8.133724212646484, "learning_rate": 4.794068889298213e-07, "loss": 0.0121, "step": 29275 }, { "epoch": 5.299330801229879, "grad_norm": 0.09077363461256027, "learning_rate": 4.789463989685025e-07, "loss": 0.0048, "step": 29300 }, { "epoch": 5.303852414541509, "grad_norm": 36.61612319946289, "learning_rate": 4.784859090071836e-07, "loss": 0.0182, "step": 29325 }, { "epoch": 5.308374027853138, "grad_norm": 6.475874900817871, "learning_rate": 4.780254190458648e-07, "loss": 0.0075, "step": 29350 }, { "epoch": 5.312895641164768, "grad_norm": 0.09107718616724014, "learning_rate": 4.775649290845459e-07, "loss": 0.003, "step": 29375 }, { "epoch": 5.317417254476397, "grad_norm": 0.0676039382815361, "learning_rate": 4.771044391232271e-07, "loss": 0.0139, "step": 29400 }, { "epoch": 5.321938867788027, "grad_norm": 0.01544503029435873, "learning_rate": 4.7664394916190827e-07, "loss": 0.0118, "step": 29425 }, { "epoch": 5.326460481099656, "grad_norm": 65.15055847167969, "learning_rate": 4.761834592005894e-07, "loss": 0.0057, "step": 29450 }, { "epoch": 5.330982094411286, "grad_norm": 0.08361693471670151, "learning_rate": 4.7572296923927055e-07, "loss": 0.0023, "step": 29475 }, { "epoch": 5.335503707722916, "grad_norm": 0.10150730609893799, "learning_rate": 4.752624792779517e-07, "loss": 0.0152, "step": 29500 }, { "epoch": 5.340025321034545, "grad_norm": 0.7418703436851501, "learning_rate": 4.748019893166329e-07, "loss": 0.0181, "step": 29525 }, { "epoch": 5.344546934346175, "grad_norm": 0.09599591046571732, 
"learning_rate": 4.7434149935531405e-07, "loss": 0.0076, "step": 29550 }, { "epoch": 5.349068547657804, "grad_norm": 0.28607824444770813, "learning_rate": 4.7388100939399516e-07, "loss": 0.0053, "step": 29575 }, { "epoch": 5.353590160969434, "grad_norm": 0.02649850957095623, "learning_rate": 4.7342051943267633e-07, "loss": 0.0114, "step": 29600 }, { "epoch": 5.358111774281063, "grad_norm": 19.19407081604004, "learning_rate": 4.729600294713575e-07, "loss": 0.0629, "step": 29625 }, { "epoch": 5.362633387592693, "grad_norm": 0.16849057376384735, "learning_rate": 4.7251795910849143e-07, "loss": 0.0308, "step": 29650 }, { "epoch": 5.367155000904322, "grad_norm": 4.074181079864502, "learning_rate": 4.720574691471726e-07, "loss": 0.0024, "step": 29675 }, { "epoch": 5.371676614215953, "grad_norm": 0.3396848142147064, "learning_rate": 4.715969791858537e-07, "loss": 0.019, "step": 29700 }, { "epoch": 5.376198227527582, "grad_norm": 1.8326916694641113, "learning_rate": 4.7113648922453487e-07, "loss": 0.0116, "step": 29725 }, { "epoch": 5.380719840839212, "grad_norm": 1.045644760131836, "learning_rate": 4.7067599926321604e-07, "loss": 0.0014, "step": 29750 }, { "epoch": 5.385241454150841, "grad_norm": 0.034128542989492416, "learning_rate": 4.702155093018972e-07, "loss": 0.0032, "step": 29775 }, { "epoch": 5.389763067462471, "grad_norm": 0.24559779465198517, "learning_rate": 4.6975501934057837e-07, "loss": 0.0092, "step": 29800 }, { "epoch": 5.3942846807741, "grad_norm": 0.08045164495706558, "learning_rate": 4.692945293792595e-07, "loss": 0.0039, "step": 29825 }, { "epoch": 5.3988062940857295, "grad_norm": 0.2354724407196045, "learning_rate": 4.6883403941794065e-07, "loss": 0.0062, "step": 29850 }, { "epoch": 5.403327907397359, "grad_norm": 0.02797023393213749, "learning_rate": 4.683735494566218e-07, "loss": 0.0071, "step": 29875 }, { "epoch": 5.407849520708989, "grad_norm": 2.4346461296081543, "learning_rate": 4.67913059495303e-07, "loss": 0.005, "step": 29900 }, { "epoch": 
5.412371134020619, "grad_norm": 0.08410675823688507, "learning_rate": 4.6745256953398415e-07, "loss": 0.0008, "step": 29925 }, { "epoch": 5.416892747332248, "grad_norm": 1.741323709487915, "learning_rate": 4.6699207957266526e-07, "loss": 0.0129, "step": 29950 }, { "epoch": 5.421414360643878, "grad_norm": 0.08235138654708862, "learning_rate": 4.665315896113465e-07, "loss": 0.0283, "step": 29975 }, { "epoch": 5.425935973955507, "grad_norm": 20.520862579345703, "learning_rate": 4.660710996500276e-07, "loss": 0.0526, "step": 30000 }, { "epoch": 5.430457587267137, "grad_norm": 0.11165034025907516, "learning_rate": 4.6561060968870876e-07, "loss": 0.0368, "step": 30025 }, { "epoch": 5.434979200578766, "grad_norm": 2.317666530609131, "learning_rate": 4.6515011972738993e-07, "loss": 0.0265, "step": 30050 }, { "epoch": 5.439500813890396, "grad_norm": 9.61573314666748, "learning_rate": 4.6468962976607104e-07, "loss": 0.0167, "step": 30075 }, { "epoch": 5.444022427202025, "grad_norm": 0.6778357028961182, "learning_rate": 4.6422913980475226e-07, "loss": 0.0099, "step": 30100 }, { "epoch": 5.448544040513656, "grad_norm": 9.778840065002441, "learning_rate": 4.637686498434334e-07, "loss": 0.0052, "step": 30125 }, { "epoch": 5.453065653825285, "grad_norm": 1.3489108085632324, "learning_rate": 4.6330815988211454e-07, "loss": 0.0032, "step": 30150 }, { "epoch": 5.4575872671369146, "grad_norm": 0.05276188254356384, "learning_rate": 4.628476699207957e-07, "loss": 0.0079, "step": 30175 }, { "epoch": 5.462108880448544, "grad_norm": 2.3165993690490723, "learning_rate": 4.6238717995947687e-07, "loss": 0.0239, "step": 30200 }, { "epoch": 5.4666304937601735, "grad_norm": 0.06526318937540054, "learning_rate": 4.6192668999815804e-07, "loss": 0.0006, "step": 30225 }, { "epoch": 5.471152107071803, "grad_norm": 12.61938190460205, "learning_rate": 4.6146620003683915e-07, "loss": 0.0061, "step": 30250 }, { "epoch": 5.4756737203834325, "grad_norm": 1.8737666606903076, "learning_rate": 
4.6100571007552037e-07, "loss": 0.0126, "step": 30275 }, { "epoch": 5.480195333695063, "grad_norm": 0.7588403820991516, "learning_rate": 4.605452201142015e-07, "loss": 0.0029, "step": 30300 }, { "epoch": 5.484716947006692, "grad_norm": 0.04945962131023407, "learning_rate": 4.6008473015288265e-07, "loss": 0.0232, "step": 30325 }, { "epoch": 5.489238560318322, "grad_norm": 0.03633030131459236, "learning_rate": 4.596242401915638e-07, "loss": 0.0049, "step": 30350 }, { "epoch": 5.493760173629951, "grad_norm": 0.049966856837272644, "learning_rate": 4.5916375023024493e-07, "loss": 0.0095, "step": 30375 }, { "epoch": 5.498281786941581, "grad_norm": 0.017245082184672356, "learning_rate": 4.5870326026892615e-07, "loss": 0.0172, "step": 30400 }, { "epoch": 5.50280340025321, "grad_norm": 4.209415435791016, "learning_rate": 4.5824277030760726e-07, "loss": 0.0076, "step": 30425 }, { "epoch": 5.50732501356484, "grad_norm": 0.1449451893568039, "learning_rate": 4.5778228034628843e-07, "loss": 0.0268, "step": 30450 }, { "epoch": 5.511846626876469, "grad_norm": 6.67424201965332, "learning_rate": 4.573217903849696e-07, "loss": 0.0155, "step": 30475 }, { "epoch": 5.516368240188099, "grad_norm": 0.1047978401184082, "learning_rate": 4.568613004236507e-07, "loss": 0.003, "step": 30500 }, { "epoch": 5.520889853499729, "grad_norm": 11.987727165222168, "learning_rate": 4.5640081046233193e-07, "loss": 0.006, "step": 30525 }, { "epoch": 5.5254114668113585, "grad_norm": 0.01995599828660488, "learning_rate": 4.5594032050101304e-07, "loss": 0.0024, "step": 30550 }, { "epoch": 5.529933080122988, "grad_norm": 0.24483761191368103, "learning_rate": 4.554798305396942e-07, "loss": 0.0054, "step": 30575 }, { "epoch": 5.5344546934346175, "grad_norm": 0.03671187534928322, "learning_rate": 4.550193405783754e-07, "loss": 0.014, "step": 30600 }, { "epoch": 5.538976306746247, "grad_norm": 13.608981132507324, "learning_rate": 4.545588506170565e-07, "loss": 0.0025, "step": 30625 }, { "epoch": 
5.5434979200578764, "grad_norm": 68.79994201660156, "learning_rate": 4.540983606557377e-07, "loss": 0.0027, "step": 30650 }, { "epoch": 5.548019533369506, "grad_norm": 0.06913666427135468, "learning_rate": 4.536378706944188e-07, "loss": 0.0039, "step": 30675 }, { "epoch": 5.552541146681136, "grad_norm": 0.04184752330183983, "learning_rate": 4.5317738073310004e-07, "loss": 0.0105, "step": 30700 }, { "epoch": 5.557062759992766, "grad_norm": 22.15312957763672, "learning_rate": 4.5271689077178115e-07, "loss": 0.0089, "step": 30725 }, { "epoch": 5.561584373304395, "grad_norm": 0.17298342287540436, "learning_rate": 4.5225640081046227e-07, "loss": 0.0253, "step": 30750 }, { "epoch": 5.566105986616025, "grad_norm": 17.002382278442383, "learning_rate": 4.517959108491435e-07, "loss": 0.024, "step": 30775 }, { "epoch": 5.570627599927654, "grad_norm": 0.07881984114646912, "learning_rate": 4.513354208878246e-07, "loss": 0.0185, "step": 30800 }, { "epoch": 5.575149213239284, "grad_norm": 1.0953749418258667, "learning_rate": 4.508749309265058e-07, "loss": 0.0238, "step": 30825 }, { "epoch": 5.579670826550913, "grad_norm": 0.5996679067611694, "learning_rate": 4.5041444096518693e-07, "loss": 0.0408, "step": 30850 }, { "epoch": 5.584192439862543, "grad_norm": 0.037622638046741486, "learning_rate": 4.499539510038681e-07, "loss": 0.0082, "step": 30875 }, { "epoch": 5.588714053174172, "grad_norm": 0.8418008685112, "learning_rate": 4.4949346104254926e-07, "loss": 0.0049, "step": 30900 }, { "epoch": 5.5932356664858025, "grad_norm": 0.01251909602433443, "learning_rate": 4.490329710812304e-07, "loss": 0.0055, "step": 30925 }, { "epoch": 5.597757279797432, "grad_norm": 2.921856641769409, "learning_rate": 4.485724811199116e-07, "loss": 0.0089, "step": 30950 }, { "epoch": 5.6022788931090615, "grad_norm": 45.41884231567383, "learning_rate": 4.481119911585927e-07, "loss": 0.0098, "step": 30975 }, { "epoch": 5.606800506420691, "grad_norm": 11.943808555603027, "learning_rate": 
4.4765150119727393e-07, "loss": 0.0201, "step": 31000 }, { "epoch": 5.61132211973232, "grad_norm": 0.03331173211336136, "learning_rate": 4.4719101123595504e-07, "loss": 0.0075, "step": 31025 }, { "epoch": 5.61584373304395, "grad_norm": 19.69361686706543, "learning_rate": 4.4673052127463615e-07, "loss": 0.011, "step": 31050 }, { "epoch": 5.620365346355579, "grad_norm": 1.9068259000778198, "learning_rate": 4.4627003131331737e-07, "loss": 0.0067, "step": 31075 }, { "epoch": 5.62488695966721, "grad_norm": 0.019552985206246376, "learning_rate": 4.458095413519985e-07, "loss": 0.0192, "step": 31100 }, { "epoch": 5.629408572978839, "grad_norm": 0.02558848075568676, "learning_rate": 4.453490513906797e-07, "loss": 0.0064, "step": 31125 }, { "epoch": 5.633930186290469, "grad_norm": 0.023367932066321373, "learning_rate": 4.448885614293608e-07, "loss": 0.0093, "step": 31150 }, { "epoch": 5.638451799602098, "grad_norm": 0.04951472207903862, "learning_rate": 4.4442807146804193e-07, "loss": 0.0056, "step": 31175 }, { "epoch": 5.642973412913728, "grad_norm": 0.042895007878541946, "learning_rate": 4.4396758150672315e-07, "loss": 0.0086, "step": 31200 }, { "epoch": 5.647495026225357, "grad_norm": 3.2334864139556885, "learning_rate": 4.4350709154540426e-07, "loss": 0.0169, "step": 31225 }, { "epoch": 5.652016639536987, "grad_norm": 4.943774700164795, "learning_rate": 4.430466015840855e-07, "loss": 0.0401, "step": 31250 }, { "epoch": 5.656538252848616, "grad_norm": 0.135379821062088, "learning_rate": 4.425861116227666e-07, "loss": 0.0066, "step": 31275 }, { "epoch": 5.661059866160246, "grad_norm": 0.023317676037549973, "learning_rate": 4.421256216614477e-07, "loss": 0.0035, "step": 31300 }, { "epoch": 5.665581479471875, "grad_norm": 0.1724722981452942, "learning_rate": 4.4166513170012893e-07, "loss": 0.0106, "step": 31325 }, { "epoch": 5.670103092783505, "grad_norm": 0.0674796774983406, "learning_rate": 4.4120464173881004e-07, "loss": 0.0078, "step": 31350 }, { "epoch": 
5.674624706095135, "grad_norm": 0.15505366027355194, "learning_rate": 4.4074415177749126e-07, "loss": 0.0069, "step": 31375 }, { "epoch": 5.679146319406764, "grad_norm": 7.8905134201049805, "learning_rate": 4.402836618161724e-07, "loss": 0.0128, "step": 31400 }, { "epoch": 5.683667932718394, "grad_norm": 24.007476806640625, "learning_rate": 4.398231718548536e-07, "loss": 0.0102, "step": 31425 }, { "epoch": 5.688189546030023, "grad_norm": 0.03173492103815079, "learning_rate": 4.393626818935347e-07, "loss": 0.0056, "step": 31450 }, { "epoch": 5.692711159341653, "grad_norm": 0.4066329300403595, "learning_rate": 4.389021919322158e-07, "loss": 0.0019, "step": 31475 }, { "epoch": 5.697232772653282, "grad_norm": 10.2057466506958, "learning_rate": 4.3844170197089704e-07, "loss": 0.0063, "step": 31500 }, { "epoch": 5.701754385964913, "grad_norm": 0.02208542451262474, "learning_rate": 4.3798121200957815e-07, "loss": 0.0202, "step": 31525 }, { "epoch": 5.706275999276542, "grad_norm": 0.38067665696144104, "learning_rate": 4.3752072204825937e-07, "loss": 0.0076, "step": 31550 }, { "epoch": 5.710797612588172, "grad_norm": 0.04764077439904213, "learning_rate": 4.370602320869405e-07, "loss": 0.0016, "step": 31575 }, { "epoch": 5.715319225899801, "grad_norm": 0.04114853963255882, "learning_rate": 4.365997421256216e-07, "loss": 0.0109, "step": 31600 }, { "epoch": 5.719840839211431, "grad_norm": 0.229792058467865, "learning_rate": 4.361392521643028e-07, "loss": 0.0155, "step": 31625 }, { "epoch": 5.72436245252306, "grad_norm": 2.7913074493408203, "learning_rate": 4.3567876220298393e-07, "loss": 0.0199, "step": 31650 }, { "epoch": 5.72888406583469, "grad_norm": 0.0458095520734787, "learning_rate": 4.3521827224166515e-07, "loss": 0.0096, "step": 31675 }, { "epoch": 5.733405679146319, "grad_norm": 2.6488194465637207, "learning_rate": 4.3475778228034626e-07, "loss": 0.0095, "step": 31700 }, { "epoch": 5.7379272924579485, "grad_norm": 0.013299129903316498, "learning_rate": 
4.3429729231902743e-07, "loss": 0.0081, "step": 31725 }, { "epoch": 5.742448905769579, "grad_norm": 0.6266424059867859, "learning_rate": 4.338368023577086e-07, "loss": 0.0009, "step": 31750 }, { "epoch": 5.746970519081208, "grad_norm": 13.529029846191406, "learning_rate": 4.333763123963897e-07, "loss": 0.0102, "step": 31775 }, { "epoch": 5.751492132392838, "grad_norm": 0.0697498694062233, "learning_rate": 4.3291582243507093e-07, "loss": 0.0053, "step": 31800 }, { "epoch": 5.756013745704467, "grad_norm": 0.053803663700819016, "learning_rate": 4.3245533247375204e-07, "loss": 0.0016, "step": 31825 }, { "epoch": 5.760535359016097, "grad_norm": 0.23834507167339325, "learning_rate": 4.319948425124332e-07, "loss": 0.0018, "step": 31850 }, { "epoch": 5.765056972327726, "grad_norm": 0.01928309164941311, "learning_rate": 4.315343525511144e-07, "loss": 0.019, "step": 31875 }, { "epoch": 5.769578585639356, "grad_norm": 0.20884621143341064, "learning_rate": 4.310738625897955e-07, "loss": 0.0331, "step": 31900 }, { "epoch": 5.774100198950986, "grad_norm": 28.804271697998047, "learning_rate": 4.306133726284767e-07, "loss": 0.0103, "step": 31925 }, { "epoch": 5.778621812262616, "grad_norm": 16.59792137145996, "learning_rate": 4.301528826671578e-07, "loss": 0.005, "step": 31950 }, { "epoch": 5.783143425574245, "grad_norm": 13.525711059570312, "learning_rate": 4.29692392705839e-07, "loss": 0.0197, "step": 31975 }, { "epoch": 5.787665038885875, "grad_norm": 0.2153875082731247, "learning_rate": 4.2923190274452015e-07, "loss": 0.0028, "step": 32000 }, { "epoch": 5.792186652197504, "grad_norm": 21.80754852294922, "learning_rate": 4.2877141278320127e-07, "loss": 0.0509, "step": 32025 }, { "epoch": 5.7967082655091335, "grad_norm": 0.027552086859941483, "learning_rate": 4.283109228218825e-07, "loss": 0.0455, "step": 32050 }, { "epoch": 5.801229878820763, "grad_norm": 0.5974065065383911, "learning_rate": 4.278504328605636e-07, "loss": 0.003, "step": 32075 }, { "epoch": 5.8057514921323925, 
"grad_norm": 0.0764407068490982, "learning_rate": 4.273899428992448e-07, "loss": 0.0073, "step": 32100 }, { "epoch": 5.810273105444022, "grad_norm": 0.00713876448571682, "learning_rate": 4.2692945293792593e-07, "loss": 0.0016, "step": 32125 }, { "epoch": 5.814794718755652, "grad_norm": 2.5511715412139893, "learning_rate": 4.264689629766071e-07, "loss": 0.0018, "step": 32150 }, { "epoch": 5.819316332067282, "grad_norm": 0.024245627224445343, "learning_rate": 4.2600847301528826e-07, "loss": 0.0072, "step": 32175 }, { "epoch": 5.823837945378911, "grad_norm": 0.7642532587051392, "learning_rate": 4.255479830539694e-07, "loss": 0.0063, "step": 32200 }, { "epoch": 5.828359558690541, "grad_norm": 0.008916143327951431, "learning_rate": 4.250874930926506e-07, "loss": 0.0016, "step": 32225 }, { "epoch": 5.83288117200217, "grad_norm": 0.04523088410496712, "learning_rate": 4.246270031313317e-07, "loss": 0.0068, "step": 32250 }, { "epoch": 5.8374027853138, "grad_norm": 0.01864382065832615, "learning_rate": 4.241665131700129e-07, "loss": 0.0062, "step": 32275 }, { "epoch": 5.841924398625429, "grad_norm": 0.09099319577217102, "learning_rate": 4.2370602320869404e-07, "loss": 0.0081, "step": 32300 }, { "epoch": 5.84644601193706, "grad_norm": 0.14763249456882477, "learning_rate": 4.2324553324737516e-07, "loss": 0.008, "step": 32325 }, { "epoch": 5.850967625248689, "grad_norm": 0.08158791810274124, "learning_rate": 4.227850432860564e-07, "loss": 0.0096, "step": 32350 }, { "epoch": 5.8554892385603186, "grad_norm": 0.14870816469192505, "learning_rate": 4.223245533247375e-07, "loss": 0.0019, "step": 32375 }, { "epoch": 5.860010851871948, "grad_norm": 0.02300347574055195, "learning_rate": 4.2186406336341865e-07, "loss": 0.0054, "step": 32400 }, { "epoch": 5.8645324651835775, "grad_norm": 1.2022995948791504, "learning_rate": 4.214035734020998e-07, "loss": 0.0355, "step": 32425 }, { "epoch": 5.869054078495207, "grad_norm": 1.5367909669876099, "learning_rate": 4.20943083440781e-07, "loss": 
0.0319, "step": 32450 }, { "epoch": 5.8735756918068365, "grad_norm": 0.14563749730587006, "learning_rate": 4.2048259347946215e-07, "loss": 0.0059, "step": 32475 }, { "epoch": 5.878097305118466, "grad_norm": 0.21176332235336304, "learning_rate": 4.2002210351814327e-07, "loss": 0.0023, "step": 32500 }, { "epoch": 5.882618918430095, "grad_norm": 0.14737099409103394, "learning_rate": 4.1956161355682443e-07, "loss": 0.0029, "step": 32525 }, { "epoch": 5.887140531741726, "grad_norm": 0.05561397597193718, "learning_rate": 4.191011235955056e-07, "loss": 0.0041, "step": 32550 }, { "epoch": 5.891662145053355, "grad_norm": 0.1818033903837204, "learning_rate": 4.1864063363418676e-07, "loss": 0.0079, "step": 32575 }, { "epoch": 5.896183758364985, "grad_norm": 6.998167514801025, "learning_rate": 4.1818014367286793e-07, "loss": 0.0076, "step": 32600 }, { "epoch": 5.900705371676614, "grad_norm": 0.168256476521492, "learning_rate": 4.1771965371154904e-07, "loss": 0.0046, "step": 32625 }, { "epoch": 5.905226984988244, "grad_norm": 0.041657544672489166, "learning_rate": 4.172591637502302e-07, "loss": 0.0054, "step": 32650 }, { "epoch": 5.909748598299873, "grad_norm": 40.278446197509766, "learning_rate": 4.167986737889114e-07, "loss": 0.0125, "step": 32675 }, { "epoch": 5.914270211611503, "grad_norm": 0.1068694144487381, "learning_rate": 4.1633818382759254e-07, "loss": 0.0023, "step": 32700 }, { "epoch": 5.918791824923133, "grad_norm": 0.24333126842975616, "learning_rate": 4.158776938662737e-07, "loss": 0.0289, "step": 32725 }, { "epoch": 5.9233134382347625, "grad_norm": 0.16926701366901398, "learning_rate": 4.154172039049548e-07, "loss": 0.0123, "step": 32750 }, { "epoch": 5.927835051546392, "grad_norm": 3.9394893646240234, "learning_rate": 4.1495671394363604e-07, "loss": 0.0244, "step": 32775 }, { "epoch": 5.9323566648580215, "grad_norm": 0.06976808607578278, "learning_rate": 4.1449622398231715e-07, "loss": 0.0098, "step": 32800 }, { "epoch": 5.936878278169651, "grad_norm": 
24.640043258666992, "learning_rate": 4.140357340209983e-07, "loss": 0.0225, "step": 32825 }, { "epoch": 5.9413998914812804, "grad_norm": 4.303247451782227, "learning_rate": 4.135752440596795e-07, "loss": 0.0136, "step": 32850 }, { "epoch": 5.94592150479291, "grad_norm": 0.46884116530418396, "learning_rate": 4.1311475409836065e-07, "loss": 0.0156, "step": 32875 }, { "epoch": 5.950443118104539, "grad_norm": 0.9783020615577698, "learning_rate": 4.126542641370418e-07, "loss": 0.0114, "step": 32900 }, { "epoch": 5.954964731416169, "grad_norm": 0.5886393785476685, "learning_rate": 4.1219377417572293e-07, "loss": 0.0077, "step": 32925 }, { "epoch": 5.959486344727799, "grad_norm": 0.019434532150626183, "learning_rate": 4.117332842144041e-07, "loss": 0.0025, "step": 32950 }, { "epoch": 5.964007958039429, "grad_norm": 2.6205480098724365, "learning_rate": 4.1127279425308527e-07, "loss": 0.0026, "step": 32975 }, { "epoch": 5.968529571351058, "grad_norm": 0.25357627868652344, "learning_rate": 4.1081230429176643e-07, "loss": 0.0056, "step": 33000 }, { "epoch": 5.973051184662688, "grad_norm": 9.764650344848633, "learning_rate": 4.103518143304476e-07, "loss": 0.005, "step": 33025 }, { "epoch": 5.977572797974317, "grad_norm": 0.15983889997005463, "learning_rate": 4.098913243691287e-07, "loss": 0.0093, "step": 33050 }, { "epoch": 5.982094411285947, "grad_norm": 0.44210419058799744, "learning_rate": 4.094308344078099e-07, "loss": 0.006, "step": 33075 }, { "epoch": 5.986616024597576, "grad_norm": 7.316641807556152, "learning_rate": 4.0897034444649104e-07, "loss": 0.0096, "step": 33100 }, { "epoch": 5.9911376379092065, "grad_norm": 0.040955204516649246, "learning_rate": 4.085098544851722e-07, "loss": 0.002, "step": 33125 }, { "epoch": 5.995659251220836, "grad_norm": 28.565380096435547, "learning_rate": 4.080493645238534e-07, "loss": 0.0204, "step": 33150 }, { "epoch": 6.0, "eval_loss": 0.35324448347091675, "eval_runtime": 8693.2226, "eval_samples_per_second": 1.092, 
"eval_steps_per_second": 0.137, "eval_wer": 0.10656630365150545, "step": 33174 }, { "epoch": 6.0001808645324655, "grad_norm": 1.004451870918274, "learning_rate": 4.0760729416098725e-07, "loss": 0.0388, "step": 33175 }, { "epoch": 6.004702477844095, "grad_norm": 0.3171859383583069, "learning_rate": 4.071468041996684e-07, "loss": 0.0116, "step": 33200 }, { "epoch": 6.009224091155724, "grad_norm": 0.3744235634803772, "learning_rate": 4.066863142383496e-07, "loss": 0.0065, "step": 33225 }, { "epoch": 6.013745704467354, "grad_norm": 0.04872240498661995, "learning_rate": 4.0622582427703075e-07, "loss": 0.0118, "step": 33250 }, { "epoch": 6.018267317778983, "grad_norm": 0.20710858702659607, "learning_rate": 4.057653343157119e-07, "loss": 0.0018, "step": 33275 }, { "epoch": 6.022788931090613, "grad_norm": 0.6177895665168762, "learning_rate": 4.0530484435439303e-07, "loss": 0.0079, "step": 33300 }, { "epoch": 6.027310544402242, "grad_norm": 0.042505986988544464, "learning_rate": 4.0484435439307425e-07, "loss": 0.0122, "step": 33325 }, { "epoch": 6.031832157713873, "grad_norm": 0.018311861902475357, "learning_rate": 4.0438386443175536e-07, "loss": 0.0019, "step": 33350 }, { "epoch": 6.036353771025502, "grad_norm": 0.05716663971543312, "learning_rate": 4.0392337447043653e-07, "loss": 0.0006, "step": 33375 }, { "epoch": 6.040875384337132, "grad_norm": 0.03125373646616936, "learning_rate": 4.034628845091177e-07, "loss": 0.0008, "step": 33400 }, { "epoch": 6.045396997648761, "grad_norm": 0.04944000765681267, "learning_rate": 4.030023945477988e-07, "loss": 0.0112, "step": 33425 }, { "epoch": 6.049918610960391, "grad_norm": 3.482018232345581, "learning_rate": 4.0254190458648003e-07, "loss": 0.0068, "step": 33450 }, { "epoch": 6.05444022427202, "grad_norm": 0.051414769142866135, "learning_rate": 4.0208141462516114e-07, "loss": 0.0032, "step": 33475 }, { "epoch": 6.05896183758365, "grad_norm": 1.4644255638122559, "learning_rate": 4.016209246638423e-07, "loss": 0.0123, "step": 33500 
}, { "epoch": 6.063483450895279, "grad_norm": 0.038726359605789185, "learning_rate": 4.011604347025235e-07, "loss": 0.0028, "step": 33525 }, { "epoch": 6.068005064206909, "grad_norm": 0.45498254895210266, "learning_rate": 4.006999447412046e-07, "loss": 0.019, "step": 33550 }, { "epoch": 6.072526677518539, "grad_norm": 2.0771706104278564, "learning_rate": 4.002394547798858e-07, "loss": 0.0319, "step": 33575 }, { "epoch": 6.077048290830168, "grad_norm": 0.0325147807598114, "learning_rate": 3.997789648185669e-07, "loss": 0.0073, "step": 33600 }, { "epoch": 6.081569904141798, "grad_norm": 0.1332240253686905, "learning_rate": 3.993184748572481e-07, "loss": 0.0051, "step": 33625 }, { "epoch": 6.086091517453427, "grad_norm": 0.13362543284893036, "learning_rate": 3.9885798489592925e-07, "loss": 0.0042, "step": 33650 }, { "epoch": 6.090613130765057, "grad_norm": 0.09919251501560211, "learning_rate": 3.9839749493461037e-07, "loss": 0.0028, "step": 33675 }, { "epoch": 6.095134744076686, "grad_norm": 0.2754238545894623, "learning_rate": 3.979370049732916e-07, "loss": 0.0031, "step": 33700 }, { "epoch": 6.099656357388316, "grad_norm": 0.028968511149287224, "learning_rate": 3.974765150119727e-07, "loss": 0.0141, "step": 33725 }, { "epoch": 6.104177970699946, "grad_norm": 0.2524532675743103, "learning_rate": 3.970160250506539e-07, "loss": 0.0071, "step": 33750 }, { "epoch": 6.108699584011576, "grad_norm": 0.031802672892808914, "learning_rate": 3.9655553508933503e-07, "loss": 0.0012, "step": 33775 }, { "epoch": 6.113221197323205, "grad_norm": 0.05196872353553772, "learning_rate": 3.9609504512801614e-07, "loss": 0.0072, "step": 33800 }, { "epoch": 6.117742810634835, "grad_norm": 0.03085019811987877, "learning_rate": 3.9563455516669736e-07, "loss": 0.002, "step": 33825 }, { "epoch": 6.122264423946464, "grad_norm": 0.0648372620344162, "learning_rate": 3.951740652053785e-07, "loss": 0.0254, "step": 33850 }, { "epoch": 6.126786037258094, "grad_norm": 0.00866938941180706, 
"learning_rate": 3.947135752440597e-07, "loss": 0.0108, "step": 33875 }, { "epoch": 6.131307650569723, "grad_norm": 0.19999012351036072, "learning_rate": 3.942530852827408e-07, "loss": 0.0069, "step": 33900 }, { "epoch": 6.1358292638813525, "grad_norm": 0.04982515051960945, "learning_rate": 3.93792595321422e-07, "loss": 0.0016, "step": 33925 }, { "epoch": 6.140350877192983, "grad_norm": 2.1079819202423096, "learning_rate": 3.9333210536010314e-07, "loss": 0.0117, "step": 33950 }, { "epoch": 6.144872490504612, "grad_norm": 0.12225229293107986, "learning_rate": 3.9287161539878426e-07, "loss": 0.0389, "step": 33975 }, { "epoch": 6.149394103816242, "grad_norm": 0.014936073683202267, "learning_rate": 3.924111254374655e-07, "loss": 0.0173, "step": 34000 }, { "epoch": 6.153915717127871, "grad_norm": 0.05773633345961571, "learning_rate": 3.919506354761466e-07, "loss": 0.0046, "step": 34025 }, { "epoch": 6.158437330439501, "grad_norm": 0.7598387598991394, "learning_rate": 3.914901455148278e-07, "loss": 0.013, "step": 34050 }, { "epoch": 6.16295894375113, "grad_norm": 48.75155258178711, "learning_rate": 3.910296555535089e-07, "loss": 0.0044, "step": 34075 }, { "epoch": 6.16748055706276, "grad_norm": 0.024905025959014893, "learning_rate": 3.9056916559219003e-07, "loss": 0.0021, "step": 34100 }, { "epoch": 6.172002170374389, "grad_norm": 0.01038370281457901, "learning_rate": 3.9010867563087125e-07, "loss": 0.0016, "step": 34125 }, { "epoch": 6.176523783686019, "grad_norm": 2.034224510192871, "learning_rate": 3.8964818566955237e-07, "loss": 0.0096, "step": 34150 }, { "epoch": 6.181045396997649, "grad_norm": 1.9965308904647827, "learning_rate": 3.891876957082336e-07, "loss": 0.0176, "step": 34175 }, { "epoch": 6.185567010309279, "grad_norm": 108.9871826171875, "learning_rate": 3.887272057469147e-07, "loss": 0.0172, "step": 34200 }, { "epoch": 6.190088623620908, "grad_norm": 0.04911862686276436, "learning_rate": 3.882667157855958e-07, "loss": 0.0012, "step": 34225 }, { "epoch": 
6.1946102369325375, "grad_norm": 2.3518688678741455, "learning_rate": 3.8780622582427703e-07, "loss": 0.003, "step": 34250 }, { "epoch": 6.199131850244167, "grad_norm": 0.08073610067367554, "learning_rate": 3.8734573586295814e-07, "loss": 0.0228, "step": 34275 }, { "epoch": 6.2036534635557965, "grad_norm": 0.04245547577738762, "learning_rate": 3.8688524590163936e-07, "loss": 0.0008, "step": 34300 }, { "epoch": 6.208175076867426, "grad_norm": 0.04015149176120758, "learning_rate": 3.864247559403205e-07, "loss": 0.003, "step": 34325 }, { "epoch": 6.2126966901790555, "grad_norm": 0.040025342255830765, "learning_rate": 3.859642659790016e-07, "loss": 0.0283, "step": 34350 }, { "epoch": 6.217218303490686, "grad_norm": 4.162205696105957, "learning_rate": 3.855037760176828e-07, "loss": 0.0259, "step": 34375 }, { "epoch": 6.221739916802315, "grad_norm": 0.10629791021347046, "learning_rate": 3.850432860563639e-07, "loss": 0.0046, "step": 34400 }, { "epoch": 6.226261530113945, "grad_norm": 0.05209145322442055, "learning_rate": 3.8458279609504514e-07, "loss": 0.0063, "step": 34425 }, { "epoch": 6.230783143425574, "grad_norm": 0.03029513917863369, "learning_rate": 3.8412230613372625e-07, "loss": 0.0035, "step": 34450 }, { "epoch": 6.235304756737204, "grad_norm": 0.0416143536567688, "learning_rate": 3.836618161724075e-07, "loss": 0.0064, "step": 34475 }, { "epoch": 6.239826370048833, "grad_norm": 17.89125633239746, "learning_rate": 3.832013262110886e-07, "loss": 0.0103, "step": 34500 }, { "epoch": 6.244347983360463, "grad_norm": 0.08937986195087433, "learning_rate": 3.827408362497697e-07, "loss": 0.0102, "step": 34525 }, { "epoch": 6.248869596672092, "grad_norm": 0.012076592072844505, "learning_rate": 3.822803462884509e-07, "loss": 0.0012, "step": 34550 }, { "epoch": 6.2533912099837226, "grad_norm": 13.431784629821777, "learning_rate": 3.8181985632713203e-07, "loss": 0.0029, "step": 34575 }, { "epoch": 6.257912823295352, "grad_norm": 0.15255410969257355, "learning_rate": 
3.8135936636581325e-07, "loss": 0.0056, "step": 34600 }, { "epoch": 6.2624344366069815, "grad_norm": 0.7227760553359985, "learning_rate": 3.8089887640449437e-07, "loss": 0.0048, "step": 34625 }, { "epoch": 6.266956049918611, "grad_norm": 0.022182561457157135, "learning_rate": 3.804383864431755e-07, "loss": 0.0055, "step": 34650 }, { "epoch": 6.2714776632302405, "grad_norm": 0.10832487791776657, "learning_rate": 3.799778964818567e-07, "loss": 0.0141, "step": 34675 }, { "epoch": 6.27599927654187, "grad_norm": 0.08272892981767654, "learning_rate": 3.795174065205378e-07, "loss": 0.022, "step": 34700 }, { "epoch": 6.280520889853499, "grad_norm": 0.10777498036623001, "learning_rate": 3.7905691655921903e-07, "loss": 0.0062, "step": 34725 }, { "epoch": 6.285042503165129, "grad_norm": 0.1464318186044693, "learning_rate": 3.7859642659790014e-07, "loss": 0.0078, "step": 34750 }, { "epoch": 6.289564116476759, "grad_norm": 2.59875750541687, "learning_rate": 3.781359366365813e-07, "loss": 0.0364, "step": 34775 }, { "epoch": 6.294085729788389, "grad_norm": 0.6128166913986206, "learning_rate": 3.776754466752625e-07, "loss": 0.0119, "step": 34800 }, { "epoch": 6.298607343100018, "grad_norm": 0.3331466019153595, "learning_rate": 3.772149567139436e-07, "loss": 0.0147, "step": 34825 }, { "epoch": 6.303128956411648, "grad_norm": 1.0157824754714966, "learning_rate": 3.767544667526248e-07, "loss": 0.005, "step": 34850 }, { "epoch": 6.307650569723277, "grad_norm": 0.20274540781974792, "learning_rate": 3.762939767913059e-07, "loss": 0.0027, "step": 34875 }, { "epoch": 6.312172183034907, "grad_norm": 0.29477596282958984, "learning_rate": 3.758334868299871e-07, "loss": 0.006, "step": 34900 }, { "epoch": 6.316693796346536, "grad_norm": 0.10228332132101059, "learning_rate": 3.7537299686866825e-07, "loss": 0.002, "step": 34925 }, { "epoch": 6.321215409658166, "grad_norm": 0.08996782451868057, "learning_rate": 3.7491250690734937e-07, "loss": 0.0048, "step": 34950 }, { "epoch": 6.325737022969796, 
"grad_norm": 0.012035650201141834, "learning_rate": 3.744520169460306e-07, "loss": 0.0157, "step": 34975 }, { "epoch": 6.3302586362814255, "grad_norm": 0.035582542419433594, "learning_rate": 3.739915269847117e-07, "loss": 0.0112, "step": 35000 }, { "epoch": 6.334780249593055, "grad_norm": 0.01056807953864336, "learning_rate": 3.7353103702339287e-07, "loss": 0.0008, "step": 35025 }, { "epoch": 6.3393018629046844, "grad_norm": 0.2861970067024231, "learning_rate": 3.7307054706207403e-07, "loss": 0.0057, "step": 35050 }, { "epoch": 6.343823476216314, "grad_norm": 0.013209059834480286, "learning_rate": 3.7261005710075515e-07, "loss": 0.0134, "step": 35075 }, { "epoch": 6.348345089527943, "grad_norm": 0.1460546851158142, "learning_rate": 3.7214956713943636e-07, "loss": 0.0073, "step": 35100 }, { "epoch": 6.352866702839573, "grad_norm": 0.09581249952316284, "learning_rate": 3.716890771781175e-07, "loss": 0.0104, "step": 35125 }, { "epoch": 6.357388316151202, "grad_norm": 0.08900044858455658, "learning_rate": 3.712285872167987e-07, "loss": 0.0052, "step": 35150 }, { "epoch": 6.361909929462833, "grad_norm": 0.3841894865036011, "learning_rate": 3.707680972554798e-07, "loss": 0.0195, "step": 35175 }, { "epoch": 6.366431542774462, "grad_norm": 0.09828022122383118, "learning_rate": 3.70307607294161e-07, "loss": 0.012, "step": 35200 }, { "epoch": 6.370953156086092, "grad_norm": 6.788888931274414, "learning_rate": 3.6984711733284214e-07, "loss": 0.023, "step": 35225 }, { "epoch": 6.375474769397721, "grad_norm": 2.6568901538848877, "learning_rate": 3.6938662737152326e-07, "loss": 0.0014, "step": 35250 }, { "epoch": 6.379996382709351, "grad_norm": 0.021253295242786407, "learning_rate": 3.689261374102045e-07, "loss": 0.0089, "step": 35275 }, { "epoch": 6.38451799602098, "grad_norm": 0.7334450483322144, "learning_rate": 3.684656474488856e-07, "loss": 0.0017, "step": 35300 }, { "epoch": 6.38903960933261, "grad_norm": 0.41396549344062805, "learning_rate": 3.6800515748756676e-07, 
"loss": 0.0033, "step": 35325 }, { "epoch": 6.393561222644239, "grad_norm": 1.214400291442871, "learning_rate": 3.675446675262479e-07, "loss": 0.0016, "step": 35350 }, { "epoch": 6.3980828359558695, "grad_norm": 0.03277917578816414, "learning_rate": 3.6708417756492903e-07, "loss": 0.0009, "step": 35375 }, { "epoch": 6.402604449267499, "grad_norm": 0.1065434068441391, "learning_rate": 3.6662368760361025e-07, "loss": 0.0034, "step": 35400 }, { "epoch": 6.407126062579128, "grad_norm": 1.2677608728408813, "learning_rate": 3.6616319764229137e-07, "loss": 0.0024, "step": 35425 }, { "epoch": 6.411647675890758, "grad_norm": 0.034879542887210846, "learning_rate": 3.6570270768097253e-07, "loss": 0.0147, "step": 35450 }, { "epoch": 6.416169289202387, "grad_norm": 0.009821565821766853, "learning_rate": 3.652422177196537e-07, "loss": 0.0084, "step": 35475 }, { "epoch": 6.420690902514017, "grad_norm": 0.15989068150520325, "learning_rate": 3.6478172775833487e-07, "loss": 0.0253, "step": 35500 }, { "epoch": 6.425212515825646, "grad_norm": 0.021616969257593155, "learning_rate": 3.6432123779701603e-07, "loss": 0.0385, "step": 35525 }, { "epoch": 6.429734129137276, "grad_norm": 64.16004180908203, "learning_rate": 3.6386074783569715e-07, "loss": 0.0218, "step": 35550 }, { "epoch": 6.434255742448906, "grad_norm": 2.1929166316986084, "learning_rate": 3.634002578743783e-07, "loss": 0.0276, "step": 35575 }, { "epoch": 6.438777355760536, "grad_norm": 0.3426229655742645, "learning_rate": 3.629397679130595e-07, "loss": 0.0047, "step": 35600 }, { "epoch": 6.443298969072165, "grad_norm": 0.9598920345306396, "learning_rate": 3.6247927795174064e-07, "loss": 0.0171, "step": 35625 }, { "epoch": 6.447820582383795, "grad_norm": 0.39060238003730774, "learning_rate": 3.620187879904218e-07, "loss": 0.011, "step": 35650 }, { "epoch": 6.452342195695424, "grad_norm": 0.9227154850959778, "learning_rate": 3.615582980291029e-07, "loss": 0.023, "step": 35675 }, { "epoch": 6.456863809007054, "grad_norm": 
5.520915985107422, "learning_rate": 3.610978080677841e-07, "loss": 0.0012, "step": 35700 }, { "epoch": 6.461385422318683, "grad_norm": 0.09858091920614243, "learning_rate": 3.6063731810646526e-07, "loss": 0.0038, "step": 35725 }, { "epoch": 6.4659070356303125, "grad_norm": 0.02428305707871914, "learning_rate": 3.601768281451464e-07, "loss": 0.004, "step": 35750 }, { "epoch": 6.470428648941942, "grad_norm": 0.07016027718782425, "learning_rate": 3.597163381838276e-07, "loss": 0.0006, "step": 35775 }, { "epoch": 6.474950262253572, "grad_norm": 0.1356564462184906, "learning_rate": 3.592558482225087e-07, "loss": 0.0025, "step": 35800 }, { "epoch": 6.479471875565202, "grad_norm": 0.056458380073308945, "learning_rate": 3.587953582611899e-07, "loss": 0.0128, "step": 35825 }, { "epoch": 6.483993488876831, "grad_norm": 2.0865302085876465, "learning_rate": 3.5833486829987103e-07, "loss": 0.0052, "step": 35850 }, { "epoch": 6.488515102188461, "grad_norm": 0.2598702013492584, "learning_rate": 3.578743783385522e-07, "loss": 0.0079, "step": 35875 }, { "epoch": 6.49303671550009, "grad_norm": 2.376058578491211, "learning_rate": 3.5741388837723337e-07, "loss": 0.0096, "step": 35900 }, { "epoch": 6.49755832881172, "grad_norm": 0.022796526551246643, "learning_rate": 3.5695339841591453e-07, "loss": 0.0178, "step": 35925 }, { "epoch": 6.502079942123349, "grad_norm": 17.341182708740234, "learning_rate": 3.564929084545957e-07, "loss": 0.0093, "step": 35950 }, { "epoch": 6.50660155543498, "grad_norm": 0.10820627212524414, "learning_rate": 3.560324184932768e-07, "loss": 0.0426, "step": 35975 }, { "epoch": 6.511123168746609, "grad_norm": 0.10570292919874191, "learning_rate": 3.55571928531958e-07, "loss": 0.0118, "step": 36000 }, { "epoch": 6.515644782058239, "grad_norm": 5.8104658126831055, "learning_rate": 3.5511143857063914e-07, "loss": 0.0083, "step": 36025 }, { "epoch": 6.520166395369868, "grad_norm": 0.9492124915122986, "learning_rate": 3.546509486093203e-07, "loss": 0.0157, "step": 
36050 }, { "epoch": 6.524688008681498, "grad_norm": 0.022533750161528587, "learning_rate": 3.541904586480015e-07, "loss": 0.0021, "step": 36075 }, { "epoch": 6.529209621993127, "grad_norm": 0.35308748483657837, "learning_rate": 3.537299686866826e-07, "loss": 0.0015, "step": 36100 }, { "epoch": 6.5337312353047565, "grad_norm": 1.7888010740280151, "learning_rate": 3.5326947872536376e-07, "loss": 0.004, "step": 36125 }, { "epoch": 6.538252848616386, "grad_norm": 0.1663166582584381, "learning_rate": 3.528089887640449e-07, "loss": 0.0048, "step": 36150 }, { "epoch": 6.5427744619280155, "grad_norm": 0.4084545969963074, "learning_rate": 3.523484988027261e-07, "loss": 0.0004, "step": 36175 }, { "epoch": 6.547296075239646, "grad_norm": 0.039476677775382996, "learning_rate": 3.5188800884140726e-07, "loss": 0.0006, "step": 36200 }, { "epoch": 6.551817688551275, "grad_norm": 0.0072963847778737545, "learning_rate": 3.514275188800884e-07, "loss": 0.008, "step": 36225 }, { "epoch": 6.556339301862905, "grad_norm": 0.01364920660853386, "learning_rate": 3.5096702891876954e-07, "loss": 0.0077, "step": 36250 }, { "epoch": 6.560860915174534, "grad_norm": 0.796404242515564, "learning_rate": 3.505065389574507e-07, "loss": 0.0229, "step": 36275 }, { "epoch": 6.565382528486164, "grad_norm": 0.11998113989830017, "learning_rate": 3.5004604899613187e-07, "loss": 0.0266, "step": 36300 }, { "epoch": 6.569904141797793, "grad_norm": 0.41521695256233215, "learning_rate": 3.4958555903481303e-07, "loss": 0.0025, "step": 36325 }, { "epoch": 6.574425755109423, "grad_norm": 0.07342156767845154, "learning_rate": 3.491250690734942e-07, "loss": 0.0189, "step": 36350 }, { "epoch": 6.578947368421053, "grad_norm": 0.369840145111084, "learning_rate": 3.486645791121753e-07, "loss": 0.0209, "step": 36375 }, { "epoch": 6.583468981732683, "grad_norm": 0.04731602966785431, "learning_rate": 3.482040891508565e-07, "loss": 0.0051, "step": 36400 }, { "epoch": 6.587990595044312, "grad_norm": 0.21776604652404785, 
"learning_rate": 3.4774359918953765e-07, "loss": 0.0017, "step": 36425 }, { "epoch": 6.5925122083559415, "grad_norm": 0.05085720121860504, "learning_rate": 3.472831092282188e-07, "loss": 0.0014, "step": 36450 }, { "epoch": 6.597033821667571, "grad_norm": 1.6889132261276245, "learning_rate": 3.468226192669e-07, "loss": 0.0029, "step": 36475 }, { "epoch": 6.6015554349792005, "grad_norm": 7.945947170257568, "learning_rate": 3.4636212930558114e-07, "loss": 0.0041, "step": 36500 }, { "epoch": 6.60607704829083, "grad_norm": 0.7570422291755676, "learning_rate": 3.4590163934426226e-07, "loss": 0.0015, "step": 36525 }, { "epoch": 6.6105986616024595, "grad_norm": 0.05319717898964882, "learning_rate": 3.454411493829434e-07, "loss": 0.0042, "step": 36550 }, { "epoch": 6.615120274914089, "grad_norm": 0.012651624158024788, "learning_rate": 3.449806594216246e-07, "loss": 0.0028, "step": 36575 }, { "epoch": 6.619641888225719, "grad_norm": 0.06157555803656578, "learning_rate": 3.4452016946030576e-07, "loss": 0.0004, "step": 36600 }, { "epoch": 6.624163501537349, "grad_norm": 0.009101797826588154, "learning_rate": 3.440596794989869e-07, "loss": 0.0071, "step": 36625 }, { "epoch": 6.628685114848978, "grad_norm": 0.08508434146642685, "learning_rate": 3.435991895376681e-07, "loss": 0.0046, "step": 36650 }, { "epoch": 6.633206728160608, "grad_norm": 0.026564456522464752, "learning_rate": 3.431386995763492e-07, "loss": 0.0059, "step": 36675 }, { "epoch": 6.637728341472237, "grad_norm": 0.935742199420929, "learning_rate": 3.4267820961503037e-07, "loss": 0.0014, "step": 36700 }, { "epoch": 6.642249954783867, "grad_norm": 7.623640060424805, "learning_rate": 3.4221771965371153e-07, "loss": 0.009, "step": 36725 }, { "epoch": 6.646771568095496, "grad_norm": 0.023885022848844528, "learning_rate": 3.417572296923927e-07, "loss": 0.0017, "step": 36750 }, { "epoch": 6.6512931814071266, "grad_norm": 0.20136716961860657, "learning_rate": 3.4129673973107387e-07, "loss": 0.0513, "step": 36775 }, { 
"epoch": 6.655814794718756, "grad_norm": 0.06223003938794136, "learning_rate": 3.40836249769755e-07, "loss": 0.0029, "step": 36800 }, { "epoch": 6.6603364080303855, "grad_norm": 0.20519289374351501, "learning_rate": 3.4037575980843615e-07, "loss": 0.0041, "step": 36825 }, { "epoch": 6.664858021342015, "grad_norm": 101.21697998046875, "learning_rate": 3.399152698471173e-07, "loss": 0.0043, "step": 36850 }, { "epoch": 6.6693796346536445, "grad_norm": 0.015112169086933136, "learning_rate": 3.394547798857985e-07, "loss": 0.0037, "step": 36875 }, { "epoch": 6.673901247965274, "grad_norm": 0.8060915470123291, "learning_rate": 3.3899428992447965e-07, "loss": 0.0029, "step": 36900 }, { "epoch": 6.678422861276903, "grad_norm": 0.3591912090778351, "learning_rate": 3.3853379996316076e-07, "loss": 0.0042, "step": 36925 }, { "epoch": 6.682944474588533, "grad_norm": 0.3402554392814636, "learning_rate": 3.38073310001842e-07, "loss": 0.0005, "step": 36950 }, { "epoch": 6.687466087900162, "grad_norm": 0.0494844950735569, "learning_rate": 3.376128200405231e-07, "loss": 0.0041, "step": 36975 }, { "epoch": 6.691987701211792, "grad_norm": 0.08879272639751434, "learning_rate": 3.3715233007920426e-07, "loss": 0.0045, "step": 37000 }, { "epoch": 6.696509314523422, "grad_norm": 8.207866668701172, "learning_rate": 3.366918401178854e-07, "loss": 0.0093, "step": 37025 }, { "epoch": 6.701030927835052, "grad_norm": 0.1084265485405922, "learning_rate": 3.3624976975501935e-07, "loss": 0.0175, "step": 37050 }, { "epoch": 6.705552541146681, "grad_norm": 0.03057611919939518, "learning_rate": 3.3578927979370047e-07, "loss": 0.0027, "step": 37075 }, { "epoch": 6.710074154458311, "grad_norm": 0.14145521819591522, "learning_rate": 3.353287898323817e-07, "loss": 0.0155, "step": 37100 }, { "epoch": 6.71459576776994, "grad_norm": 0.03683609515428543, "learning_rate": 3.348682998710628e-07, "loss": 0.0121, "step": 37125 }, { "epoch": 6.71911738108157, "grad_norm": 0.2000865340232849, "learning_rate": 
3.344078099097439e-07, "loss": 0.0104, "step": 37150 }, { "epoch": 6.723638994393199, "grad_norm": 1.1748223304748535, "learning_rate": 3.3394731994842513e-07, "loss": 0.0266, "step": 37175 }, { "epoch": 6.7281606077048295, "grad_norm": 0.04432156682014465, "learning_rate": 3.3348682998710625e-07, "loss": 0.0047, "step": 37200 }, { "epoch": 6.732682221016459, "grad_norm": 0.1042926087975502, "learning_rate": 3.3302634002578746e-07, "loss": 0.0017, "step": 37225 }, { "epoch": 6.7372038343280884, "grad_norm": 0.025923365727066994, "learning_rate": 3.325658500644686e-07, "loss": 0.0023, "step": 37250 }, { "epoch": 6.741725447639718, "grad_norm": 5.998639106750488, "learning_rate": 3.321053601031497e-07, "loss": 0.0066, "step": 37275 }, { "epoch": 6.746247060951347, "grad_norm": 0.038715049624443054, "learning_rate": 3.316448701418309e-07, "loss": 0.0114, "step": 37300 }, { "epoch": 6.750768674262977, "grad_norm": 0.2970021963119507, "learning_rate": 3.31184380180512e-07, "loss": 0.0028, "step": 37325 }, { "epoch": 6.755290287574606, "grad_norm": 0.021920220926404, "learning_rate": 3.3072389021919324e-07, "loss": 0.0028, "step": 37350 }, { "epoch": 6.759811900886236, "grad_norm": 0.009460126049816608, "learning_rate": 3.3026340025787436e-07, "loss": 0.0038, "step": 37375 }, { "epoch": 6.764333514197865, "grad_norm": 10.395684242248535, "learning_rate": 3.298029102965555e-07, "loss": 0.004, "step": 37400 }, { "epoch": 6.768855127509496, "grad_norm": 0.3913702070713043, "learning_rate": 3.293424203352367e-07, "loss": 0.007, "step": 37425 }, { "epoch": 6.773376740821125, "grad_norm": 0.009950965642929077, "learning_rate": 3.288819303739178e-07, "loss": 0.0049, "step": 37450 }, { "epoch": 6.777898354132755, "grad_norm": 0.06827165186405182, "learning_rate": 3.28421440412599e-07, "loss": 0.0006, "step": 37475 }, { "epoch": 6.782419967444384, "grad_norm": 0.044129110872745514, "learning_rate": 3.2796095045128013e-07, "loss": 0.0108, "step": 37500 }, { "epoch": 
6.786941580756014, "grad_norm": 0.08213396370410919, "learning_rate": 3.2750046048996135e-07, "loss": 0.001, "step": 37525 }, { "epoch": 6.791463194067643, "grad_norm": 0.8949390053749084, "learning_rate": 3.2703997052864247e-07, "loss": 0.0508, "step": 37550 }, { "epoch": 6.795984807379273, "grad_norm": 3.0352182388305664, "learning_rate": 3.265794805673236e-07, "loss": 0.0366, "step": 37575 }, { "epoch": 6.800506420690903, "grad_norm": 0.03377075120806694, "learning_rate": 3.261189906060048e-07, "loss": 0.0061, "step": 37600 }, { "epoch": 6.805028034002532, "grad_norm": 0.012610839679837227, "learning_rate": 3.256585006446859e-07, "loss": 0.0029, "step": 37625 }, { "epoch": 6.809549647314162, "grad_norm": 0.3035992681980133, "learning_rate": 3.2519801068336713e-07, "loss": 0.0075, "step": 37650 }, { "epoch": 6.814071260625791, "grad_norm": 0.06034184619784355, "learning_rate": 3.2473752072204824e-07, "loss": 0.0042, "step": 37675 }, { "epoch": 6.818592873937421, "grad_norm": 0.10909536480903625, "learning_rate": 3.2427703076072936e-07, "loss": 0.0116, "step": 37700 }, { "epoch": 6.82311448724905, "grad_norm": 13.51667594909668, "learning_rate": 3.238165407994106e-07, "loss": 0.0039, "step": 37725 }, { "epoch": 6.82763610056068, "grad_norm": 0.026187343522906303, "learning_rate": 3.233560508380917e-07, "loss": 0.0117, "step": 37750 }, { "epoch": 6.832157713872309, "grad_norm": 0.04201328754425049, "learning_rate": 3.228955608767729e-07, "loss": 0.0197, "step": 37775 }, { "epoch": 6.836679327183939, "grad_norm": 0.05836571007966995, "learning_rate": 3.22435070915454e-07, "loss": 0.0122, "step": 37800 }, { "epoch": 6.841200940495569, "grad_norm": 0.005761469714343548, "learning_rate": 3.219745809541352e-07, "loss": 0.0194, "step": 37825 }, { "epoch": 6.845722553807199, "grad_norm": 0.019468627870082855, "learning_rate": 3.2151409099281636e-07, "loss": 0.0053, "step": 37850 }, { "epoch": 6.850244167118828, "grad_norm": 0.005167699884623289, "learning_rate": 
3.2105360103149747e-07, "loss": 0.0013, "step": 37875 }, { "epoch": 6.854765780430458, "grad_norm": 0.06815607845783234, "learning_rate": 3.205931110701787e-07, "loss": 0.0078, "step": 37900 }, { "epoch": 6.859287393742087, "grad_norm": 0.05461608245968819, "learning_rate": 3.201326211088598e-07, "loss": 0.0046, "step": 37925 }, { "epoch": 6.8638090070537165, "grad_norm": 0.3847118020057678, "learning_rate": 3.1967213114754097e-07, "loss": 0.0215, "step": 37950 }, { "epoch": 6.868330620365346, "grad_norm": 0.15587353706359863, "learning_rate": 3.1921164118622213e-07, "loss": 0.03, "step": 37975 }, { "epoch": 6.872852233676976, "grad_norm": 0.06245379522442818, "learning_rate": 3.1875115122490325e-07, "loss": 0.0102, "step": 38000 }, { "epoch": 6.877373846988606, "grad_norm": 0.03094295971095562, "learning_rate": 3.1829066126358447e-07, "loss": 0.0065, "step": 38025 }, { "epoch": 6.881895460300235, "grad_norm": 0.1180916577577591, "learning_rate": 3.178301713022656e-07, "loss": 0.0013, "step": 38050 }, { "epoch": 6.886417073611865, "grad_norm": 0.5813055634498596, "learning_rate": 3.1736968134094675e-07, "loss": 0.0071, "step": 38075 }, { "epoch": 6.890938686923494, "grad_norm": 0.14167048037052155, "learning_rate": 3.169091913796279e-07, "loss": 0.0033, "step": 38100 }, { "epoch": 6.895460300235124, "grad_norm": 0.1619337499141693, "learning_rate": 3.164487014183091e-07, "loss": 0.0045, "step": 38125 }, { "epoch": 6.899981913546753, "grad_norm": 0.03847223520278931, "learning_rate": 3.1598821145699024e-07, "loss": 0.0043, "step": 38150 }, { "epoch": 6.904503526858383, "grad_norm": 0.11694706231355667, "learning_rate": 3.1552772149567136e-07, "loss": 0.0097, "step": 38175 }, { "epoch": 6.909025140170012, "grad_norm": 0.07207904756069183, "learning_rate": 3.150672315343526e-07, "loss": 0.0004, "step": 38200 }, { "epoch": 6.913546753481643, "grad_norm": 1.5510050058364868, "learning_rate": 3.146067415730337e-07, "loss": 0.007, "step": 38225 }, { "epoch": 
6.918068366793272, "grad_norm": 0.13146886229515076, "learning_rate": 3.1414625161171486e-07, "loss": 0.0121, "step": 38250 }, { "epoch": 6.922589980104902, "grad_norm": 0.09875814616680145, "learning_rate": 3.13685761650396e-07, "loss": 0.0128, "step": 38275 }, { "epoch": 6.927111593416531, "grad_norm": 0.02351992577314377, "learning_rate": 3.1322527168907714e-07, "loss": 0.017, "step": 38300 }, { "epoch": 6.9316332067281605, "grad_norm": 1.6462205648422241, "learning_rate": 3.1276478172775835e-07, "loss": 0.0443, "step": 38325 }, { "epoch": 6.93615482003979, "grad_norm": 1.7699528932571411, "learning_rate": 3.1230429176643947e-07, "loss": 0.0535, "step": 38350 }, { "epoch": 6.9406764333514195, "grad_norm": 0.2231673002243042, "learning_rate": 3.1184380180512063e-07, "loss": 0.0369, "step": 38375 }, { "epoch": 6.94519804666305, "grad_norm": 4.6962738037109375, "learning_rate": 3.113833118438018e-07, "loss": 0.006, "step": 38400 }, { "epoch": 6.949719659974679, "grad_norm": 0.08258053660392761, "learning_rate": 3.109228218824829e-07, "loss": 0.0052, "step": 38425 }, { "epoch": 6.954241273286309, "grad_norm": 1.8889635801315308, "learning_rate": 3.1046233192116413e-07, "loss": 0.0061, "step": 38450 }, { "epoch": 6.958762886597938, "grad_norm": 0.09585348516702652, "learning_rate": 3.1000184195984525e-07, "loss": 0.0047, "step": 38475 }, { "epoch": 6.963284499909568, "grad_norm": 0.10662294924259186, "learning_rate": 3.095413519985264e-07, "loss": 0.0076, "step": 38500 }, { "epoch": 6.967806113221197, "grad_norm": 0.006054690573364496, "learning_rate": 3.090808620372076e-07, "loss": 0.0095, "step": 38525 }, { "epoch": 6.972327726532827, "grad_norm": 27.81556510925293, "learning_rate": 3.0862037207588875e-07, "loss": 0.0016, "step": 38550 }, { "epoch": 6.976849339844456, "grad_norm": 0.030143573880195618, "learning_rate": 3.081598821145699e-07, "loss": 0.0136, "step": 38575 }, { "epoch": 6.981370953156086, "grad_norm": 2.358839750289917, "learning_rate": 
3.07699392153251e-07, "loss": 0.0021, "step": 38600 }, { "epoch": 6.985892566467716, "grad_norm": 12.556412696838379, "learning_rate": 3.072389021919322e-07, "loss": 0.0012, "step": 38625 }, { "epoch": 6.9904141797793455, "grad_norm": 0.13631652295589447, "learning_rate": 3.0677841223061336e-07, "loss": 0.0066, "step": 38650 }, { "epoch": 6.994935793090975, "grad_norm": 41.28492736816406, "learning_rate": 3.063179222692945e-07, "loss": 0.0265, "step": 38675 }, { "epoch": 6.9994574064026045, "grad_norm": 0.12053684145212173, "learning_rate": 3.058574323079757e-07, "loss": 0.0605, "step": 38700 }, { "epoch": 7.0, "eval_loss": 0.3541729748249054, "eval_runtime": 8626.509, "eval_samples_per_second": 1.101, "eval_steps_per_second": 0.138, "eval_wer": 0.10525304292120435, "step": 38703 }, { "epoch": 7.003979019714234, "grad_norm": 1.1688233613967896, "learning_rate": 3.053969423466568e-07, "loss": 0.0075, "step": 38725 }, { "epoch": 7.0085006330258635, "grad_norm": 0.06878869980573654, "learning_rate": 3.0493645238533797e-07, "loss": 0.0013, "step": 38750 }, { "epoch": 7.013022246337493, "grad_norm": 0.12803910672664642, "learning_rate": 3.0447596242401914e-07, "loss": 0.006, "step": 38775 }, { "epoch": 7.017543859649122, "grad_norm": 11.873225212097168, "learning_rate": 3.040154724627003e-07, "loss": 0.0017, "step": 38800 }, { "epoch": 7.022065472960753, "grad_norm": 0.09927644580602646, "learning_rate": 3.0355498250138147e-07, "loss": 0.0011, "step": 38825 }, { "epoch": 7.026587086272382, "grad_norm": 0.014069181866943836, "learning_rate": 3.030944925400626e-07, "loss": 0.0007, "step": 38850 }, { "epoch": 7.031108699584012, "grad_norm": 3.6158812046051025, "learning_rate": 3.026340025787438e-07, "loss": 0.0076, "step": 38875 }, { "epoch": 7.035630312895641, "grad_norm": 0.049011897295713425, "learning_rate": 3.021735126174249e-07, "loss": 0.0027, "step": 38900 }, { "epoch": 7.040151926207271, "grad_norm": 0.02783900685608387, "learning_rate": 3.017130226561061e-07, 
"loss": 0.0026, "step": 38925 }, { "epoch": 7.0446735395189, "grad_norm": 0.024276690557599068, "learning_rate": 3.0125253269478725e-07, "loss": 0.0018, "step": 38950 }, { "epoch": 7.04919515283053, "grad_norm": 0.052325211465358734, "learning_rate": 3.007920427334684e-07, "loss": 0.0089, "step": 38975 }, { "epoch": 7.053716766142159, "grad_norm": 0.021905574947595596, "learning_rate": 3.003315527721496e-07, "loss": 0.0027, "step": 39000 }, { "epoch": 7.0582383794537895, "grad_norm": 0.013508542440831661, "learning_rate": 2.998710628108307e-07, "loss": 0.002, "step": 39025 }, { "epoch": 7.062759992765419, "grad_norm": 0.052605342119932175, "learning_rate": 2.9941057284951186e-07, "loss": 0.0069, "step": 39050 }, { "epoch": 7.0672816060770485, "grad_norm": 0.578596830368042, "learning_rate": 2.98950082888193e-07, "loss": 0.0269, "step": 39075 }, { "epoch": 7.071803219388678, "grad_norm": 21.0313663482666, "learning_rate": 2.984895929268742e-07, "loss": 0.0125, "step": 39100 }, { "epoch": 7.076324832700307, "grad_norm": 0.19108355045318604, "learning_rate": 2.9802910296555536e-07, "loss": 0.0074, "step": 39125 }, { "epoch": 7.080846446011937, "grad_norm": 2.0000555515289307, "learning_rate": 2.9756861300423647e-07, "loss": 0.0028, "step": 39150 }, { "epoch": 7.085368059323566, "grad_norm": 0.03665238618850708, "learning_rate": 2.9710812304291764e-07, "loss": 0.0046, "step": 39175 }, { "epoch": 7.089889672635196, "grad_norm": 0.03791901841759682, "learning_rate": 2.966476330815988e-07, "loss": 0.0016, "step": 39200 }, { "epoch": 7.094411285946826, "grad_norm": 0.016226934269070625, "learning_rate": 2.9618714312027997e-07, "loss": 0.0028, "step": 39225 }, { "epoch": 7.098932899258456, "grad_norm": 12.398660659790039, "learning_rate": 2.9572665315896113e-07, "loss": 0.0017, "step": 39250 }, { "epoch": 7.103454512570085, "grad_norm": 0.012261465191841125, "learning_rate": 2.952661631976423e-07, "loss": 0.001, "step": 39275 }, { "epoch": 7.107976125881715, "grad_norm": 
14.556396484375, "learning_rate": 2.948056732363234e-07, "loss": 0.0109, "step": 39300 }, { "epoch": 7.112497739193344, "grad_norm": 0.11572438478469849, "learning_rate": 2.943451832750046e-07, "loss": 0.0049, "step": 39325 }, { "epoch": 7.117019352504974, "grad_norm": 0.28125593066215515, "learning_rate": 2.9388469331368575e-07, "loss": 0.0114, "step": 39350 }, { "epoch": 7.121540965816603, "grad_norm": 0.05056861415505409, "learning_rate": 2.934242033523669e-07, "loss": 0.0012, "step": 39375 }, { "epoch": 7.126062579128233, "grad_norm": 0.22000250220298767, "learning_rate": 2.929637133910481e-07, "loss": 0.0305, "step": 39400 }, { "epoch": 7.130584192439863, "grad_norm": 0.020098086446523666, "learning_rate": 2.925032234297292e-07, "loss": 0.0013, "step": 39425 }, { "epoch": 7.1351058057514924, "grad_norm": 0.026488734409213066, "learning_rate": 2.9204273346841036e-07, "loss": 0.0356, "step": 39450 }, { "epoch": 7.139627419063122, "grad_norm": 0.024454880505800247, "learning_rate": 2.915822435070915e-07, "loss": 0.0077, "step": 39475 }, { "epoch": 7.144149032374751, "grad_norm": 0.017868295311927795, "learning_rate": 2.911217535457727e-07, "loss": 0.0215, "step": 39500 }, { "epoch": 7.148670645686381, "grad_norm": 1.8522053956985474, "learning_rate": 2.9066126358445386e-07, "loss": 0.0043, "step": 39525 }, { "epoch": 7.15319225899801, "grad_norm": 0.20884989202022552, "learning_rate": 2.90200773623135e-07, "loss": 0.0055, "step": 39550 }, { "epoch": 7.15771387230964, "grad_norm": 0.1342606246471405, "learning_rate": 2.8974028366181614e-07, "loss": 0.0012, "step": 39575 }, { "epoch": 7.162235485621269, "grad_norm": 0.14675575494766235, "learning_rate": 2.892797937004973e-07, "loss": 0.0013, "step": 39600 }, { "epoch": 7.1667570989329, "grad_norm": 0.034802380949258804, "learning_rate": 2.8881930373917847e-07, "loss": 0.0014, "step": 39625 }, { "epoch": 7.171278712244529, "grad_norm": 0.03734385594725609, "learning_rate": 2.8835881377785964e-07, "loss": 0.0026, 
"step": 39650 }, { "epoch": 7.175800325556159, "grad_norm": 0.7827721834182739, "learning_rate": 2.878983238165408e-07, "loss": 0.0009, "step": 39675 }, { "epoch": 7.180321938867788, "grad_norm": 0.06746553629636765, "learning_rate": 2.8743783385522197e-07, "loss": 0.0024, "step": 39700 }, { "epoch": 7.184843552179418, "grad_norm": 0.2701489329338074, "learning_rate": 2.869773438939031e-07, "loss": 0.0058, "step": 39725 }, { "epoch": 7.189365165491047, "grad_norm": 0.021761702373623848, "learning_rate": 2.8651685393258425e-07, "loss": 0.0078, "step": 39750 }, { "epoch": 7.193886778802677, "grad_norm": 0.004122884478420019, "learning_rate": 2.860563639712654e-07, "loss": 0.0009, "step": 39775 }, { "epoch": 7.198408392114306, "grad_norm": 0.05977817252278328, "learning_rate": 2.855958740099466e-07, "loss": 0.0061, "step": 39800 }, { "epoch": 7.2029300054259355, "grad_norm": 0.7055386900901794, "learning_rate": 2.8513538404862775e-07, "loss": 0.0058, "step": 39825 }, { "epoch": 7.207451618737566, "grad_norm": 13.389866828918457, "learning_rate": 2.8467489408730886e-07, "loss": 0.0142, "step": 39850 }, { "epoch": 7.211973232049195, "grad_norm": 0.13937248289585114, "learning_rate": 2.8421440412599e-07, "loss": 0.0028, "step": 39875 }, { "epoch": 7.216494845360825, "grad_norm": 0.6235303282737732, "learning_rate": 2.837539141646712e-07, "loss": 0.0083, "step": 39900 }, { "epoch": 7.221016458672454, "grad_norm": 2.9348771572113037, "learning_rate": 2.8329342420335236e-07, "loss": 0.0374, "step": 39925 }, { "epoch": 7.225538071984084, "grad_norm": 3.077686071395874, "learning_rate": 2.828329342420335e-07, "loss": 0.0082, "step": 39950 }, { "epoch": 7.230059685295713, "grad_norm": 0.22787833213806152, "learning_rate": 2.8237244428071464e-07, "loss": 0.0012, "step": 39975 }, { "epoch": 7.234581298607343, "grad_norm": 0.06242289021611214, "learning_rate": 2.8191195431939586e-07, "loss": 0.0061, "step": 40000 }, { "epoch": 7.239102911918972, "grad_norm": 0.9802669882774353, 
"learning_rate": 2.8145146435807697e-07, "loss": 0.0092, "step": 40025 }, { "epoch": 7.243624525230603, "grad_norm": 0.012614204548299313, "learning_rate": 2.8099097439675814e-07, "loss": 0.0005, "step": 40050 }, { "epoch": 7.248146138542232, "grad_norm": 0.013021476566791534, "learning_rate": 2.805304844354393e-07, "loss": 0.0101, "step": 40075 }, { "epoch": 7.252667751853862, "grad_norm": 0.008002633228898048, "learning_rate": 2.800699944741204e-07, "loss": 0.0083, "step": 40100 }, { "epoch": 7.257189365165491, "grad_norm": 0.04996323958039284, "learning_rate": 2.7960950451280164e-07, "loss": 0.0011, "step": 40125 }, { "epoch": 7.2617109784771205, "grad_norm": 11.185647964477539, "learning_rate": 2.7914901455148275e-07, "loss": 0.0034, "step": 40150 }, { "epoch": 7.26623259178875, "grad_norm": 0.5441507697105408, "learning_rate": 2.786885245901639e-07, "loss": 0.012, "step": 40175 }, { "epoch": 7.2707542051003795, "grad_norm": 0.020439432933926582, "learning_rate": 2.782280346288451e-07, "loss": 0.0094, "step": 40200 }, { "epoch": 7.275275818412009, "grad_norm": 1.3434791564941406, "learning_rate": 2.77785964265979e-07, "loss": 0.0097, "step": 40225 }, { "epoch": 7.279797431723639, "grad_norm": 0.302569180727005, "learning_rate": 2.773254743046601e-07, "loss": 0.0277, "step": 40250 }, { "epoch": 7.284319045035269, "grad_norm": 0.06300198286771774, "learning_rate": 2.7686498434334134e-07, "loss": 0.0133, "step": 40275 }, { "epoch": 7.288840658346898, "grad_norm": 22.47533416748047, "learning_rate": 2.7640449438202246e-07, "loss": 0.0173, "step": 40300 }, { "epoch": 7.293362271658528, "grad_norm": 0.6076884865760803, "learning_rate": 2.7594400442070357e-07, "loss": 0.003, "step": 40325 }, { "epoch": 7.297883884970157, "grad_norm": 0.18815143406391144, "learning_rate": 2.754835144593848e-07, "loss": 0.0089, "step": 40350 }, { "epoch": 7.302405498281787, "grad_norm": 0.004078809637576342, "learning_rate": 2.750230244980659e-07, "loss": 0.0007, "step": 40375 }, { 
"epoch": 7.306927111593416, "grad_norm": 0.06133987382054329, "learning_rate": 2.745625345367471e-07, "loss": 0.0008, "step": 40400 }, { "epoch": 7.311448724905046, "grad_norm": 0.43629419803619385, "learning_rate": 2.7410204457542824e-07, "loss": 0.0055, "step": 40425 }, { "epoch": 7.315970338216676, "grad_norm": 16.26517105102539, "learning_rate": 2.736415546141094e-07, "loss": 0.0016, "step": 40450 }, { "epoch": 7.320491951528306, "grad_norm": 0.05941370874643326, "learning_rate": 2.7318106465279057e-07, "loss": 0.0071, "step": 40475 }, { "epoch": 7.325013564839935, "grad_norm": 0.15563565492630005, "learning_rate": 2.727205746914717e-07, "loss": 0.0013, "step": 40500 }, { "epoch": 7.3295351781515645, "grad_norm": 0.5691679120063782, "learning_rate": 2.722600847301529e-07, "loss": 0.0005, "step": 40525 }, { "epoch": 7.334056791463194, "grad_norm": 0.004803875926882029, "learning_rate": 2.71799594768834e-07, "loss": 0.008, "step": 40550 }, { "epoch": 7.3385784047748235, "grad_norm": 0.08438849449157715, "learning_rate": 2.7133910480751523e-07, "loss": 0.0012, "step": 40575 }, { "epoch": 7.343100018086453, "grad_norm": 10.578065872192383, "learning_rate": 2.7087861484619635e-07, "loss": 0.0042, "step": 40600 }, { "epoch": 7.347621631398082, "grad_norm": 0.00701162638142705, "learning_rate": 2.7041812488487746e-07, "loss": 0.0098, "step": 40625 }, { "epoch": 7.352143244709713, "grad_norm": 0.029753483831882477, "learning_rate": 2.699576349235587e-07, "loss": 0.0032, "step": 40650 }, { "epoch": 7.356664858021342, "grad_norm": 0.18117420375347137, "learning_rate": 2.694971449622398e-07, "loss": 0.0027, "step": 40675 }, { "epoch": 7.361186471332972, "grad_norm": 0.9770309329032898, "learning_rate": 2.69036655000921e-07, "loss": 0.0229, "step": 40700 }, { "epoch": 7.365708084644601, "grad_norm": 0.0630781352519989, "learning_rate": 2.685761650396021e-07, "loss": 0.0112, "step": 40725 }, { "epoch": 7.370229697956231, "grad_norm": 0.0655864030122757, "learning_rate": 
2.6811567507828324e-07, "loss": 0.0023, "step": 40750 }, { "epoch": 7.37475131126786, "grad_norm": 0.28010591864585876, "learning_rate": 2.6765518511696446e-07, "loss": 0.0016, "step": 40775 }, { "epoch": 7.37927292457949, "grad_norm": 0.04630829766392708, "learning_rate": 2.6719469515564557e-07, "loss": 0.0011, "step": 40800 }, { "epoch": 7.383794537891119, "grad_norm": 0.03749445080757141, "learning_rate": 2.667342051943268e-07, "loss": 0.0038, "step": 40825 }, { "epoch": 7.3883161512027495, "grad_norm": 0.22030295431613922, "learning_rate": 2.662737152330079e-07, "loss": 0.0049, "step": 40850 }, { "epoch": 7.392837764514379, "grad_norm": 0.07691410928964615, "learning_rate": 2.6581322527168907e-07, "loss": 0.0004, "step": 40875 }, { "epoch": 7.3973593778260085, "grad_norm": 0.027430010959506035, "learning_rate": 2.6535273531037023e-07, "loss": 0.0022, "step": 40900 }, { "epoch": 7.401880991137638, "grad_norm": 0.07501472532749176, "learning_rate": 2.6489224534905135e-07, "loss": 0.0013, "step": 40925 }, { "epoch": 7.4064026044492675, "grad_norm": 0.004892929922789335, "learning_rate": 2.6443175538773257e-07, "loss": 0.0044, "step": 40950 }, { "epoch": 7.410924217760897, "grad_norm": 0.11225175857543945, "learning_rate": 2.639712654264137e-07, "loss": 0.0145, "step": 40975 }, { "epoch": 7.415445831072526, "grad_norm": 0.01234606932848692, "learning_rate": 2.6351077546509485e-07, "loss": 0.0007, "step": 41000 }, { "epoch": 7.419967444384156, "grad_norm": 20.348722457885742, "learning_rate": 2.63050285503776e-07, "loss": 0.0173, "step": 41025 }, { "epoch": 7.424489057695786, "grad_norm": 0.01828809268772602, "learning_rate": 2.625897955424571e-07, "loss": 0.0154, "step": 41050 }, { "epoch": 7.429010671007416, "grad_norm": 0.1676705926656723, "learning_rate": 2.6212930558113835e-07, "loss": 0.0254, "step": 41075 }, { "epoch": 7.433532284319045, "grad_norm": 0.04343516007065773, "learning_rate": 2.6166881561981946e-07, "loss": 0.0271, "step": 41100 }, { "epoch": 
7.438053897630675, "grad_norm": 0.06619753688573837, "learning_rate": 2.612083256585006e-07, "loss": 0.0081, "step": 41125 }, { "epoch": 7.442575510942304, "grad_norm": 0.2102379947900772, "learning_rate": 2.607478356971818e-07, "loss": 0.0045, "step": 41150 }, { "epoch": 7.447097124253934, "grad_norm": 0.020573345944285393, "learning_rate": 2.6028734573586296e-07, "loss": 0.0027, "step": 41175 }, { "epoch": 7.451618737565563, "grad_norm": 0.0597822479903698, "learning_rate": 2.598268557745441e-07, "loss": 0.0018, "step": 41200 }, { "epoch": 7.456140350877193, "grad_norm": 2.0168440341949463, "learning_rate": 2.5936636581322524e-07, "loss": 0.0125, "step": 41225 }, { "epoch": 7.460661964188823, "grad_norm": 0.42403435707092285, "learning_rate": 2.5890587585190646e-07, "loss": 0.0032, "step": 41250 }, { "epoch": 7.4651835775004525, "grad_norm": 0.03630862757563591, "learning_rate": 2.5844538589058757e-07, "loss": 0.0011, "step": 41275 }, { "epoch": 7.469705190812082, "grad_norm": 0.008765432052314281, "learning_rate": 2.5798489592926874e-07, "loss": 0.0095, "step": 41300 }, { "epoch": 7.474226804123711, "grad_norm": 0.019622275605797768, "learning_rate": 2.575244059679499e-07, "loss": 0.0237, "step": 41325 }, { "epoch": 7.478748417435341, "grad_norm": 0.14788568019866943, "learning_rate": 2.57063916006631e-07, "loss": 0.0071, "step": 41350 }, { "epoch": 7.48327003074697, "grad_norm": 9.201263427734375, "learning_rate": 2.5660342604531223e-07, "loss": 0.0059, "step": 41375 }, { "epoch": 7.4877916440586, "grad_norm": 9.586228370666504, "learning_rate": 2.5614293608399335e-07, "loss": 0.001, "step": 41400 }, { "epoch": 7.492313257370229, "grad_norm": 0.035080842673778534, "learning_rate": 2.556824461226745e-07, "loss": 0.008, "step": 41425 }, { "epoch": 7.496834870681859, "grad_norm": 0.09500127285718918, "learning_rate": 2.552219561613557e-07, "loss": 0.0203, "step": 41450 }, { "epoch": 7.501356483993489, "grad_norm": 0.030213013291358948, "learning_rate": 
2.547614662000368e-07, "loss": 0.0226, "step": 41475 }, { "epoch": 7.505878097305119, "grad_norm": 0.16554242372512817, "learning_rate": 2.54300976238718e-07, "loss": 0.0396, "step": 41500 }, { "epoch": 7.510399710616748, "grad_norm": 0.049443017691373825, "learning_rate": 2.538404862773991e-07, "loss": 0.0085, "step": 41525 }, { "epoch": 7.514921323928378, "grad_norm": 0.045605577528476715, "learning_rate": 2.533799963160803e-07, "loss": 0.0045, "step": 41550 }, { "epoch": 7.519442937240007, "grad_norm": 0.023337364196777344, "learning_rate": 2.5291950635476146e-07, "loss": 0.0102, "step": 41575 }, { "epoch": 7.523964550551637, "grad_norm": 0.78485107421875, "learning_rate": 2.524590163934426e-07, "loss": 0.0024, "step": 41600 }, { "epoch": 7.528486163863266, "grad_norm": 0.014340350404381752, "learning_rate": 2.519985264321238e-07, "loss": 0.0019, "step": 41625 }, { "epoch": 7.5330077771748964, "grad_norm": 0.004216828849166632, "learning_rate": 2.515380364708049e-07, "loss": 0.003, "step": 41650 }, { "epoch": 7.537529390486526, "grad_norm": 14.32552719116211, "learning_rate": 2.5107754650948607e-07, "loss": 0.0066, "step": 41675 }, { "epoch": 7.542051003798155, "grad_norm": 0.160021111369133, "learning_rate": 2.5061705654816724e-07, "loss": 0.0053, "step": 41700 }, { "epoch": 7.546572617109785, "grad_norm": 0.020444253459572792, "learning_rate": 2.501565665868484e-07, "loss": 0.0011, "step": 41725 }, { "epoch": 7.551094230421414, "grad_norm": 1.1539157629013062, "learning_rate": 2.4969607662552957e-07, "loss": 0.0011, "step": 41750 }, { "epoch": 7.555615843733044, "grad_norm": 0.10138887166976929, "learning_rate": 2.4923558666421074e-07, "loss": 0.001, "step": 41775 }, { "epoch": 7.560137457044673, "grad_norm": 0.034172624349594116, "learning_rate": 2.4877509670289185e-07, "loss": 0.0013, "step": 41800 }, { "epoch": 7.564659070356303, "grad_norm": 0.02480352483689785, "learning_rate": 2.48314606741573e-07, "loss": 0.01, "step": 41825 }, { "epoch": 
7.569180683667932, "grad_norm": 0.03536754474043846, "learning_rate": 2.478541167802542e-07, "loss": 0.0208, "step": 41850 }, { "epoch": 7.573702296979563, "grad_norm": 0.20276236534118652, "learning_rate": 2.4739362681893535e-07, "loss": 0.0131, "step": 41875 }, { "epoch": 7.578223910291192, "grad_norm": 0.03076460212469101, "learning_rate": 2.469331368576165e-07, "loss": 0.0082, "step": 41900 }, { "epoch": 7.582745523602822, "grad_norm": 0.057225510478019714, "learning_rate": 2.464726468962977e-07, "loss": 0.0082, "step": 41925 }, { "epoch": 7.587267136914451, "grad_norm": 0.07822202891111374, "learning_rate": 2.460121569349788e-07, "loss": 0.0038, "step": 41950 }, { "epoch": 7.591788750226081, "grad_norm": 0.01712547056376934, "learning_rate": 2.4555166697365996e-07, "loss": 0.0063, "step": 41975 }, { "epoch": 7.59631036353771, "grad_norm": 0.06029786914587021, "learning_rate": 2.450911770123411e-07, "loss": 0.0069, "step": 42000 }, { "epoch": 7.6008319768493395, "grad_norm": 0.025808099657297134, "learning_rate": 2.446306870510223e-07, "loss": 0.0042, "step": 42025 }, { "epoch": 7.60535359016097, "grad_norm": 0.01600913517177105, "learning_rate": 2.4417019708970346e-07, "loss": 0.0007, "step": 42050 }, { "epoch": 7.609875203472599, "grad_norm": 7.5506815910339355, "learning_rate": 2.4370970712838457e-07, "loss": 0.0054, "step": 42075 }, { "epoch": 7.614396816784229, "grad_norm": 0.04834285005927086, "learning_rate": 2.4324921716706574e-07, "loss": 0.0015, "step": 42100 }, { "epoch": 7.618918430095858, "grad_norm": 0.04366715997457504, "learning_rate": 2.427887272057469e-07, "loss": 0.0037, "step": 42125 }, { "epoch": 7.623440043407488, "grad_norm": 0.01586577482521534, "learning_rate": 2.4232823724442807e-07, "loss": 0.017, "step": 42150 }, { "epoch": 7.627961656719117, "grad_norm": 21.55689239501953, "learning_rate": 2.4186774728310924e-07, "loss": 0.0098, "step": 42175 }, { "epoch": 7.632483270030747, "grad_norm": 0.019474711269140244, "learning_rate": 
2.4140725732179035e-07, "loss": 0.008, "step": 42200 }, { "epoch": 7.637004883342376, "grad_norm": 0.20361949503421783, "learning_rate": 2.409467673604715e-07, "loss": 0.0021, "step": 42225 }, { "epoch": 7.641526496654006, "grad_norm": 0.061369773000478745, "learning_rate": 2.404862773991527e-07, "loss": 0.0167, "step": 42250 }, { "epoch": 7.646048109965636, "grad_norm": 0.03836612030863762, "learning_rate": 2.400442070362866e-07, "loss": 0.0152, "step": 42275 }, { "epoch": 7.650569723277266, "grad_norm": 0.6904728412628174, "learning_rate": 2.395837170749677e-07, "loss": 0.0254, "step": 42300 }, { "epoch": 7.655091336588895, "grad_norm": 9.530180931091309, "learning_rate": 2.391232271136489e-07, "loss": 0.0247, "step": 42325 }, { "epoch": 7.6596129499005245, "grad_norm": 0.010562293231487274, "learning_rate": 2.3866273715233006e-07, "loss": 0.0021, "step": 42350 }, { "epoch": 7.664134563212154, "grad_norm": 0.032406773418188095, "learning_rate": 2.3820224719101122e-07, "loss": 0.0026, "step": 42375 }, { "epoch": 7.6686561765237835, "grad_norm": 7.6475958824157715, "learning_rate": 2.377417572296924e-07, "loss": 0.0095, "step": 42400 }, { "epoch": 7.673177789835413, "grad_norm": 0.03494368493556976, "learning_rate": 2.3728126726837353e-07, "loss": 0.0029, "step": 42425 }, { "epoch": 7.677699403147043, "grad_norm": 14.583782196044922, "learning_rate": 2.368207773070547e-07, "loss": 0.0128, "step": 42450 }, { "epoch": 7.682221016458673, "grad_norm": 0.12016825377941132, "learning_rate": 2.3636028734573584e-07, "loss": 0.0124, "step": 42475 }, { "epoch": 7.686742629770302, "grad_norm": 0.11401164531707764, "learning_rate": 2.35899797384417e-07, "loss": 0.0006, "step": 42500 }, { "epoch": 7.691264243081932, "grad_norm": 0.030007168650627136, "learning_rate": 2.3543930742309817e-07, "loss": 0.0013, "step": 42525 }, { "epoch": 7.695785856393561, "grad_norm": 0.01560743898153305, "learning_rate": 2.3497881746177933e-07, "loss": 0.0132, "step": 42550 }, { "epoch": 
7.700307469705191, "grad_norm": 0.03253242000937462, "learning_rate": 2.3451832750046047e-07, "loss": 0.0104, "step": 42575 }, { "epoch": 7.70482908301682, "grad_norm": 0.004338828381150961, "learning_rate": 2.3405783753914164e-07, "loss": 0.0108, "step": 42600 }, { "epoch": 7.70935069632845, "grad_norm": 0.06748280674219131, "learning_rate": 2.3359734757782278e-07, "loss": 0.0169, "step": 42625 }, { "epoch": 7.713872309640079, "grad_norm": 0.0437793992459774, "learning_rate": 2.3313685761650395e-07, "loss": 0.0142, "step": 42650 }, { "epoch": 7.718393922951709, "grad_norm": 0.06706016510725021, "learning_rate": 2.326763676551851e-07, "loss": 0.0117, "step": 42675 }, { "epoch": 7.722915536263339, "grad_norm": 25.65509796142578, "learning_rate": 2.3221587769386628e-07, "loss": 0.0218, "step": 42700 }, { "epoch": 7.7274371495749685, "grad_norm": 0.0024367687292397022, "learning_rate": 2.3175538773254742e-07, "loss": 0.0028, "step": 42725 }, { "epoch": 7.731958762886598, "grad_norm": 0.3435537815093994, "learning_rate": 2.3129489777122856e-07, "loss": 0.0041, "step": 42750 }, { "epoch": 7.7364803761982275, "grad_norm": 0.23171645402908325, "learning_rate": 2.3083440780990972e-07, "loss": 0.0054, "step": 42775 }, { "epoch": 7.741001989509857, "grad_norm": 0.06181083992123604, "learning_rate": 2.303739178485909e-07, "loss": 0.0006, "step": 42800 }, { "epoch": 7.745523602821486, "grad_norm": 0.10873863101005554, "learning_rate": 2.2991342788727206e-07, "loss": 0.0035, "step": 42825 }, { "epoch": 7.750045216133116, "grad_norm": 0.0161910280585289, "learning_rate": 2.2945293792595322e-07, "loss": 0.0048, "step": 42850 }, { "epoch": 7.754566829444746, "grad_norm": 0.07214026153087616, "learning_rate": 2.2899244796463434e-07, "loss": 0.002, "step": 42875 }, { "epoch": 7.759088442756376, "grad_norm": 0.01364390179514885, "learning_rate": 2.285319580033155e-07, "loss": 0.0007, "step": 42900 }, { "epoch": 7.763610056068005, "grad_norm": 0.023643679916858673, "learning_rate": 
2.2807146804199667e-07, "loss": 0.0022, "step": 42925 }, { "epoch": 7.768131669379635, "grad_norm": 0.021116966381669044, "learning_rate": 2.2761097808067784e-07, "loss": 0.0101, "step": 42950 }, { "epoch": 7.772653282691264, "grad_norm": 0.10896778851747513, "learning_rate": 2.27150488119359e-07, "loss": 0.0031, "step": 42975 }, { "epoch": 7.777174896002894, "grad_norm": 0.010054018348455429, "learning_rate": 2.2668999815804017e-07, "loss": 0.0057, "step": 43000 }, { "epoch": 7.781696509314523, "grad_norm": 0.06566222012042999, "learning_rate": 2.2622950819672128e-07, "loss": 0.007, "step": 43025 }, { "epoch": 7.786218122626153, "grad_norm": 0.05465374514460564, "learning_rate": 2.2576901823540245e-07, "loss": 0.0005, "step": 43050 }, { "epoch": 7.790739735937782, "grad_norm": 0.6215785145759583, "learning_rate": 2.2530852827408361e-07, "loss": 0.0265, "step": 43075 }, { "epoch": 7.7952613492494125, "grad_norm": 3.957580089569092, "learning_rate": 2.2484803831276478e-07, "loss": 0.0708, "step": 43100 }, { "epoch": 7.799782962561042, "grad_norm": 0.034929584711790085, "learning_rate": 2.2438754835144595e-07, "loss": 0.0043, "step": 43125 }, { "epoch": 7.8043045758726715, "grad_norm": 0.06470832228660583, "learning_rate": 2.2392705839012706e-07, "loss": 0.0059, "step": 43150 }, { "epoch": 7.808826189184301, "grad_norm": 3.028578758239746, "learning_rate": 2.2346656842880823e-07, "loss": 0.0054, "step": 43175 }, { "epoch": 7.81334780249593, "grad_norm": 0.060492563992738724, "learning_rate": 2.230060784674894e-07, "loss": 0.0099, "step": 43200 }, { "epoch": 7.81786941580756, "grad_norm": 1.7288694381713867, "learning_rate": 2.2254558850617056e-07, "loss": 0.0016, "step": 43225 }, { "epoch": 7.822391029119189, "grad_norm": 0.06679031252861023, "learning_rate": 2.2208509854485172e-07, "loss": 0.0042, "step": 43250 }, { "epoch": 7.82691264243082, "grad_norm": 17.381139755249023, "learning_rate": 2.216246085835329e-07, "loss": 0.0047, "step": 43275 }, { "epoch": 
7.831434255742449, "grad_norm": 0.03778848424553871, "learning_rate": 2.21164118622214e-07, "loss": 0.0077, "step": 43300 }, { "epoch": 7.835955869054079, "grad_norm": 0.009451249614357948, "learning_rate": 2.2070362866089517e-07, "loss": 0.0021, "step": 43325 }, { "epoch": 7.840477482365708, "grad_norm": 25.109268188476562, "learning_rate": 2.2024313869957634e-07, "loss": 0.0042, "step": 43350 }, { "epoch": 7.844999095677338, "grad_norm": 0.015822693705558777, "learning_rate": 2.197826487382575e-07, "loss": 0.0063, "step": 43375 }, { "epoch": 7.849520708988967, "grad_norm": 0.016655854880809784, "learning_rate": 2.1932215877693867e-07, "loss": 0.0081, "step": 43400 }, { "epoch": 7.854042322300597, "grad_norm": 0.25390344858169556, "learning_rate": 2.188616688156198e-07, "loss": 0.0037, "step": 43425 }, { "epoch": 7.858563935612226, "grad_norm": 0.09659765660762787, "learning_rate": 2.1840117885430095e-07, "loss": 0.0146, "step": 43450 }, { "epoch": 7.863085548923856, "grad_norm": 0.028894655406475067, "learning_rate": 2.1794068889298211e-07, "loss": 0.0147, "step": 43475 }, { "epoch": 7.867607162235486, "grad_norm": 0.015346791595220566, "learning_rate": 2.1748019893166328e-07, "loss": 0.0241, "step": 43500 }, { "epoch": 7.872128775547115, "grad_norm": 0.43620771169662476, "learning_rate": 2.1701970897034445e-07, "loss": 0.0101, "step": 43525 }, { "epoch": 7.876650388858745, "grad_norm": 0.023062733933329582, "learning_rate": 2.1655921900902561e-07, "loss": 0.0048, "step": 43550 }, { "epoch": 7.881172002170374, "grad_norm": 0.011817359365522861, "learning_rate": 2.1609872904770675e-07, "loss": 0.0033, "step": 43575 }, { "epoch": 7.885693615482004, "grad_norm": 0.3834367096424103, "learning_rate": 2.156382390863879e-07, "loss": 0.0012, "step": 43600 }, { "epoch": 7.890215228793633, "grad_norm": 4.274073600769043, "learning_rate": 2.1517774912506906e-07, "loss": 0.0077, "step": 43625 }, { "epoch": 7.894736842105263, "grad_norm": 0.024657847359776497, 
"learning_rate": 2.1471725916375023e-07, "loss": 0.0069, "step": 43650 }, { "epoch": 7.899258455416893, "grad_norm": 7.262208461761475, "learning_rate": 2.142567692024314e-07, "loss": 0.0073, "step": 43675 }, { "epoch": 7.903780068728523, "grad_norm": 0.035356614738702774, "learning_rate": 2.1379627924111253e-07, "loss": 0.0162, "step": 43700 }, { "epoch": 7.908301682040152, "grad_norm": 0.07969994097948074, "learning_rate": 2.133357892797937e-07, "loss": 0.0014, "step": 43725 }, { "epoch": 7.912823295351782, "grad_norm": 0.013595969416201115, "learning_rate": 2.1287529931847484e-07, "loss": 0.0012, "step": 43750 }, { "epoch": 7.917344908663411, "grad_norm": 0.02247740514576435, "learning_rate": 2.12414809357156e-07, "loss": 0.0012, "step": 43775 }, { "epoch": 7.921866521975041, "grad_norm": 0.07739664614200592, "learning_rate": 2.1195431939583717e-07, "loss": 0.0047, "step": 43800 }, { "epoch": 7.92638813528667, "grad_norm": 0.32251256704330444, "learning_rate": 2.114938294345183e-07, "loss": 0.0056, "step": 43825 }, { "epoch": 7.9309097485983, "grad_norm": 0.007755752187222242, "learning_rate": 2.1103333947319948e-07, "loss": 0.0188, "step": 43850 }, { "epoch": 7.935431361909929, "grad_norm": 8.146416664123535, "learning_rate": 2.1057284951188062e-07, "loss": 0.014, "step": 43875 }, { "epoch": 7.939952975221559, "grad_norm": 22.875139236450195, "learning_rate": 2.1011235955056178e-07, "loss": 0.0218, "step": 43900 }, { "epoch": 7.944474588533189, "grad_norm": 0.0895143672823906, "learning_rate": 2.0965186958924295e-07, "loss": 0.0331, "step": 43925 }, { "epoch": 7.948996201844818, "grad_norm": 0.019100898876786232, "learning_rate": 2.0919137962792411e-07, "loss": 0.0013, "step": 43950 }, { "epoch": 7.953517815156448, "grad_norm": 0.0297053474932909, "learning_rate": 2.0873088966660525e-07, "loss": 0.0007, "step": 43975 }, { "epoch": 7.958039428468077, "grad_norm": 0.4429755210876465, "learning_rate": 2.0827039970528642e-07, "loss": 0.006, "step": 44000 }, { 
"epoch": 7.962561041779707, "grad_norm": 0.021714074537158012, "learning_rate": 2.0780990974396756e-07, "loss": 0.0126, "step": 44025 }, { "epoch": 7.967082655091336, "grad_norm": 0.048985399305820465, "learning_rate": 2.0734941978264873e-07, "loss": 0.0055, "step": 44050 }, { "epoch": 7.971604268402967, "grad_norm": 0.22394807636737823, "learning_rate": 2.068889298213299e-07, "loss": 0.0006, "step": 44075 }, { "epoch": 7.976125881714596, "grad_norm": 0.030540427193045616, "learning_rate": 2.0642843986001103e-07, "loss": 0.0146, "step": 44100 }, { "epoch": 7.980647495026226, "grad_norm": 1.285352110862732, "learning_rate": 2.059679498986922e-07, "loss": 0.0027, "step": 44125 }, { "epoch": 7.985169108337855, "grad_norm": 0.025696493685245514, "learning_rate": 2.0550745993737336e-07, "loss": 0.0072, "step": 44150 }, { "epoch": 7.989690721649485, "grad_norm": 0.1506178379058838, "learning_rate": 2.050469699760545e-07, "loss": 0.0024, "step": 44175 }, { "epoch": 7.994212334961114, "grad_norm": 0.03548096492886543, "learning_rate": 2.0458648001473567e-07, "loss": 0.0208, "step": 44200 }, { "epoch": 7.9987339482727435, "grad_norm": 1.0646295547485352, "learning_rate": 2.0412599005341684e-07, "loss": 0.043, "step": 44225 }, { "epoch": 8.0, "eval_loss": 0.36690396070480347, "eval_runtime": 8374.4331, "eval_samples_per_second": 1.134, "eval_steps_per_second": 0.142, "eval_wer": 0.10485265855221013, "step": 44232 }, { "epoch": 8.003255561584373, "grad_norm": 0.08020028471946716, "learning_rate": 2.0366550009209798e-07, "loss": 0.02, "step": 44250 }, { "epoch": 8.007777174896002, "grad_norm": 0.013830600306391716, "learning_rate": 2.0320501013077914e-07, "loss": 0.0032, "step": 44275 }, { "epoch": 8.012298788207632, "grad_norm": 0.05772462114691734, "learning_rate": 2.027445201694603e-07, "loss": 0.0008, "step": 44300 }, { "epoch": 8.016820401519261, "grad_norm": 0.021993961185216904, "learning_rate": 2.0228403020814145e-07, "loss": 0.0041, "step": 44325 }, { "epoch": 
8.021342014830891, "grad_norm": 5.4001383781433105, "learning_rate": 2.0182354024682262e-07, "loss": 0.0048, "step": 44350 }, { "epoch": 8.02586362814252, "grad_norm": 0.08318978548049927, "learning_rate": 2.0136305028550375e-07, "loss": 0.0045, "step": 44375 }, { "epoch": 8.030385241454152, "grad_norm": 0.09250658750534058, "learning_rate": 2.0090256032418492e-07, "loss": 0.0032, "step": 44400 }, { "epoch": 8.034906854765781, "grad_norm": 0.23829813301563263, "learning_rate": 2.004420703628661e-07, "loss": 0.0019, "step": 44425 }, { "epoch": 8.03942846807741, "grad_norm": 0.016779888421297073, "learning_rate": 1.9998158040154725e-07, "loss": 0.0033, "step": 44450 }, { "epoch": 8.04395008138904, "grad_norm": 0.08098112046718597, "learning_rate": 1.995210904402284e-07, "loss": 0.0017, "step": 44475 }, { "epoch": 8.04847169470067, "grad_norm": 0.042912207543849945, "learning_rate": 1.9906060047890953e-07, "loss": 0.0038, "step": 44500 }, { "epoch": 8.052993308012299, "grad_norm": 0.052388470619916916, "learning_rate": 1.986001105175907e-07, "loss": 0.0004, "step": 44525 }, { "epoch": 8.057514921323929, "grad_norm": 3.6011130809783936, "learning_rate": 1.9813962055627187e-07, "loss": 0.0064, "step": 44550 }, { "epoch": 8.062036534635558, "grad_norm": 0.05901797115802765, "learning_rate": 1.9767913059495303e-07, "loss": 0.0098, "step": 44575 }, { "epoch": 8.066558147947188, "grad_norm": 0.08754336833953857, "learning_rate": 1.9721864063363417e-07, "loss": 0.0342, "step": 44600 }, { "epoch": 8.071079761258817, "grad_norm": 33.210994720458984, "learning_rate": 1.9675815067231534e-07, "loss": 0.0427, "step": 44625 }, { "epoch": 8.075601374570446, "grad_norm": 0.3902020752429962, "learning_rate": 1.9629766071099648e-07, "loss": 0.0331, "step": 44650 }, { "epoch": 8.080122987882076, "grad_norm": 0.5214375853538513, "learning_rate": 1.9583717074967764e-07, "loss": 0.0022, "step": 44675 }, { "epoch": 8.084644601193705, "grad_norm": 0.03465314209461212, "learning_rate": 
1.953766807883588e-07, "loss": 0.0085, "step": 44700 }, { "epoch": 8.089166214505335, "grad_norm": 5.756930828094482, "learning_rate": 1.9491619082703998e-07, "loss": 0.0024, "step": 44725 }, { "epoch": 8.093687827816964, "grad_norm": 0.10550093650817871, "learning_rate": 1.9445570086572112e-07, "loss": 0.0109, "step": 44750 }, { "epoch": 8.098209441128594, "grad_norm": 2.2892873287200928, "learning_rate": 1.9399521090440226e-07, "loss": 0.0022, "step": 44775 }, { "epoch": 8.102731054440225, "grad_norm": 0.008341608569025993, "learning_rate": 1.9353472094308342e-07, "loss": 0.0004, "step": 44800 }, { "epoch": 8.107252667751855, "grad_norm": 4.5169782638549805, "learning_rate": 1.930742309817646e-07, "loss": 0.0021, "step": 44825 }, { "epoch": 8.111774281063484, "grad_norm": 0.0315685048699379, "learning_rate": 1.9261374102044575e-07, "loss": 0.007, "step": 44850 }, { "epoch": 8.116295894375114, "grad_norm": 0.12863166630268097, "learning_rate": 1.9215325105912692e-07, "loss": 0.0056, "step": 44875 }, { "epoch": 8.120817507686743, "grad_norm": 0.6251371502876282, "learning_rate": 1.9169276109780806e-07, "loss": 0.0006, "step": 44900 }, { "epoch": 8.125339120998373, "grad_norm": 0.8310458064079285, "learning_rate": 1.912322711364892e-07, "loss": 0.0084, "step": 44925 }, { "epoch": 8.129860734310002, "grad_norm": 0.02388242445886135, "learning_rate": 1.9077178117517037e-07, "loss": 0.0113, "step": 44950 }, { "epoch": 8.134382347621631, "grad_norm": 0.00754801370203495, "learning_rate": 1.9031129121385153e-07, "loss": 0.0031, "step": 44975 }, { "epoch": 8.138903960933261, "grad_norm": 0.41686248779296875, "learning_rate": 1.898508012525327e-07, "loss": 0.0042, "step": 45000 }, { "epoch": 8.14342557424489, "grad_norm": 3.3315136432647705, "learning_rate": 1.8939031129121387e-07, "loss": 0.0198, "step": 45025 }, { "epoch": 8.14794718755652, "grad_norm": 0.44836312532424927, "learning_rate": 1.8892982132989498e-07, "loss": 0.0191, "step": 45050 }, { "epoch": 
8.15246880086815, "grad_norm": 0.06881590187549591, "learning_rate": 1.8846933136857614e-07, "loss": 0.0016, "step": 45075 }, { "epoch": 8.156990414179779, "grad_norm": 0.020737141370773315, "learning_rate": 1.880088414072573e-07, "loss": 0.0054, "step": 45100 }, { "epoch": 8.161512027491408, "grad_norm": 0.033259179443120956, "learning_rate": 1.8754835144593848e-07, "loss": 0.0007, "step": 45125 }, { "epoch": 8.166033640803038, "grad_norm": 14.501016616821289, "learning_rate": 1.8708786148461964e-07, "loss": 0.0066, "step": 45150 }, { "epoch": 8.170555254114667, "grad_norm": 0.03625442460179329, "learning_rate": 1.866273715233008e-07, "loss": 0.0036, "step": 45175 }, { "epoch": 8.175076867426299, "grad_norm": 0.027331039309501648, "learning_rate": 1.8616688156198192e-07, "loss": 0.0058, "step": 45200 }, { "epoch": 8.179598480737928, "grad_norm": 1.0191278457641602, "learning_rate": 1.857063916006631e-07, "loss": 0.0185, "step": 45225 }, { "epoch": 8.184120094049558, "grad_norm": 0.65291428565979, "learning_rate": 1.8524590163934426e-07, "loss": 0.0031, "step": 45250 }, { "epoch": 8.188641707361187, "grad_norm": 0.14517787098884583, "learning_rate": 1.8478541167802542e-07, "loss": 0.0004, "step": 45275 }, { "epoch": 8.193163320672816, "grad_norm": 0.016481177881360054, "learning_rate": 1.8434334131515933e-07, "loss": 0.0271, "step": 45300 }, { "epoch": 8.197684933984446, "grad_norm": 2.0038397312164307, "learning_rate": 1.838828513538405e-07, "loss": 0.0157, "step": 45325 }, { "epoch": 8.202206547296075, "grad_norm": 0.07896800339221954, "learning_rate": 1.8342236139252163e-07, "loss": 0.0092, "step": 45350 }, { "epoch": 8.206728160607705, "grad_norm": 0.04571648687124252, "learning_rate": 1.829618714312028e-07, "loss": 0.0112, "step": 45375 }, { "epoch": 8.211249773919334, "grad_norm": 0.03423347696661949, "learning_rate": 1.8250138146988394e-07, "loss": 0.013, "step": 45400 }, { "epoch": 8.215771387230964, "grad_norm": 34.946712493896484, "learning_rate": 
1.820408915085651e-07, "loss": 0.0322, "step": 45425 }, { "epoch": 8.220293000542593, "grad_norm": 0.0242567490786314, "learning_rate": 1.8158040154724627e-07, "loss": 0.0108, "step": 45450 }, { "epoch": 8.224814613854223, "grad_norm": 0.04625415802001953, "learning_rate": 1.8111991158592744e-07, "loss": 0.0005, "step": 45475 }, { "epoch": 8.229336227165852, "grad_norm": 0.16521072387695312, "learning_rate": 1.8065942162460858e-07, "loss": 0.0038, "step": 45500 }, { "epoch": 8.233857840477482, "grad_norm": 0.01613481342792511, "learning_rate": 1.8019893166328972e-07, "loss": 0.0036, "step": 45525 }, { "epoch": 8.238379453789111, "grad_norm": 8.090846061706543, "learning_rate": 1.7973844170197088e-07, "loss": 0.011, "step": 45550 }, { "epoch": 8.24290106710074, "grad_norm": 0.062387678772211075, "learning_rate": 1.7927795174065205e-07, "loss": 0.0022, "step": 45575 }, { "epoch": 8.24742268041237, "grad_norm": 0.11970767378807068, "learning_rate": 1.7881746177933321e-07, "loss": 0.0078, "step": 45600 }, { "epoch": 8.251944293724002, "grad_norm": 0.30487823486328125, "learning_rate": 1.7835697181801435e-07, "loss": 0.0005, "step": 45625 }, { "epoch": 8.256465907035631, "grad_norm": 0.12851175665855408, "learning_rate": 1.7789648185669552e-07, "loss": 0.0041, "step": 45650 }, { "epoch": 8.26098752034726, "grad_norm": 0.7285523414611816, "learning_rate": 1.7743599189537666e-07, "loss": 0.0012, "step": 45675 }, { "epoch": 8.26550913365889, "grad_norm": 0.006563634146004915, "learning_rate": 1.7697550193405783e-07, "loss": 0.0006, "step": 45700 }, { "epoch": 8.27003074697052, "grad_norm": 0.08779245615005493, "learning_rate": 1.76515011972739e-07, "loss": 0.0176, "step": 45725 }, { "epoch": 8.274552360282149, "grad_norm": 1.7844347953796387, "learning_rate": 1.7605452201142016e-07, "loss": 0.0035, "step": 45750 }, { "epoch": 8.279073973593778, "grad_norm": 0.026561090722680092, "learning_rate": 1.755940320501013e-07, "loss": 0.0154, "step": 45775 }, { "epoch": 
8.283595586905408, "grad_norm": 0.08869388699531555, "learning_rate": 1.7513354208878244e-07, "loss": 0.0189, "step": 45800 }, { "epoch": 8.288117200217037, "grad_norm": 0.09968849271535873, "learning_rate": 1.746730521274636e-07, "loss": 0.0057, "step": 45825 }, { "epoch": 8.292638813528667, "grad_norm": 0.7243178486824036, "learning_rate": 1.7421256216614477e-07, "loss": 0.0214, "step": 45850 }, { "epoch": 8.297160426840296, "grad_norm": 0.019970480352640152, "learning_rate": 1.7375207220482594e-07, "loss": 0.0058, "step": 45875 }, { "epoch": 8.301682040151926, "grad_norm": 0.08685352653265, "learning_rate": 1.732915822435071e-07, "loss": 0.0053, "step": 45900 }, { "epoch": 8.306203653463555, "grad_norm": 0.02851727232336998, "learning_rate": 1.7283109228218822e-07, "loss": 0.0085, "step": 45925 }, { "epoch": 8.310725266775185, "grad_norm": 0.10517676919698715, "learning_rate": 1.7237060232086938e-07, "loss": 0.0043, "step": 45950 }, { "epoch": 8.315246880086814, "grad_norm": 0.03421909734606743, "learning_rate": 1.7191011235955055e-07, "loss": 0.0009, "step": 45975 }, { "epoch": 8.319768493398444, "grad_norm": 8.051795959472656, "learning_rate": 1.7144962239823171e-07, "loss": 0.0012, "step": 46000 }, { "epoch": 8.324290106710075, "grad_norm": 0.04528782516717911, "learning_rate": 1.7098913243691288e-07, "loss": 0.0049, "step": 46025 }, { "epoch": 8.328811720021704, "grad_norm": 0.024573039263486862, "learning_rate": 1.7052864247559405e-07, "loss": 0.0055, "step": 46050 }, { "epoch": 8.333333333333334, "grad_norm": 13.220183372497559, "learning_rate": 1.7006815251427516e-07, "loss": 0.0102, "step": 46075 }, { "epoch": 8.337854946644963, "grad_norm": 3.289721727371216, "learning_rate": 1.6960766255295633e-07, "loss": 0.0292, "step": 46100 }, { "epoch": 8.342376559956593, "grad_norm": 10.872947692871094, "learning_rate": 1.691471725916375e-07, "loss": 0.0028, "step": 46125 }, { "epoch": 8.346898173268222, "grad_norm": 0.3125823438167572, "learning_rate": 
1.6868668263031866e-07, "loss": 0.0007, "step": 46150 }, { "epoch": 8.351419786579852, "grad_norm": 42.284271240234375, "learning_rate": 1.6822619266899983e-07, "loss": 0.0214, "step": 46175 }, { "epoch": 8.355941399891481, "grad_norm": 40.723052978515625, "learning_rate": 1.6776570270768097e-07, "loss": 0.0119, "step": 46200 }, { "epoch": 8.36046301320311, "grad_norm": 15.188941955566406, "learning_rate": 1.673052127463621e-07, "loss": 0.0174, "step": 46225 }, { "epoch": 8.36498462651474, "grad_norm": 0.19278444349765778, "learning_rate": 1.6684472278504327e-07, "loss": 0.0107, "step": 46250 }, { "epoch": 8.36950623982637, "grad_norm": 0.961004912853241, "learning_rate": 1.6638423282372444e-07, "loss": 0.0023, "step": 46275 }, { "epoch": 8.374027853138, "grad_norm": 0.09470321238040924, "learning_rate": 1.659237428624056e-07, "loss": 0.0023, "step": 46300 }, { "epoch": 8.378549466449629, "grad_norm": 0.10626068711280823, "learning_rate": 1.6546325290108677e-07, "loss": 0.0022, "step": 46325 }, { "epoch": 8.383071079761258, "grad_norm": 0.07527956366539001, "learning_rate": 1.6500276293976788e-07, "loss": 0.0021, "step": 46350 }, { "epoch": 8.387592693072888, "grad_norm": 0.07098814100027084, "learning_rate": 1.6454227297844905e-07, "loss": 0.0032, "step": 46375 }, { "epoch": 8.392114306384517, "grad_norm": 0.06297345459461212, "learning_rate": 1.6408178301713022e-07, "loss": 0.0009, "step": 46400 }, { "epoch": 8.396635919696148, "grad_norm": 0.6129382252693176, "learning_rate": 1.6362129305581138e-07, "loss": 0.0023, "step": 46425 }, { "epoch": 8.401157533007778, "grad_norm": 0.07391338050365448, "learning_rate": 1.6316080309449255e-07, "loss": 0.0192, "step": 46450 }, { "epoch": 8.405679146319407, "grad_norm": 0.009829241782426834, "learning_rate": 1.627003131331737e-07, "loss": 0.0009, "step": 46475 }, { "epoch": 8.410200759631037, "grad_norm": 0.02008306048810482, "learning_rate": 1.6223982317185483e-07, "loss": 0.0012, "step": 46500 }, { "epoch": 
8.414722372942666, "grad_norm": 0.032892853021621704, "learning_rate": 1.61779333210536e-07, "loss": 0.0117, "step": 46525 }, { "epoch": 8.419243986254296, "grad_norm": 0.3454951047897339, "learning_rate": 1.6131884324921716e-07, "loss": 0.0041, "step": 46550 }, { "epoch": 8.423765599565925, "grad_norm": 41.06159973144531, "learning_rate": 1.6085835328789833e-07, "loss": 0.0189, "step": 46575 }, { "epoch": 8.428287212877555, "grad_norm": 11.987919807434082, "learning_rate": 1.603978633265795e-07, "loss": 0.01, "step": 46600 }, { "epoch": 8.432808826189184, "grad_norm": 0.24590350687503815, "learning_rate": 1.5993737336526063e-07, "loss": 0.0019, "step": 46625 }, { "epoch": 8.437330439500814, "grad_norm": 0.11725660413503647, "learning_rate": 1.5947688340394177e-07, "loss": 0.0196, "step": 46650 }, { "epoch": 8.441852052812443, "grad_norm": 0.04116886481642723, "learning_rate": 1.5901639344262294e-07, "loss": 0.0011, "step": 46675 }, { "epoch": 8.446373666124073, "grad_norm": 0.03313690423965454, "learning_rate": 1.585559034813041e-07, "loss": 0.0051, "step": 46700 }, { "epoch": 8.450895279435702, "grad_norm": 0.1109694391489029, "learning_rate": 1.5809541351998527e-07, "loss": 0.0042, "step": 46725 }, { "epoch": 8.455416892747332, "grad_norm": 0.2245902568101883, "learning_rate": 1.576349235586664e-07, "loss": 0.0059, "step": 46750 }, { "epoch": 8.459938506058961, "grad_norm": 0.014154641889035702, "learning_rate": 1.5717443359734758e-07, "loss": 0.0023, "step": 46775 }, { "epoch": 8.46446011937059, "grad_norm": 0.16116198897361755, "learning_rate": 1.5671394363602872e-07, "loss": 0.0158, "step": 46800 }, { "epoch": 8.468981732682222, "grad_norm": 0.00956847332417965, "learning_rate": 1.5625345367470988e-07, "loss": 0.0024, "step": 46825 }, { "epoch": 8.473503345993851, "grad_norm": 0.007467388175427914, "learning_rate": 1.5579296371339105e-07, "loss": 0.0074, "step": 46850 }, { "epoch": 8.47802495930548, "grad_norm": 0.028611036017537117, "learning_rate": 
1.553324737520722e-07, "loss": 0.0052, "step": 46875 }, { "epoch": 8.48254657261711, "grad_norm": 0.23077279329299927, "learning_rate": 1.5487198379075336e-07, "loss": 0.0137, "step": 46900 }, { "epoch": 8.48706818592874, "grad_norm": 0.1357181966304779, "learning_rate": 1.5441149382943452e-07, "loss": 0.0082, "step": 46925 }, { "epoch": 8.49158979924037, "grad_norm": 12.11319351196289, "learning_rate": 1.5395100386811566e-07, "loss": 0.0021, "step": 46950 }, { "epoch": 8.496111412551999, "grad_norm": 1.0207489728927612, "learning_rate": 1.5349051390679683e-07, "loss": 0.0012, "step": 46975 }, { "epoch": 8.500633025863628, "grad_norm": 0.05066627636551857, "learning_rate": 1.53030023945478e-07, "loss": 0.0033, "step": 47000 }, { "epoch": 8.505154639175258, "grad_norm": 0.18461652100086212, "learning_rate": 1.5256953398415913e-07, "loss": 0.0051, "step": 47025 }, { "epoch": 8.509676252486887, "grad_norm": 0.058383241295814514, "learning_rate": 1.521090440228403e-07, "loss": 0.0079, "step": 47050 }, { "epoch": 8.514197865798517, "grad_norm": 0.08401685953140259, "learning_rate": 1.5164855406152144e-07, "loss": 0.005, "step": 47075 }, { "epoch": 8.518719479110146, "grad_norm": 0.12280333787202835, "learning_rate": 1.511880641002026e-07, "loss": 0.0027, "step": 47100 }, { "epoch": 8.523241092421776, "grad_norm": 0.0439714677631855, "learning_rate": 1.5072757413888377e-07, "loss": 0.002, "step": 47125 }, { "epoch": 8.527762705733405, "grad_norm": 0.02209680713713169, "learning_rate": 1.502670841775649e-07, "loss": 0.0009, "step": 47150 }, { "epoch": 8.532284319045035, "grad_norm": 6.216536521911621, "learning_rate": 1.4980659421624608e-07, "loss": 0.0197, "step": 47175 }, { "epoch": 8.536805932356664, "grad_norm": 0.030912378802895546, "learning_rate": 1.4934610425492724e-07, "loss": 0.0003, "step": 47200 }, { "epoch": 8.541327545668295, "grad_norm": 0.02260858565568924, "learning_rate": 1.4888561429360838e-07, "loss": 0.0013, "step": 47225 }, { "epoch": 
8.545849158979925, "grad_norm": 0.04053138568997383, "learning_rate": 1.4842512433228955e-07, "loss": 0.0036, "step": 47250 }, { "epoch": 8.550370772291554, "grad_norm": 0.10423174500465393, "learning_rate": 1.4796463437097072e-07, "loss": 0.0005, "step": 47275 }, { "epoch": 8.554892385603184, "grad_norm": 0.060733165591955185, "learning_rate": 1.4750414440965186e-07, "loss": 0.0017, "step": 47300 }, { "epoch": 8.559413998914813, "grad_norm": 0.07485374808311462, "learning_rate": 1.4704365444833302e-07, "loss": 0.0007, "step": 47325 }, { "epoch": 8.563935612226443, "grad_norm": 0.03115830197930336, "learning_rate": 1.465831644870142e-07, "loss": 0.0112, "step": 47350 }, { "epoch": 8.568457225538072, "grad_norm": 0.02164495922625065, "learning_rate": 1.4612267452569533e-07, "loss": 0.0006, "step": 47375 }, { "epoch": 8.572978838849702, "grad_norm": 0.024691320955753326, "learning_rate": 1.456621845643765e-07, "loss": 0.0259, "step": 47400 }, { "epoch": 8.577500452161331, "grad_norm": 17.621110916137695, "learning_rate": 1.4520169460305763e-07, "loss": 0.0218, "step": 47425 }, { "epoch": 8.58202206547296, "grad_norm": 4.349635601043701, "learning_rate": 1.447412046417388e-07, "loss": 0.0055, "step": 47450 }, { "epoch": 8.58654367878459, "grad_norm": 1.4642325639724731, "learning_rate": 1.4428071468041997e-07, "loss": 0.0065, "step": 47475 }, { "epoch": 8.59106529209622, "grad_norm": 0.0913548618555069, "learning_rate": 1.4382022471910113e-07, "loss": 0.0034, "step": 47500 }, { "epoch": 8.595586905407849, "grad_norm": 0.025698378682136536, "learning_rate": 1.4335973475778227e-07, "loss": 0.0023, "step": 47525 }, { "epoch": 8.600108518719479, "grad_norm": 0.07771757990121841, "learning_rate": 1.428992447964634e-07, "loss": 0.0022, "step": 47550 }, { "epoch": 8.604630132031108, "grad_norm": 5.4854254722595215, "learning_rate": 1.4243875483514458e-07, "loss": 0.005, "step": 47575 }, { "epoch": 8.609151745342738, "grad_norm": 0.031100405380129814, "learning_rate": 
1.4197826487382574e-07, "loss": 0.0067, "step": 47600 }, { "epoch": 8.613673358654367, "grad_norm": 0.015138168819248676, "learning_rate": 1.415177749125069e-07, "loss": 0.0094, "step": 47625 }, { "epoch": 8.618194971965998, "grad_norm": 1.461853265762329, "learning_rate": 1.4105728495118808e-07, "loss": 0.002, "step": 47650 }, { "epoch": 8.622716585277628, "grad_norm": 0.05030835047364235, "learning_rate": 1.4059679498986922e-07, "loss": 0.0014, "step": 47675 }, { "epoch": 8.627238198589257, "grad_norm": 0.15744911134243011, "learning_rate": 1.4013630502855036e-07, "loss": 0.0006, "step": 47700 }, { "epoch": 8.631759811900887, "grad_norm": 0.6732610464096069, "learning_rate": 1.3967581506723152e-07, "loss": 0.0024, "step": 47725 }, { "epoch": 8.636281425212516, "grad_norm": 3.470303773880005, "learning_rate": 1.392153251059127e-07, "loss": 0.0079, "step": 47750 }, { "epoch": 8.640803038524146, "grad_norm": 27.621957778930664, "learning_rate": 1.3875483514459386e-07, "loss": 0.0164, "step": 47775 }, { "epoch": 8.645324651835775, "grad_norm": 0.01883932389318943, "learning_rate": 1.38294345183275e-07, "loss": 0.0145, "step": 47800 }, { "epoch": 8.649846265147405, "grad_norm": 0.08440113812685013, "learning_rate": 1.3783385522195614e-07, "loss": 0.0188, "step": 47825 }, { "epoch": 8.654367878459034, "grad_norm": 0.0456276573240757, "learning_rate": 1.373733652606373e-07, "loss": 0.0116, "step": 47850 }, { "epoch": 8.658889491770664, "grad_norm": 8.457013130187988, "learning_rate": 1.3691287529931847e-07, "loss": 0.005, "step": 47875 }, { "epoch": 8.663411105082293, "grad_norm": 2.870605707168579, "learning_rate": 1.3645238533799963e-07, "loss": 0.0051, "step": 47900 }, { "epoch": 8.667932718393923, "grad_norm": 0.0406540222465992, "learning_rate": 1.359918953766808e-07, "loss": 0.0103, "step": 47925 }, { "epoch": 8.672454331705552, "grad_norm": 0.12393586337566376, "learning_rate": 1.3553140541536194e-07, "loss": 0.0089, "step": 47950 }, { "epoch": 8.676975945017182, 
"grad_norm": 0.04684567451477051, "learning_rate": 1.3507091545404308e-07, "loss": 0.005, "step": 47975 }, { "epoch": 8.681497558328811, "grad_norm": 0.005455177277326584, "learning_rate": 1.3461042549272425e-07, "loss": 0.0011, "step": 48000 }, { "epoch": 8.686019171640442, "grad_norm": 0.0036073036026209593, "learning_rate": 1.341499355314054e-07, "loss": 0.0109, "step": 48025 }, { "epoch": 8.690540784952072, "grad_norm": 0.015157288871705532, "learning_rate": 1.3368944557008658e-07, "loss": 0.0028, "step": 48050 }, { "epoch": 8.695062398263701, "grad_norm": 0.045515723526477814, "learning_rate": 1.3322895560876774e-07, "loss": 0.0061, "step": 48075 }, { "epoch": 8.69958401157533, "grad_norm": 0.3657865822315216, "learning_rate": 1.3276846564744886e-07, "loss": 0.0024, "step": 48100 }, { "epoch": 8.70410562488696, "grad_norm": 0.009580901823937893, "learning_rate": 1.3230797568613002e-07, "loss": 0.0132, "step": 48125 }, { "epoch": 8.70862723819859, "grad_norm": 0.033869609236717224, "learning_rate": 1.318474857248112e-07, "loss": 0.0049, "step": 48150 }, { "epoch": 8.71314885151022, "grad_norm": 11.958950996398926, "learning_rate": 1.3138699576349236e-07, "loss": 0.0098, "step": 48175 }, { "epoch": 8.717670464821849, "grad_norm": 0.02979426644742489, "learning_rate": 1.3092650580217352e-07, "loss": 0.0011, "step": 48200 }, { "epoch": 8.722192078133478, "grad_norm": 1.0056304931640625, "learning_rate": 1.304660158408547e-07, "loss": 0.0352, "step": 48225 }, { "epoch": 8.726713691445108, "grad_norm": 1.8282511234283447, "learning_rate": 1.300055258795358e-07, "loss": 0.0204, "step": 48250 }, { "epoch": 8.731235304756737, "grad_norm": 0.0187783632427454, "learning_rate": 1.2954503591821697e-07, "loss": 0.0129, "step": 48275 }, { "epoch": 8.735756918068367, "grad_norm": 0.3944437801837921, "learning_rate": 1.2908454595689813e-07, "loss": 0.0009, "step": 48300 }, { "epoch": 8.740278531379996, "grad_norm": 11.324723243713379, "learning_rate": 1.286240559955793e-07, 
"loss": 0.0015, "step": 48325 }, { "epoch": 8.744800144691625, "grad_norm": 0.03956815227866173, "learning_rate": 1.2816356603426047e-07, "loss": 0.003, "step": 48350 }, { "epoch": 8.749321758003255, "grad_norm": 0.06833972781896591, "learning_rate": 1.277030760729416e-07, "loss": 0.0005, "step": 48375 }, { "epoch": 8.753843371314884, "grad_norm": 0.029176251962780952, "learning_rate": 1.2724258611162275e-07, "loss": 0.001, "step": 48400 }, { "epoch": 8.758364984626514, "grad_norm": 0.02110159769654274, "learning_rate": 1.267820961503039e-07, "loss": 0.0009, "step": 48425 }, { "epoch": 8.762886597938145, "grad_norm": 0.023354971781373024, "learning_rate": 1.2632160618898508e-07, "loss": 0.0012, "step": 48450 }, { "epoch": 8.767408211249775, "grad_norm": 0.0024972474202513695, "learning_rate": 1.2586111622766625e-07, "loss": 0.0014, "step": 48475 }, { "epoch": 8.771929824561404, "grad_norm": 10.001263618469238, "learning_rate": 1.2540062626634739e-07, "loss": 0.0187, "step": 48500 }, { "epoch": 8.776451437873034, "grad_norm": 0.003959618508815765, "learning_rate": 1.2494013630502855e-07, "loss": 0.0181, "step": 48525 }, { "epoch": 8.780973051184663, "grad_norm": 0.05257127806544304, "learning_rate": 1.2447964634370972e-07, "loss": 0.0041, "step": 48550 }, { "epoch": 8.785494664496293, "grad_norm": 1.0013961791992188, "learning_rate": 1.2401915638239086e-07, "loss": 0.0028, "step": 48575 }, { "epoch": 8.790016277807922, "grad_norm": 0.04151635989546776, "learning_rate": 1.2355866642107202e-07, "loss": 0.0439, "step": 48600 }, { "epoch": 8.794537891119552, "grad_norm": 0.07857254147529602, "learning_rate": 1.230981764597532e-07, "loss": 0.0176, "step": 48625 }, { "epoch": 8.799059504431181, "grad_norm": 0.0398498959839344, "learning_rate": 1.2263768649843433e-07, "loss": 0.0035, "step": 48650 }, { "epoch": 8.80358111774281, "grad_norm": 0.18911878764629364, "learning_rate": 1.221771965371155e-07, "loss": 0.0041, "step": 48675 }, { "epoch": 8.80810273105444, 
"grad_norm": 0.5053550601005554, "learning_rate": 1.2171670657579666e-07, "loss": 0.0029, "step": 48700 }, { "epoch": 8.81262434436607, "grad_norm": 0.11117665469646454, "learning_rate": 1.212562166144778e-07, "loss": 0.0022, "step": 48725 }, { "epoch": 8.817145957677699, "grad_norm": 0.03761090338230133, "learning_rate": 1.2079572665315897e-07, "loss": 0.0014, "step": 48750 }, { "epoch": 8.821667570989328, "grad_norm": 0.0750727653503418, "learning_rate": 1.203352366918401e-07, "loss": 0.0051, "step": 48775 }, { "epoch": 8.826189184300958, "grad_norm": 0.1517859250307083, "learning_rate": 1.1987474673052127e-07, "loss": 0.0072, "step": 48800 }, { "epoch": 8.830710797612587, "grad_norm": 0.02724577859044075, "learning_rate": 1.1941425676920244e-07, "loss": 0.0025, "step": 48825 }, { "epoch": 8.835232410924217, "grad_norm": 0.02841174229979515, "learning_rate": 1.1895376680788358e-07, "loss": 0.0017, "step": 48850 }, { "epoch": 8.839754024235848, "grad_norm": 0.03608907014131546, "learning_rate": 1.1849327684656473e-07, "loss": 0.0041, "step": 48875 }, { "epoch": 8.844275637547478, "grad_norm": 0.022049210965633392, "learning_rate": 1.180327868852459e-07, "loss": 0.0011, "step": 48900 }, { "epoch": 8.848797250859107, "grad_norm": 13.626410484313965, "learning_rate": 1.1757229692392705e-07, "loss": 0.0021, "step": 48925 }, { "epoch": 8.853318864170737, "grad_norm": 0.021010667085647583, "learning_rate": 1.171118069626082e-07, "loss": 0.0022, "step": 48950 }, { "epoch": 8.857840477482366, "grad_norm": 0.009972944855690002, "learning_rate": 1.1665131700128937e-07, "loss": 0.0047, "step": 48975 }, { "epoch": 8.862362090793996, "grad_norm": 0.04962944611907005, "learning_rate": 1.1619082703997052e-07, "loss": 0.0036, "step": 49000 }, { "epoch": 8.866883704105625, "grad_norm": 1.5837059020996094, "learning_rate": 1.1573033707865168e-07, "loss": 0.0017, "step": 49025 }, { "epoch": 8.871405317417254, "grad_norm": 4.073685646057129, "learning_rate": 1.1526984711733284e-07, 
"loss": 0.0092, "step": 49050 }, { "epoch": 8.875926930728884, "grad_norm": 0.0222158282995224, "learning_rate": 1.1480935715601398e-07, "loss": 0.0051, "step": 49075 }, { "epoch": 8.880448544040513, "grad_norm": 0.055091459304094315, "learning_rate": 1.1434886719469515e-07, "loss": 0.0077, "step": 49100 }, { "epoch": 8.884970157352143, "grad_norm": 0.4387997090816498, "learning_rate": 1.1388837723337632e-07, "loss": 0.0013, "step": 49125 }, { "epoch": 8.889491770663772, "grad_norm": 0.020865125581622124, "learning_rate": 1.1342788727205746e-07, "loss": 0.0005, "step": 49150 }, { "epoch": 8.894013383975402, "grad_norm": 0.05737067386507988, "learning_rate": 1.1296739731073862e-07, "loss": 0.0016, "step": 49175 }, { "epoch": 8.898534997287031, "grad_norm": 0.04351663962006569, "learning_rate": 1.1250690734941979e-07, "loss": 0.0049, "step": 49200 }, { "epoch": 8.90305661059866, "grad_norm": 0.09150709211826324, "learning_rate": 1.1204641738810093e-07, "loss": 0.0047, "step": 49225 }, { "epoch": 8.907578223910292, "grad_norm": 0.019495923072099686, "learning_rate": 1.115859274267821e-07, "loss": 0.0004, "step": 49250 }, { "epoch": 8.912099837221922, "grad_norm": 0.002537541324272752, "learning_rate": 1.1112543746546326e-07, "loss": 0.0021, "step": 49275 }, { "epoch": 8.916621450533551, "grad_norm": 0.00815527979284525, "learning_rate": 1.106649475041444e-07, "loss": 0.0002, "step": 49300 }, { "epoch": 8.92114306384518, "grad_norm": 4.410140037536621, "learning_rate": 1.1020445754282557e-07, "loss": 0.0086, "step": 49325 }, { "epoch": 8.92566467715681, "grad_norm": 0.3327310383319855, "learning_rate": 1.0974396758150672e-07, "loss": 0.0179, "step": 49350 }, { "epoch": 8.93018629046844, "grad_norm": 0.052819494158029556, "learning_rate": 1.0930189721864062e-07, "loss": 0.0199, "step": 49375 }, { "epoch": 8.934707903780069, "grad_norm": 29.91364097595215, "learning_rate": 1.0884140725732179e-07, "loss": 0.0274, "step": 49400 }, { "epoch": 8.939229517091698, "grad_norm": 
null, "learning_rate": 1.0839933689445569e-07, "loss": 0.031, "step": 49425 }, { "epoch": 8.943751130403328, "grad_norm": 0.09751866012811661, "learning_rate": 1.0793884693313686e-07, "loss": 0.025, "step": 49450 }, { "epoch": 8.948272743714957, "grad_norm": 0.38296425342559814, "learning_rate": 1.0747835697181801e-07, "loss": 0.004, "step": 49475 }, { "epoch": 8.952794357026587, "grad_norm": 0.05933375656604767, "learning_rate": 1.0701786701049917e-07, "loss": 0.0018, "step": 49500 }, { "epoch": 8.957315970338216, "grad_norm": 0.14764128625392914, "learning_rate": 1.0655737704918032e-07, "loss": 0.0005, "step": 49525 }, { "epoch": 8.961837583649846, "grad_norm": 0.034122079610824585, "learning_rate": 1.0609688708786148e-07, "loss": 0.0029, "step": 49550 }, { "epoch": 8.966359196961475, "grad_norm": 0.04565449431538582, "learning_rate": 1.0563639712654264e-07, "loss": 0.0004, "step": 49575 }, { "epoch": 8.970880810273105, "grad_norm": 0.01443118043243885, "learning_rate": 1.0517590716522379e-07, "loss": 0.0041, "step": 49600 }, { "epoch": 8.975402423584734, "grad_norm": 0.005753234960138798, "learning_rate": 1.0471541720390494e-07, "loss": 0.0026, "step": 49625 }, { "epoch": 8.979924036896364, "grad_norm": 0.02034948766231537, "learning_rate": 1.0425492724258611e-07, "loss": 0.0007, "step": 49650 }, { "epoch": 8.984445650207995, "grad_norm": 0.012954095378518105, "learning_rate": 1.0379443728126726e-07, "loss": 0.0028, "step": 49675 }, { "epoch": 8.988967263519624, "grad_norm": 11.640181541442871, "learning_rate": 1.0333394731994842e-07, "loss": 0.0033, "step": 49700 }, { "epoch": 8.993488876831254, "grad_norm": 0.07725071907043457, "learning_rate": 1.0287345735862958e-07, "loss": 0.0125, "step": 49725 }, { "epoch": 8.998010490142883, "grad_norm": 0.12377389520406723, "learning_rate": 1.0241296739731073e-07, "loss": 0.0204, "step": 49750 }, { "epoch": 9.0, "eval_loss": 0.370661199092865, "eval_runtime": 8351.69, "eval_samples_per_second": 1.137,
"eval_steps_per_second": 0.142, "eval_wer": 0.10357142857142858, "step": 49761 }, { "epoch": 9.002532103454513, "grad_norm": 0.027261001989245415, "learning_rate": 1.0195247743599189e-07, "loss": 0.0049, "step": 49775 }, { "epoch": 9.007053716766142, "grad_norm": 0.1393139511346817, "learning_rate": 1.0149198747467305e-07, "loss": 0.0031, "step": 49800 }, { "epoch": 9.011575330077772, "grad_norm": 0.31711727380752563, "learning_rate": 1.010314975133542e-07, "loss": 0.0015, "step": 49825 }, { "epoch": 9.016096943389401, "grad_norm": 0.021990245208144188, "learning_rate": 1.0057100755203536e-07, "loss": 0.0064, "step": 49850 }, { "epoch": 9.02061855670103, "grad_norm": 0.028167344629764557, "learning_rate": 1.0011051759071653e-07, "loss": 0.0019, "step": 49875 }, { "epoch": 9.02514017001266, "grad_norm": 12.446762084960938, "learning_rate": 9.965002762939767e-08, "loss": 0.0111, "step": 49900 }, { "epoch": 9.02966178332429, "grad_norm": 0.024779673665761948, "learning_rate": 9.918953766807883e-08, "loss": 0.0027, "step": 49925 }, { "epoch": 9.03418339663592, "grad_norm": 2.8062970638275146, "learning_rate": 9.872904770676e-08, "loss": 0.0046, "step": 49950 }, { "epoch": 9.038705009947549, "grad_norm": 0.011668199673295021, "learning_rate": 9.826855774544114e-08, "loss": 0.0315, "step": 49975 }, { "epoch": 9.043226623259178, "grad_norm": 0.005240909289568663, "learning_rate": 9.78080677841223e-08, "loss": 0.0067, "step": 50000 }, { "epoch": 9.047748236570808, "grad_norm": 0.015816286206245422, "learning_rate": 9.734757782280347e-08, "loss": 0.0018, "step": 50025 }, { "epoch": 9.052269849882437, "grad_norm": 0.0809655636548996, "learning_rate": 9.688708786148461e-08, "loss": 0.0169, "step": 50050 }, { "epoch": 9.056791463194068, "grad_norm": 0.1855606585741043, "learning_rate": 9.642659790016578e-08, "loss": 0.0003, "step": 50075 }, { "epoch": 9.061313076505698, "grad_norm": 1.0818848609924316, "learning_rate": 9.596610793884693e-08, "loss": 0.0114, "step": 50100 }, { 
"epoch": 9.065834689817327, "grad_norm": 1.6838890314102173, "learning_rate": 9.550561797752808e-08, "loss": 0.0142, "step": 50125 }, { "epoch": 9.070356303128957, "grad_norm": 0.051931653171777725, "learning_rate": 9.504512801620925e-08, "loss": 0.0267, "step": 50150 }, { "epoch": 9.074877916440586, "grad_norm": 0.03803849592804909, "learning_rate": 9.45846380548904e-08, "loss": 0.0079, "step": 50175 }, { "epoch": 9.079399529752216, "grad_norm": 0.17854449152946472, "learning_rate": 9.412414809357155e-08, "loss": 0.0032, "step": 50200 }, { "epoch": 9.083921143063845, "grad_norm": 0.07141181081533432, "learning_rate": 9.366365813225272e-08, "loss": 0.0009, "step": 50225 }, { "epoch": 9.088442756375475, "grad_norm": 0.11261973530054092, "learning_rate": 9.320316817093386e-08, "loss": 0.0006, "step": 50250 }, { "epoch": 9.092964369687104, "grad_norm": 0.009887372143566608, "learning_rate": 9.274267820961503e-08, "loss": 0.0028, "step": 50275 }, { "epoch": 9.097485982998734, "grad_norm": 0.03133253753185272, "learning_rate": 9.228218824829618e-08, "loss": 0.0106, "step": 50300 }, { "epoch": 9.102007596310363, "grad_norm": 0.11163907498121262, "learning_rate": 9.182169828697733e-08, "loss": 0.0012, "step": 50325 }, { "epoch": 9.106529209621993, "grad_norm": 0.004720740485936403, "learning_rate": 9.13612083256585e-08, "loss": 0.0051, "step": 50350 }, { "epoch": 9.111050822933622, "grad_norm": 0.031010426580905914, "learning_rate": 9.090071836433965e-08, "loss": 0.0008, "step": 50375 }, { "epoch": 9.115572436245252, "grad_norm": 0.02335488423705101, "learning_rate": 9.04402284030208e-08, "loss": 0.0129, "step": 50400 }, { "epoch": 9.120094049556881, "grad_norm": 0.0240317415446043, "learning_rate": 8.997973844170197e-08, "loss": 0.0024, "step": 50425 }, { "epoch": 9.12461566286851, "grad_norm": 0.03213008865714073, "learning_rate": 8.951924848038312e-08, "loss": 0.0052, "step": 50450 }, { "epoch": 9.129137276180142, "grad_norm": 0.04678371921181679, "learning_rate": 
8.905875851906428e-08, "loss": 0.0014, "step": 50475 }, { "epoch": 9.133658889491771, "grad_norm": 1.1161173582077026, "learning_rate": 8.859826855774543e-08, "loss": 0.0005, "step": 50500 }, { "epoch": 9.1381805028034, "grad_norm": 0.1399179995059967, "learning_rate": 8.81377785964266e-08, "loss": 0.0063, "step": 50525 }, { "epoch": 9.14270211611503, "grad_norm": 0.05243779718875885, "learning_rate": 8.767728863510775e-08, "loss": 0.002, "step": 50550 }, { "epoch": 9.14722372942666, "grad_norm": 0.03155532851815224, "learning_rate": 8.72167986737889e-08, "loss": 0.0164, "step": 50575 }, { "epoch": 9.15174534273829, "grad_norm": 0.025026287883520126, "learning_rate": 8.675630871247007e-08, "loss": 0.0037, "step": 50600 }, { "epoch": 9.156266956049919, "grad_norm": 0.03365040570497513, "learning_rate": 8.629581875115122e-08, "loss": 0.0006, "step": 50625 }, { "epoch": 9.160788569361548, "grad_norm": 0.01698467880487442, "learning_rate": 8.583532878983237e-08, "loss": 0.0075, "step": 50650 }, { "epoch": 9.165310182673178, "grad_norm": 2.0226006507873535, "learning_rate": 8.537483882851354e-08, "loss": 0.0007, "step": 50675 }, { "epoch": 9.169831795984807, "grad_norm": 0.2596571445465088, "learning_rate": 8.49143488671947e-08, "loss": 0.0019, "step": 50700 }, { "epoch": 9.174353409296437, "grad_norm": 0.1478416472673416, "learning_rate": 8.445385890587585e-08, "loss": 0.0014, "step": 50725 }, { "epoch": 9.178875022608066, "grad_norm": 0.015776338055729866, "learning_rate": 8.399336894455701e-08, "loss": 0.0152, "step": 50750 }, { "epoch": 9.183396635919696, "grad_norm": 0.011738612316548824, "learning_rate": 8.353287898323815e-08, "loss": 0.0007, "step": 50775 }, { "epoch": 9.187918249231325, "grad_norm": 0.031315308064222336, "learning_rate": 8.307238902191932e-08, "loss": 0.0013, "step": 50800 }, { "epoch": 9.192439862542955, "grad_norm": 0.19211354851722717, "learning_rate": 8.261189906060049e-08, "loss": 0.001, "step": 50825 }, { "epoch": 9.196961475854584, 
"grad_norm": 0.03229336068034172, "learning_rate": 8.215140909928163e-08, "loss": 0.0097, "step": 50850 }, { "epoch": 9.201483089166215, "grad_norm": 0.010991367511451244, "learning_rate": 8.169091913796279e-08, "loss": 0.0149, "step": 50875 }, { "epoch": 9.206004702477845, "grad_norm": 0.014507956802845001, "learning_rate": 8.123042917664394e-08, "loss": 0.0074, "step": 50900 }, { "epoch": 9.210526315789474, "grad_norm": 2.2018537521362305, "learning_rate": 8.07699392153251e-08, "loss": 0.0245, "step": 50925 }, { "epoch": 9.215047929101104, "grad_norm": 0.4151630699634552, "learning_rate": 8.030944925400626e-08, "loss": 0.0402, "step": 50950 }, { "epoch": 9.219569542412733, "grad_norm": 0.041080426424741745, "learning_rate": 7.98489592926874e-08, "loss": 0.0219, "step": 50975 }, { "epoch": 9.224091155724363, "grad_norm": 0.026268433779478073, "learning_rate": 7.938846933136857e-08, "loss": 0.0091, "step": 51000 }, { "epoch": 9.228612769035992, "grad_norm": 0.04076811671257019, "learning_rate": 7.892797937004974e-08, "loss": 0.0012, "step": 51025 }, { "epoch": 9.233134382347622, "grad_norm": 0.011797059327363968, "learning_rate": 7.846748940873088e-08, "loss": 0.005, "step": 51050 }, { "epoch": 9.237655995659251, "grad_norm": 2.813450574874878, "learning_rate": 7.800699944741204e-08, "loss": 0.001, "step": 51075 }, { "epoch": 9.24217760897088, "grad_norm": 0.11257357895374298, "learning_rate": 7.754650948609321e-08, "loss": 0.0089, "step": 51100 }, { "epoch": 9.24669922228251, "grad_norm": 0.01619679108262062, "learning_rate": 7.708601952477435e-08, "loss": 0.0009, "step": 51125 }, { "epoch": 9.25122083559414, "grad_norm": 0.0227675624191761, "learning_rate": 7.662552956345551e-08, "loss": 0.001, "step": 51150 }, { "epoch": 9.25574244890577, "grad_norm": 19.843429565429688, "learning_rate": 7.616503960213668e-08, "loss": 0.0012, "step": 51175 }, { "epoch": 9.260264062217399, "grad_norm": 0.17110028862953186, "learning_rate": 7.570454964081782e-08, "loss": 0.0048, 
"step": 51200 }, { "epoch": 9.264785675529028, "grad_norm": 0.034038007259368896, "learning_rate": 7.524405967949899e-08, "loss": 0.0061, "step": 51225 }, { "epoch": 9.269307288840658, "grad_norm": 0.8703758716583252, "learning_rate": 7.478356971818014e-08, "loss": 0.0041, "step": 51250 }, { "epoch": 9.273828902152289, "grad_norm": 0.019711392000317574, "learning_rate": 7.432307975686129e-08, "loss": 0.0105, "step": 51275 }, { "epoch": 9.278350515463918, "grad_norm": 12.372844696044922, "learning_rate": 7.386258979554246e-08, "loss": 0.0258, "step": 51300 }, { "epoch": 9.282872128775548, "grad_norm": 0.03570927679538727, "learning_rate": 7.340209983422361e-08, "loss": 0.027, "step": 51325 }, { "epoch": 9.287393742087177, "grad_norm": 0.2612115144729614, "learning_rate": 7.294160987290476e-08, "loss": 0.0166, "step": 51350 }, { "epoch": 9.291915355398807, "grad_norm": 0.10630948096513748, "learning_rate": 7.248111991158593e-08, "loss": 0.0233, "step": 51375 }, { "epoch": 9.296436968710436, "grad_norm": 0.08389817923307419, "learning_rate": 7.202062995026708e-08, "loss": 0.0012, "step": 51400 }, { "epoch": 9.300958582022066, "grad_norm": 0.012098311446607113, "learning_rate": 7.156013998894824e-08, "loss": 0.0049, "step": 51425 }, { "epoch": 9.305480195333695, "grad_norm": 1.7195242643356323, "learning_rate": 7.109965002762939e-08, "loss": 0.0013, "step": 51450 }, { "epoch": 9.310001808645325, "grad_norm": 0.005529410671442747, "learning_rate": 7.063916006631056e-08, "loss": 0.0005, "step": 51475 }, { "epoch": 9.314523421956954, "grad_norm": 14.70275592803955, "learning_rate": 7.017867010499171e-08, "loss": 0.0074, "step": 51500 }, { "epoch": 9.319045035268584, "grad_norm": 0.03697911649942398, "learning_rate": 6.971818014367286e-08, "loss": 0.0019, "step": 51525 }, { "epoch": 9.323566648580213, "grad_norm": 0.023678451776504517, "learning_rate": 6.927610978080678e-08, "loss": 0.0067, "step": 51550 }, { "epoch": 9.328088261891843, "grad_norm": 0.056655462831258774, 
"learning_rate": 6.881561981948793e-08, "loss": 0.0029, "step": 51575 }, { "epoch": 9.332609875203472, "grad_norm": 0.2097851186990738, "learning_rate": 6.835512985816909e-08, "loss": 0.0002, "step": 51600 }, { "epoch": 9.337131488515102, "grad_norm": 0.030322756618261337, "learning_rate": 6.789463989685025e-08, "loss": 0.02, "step": 51625 }, { "epoch": 9.341653101826731, "grad_norm": 0.05204546079039574, "learning_rate": 6.74341499355314e-08, "loss": 0.0002, "step": 51650 }, { "epoch": 9.34617471513836, "grad_norm": 5.798641204833984, "learning_rate": 6.697365997421256e-08, "loss": 0.0012, "step": 51675 }, { "epoch": 9.350696328449992, "grad_norm": 1.0529894828796387, "learning_rate": 6.651317001289372e-08, "loss": 0.016, "step": 51700 }, { "epoch": 9.355217941761621, "grad_norm": 0.03712335228919983, "learning_rate": 6.605268005157486e-08, "loss": 0.0015, "step": 51725 }, { "epoch": 9.35973955507325, "grad_norm": 0.20770879089832306, "learning_rate": 6.559219009025603e-08, "loss": 0.0245, "step": 51750 }, { "epoch": 9.36426116838488, "grad_norm": 0.034741051495075226, "learning_rate": 6.51317001289372e-08, "loss": 0.0384, "step": 51775 }, { "epoch": 9.36878278169651, "grad_norm": 0.06246571242809296, "learning_rate": 6.467121016761834e-08, "loss": 0.0029, "step": 51800 }, { "epoch": 9.37330439500814, "grad_norm": 0.019802218303084373, "learning_rate": 6.42107202062995e-08, "loss": 0.0017, "step": 51825 }, { "epoch": 9.377826008319769, "grad_norm": 0.035850297659635544, "learning_rate": 6.375023024498067e-08, "loss": 0.0024, "step": 51850 }, { "epoch": 9.382347621631398, "grad_norm": 0.01637556403875351, "learning_rate": 6.328974028366181e-08, "loss": 0.0005, "step": 51875 }, { "epoch": 9.386869234943028, "grad_norm": 0.04414265602827072, "learning_rate": 6.282925032234297e-08, "loss": 0.0038, "step": 51900 }, { "epoch": 9.391390848254657, "grad_norm": 0.048500921577215195, "learning_rate": 6.236876036102413e-08, "loss": 0.0088, "step": 51925 }, { "epoch": 
9.395912461566287, "grad_norm": 0.22167189419269562, "learning_rate": 6.190827039970528e-08, "loss": 0.0033, "step": 51950 }, { "epoch": 9.400434074877916, "grad_norm": 0.02152320370078087, "learning_rate": 6.144778043838645e-08, "loss": 0.0014, "step": 51975 }, { "epoch": 9.404955688189546, "grad_norm": 0.016991982236504555, "learning_rate": 6.09872904770676e-08, "loss": 0.007, "step": 52000 }, { "epoch": 9.409477301501175, "grad_norm": 0.01895890012383461, "learning_rate": 6.052680051574875e-08, "loss": 0.0049, "step": 52025 }, { "epoch": 9.413998914812804, "grad_norm": 0.02496323734521866, "learning_rate": 6.006631055442992e-08, "loss": 0.0107, "step": 52050 }, { "epoch": 9.418520528124434, "grad_norm": 5.717367649078369, "learning_rate": 5.960582059311107e-08, "loss": 0.0118, "step": 52075 }, { "epoch": 9.423042141436065, "grad_norm": 0.02132461778819561, "learning_rate": 5.9145330631792224e-08, "loss": 0.008, "step": 52100 }, { "epoch": 9.427563754747695, "grad_norm": 0.018231956288218498, "learning_rate": 5.8684840670473384e-08, "loss": 0.0376, "step": 52125 }, { "epoch": 9.432085368059324, "grad_norm": 19.91530418395996, "learning_rate": 5.822435070915454e-08, "loss": 0.0246, "step": 52150 }, { "epoch": 9.436606981370954, "grad_norm": 0.037468716502189636, "learning_rate": 5.7763860747835697e-08, "loss": 0.0306, "step": 52175 }, { "epoch": 9.441128594682583, "grad_norm": 0.013554728589951992, "learning_rate": 5.730337078651685e-08, "loss": 0.0054, "step": 52200 }, { "epoch": 9.445650207994213, "grad_norm": 0.023764220997691154, "learning_rate": 5.684288082519801e-08, "loss": 0.006, "step": 52225 }, { "epoch": 9.450171821305842, "grad_norm": 0.040070127695798874, "learning_rate": 5.638239086387916e-08, "loss": 0.0008, "step": 52250 }, { "epoch": 9.454693434617472, "grad_norm": 0.10571596771478653, "learning_rate": 5.592190090256032e-08, "loss": 0.0013, "step": 52275 }, { "epoch": 9.459215047929101, "grad_norm": 0.14270947873592377, "learning_rate": 
5.546141094124148e-08, "loss": 0.0129, "step": 52300 }, { "epoch": 9.46373666124073, "grad_norm": 0.04533557966351509, "learning_rate": 5.5000920979922634e-08, "loss": 0.0006, "step": 52325 }, { "epoch": 9.46825827455236, "grad_norm": 0.06353598833084106, "learning_rate": 5.454043101860379e-08, "loss": 0.0005, "step": 52350 }, { "epoch": 9.47277988786399, "grad_norm": 0.016834860667586327, "learning_rate": 5.4079941057284954e-08, "loss": 0.0031, "step": 52375 }, { "epoch": 9.477301501175619, "grad_norm": 0.012295857071876526, "learning_rate": 5.3619451095966107e-08, "loss": 0.0003, "step": 52400 }, { "epoch": 9.481823114487248, "grad_norm": 10.514667510986328, "learning_rate": 5.315896113464726e-08, "loss": 0.008, "step": 52425 }, { "epoch": 9.486344727798878, "grad_norm": 0.813252329826355, "learning_rate": 5.269847117332842e-08, "loss": 0.0014, "step": 52450 }, { "epoch": 9.490866341110507, "grad_norm": 2.42742657661438, "learning_rate": 5.223798121200958e-08, "loss": 0.0026, "step": 52475 }, { "epoch": 9.495387954422139, "grad_norm": 0.03881525248289108, "learning_rate": 5.177749125069073e-08, "loss": 0.0107, "step": 52500 }, { "epoch": 9.499909567733768, "grad_norm": 0.09639015793800354, "learning_rate": 5.1317001289371885e-08, "loss": 0.0052, "step": 52525 }, { "epoch": 9.504431181045398, "grad_norm": 0.16553708910942078, "learning_rate": 5.085651132805305e-08, "loss": 0.0188, "step": 52550 }, { "epoch": 9.508952794357027, "grad_norm": 9.378378868103027, "learning_rate": 5.0396021366734204e-08, "loss": 0.0339, "step": 52575 }, { "epoch": 9.513474407668657, "grad_norm": 0.2166759967803955, "learning_rate": 4.993553140541536e-08, "loss": 0.0012, "step": 52600 }, { "epoch": 9.517996020980286, "grad_norm": 0.026047270745038986, "learning_rate": 4.9475041444096517e-08, "loss": 0.0091, "step": 52625 }, { "epoch": 9.522517634291916, "grad_norm": 0.03240982070565224, "learning_rate": 4.9014551482777676e-08, "loss": 0.0085, "step": 52650 }, { "epoch": 
9.527039247603545, "grad_norm": 0.07469449937343597, "learning_rate": 4.855406152145883e-08, "loss": 0.0016, "step": 52675 }, { "epoch": 9.531560860915175, "grad_norm": 0.034668173640966415, "learning_rate": 4.809357156013999e-08, "loss": 0.0008, "step": 52700 }, { "epoch": 9.536082474226804, "grad_norm": 0.00523386849090457, "learning_rate": 4.763308159882114e-08, "loss": 0.0074, "step": 52725 }, { "epoch": 9.540604087538433, "grad_norm": 0.018630068749189377, "learning_rate": 4.71725916375023e-08, "loss": 0.0108, "step": 52750 }, { "epoch": 9.545125700850063, "grad_norm": 0.02157723344862461, "learning_rate": 4.671210167618346e-08, "loss": 0.0078, "step": 52775 }, { "epoch": 9.549647314161692, "grad_norm": 0.016346458345651627, "learning_rate": 4.6251611714864614e-08, "loss": 0.0123, "step": 52800 }, { "epoch": 9.554168927473322, "grad_norm": 0.027240611612796783, "learning_rate": 4.579112175354577e-08, "loss": 0.0002, "step": 52825 }, { "epoch": 9.558690540784951, "grad_norm": 1.3178123235702515, "learning_rate": 4.533063179222693e-08, "loss": 0.0038, "step": 52850 }, { "epoch": 9.56321215409658, "grad_norm": 0.025551458820700645, "learning_rate": 4.4870141830908086e-08, "loss": 0.011, "step": 52875 }, { "epoch": 9.56773376740821, "grad_norm": 0.06759845465421677, "learning_rate": 4.440965186958924e-08, "loss": 0.0013, "step": 52900 }, { "epoch": 9.572255380719842, "grad_norm": 0.01742335967719555, "learning_rate": 4.394916190827039e-08, "loss": 0.0124, "step": 52925 }, { "epoch": 9.576776994031471, "grad_norm": 0.09457490593194962, "learning_rate": 4.348867194695156e-08, "loss": 0.0146, "step": 52950 }, { "epoch": 9.5812986073431, "grad_norm": 0.11490760743618011, "learning_rate": 4.302818198563271e-08, "loss": 0.0145, "step": 52975 }, { "epoch": 9.58582022065473, "grad_norm": 0.17125943303108215, "learning_rate": 4.2567692024313865e-08, "loss": 0.0047, "step": 53000 }, { "epoch": 9.59034183396636, "grad_norm": 0.08460250496864319, "learning_rate": 
4.2107202062995024e-08, "loss": 0.0045, "step": 53025 }, { "epoch": 9.594863447277989, "grad_norm": 0.5364235639572144, "learning_rate": 4.1646712101676184e-08, "loss": 0.0014, "step": 53050 }, { "epoch": 9.599385060589618, "grad_norm": 0.2898434102535248, "learning_rate": 4.118622214035734e-08, "loss": 0.0034, "step": 53075 }, { "epoch": 9.603906673901248, "grad_norm": 0.003405811497941613, "learning_rate": 4.0725732179038496e-08, "loss": 0.0064, "step": 53100 }, { "epoch": 9.608428287212877, "grad_norm": 0.03517955541610718, "learning_rate": 4.0265242217719656e-08, "loss": 0.005, "step": 53125 }, { "epoch": 9.612949900524507, "grad_norm": 0.005290856584906578, "learning_rate": 3.980475225640081e-08, "loss": 0.006, "step": 53150 }, { "epoch": 9.617471513836136, "grad_norm": 0.026814907789230347, "learning_rate": 3.934426229508197e-08, "loss": 0.0026, "step": 53175 }, { "epoch": 9.621993127147766, "grad_norm": 0.03042653575539589, "learning_rate": 3.888377233376312e-08, "loss": 0.0061, "step": 53200 }, { "epoch": 9.626514740459395, "grad_norm": 0.010026361793279648, "learning_rate": 3.842328237244428e-08, "loss": 0.0002, "step": 53225 }, { "epoch": 9.631036353771025, "grad_norm": 0.01914265938103199, "learning_rate": 3.7962792411125434e-08, "loss": 0.005, "step": 53250 }, { "epoch": 9.635557967082654, "grad_norm": 0.05811558663845062, "learning_rate": 3.7502302449806594e-08, "loss": 0.0126, "step": 53275 }, { "epoch": 9.640079580394286, "grad_norm": 0.013714855536818504, "learning_rate": 3.704181248848775e-08, "loss": 0.0215, "step": 53300 }, { "epoch": 9.644601193705915, "grad_norm": 0.0346146784722805, "learning_rate": 3.6581322527168906e-08, "loss": 0.0144, "step": 53325 }, { "epoch": 9.649122807017545, "grad_norm": 0.0485786534845829, "learning_rate": 3.6120832565850066e-08, "loss": 0.015, "step": 53350 }, { "epoch": 9.653644420329174, "grad_norm": 0.06555884331464767, "learning_rate": 3.566034260453122e-08, "loss": 0.0078, "step": 53375 }, { "epoch": 
9.658166033640804, "grad_norm": 0.03746599331498146, "learning_rate": 3.519985264321237e-08, "loss": 0.0021, "step": 53400 }, { "epoch": 9.662687646952433, "grad_norm": 0.04957371950149536, "learning_rate": 3.473936268189354e-08, "loss": 0.009, "step": 53425 }, { "epoch": 9.667209260264062, "grad_norm": 0.020930081605911255, "learning_rate": 3.427887272057469e-08, "loss": 0.0011, "step": 53450 }, { "epoch": 9.671730873575692, "grad_norm": 0.014865943230688572, "learning_rate": 3.3818382759255844e-08, "loss": 0.0005, "step": 53475 }, { "epoch": 9.676252486887321, "grad_norm": 0.025175156071782112, "learning_rate": 3.3357892797937004e-08, "loss": 0.0012, "step": 53500 }, { "epoch": 9.680774100198951, "grad_norm": 0.03397619351744652, "learning_rate": 3.2897402836618163e-08, "loss": 0.0011, "step": 53525 }, { "epoch": 9.68529571351058, "grad_norm": 0.02394242398440838, "learning_rate": 3.2436912875299316e-08, "loss": 0.0036, "step": 53550 }, { "epoch": 9.68981732682221, "grad_norm": 0.08645796030759811, "learning_rate": 3.197642291398047e-08, "loss": 0.0005, "step": 53575 }, { "epoch": 9.69433894013384, "grad_norm": 0.08837512135505676, "learning_rate": 3.151593295266163e-08, "loss": 0.0034, "step": 53600 }, { "epoch": 9.698860553445469, "grad_norm": 0.0059136999770998955, "learning_rate": 3.105544299134279e-08, "loss": 0.0028, "step": 53625 }, { "epoch": 9.703382166757098, "grad_norm": 0.06296961009502411, "learning_rate": 3.059495303002394e-08, "loss": 0.0057, "step": 53650 }, { "epoch": 9.707903780068728, "grad_norm": 24.09531593322754, "learning_rate": 3.01344630687051e-08, "loss": 0.0048, "step": 53675 }, { "epoch": 9.712425393380357, "grad_norm": 0.07932830601930618, "learning_rate": 2.9673973107386258e-08, "loss": 0.0354, "step": 53700 }, { "epoch": 9.716947006691989, "grad_norm": 0.6279693841934204, "learning_rate": 2.9213483146067417e-08, "loss": 0.0024, "step": 53725 }, { "epoch": 9.721468620003618, "grad_norm": 0.018382834270596504, "learning_rate": 
2.875299318474857e-08, "loss": 0.0033, "step": 53750 }, { "epoch": 9.725990233315247, "grad_norm": 0.13161396980285645, "learning_rate": 2.829250322342973e-08, "loss": 0.0269, "step": 53775 }, { "epoch": 9.730511846626877, "grad_norm": 0.08527792245149612, "learning_rate": 2.7832013262110883e-08, "loss": 0.0098, "step": 53800 }, { "epoch": 9.735033459938506, "grad_norm": 0.020729778334498405, "learning_rate": 2.7371523300792042e-08, "loss": 0.0041, "step": 53825 }, { "epoch": 9.739555073250136, "grad_norm": 0.19172513484954834, "learning_rate": 2.69110333394732e-08, "loss": 0.0041, "step": 53850 }, { "epoch": 9.744076686561765, "grad_norm": 0.03183312341570854, "learning_rate": 2.6450543378154355e-08, "loss": 0.0012, "step": 53875 }, { "epoch": 9.748598299873395, "grad_norm": 5.034526824951172, "learning_rate": 2.599005341683551e-08, "loss": 0.0018, "step": 53900 }, { "epoch": 9.753119913185024, "grad_norm": 0.0769072026014328, "learning_rate": 2.552956345551667e-08, "loss": 0.0005, "step": 53925 }, { "epoch": 9.757641526496654, "grad_norm": 0.032027099281549454, "learning_rate": 2.5069073494197824e-08, "loss": 0.0056, "step": 53950 }, { "epoch": 9.762163139808283, "grad_norm": 0.08156726509332657, "learning_rate": 2.4608583532878984e-08, "loss": 0.0068, "step": 53975 }, { "epoch": 9.766684753119913, "grad_norm": 0.010565202683210373, "learning_rate": 2.4148093571560137e-08, "loss": 0.0032, "step": 54000 }, { "epoch": 9.771206366431542, "grad_norm": 0.027527930215001106, "learning_rate": 2.3687603610241296e-08, "loss": 0.0003, "step": 54025 }, { "epoch": 9.775727979743172, "grad_norm": 0.07169647514820099, "learning_rate": 2.3227113648922452e-08, "loss": 0.0066, "step": 54050 }, { "epoch": 9.780249593054801, "grad_norm": 0.0017488193698227406, "learning_rate": 2.276662368760361e-08, "loss": 0.0004, "step": 54075 }, { "epoch": 9.78477120636643, "grad_norm": 0.011047018691897392, "learning_rate": 2.2306133726284765e-08, "loss": 0.0077, "step": 54100 }, { "epoch": 
9.78929281967806, "grad_norm": 0.05196432024240494, "learning_rate": 2.1845643764965925e-08, "loss": 0.0067, "step": 54125 }, { "epoch": 9.793814432989691, "grad_norm": 0.19424718618392944, "learning_rate": 2.1385153803647078e-08, "loss": 0.0115, "step": 54150 }, { "epoch": 9.798336046301321, "grad_norm": 0.016021044924855232, "learning_rate": 2.0924663842328237e-08, "loss": 0.0218, "step": 54175 }, { "epoch": 9.80285765961295, "grad_norm": 0.031037848442792892, "learning_rate": 2.0482593479462148e-08, "loss": 0.0081, "step": 54200 }, { "epoch": 9.80737927292458, "grad_norm": 5.485635757446289, "learning_rate": 2.0022103518143304e-08, "loss": 0.0037, "step": 54225 }, { "epoch": 9.81190088623621, "grad_norm": 0.02747327648103237, "learning_rate": 1.956161355682446e-08, "loss": 0.0044, "step": 54250 }, { "epoch": 9.816422499547839, "grad_norm": 0.1059911772608757, "learning_rate": 1.9101123595505617e-08, "loss": 0.0038, "step": 54275 }, { "epoch": 9.820944112859468, "grad_norm": 0.010986040346324444, "learning_rate": 1.8640633634186776e-08, "loss": 0.0066, "step": 54300 }, { "epoch": 9.825465726171098, "grad_norm": 0.17033414542675018, "learning_rate": 1.818014367286793e-08, "loss": 0.0047, "step": 54325 }, { "epoch": 9.829987339482727, "grad_norm": 0.007194284815341234, "learning_rate": 1.771965371154909e-08, "loss": 0.0036, "step": 54350 }, { "epoch": 9.834508952794357, "grad_norm": 0.02418128214776516, "learning_rate": 1.7259163750230242e-08, "loss": 0.002, "step": 54375 }, { "epoch": 9.839030566105986, "grad_norm": 0.11744951456785202, "learning_rate": 1.67986737889114e-08, "loss": 0.0021, "step": 54400 }, { "epoch": 9.843552179417616, "grad_norm": 0.0121499327942729, "learning_rate": 1.6338183827592558e-08, "loss": 0.0079, "step": 54425 }, { "epoch": 9.848073792729245, "grad_norm": 0.014060701243579388, "learning_rate": 1.5877693866273714e-08, "loss": 0.001, "step": 54450 }, { "epoch": 9.852595406040875, "grad_norm": 2.7887675762176514, "learning_rate": 
1.541720390495487e-08, "loss": 0.0038, "step": 54475 }, { "epoch": 9.857117019352504, "grad_norm": 0.006395564880222082, "learning_rate": 1.4956713943636027e-08, "loss": 0.029, "step": 54500 }, { "epoch": 9.861638632664135, "grad_norm": 0.06236935779452324, "learning_rate": 1.4496223982317185e-08, "loss": 0.0107, "step": 54525 }, { "epoch": 9.866160245975765, "grad_norm": 4.1836981773376465, "learning_rate": 1.4035734020998342e-08, "loss": 0.0343, "step": 54550 }, { "epoch": 9.870681859287394, "grad_norm": 0.0794193297624588, "learning_rate": 1.3575244059679499e-08, "loss": 0.0146, "step": 54575 }, { "epoch": 9.875203472599024, "grad_norm": 0.023560110479593277, "learning_rate": 1.3114754098360655e-08, "loss": 0.0052, "step": 54600 }, { "epoch": 9.879725085910653, "grad_norm": 0.2117091715335846, "learning_rate": 1.2654264137041811e-08, "loss": 0.005, "step": 54625 }, { "epoch": 9.884246699222283, "grad_norm": 0.0047377352602779865, "learning_rate": 1.2193774175722968e-08, "loss": 0.0005, "step": 54650 }, { "epoch": 9.888768312533912, "grad_norm": 0.25400134921073914, "learning_rate": 1.1733284214404126e-08, "loss": 0.008, "step": 54675 }, { "epoch": 9.893289925845542, "grad_norm": 0.09486464411020279, "learning_rate": 1.1272794253085282e-08, "loss": 0.0044, "step": 54700 }, { "epoch": 9.897811539157171, "grad_norm": 0.02684643305838108, "learning_rate": 1.0812304291766438e-08, "loss": 0.0069, "step": 54725 }, { "epoch": 9.9023331524688, "grad_norm": 0.025187574326992035, "learning_rate": 1.0351814330447595e-08, "loss": 0.0007, "step": 54750 }, { "epoch": 9.90685476578043, "grad_norm": 0.034975674003362656, "learning_rate": 9.891324369128753e-09, "loss": 0.0071, "step": 54775 }, { "epoch": 9.91137637909206, "grad_norm": 15.95773983001709, "learning_rate": 9.430834407809909e-09, "loss": 0.011, "step": 54800 }, { "epoch": 9.91589799240369, "grad_norm": 0.026726465672254562, "learning_rate": 8.970344446491065e-09, "loss": 0.0006, "step": 54825 }, { "epoch": 
9.920419605715319, "grad_norm": 0.020367203280329704, "learning_rate": 8.509854485172221e-09, "loss": 0.0035, "step": 54850 }, { "epoch": 9.924941219026948, "grad_norm": 0.12988033890724182, "learning_rate": 8.049364523853381e-09, "loss": 0.005, "step": 54875 }, { "epoch": 9.929462832338578, "grad_norm": 0.06096798926591873, "learning_rate": 7.588874562534537e-09, "loss": 0.004, "step": 54900 }, { "epoch": 9.933984445650207, "grad_norm": 0.9536722898483276, "learning_rate": 7.128384601215693e-09, "loss": 0.0036, "step": 54925 }, { "epoch": 9.938506058961838, "grad_norm": 17.520708084106445, "learning_rate": 6.66789463989685e-09, "loss": 0.022, "step": 54950 }, { "epoch": 9.943027672273468, "grad_norm": 0.010756449773907661, "learning_rate": 6.207404678578006e-09, "loss": 0.0402, "step": 54975 }, { "epoch": 9.947549285585097, "grad_norm": 0.015861673280596733, "learning_rate": 5.746914717259163e-09, "loss": 0.0017, "step": 55000 }, { "epoch": 9.952070898896727, "grad_norm": 0.008379822596907616, "learning_rate": 5.2864247559403205e-09, "loss": 0.0052, "step": 55025 }, { "epoch": 9.956592512208356, "grad_norm": 0.15586745738983154, "learning_rate": 4.825934794621478e-09, "loss": 0.0054, "step": 55050 }, { "epoch": 9.961114125519986, "grad_norm": 0.022011611610651016, "learning_rate": 4.365444833302634e-09, "loss": 0.0012, "step": 55075 }, { "epoch": 9.965635738831615, "grad_norm": 0.012549543753266335, "learning_rate": 3.90495487198379e-09, "loss": 0.0007, "step": 55100 }, { "epoch": 9.970157352143245, "grad_norm": 0.8270652890205383, "learning_rate": 3.4444649106649474e-09, "loss": 0.0063, "step": 55125 }, { "epoch": 9.974678965454874, "grad_norm": 0.0325373113155365, "learning_rate": 2.983974949346104e-09, "loss": 0.0008, "step": 55150 }, { "epoch": 9.979200578766504, "grad_norm": 0.015854543074965477, "learning_rate": 2.523484988027261e-09, "loss": 0.0065, "step": 55175 }, { "epoch": 9.983722192078133, "grad_norm": 16.109289169311523, "learning_rate": 
2.0629950267084176e-09, "loss": 0.0122, "step": 55200 }, { "epoch": 9.988243805389763, "grad_norm": 3.6053996086120605, "learning_rate": 1.6025050653895745e-09, "loss": 0.0018, "step": 55225 }, { "epoch": 9.992765418701392, "grad_norm": 0.009253941476345062, "learning_rate": 1.1420151040707312e-09, "loss": 0.0008, "step": 55250 }, { "epoch": 9.997287032013022, "grad_norm": 11.093884468078613, "learning_rate": 6.815251427518879e-10, "loss": 0.0159, "step": 55275 }, { "epoch": 10.0, "eval_loss": 0.3697284162044525, "eval_runtime": 8323.367, "eval_samples_per_second": 1.141, "eval_steps_per_second": 0.143, "eval_wer": 0.10309096732863549, "step": 55290 }, { "epoch": 10.0, "step": 55290, "total_flos": 7.517848352823706e+20, "train_loss": 0.018776766294569685, "train_runtime": 360719.5114, "train_samples_per_second": 0.613, "train_steps_per_second": 0.153 } ], "logging_steps": 25, "max_steps": 55290, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.517848352823706e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }