{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 61000, "global_step": 118428, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016887898132198466, "grad_norm": 6.124098777770996, "learning_rate": 8.443806467955755e-09, "loss": 0.7584, "step": 10 }, { "epoch": 0.0003377579626439693, "grad_norm": 9.54223918914795, "learning_rate": 1.688761293591151e-08, "loss": 0.7852, "step": 20 }, { "epoch": 0.000506636943965954, "grad_norm": 8.574740409851074, "learning_rate": 2.5331419403867266e-08, "loss": 0.8087, "step": 30 }, { "epoch": 0.0006755159252879386, "grad_norm": 6.908961296081543, "learning_rate": 3.377522587182302e-08, "loss": 0.8122, "step": 40 }, { "epoch": 0.0008443949066099234, "grad_norm": 8.199871063232422, "learning_rate": 4.2219032339778776e-08, "loss": 0.7607, "step": 50 }, { "epoch": 0.001013273887931908, "grad_norm": 9.224539756774902, "learning_rate": 5.066283880773453e-08, "loss": 0.7932, "step": 60 }, { "epoch": 0.0011821528692538927, "grad_norm": 7.442521572113037, "learning_rate": 5.910664527569028e-08, "loss": 0.7758, "step": 70 }, { "epoch": 0.0013510318505758772, "grad_norm": 9.004443168640137, "learning_rate": 6.755045174364604e-08, "loss": 0.7878, "step": 80 }, { "epoch": 0.001519910831897862, "grad_norm": 8.884103775024414, "learning_rate": 7.59942582116018e-08, "loss": 0.771, "step": 90 }, { "epoch": 0.0016887898132198467, "grad_norm": 7.017683506011963, "learning_rate": 8.443806467955755e-08, "loss": 0.7849, "step": 100 }, { "epoch": 0.0018576687945418312, "grad_norm": 8.55809211730957, "learning_rate": 9.288187114751331e-08, "loss": 0.7424, "step": 110 }, { "epoch": 0.002026547775863816, "grad_norm": 8.427170753479004, "learning_rate": 1.0132567761546906e-07, "loss": 0.7474, "step": 120 }, { "epoch": 0.0021954267571858005, "grad_norm": 7.609618663787842, "learning_rate": 1.0976948408342482e-07, "loss": 0.7366, "step": 130 }, { "epoch": 0.0023643057385077854, "grad_norm": 8.364843368530273, "learning_rate": 1.1821329055138056e-07, "loss": 0.7373, "step": 140 }, { "epoch": 0.00253318471982977, "grad_norm": 6.007495403289795, "learning_rate": 1.2665709701933633e-07, "loss": 0.711, "step": 150 }, { "epoch": 0.0027020637011517545, "grad_norm": 6.817158222198486, "learning_rate": 1.3510090348729208e-07, "loss": 0.6962, "step": 160 }, { "epoch": 0.0028709426824737394, "grad_norm": 6.550057888031006, "learning_rate": 1.4354470995524782e-07, "loss": 0.6962, "step": 170 }, { "epoch": 0.003039821663795724, "grad_norm": 6.238892078399658, "learning_rate": 1.519885164232036e-07, "loss": 0.6643, "step": 180 }, { "epoch": 0.0032087006451177085, "grad_norm": 6.482685565948486, "learning_rate": 1.6043232289115936e-07, "loss": 0.6332, "step": 190 }, { "epoch": 0.0033775796264396934, "grad_norm": 5.430366039276123, "learning_rate": 1.688761293591151e-07, "loss": 0.5953, "step": 200 }, { "epoch": 0.003546458607761678, "grad_norm": 4.316599369049072, "learning_rate": 1.7731993582707087e-07, "loss": 0.5356, "step": 210 }, { "epoch": 0.0037153375890836625, "grad_norm": 4.39082670211792, "learning_rate": 1.8576374229502662e-07, "loss": 0.5373, "step": 220 }, { "epoch": 0.0038842165704056474, "grad_norm": 4.379445552825928, "learning_rate": 1.9420754876298236e-07, "loss": 0.5074, "step": 230 }, { "epoch": 0.004053095551727632, "grad_norm": 3.68558669090271, "learning_rate": 2.0265135523093813e-07, "loss": 0.4788, "step": 240 }, { "epoch": 0.0042219745330496165, "grad_norm": 3.8847837448120117, "learning_rate": 2.1109516169889387e-07, "loss": 0.4199, "step": 250 }, { "epoch": 0.004390853514371601, "grad_norm": 3.790949821472168, "learning_rate": 2.1953896816684964e-07, "loss": 0.3651, "step": 260 }, { "epoch": 0.004559732495693586, "grad_norm": 3.7978429794311523, "learning_rate": 2.2798277463480539e-07, "loss": 0.344, "step": 270 }, { "epoch": 0.004728611477015571, "grad_norm": 3.1303465366363525, "learning_rate": 2.3642658110276113e-07, "loss": 0.3191, "step": 280 }, { "epoch": 0.004897490458337555, "grad_norm": 3.4057981967926025, "learning_rate": 2.448703875707169e-07, "loss": 0.3014, "step": 290 }, { "epoch": 0.00506636943965954, "grad_norm": 3.529587507247925, "learning_rate": 2.5331419403867267e-07, "loss": 0.2874, "step": 300 }, { "epoch": 0.0052352484209815245, "grad_norm": 2.866547107696533, "learning_rate": 2.617580005066284e-07, "loss": 0.2689, "step": 310 }, { "epoch": 0.005404127402303509, "grad_norm": 4.073023796081543, "learning_rate": 2.7020180697458416e-07, "loss": 0.2485, "step": 320 }, { "epoch": 0.005573006383625494, "grad_norm": 2.518500566482544, "learning_rate": 2.786456134425399e-07, "loss": 0.235, "step": 330 }, { "epoch": 0.005741885364947479, "grad_norm": 2.671142578125, "learning_rate": 2.8708941991049564e-07, "loss": 0.2152, "step": 340 }, { "epoch": 0.005910764346269463, "grad_norm": 2.4722273349761963, "learning_rate": 2.955332263784514e-07, "loss": 0.2113, "step": 350 }, { "epoch": 0.006079643327591448, "grad_norm": 2.653853416442871, "learning_rate": 3.039770328464072e-07, "loss": 0.1988, "step": 360 }, { "epoch": 0.0062485223089134324, "grad_norm": 2.7545671463012695, "learning_rate": 3.1242083931436295e-07, "loss": 0.2032, "step": 370 }, { "epoch": 0.006417401290235417, "grad_norm": 2.2809853553771973, "learning_rate": 3.208646457823187e-07, "loss": 0.1765, "step": 380 }, { "epoch": 0.006586280271557402, "grad_norm": 2.044936180114746, "learning_rate": 3.2930845225027444e-07, "loss": 0.1815, "step": 390 }, { "epoch": 0.006755159252879387, "grad_norm": 2.0984537601470947, "learning_rate": 3.377522587182302e-07, "loss": 0.1678, "step": 400 }, { "epoch": 0.006924038234201371, "grad_norm": 2.357839822769165, "learning_rate": 3.46196065186186e-07, "loss": 0.1689, "step": 410 }, { "epoch": 0.007092917215523356, "grad_norm": 3.886837959289551, "learning_rate": 3.5463987165414175e-07, "loss": 0.141, "step": 420 }, { "epoch": 0.00726179619684534, "grad_norm": 2.7527294158935547, "learning_rate": 3.6308367812209746e-07, "loss": 0.1555, "step": 430 }, { "epoch": 0.007430675178167325, "grad_norm": 2.3636205196380615, "learning_rate": 3.7152748459005323e-07, "loss": 0.1709, "step": 440 }, { "epoch": 0.00759955415948931, "grad_norm": 2.285834312438965, "learning_rate": 3.7997129105800895e-07, "loss": 0.1855, "step": 450 }, { "epoch": 0.007768433140811295, "grad_norm": 2.638777017593384, "learning_rate": 3.884150975259647e-07, "loss": 0.1676, "step": 460 }, { "epoch": 0.00793731212213328, "grad_norm": 3.0030341148376465, "learning_rate": 3.9685890399392054e-07, "loss": 0.1432, "step": 470 }, { "epoch": 0.008106191103455264, "grad_norm": 2.8257715702056885, "learning_rate": 4.0530271046187626e-07, "loss": 0.1404, "step": 480 }, { "epoch": 0.008275070084777248, "grad_norm": 2.8925344944000244, "learning_rate": 4.1374651692983203e-07, "loss": 0.1444, "step": 490 }, { "epoch": 0.008443949066099233, "grad_norm": 2.230567693710327, "learning_rate": 4.2219032339778775e-07, "loss": 0.1277, "step": 500 }, { "epoch": 0.008612828047421217, "grad_norm": 2.5820159912109375, "learning_rate": 4.3063412986574346e-07, "loss": 0.1355, "step": 510 }, { "epoch": 0.008781707028743202, "grad_norm": 2.4616146087646484, "learning_rate": 4.390779363336993e-07, "loss": 0.1198, "step": 520 }, { "epoch": 0.008950586010065186, "grad_norm": 2.4409024715423584, "learning_rate": 4.4752174280165506e-07, "loss": 0.1256, "step": 530 }, { "epoch": 0.009119464991387173, "grad_norm": 2.6446640491485596, "learning_rate": 4.5596554926961077e-07, "loss": 0.1338, "step": 540 }, { "epoch": 0.009288343972709157, "grad_norm": 3.2052366733551025, "learning_rate": 4.6440935573756654e-07, "loss": 0.1717, "step": 550 }, { "epoch": 0.009457222954031142, "grad_norm": 2.2928459644317627, "learning_rate": 4.7285316220552226e-07, "loss": 0.1142, "step": 560 }, { "epoch": 0.009626101935353126, "grad_norm": 2.3701438903808594, "learning_rate": 4.81296968673478e-07, "loss": 0.1286, "step": 570 }, { "epoch": 0.00979498091667511, "grad_norm": 2.3280882835388184, "learning_rate": 4.897407751414338e-07, "loss": 0.1439, "step": 580 }, { "epoch": 0.009963859897997095, "grad_norm": 2.250161647796631, "learning_rate": 4.981845816093895e-07, "loss": 0.1246, "step": 590 }, { "epoch": 0.01013273887931908, "grad_norm": 2.4442455768585205, "learning_rate": 5.066283880773453e-07, "loss": 0.1372, "step": 600 }, { "epoch": 0.010301617860641064, "grad_norm": 2.673264503479004, "learning_rate": 5.150721945453011e-07, "loss": 0.1141, "step": 610 }, { "epoch": 0.010470496841963049, "grad_norm": 1.9719061851501465, "learning_rate": 5.235160010132568e-07, "loss": 0.1265, "step": 620 }, { "epoch": 0.010639375823285033, "grad_norm": 1.9867478609085083, "learning_rate": 5.319598074812126e-07, "loss": 0.1216, "step": 630 }, { "epoch": 0.010808254804607018, "grad_norm": 1.8611189126968384, "learning_rate": 5.404036139491683e-07, "loss": 0.1504, "step": 640 }, { "epoch": 0.010977133785929002, "grad_norm": 2.331552028656006, "learning_rate": 5.488474204171241e-07, "loss": 0.1025, "step": 650 }, { "epoch": 0.011146012767250989, "grad_norm": 2.593984603881836, "learning_rate": 5.572912268850799e-07, "loss": 0.1129, "step": 660 }, { "epoch": 0.011314891748572973, "grad_norm": 2.429565668106079, "learning_rate": 5.657350333530356e-07, "loss": 0.1012, "step": 670 }, { "epoch": 0.011483770729894958, "grad_norm": 2.273768901824951, "learning_rate": 5.741788398209913e-07, "loss": 0.1056, "step": 680 }, { "epoch": 0.011652649711216942, "grad_norm": 1.9504871368408203, "learning_rate": 5.826226462889471e-07, "loss": 0.12, "step": 690 }, { "epoch": 0.011821528692538927, "grad_norm": 2.3728129863739014, "learning_rate": 5.910664527569028e-07, "loss": 0.1062, "step": 700 }, { "epoch": 0.011990407673860911, "grad_norm": 2.3623464107513428, "learning_rate": 5.995102592248586e-07, "loss": 0.1009, "step": 710 }, { "epoch": 0.012159286655182896, "grad_norm": 2.4266059398651123, "learning_rate": 6.079540656928144e-07, "loss": 0.1001, "step": 720 }, { "epoch": 0.01232816563650488, "grad_norm": 2.247767925262451, "learning_rate": 6.163978721607701e-07, "loss": 0.1187, "step": 730 }, { "epoch": 0.012497044617826865, "grad_norm": 2.351445436477661, "learning_rate": 6.248416786287259e-07, "loss": 0.1044, "step": 740 }, { "epoch": 0.01266592359914885, "grad_norm": 2.2113847732543945, "learning_rate": 6.332854850966816e-07, "loss": 0.0934, "step": 750 }, { "epoch": 0.012834802580470834, "grad_norm": 2.4774203300476074, "learning_rate": 6.417292915646374e-07, "loss": 0.094, "step": 760 }, { "epoch": 0.013003681561792818, "grad_norm": 2.0832836627960205, "learning_rate": 6.501730980325932e-07, "loss": 0.104, "step": 770 }, { "epoch": 0.013172560543114805, "grad_norm": 1.9768595695495605, "learning_rate": 6.586169045005489e-07, "loss": 0.0742, "step": 780 }, { "epoch": 0.01334143952443679, "grad_norm": 2.4108877182006836, "learning_rate": 6.670607109685047e-07, "loss": 0.0883, "step": 790 }, { "epoch": 0.013510318505758774, "grad_norm": 2.0879578590393066, "learning_rate": 6.755045174364604e-07, "loss": 0.0973, "step": 800 }, { "epoch": 0.013679197487080758, "grad_norm": 1.985593557357788, "learning_rate": 6.839483239044161e-07, "loss": 0.0933, "step": 810 }, { "epoch": 0.013848076468402743, "grad_norm": 2.6899261474609375, "learning_rate": 6.92392130372372e-07, "loss": 0.0838, "step": 820 }, { "epoch": 0.014016955449724727, "grad_norm": 2.872298240661621, "learning_rate": 7.008359368403277e-07, "loss": 0.0918, "step": 830 }, { "epoch": 0.014185834431046712, "grad_norm": 1.903915524482727, "learning_rate": 7.092797433082835e-07, "loss": 0.0974, "step": 840 }, { "epoch": 0.014354713412368696, "grad_norm": 1.3832244873046875, "learning_rate": 7.177235497762391e-07, "loss": 0.0948, "step": 850 }, { "epoch": 0.01452359239369068, "grad_norm": 1.790627121925354, "learning_rate": 7.261673562441949e-07, "loss": 0.0834, "step": 860 }, { "epoch": 0.014692471375012665, "grad_norm": 1.9478427171707153, "learning_rate": 7.346111627121506e-07, "loss": 0.0808, "step": 870 }, { "epoch": 0.01486135035633465, "grad_norm": 1.600297212600708, "learning_rate": 7.430549691801065e-07, "loss": 0.0829, "step": 880 }, { "epoch": 0.015030229337656634, "grad_norm": 1.8773210048675537, "learning_rate": 7.514987756480623e-07, "loss": 0.1066, "step": 890 }, { "epoch": 0.01519910831897862, "grad_norm": 1.5813443660736084, "learning_rate": 7.599425821160179e-07, "loss": 0.0898, "step": 900 }, { "epoch": 0.015367987300300605, "grad_norm": 2.158681631088257, "learning_rate": 7.683863885839737e-07, "loss": 0.0838, "step": 910 }, { "epoch": 0.01553686628162259, "grad_norm": 2.44648814201355, "learning_rate": 7.768301950519294e-07, "loss": 0.0989, "step": 920 }, { "epoch": 0.015705745262944572, "grad_norm": 1.8075037002563477, "learning_rate": 7.852740015198853e-07, "loss": 0.0693, "step": 930 }, { "epoch": 0.01587462424426656, "grad_norm": 2.129730463027954, "learning_rate": 7.937178079878411e-07, "loss": 0.0712, "step": 940 }, { "epoch": 0.01604350322558854, "grad_norm": 2.2190091609954834, "learning_rate": 8.021616144557967e-07, "loss": 0.082, "step": 950 }, { "epoch": 0.016212382206910528, "grad_norm": 2.0665814876556396, "learning_rate": 8.106054209237525e-07, "loss": 0.0925, "step": 960 }, { "epoch": 0.016381261188232514, "grad_norm": 3.085664987564087, "learning_rate": 8.190492273917082e-07, "loss": 0.1055, "step": 970 }, { "epoch": 0.016550140169554497, "grad_norm": 2.095896005630493, "learning_rate": 8.274930338596641e-07, "loss": 0.0713, "step": 980 }, { "epoch": 0.016719019150876483, "grad_norm": 1.5914567708969116, "learning_rate": 8.359368403276198e-07, "loss": 0.0778, "step": 990 }, { "epoch": 0.016887898132198466, "grad_norm": 1.750190019607544, "learning_rate": 8.443806467955755e-07, "loss": 0.0966, "step": 1000 }, { "epoch": 0.017056777113520452, "grad_norm": 2.2729129791259766, "learning_rate": 8.528244532635313e-07, "loss": 0.0821, "step": 1010 }, { "epoch": 0.017225656094842435, "grad_norm": 1.8570390939712524, "learning_rate": 8.612682597314869e-07, "loss": 0.0781, "step": 1020 }, { "epoch": 0.01739453507616442, "grad_norm": 1.6545851230621338, "learning_rate": 8.697120661994427e-07, "loss": 0.0772, "step": 1030 }, { "epoch": 0.017563414057486404, "grad_norm": 2.4885058403015137, "learning_rate": 8.781558726673986e-07, "loss": 0.078, "step": 1040 }, { "epoch": 0.01773229303880839, "grad_norm": 1.8946738243103027, "learning_rate": 8.865996791353543e-07, "loss": 0.065, "step": 1050 }, { "epoch": 0.017901172020130373, "grad_norm": 1.7587921619415283, "learning_rate": 8.950434856033101e-07, "loss": 0.0865, "step": 1060 }, { "epoch": 0.01807005100145236, "grad_norm": 2.3431482315063477, "learning_rate": 9.034872920712657e-07, "loss": 0.0619, "step": 1070 }, { "epoch": 0.018238929982774345, "grad_norm": 1.971683382987976, "learning_rate": 9.119310985392215e-07, "loss": 0.0776, "step": 1080 }, { "epoch": 0.01840780896409633, "grad_norm": 2.2187342643737793, "learning_rate": 9.203749050071773e-07, "loss": 0.0799, "step": 1090 }, { "epoch": 0.018576687945418315, "grad_norm": 1.925542950630188, "learning_rate": 9.288187114751331e-07, "loss": 0.07, "step": 1100 }, { "epoch": 0.018745566926740297, "grad_norm": 1.871279239654541, "learning_rate": 9.372625179430889e-07, "loss": 0.065, "step": 1110 }, { "epoch": 0.018914445908062284, "grad_norm": 1.5438754558563232, "learning_rate": 9.457063244110445e-07, "loss": 0.0812, "step": 1120 }, { "epoch": 0.019083324889384266, "grad_norm": 2.5270166397094727, "learning_rate": 9.541501308790004e-07, "loss": 0.0732, "step": 1130 }, { "epoch": 0.019252203870706253, "grad_norm": 2.9459540843963623, "learning_rate": 9.62593937346956e-07, "loss": 0.0881, "step": 1140 }, { "epoch": 0.019421082852028235, "grad_norm": 2.7640421390533447, "learning_rate": 9.710377438149119e-07, "loss": 0.0767, "step": 1150 }, { "epoch": 0.01958996183335022, "grad_norm": 2.233302593231201, "learning_rate": 9.794815502828676e-07, "loss": 0.0755, "step": 1160 }, { "epoch": 0.019758840814672204, "grad_norm": 1.7127326726913452, "learning_rate": 9.879253567508233e-07, "loss": 0.0815, "step": 1170 }, { "epoch": 0.01992771979599419, "grad_norm": 1.8620339632034302, "learning_rate": 9.96369163218779e-07, "loss": 0.0793, "step": 1180 }, { "epoch": 0.020096598777316177, "grad_norm": 2.1453492641448975, "learning_rate": 1.0048129696867347e-06, "loss": 0.0915, "step": 1190 }, { "epoch": 0.02026547775863816, "grad_norm": 1.7759499549865723, "learning_rate": 1.0132567761546907e-06, "loss": 0.0779, "step": 1200 }, { "epoch": 0.020434356739960146, "grad_norm": 1.761272668838501, "learning_rate": 1.0217005826226464e-06, "loss": 0.0775, "step": 1210 }, { "epoch": 0.02060323572128213, "grad_norm": 1.9524143934249878, "learning_rate": 1.0301443890906021e-06, "loss": 0.0592, "step": 1220 }, { "epoch": 0.020772114702604115, "grad_norm": 1.8950692415237427, "learning_rate": 1.0385881955585578e-06, "loss": 0.0762, "step": 1230 }, { "epoch": 0.020940993683926098, "grad_norm": 1.7781260013580322, "learning_rate": 1.0470320020265135e-06, "loss": 0.07, "step": 1240 }, { "epoch": 0.021109872665248084, "grad_norm": 2.3268065452575684, "learning_rate": 1.0554758084944695e-06, "loss": 0.0817, "step": 1250 }, { "epoch": 0.021278751646570067, "grad_norm": 1.7378801107406616, "learning_rate": 1.0639196149624252e-06, "loss": 0.0696, "step": 1260 }, { "epoch": 0.021447630627892053, "grad_norm": 1.5386779308319092, "learning_rate": 1.072363421430381e-06, "loss": 0.0683, "step": 1270 }, { "epoch": 0.021616509609214036, "grad_norm": 2.1556782722473145, "learning_rate": 1.0808072278983366e-06, "loss": 0.0756, "step": 1280 }, { "epoch": 0.021785388590536022, "grad_norm": 2.0031321048736572, "learning_rate": 1.0892510343662923e-06, "loss": 0.0854, "step": 1290 }, { "epoch": 0.021954267571858005, "grad_norm": 2.435290813446045, "learning_rate": 1.0976948408342483e-06, "loss": 0.0787, "step": 1300 }, { "epoch": 0.02212314655317999, "grad_norm": 2.3034555912017822, "learning_rate": 1.1061386473022038e-06, "loss": 0.0826, "step": 1310 }, { "epoch": 0.022292025534501977, "grad_norm": 2.143369674682617, "learning_rate": 1.1145824537701597e-06, "loss": 0.0652, "step": 1320 }, { "epoch": 0.02246090451582396, "grad_norm": 1.701810359954834, "learning_rate": 1.1230262602381154e-06, "loss": 0.0581, "step": 1330 }, { "epoch": 0.022629783497145946, "grad_norm": 1.7126011848449707, "learning_rate": 1.1314700667060711e-06, "loss": 0.0558, "step": 1340 }, { "epoch": 0.02279866247846793, "grad_norm": 1.8444150686264038, "learning_rate": 1.139913873174027e-06, "loss": 0.071, "step": 1350 }, { "epoch": 0.022967541459789916, "grad_norm": 1.6395370960235596, "learning_rate": 1.1483576796419826e-06, "loss": 0.0661, "step": 1360 }, { "epoch": 0.0231364204411119, "grad_norm": 2.3289730548858643, "learning_rate": 1.1568014861099385e-06, "loss": 0.0691, "step": 1370 }, { "epoch": 0.023305299422433885, "grad_norm": 2.3995473384857178, "learning_rate": 1.1652452925778942e-06, "loss": 0.0788, "step": 1380 }, { "epoch": 0.023474178403755867, "grad_norm": 2.1529455184936523, "learning_rate": 1.17368909904585e-06, "loss": 0.0545, "step": 1390 }, { "epoch": 0.023643057385077854, "grad_norm": 2.087756395339966, "learning_rate": 1.1821329055138056e-06, "loss": 0.058, "step": 1400 }, { "epoch": 0.023811936366399836, "grad_norm": 2.0652964115142822, "learning_rate": 1.1905767119817614e-06, "loss": 0.0613, "step": 1410 }, { "epoch": 0.023980815347721823, "grad_norm": 1.8417154550552368, "learning_rate": 1.1990205184497173e-06, "loss": 0.0607, "step": 1420 }, { "epoch": 0.02414969432904381, "grad_norm": 1.823146939277649, "learning_rate": 1.207464324917673e-06, "loss": 0.0591, "step": 1430 }, { "epoch": 0.02431857331036579, "grad_norm": 1.5554646253585815, "learning_rate": 1.2159081313856287e-06, "loss": 0.07, "step": 1440 }, { "epoch": 0.024487452291687778, "grad_norm": 2.0357418060302734, "learning_rate": 1.2243519378535844e-06, "loss": 0.0584, "step": 1450 }, { "epoch": 0.02465633127300976, "grad_norm": 1.6744521856307983, "learning_rate": 1.2327957443215402e-06, "loss": 0.0633, "step": 1460 }, { "epoch": 0.024825210254331747, "grad_norm": 1.5535032749176025, "learning_rate": 1.241239550789496e-06, "loss": 0.0608, "step": 1470 }, { "epoch": 0.02499408923565373, "grad_norm": 1.9693783521652222, "learning_rate": 1.2496833572574518e-06, "loss": 0.0545, "step": 1480 }, { "epoch": 0.025162968216975716, "grad_norm": 2.4152817726135254, "learning_rate": 1.2581271637254075e-06, "loss": 0.0598, "step": 1490 }, { "epoch": 0.0253318471982977, "grad_norm": 2.1319620609283447, "learning_rate": 1.2665709701933632e-06, "loss": 0.0716, "step": 1500 }, { "epoch": 0.025500726179619685, "grad_norm": 2.087752103805542, "learning_rate": 1.2750147766613192e-06, "loss": 0.0589, "step": 1510 }, { "epoch": 0.025669605160941668, "grad_norm": 1.718497633934021, "learning_rate": 1.2834585831292749e-06, "loss": 0.0701, "step": 1520 }, { "epoch": 0.025838484142263654, "grad_norm": 1.7935630083084106, "learning_rate": 1.2919023895972304e-06, "loss": 0.0554, "step": 1530 }, { "epoch": 0.026007363123585637, "grad_norm": 1.5332882404327393, "learning_rate": 1.3003461960651863e-06, "loss": 0.0528, "step": 1540 }, { "epoch": 0.026176242104907623, "grad_norm": 1.6784400939941406, "learning_rate": 1.308790002533142e-06, "loss": 0.0501, "step": 1550 }, { "epoch": 0.02634512108622961, "grad_norm": 1.5685261487960815, "learning_rate": 1.3172338090010978e-06, "loss": 0.0593, "step": 1560 }, { "epoch": 0.026514000067551592, "grad_norm": 1.7446118593215942, "learning_rate": 1.3256776154690535e-06, "loss": 0.0557, "step": 1570 }, { "epoch": 0.02668287904887358, "grad_norm": 2.1118271350860596, "learning_rate": 1.3341214219370094e-06, "loss": 0.0646, "step": 1580 }, { "epoch": 0.02685175803019556, "grad_norm": 1.6081197261810303, "learning_rate": 1.3425652284049651e-06, "loss": 0.0436, "step": 1590 }, { "epoch": 0.027020637011517547, "grad_norm": 1.5438333749771118, "learning_rate": 1.3510090348729208e-06, "loss": 0.063, "step": 1600 }, { "epoch": 0.02718951599283953, "grad_norm": 1.410036563873291, "learning_rate": 1.3594528413408768e-06, "loss": 0.0504, "step": 1610 }, { "epoch": 0.027358394974161516, "grad_norm": 1.960606336593628, "learning_rate": 1.3678966478088323e-06, "loss": 0.0698, "step": 1620 }, { "epoch": 0.0275272739554835, "grad_norm": 1.6780805587768555, "learning_rate": 1.376340454276788e-06, "loss": 0.0555, "step": 1630 }, { "epoch": 0.027696152936805486, "grad_norm": 2.481440782546997, "learning_rate": 1.384784260744744e-06, "loss": 0.0607, "step": 1640 }, { "epoch": 0.02786503191812747, "grad_norm": 1.9041049480438232, "learning_rate": 1.3932280672126996e-06, "loss": 0.055, "step": 1650 }, { "epoch": 0.028033910899449455, "grad_norm": 1.4848712682724, "learning_rate": 1.4016718736806553e-06, "loss": 0.077, "step": 1660 }, { "epoch": 0.02820278988077144, "grad_norm": 1.6858974695205688, "learning_rate": 1.410115680148611e-06, "loss": 0.0729, "step": 1670 }, { "epoch": 0.028371668862093424, "grad_norm": 1.6298991441726685, "learning_rate": 1.418559486616567e-06, "loss": 0.056, "step": 1680 }, { "epoch": 0.02854054784341541, "grad_norm": 2.075746536254883, "learning_rate": 1.4270032930845227e-06, "loss": 0.0604, "step": 1690 }, { "epoch": 0.028709426824737393, "grad_norm": 1.8184205293655396, "learning_rate": 1.4354470995524782e-06, "loss": 0.0605, "step": 1700 }, { "epoch": 0.02887830580605938, "grad_norm": 2.312771797180176, "learning_rate": 1.4438909060204344e-06, "loss": 0.0624, "step": 1710 }, { "epoch": 0.02904718478738136, "grad_norm": 1.6242586374282837, "learning_rate": 1.4523347124883899e-06, "loss": 0.0528, "step": 1720 }, { "epoch": 0.029216063768703348, "grad_norm": 1.8352328538894653, "learning_rate": 1.4607785189563456e-06, "loss": 0.068, "step": 1730 }, { "epoch": 0.02938494275002533, "grad_norm": 1.5413485765457153, "learning_rate": 1.4692223254243013e-06, "loss": 0.0626, "step": 1740 }, { "epoch": 0.029553821731347317, "grad_norm": 1.7656166553497314, "learning_rate": 1.4776661318922572e-06, "loss": 0.0574, "step": 1750 }, { "epoch": 0.0297227007126693, "grad_norm": 1.7710692882537842, "learning_rate": 1.486109938360213e-06, "loss": 0.0641, "step": 1760 }, { "epoch": 0.029891579693991286, "grad_norm": 1.8695980310440063, "learning_rate": 1.4945537448281687e-06, "loss": 0.0531, "step": 1770 }, { "epoch": 0.03006045867531327, "grad_norm": 1.7137089967727661, "learning_rate": 1.5029975512961246e-06, "loss": 0.0631, "step": 1780 }, { "epoch": 0.030229337656635255, "grad_norm": 1.2994147539138794, "learning_rate": 1.51144135776408e-06, "loss": 0.0567, "step": 1790 }, { "epoch": 0.03039821663795724, "grad_norm": 1.5220773220062256, "learning_rate": 1.5198851642320358e-06, "loss": 0.0536, "step": 1800 }, { "epoch": 0.030567095619279224, "grad_norm": 1.4681472778320312, "learning_rate": 1.5283289706999917e-06, "loss": 0.0703, "step": 1810 }, { "epoch": 0.03073597460060121, "grad_norm": 1.1753894090652466, "learning_rate": 1.5367727771679474e-06, "loss": 0.0401, "step": 1820 }, { "epoch": 0.030904853581923193, "grad_norm": 1.8973299264907837, "learning_rate": 1.5452165836359032e-06, "loss": 0.0523, "step": 1830 }, { "epoch": 0.03107373256324518, "grad_norm": 2.3283848762512207, "learning_rate": 1.5536603901038589e-06, "loss": 0.06, "step": 1840 }, { "epoch": 0.031242611544567162, "grad_norm": 1.801364779472351, "learning_rate": 1.5621041965718148e-06, "loss": 0.0746, "step": 1850 }, { "epoch": 0.031411490525889145, "grad_norm": 1.7742398977279663, "learning_rate": 1.5705480030397705e-06, "loss": 0.0483, "step": 1860 }, { "epoch": 0.03158036950721113, "grad_norm": 1.942561149597168, "learning_rate": 1.578991809507726e-06, "loss": 0.0641, "step": 1870 }, { "epoch": 0.03174924848853312, "grad_norm": 1.526241421699524, "learning_rate": 1.5874356159756822e-06, "loss": 0.0652, "step": 1880 }, { "epoch": 0.031918127469855104, "grad_norm": 1.9740265607833862, "learning_rate": 1.5958794224436377e-06, "loss": 0.0485, "step": 1890 }, { "epoch": 0.03208700645117708, "grad_norm": 1.7940231561660767, "learning_rate": 1.6043232289115934e-06, "loss": 0.0546, "step": 1900 }, { "epoch": 0.03225588543249907, "grad_norm": 1.8034690618515015, "learning_rate": 1.6127670353795491e-06, "loss": 0.0482, "step": 1910 }, { "epoch": 0.032424764413821056, "grad_norm": 1.4633737802505493, "learning_rate": 1.621210841847505e-06, "loss": 0.0573, "step": 1920 }, { "epoch": 0.03259364339514304, "grad_norm": 1.3600189685821533, "learning_rate": 1.6296546483154608e-06, "loss": 0.047, "step": 1930 }, { "epoch": 0.03276252237646503, "grad_norm": 1.6953165531158447, "learning_rate": 1.6380984547834165e-06, "loss": 0.0666, "step": 1940 }, { "epoch": 0.03293140135778701, "grad_norm": 1.5388325452804565, "learning_rate": 1.6465422612513724e-06, "loss": 0.0638, "step": 1950 }, { "epoch": 0.033100280339108994, "grad_norm": 1.3664612770080566, "learning_rate": 1.6549860677193281e-06, "loss": 0.0539, "step": 1960 }, { "epoch": 0.03326915932043098, "grad_norm": 1.6941826343536377, "learning_rate": 1.6634298741872836e-06, "loss": 0.055, "step": 1970 }, { "epoch": 0.033438038301752966, "grad_norm": 1.9536076784133911, "learning_rate": 1.6718736806552396e-06, "loss": 0.0541, "step": 1980 }, { "epoch": 0.033606917283074945, "grad_norm": 1.2788434028625488, "learning_rate": 1.6803174871231953e-06, "loss": 0.0502, "step": 1990 }, { "epoch": 0.03377579626439693, "grad_norm": 1.9091600179672241, "learning_rate": 1.688761293591151e-06, "loss": 0.0582, "step": 2000 }, { "epoch": 0.03394467524571892, "grad_norm": 1.692077398300171, "learning_rate": 1.6972051000591067e-06, "loss": 0.0671, "step": 2010 }, { "epoch": 0.034113554227040904, "grad_norm": 1.0663591623306274, "learning_rate": 1.7056489065270626e-06, "loss": 0.0522, "step": 2020 }, { "epoch": 0.03428243320836289, "grad_norm": 1.3059645891189575, "learning_rate": 1.7140927129950183e-06, "loss": 0.0538, "step": 2030 }, { "epoch": 0.03445131218968487, "grad_norm": 1.8319107294082642, "learning_rate": 1.7225365194629739e-06, "loss": 0.0432, "step": 2040 }, { "epoch": 0.034620191171006856, "grad_norm": 1.3561835289001465, "learning_rate": 1.73098032593093e-06, "loss": 0.0413, "step": 2050 }, { "epoch": 0.03478907015232884, "grad_norm": 1.7769232988357544, "learning_rate": 1.7394241323988855e-06, "loss": 0.0506, "step": 2060 }, { "epoch": 0.03495794913365083, "grad_norm": 1.5962598323822021, "learning_rate": 1.7478679388668412e-06, "loss": 0.0493, "step": 2070 }, { "epoch": 0.03512682811497281, "grad_norm": 1.9048935174942017, "learning_rate": 1.7563117453347971e-06, "loss": 0.0575, "step": 2080 }, { "epoch": 0.035295707096294794, "grad_norm": 0.8531430959701538, "learning_rate": 1.7647555518027529e-06, "loss": 0.0452, "step": 2090 }, { "epoch": 0.03546458607761678, "grad_norm": 1.2369892597198486, "learning_rate": 1.7731993582707086e-06, "loss": 0.0454, "step": 2100 }, { "epoch": 0.03563346505893877, "grad_norm": 2.01666259765625, "learning_rate": 1.7816431647386643e-06, "loss": 0.0471, "step": 2110 }, { "epoch": 0.035802344040260746, "grad_norm": 1.3217118978500366, "learning_rate": 1.7900869712066202e-06, "loss": 0.0515, "step": 2120 }, { "epoch": 0.03597122302158273, "grad_norm": 1.3094022274017334, "learning_rate": 1.798530777674576e-06, "loss": 0.0573, "step": 2130 }, { "epoch": 0.03614010200290472, "grad_norm": 1.4364557266235352, "learning_rate": 1.8069745841425314e-06, "loss": 0.0586, "step": 2140 }, { "epoch": 0.036308980984226705, "grad_norm": 1.932620882987976, "learning_rate": 1.8154183906104874e-06, "loss": 0.0553, "step": 2150 }, { "epoch": 0.03647785996554869, "grad_norm": 3.604820728302002, "learning_rate": 1.823862197078443e-06, "loss": 0.0442, "step": 2160 }, { "epoch": 0.03664673894687067, "grad_norm": 1.605178713798523, "learning_rate": 1.8323060035463988e-06, "loss": 0.0576, "step": 2170 }, { "epoch": 0.03681561792819266, "grad_norm": 2.334921360015869, "learning_rate": 1.8407498100143545e-06, "loss": 0.0478, "step": 2180 }, { "epoch": 0.03698449690951464, "grad_norm": 1.4013651609420776, "learning_rate": 1.8491936164823105e-06, "loss": 0.0547, "step": 2190 }, { "epoch": 0.03715337589083663, "grad_norm": 2.0412580966949463, "learning_rate": 1.8576374229502662e-06, "loss": 0.0537, "step": 2200 }, { "epoch": 0.03732225487215861, "grad_norm": 1.0916481018066406, "learning_rate": 1.8660812294182219e-06, "loss": 0.0492, "step": 2210 }, { "epoch": 0.037491133853480595, "grad_norm": 0.9734089970588684, "learning_rate": 1.8745250358861778e-06, "loss": 0.0459, "step": 2220 }, { "epoch": 0.03766001283480258, "grad_norm": 1.4113138914108276, "learning_rate": 1.8829688423541333e-06, "loss": 0.0443, "step": 2230 }, { "epoch": 0.03782889181612457, "grad_norm": 1.2241610288619995, "learning_rate": 1.891412648822089e-06, "loss": 0.0432, "step": 2240 }, { "epoch": 0.037997770797446546, "grad_norm": 1.8776354789733887, "learning_rate": 1.899856455290045e-06, "loss": 0.0621, "step": 2250 }, { "epoch": 0.03816664977876853, "grad_norm": 1.113374948501587, "learning_rate": 1.908300261758001e-06, "loss": 0.0421, "step": 2260 }, { "epoch": 0.03833552876009052, "grad_norm": 1.831743597984314, "learning_rate": 1.916744068225956e-06, "loss": 0.0393, "step": 2270 }, { "epoch": 0.038504407741412505, "grad_norm": 1.801637053489685, "learning_rate": 1.925187874693912e-06, "loss": 0.0655, "step": 2280 }, { "epoch": 0.03867328672273449, "grad_norm": 1.624947428703308, "learning_rate": 1.933631681161868e-06, "loss": 0.0541, "step": 2290 }, { "epoch": 0.03884216570405647, "grad_norm": 1.6676300764083862, "learning_rate": 1.9420754876298238e-06, "loss": 0.051, "step": 2300 }, { "epoch": 0.03901104468537846, "grad_norm": 1.9495735168457031, "learning_rate": 1.9505192940977795e-06, "loss": 0.0518, "step": 2310 }, { "epoch": 0.03917992366670044, "grad_norm": 1.8682224750518799, "learning_rate": 1.958963100565735e-06, "loss": 0.05, "step": 2320 }, { "epoch": 0.03934880264802243, "grad_norm": 1.753471851348877, "learning_rate": 1.967406907033691e-06, "loss": 0.0489, "step": 2330 }, { "epoch": 0.03951768162934441, "grad_norm": 1.5556252002716064, "learning_rate": 1.9758507135016466e-06, "loss": 0.0678, "step": 2340 }, { "epoch": 0.039686560610666395, "grad_norm": 1.4327486753463745, "learning_rate": 1.9842945199696028e-06, "loss": 0.039, "step": 2350 }, { "epoch": 0.03985543959198838, "grad_norm": 1.7232033014297485, "learning_rate": 1.992738326437558e-06, "loss": 0.0488, "step": 2360 }, { "epoch": 0.04002431857331037, "grad_norm": 1.4707356691360474, "learning_rate": 2.0011821329055138e-06, "loss": 0.0579, "step": 2370 }, { "epoch": 0.040193197554632354, "grad_norm": 1.9278966188430786, "learning_rate": 2.0096259393734695e-06, "loss": 0.0431, "step": 2380 }, { "epoch": 0.04036207653595433, "grad_norm": 1.8454171419143677, "learning_rate": 2.0180697458414256e-06, "loss": 0.0467, "step": 2390 }, { "epoch": 0.04053095551727632, "grad_norm": 2.1945858001708984, "learning_rate": 2.0265135523093814e-06, "loss": 0.0464, "step": 2400 }, { "epoch": 0.040699834498598306, "grad_norm": 1.4430819749832153, "learning_rate": 2.034957358777337e-06, "loss": 0.0463, "step": 2410 }, { "epoch": 0.04086871347992029, "grad_norm": 1.394455909729004, "learning_rate": 2.0434011652452928e-06, "loss": 0.0433, "step": 2420 }, { "epoch": 0.04103759246124227, "grad_norm": 1.789002537727356, "learning_rate": 2.0518449717132485e-06, "loss": 0.0606, "step": 2430 }, { "epoch": 0.04120647144256426, "grad_norm": 1.402905821800232, "learning_rate": 2.0602887781812042e-06, "loss": 0.0414, "step": 2440 }, { "epoch": 0.041375350423886244, "grad_norm": 1.8854436874389648, "learning_rate": 2.06873258464916e-06, "loss": 0.0507, "step": 2450 }, { "epoch": 0.04154422940520823, "grad_norm": 1.631987452507019, "learning_rate": 2.0771763911171157e-06, "loss": 0.0507, "step": 2460 }, { "epoch": 0.04171310838653021, "grad_norm": 1.565899133682251, "learning_rate": 2.0856201975850714e-06, "loss": 0.0563, "step": 2470 }, { "epoch": 0.041881987367852196, "grad_norm": 1.3544073104858398, "learning_rate": 2.094064004053027e-06, "loss": 0.0436, "step": 2480 }, { "epoch": 0.04205086634917418, "grad_norm": 2.4774322509765625, "learning_rate": 2.1025078105209832e-06, "loss": 0.0448, "step": 2490 }, { "epoch": 0.04221974533049617, "grad_norm": 1.2989330291748047, "learning_rate": 2.110951616988939e-06, "loss": 0.0448, "step": 2500 }, { "epoch": 0.042388624311818154, "grad_norm": 1.2610119581222534, "learning_rate": 2.1193954234568947e-06, "loss": 0.0486, "step": 2510 }, { "epoch": 0.042557503293140134, "grad_norm": 1.6147167682647705, "learning_rate": 2.1278392299248504e-06, "loss": 0.0501, "step": 2520 }, { "epoch": 0.04272638227446212, "grad_norm": 1.4339394569396973, "learning_rate": 2.136283036392806e-06, "loss": 0.0473, "step": 2530 }, { "epoch": 0.042895261255784106, "grad_norm": 0.8780645132064819, "learning_rate": 2.144726842860762e-06, "loss": 0.0429, "step": 2540 }, { "epoch": 0.04306414023710609, "grad_norm": 1.5299713611602783, "learning_rate": 2.1531706493287175e-06, "loss": 0.0495, "step": 2550 }, { "epoch": 0.04323301921842807, "grad_norm": 1.9701794385910034, "learning_rate": 2.1616144557966732e-06, "loss": 0.0305, "step": 2560 }, { "epoch": 0.04340189819975006, "grad_norm": 1.2827001810073853, "learning_rate": 2.170058262264629e-06, "loss": 0.042, "step": 2570 }, { "epoch": 0.043570777181072044, "grad_norm": 1.9396741390228271, "learning_rate": 2.1785020687325847e-06, "loss": 0.0455, "step": 2580 }, { "epoch": 0.04373965616239403, "grad_norm": 2.108351469039917, "learning_rate": 2.186945875200541e-06, "loss": 0.0383, "step": 2590 }, { "epoch": 0.04390853514371601, "grad_norm": 1.5489214658737183, "learning_rate": 2.1953896816684965e-06, "loss": 0.0448, "step": 2600 }, { "epoch": 0.044077414125037996, "grad_norm": 1.5643980503082275, "learning_rate": 2.203833488136452e-06, "loss": 0.0494, "step": 2610 }, { "epoch": 0.04424629310635998, "grad_norm": 2.522033929824829, "learning_rate": 2.2122772946044075e-06, "loss": 0.056, "step": 2620 }, { "epoch": 0.04441517208768197, "grad_norm": 1.8270297050476074, "learning_rate": 2.2207211010723637e-06, "loss": 0.0531, "step": 2630 }, { "epoch": 0.044584051069003955, "grad_norm": 1.4499754905700684, "learning_rate": 2.2291649075403194e-06, "loss": 0.0568, "step": 2640 }, { "epoch": 0.044752930050325934, "grad_norm": 1.2191816568374634, "learning_rate": 2.237608714008275e-06, "loss": 0.0386, "step": 2650 }, { "epoch": 0.04492180903164792, "grad_norm": 1.0781676769256592, "learning_rate": 2.246052520476231e-06, "loss": 0.0443, "step": 2660 }, { "epoch": 0.04509068801296991, "grad_norm": 1.3575494289398193, "learning_rate": 2.2544963269441866e-06, "loss": 0.0519, "step": 2670 }, { "epoch": 0.04525956699429189, "grad_norm": 1.3579012155532837, "learning_rate": 2.2629401334121423e-06, "loss": 0.0407, "step": 2680 }, { "epoch": 0.04542844597561387, "grad_norm": 2.418639659881592, "learning_rate": 2.2713839398800984e-06, "loss": 0.0499, "step": 2690 }, { "epoch": 0.04559732495693586, "grad_norm": 1.2846612930297852, "learning_rate": 2.279827746348054e-06, "loss": 0.0484, "step": 2700 }, { "epoch": 0.045766203938257845, "grad_norm": 1.309875249862671, "learning_rate": 2.2882715528160094e-06, "loss": 0.0408, "step": 2710 }, { "epoch": 0.04593508291957983, "grad_norm": 1.0566610097885132, "learning_rate": 2.296715359283965e-06, "loss": 0.0386, "step": 2720 }, { "epoch": 0.04610396190090181, "grad_norm": 1.7260029315948486, "learning_rate": 2.3051591657519213e-06, "loss": 0.0527, "step": 2730 }, { "epoch": 0.0462728408822238, "grad_norm": 1.9203428030014038, "learning_rate": 2.313602972219877e-06, "loss": 0.0439, "step": 2740 }, { "epoch": 0.04644171986354578, "grad_norm": 1.4393073320388794, "learning_rate": 2.3220467786878327e-06, "loss": 0.0433, "step": 2750 }, { "epoch": 0.04661059884486777, "grad_norm": 1.2651625871658325, "learning_rate": 2.3304905851557884e-06, "loss": 0.0551, "step": 2760 }, { "epoch": 0.046779477826189755, "grad_norm": 1.8014253377914429, "learning_rate": 2.338934391623744e-06, "loss": 0.0453, "step": 2770 }, { "epoch": 0.046948356807511735, "grad_norm": 1.7524049282073975, "learning_rate": 2.3473781980917e-06, "loss": 0.0436, "step": 2780 }, { "epoch": 0.04711723578883372, "grad_norm": 1.9497369527816772, "learning_rate": 2.355822004559656e-06, "loss": 0.0492, "step": 2790 }, { "epoch": 0.04728611477015571, "grad_norm": 1.2166839838027954, "learning_rate": 2.3642658110276113e-06, "loss": 0.0491, "step": 2800 }, { "epoch": 0.04745499375147769, "grad_norm": 1.23659086227417, "learning_rate": 2.372709617495567e-06, "loss": 0.0428, "step": 2810 }, { "epoch": 0.04762387273279967, "grad_norm": 1.4529292583465576, "learning_rate": 2.3811534239635227e-06, "loss": 0.0441, "step": 2820 }, { "epoch": 0.04779275171412166, "grad_norm": 2.0341522693634033, "learning_rate": 2.389597230431479e-06, "loss": 0.0499, "step": 2830 }, { "epoch": 0.047961630695443645, "grad_norm": 1.328629970550537, "learning_rate": 2.3980410368994346e-06, "loss": 0.0378, "step": 2840 }, { "epoch": 0.04813050967676563, "grad_norm": 1.3418527841567993, "learning_rate": 2.4064848433673903e-06, "loss": 0.0466, "step": 2850 }, { "epoch": 0.04829938865808762, "grad_norm": 1.2660189867019653, "learning_rate": 2.414928649835346e-06, "loss": 0.0511, "step": 2860 }, { "epoch": 0.0484682676394096, "grad_norm": 0.9920435547828674, "learning_rate": 2.4233724563033017e-06, "loss": 0.033, "step": 2870 }, { "epoch": 0.04863714662073158, "grad_norm": 1.2757459878921509, "learning_rate": 2.4318162627712575e-06, "loss": 0.0458, "step": 2880 }, { "epoch": 0.04880602560205357, "grad_norm": 1.546118974685669, "learning_rate": 2.440260069239213e-06, "loss": 0.0469, "step": 2890 }, { "epoch": 0.048974904583375556, "grad_norm": 1.1236437559127808, "learning_rate": 2.448703875707169e-06, "loss": 0.0511, "step": 2900 }, { "epoch": 0.049143783564697535, "grad_norm": 1.1466679573059082, "learning_rate": 2.4571476821751246e-06, "loss": 0.0454, "step": 2910 }, { "epoch": 0.04931266254601952, "grad_norm": 1.4250552654266357, "learning_rate": 2.4655914886430803e-06, "loss": 0.0505, "step": 2920 }, { "epoch": 0.04948154152734151, "grad_norm": 1.9872429370880127, "learning_rate": 2.4740352951110365e-06, "loss": 0.0432, "step": 2930 }, { "epoch": 0.049650420508663494, "grad_norm": 1.230780839920044, "learning_rate": 2.482479101578992e-06, "loss": 0.0579, "step": 2940 }, { "epoch": 0.04981929948998547, "grad_norm": 1.9713211059570312, "learning_rate": 2.490922908046948e-06, "loss": 0.0447, "step": 2950 }, { "epoch": 0.04998817847130746, "grad_norm": 1.5888829231262207, "learning_rate": 2.4993667145149036e-06, "loss": 0.047, "step": 2960 }, { "epoch": 0.050157057452629446, "grad_norm": 1.1290431022644043, "learning_rate": 2.507810520982859e-06, "loss": 0.0447, "step": 2970 }, { "epoch": 0.05032593643395143, "grad_norm": 1.471766710281372, "learning_rate": 2.516254327450815e-06, "loss": 0.0469, "step": 2980 }, { "epoch": 0.05049481541527342, "grad_norm": 1.6369303464889526, "learning_rate": 2.5246981339187708e-06, "loss": 0.0487, "step": 2990 }, { "epoch": 0.0506636943965954, "grad_norm": 2.5098061561584473, "learning_rate": 2.5331419403867265e-06, "loss": 0.0559, "step": 3000 }, { "epoch": 0.050832573377917384, "grad_norm": 1.1351569890975952, "learning_rate": 2.541585746854682e-06, "loss": 0.0391, "step": 3010 }, { "epoch": 0.05100145235923937, "grad_norm": 1.0536853075027466, "learning_rate": 2.5500295533226383e-06, "loss": 0.0388, "step": 3020 }, { "epoch": 0.051170331340561356, "grad_norm": 1.4086560010910034, "learning_rate": 2.5584733597905936e-06, "loss": 0.0478, "step": 3030 }, { "epoch": 0.051339210321883336, "grad_norm": 3.0169553756713867, "learning_rate": 2.5669171662585498e-06, "loss": 0.0386, "step": 3040 }, { "epoch": 0.05150808930320532, "grad_norm": 1.3086912631988525, "learning_rate": 2.5753609727265055e-06, "loss": 0.0477, "step": 3050 }, { "epoch": 0.05167696828452731, "grad_norm": 1.1072076559066772, "learning_rate": 2.5838047791944608e-06, "loss": 0.0427, "step": 3060 }, { "epoch": 0.051845847265849294, "grad_norm": 2.252181053161621, "learning_rate": 2.592248585662417e-06, "loss": 0.0402, "step": 3070 }, { "epoch": 0.052014726247171274, "grad_norm": 1.5490436553955078, "learning_rate": 2.6006923921303726e-06, "loss": 0.0334, "step": 3080 }, { "epoch": 0.05218360522849326, "grad_norm": 1.103030800819397, "learning_rate": 2.6091361985983284e-06, "loss": 0.0407, "step": 3090 }, { "epoch": 0.052352484209815246, "grad_norm": 1.4749923944473267, "learning_rate": 2.617580005066284e-06, "loss": 0.0416, "step": 3100 }, { "epoch": 0.05252136319113723, "grad_norm": 1.2113434076309204, "learning_rate": 2.6260238115342398e-06, "loss": 0.0408, "step": 3110 }, { "epoch": 0.05269024217245922, "grad_norm": 1.0980024337768555, "learning_rate": 2.6344676180021955e-06, "loss": 0.0477, "step": 3120 }, { "epoch": 0.0528591211537812, "grad_norm": 1.110611081123352, "learning_rate": 2.6429114244701516e-06, "loss": 0.0474, "step": 3130 }, { "epoch": 0.053028000135103184, "grad_norm": 1.4007654190063477, "learning_rate": 2.651355230938107e-06, "loss": 0.0382, "step": 3140 }, { "epoch": 0.05319687911642517, "grad_norm": 1.486162781715393, "learning_rate": 2.6597990374060627e-06, "loss": 0.0411, "step": 3150 }, { "epoch": 0.05336575809774716, "grad_norm": 0.6783000826835632, "learning_rate": 2.668242843874019e-06, "loss": 0.0372, "step": 3160 }, { "epoch": 0.053534637079069136, "grad_norm": 1.3188930749893188, "learning_rate": 2.676686650341974e-06, "loss": 0.0413, "step": 3170 }, { "epoch": 0.05370351606039112, "grad_norm": 0.9572204351425171, "learning_rate": 2.6851304568099302e-06, "loss": 0.0321, "step": 3180 }, { "epoch": 0.05387239504171311, "grad_norm": 1.444933295249939, "learning_rate": 2.693574263277886e-06, "loss": 0.0407, "step": 3190 }, { "epoch": 0.054041274023035095, "grad_norm": 1.888963222503662, "learning_rate": 2.7020180697458417e-06, "loss": 0.0469, "step": 3200 }, { "epoch": 0.05421015300435708, "grad_norm": 1.5982540845870972, "learning_rate": 2.7104618762137974e-06, "loss": 0.0384, "step": 3210 }, { "epoch": 0.05437903198567906, "grad_norm": 1.590179681777954, "learning_rate": 2.7189056826817535e-06, "loss": 0.0407, "step": 3220 }, { "epoch": 0.05454791096700105, "grad_norm": 1.4853049516677856, "learning_rate": 2.727349489149709e-06, "loss": 0.0449, "step": 3230 }, { "epoch": 0.05471678994832303, "grad_norm": 1.445435643196106, "learning_rate": 2.7357932956176645e-06, "loss": 0.0492, "step": 3240 }, { "epoch": 0.05488566892964502, "grad_norm": 1.2493444681167603, "learning_rate": 2.7442371020856207e-06, "loss": 0.0344, "step": 3250 }, { "epoch": 0.055054547910967, "grad_norm": 1.0682129859924316, "learning_rate": 2.752680908553576e-06, "loss": 0.044, "step": 3260 }, { "epoch": 0.055223426892288985, "grad_norm": 1.3165730237960815, "learning_rate": 2.761124715021532e-06, "loss": 0.0392, "step": 3270 }, { "epoch": 0.05539230587361097, "grad_norm": 1.340289831161499, "learning_rate": 2.769568521489488e-06, "loss": 0.0382, "step": 3280 }, { "epoch": 0.05556118485493296, "grad_norm": 1.6471136808395386, "learning_rate": 2.7780123279574435e-06, "loss": 0.031, "step": 3290 }, { "epoch": 0.05573006383625494, "grad_norm": 1.063820719718933, "learning_rate": 2.7864561344253993e-06, "loss": 0.0345, "step": 3300 }, { "epoch": 0.05589894281757692, "grad_norm": 1.2836464643478394, "learning_rate": 2.7948999408933545e-06, "loss": 0.037, "step": 3310 }, { "epoch": 0.05606782179889891, "grad_norm": 1.4748398065567017, "learning_rate": 2.8033437473613107e-06, "loss": 0.0404, "step": 3320 }, { "epoch": 0.056236700780220895, "grad_norm": 0.8889347910881042, "learning_rate": 2.8117875538292664e-06, "loss": 0.0372, "step": 3330 }, { "epoch": 0.05640557976154288, "grad_norm": 1.31154465675354, "learning_rate": 2.820231360297222e-06, "loss": 0.0389, "step": 3340 }, { "epoch": 0.05657445874286486, "grad_norm": 1.3657736778259277, "learning_rate": 2.828675166765178e-06, "loss": 0.0433, "step": 3350 }, { "epoch": 0.05674333772418685, "grad_norm": 1.4111806154251099, "learning_rate": 2.837118973233134e-06, "loss": 0.0412, "step": 3360 }, { "epoch": 0.056912216705508833, "grad_norm": 1.3018723726272583, "learning_rate": 2.8455627797010893e-06, "loss": 0.0471, "step": 3370 }, { "epoch": 0.05708109568683082, "grad_norm": 0.9619355201721191, "learning_rate": 2.8540065861690454e-06, "loss": 0.0462, "step": 3380 }, { "epoch": 0.0572499746681528, "grad_norm": 1.2722628116607666, "learning_rate": 2.862450392637001e-06, "loss": 0.0348, "step": 3390 }, { "epoch": 0.057418853649474785, "grad_norm": 1.1755346059799194, "learning_rate": 2.8708941991049564e-06, "loss": 0.0443, "step": 3400 }, { "epoch": 0.05758773263079677, "grad_norm": 1.5047011375427246, "learning_rate": 2.8793380055729126e-06, "loss": 0.0337, "step": 3410 }, { "epoch": 0.05775661161211876, "grad_norm": 1.0099011659622192, "learning_rate": 2.8877818120408687e-06, "loss": 0.0416, "step": 3420 }, { "epoch": 0.05792549059344074, "grad_norm": 1.2063549757003784, "learning_rate": 2.896225618508824e-06, "loss": 0.0395, "step": 3430 }, { "epoch": 0.05809436957476272, "grad_norm": 0.6962475180625916, "learning_rate": 2.9046694249767797e-06, "loss": 0.04, "step": 3440 }, { "epoch": 0.05826324855608471, "grad_norm": 1.7103729248046875, "learning_rate": 2.913113231444736e-06, "loss": 0.0339, "step": 3450 }, { "epoch": 0.058432127537406696, "grad_norm": 1.1187158823013306, "learning_rate": 2.921557037912691e-06, "loss": 0.0354, "step": 3460 }, { "epoch": 0.05860100651872868, "grad_norm": 1.716038465499878, "learning_rate": 2.9300008443806473e-06, "loss": 0.0415, "step": 3470 }, { "epoch": 0.05876988550005066, "grad_norm": 0.8300755023956299, "learning_rate": 2.9384446508486026e-06, "loss": 0.035, "step": 3480 }, { "epoch": 0.05893876448137265, "grad_norm": 1.4077914953231812, "learning_rate": 2.9468884573165583e-06, "loss": 0.0321, "step": 3490 }, { "epoch": 0.059107643462694634, "grad_norm": 1.3664056062698364, "learning_rate": 2.9553322637845144e-06, "loss": 0.038, "step": 3500 }, { "epoch": 0.05927652244401662, "grad_norm": 0.7854644656181335, "learning_rate": 2.9637760702524697e-06, "loss": 0.0289, "step": 3510 }, { "epoch": 0.0594454014253386, "grad_norm": 0.7113710641860962, "learning_rate": 2.972219876720426e-06, "loss": 0.0373, "step": 3520 }, { "epoch": 0.059614280406660586, "grad_norm": 0.6057510375976562, "learning_rate": 2.9806636831883816e-06, "loss": 0.0357, "step": 3530 }, { "epoch": 0.05978315938798257, "grad_norm": 1.5479621887207031, "learning_rate": 2.9891074896563373e-06, "loss": 0.0358, "step": 3540 }, { "epoch": 0.05995203836930456, "grad_norm": 1.3861747980117798, "learning_rate": 2.997551296124293e-06, "loss": 0.0352, "step": 3550 }, { "epoch": 0.06012091735062654, "grad_norm": 1.5647327899932861, "learning_rate": 3.005995102592249e-06, "loss": 0.0388, "step": 3560 }, { "epoch": 0.060289796331948524, "grad_norm": 1.495255470275879, "learning_rate": 3.0144389090602045e-06, "loss": 0.0339, "step": 3570 }, { "epoch": 0.06045867531327051, "grad_norm": 0.9999911785125732, "learning_rate": 3.02288271552816e-06, "loss": 0.0325, "step": 3580 }, { "epoch": 0.060627554294592496, "grad_norm": 1.0844345092773438, "learning_rate": 3.0313265219961163e-06, "loss": 0.0405, "step": 3590 }, { "epoch": 0.06079643327591448, "grad_norm": 1.4192538261413574, "learning_rate": 3.0397703284640716e-06, "loss": 0.0404, "step": 3600 }, { "epoch": 0.06096531225723646, "grad_norm": 1.6723047494888306, "learning_rate": 3.0482141349320277e-06, "loss": 0.0434, "step": 3610 }, { "epoch": 0.06113419123855845, "grad_norm": 1.4502129554748535, "learning_rate": 3.0566579413999835e-06, "loss": 0.0468, "step": 3620 }, { "epoch": 0.061303070219880434, "grad_norm": 1.9646533727645874, "learning_rate": 3.065101747867939e-06, "loss": 0.038, "step": 3630 }, { "epoch": 0.06147194920120242, "grad_norm": 1.6549303531646729, "learning_rate": 3.073545554335895e-06, "loss": 0.0359, "step": 3640 }, { "epoch": 0.0616408281825244, "grad_norm": 0.9417336583137512, "learning_rate": 3.08198936080385e-06, "loss": 0.0489, "step": 3650 }, { "epoch": 0.061809707163846386, "grad_norm": 1.076530933380127, "learning_rate": 3.0904331672718063e-06, "loss": 0.0267, "step": 3660 }, { "epoch": 0.06197858614516837, "grad_norm": 0.8277405500411987, "learning_rate": 3.0988769737397625e-06, "loss": 0.0347, "step": 3670 }, { "epoch": 0.06214746512649036, "grad_norm": 1.1691596508026123, "learning_rate": 3.1073207802077178e-06, "loss": 0.0386, "step": 3680 }, { "epoch": 0.062316344107812345, "grad_norm": 1.1090384721755981, "learning_rate": 3.1157645866756735e-06, "loss": 0.047, "step": 3690 }, { "epoch": 0.062485223089134324, "grad_norm": 0.8020485639572144, "learning_rate": 3.1242083931436296e-06, "loss": 0.0399, "step": 3700 }, { "epoch": 0.06265410207045631, "grad_norm": 1.239336609840393, "learning_rate": 3.132652199611585e-06, "loss": 0.0264, "step": 3710 }, { "epoch": 0.06282298105177829, "grad_norm": 1.1300865411758423, "learning_rate": 3.141096006079541e-06, "loss": 0.0482, "step": 3720 }, { "epoch": 0.06299186003310028, "grad_norm": 1.2585505247116089, "learning_rate": 3.1495398125474968e-06, "loss": 0.0349, "step": 3730 }, { "epoch": 0.06316073901442226, "grad_norm": 1.029784917831421, "learning_rate": 3.157983619015452e-06, "loss": 0.0328, "step": 3740 }, { "epoch": 0.06332961799574426, "grad_norm": 1.1722172498703003, "learning_rate": 3.166427425483408e-06, "loss": 0.0374, "step": 3750 }, { "epoch": 0.06349849697706623, "grad_norm": 1.0654218196868896, "learning_rate": 3.1748712319513643e-06, "loss": 0.0405, "step": 3760 }, { "epoch": 0.06366737595838821, "grad_norm": 2.3727657794952393, "learning_rate": 3.1833150384193196e-06, "loss": 0.0384, "step": 3770 }, { "epoch": 0.06383625493971021, "grad_norm": 0.7092825770378113, "learning_rate": 3.1917588448872754e-06, "loss": 0.0408, "step": 3780 }, { "epoch": 0.06400513392103219, "grad_norm": 0.9577116966247559, "learning_rate": 3.2002026513552315e-06, "loss": 0.0306, "step": 3790 }, { "epoch": 0.06417401290235417, "grad_norm": 1.9324092864990234, "learning_rate": 3.2086464578231868e-06, "loss": 0.0555, "step": 3800 }, { "epoch": 0.06434289188367616, "grad_norm": 1.7745249271392822, "learning_rate": 3.217090264291143e-06, "loss": 0.0352, "step": 3810 }, { "epoch": 0.06451177086499814, "grad_norm": 1.0225039720535278, "learning_rate": 3.2255340707590982e-06, "loss": 0.0398, "step": 3820 }, { "epoch": 0.06468064984632013, "grad_norm": 0.7767083048820496, "learning_rate": 3.233977877227054e-06, "loss": 0.0392, "step": 3830 }, { "epoch": 0.06484952882764211, "grad_norm": 0.8499109148979187, "learning_rate": 3.24242168369501e-06, "loss": 0.0325, "step": 3840 }, { "epoch": 0.06501840780896409, "grad_norm": 2.0226261615753174, "learning_rate": 3.2508654901629654e-06, "loss": 0.047, "step": 3850 }, { "epoch": 0.06518728679028608, "grad_norm": 1.387879729270935, "learning_rate": 3.2593092966309215e-06, "loss": 0.0419, "step": 3860 }, { "epoch": 0.06535616577160806, "grad_norm": 1.6518296003341675, "learning_rate": 3.2677531030988772e-06, "loss": 0.0386, "step": 3870 }, { "epoch": 0.06552504475293006, "grad_norm": 0.9880000352859497, "learning_rate": 3.276196909566833e-06, "loss": 0.0495, "step": 3880 }, { "epoch": 0.06569392373425204, "grad_norm": 1.4660706520080566, "learning_rate": 3.2846407160347887e-06, "loss": 0.0346, "step": 3890 }, { "epoch": 0.06586280271557401, "grad_norm": 1.0483763217926025, "learning_rate": 3.293084522502745e-06, "loss": 0.0412, "step": 3900 }, { "epoch": 0.06603168169689601, "grad_norm": 1.4879200458526611, "learning_rate": 3.3015283289707e-06, "loss": 0.0329, "step": 3910 }, { "epoch": 0.06620056067821799, "grad_norm": 1.3209527730941772, "learning_rate": 3.3099721354386562e-06, "loss": 0.0347, "step": 3920 }, { "epoch": 0.06636943965953998, "grad_norm": 1.257446527481079, "learning_rate": 3.318415941906612e-06, "loss": 0.0386, "step": 3930 }, { "epoch": 0.06653831864086196, "grad_norm": 1.08904230594635, "learning_rate": 3.3268597483745672e-06, "loss": 0.0351, "step": 3940 }, { "epoch": 0.06670719762218394, "grad_norm": 0.8985928893089294, "learning_rate": 3.3353035548425234e-06, "loss": 0.0435, "step": 3950 }, { "epoch": 0.06687607660350593, "grad_norm": 1.0589429140090942, "learning_rate": 3.343747361310479e-06, "loss": 0.0325, "step": 3960 }, { "epoch": 0.06704495558482791, "grad_norm": 1.2211490869522095, "learning_rate": 3.352191167778435e-06, "loss": 0.0394, "step": 3970 }, { "epoch": 0.06721383456614989, "grad_norm": 2.073185443878174, "learning_rate": 3.3606349742463905e-06, "loss": 0.0434, "step": 3980 }, { "epoch": 0.06738271354747188, "grad_norm": 0.997965931892395, "learning_rate": 3.3690787807143467e-06, "loss": 0.0467, "step": 3990 }, { "epoch": 0.06755159252879386, "grad_norm": 1.2670109272003174, "learning_rate": 3.377522587182302e-06, "loss": 0.028, "step": 4000 }, { "epoch": 0.06772047151011586, "grad_norm": 1.550099492073059, "learning_rate": 3.385966393650258e-06, "loss": 0.0355, "step": 4010 }, { "epoch": 0.06788935049143784, "grad_norm": 1.5806294679641724, "learning_rate": 3.3944102001182134e-06, "loss": 0.0322, "step": 4020 }, { "epoch": 0.06805822947275982, "grad_norm": 1.3001365661621094, "learning_rate": 3.402854006586169e-06, "loss": 0.0366, "step": 4030 }, { "epoch": 0.06822710845408181, "grad_norm": 0.9627389311790466, "learning_rate": 3.4112978130541253e-06, "loss": 0.0326, "step": 4040 }, { "epoch": 0.06839598743540379, "grad_norm": 1.2563292980194092, "learning_rate": 3.4197416195220806e-06, "loss": 0.0289, "step": 4050 }, { "epoch": 0.06856486641672578, "grad_norm": 1.5162264108657837, "learning_rate": 3.4281854259900367e-06, "loss": 0.0364, "step": 4060 }, { "epoch": 0.06873374539804776, "grad_norm": 1.0253645181655884, "learning_rate": 3.4366292324579924e-06, "loss": 0.0369, "step": 4070 }, { "epoch": 0.06890262437936974, "grad_norm": 2.18703031539917, "learning_rate": 3.4450730389259477e-06, "loss": 0.0484, "step": 4080 }, { "epoch": 0.06907150336069173, "grad_norm": 0.791836142539978, "learning_rate": 3.453516845393904e-06, "loss": 0.0354, "step": 4090 }, { "epoch": 0.06924038234201371, "grad_norm": 1.7538957595825195, "learning_rate": 3.46196065186186e-06, "loss": 0.0448, "step": 4100 }, { "epoch": 0.06940926132333569, "grad_norm": 0.877981424331665, "learning_rate": 3.4704044583298153e-06, "loss": 0.0343, "step": 4110 }, { "epoch": 0.06957814030465768, "grad_norm": 1.0726875066757202, "learning_rate": 3.478848264797771e-06, "loss": 0.0376, "step": 4120 }, { "epoch": 0.06974701928597966, "grad_norm": 0.965667188167572, "learning_rate": 3.487292071265727e-06, "loss": 0.0391, "step": 4130 }, { "epoch": 0.06991589826730166, "grad_norm": 1.931389570236206, "learning_rate": 3.4957358777336824e-06, "loss": 0.0382, "step": 4140 }, { "epoch": 0.07008477724862364, "grad_norm": 1.253759503364563, "learning_rate": 3.5041796842016386e-06, "loss": 0.0307, "step": 4150 }, { "epoch": 0.07025365622994562, "grad_norm": 0.8634681105613708, "learning_rate": 3.5126234906695943e-06, "loss": 0.0346, "step": 4160 }, { "epoch": 0.07042253521126761, "grad_norm": 1.077595829963684, "learning_rate": 3.52106729713755e-06, "loss": 0.0418, "step": 4170 }, { "epoch": 0.07059141419258959, "grad_norm": 0.8953436017036438, "learning_rate": 3.5295111036055057e-06, "loss": 0.0461, "step": 4180 }, { "epoch": 0.07076029317391158, "grad_norm": 1.4149075746536255, "learning_rate": 3.537954910073461e-06, "loss": 0.035, "step": 4190 }, { "epoch": 0.07092917215523356, "grad_norm": 1.2377710342407227, "learning_rate": 3.546398716541417e-06, "loss": 0.0461, "step": 4200 }, { "epoch": 0.07109805113655554, "grad_norm": 1.382753849029541, "learning_rate": 3.554842523009373e-06, "loss": 0.0346, "step": 4210 }, { "epoch": 0.07126693011787753, "grad_norm": 0.8931282162666321, "learning_rate": 3.5632863294773286e-06, "loss": 0.0391, "step": 4220 }, { "epoch": 0.07143580909919951, "grad_norm": 1.3630508184432983, "learning_rate": 3.5717301359452843e-06, "loss": 0.048, "step": 4230 }, { "epoch": 0.07160468808052149, "grad_norm": 0.677607536315918, "learning_rate": 3.5801739424132404e-06, "loss": 0.04, "step": 4240 }, { "epoch": 0.07177356706184349, "grad_norm": 0.7360579371452332, "learning_rate": 3.5886177488811957e-06, "loss": 0.0443, "step": 4250 }, { "epoch": 0.07194244604316546, "grad_norm": 1.412963628768921, "learning_rate": 3.597061555349152e-06, "loss": 0.0343, "step": 4260 }, { "epoch": 0.07211132502448746, "grad_norm": 1.404305100440979, "learning_rate": 3.6055053618171076e-06, "loss": 0.0374, "step": 4270 }, { "epoch": 0.07228020400580944, "grad_norm": 1.1884757280349731, "learning_rate": 3.613949168285063e-06, "loss": 0.0384, "step": 4280 }, { "epoch": 0.07244908298713142, "grad_norm": 1.0378539562225342, "learning_rate": 3.622392974753019e-06, "loss": 0.0385, "step": 4290 }, { "epoch": 0.07261796196845341, "grad_norm": 1.0136774778366089, "learning_rate": 3.6308367812209747e-06, "loss": 0.0316, "step": 4300 }, { "epoch": 0.07278684094977539, "grad_norm": 0.9616747498512268, "learning_rate": 3.6392805876889305e-06, "loss": 0.0358, "step": 4310 }, { "epoch": 0.07295571993109738, "grad_norm": 0.64796382188797, "learning_rate": 3.647724394156886e-06, "loss": 0.0316, "step": 4320 }, { "epoch": 0.07312459891241936, "grad_norm": 0.8659948706626892, "learning_rate": 3.6561682006248423e-06, "loss": 0.0413, "step": 4330 }, { "epoch": 0.07329347789374134, "grad_norm": 0.8856196403503418, "learning_rate": 3.6646120070927976e-06, "loss": 0.034, "step": 4340 }, { "epoch": 0.07346235687506333, "grad_norm": 0.9550674557685852, "learning_rate": 3.6730558135607538e-06, "loss": 0.0314, "step": 4350 }, { "epoch": 0.07363123585638531, "grad_norm": 0.9058743715286255, "learning_rate": 3.681499620028709e-06, "loss": 0.0362, "step": 4360 }, { "epoch": 0.07380011483770729, "grad_norm": 1.113545298576355, "learning_rate": 3.6899434264966648e-06, "loss": 0.0379, "step": 4370 }, { "epoch": 0.07396899381902929, "grad_norm": 1.0566591024398804, "learning_rate": 3.698387232964621e-06, "loss": 0.0436, "step": 4380 }, { "epoch": 0.07413787280035126, "grad_norm": 0.9145056009292603, "learning_rate": 3.706831039432576e-06, "loss": 0.0386, "step": 4390 }, { "epoch": 0.07430675178167326, "grad_norm": 0.8822766542434692, "learning_rate": 3.7152748459005323e-06, "loss": 0.0367, "step": 4400 }, { "epoch": 0.07447563076299524, "grad_norm": 1.2156240940093994, "learning_rate": 3.723718652368488e-06, "loss": 0.0261, "step": 4410 }, { "epoch": 0.07464450974431722, "grad_norm": 0.9196016192436218, "learning_rate": 3.7321624588364438e-06, "loss": 0.0272, "step": 4420 }, { "epoch": 0.07481338872563921, "grad_norm": 0.6716427206993103, "learning_rate": 3.7406062653043995e-06, "loss": 0.0347, "step": 4430 }, { "epoch": 0.07498226770696119, "grad_norm": 1.185735821723938, "learning_rate": 3.7490500717723556e-06, "loss": 0.0378, "step": 4440 }, { "epoch": 0.07515114668828318, "grad_norm": 0.7145845293998718, "learning_rate": 3.757493878240311e-06, "loss": 0.037, "step": 4450 }, { "epoch": 0.07532002566960516, "grad_norm": 1.1240606307983398, "learning_rate": 3.7659376847082666e-06, "loss": 0.0438, "step": 4460 }, { "epoch": 0.07548890465092714, "grad_norm": 0.8652691841125488, "learning_rate": 3.7743814911762228e-06, "loss": 0.03, "step": 4470 }, { "epoch": 0.07565778363224913, "grad_norm": 1.0248202085494995, "learning_rate": 3.782825297644178e-06, "loss": 0.0546, "step": 4480 }, { "epoch": 0.07582666261357111, "grad_norm": 1.4976266622543335, "learning_rate": 3.791269104112134e-06, "loss": 0.0308, "step": 4490 }, { "epoch": 0.07599554159489309, "grad_norm": 1.1704972982406616, "learning_rate": 3.79971291058009e-06, "loss": 0.0322, "step": 4500 }, { "epoch": 0.07616442057621509, "grad_norm": 1.110110878944397, "learning_rate": 3.8081567170480456e-06, "loss": 0.03, "step": 4510 }, { "epoch": 0.07633329955753707, "grad_norm": 1.212935209274292, "learning_rate": 3.816600523516002e-06, "loss": 0.0361, "step": 4520 }, { "epoch": 0.07650217853885906, "grad_norm": 0.9736179113388062, "learning_rate": 3.825044329983957e-06, "loss": 0.0303, "step": 4530 }, { "epoch": 0.07667105752018104, "grad_norm": 1.010158658027649, "learning_rate": 3.833488136451912e-06, "loss": 0.0326, "step": 4540 }, { "epoch": 0.07683993650150302, "grad_norm": 1.191083312034607, "learning_rate": 3.841931942919869e-06, "loss": 0.0385, "step": 4550 }, { "epoch": 0.07700881548282501, "grad_norm": 0.9775597453117371, "learning_rate": 3.850375749387824e-06, "loss": 0.0325, "step": 4560 }, { "epoch": 0.07717769446414699, "grad_norm": 1.1655429601669312, "learning_rate": 3.85881955585578e-06, "loss": 0.0316, "step": 4570 }, { "epoch": 0.07734657344546898, "grad_norm": 1.221381664276123, "learning_rate": 3.867263362323736e-06, "loss": 0.0307, "step": 4580 }, { "epoch": 0.07751545242679096, "grad_norm": 0.9984418153762817, "learning_rate": 3.875707168791692e-06, "loss": 0.0257, "step": 4590 }, { "epoch": 0.07768433140811294, "grad_norm": 1.0637917518615723, "learning_rate": 3.8841509752596475e-06, "loss": 0.0276, "step": 4600 }, { "epoch": 0.07785321038943493, "grad_norm": 1.5251665115356445, "learning_rate": 3.892594781727603e-06, "loss": 0.0312, "step": 4610 }, { "epoch": 0.07802208937075691, "grad_norm": 1.0923829078674316, "learning_rate": 3.901038588195559e-06, "loss": 0.0398, "step": 4620 }, { "epoch": 0.0781909683520789, "grad_norm": 1.5831670761108398, "learning_rate": 3.909482394663515e-06, "loss": 0.0323, "step": 4630 }, { "epoch": 0.07835984733340089, "grad_norm": 1.174458384513855, "learning_rate": 3.91792620113147e-06, "loss": 0.0325, "step": 4640 }, { "epoch": 0.07852872631472287, "grad_norm": 0.9345521330833435, "learning_rate": 3.926370007599426e-06, "loss": 0.0307, "step": 4650 }, { "epoch": 0.07869760529604486, "grad_norm": 1.266530990600586, "learning_rate": 3.934813814067382e-06, "loss": 0.0402, "step": 4660 }, { "epoch": 0.07886648427736684, "grad_norm": 1.402368426322937, "learning_rate": 3.9432576205353375e-06, "loss": 0.033, "step": 4670 }, { "epoch": 0.07903536325868882, "grad_norm": 1.0880115032196045, "learning_rate": 3.951701427003293e-06, "loss": 0.0364, "step": 4680 }, { "epoch": 0.07920424224001081, "grad_norm": 1.0289287567138672, "learning_rate": 3.960145233471249e-06, "loss": 0.0333, "step": 4690 }, { "epoch": 0.07937312122133279, "grad_norm": 0.644823431968689, "learning_rate": 3.9685890399392055e-06, "loss": 0.0411, "step": 4700 }, { "epoch": 0.07954200020265478, "grad_norm": 1.1915676593780518, "learning_rate": 3.97703284640716e-06, "loss": 0.0376, "step": 4710 }, { "epoch": 0.07971087918397676, "grad_norm": 1.119989037513733, "learning_rate": 3.985476652875116e-06, "loss": 0.0453, "step": 4720 }, { "epoch": 0.07987975816529874, "grad_norm": 0.7749732732772827, "learning_rate": 3.993920459343072e-06, "loss": 0.0257, "step": 4730 }, { "epoch": 0.08004863714662074, "grad_norm": 1.2472307682037354, "learning_rate": 4.0023642658110276e-06, "loss": 0.0353, "step": 4740 }, { "epoch": 0.08021751612794271, "grad_norm": 0.6984288096427917, "learning_rate": 4.010808072278984e-06, "loss": 0.0299, "step": 4750 }, { "epoch": 0.08038639510926471, "grad_norm": 1.3735345602035522, "learning_rate": 4.019251878746939e-06, "loss": 0.0329, "step": 4760 }, { "epoch": 0.08055527409058669, "grad_norm": 0.6760706901550293, "learning_rate": 4.0276956852148956e-06, "loss": 0.0295, "step": 4770 }, { "epoch": 0.08072415307190867, "grad_norm": 0.8599599003791809, "learning_rate": 4.036139491682851e-06, "loss": 0.0294, "step": 4780 }, { "epoch": 0.08089303205323066, "grad_norm": 0.933626651763916, "learning_rate": 4.044583298150806e-06, "loss": 0.0277, "step": 4790 }, { "epoch": 0.08106191103455264, "grad_norm": 0.8998093008995056, "learning_rate": 4.053027104618763e-06, "loss": 0.0319, "step": 4800 }, { "epoch": 0.08123079001587462, "grad_norm": 1.0193257331848145, "learning_rate": 4.061470911086718e-06, "loss": 0.037, "step": 4810 }, { "epoch": 0.08139966899719661, "grad_norm": 2.009052276611328, "learning_rate": 4.069914717554674e-06, "loss": 0.0364, "step": 4820 }, { "epoch": 0.08156854797851859, "grad_norm": 0.5936148762702942, "learning_rate": 4.07835852402263e-06, "loss": 0.0345, "step": 4830 }, { "epoch": 0.08173742695984058, "grad_norm": 1.1631348133087158, "learning_rate": 4.0868023304905856e-06, "loss": 0.0345, "step": 4840 }, { "epoch": 0.08190630594116256, "grad_norm": 1.0821152925491333, "learning_rate": 4.095246136958541e-06, "loss": 0.0262, "step": 4850 }, { "epoch": 0.08207518492248454, "grad_norm": 1.0656628608703613, "learning_rate": 4.103689943426497e-06, "loss": 0.0299, "step": 4860 }, { "epoch": 0.08224406390380654, "grad_norm": 0.94742351770401, "learning_rate": 4.112133749894453e-06, "loss": 0.0533, "step": 4870 }, { "epoch": 0.08241294288512852, "grad_norm": 0.9988957643508911, "learning_rate": 4.1205775563624084e-06, "loss": 0.0283, "step": 4880 }, { "epoch": 0.08258182186645051, "grad_norm": 1.1617743968963623, "learning_rate": 4.129021362830364e-06, "loss": 0.0448, "step": 4890 }, { "epoch": 0.08275070084777249, "grad_norm": 1.1771855354309082, "learning_rate": 4.13746516929832e-06, "loss": 0.0367, "step": 4900 }, { "epoch": 0.08291957982909447, "grad_norm": 0.5939379930496216, "learning_rate": 4.145908975766276e-06, "loss": 0.0443, "step": 4910 }, { "epoch": 0.08308845881041646, "grad_norm": 1.7286912202835083, "learning_rate": 4.154352782234231e-06, "loss": 0.0378, "step": 4920 }, { "epoch": 0.08325733779173844, "grad_norm": 0.8446910977363586, "learning_rate": 4.162796588702187e-06, "loss": 0.0416, "step": 4930 }, { "epoch": 0.08342621677306042, "grad_norm": 1.161563754081726, "learning_rate": 4.171240395170143e-06, "loss": 0.0354, "step": 4940 }, { "epoch": 0.08359509575438241, "grad_norm": 1.051945447921753, "learning_rate": 4.179684201638099e-06, "loss": 0.0342, "step": 4950 }, { "epoch": 0.08376397473570439, "grad_norm": 0.8371422290802002, "learning_rate": 4.188128008106054e-06, "loss": 0.0295, "step": 4960 }, { "epoch": 0.08393285371702638, "grad_norm": 1.0762720108032227, "learning_rate": 4.19657181457401e-06, "loss": 0.0327, "step": 4970 }, { "epoch": 0.08410173269834836, "grad_norm": 0.8461849689483643, "learning_rate": 4.2050156210419665e-06, "loss": 0.0316, "step": 4980 }, { "epoch": 0.08427061167967034, "grad_norm": 0.90746009349823, "learning_rate": 4.213459427509921e-06, "loss": 0.0366, "step": 4990 }, { "epoch": 0.08443949066099234, "grad_norm": 1.1432992219924927, "learning_rate": 4.221903233977878e-06, "loss": 0.0274, "step": 5000 }, { "epoch": 0.08460836964231432, "grad_norm": 1.1541163921356201, "learning_rate": 4.230347040445834e-06, "loss": 0.0244, "step": 5010 }, { "epoch": 0.08477724862363631, "grad_norm": 1.0365073680877686, "learning_rate": 4.238790846913789e-06, "loss": 0.0385, "step": 5020 }, { "epoch": 0.08494612760495829, "grad_norm": 1.1780064105987549, "learning_rate": 4.247234653381745e-06, "loss": 0.0307, "step": 5030 }, { "epoch": 0.08511500658628027, "grad_norm": 1.133918046951294, "learning_rate": 4.255678459849701e-06, "loss": 0.0311, "step": 5040 }, { "epoch": 0.08528388556760226, "grad_norm": 1.1428512334823608, "learning_rate": 4.2641222663176565e-06, "loss": 0.0295, "step": 5050 }, { "epoch": 0.08545276454892424, "grad_norm": 0.803820788860321, "learning_rate": 4.272566072785612e-06, "loss": 0.0324, "step": 5060 }, { "epoch": 0.08562164353024622, "grad_norm": 0.7726360559463501, "learning_rate": 4.281009879253568e-06, "loss": 0.0364, "step": 5070 }, { "epoch": 0.08579052251156821, "grad_norm": 0.9378515481948853, "learning_rate": 4.289453685721524e-06, "loss": 0.0348, "step": 5080 }, { "epoch": 0.08595940149289019, "grad_norm": 0.7142356634140015, "learning_rate": 4.297897492189479e-06, "loss": 0.0379, "step": 5090 }, { "epoch": 0.08612828047421218, "grad_norm": 0.8294343948364258, "learning_rate": 4.306341298657435e-06, "loss": 0.0347, "step": 5100 }, { "epoch": 0.08629715945553416, "grad_norm": 0.8555284142494202, "learning_rate": 4.314785105125391e-06, "loss": 0.0313, "step": 5110 }, { "epoch": 0.08646603843685614, "grad_norm": 0.69459068775177, "learning_rate": 4.3232289115933465e-06, "loss": 0.0451, "step": 5120 }, { "epoch": 0.08663491741817814, "grad_norm": 0.46717333793640137, "learning_rate": 4.331672718061302e-06, "loss": 0.036, "step": 5130 }, { "epoch": 0.08680379639950012, "grad_norm": 0.8715049028396606, "learning_rate": 4.340116524529258e-06, "loss": 0.0315, "step": 5140 }, { "epoch": 0.08697267538082211, "grad_norm": 0.8892848491668701, "learning_rate": 4.3485603309972145e-06, "loss": 0.0253, "step": 5150 }, { "epoch": 0.08714155436214409, "grad_norm": 1.0383132696151733, "learning_rate": 4.357004137465169e-06, "loss": 0.0277, "step": 5160 }, { "epoch": 0.08731043334346607, "grad_norm": 0.9192862510681152, "learning_rate": 4.365447943933125e-06, "loss": 0.038, "step": 5170 }, { "epoch": 0.08747931232478806, "grad_norm": 0.5625611543655396, "learning_rate": 4.373891750401082e-06, "loss": 0.0403, "step": 5180 }, { "epoch": 0.08764819130611004, "grad_norm": 1.1519018411636353, "learning_rate": 4.3823355568690365e-06, "loss": 0.0285, "step": 5190 }, { "epoch": 0.08781707028743202, "grad_norm": 0.7443860769271851, "learning_rate": 4.390779363336993e-06, "loss": 0.0261, "step": 5200 }, { "epoch": 0.08798594926875401, "grad_norm": 1.0271004438400269, "learning_rate": 4.399223169804949e-06, "loss": 0.0323, "step": 5210 }, { "epoch": 0.08815482825007599, "grad_norm": 1.428999423980713, "learning_rate": 4.407666976272904e-06, "loss": 0.029, "step": 5220 }, { "epoch": 0.08832370723139799, "grad_norm": 0.7856512665748596, "learning_rate": 4.41611078274086e-06, "loss": 0.0269, "step": 5230 }, { "epoch": 0.08849258621271996, "grad_norm": 0.9490830302238464, "learning_rate": 4.424554589208815e-06, "loss": 0.0272, "step": 5240 }, { "epoch": 0.08866146519404194, "grad_norm": 1.3088481426239014, "learning_rate": 4.432998395676772e-06, "loss": 0.0312, "step": 5250 }, { "epoch": 0.08883034417536394, "grad_norm": 0.6195964217185974, "learning_rate": 4.441442202144727e-06, "loss": 0.0334, "step": 5260 }, { "epoch": 0.08899922315668592, "grad_norm": 0.990301787853241, "learning_rate": 4.449886008612683e-06, "loss": 0.0321, "step": 5270 }, { "epoch": 0.08916810213800791, "grad_norm": 1.2231767177581787, "learning_rate": 4.458329815080639e-06, "loss": 0.0407, "step": 5280 }, { "epoch": 0.08933698111932989, "grad_norm": 0.5485568046569824, "learning_rate": 4.4667736215485945e-06, "loss": 0.0295, "step": 5290 }, { "epoch": 0.08950586010065187, "grad_norm": 0.655379593372345, "learning_rate": 4.47521742801655e-06, "loss": 0.0368, "step": 5300 }, { "epoch": 0.08967473908197386, "grad_norm": 0.787260115146637, "learning_rate": 4.483661234484506e-06, "loss": 0.0301, "step": 5310 }, { "epoch": 0.08984361806329584, "grad_norm": 1.13280189037323, "learning_rate": 4.492105040952462e-06, "loss": 0.0403, "step": 5320 }, { "epoch": 0.09001249704461782, "grad_norm": 1.3725346326828003, "learning_rate": 4.500548847420417e-06, "loss": 0.0247, "step": 5330 }, { "epoch": 0.09018137602593981, "grad_norm": 0.7656813859939575, "learning_rate": 4.508992653888373e-06, "loss": 0.0385, "step": 5340 }, { "epoch": 0.09035025500726179, "grad_norm": 0.8323396444320679, "learning_rate": 4.517436460356329e-06, "loss": 0.0367, "step": 5350 }, { "epoch": 0.09051913398858379, "grad_norm": 0.8658256530761719, "learning_rate": 4.5258802668242845e-06, "loss": 0.0287, "step": 5360 }, { "epoch": 0.09068801296990577, "grad_norm": 1.1537574529647827, "learning_rate": 4.53432407329224e-06, "loss": 0.0385, "step": 5370 }, { "epoch": 0.09085689195122774, "grad_norm": 1.3440539836883545, "learning_rate": 4.542767879760197e-06, "loss": 0.0324, "step": 5380 }, { "epoch": 0.09102577093254974, "grad_norm": 1.1386476755142212, "learning_rate": 4.551211686228152e-06, "loss": 0.0357, "step": 5390 }, { "epoch": 0.09119464991387172, "grad_norm": 1.3374617099761963, "learning_rate": 4.559655492696108e-06, "loss": 0.0352, "step": 5400 }, { "epoch": 0.09136352889519371, "grad_norm": 1.2686468362808228, "learning_rate": 4.568099299164064e-06, "loss": 0.0374, "step": 5410 }, { "epoch": 0.09153240787651569, "grad_norm": 0.7710447311401367, "learning_rate": 4.576543105632019e-06, "loss": 0.0296, "step": 5420 }, { "epoch": 0.09170128685783767, "grad_norm": 0.7680432200431824, "learning_rate": 4.584986912099975e-06, "loss": 0.0392, "step": 5430 }, { "epoch": 0.09187016583915966, "grad_norm": 1.0128931999206543, "learning_rate": 4.59343071856793e-06, "loss": 0.0411, "step": 5440 }, { "epoch": 0.09203904482048164, "grad_norm": 0.6742813587188721, "learning_rate": 4.601874525035887e-06, "loss": 0.0332, "step": 5450 }, { "epoch": 0.09220792380180362, "grad_norm": 0.7573670148849487, "learning_rate": 4.6103183315038426e-06, "loss": 0.0383, "step": 5460 }, { "epoch": 0.09237680278312561, "grad_norm": 0.7057352066040039, "learning_rate": 4.618762137971797e-06, "loss": 0.0286, "step": 5470 }, { "epoch": 0.0925456817644476, "grad_norm": 1.0494682788848877, "learning_rate": 4.627205944439754e-06, "loss": 0.022, "step": 5480 }, { "epoch": 0.09271456074576959, "grad_norm": 1.5324232578277588, "learning_rate": 4.63564975090771e-06, "loss": 0.0316, "step": 5490 }, { "epoch": 0.09288343972709157, "grad_norm": 0.6551754474639893, "learning_rate": 4.644093557375665e-06, "loss": 0.028, "step": 5500 }, { "epoch": 0.09305231870841355, "grad_norm": 1.2128721475601196, "learning_rate": 4.652537363843621e-06, "loss": 0.0313, "step": 5510 }, { "epoch": 0.09322119768973554, "grad_norm": 1.1632741689682007, "learning_rate": 4.660981170311577e-06, "loss": 0.04, "step": 5520 }, { "epoch": 0.09339007667105752, "grad_norm": 0.8389279246330261, "learning_rate": 4.6694249767795326e-06, "loss": 0.044, "step": 5530 }, { "epoch": 0.09355895565237951, "grad_norm": 0.8050138354301453, "learning_rate": 4.677868783247488e-06, "loss": 0.0296, "step": 5540 }, { "epoch": 0.09372783463370149, "grad_norm": 0.8000451922416687, "learning_rate": 4.686312589715444e-06, "loss": 0.0322, "step": 5550 }, { "epoch": 0.09389671361502347, "grad_norm": 0.9331355094909668, "learning_rate": 4.6947563961834e-06, "loss": 0.0277, "step": 5560 }, { "epoch": 0.09406559259634546, "grad_norm": 0.753913402557373, "learning_rate": 4.7032002026513554e-06, "loss": 0.0248, "step": 5570 }, { "epoch": 0.09423447157766744, "grad_norm": 0.7416695952415466, "learning_rate": 4.711644009119312e-06, "loss": 0.0364, "step": 5580 }, { "epoch": 0.09440335055898944, "grad_norm": 0.6754153966903687, "learning_rate": 4.720087815587267e-06, "loss": 0.027, "step": 5590 }, { "epoch": 0.09457222954031141, "grad_norm": 0.8324164152145386, "learning_rate": 4.728531622055223e-06, "loss": 0.0319, "step": 5600 }, { "epoch": 0.0947411085216334, "grad_norm": 0.8881238102912903, "learning_rate": 4.736975428523178e-06, "loss": 0.0318, "step": 5610 }, { "epoch": 0.09490998750295539, "grad_norm": 0.8107007145881653, "learning_rate": 4.745419234991134e-06, "loss": 0.0386, "step": 5620 }, { "epoch": 0.09507886648427737, "grad_norm": 0.7100491523742676, "learning_rate": 4.753863041459091e-06, "loss": 0.0345, "step": 5630 }, { "epoch": 0.09524774546559935, "grad_norm": 1.2052028179168701, "learning_rate": 4.7623068479270455e-06, "loss": 0.0409, "step": 5640 }, { "epoch": 0.09541662444692134, "grad_norm": 0.7823473811149597, "learning_rate": 4.770750654395002e-06, "loss": 0.0349, "step": 5650 }, { "epoch": 0.09558550342824332, "grad_norm": 0.777672290802002, "learning_rate": 4.779194460862958e-06, "loss": 0.032, "step": 5660 }, { "epoch": 0.09575438240956531, "grad_norm": 1.7833751440048218, "learning_rate": 4.787638267330913e-06, "loss": 0.0354, "step": 5670 }, { "epoch": 0.09592326139088729, "grad_norm": 0.7709932923316956, "learning_rate": 4.796082073798869e-06, "loss": 0.0242, "step": 5680 }, { "epoch": 0.09609214037220927, "grad_norm": 0.6071993112564087, "learning_rate": 4.804525880266825e-06, "loss": 0.0293, "step": 5690 }, { "epoch": 0.09626101935353126, "grad_norm": 1.1912256479263306, "learning_rate": 4.812969686734781e-06, "loss": 0.0429, "step": 5700 }, { "epoch": 0.09642989833485324, "grad_norm": 1.245107889175415, "learning_rate": 4.821413493202736e-06, "loss": 0.0323, "step": 5710 }, { "epoch": 0.09659877731617524, "grad_norm": 0.849605917930603, "learning_rate": 4.829857299670692e-06, "loss": 0.0366, "step": 5720 }, { "epoch": 0.09676765629749721, "grad_norm": 1.1494288444519043, "learning_rate": 4.838301106138648e-06, "loss": 0.0244, "step": 5730 }, { "epoch": 0.0969365352788192, "grad_norm": 0.6942419409751892, "learning_rate": 4.8467449126066035e-06, "loss": 0.0328, "step": 5740 }, { "epoch": 0.09710541426014119, "grad_norm": 0.6845659613609314, "learning_rate": 4.855188719074559e-06, "loss": 0.03, "step": 5750 }, { "epoch": 0.09727429324146317, "grad_norm": 1.266840934753418, "learning_rate": 4.863632525542515e-06, "loss": 0.0352, "step": 5760 }, { "epoch": 0.09744317222278515, "grad_norm": 1.122252345085144, "learning_rate": 4.872076332010471e-06, "loss": 0.0346, "step": 5770 }, { "epoch": 0.09761205120410714, "grad_norm": 0.7238612174987793, "learning_rate": 4.880520138478426e-06, "loss": 0.0233, "step": 5780 }, { "epoch": 0.09778093018542912, "grad_norm": 0.8712977766990662, "learning_rate": 4.888963944946382e-06, "loss": 0.0316, "step": 5790 }, { "epoch": 0.09794980916675111, "grad_norm": 1.5232795476913452, "learning_rate": 4.897407751414338e-06, "loss": 0.0293, "step": 5800 }, { "epoch": 0.09811868814807309, "grad_norm": 0.7720745801925659, "learning_rate": 4.9058515578822935e-06, "loss": 0.0288, "step": 5810 }, { "epoch": 0.09828756712939507, "grad_norm": 0.5283493995666504, "learning_rate": 4.914295364350249e-06, "loss": 0.0292, "step": 5820 }, { "epoch": 0.09845644611071706, "grad_norm": 1.5968481302261353, "learning_rate": 4.922739170818206e-06, "loss": 0.0269, "step": 5830 }, { "epoch": 0.09862532509203904, "grad_norm": 1.0562238693237305, "learning_rate": 4.931182977286161e-06, "loss": 0.0363, "step": 5840 }, { "epoch": 0.09879420407336104, "grad_norm": 1.5915251970291138, "learning_rate": 4.939626783754116e-06, "loss": 0.0356, "step": 5850 }, { "epoch": 0.09896308305468302, "grad_norm": 0.44126665592193604, "learning_rate": 4.948070590222073e-06, "loss": 0.0317, "step": 5860 }, { "epoch": 0.099131962036005, "grad_norm": 1.1033127307891846, "learning_rate": 4.956514396690028e-06, "loss": 0.0329, "step": 5870 }, { "epoch": 0.09930084101732699, "grad_norm": 0.6937429904937744, "learning_rate": 4.964958203157984e-06, "loss": 0.0343, "step": 5880 }, { "epoch": 0.09946971999864897, "grad_norm": 1.1075745820999146, "learning_rate": 4.97340200962594e-06, "loss": 0.0266, "step": 5890 }, { "epoch": 0.09963859897997095, "grad_norm": 1.0542914867401123, "learning_rate": 4.981845816093896e-06, "loss": 0.0364, "step": 5900 }, { "epoch": 0.09980747796129294, "grad_norm": 0.7375944256782532, "learning_rate": 4.9902896225618515e-06, "loss": 0.0262, "step": 5910 }, { "epoch": 0.09997635694261492, "grad_norm": 0.5543315410614014, "learning_rate": 4.998733429029807e-06, "loss": 0.0341, "step": 5920 }, { "epoch": 0.10014523592393691, "grad_norm": 0.6655603051185608, "learning_rate": 5.007177235497763e-06, "loss": 0.0254, "step": 5930 }, { "epoch": 0.10031411490525889, "grad_norm": 0.9888606667518616, "learning_rate": 5.015621041965718e-06, "loss": 0.0255, "step": 5940 }, { "epoch": 0.10048299388658087, "grad_norm": 1.0200597047805786, "learning_rate": 5.024064848433674e-06, "loss": 0.0316, "step": 5950 }, { "epoch": 0.10065187286790286, "grad_norm": 0.7875651121139526, "learning_rate": 5.03250865490163e-06, "loss": 0.0454, "step": 5960 }, { "epoch": 0.10082075184922484, "grad_norm": 1.0683919191360474, "learning_rate": 5.040952461369586e-06, "loss": 0.034, "step": 5970 }, { "epoch": 0.10098963083054684, "grad_norm": 0.6697292923927307, "learning_rate": 5.0493962678375415e-06, "loss": 0.028, "step": 5980 }, { "epoch": 0.10115850981186882, "grad_norm": 0.892511248588562, "learning_rate": 5.057840074305498e-06, "loss": 0.0337, "step": 5990 }, { "epoch": 0.1013273887931908, "grad_norm": 0.47370007634162903, "learning_rate": 5.066283880773453e-06, "loss": 0.0217, "step": 6000 }, { "epoch": 0.10149626777451279, "grad_norm": 1.1253525018692017, "learning_rate": 5.074727687241409e-06, "loss": 0.026, "step": 6010 }, { "epoch": 0.10166514675583477, "grad_norm": 0.8835741281509399, "learning_rate": 5.083171493709364e-06, "loss": 0.027, "step": 6020 }, { "epoch": 0.10183402573715675, "grad_norm": 0.929317831993103, "learning_rate": 5.091615300177321e-06, "loss": 0.0236, "step": 6030 }, { "epoch": 0.10200290471847874, "grad_norm": 1.2849595546722412, "learning_rate": 5.100059106645277e-06, "loss": 0.0232, "step": 6040 }, { "epoch": 0.10217178369980072, "grad_norm": 1.1769667863845825, "learning_rate": 5.1085029131132315e-06, "loss": 0.0324, "step": 6050 }, { "epoch": 0.10234066268112271, "grad_norm": 0.7820567488670349, "learning_rate": 5.116946719581187e-06, "loss": 0.0275, "step": 6060 }, { "epoch": 0.10250954166244469, "grad_norm": 0.8414903283119202, "learning_rate": 5.125390526049143e-06, "loss": 0.0292, "step": 6070 }, { "epoch": 0.10267842064376667, "grad_norm": 0.8490903973579407, "learning_rate": 5.1338343325170995e-06, "loss": 0.0348, "step": 6080 }, { "epoch": 0.10284729962508866, "grad_norm": 0.781624972820282, "learning_rate": 5.142278138985055e-06, "loss": 0.0367, "step": 6090 }, { "epoch": 0.10301617860641064, "grad_norm": 0.6722185015678406, "learning_rate": 5.150721945453011e-06, "loss": 0.0367, "step": 6100 }, { "epoch": 0.10318505758773264, "grad_norm": 1.2355557680130005, "learning_rate": 5.159165751920966e-06, "loss": 0.0433, "step": 6110 }, { "epoch": 0.10335393656905462, "grad_norm": 0.9777665138244629, "learning_rate": 5.1676095583889216e-06, "loss": 0.0388, "step": 6120 }, { "epoch": 0.1035228155503766, "grad_norm": 0.9369296431541443, "learning_rate": 5.176053364856878e-06, "loss": 0.0281, "step": 6130 }, { "epoch": 0.10369169453169859, "grad_norm": 0.9452105760574341, "learning_rate": 5.184497171324834e-06, "loss": 0.0305, "step": 6140 }, { "epoch": 0.10386057351302057, "grad_norm": 1.7142455577850342, "learning_rate": 5.1929409777927896e-06, "loss": 0.0295, "step": 6150 }, { "epoch": 0.10402945249434255, "grad_norm": 0.9701829552650452, "learning_rate": 5.201384784260745e-06, "loss": 0.0321, "step": 6160 }, { "epoch": 0.10419833147566454, "grad_norm": 0.8972094655036926, "learning_rate": 5.2098285907287e-06, "loss": 0.0256, "step": 6170 }, { "epoch": 0.10436721045698652, "grad_norm": 0.9146260023117065, "learning_rate": 5.218272397196657e-06, "loss": 0.0327, "step": 6180 }, { "epoch": 0.10453608943830851, "grad_norm": 0.8840290904045105, "learning_rate": 5.226716203664612e-06, "loss": 0.0287, "step": 6190 }, { "epoch": 0.10470496841963049, "grad_norm": 1.2182095050811768, "learning_rate": 5.235160010132568e-06, "loss": 0.028, "step": 6200 }, { "epoch": 0.10487384740095247, "grad_norm": 0.8025933504104614, "learning_rate": 5.243603816600525e-06, "loss": 0.0355, "step": 6210 }, { "epoch": 0.10504272638227447, "grad_norm": 0.6005998253822327, "learning_rate": 5.2520476230684796e-06, "loss": 0.0239, "step": 6220 }, { "epoch": 0.10521160536359644, "grad_norm": 0.7659167647361755, "learning_rate": 5.260491429536435e-06, "loss": 0.021, "step": 6230 }, { "epoch": 0.10538048434491844, "grad_norm": 0.6125292778015137, "learning_rate": 5.268935236004391e-06, "loss": 0.0297, "step": 6240 }, { "epoch": 0.10554936332624042, "grad_norm": 1.1459487676620483, "learning_rate": 5.277379042472347e-06, "loss": 0.0387, "step": 6250 }, { "epoch": 0.1057182423075624, "grad_norm": 0.8622835278511047, "learning_rate": 5.285822848940303e-06, "loss": 0.0264, "step": 6260 }, { "epoch": 0.10588712128888439, "grad_norm": 1.1727265119552612, "learning_rate": 5.294266655408259e-06, "loss": 0.0231, "step": 6270 }, { "epoch": 0.10605600027020637, "grad_norm": 1.2240904569625854, "learning_rate": 5.302710461876214e-06, "loss": 0.0313, "step": 6280 }, { "epoch": 0.10622487925152835, "grad_norm": 0.6780516505241394, "learning_rate": 5.31115426834417e-06, "loss": 0.0302, "step": 6290 }, { "epoch": 0.10639375823285034, "grad_norm": 0.8513317108154297, "learning_rate": 5.319598074812125e-06, "loss": 0.0285, "step": 6300 }, { "epoch": 0.10656263721417232, "grad_norm": 0.4087432324886322, "learning_rate": 5.328041881280082e-06, "loss": 0.0317, "step": 6310 }, { "epoch": 0.10673151619549431, "grad_norm": 0.8078209161758423, "learning_rate": 5.336485687748038e-06, "loss": 0.0341, "step": 6320 }, { "epoch": 0.10690039517681629, "grad_norm": 1.0778888463974, "learning_rate": 5.344929494215993e-06, "loss": 0.0274, "step": 6330 }, { "epoch": 0.10706927415813827, "grad_norm": 0.9710830450057983, "learning_rate": 5.353373300683948e-06, "loss": 0.0259, "step": 6340 }, { "epoch": 0.10723815313946027, "grad_norm": 0.9011974334716797, "learning_rate": 5.361817107151904e-06, "loss": 0.0248, "step": 6350 }, { "epoch": 0.10740703212078224, "grad_norm": 2.028024673461914, "learning_rate": 5.3702609136198605e-06, "loss": 0.0346, "step": 6360 }, { "epoch": 0.10757591110210424, "grad_norm": 0.8174864053726196, "learning_rate": 5.378704720087816e-06, "loss": 0.0367, "step": 6370 }, { "epoch": 0.10774479008342622, "grad_norm": 0.78709477186203, "learning_rate": 5.387148526555772e-06, "loss": 0.0413, "step": 6380 }, { "epoch": 0.1079136690647482, "grad_norm": 0.5622537732124329, "learning_rate": 5.3955923330237285e-06, "loss": 0.0204, "step": 6390 }, { "epoch": 0.10808254804607019, "grad_norm": 0.6878485083580017, "learning_rate": 5.404036139491683e-06, "loss": 0.0358, "step": 6400 }, { "epoch": 0.10825142702739217, "grad_norm": 0.9953665733337402, "learning_rate": 5.412479945959639e-06, "loss": 0.0383, "step": 6410 }, { "epoch": 0.10842030600871416, "grad_norm": 0.8835489153862, "learning_rate": 5.420923752427595e-06, "loss": 0.0333, "step": 6420 }, { "epoch": 0.10858918499003614, "grad_norm": 0.6029289364814758, "learning_rate": 5.4293675588955505e-06, "loss": 0.0278, "step": 6430 }, { "epoch": 0.10875806397135812, "grad_norm": 0.9145163297653198, "learning_rate": 5.437811365363507e-06, "loss": 0.0229, "step": 6440 }, { "epoch": 0.10892694295268011, "grad_norm": 1.7839081287384033, "learning_rate": 5.446255171831462e-06, "loss": 0.0379, "step": 6450 }, { "epoch": 0.1090958219340021, "grad_norm": 0.8438087105751038, "learning_rate": 5.454698978299418e-06, "loss": 0.0296, "step": 6460 }, { "epoch": 0.10926470091532407, "grad_norm": 0.9489569664001465, "learning_rate": 5.463142784767373e-06, "loss": 0.0271, "step": 6470 }, { "epoch": 0.10943357989664607, "grad_norm": 0.6387969255447388, "learning_rate": 5.471586591235329e-06, "loss": 0.024, "step": 6480 }, { "epoch": 0.10960245887796805, "grad_norm": 0.5493649244308472, "learning_rate": 5.480030397703286e-06, "loss": 0.0349, "step": 6490 }, { "epoch": 0.10977133785929004, "grad_norm": 0.5748535394668579, "learning_rate": 5.488474204171241e-06, "loss": 0.0262, "step": 6500 }, { "epoch": 0.10994021684061202, "grad_norm": 1.3443704843521118, "learning_rate": 5.496918010639196e-06, "loss": 0.0278, "step": 6510 }, { "epoch": 0.110109095821934, "grad_norm": 0.7899364233016968, "learning_rate": 5.505361817107152e-06, "loss": 0.0317, "step": 6520 }, { "epoch": 0.11027797480325599, "grad_norm": 0.7861093282699585, "learning_rate": 5.5138056235751085e-06, "loss": 0.0306, "step": 6530 }, { "epoch": 0.11044685378457797, "grad_norm": 0.9036576151847839, "learning_rate": 5.522249430043064e-06, "loss": 0.0281, "step": 6540 }, { "epoch": 0.11061573276589996, "grad_norm": 1.0217006206512451, "learning_rate": 5.53069323651102e-06, "loss": 0.0281, "step": 6550 }, { "epoch": 0.11078461174722194, "grad_norm": 0.851105809211731, "learning_rate": 5.539137042978976e-06, "loss": 0.0376, "step": 6560 }, { "epoch": 0.11095349072854392, "grad_norm": 0.6672607660293579, "learning_rate": 5.5475808494469305e-06, "loss": 0.027, "step": 6570 }, { "epoch": 0.11112236970986591, "grad_norm": 0.7568426132202148, "learning_rate": 5.556024655914887e-06, "loss": 0.0274, "step": 6580 }, { "epoch": 0.1112912486911879, "grad_norm": 0.9894194602966309, "learning_rate": 5.564468462382843e-06, "loss": 0.0346, "step": 6590 }, { "epoch": 0.11146012767250987, "grad_norm": 0.6986855864524841, "learning_rate": 5.5729122688507985e-06, "loss": 0.0287, "step": 6600 }, { "epoch": 0.11162900665383187, "grad_norm": 0.7445778250694275, "learning_rate": 5.581356075318754e-06, "loss": 0.0258, "step": 6610 }, { "epoch": 0.11179788563515385, "grad_norm": 0.8864195942878723, "learning_rate": 5.589799881786709e-06, "loss": 0.027, "step": 6620 }, { "epoch": 0.11196676461647584, "grad_norm": 1.012162446975708, "learning_rate": 5.598243688254666e-06, "loss": 0.0208, "step": 6630 }, { "epoch": 0.11213564359779782, "grad_norm": 0.7525762915611267, "learning_rate": 5.606687494722621e-06, "loss": 0.0331, "step": 6640 }, { "epoch": 0.1123045225791198, "grad_norm": 0.9764576554298401, "learning_rate": 5.615131301190577e-06, "loss": 0.0284, "step": 6650 }, { "epoch": 0.11247340156044179, "grad_norm": 0.8299107551574707, "learning_rate": 5.623575107658533e-06, "loss": 0.0208, "step": 6660 }, { "epoch": 0.11264228054176377, "grad_norm": 0.6143267750740051, "learning_rate": 5.632018914126489e-06, "loss": 0.0293, "step": 6670 }, { "epoch": 0.11281115952308576, "grad_norm": 0.45407575368881226, "learning_rate": 5.640462720594444e-06, "loss": 0.0307, "step": 6680 }, { "epoch": 0.11298003850440774, "grad_norm": 0.5135723948478699, "learning_rate": 5.6489065270624e-06, "loss": 0.0237, "step": 6690 }, { "epoch": 0.11314891748572972, "grad_norm": 1.054136037826538, "learning_rate": 5.657350333530356e-06, "loss": 0.0268, "step": 6700 }, { "epoch": 0.11331779646705172, "grad_norm": 0.5216715335845947, "learning_rate": 5.665794139998312e-06, "loss": 0.0245, "step": 6710 }, { "epoch": 0.1134866754483737, "grad_norm": 0.6621972322463989, "learning_rate": 5.674237946466268e-06, "loss": 0.0319, "step": 6720 }, { "epoch": 0.11365555442969567, "grad_norm": 0.537342369556427, "learning_rate": 5.682681752934224e-06, "loss": 0.0298, "step": 6730 }, { "epoch": 0.11382443341101767, "grad_norm": 0.8001373410224915, "learning_rate": 5.6911255594021785e-06, "loss": 0.0316, "step": 6740 }, { "epoch": 0.11399331239233965, "grad_norm": 0.6013008952140808, "learning_rate": 5.699569365870134e-06, "loss": 0.0326, "step": 6750 }, { "epoch": 0.11416219137366164, "grad_norm": 0.7509782910346985, "learning_rate": 5.708013172338091e-06, "loss": 0.0233, "step": 6760 }, { "epoch": 0.11433107035498362, "grad_norm": 0.7353625297546387, "learning_rate": 5.7164569788060465e-06, "loss": 0.0227, "step": 6770 }, { "epoch": 0.1144999493363056, "grad_norm": 0.9115102887153625, "learning_rate": 5.724900785274002e-06, "loss": 0.0323, "step": 6780 }, { "epoch": 0.11466882831762759, "grad_norm": 0.6321166157722473, "learning_rate": 5.733344591741957e-06, "loss": 0.0251, "step": 6790 }, { "epoch": 0.11483770729894957, "grad_norm": 1.5242328643798828, "learning_rate": 5.741788398209913e-06, "loss": 0.0325, "step": 6800 }, { "epoch": 0.11500658628027156, "grad_norm": 0.5735524892807007, "learning_rate": 5.750232204677869e-06, "loss": 0.0214, "step": 6810 }, { "epoch": 0.11517546526159354, "grad_norm": 0.8593398332595825, "learning_rate": 5.758676011145825e-06, "loss": 0.0292, "step": 6820 }, { "epoch": 0.11534434424291552, "grad_norm": 1.1599549055099487, "learning_rate": 5.767119817613781e-06, "loss": 0.0277, "step": 6830 }, { "epoch": 0.11551322322423752, "grad_norm": 0.8293540477752686, "learning_rate": 5.775563624081737e-06, "loss": 0.0267, "step": 6840 }, { "epoch": 0.1156821022055595, "grad_norm": 0.5638085603713989, "learning_rate": 5.7840074305496914e-06, "loss": 0.0301, "step": 6850 }, { "epoch": 0.11585098118688147, "grad_norm": 1.095698356628418, "learning_rate": 5.792451237017648e-06, "loss": 0.027, "step": 6860 }, { "epoch": 0.11601986016820347, "grad_norm": 0.514751672744751, "learning_rate": 5.800895043485604e-06, "loss": 0.03, "step": 6870 }, { "epoch": 0.11618873914952545, "grad_norm": 0.9088679552078247, "learning_rate": 5.809338849953559e-06, "loss": 0.0206, "step": 6880 }, { "epoch": 0.11635761813084744, "grad_norm": 0.7249288558959961, "learning_rate": 5.817782656421516e-06, "loss": 0.0274, "step": 6890 }, { "epoch": 0.11652649711216942, "grad_norm": 0.6660640835762024, "learning_rate": 5.826226462889472e-06, "loss": 0.0422, "step": 6900 }, { "epoch": 0.1166953760934914, "grad_norm": 0.7393006086349487, "learning_rate": 5.8346702693574266e-06, "loss": 0.0215, "step": 6910 }, { "epoch": 0.11686425507481339, "grad_norm": 1.2577327489852905, "learning_rate": 5.843114075825382e-06, "loss": 0.0278, "step": 6920 }, { "epoch": 0.11703313405613537, "grad_norm": 0.8931872844696045, "learning_rate": 5.851557882293338e-06, "loss": 0.0306, "step": 6930 }, { "epoch": 0.11720201303745736, "grad_norm": 0.9390203952789307, "learning_rate": 5.8600016887612946e-06, "loss": 0.0273, "step": 6940 }, { "epoch": 0.11737089201877934, "grad_norm": 0.9605112075805664, "learning_rate": 5.86844549522925e-06, "loss": 0.0308, "step": 6950 }, { "epoch": 0.11753977100010132, "grad_norm": 0.8742411136627197, "learning_rate": 5.876889301697205e-06, "loss": 0.0275, "step": 6960 }, { "epoch": 0.11770864998142332, "grad_norm": 0.732410192489624, "learning_rate": 5.885333108165161e-06, "loss": 0.0356, "step": 6970 }, { "epoch": 0.1178775289627453, "grad_norm": 0.6333898901939392, "learning_rate": 5.893776914633117e-06, "loss": 0.0221, "step": 6980 }, { "epoch": 0.11804640794406727, "grad_norm": 0.707695722579956, "learning_rate": 5.902220721101073e-06, "loss": 0.0252, "step": 6990 }, { "epoch": 0.11821528692538927, "grad_norm": 0.726567268371582, "learning_rate": 5.910664527569029e-06, "loss": 0.0289, "step": 7000 }, { "epoch": 0.11838416590671125, "grad_norm": 0.7457570433616638, "learning_rate": 5.919108334036985e-06, "loss": 0.029, "step": 7010 }, { "epoch": 0.11855304488803324, "grad_norm": 0.6745281219482422, "learning_rate": 5.9275521405049395e-06, "loss": 0.0208, "step": 7020 }, { "epoch": 0.11872192386935522, "grad_norm": 0.7475462555885315, "learning_rate": 5.935995946972896e-06, "loss": 0.0278, "step": 7030 }, { "epoch": 0.1188908028506772, "grad_norm": 0.8534787893295288, "learning_rate": 5.944439753440852e-06, "loss": 0.0239, "step": 7040 }, { "epoch": 0.11905968183199919, "grad_norm": 1.0036873817443848, "learning_rate": 5.9528835599088075e-06, "loss": 0.0349, "step": 7050 }, { "epoch": 0.11922856081332117, "grad_norm": 0.8925744891166687, "learning_rate": 5.961327366376763e-06, "loss": 0.0215, "step": 7060 }, { "epoch": 0.11939743979464316, "grad_norm": 0.6704158186912537, "learning_rate": 5.96977117284472e-06, "loss": 0.0257, "step": 7070 }, { "epoch": 0.11956631877596514, "grad_norm": 0.8526410460472107, "learning_rate": 5.978214979312675e-06, "loss": 0.0367, "step": 7080 }, { "epoch": 0.11973519775728712, "grad_norm": 1.5824928283691406, "learning_rate": 5.98665878578063e-06, "loss": 0.0395, "step": 7090 }, { "epoch": 0.11990407673860912, "grad_norm": 0.8535922765731812, "learning_rate": 5.995102592248586e-06, "loss": 0.0195, "step": 7100 }, { "epoch": 0.1200729557199311, "grad_norm": 0.6084505319595337, "learning_rate": 6.003546398716542e-06, "loss": 0.022, "step": 7110 }, { "epoch": 0.12024183470125308, "grad_norm": 0.6891043782234192, "learning_rate": 6.011990205184498e-06, "loss": 0.0237, "step": 7120 }, { "epoch": 0.12041071368257507, "grad_norm": 0.5578417778015137, "learning_rate": 6.020434011652453e-06, "loss": 0.0345, "step": 7130 }, { "epoch": 0.12057959266389705, "grad_norm": 0.5704952478408813, "learning_rate": 6.028877818120409e-06, "loss": 0.0376, "step": 7140 }, { "epoch": 0.12074847164521904, "grad_norm": 0.7165504693984985, "learning_rate": 6.037321624588365e-06, "loss": 0.0207, "step": 7150 }, { "epoch": 0.12091735062654102, "grad_norm": 0.6438784599304199, "learning_rate": 6.04576543105632e-06, "loss": 0.0306, "step": 7160 }, { "epoch": 0.121086229607863, "grad_norm": 0.44113150238990784, "learning_rate": 6.054209237524277e-06, "loss": 0.0205, "step": 7170 }, { "epoch": 0.12125510858918499, "grad_norm": 0.827568769454956, "learning_rate": 6.062653043992233e-06, "loss": 0.0284, "step": 7180 }, { "epoch": 0.12142398757050697, "grad_norm": 1.3699350357055664, "learning_rate": 6.0710968504601875e-06, "loss": 0.0333, "step": 7190 }, { "epoch": 0.12159286655182897, "grad_norm": 0.4937279522418976, "learning_rate": 6.079540656928143e-06, "loss": 0.0271, "step": 7200 }, { "epoch": 0.12176174553315094, "grad_norm": 0.9783071279525757, "learning_rate": 6.0879844633961e-06, "loss": 0.0279, "step": 7210 }, { "epoch": 0.12193062451447292, "grad_norm": 0.7814403176307678, "learning_rate": 6.0964282698640555e-06, "loss": 0.0409, "step": 7220 }, { "epoch": 0.12209950349579492, "grad_norm": 0.8203415870666504, "learning_rate": 6.104872076332011e-06, "loss": 0.0199, "step": 7230 }, { "epoch": 0.1222683824771169, "grad_norm": 0.8916028738021851, "learning_rate": 6.113315882799967e-06, "loss": 0.0293, "step": 7240 }, { "epoch": 0.12243726145843888, "grad_norm": 0.8109354972839355, "learning_rate": 6.121759689267922e-06, "loss": 0.0269, "step": 7250 }, { "epoch": 0.12260614043976087, "grad_norm": 1.0256584882736206, "learning_rate": 6.130203495735878e-06, "loss": 0.0306, "step": 7260 }, { "epoch": 0.12277501942108285, "grad_norm": 0.7087888121604919, "learning_rate": 6.138647302203834e-06, "loss": 0.0303, "step": 7270 }, { "epoch": 0.12294389840240484, "grad_norm": 1.0990185737609863, "learning_rate": 6.14709110867179e-06, "loss": 0.0225, "step": 7280 }, { "epoch": 0.12311277738372682, "grad_norm": 0.7741889357566833, "learning_rate": 6.1555349151397455e-06, "loss": 0.0187, "step": 7290 }, { "epoch": 0.1232816563650488, "grad_norm": 0.8553534150123596, "learning_rate": 6.1639787216077e-06, "loss": 0.0191, "step": 7300 }, { "epoch": 0.1234505353463708, "grad_norm": 0.8120425939559937, "learning_rate": 6.172422528075657e-06, "loss": 0.0216, "step": 7310 }, { "epoch": 0.12361941432769277, "grad_norm": 0.36096081137657166, "learning_rate": 6.180866334543613e-06, "loss": 0.0237, "step": 7320 }, { "epoch": 0.12378829330901477, "grad_norm": 0.9354158639907837, "learning_rate": 6.189310141011568e-06, "loss": 0.0294, "step": 7330 }, { "epoch": 0.12395717229033675, "grad_norm": 0.7746331691741943, "learning_rate": 6.197753947479525e-06, "loss": 0.0229, "step": 7340 }, { "epoch": 0.12412605127165872, "grad_norm": 0.5529250502586365, "learning_rate": 6.206197753947481e-06, "loss": 0.0247, "step": 7350 }, { "epoch": 0.12429493025298072, "grad_norm": 0.9333901405334473, "learning_rate": 6.2146415604154355e-06, "loss": 0.0326, "step": 7360 }, { "epoch": 0.1244638092343027, "grad_norm": 0.9938616752624512, "learning_rate": 6.223085366883391e-06, "loss": 0.0242, "step": 7370 }, { "epoch": 0.12463268821562469, "grad_norm": 0.8571752905845642, "learning_rate": 6.231529173351347e-06, "loss": 0.0336, "step": 7380 }, { "epoch": 0.12480156719694667, "grad_norm": 1.245314121246338, "learning_rate": 6.2399729798193035e-06, "loss": 0.0252, "step": 7390 }, { "epoch": 0.12497044617826865, "grad_norm": 0.777734637260437, "learning_rate": 6.248416786287259e-06, "loss": 0.0264, "step": 7400 }, { "epoch": 0.12513932515959064, "grad_norm": 0.6064144968986511, "learning_rate": 6.256860592755215e-06, "loss": 0.026, "step": 7410 }, { "epoch": 0.12530820414091262, "grad_norm": 1.2296903133392334, "learning_rate": 6.26530439922317e-06, "loss": 0.025, "step": 7420 }, { "epoch": 0.1254770831222346, "grad_norm": 0.6496667861938477, "learning_rate": 6.2737482056911255e-06, "loss": 0.0223, "step": 7430 }, { "epoch": 0.12564596210355658, "grad_norm": 0.6174941062927246, "learning_rate": 6.282192012159082e-06, "loss": 0.0198, "step": 7440 }, { "epoch": 0.1258148410848786, "grad_norm": 0.2949312925338745, "learning_rate": 6.290635818627038e-06, "loss": 0.0314, "step": 7450 }, { "epoch": 0.12598372006620057, "grad_norm": 0.671984851360321, "learning_rate": 6.2990796250949935e-06, "loss": 0.0377, "step": 7460 }, { "epoch": 0.12615259904752255, "grad_norm": 0.7739905714988708, "learning_rate": 6.307523431562948e-06, "loss": 0.027, "step": 7470 }, { "epoch": 0.12632147802884452, "grad_norm": 0.5354557633399963, "learning_rate": 6.315967238030904e-06, "loss": 0.0288, "step": 7480 }, { "epoch": 0.1264903570101665, "grad_norm": 0.703662097454071, "learning_rate": 6.324411044498861e-06, "loss": 0.0357, "step": 7490 }, { "epoch": 0.1266592359914885, "grad_norm": 0.6207019686698914, "learning_rate": 6.332854850966816e-06, "loss": 0.0311, "step": 7500 }, { "epoch": 0.1268281149728105, "grad_norm": 0.8686960339546204, "learning_rate": 6.341298657434772e-06, "loss": 0.0295, "step": 7510 }, { "epoch": 0.12699699395413247, "grad_norm": 0.5282285213470459, "learning_rate": 6.349742463902729e-06, "loss": 0.0219, "step": 7520 }, { "epoch": 0.12716587293545445, "grad_norm": 0.3807213008403778, "learning_rate": 6.3581862703706836e-06, "loss": 0.0193, "step": 7530 }, { "epoch": 0.12733475191677643, "grad_norm": 1.047757625579834, "learning_rate": 6.366630076838639e-06, "loss": 0.0436, "step": 7540 }, { "epoch": 0.12750363089809844, "grad_norm": 0.5530644059181213, "learning_rate": 6.375073883306595e-06, "loss": 0.027, "step": 7550 }, { "epoch": 0.12767250987942041, "grad_norm": 0.7794262170791626, "learning_rate": 6.383517689774551e-06, "loss": 0.0267, "step": 7560 }, { "epoch": 0.1278413888607424, "grad_norm": 0.8317309021949768, "learning_rate": 6.391961496242507e-06, "loss": 0.0259, "step": 7570 }, { "epoch": 0.12801026784206437, "grad_norm": 0.6068344712257385, "learning_rate": 6.400405302710463e-06, "loss": 0.0231, "step": 7580 }, { "epoch": 0.12817914682338635, "grad_norm": 0.7803915143013, "learning_rate": 6.408849109178418e-06, "loss": 0.0299, "step": 7590 }, { "epoch": 0.12834802580470833, "grad_norm": 1.0557247400283813, "learning_rate": 6.4172929156463736e-06, "loss": 0.0328, "step": 7600 }, { "epoch": 0.12851690478603034, "grad_norm": 0.7013477683067322, "learning_rate": 6.425736722114329e-06, "loss": 0.0237, "step": 7610 }, { "epoch": 0.12868578376735232, "grad_norm": 0.7050474882125854, "learning_rate": 6.434180528582286e-06, "loss": 0.0261, "step": 7620 }, { "epoch": 0.1288546627486743, "grad_norm": 0.6687779426574707, "learning_rate": 6.4426243350502416e-06, "loss": 0.0327, "step": 7630 }, { "epoch": 0.12902354172999628, "grad_norm": 0.8051851987838745, "learning_rate": 6.4510681415181964e-06, "loss": 0.0243, "step": 7640 }, { "epoch": 0.12919242071131826, "grad_norm": 0.7042991518974304, "learning_rate": 6.459511947986152e-06, "loss": 0.0395, "step": 7650 }, { "epoch": 0.12936129969264026, "grad_norm": 0.8095232248306274, "learning_rate": 6.467955754454108e-06, "loss": 0.0275, "step": 7660 }, { "epoch": 0.12953017867396224, "grad_norm": 1.0498719215393066, "learning_rate": 6.4763995609220644e-06, "loss": 0.0288, "step": 7670 }, { "epoch": 0.12969905765528422, "grad_norm": 0.7133126258850098, "learning_rate": 6.48484336739002e-06, "loss": 0.0244, "step": 7680 }, { "epoch": 0.1298679366366062, "grad_norm": 0.8465026617050171, "learning_rate": 6.493287173857976e-06, "loss": 0.0315, "step": 7690 }, { "epoch": 0.13003681561792818, "grad_norm": 0.5495076179504395, "learning_rate": 6.501730980325931e-06, "loss": 0.0204, "step": 7700 }, { "epoch": 0.1302056945992502, "grad_norm": 0.7399181127548218, "learning_rate": 6.510174786793887e-06, "loss": 0.0313, "step": 7710 }, { "epoch": 0.13037457358057217, "grad_norm": 0.7683311700820923, "learning_rate": 6.518618593261843e-06, "loss": 0.0222, "step": 7720 }, { "epoch": 0.13054345256189415, "grad_norm": 1.0526340007781982, "learning_rate": 6.527062399729799e-06, "loss": 0.0238, "step": 7730 }, { "epoch": 0.13071233154321613, "grad_norm": 0.6518090963363647, "learning_rate": 6.5355062061977545e-06, "loss": 0.0317, "step": 7740 }, { "epoch": 0.1308812105245381, "grad_norm": 1.3063868284225464, "learning_rate": 6.543950012665711e-06, "loss": 0.025, "step": 7750 }, { "epoch": 0.1310500895058601, "grad_norm": 1.0071359872817993, "learning_rate": 6.552393819133666e-06, "loss": 0.0313, "step": 7760 }, { "epoch": 0.1312189684871821, "grad_norm": 1.0694866180419922, "learning_rate": 6.560837625601622e-06, "loss": 0.0314, "step": 7770 }, { "epoch": 0.13138784746850407, "grad_norm": 0.5349934697151184, "learning_rate": 6.569281432069577e-06, "loss": 0.0263, "step": 7780 }, { "epoch": 0.13155672644982605, "grad_norm": 0.7973740696907043, "learning_rate": 6.577725238537533e-06, "loss": 0.0311, "step": 7790 }, { "epoch": 0.13172560543114803, "grad_norm": 1.2314813137054443, "learning_rate": 6.58616904500549e-06, "loss": 0.0326, "step": 7800 }, { "epoch": 0.13189448441247004, "grad_norm": 0.749681830406189, "learning_rate": 6.594612851473445e-06, "loss": 0.0322, "step": 7810 }, { "epoch": 0.13206336339379202, "grad_norm": 0.8937078714370728, "learning_rate": 6.6030566579414e-06, "loss": 0.0285, "step": 7820 }, { "epoch": 0.132232242375114, "grad_norm": 0.9274656176567078, "learning_rate": 6.611500464409356e-06, "loss": 0.0276, "step": 7830 }, { "epoch": 0.13240112135643597, "grad_norm": 0.9130686521530151, "learning_rate": 6.6199442708773125e-06, "loss": 0.0244, "step": 7840 }, { "epoch": 0.13257000033775795, "grad_norm": 1.2705516815185547, "learning_rate": 6.628388077345268e-06, "loss": 0.0254, "step": 7850 }, { "epoch": 0.13273887931907996, "grad_norm": 0.7902691960334778, "learning_rate": 6.636831883813224e-06, "loss": 0.0326, "step": 7860 }, { "epoch": 0.13290775830040194, "grad_norm": 0.6276863813400269, "learning_rate": 6.645275690281179e-06, "loss": 0.0207, "step": 7870 }, { "epoch": 0.13307663728172392, "grad_norm": 0.7307090759277344, "learning_rate": 6.6537194967491345e-06, "loss": 0.0327, "step": 7880 }, { "epoch": 0.1332455162630459, "grad_norm": 0.7401148080825806, "learning_rate": 6.662163303217091e-06, "loss": 0.0297, "step": 7890 }, { "epoch": 0.13341439524436788, "grad_norm": 0.8455392718315125, "learning_rate": 6.670607109685047e-06, "loss": 0.0242, "step": 7900 }, { "epoch": 0.13358327422568986, "grad_norm": 1.0645242929458618, "learning_rate": 6.6790509161530025e-06, "loss": 0.0299, "step": 7910 }, { "epoch": 0.13375215320701186, "grad_norm": 1.2958219051361084, "learning_rate": 6.687494722620958e-06, "loss": 0.0251, "step": 7920 }, { "epoch": 0.13392103218833384, "grad_norm": 0.6259620189666748, "learning_rate": 6.695938529088913e-06, "loss": 0.0305, "step": 7930 }, { "epoch": 0.13408991116965582, "grad_norm": 0.7308422327041626, "learning_rate": 6.70438233555687e-06, "loss": 0.0302, "step": 7940 }, { "epoch": 0.1342587901509778, "grad_norm": 0.6973051428794861, "learning_rate": 6.712826142024825e-06, "loss": 0.0251, "step": 7950 }, { "epoch": 0.13442766913229978, "grad_norm": 0.7307868599891663, "learning_rate": 6.721269948492781e-06, "loss": 0.0295, "step": 7960 }, { "epoch": 0.1345965481136218, "grad_norm": 0.7114649415016174, "learning_rate": 6.729713754960737e-06, "loss": 0.0274, "step": 7970 }, { "epoch": 0.13476542709494377, "grad_norm": 0.9427970051765442, "learning_rate": 6.738157561428693e-06, "loss": 0.0427, "step": 7980 }, { "epoch": 0.13493430607626575, "grad_norm": 0.7614327073097229, "learning_rate": 6.746601367896648e-06, "loss": 0.0326, "step": 7990 }, { "epoch": 0.13510318505758773, "grad_norm": 1.0472508668899536, "learning_rate": 6.755045174364604e-06, "loss": 0.0372, "step": 8000 }, { "epoch": 0.1352720640389097, "grad_norm": 0.8436040282249451, "learning_rate": 6.76348898083256e-06, "loss": 0.0276, "step": 8010 }, { "epoch": 0.1354409430202317, "grad_norm": 0.9778740406036377, "learning_rate": 6.771932787300516e-06, "loss": 0.0214, "step": 8020 }, { "epoch": 0.1356098220015537, "grad_norm": 0.5791234970092773, "learning_rate": 6.780376593768472e-06, "loss": 0.029, "step": 8030 }, { "epoch": 0.13577870098287567, "grad_norm": 0.9300340414047241, "learning_rate": 6.788820400236427e-06, "loss": 0.0385, "step": 8040 }, { "epoch": 0.13594757996419765, "grad_norm": 1.152604103088379, "learning_rate": 6.7972642067043825e-06, "loss": 0.0234, "step": 8050 }, { "epoch": 0.13611645894551963, "grad_norm": 0.7195177674293518, "learning_rate": 6.805708013172338e-06, "loss": 0.0309, "step": 8060 }, { "epoch": 0.13628533792684164, "grad_norm": 1.0967925786972046, "learning_rate": 6.814151819640295e-06, "loss": 0.0336, "step": 8070 }, { "epoch": 0.13645421690816362, "grad_norm": 0.5536007881164551, "learning_rate": 6.8225956261082505e-06, "loss": 0.0274, "step": 8080 }, { "epoch": 0.1366230958894856, "grad_norm": 0.7264208793640137, "learning_rate": 6.831039432576206e-06, "loss": 0.023, "step": 8090 }, { "epoch": 0.13679197487080758, "grad_norm": 0.6385886073112488, "learning_rate": 6.839483239044161e-06, "loss": 0.029, "step": 8100 }, { "epoch": 0.13696085385212955, "grad_norm": 0.7037984132766724, "learning_rate": 6.847927045512117e-06, "loss": 0.031, "step": 8110 }, { "epoch": 0.13712973283345156, "grad_norm": 0.7837616205215454, "learning_rate": 6.856370851980073e-06, "loss": 0.0218, "step": 8120 }, { "epoch": 0.13729861181477354, "grad_norm": 0.4086551368236542, "learning_rate": 6.864814658448029e-06, "loss": 0.0366, "step": 8130 }, { "epoch": 0.13746749079609552, "grad_norm": 0.5136587619781494, "learning_rate": 6.873258464915985e-06, "loss": 0.0268, "step": 8140 }, { "epoch": 0.1376363697774175, "grad_norm": 0.5345762372016907, "learning_rate": 6.881702271383941e-06, "loss": 0.025, "step": 8150 }, { "epoch": 0.13780524875873948, "grad_norm": 0.5250120162963867, "learning_rate": 6.890146077851895e-06, "loss": 0.0257, "step": 8160 }, { "epoch": 0.13797412774006146, "grad_norm": 0.9793301820755005, "learning_rate": 6.898589884319852e-06, "loss": 0.0243, "step": 8170 }, { "epoch": 0.13814300672138347, "grad_norm": 0.577810525894165, "learning_rate": 6.907033690787808e-06, "loss": 0.0208, "step": 8180 }, { "epoch": 0.13831188570270544, "grad_norm": 0.7518154978752136, "learning_rate": 6.915477497255763e-06, "loss": 0.0269, "step": 8190 }, { "epoch": 0.13848076468402742, "grad_norm": 0.8045996427536011, "learning_rate": 6.92392130372372e-06, "loss": 0.027, "step": 8200 }, { "epoch": 0.1386496436653494, "grad_norm": 1.0903373956680298, "learning_rate": 6.932365110191675e-06, "loss": 0.0301, "step": 8210 }, { "epoch": 0.13881852264667138, "grad_norm": 0.8010530471801758, "learning_rate": 6.9408089166596306e-06, "loss": 0.0227, "step": 8220 }, { "epoch": 0.1389874016279934, "grad_norm": 1.175809383392334, "learning_rate": 6.949252723127586e-06, "loss": 0.0248, "step": 8230 }, { "epoch": 0.13915628060931537, "grad_norm": 0.8808058500289917, "learning_rate": 6.957696529595542e-06, "loss": 0.0315, "step": 8240 }, { "epoch": 0.13932515959063735, "grad_norm": 1.1625251770019531, "learning_rate": 6.9661403360634986e-06, "loss": 0.033, "step": 8250 }, { "epoch": 0.13949403857195933, "grad_norm": 0.8134043216705322, "learning_rate": 6.974584142531454e-06, "loss": 0.0254, "step": 8260 }, { "epoch": 0.1396629175532813, "grad_norm": 0.9934115409851074, "learning_rate": 6.983027948999409e-06, "loss": 0.0276, "step": 8270 }, { "epoch": 0.13983179653460331, "grad_norm": 0.4335698187351227, "learning_rate": 6.991471755467365e-06, "loss": 0.0219, "step": 8280 }, { "epoch": 0.1400006755159253, "grad_norm": 1.3480175733566284, "learning_rate": 6.9999155619353206e-06, "loss": 0.0303, "step": 8290 }, { "epoch": 0.14016955449724727, "grad_norm": 0.7946396470069885, "learning_rate": 7.008359368403277e-06, "loss": 0.0269, "step": 8300 }, { "epoch": 0.14033843347856925, "grad_norm": 0.5268502831459045, "learning_rate": 7.016803174871233e-06, "loss": 0.0233, "step": 8310 }, { "epoch": 0.14050731245989123, "grad_norm": 0.5505070090293884, "learning_rate": 7.0252469813391886e-06, "loss": 0.0258, "step": 8320 }, { "epoch": 0.14067619144121324, "grad_norm": 0.9606903791427612, "learning_rate": 7.0336907878071434e-06, "loss": 0.0268, "step": 8330 }, { "epoch": 0.14084507042253522, "grad_norm": 0.5266547799110413, "learning_rate": 7.0421345942751e-06, "loss": 0.022, "step": 8340 }, { "epoch": 0.1410139494038572, "grad_norm": 0.7518401741981506, "learning_rate": 7.050578400743056e-06, "loss": 0.0206, "step": 8350 }, { "epoch": 0.14118282838517918, "grad_norm": 0.7635805010795593, "learning_rate": 7.0590222072110114e-06, "loss": 0.0304, "step": 8360 }, { "epoch": 0.14135170736650116, "grad_norm": 0.9198780059814453, "learning_rate": 7.067466013678967e-06, "loss": 0.0281, "step": 8370 }, { "epoch": 0.14152058634782316, "grad_norm": 0.7319763898849487, "learning_rate": 7.075909820146922e-06, "loss": 0.0318, "step": 8380 }, { "epoch": 0.14168946532914514, "grad_norm": 0.5521085858345032, "learning_rate": 7.084353626614879e-06, "loss": 0.0297, "step": 8390 }, { "epoch": 0.14185834431046712, "grad_norm": 0.6080648899078369, "learning_rate": 7.092797433082834e-06, "loss": 0.0261, "step": 8400 }, { "epoch": 0.1420272232917891, "grad_norm": 0.6619170308113098, "learning_rate": 7.10124123955079e-06, "loss": 0.0215, "step": 8410 }, { "epoch": 0.14219610227311108, "grad_norm": 0.5255106687545776, "learning_rate": 7.109685046018746e-06, "loss": 0.0278, "step": 8420 }, { "epoch": 0.14236498125443306, "grad_norm": 0.7859612703323364, "learning_rate": 7.118128852486702e-06, "loss": 0.0279, "step": 8430 }, { "epoch": 0.14253386023575507, "grad_norm": 0.9534808397293091, "learning_rate": 7.126572658954657e-06, "loss": 0.0285, "step": 8440 }, { "epoch": 0.14270273921707705, "grad_norm": 0.38796234130859375, "learning_rate": 7.135016465422613e-06, "loss": 0.0312, "step": 8450 }, { "epoch": 0.14287161819839903, "grad_norm": 0.8625936508178711, "learning_rate": 7.143460271890569e-06, "loss": 0.0317, "step": 8460 }, { "epoch": 0.143040497179721, "grad_norm": 0.48165667057037354, "learning_rate": 7.151904078358524e-06, "loss": 0.026, "step": 8470 }, { "epoch": 0.14320937616104298, "grad_norm": 0.564396321773529, "learning_rate": 7.160347884826481e-06, "loss": 0.0266, "step": 8480 }, { "epoch": 0.143378255142365, "grad_norm": 0.4453924894332886, "learning_rate": 7.168791691294437e-06, "loss": 0.0234, "step": 8490 }, { "epoch": 0.14354713412368697, "grad_norm": 0.6605733036994934, "learning_rate": 7.1772354977623915e-06, "loss": 0.0253, "step": 8500 }, { "epoch": 0.14371601310500895, "grad_norm": 0.7302463054656982, "learning_rate": 7.185679304230347e-06, "loss": 0.0194, "step": 8510 }, { "epoch": 0.14388489208633093, "grad_norm": 0.6695296764373779, "learning_rate": 7.194123110698304e-06, "loss": 0.0333, "step": 8520 }, { "epoch": 0.1440537710676529, "grad_norm": 0.7077109217643738, "learning_rate": 7.2025669171662595e-06, "loss": 0.0259, "step": 8530 }, { "epoch": 0.14422265004897492, "grad_norm": 0.6012173891067505, "learning_rate": 7.211010723634215e-06, "loss": 0.0259, "step": 8540 }, { "epoch": 0.1443915290302969, "grad_norm": 0.6956303119659424, "learning_rate": 7.21945453010217e-06, "loss": 0.0271, "step": 8550 }, { "epoch": 0.14456040801161887, "grad_norm": 0.5428749322891235, "learning_rate": 7.227898336570126e-06, "loss": 0.0292, "step": 8560 }, { "epoch": 0.14472928699294085, "grad_norm": 0.5987834334373474, "learning_rate": 7.236342143038082e-06, "loss": 0.0231, "step": 8570 }, { "epoch": 0.14489816597426283, "grad_norm": 0.4678768217563629, "learning_rate": 7.244785949506038e-06, "loss": 0.0265, "step": 8580 }, { "epoch": 0.14506704495558484, "grad_norm": 0.5171623826026917, "learning_rate": 7.253229755973994e-06, "loss": 0.0314, "step": 8590 }, { "epoch": 0.14523592393690682, "grad_norm": 0.5761619806289673, "learning_rate": 7.2616735624419495e-06, "loss": 0.0199, "step": 8600 }, { "epoch": 0.1454048029182288, "grad_norm": 0.9877454042434692, "learning_rate": 7.270117368909904e-06, "loss": 0.0363, "step": 8610 }, { "epoch": 0.14557368189955078, "grad_norm": 0.9053419828414917, "learning_rate": 7.278561175377861e-06, "loss": 0.0289, "step": 8620 }, { "epoch": 0.14574256088087276, "grad_norm": 0.7025904655456543, "learning_rate": 7.287004981845817e-06, "loss": 0.032, "step": 8630 }, { "epoch": 0.14591143986219476, "grad_norm": 0.9373230338096619, "learning_rate": 7.295448788313772e-06, "loss": 0.0265, "step": 8640 }, { "epoch": 0.14608031884351674, "grad_norm": 0.7615880370140076, "learning_rate": 7.303892594781729e-06, "loss": 0.0218, "step": 8650 }, { "epoch": 0.14624919782483872, "grad_norm": 0.5171211957931519, "learning_rate": 7.312336401249685e-06, "loss": 0.0199, "step": 8660 }, { "epoch": 0.1464180768061607, "grad_norm": 1.0446025133132935, "learning_rate": 7.3207802077176395e-06, "loss": 0.0222, "step": 8670 }, { "epoch": 0.14658695578748268, "grad_norm": 0.7181363105773926, "learning_rate": 7.329224014185595e-06, "loss": 0.0229, "step": 8680 }, { "epoch": 0.1467558347688047, "grad_norm": 0.7750243544578552, "learning_rate": 7.337667820653551e-06, "loss": 0.0285, "step": 8690 }, { "epoch": 0.14692471375012667, "grad_norm": 0.5968472957611084, "learning_rate": 7.3461116271215075e-06, "loss": 0.0206, "step": 8700 }, { "epoch": 0.14709359273144865, "grad_norm": 0.3912679851055145, "learning_rate": 7.354555433589463e-06, "loss": 0.024, "step": 8710 }, { "epoch": 0.14726247171277063, "grad_norm": 0.7991706728935242, "learning_rate": 7.362999240057418e-06, "loss": 0.0311, "step": 8720 }, { "epoch": 0.1474313506940926, "grad_norm": 1.0436598062515259, "learning_rate": 7.371443046525374e-06, "loss": 0.0271, "step": 8730 }, { "epoch": 0.14760022967541458, "grad_norm": 0.7234228849411011, "learning_rate": 7.3798868529933295e-06, "loss": 0.0297, "step": 8740 }, { "epoch": 0.1477691086567366, "grad_norm": 0.8511013388633728, "learning_rate": 7.388330659461286e-06, "loss": 0.0241, "step": 8750 }, { "epoch": 0.14793798763805857, "grad_norm": 0.7389883995056152, "learning_rate": 7.396774465929242e-06, "loss": 0.0198, "step": 8760 }, { "epoch": 0.14810686661938055, "grad_norm": 0.6432809233665466, "learning_rate": 7.4052182723971975e-06, "loss": 0.0281, "step": 8770 }, { "epoch": 0.14827574560070253, "grad_norm": 0.46126535534858704, "learning_rate": 7.413662078865152e-06, "loss": 0.0331, "step": 8780 }, { "epoch": 0.1484446245820245, "grad_norm": 0.45681968331336975, "learning_rate": 7.422105885333108e-06, "loss": 0.0209, "step": 8790 }, { "epoch": 0.14861350356334652, "grad_norm": 0.6761797070503235, "learning_rate": 7.430549691801065e-06, "loss": 0.0262, "step": 8800 }, { "epoch": 0.1487823825446685, "grad_norm": 0.9811164736747742, "learning_rate": 7.43899349826902e-06, "loss": 0.0234, "step": 8810 }, { "epoch": 0.14895126152599047, "grad_norm": 0.603226363658905, "learning_rate": 7.447437304736976e-06, "loss": 0.0289, "step": 8820 }, { "epoch": 0.14912014050731245, "grad_norm": 0.8395642042160034, "learning_rate": 7.455881111204933e-06, "loss": 0.0252, "step": 8830 }, { "epoch": 0.14928901948863443, "grad_norm": 0.5211999416351318, "learning_rate": 7.4643249176728875e-06, "loss": 0.0277, "step": 8840 }, { "epoch": 0.14945789846995644, "grad_norm": 0.8333593010902405, "learning_rate": 7.472768724140843e-06, "loss": 0.0244, "step": 8850 }, { "epoch": 0.14962677745127842, "grad_norm": 0.5337498784065247, "learning_rate": 7.481212530608799e-06, "loss": 0.0225, "step": 8860 }, { "epoch": 0.1497956564326004, "grad_norm": 1.7065420150756836, "learning_rate": 7.489656337076755e-06, "loss": 0.0194, "step": 8870 }, { "epoch": 0.14996453541392238, "grad_norm": 0.5083616375923157, "learning_rate": 7.498100143544711e-06, "loss": 0.0195, "step": 8880 }, { "epoch": 0.15013341439524436, "grad_norm": 0.6260021328926086, "learning_rate": 7.506543950012666e-06, "loss": 0.029, "step": 8890 }, { "epoch": 0.15030229337656636, "grad_norm": 0.6546813249588013, "learning_rate": 7.514987756480622e-06, "loss": 0.0178, "step": 8900 }, { "epoch": 0.15047117235788834, "grad_norm": 1.0041747093200684, "learning_rate": 7.5234315629485776e-06, "loss": 0.0241, "step": 8910 }, { "epoch": 0.15064005133921032, "grad_norm": 0.7132732272148132, "learning_rate": 7.531875369416533e-06, "loss": 0.0295, "step": 8920 }, { "epoch": 0.1508089303205323, "grad_norm": 1.2364150285720825, "learning_rate": 7.54031917588449e-06, "loss": 0.0319, "step": 8930 }, { "epoch": 0.15097780930185428, "grad_norm": 0.7450812458992004, "learning_rate": 7.5487629823524456e-06, "loss": 0.0253, "step": 8940 }, { "epoch": 0.1511466882831763, "grad_norm": 1.1177706718444824, "learning_rate": 7.5572067888204004e-06, "loss": 0.0237, "step": 8950 }, { "epoch": 0.15131556726449827, "grad_norm": 1.196389079093933, "learning_rate": 7.565650595288356e-06, "loss": 0.0277, "step": 8960 }, { "epoch": 0.15148444624582025, "grad_norm": 1.1744909286499023, "learning_rate": 7.574094401756312e-06, "loss": 0.0238, "step": 8970 }, { "epoch": 0.15165332522714223, "grad_norm": 0.417736679315567, "learning_rate": 7.582538208224268e-06, "loss": 0.025, "step": 8980 }, { "epoch": 0.1518222042084642, "grad_norm": 0.42292356491088867, "learning_rate": 7.590982014692224e-06, "loss": 0.0232, "step": 8990 }, { "epoch": 0.15199108318978619, "grad_norm": 0.942428708076477, "learning_rate": 7.59942582116018e-06, "loss": 0.0293, "step": 9000 }, { "epoch": 0.1521599621711082, "grad_norm": 0.6466562747955322, "learning_rate": 7.607869627628135e-06, "loss": 0.033, "step": 9010 }, { "epoch": 0.15232884115243017, "grad_norm": 1.459081768989563, "learning_rate": 7.616313434096091e-06, "loss": 0.0283, "step": 9020 }, { "epoch": 0.15249772013375215, "grad_norm": 0.659103274345398, "learning_rate": 7.624757240564047e-06, "loss": 0.0289, "step": 9030 }, { "epoch": 0.15266659911507413, "grad_norm": 0.789659321308136, "learning_rate": 7.633201047032004e-06, "loss": 0.0229, "step": 9040 }, { "epoch": 0.1528354780963961, "grad_norm": 0.5552644729614258, "learning_rate": 7.64164485349996e-06, "loss": 0.0314, "step": 9050 }, { "epoch": 0.15300435707771812, "grad_norm": 0.38360726833343506, "learning_rate": 7.650088659967913e-06, "loss": 0.0262, "step": 9060 }, { "epoch": 0.1531732360590401, "grad_norm": 1.0132044553756714, "learning_rate": 7.658532466435869e-06, "loss": 0.0227, "step": 9070 }, { "epoch": 0.15334211504036208, "grad_norm": 0.47871652245521545, "learning_rate": 7.666976272903825e-06, "loss": 0.0217, "step": 9080 }, { "epoch": 0.15351099402168406, "grad_norm": 0.7566169500350952, "learning_rate": 7.675420079371782e-06, "loss": 0.0207, "step": 9090 }, { "epoch": 0.15367987300300603, "grad_norm": 0.7285457253456116, "learning_rate": 7.683863885839738e-06, "loss": 0.0249, "step": 9100 }, { "epoch": 0.15384875198432804, "grad_norm": 1.1430641412734985, "learning_rate": 7.692307692307694e-06, "loss": 0.0311, "step": 9110 }, { "epoch": 0.15401763096565002, "grad_norm": 0.8826469779014587, "learning_rate": 7.700751498775648e-06, "loss": 0.0344, "step": 9120 }, { "epoch": 0.154186509946972, "grad_norm": 0.49296239018440247, "learning_rate": 7.709195305243603e-06, "loss": 0.0233, "step": 9130 }, { "epoch": 0.15435538892829398, "grad_norm": 0.6690921783447266, "learning_rate": 7.71763911171156e-06, "loss": 0.0201, "step": 9140 }, { "epoch": 0.15452426790961596, "grad_norm": 0.4823167324066162, "learning_rate": 7.726082918179516e-06, "loss": 0.0237, "step": 9150 }, { "epoch": 0.15469314689093797, "grad_norm": 0.8466058373451233, "learning_rate": 7.734526724647472e-06, "loss": 0.0293, "step": 9160 }, { "epoch": 0.15486202587225995, "grad_norm": 1.1035593748092651, "learning_rate": 7.742970531115428e-06, "loss": 0.0189, "step": 9170 }, { "epoch": 0.15503090485358192, "grad_norm": 0.5730131268501282, "learning_rate": 7.751414337583384e-06, "loss": 0.0269, "step": 9180 }, { "epoch": 0.1551997838349039, "grad_norm": 0.8047856688499451, "learning_rate": 7.75985814405134e-06, "loss": 0.0243, "step": 9190 }, { "epoch": 0.15536866281622588, "grad_norm": 0.6468626260757446, "learning_rate": 7.768301950519295e-06, "loss": 0.0272, "step": 9200 }, { "epoch": 0.1555375417975479, "grad_norm": 0.7310505509376526, "learning_rate": 7.77674575698725e-06, "loss": 0.0263, "step": 9210 }, { "epoch": 0.15570642077886987, "grad_norm": 0.8928807377815247, "learning_rate": 7.785189563455206e-06, "loss": 0.0284, "step": 9220 }, { "epoch": 0.15587529976019185, "grad_norm": 0.8265830874443054, "learning_rate": 7.793633369923162e-06, "loss": 0.0355, "step": 9230 }, { "epoch": 0.15604417874151383, "grad_norm": 0.7041018009185791, "learning_rate": 7.802077176391118e-06, "loss": 0.0246, "step": 9240 }, { "epoch": 0.1562130577228358, "grad_norm": 0.5974507927894592, "learning_rate": 7.810520982859074e-06, "loss": 0.0273, "step": 9250 }, { "epoch": 0.1563819367041578, "grad_norm": 0.6266682744026184, "learning_rate": 7.81896478932703e-06, "loss": 0.0189, "step": 9260 }, { "epoch": 0.1565508156854798, "grad_norm": 0.865249514579773, "learning_rate": 7.827408595794985e-06, "loss": 0.029, "step": 9270 }, { "epoch": 0.15671969466680177, "grad_norm": 0.7133133411407471, "learning_rate": 7.83585240226294e-06, "loss": 0.0293, "step": 9280 }, { "epoch": 0.15688857364812375, "grad_norm": 0.5880972743034363, "learning_rate": 7.844296208730896e-06, "loss": 0.0192, "step": 9290 }, { "epoch": 0.15705745262944573, "grad_norm": 0.4244532585144043, "learning_rate": 7.852740015198852e-06, "loss": 0.0243, "step": 9300 }, { "epoch": 0.1572263316107677, "grad_norm": 0.5400128364562988, "learning_rate": 7.861183821666808e-06, "loss": 0.0241, "step": 9310 }, { "epoch": 0.15739521059208972, "grad_norm": 0.8132038116455078, "learning_rate": 7.869627628134764e-06, "loss": 0.027, "step": 9320 }, { "epoch": 0.1575640895734117, "grad_norm": 0.6960012316703796, "learning_rate": 7.87807143460272e-06, "loss": 0.0239, "step": 9330 }, { "epoch": 0.15773296855473368, "grad_norm": 0.5879611968994141, "learning_rate": 7.886515241070675e-06, "loss": 0.0291, "step": 9340 }, { "epoch": 0.15790184753605566, "grad_norm": 0.77744460105896, "learning_rate": 7.89495904753863e-06, "loss": 0.0267, "step": 9350 }, { "epoch": 0.15807072651737764, "grad_norm": 0.8511924743652344, "learning_rate": 7.903402854006587e-06, "loss": 0.0182, "step": 9360 }, { "epoch": 0.15823960549869964, "grad_norm": 0.6264917254447937, "learning_rate": 7.911846660474542e-06, "loss": 0.0205, "step": 9370 }, { "epoch": 0.15840848448002162, "grad_norm": 0.6096451282501221, "learning_rate": 7.920290466942498e-06, "loss": 0.032, "step": 9380 }, { "epoch": 0.1585773634613436, "grad_norm": 0.6685690879821777, "learning_rate": 7.928734273410454e-06, "loss": 0.0186, "step": 9390 }, { "epoch": 0.15874624244266558, "grad_norm": 0.4904281497001648, "learning_rate": 7.937178079878411e-06, "loss": 0.0341, "step": 9400 }, { "epoch": 0.15891512142398756, "grad_norm": 1.175802230834961, "learning_rate": 7.945621886346365e-06, "loss": 0.0334, "step": 9410 }, { "epoch": 0.15908400040530957, "grad_norm": 0.8451055288314819, "learning_rate": 7.95406569281432e-06, "loss": 0.0308, "step": 9420 }, { "epoch": 0.15925287938663155, "grad_norm": 0.7504803538322449, "learning_rate": 7.962509499282277e-06, "loss": 0.0184, "step": 9430 }, { "epoch": 0.15942175836795353, "grad_norm": 0.5174987316131592, "learning_rate": 7.970953305750232e-06, "loss": 0.023, "step": 9440 }, { "epoch": 0.1595906373492755, "grad_norm": 0.656176745891571, "learning_rate": 7.97939711221819e-06, "loss": 0.0304, "step": 9450 }, { "epoch": 0.15975951633059748, "grad_norm": 0.8219150900840759, "learning_rate": 7.987840918686144e-06, "loss": 0.0225, "step": 9460 }, { "epoch": 0.1599283953119195, "grad_norm": 0.7753942608833313, "learning_rate": 7.9962847251541e-06, "loss": 0.0251, "step": 9470 }, { "epoch": 0.16009727429324147, "grad_norm": 0.7428410053253174, "learning_rate": 8.004728531622055e-06, "loss": 0.0207, "step": 9480 }, { "epoch": 0.16026615327456345, "grad_norm": 1.1648149490356445, "learning_rate": 8.013172338090013e-06, "loss": 0.0208, "step": 9490 }, { "epoch": 0.16043503225588543, "grad_norm": 0.698803722858429, "learning_rate": 8.021616144557968e-06, "loss": 0.024, "step": 9500 }, { "epoch": 0.1606039112372074, "grad_norm": 0.7957724928855896, "learning_rate": 8.030059951025924e-06, "loss": 0.0215, "step": 9510 }, { "epoch": 0.16077279021852942, "grad_norm": 1.0153863430023193, "learning_rate": 8.038503757493878e-06, "loss": 0.0262, "step": 9520 }, { "epoch": 0.1609416691998514, "grad_norm": 1.0146210193634033, "learning_rate": 8.046947563961834e-06, "loss": 0.022, "step": 9530 }, { "epoch": 0.16111054818117337, "grad_norm": 0.7417749762535095, "learning_rate": 8.055391370429791e-06, "loss": 0.0274, "step": 9540 }, { "epoch": 0.16127942716249535, "grad_norm": 1.140822172164917, "learning_rate": 8.063835176897747e-06, "loss": 0.0291, "step": 9550 }, { "epoch": 0.16144830614381733, "grad_norm": 0.5979601740837097, "learning_rate": 8.072278983365703e-06, "loss": 0.0183, "step": 9560 }, { "epoch": 0.1616171851251393, "grad_norm": 0.6509764790534973, "learning_rate": 8.080722789833658e-06, "loss": 0.0238, "step": 9570 }, { "epoch": 0.16178606410646132, "grad_norm": 0.836624801158905, "learning_rate": 8.089166596301612e-06, "loss": 0.0336, "step": 9580 }, { "epoch": 0.1619549430877833, "grad_norm": 0.6009635925292969, "learning_rate": 8.09761040276957e-06, "loss": 0.0196, "step": 9590 }, { "epoch": 0.16212382206910528, "grad_norm": 0.5371243953704834, "learning_rate": 8.106054209237525e-06, "loss": 0.0204, "step": 9600 }, { "epoch": 0.16229270105042726, "grad_norm": 0.8353222608566284, "learning_rate": 8.114498015705481e-06, "loss": 0.0343, "step": 9610 }, { "epoch": 0.16246158003174924, "grad_norm": 0.4745965600013733, "learning_rate": 8.122941822173437e-06, "loss": 0.0213, "step": 9620 }, { "epoch": 0.16263045901307124, "grad_norm": 1.6374354362487793, "learning_rate": 8.131385628641391e-06, "loss": 0.0214, "step": 9630 }, { "epoch": 0.16279933799439322, "grad_norm": 0.5918369293212891, "learning_rate": 8.139829435109348e-06, "loss": 0.0231, "step": 9640 }, { "epoch": 0.1629682169757152, "grad_norm": 0.8262466192245483, "learning_rate": 8.148273241577304e-06, "loss": 0.0155, "step": 9650 }, { "epoch": 0.16313709595703718, "grad_norm": 0.6775799989700317, "learning_rate": 8.15671704804526e-06, "loss": 0.0192, "step": 9660 }, { "epoch": 0.16330597493835916, "grad_norm": 0.667338490486145, "learning_rate": 8.165160854513215e-06, "loss": 0.027, "step": 9670 }, { "epoch": 0.16347485391968117, "grad_norm": 0.9058126211166382, "learning_rate": 8.173604660981171e-06, "loss": 0.0309, "step": 9680 }, { "epoch": 0.16364373290100315, "grad_norm": 0.46606990694999695, "learning_rate": 8.182048467449127e-06, "loss": 0.0245, "step": 9690 }, { "epoch": 0.16381261188232513, "grad_norm": 0.6624146699905396, "learning_rate": 8.190492273917083e-06, "loss": 0.0195, "step": 9700 }, { "epoch": 0.1639814908636471, "grad_norm": 0.5917381048202515, "learning_rate": 8.198936080385038e-06, "loss": 0.0248, "step": 9710 }, { "epoch": 0.16415036984496909, "grad_norm": 0.6708534955978394, "learning_rate": 8.207379886852994e-06, "loss": 0.0239, "step": 9720 }, { "epoch": 0.1643192488262911, "grad_norm": 0.42430436611175537, "learning_rate": 8.21582369332095e-06, "loss": 0.0283, "step": 9730 }, { "epoch": 0.16448812780761307, "grad_norm": 0.8212451338768005, "learning_rate": 8.224267499788905e-06, "loss": 0.0243, "step": 9740 }, { "epoch": 0.16465700678893505, "grad_norm": 0.38725754618644714, "learning_rate": 8.232711306256861e-06, "loss": 0.0291, "step": 9750 }, { "epoch": 0.16482588577025703, "grad_norm": 0.5020327568054199, "learning_rate": 8.241155112724817e-06, "loss": 0.0249, "step": 9760 }, { "epoch": 0.164994764751579, "grad_norm": 0.48994648456573486, "learning_rate": 8.249598919192773e-06, "loss": 0.02, "step": 9770 }, { "epoch": 0.16516364373290102, "grad_norm": 0.6430173516273499, "learning_rate": 8.258042725660728e-06, "loss": 0.0203, "step": 9780 }, { "epoch": 0.165332522714223, "grad_norm": 0.397400826215744, "learning_rate": 8.266486532128684e-06, "loss": 0.0243, "step": 9790 }, { "epoch": 0.16550140169554498, "grad_norm": 0.7359941005706787, "learning_rate": 8.27493033859664e-06, "loss": 0.0225, "step": 9800 }, { "epoch": 0.16567028067686695, "grad_norm": 0.6520860195159912, "learning_rate": 8.283374145064595e-06, "loss": 0.0201, "step": 9810 }, { "epoch": 0.16583915965818893, "grad_norm": 0.4707304835319519, "learning_rate": 8.291817951532551e-06, "loss": 0.0255, "step": 9820 }, { "epoch": 0.1660080386395109, "grad_norm": 0.6044006943702698, "learning_rate": 8.300261758000507e-06, "loss": 0.0262, "step": 9830 }, { "epoch": 0.16617691762083292, "grad_norm": 0.5344979763031006, "learning_rate": 8.308705564468463e-06, "loss": 0.0194, "step": 9840 }, { "epoch": 0.1663457966021549, "grad_norm": 0.5736908316612244, "learning_rate": 8.31714937093642e-06, "loss": 0.0206, "step": 9850 }, { "epoch": 0.16651467558347688, "grad_norm": 0.786710262298584, "learning_rate": 8.325593177404374e-06, "loss": 0.025, "step": 9860 }, { "epoch": 0.16668355456479886, "grad_norm": 0.7198952436447144, "learning_rate": 8.33403698387233e-06, "loss": 0.0286, "step": 9870 }, { "epoch": 0.16685243354612084, "grad_norm": 0.8855789303779602, "learning_rate": 8.342480790340285e-06, "loss": 0.0323, "step": 9880 }, { "epoch": 0.16702131252744284, "grad_norm": 0.9888249635696411, "learning_rate": 8.350924596808241e-06, "loss": 0.0218, "step": 9890 }, { "epoch": 0.16719019150876482, "grad_norm": 1.0433447360992432, "learning_rate": 8.359368403276199e-06, "loss": 0.03, "step": 9900 }, { "epoch": 0.1673590704900868, "grad_norm": 0.8720406293869019, "learning_rate": 8.367812209744154e-06, "loss": 0.0166, "step": 9910 }, { "epoch": 0.16752794947140878, "grad_norm": 0.3344874382019043, "learning_rate": 8.376256016212108e-06, "loss": 0.0178, "step": 9920 }, { "epoch": 0.16769682845273076, "grad_norm": 1.2925947904586792, "learning_rate": 8.384699822680064e-06, "loss": 0.0316, "step": 9930 }, { "epoch": 0.16786570743405277, "grad_norm": 0.8986530303955078, "learning_rate": 8.39314362914802e-06, "loss": 0.0224, "step": 9940 }, { "epoch": 0.16803458641537475, "grad_norm": 0.5507974028587341, "learning_rate": 8.401587435615977e-06, "loss": 0.0176, "step": 9950 }, { "epoch": 0.16820346539669673, "grad_norm": 1.0177147388458252, "learning_rate": 8.410031242083933e-06, "loss": 0.0229, "step": 9960 }, { "epoch": 0.1683723443780187, "grad_norm": 0.5627002716064453, "learning_rate": 8.418475048551887e-06, "loss": 0.022, "step": 9970 }, { "epoch": 0.16854122335934069, "grad_norm": 0.6353285908699036, "learning_rate": 8.426918855019843e-06, "loss": 0.0233, "step": 9980 }, { "epoch": 0.1687101023406627, "grad_norm": 0.894352376461029, "learning_rate": 8.4353626614878e-06, "loss": 0.0321, "step": 9990 }, { "epoch": 0.16887898132198467, "grad_norm": 0.8122364282608032, "learning_rate": 8.443806467955756e-06, "loss": 0.031, "step": 10000 }, { "epoch": 0.16904786030330665, "grad_norm": 0.7611676454544067, "learning_rate": 8.452250274423711e-06, "loss": 0.0317, "step": 10010 }, { "epoch": 0.16921673928462863, "grad_norm": 0.6512179970741272, "learning_rate": 8.460694080891667e-06, "loss": 0.0254, "step": 10020 }, { "epoch": 0.1693856182659506, "grad_norm": 0.5101268887519836, "learning_rate": 8.469137887359621e-06, "loss": 0.0216, "step": 10030 }, { "epoch": 0.16955449724727262, "grad_norm": 0.6685275435447693, "learning_rate": 8.477581693827579e-06, "loss": 0.0317, "step": 10040 }, { "epoch": 0.1697233762285946, "grad_norm": 0.2907133996486664, "learning_rate": 8.486025500295534e-06, "loss": 0.017, "step": 10050 }, { "epoch": 0.16989225520991658, "grad_norm": 0.6469211578369141, "learning_rate": 8.49446930676349e-06, "loss": 0.0303, "step": 10060 }, { "epoch": 0.17006113419123856, "grad_norm": 0.5327174663543701, "learning_rate": 8.502913113231446e-06, "loss": 0.0226, "step": 10070 }, { "epoch": 0.17023001317256053, "grad_norm": 0.5200572609901428, "learning_rate": 8.511356919699402e-06, "loss": 0.0185, "step": 10080 }, { "epoch": 0.17039889215388251, "grad_norm": 0.5748829245567322, "learning_rate": 8.519800726167357e-06, "loss": 0.0353, "step": 10090 }, { "epoch": 0.17056777113520452, "grad_norm": 0.6301975846290588, "learning_rate": 8.528244532635313e-06, "loss": 0.0196, "step": 10100 }, { "epoch": 0.1707366501165265, "grad_norm": 0.704617440700531, "learning_rate": 8.536688339103269e-06, "loss": 0.0271, "step": 10110 }, { "epoch": 0.17090552909784848, "grad_norm": 0.5856525301933289, "learning_rate": 8.545132145571224e-06, "loss": 0.0236, "step": 10120 }, { "epoch": 0.17107440807917046, "grad_norm": 1.631162405014038, "learning_rate": 8.55357595203918e-06, "loss": 0.0186, "step": 10130 }, { "epoch": 0.17124328706049244, "grad_norm": 0.8358505964279175, "learning_rate": 8.562019758507136e-06, "loss": 0.0298, "step": 10140 }, { "epoch": 0.17141216604181445, "grad_norm": 0.7410273551940918, "learning_rate": 8.570463564975092e-06, "loss": 0.0274, "step": 10150 }, { "epoch": 0.17158104502313642, "grad_norm": 0.567267119884491, "learning_rate": 8.578907371443047e-06, "loss": 0.0195, "step": 10160 }, { "epoch": 0.1717499240044584, "grad_norm": 0.4039730727672577, "learning_rate": 8.587351177911003e-06, "loss": 0.0225, "step": 10170 }, { "epoch": 0.17191880298578038, "grad_norm": 0.49185991287231445, "learning_rate": 8.595794984378959e-06, "loss": 0.0209, "step": 10180 }, { "epoch": 0.17208768196710236, "grad_norm": 1.0941450595855713, "learning_rate": 8.604238790846914e-06, "loss": 0.0273, "step": 10190 }, { "epoch": 0.17225656094842437, "grad_norm": 0.6085687875747681, "learning_rate": 8.61268259731487e-06, "loss": 0.021, "step": 10200 }, { "epoch": 0.17242543992974635, "grad_norm": 1.1875650882720947, "learning_rate": 8.621126403782826e-06, "loss": 0.0254, "step": 10210 }, { "epoch": 0.17259431891106833, "grad_norm": 0.8196940422058105, "learning_rate": 8.629570210250782e-06, "loss": 0.021, "step": 10220 }, { "epoch": 0.1727631978923903, "grad_norm": 0.5267223119735718, "learning_rate": 8.638014016718737e-06, "loss": 0.0267, "step": 10230 }, { "epoch": 0.1729320768737123, "grad_norm": 0.5580756664276123, "learning_rate": 8.646457823186693e-06, "loss": 0.0252, "step": 10240 }, { "epoch": 0.1731009558550343, "grad_norm": 0.47954678535461426, "learning_rate": 8.654901629654649e-06, "loss": 0.0246, "step": 10250 }, { "epoch": 0.17326983483635627, "grad_norm": 0.8336713314056396, "learning_rate": 8.663345436122604e-06, "loss": 0.0253, "step": 10260 }, { "epoch": 0.17343871381767825, "grad_norm": 0.6275038123130798, "learning_rate": 8.67178924259056e-06, "loss": 0.0294, "step": 10270 }, { "epoch": 0.17360759279900023, "grad_norm": 0.6810721158981323, "learning_rate": 8.680233049058516e-06, "loss": 0.0245, "step": 10280 }, { "epoch": 0.1737764717803222, "grad_norm": 0.820842444896698, "learning_rate": 8.688676855526472e-06, "loss": 0.0283, "step": 10290 }, { "epoch": 0.17394535076164422, "grad_norm": 0.5076444745063782, "learning_rate": 8.697120661994429e-06, "loss": 0.0214, "step": 10300 }, { "epoch": 0.1741142297429662, "grad_norm": 0.6619169116020203, "learning_rate": 8.705564468462383e-06, "loss": 0.0196, "step": 10310 }, { "epoch": 0.17428310872428818, "grad_norm": 0.41596201062202454, "learning_rate": 8.714008274930339e-06, "loss": 0.0188, "step": 10320 }, { "epoch": 0.17445198770561016, "grad_norm": 0.608745276927948, "learning_rate": 8.722452081398294e-06, "loss": 0.0265, "step": 10330 }, { "epoch": 0.17462086668693214, "grad_norm": 0.36283841729164124, "learning_rate": 8.73089588786625e-06, "loss": 0.0245, "step": 10340 }, { "epoch": 0.17478974566825414, "grad_norm": 0.3668351173400879, "learning_rate": 8.739339694334208e-06, "loss": 0.0233, "step": 10350 }, { "epoch": 0.17495862464957612, "grad_norm": 0.4525785744190216, "learning_rate": 8.747783500802163e-06, "loss": 0.0175, "step": 10360 }, { "epoch": 0.1751275036308981, "grad_norm": 0.470765620470047, "learning_rate": 8.756227307270117e-06, "loss": 0.0169, "step": 10370 }, { "epoch": 0.17529638261222008, "grad_norm": 1.1896953582763672, "learning_rate": 8.764671113738073e-06, "loss": 0.0292, "step": 10380 }, { "epoch": 0.17546526159354206, "grad_norm": 0.5696656703948975, "learning_rate": 8.773114920206029e-06, "loss": 0.0211, "step": 10390 }, { "epoch": 0.17563414057486404, "grad_norm": 0.4538893401622772, "learning_rate": 8.781558726673986e-06, "loss": 0.0206, "step": 10400 }, { "epoch": 0.17580301955618605, "grad_norm": 0.47685733437538147, "learning_rate": 8.790002533141942e-06, "loss": 0.0289, "step": 10410 }, { "epoch": 0.17597189853750803, "grad_norm": 0.5298289656639099, "learning_rate": 8.798446339609898e-06, "loss": 0.0261, "step": 10420 }, { "epoch": 0.17614077751883, "grad_norm": 0.7344599962234497, "learning_rate": 8.806890146077852e-06, "loss": 0.0295, "step": 10430 }, { "epoch": 0.17630965650015198, "grad_norm": 0.8257589340209961, "learning_rate": 8.815333952545807e-06, "loss": 0.0285, "step": 10440 }, { "epoch": 0.17647853548147396, "grad_norm": 0.9098700284957886, "learning_rate": 8.823777759013765e-06, "loss": 0.0298, "step": 10450 }, { "epoch": 0.17664741446279597, "grad_norm": 0.6701042652130127, "learning_rate": 8.83222156548172e-06, "loss": 0.0218, "step": 10460 }, { "epoch": 0.17681629344411795, "grad_norm": 0.5204718708992004, "learning_rate": 8.840665371949676e-06, "loss": 0.0194, "step": 10470 }, { "epoch": 0.17698517242543993, "grad_norm": 0.6172491312026978, "learning_rate": 8.84910917841763e-06, "loss": 0.0149, "step": 10480 }, { "epoch": 0.1771540514067619, "grad_norm": 0.3661925196647644, "learning_rate": 8.857552984885588e-06, "loss": 0.0282, "step": 10490 }, { "epoch": 0.1773229303880839, "grad_norm": 0.5069329142570496, "learning_rate": 8.865996791353543e-06, "loss": 0.0265, "step": 10500 }, { "epoch": 0.1774918093694059, "grad_norm": 0.5684731006622314, "learning_rate": 8.874440597821499e-06, "loss": 0.0243, "step": 10510 }, { "epoch": 0.17766068835072787, "grad_norm": 0.7573150396347046, "learning_rate": 8.882884404289455e-06, "loss": 0.0259, "step": 10520 }, { "epoch": 0.17782956733204985, "grad_norm": 0.3337867856025696, "learning_rate": 8.89132821075741e-06, "loss": 0.0166, "step": 10530 }, { "epoch": 0.17799844631337183, "grad_norm": 0.5824311971664429, "learning_rate": 8.899772017225366e-06, "loss": 0.0264, "step": 10540 }, { "epoch": 0.1781673252946938, "grad_norm": 1.268000602722168, "learning_rate": 8.908215823693322e-06, "loss": 0.0274, "step": 10550 }, { "epoch": 0.17833620427601582, "grad_norm": 0.6390296220779419, "learning_rate": 8.916659630161278e-06, "loss": 0.0264, "step": 10560 }, { "epoch": 0.1785050832573378, "grad_norm": 0.4512906074523926, "learning_rate": 8.925103436629233e-06, "loss": 0.0194, "step": 10570 }, { "epoch": 0.17867396223865978, "grad_norm": 0.8487885594367981, "learning_rate": 8.933547243097189e-06, "loss": 0.0196, "step": 10580 }, { "epoch": 0.17884284121998176, "grad_norm": 0.39800211787223816, "learning_rate": 8.941991049565145e-06, "loss": 0.0288, "step": 10590 }, { "epoch": 0.17901172020130374, "grad_norm": 0.7670180201530457, "learning_rate": 8.9504348560331e-06, "loss": 0.0287, "step": 10600 }, { "epoch": 0.17918059918262574, "grad_norm": 0.7117230892181396, "learning_rate": 8.958878662501056e-06, "loss": 0.0241, "step": 10610 }, { "epoch": 0.17934947816394772, "grad_norm": 0.15308979153633118, "learning_rate": 8.967322468969012e-06, "loss": 0.0178, "step": 10620 }, { "epoch": 0.1795183571452697, "grad_norm": 0.5344383716583252, "learning_rate": 8.975766275436968e-06, "loss": 0.0197, "step": 10630 }, { "epoch": 0.17968723612659168, "grad_norm": 0.727837324142456, "learning_rate": 8.984210081904923e-06, "loss": 0.0224, "step": 10640 }, { "epoch": 0.17985611510791366, "grad_norm": 0.351887583732605, "learning_rate": 8.992653888372879e-06, "loss": 0.0228, "step": 10650 }, { "epoch": 0.18002499408923564, "grad_norm": 0.6908945441246033, "learning_rate": 9.001097694840835e-06, "loss": 0.033, "step": 10660 }, { "epoch": 0.18019387307055765, "grad_norm": 0.5336304306983948, "learning_rate": 9.00954150130879e-06, "loss": 0.0201, "step": 10670 }, { "epoch": 0.18036275205187963, "grad_norm": 1.0046935081481934, "learning_rate": 9.017985307776746e-06, "loss": 0.0277, "step": 10680 }, { "epoch": 0.1805316310332016, "grad_norm": 0.3801724314689636, "learning_rate": 9.026429114244702e-06, "loss": 0.0275, "step": 10690 }, { "epoch": 0.18070051001452359, "grad_norm": 0.6878061890602112, "learning_rate": 9.034872920712658e-06, "loss": 0.0158, "step": 10700 }, { "epoch": 0.18086938899584556, "grad_norm": 0.3330031931400299, "learning_rate": 9.043316727180613e-06, "loss": 0.0266, "step": 10710 }, { "epoch": 0.18103826797716757, "grad_norm": 0.49852776527404785, "learning_rate": 9.051760533648569e-06, "loss": 0.026, "step": 10720 }, { "epoch": 0.18120714695848955, "grad_norm": 0.8927915096282959, "learning_rate": 9.060204340116525e-06, "loss": 0.0255, "step": 10730 }, { "epoch": 0.18137602593981153, "grad_norm": 0.5409148335456848, "learning_rate": 9.06864814658448e-06, "loss": 0.0258, "step": 10740 }, { "epoch": 0.1815449049211335, "grad_norm": 0.48345130681991577, "learning_rate": 9.077091953052436e-06, "loss": 0.0194, "step": 10750 }, { "epoch": 0.1817137839024555, "grad_norm": 0.5153825283050537, "learning_rate": 9.085535759520394e-06, "loss": 0.0211, "step": 10760 }, { "epoch": 0.1818826628837775, "grad_norm": 0.3957502543926239, "learning_rate": 9.093979565988348e-06, "loss": 0.0172, "step": 10770 }, { "epoch": 0.18205154186509948, "grad_norm": 0.36593711376190186, "learning_rate": 9.102423372456303e-06, "loss": 0.0222, "step": 10780 }, { "epoch": 0.18222042084642145, "grad_norm": 0.7464541792869568, "learning_rate": 9.110867178924259e-06, "loss": 0.0252, "step": 10790 }, { "epoch": 0.18238929982774343, "grad_norm": 1.2551143169403076, "learning_rate": 9.119310985392217e-06, "loss": 0.0235, "step": 10800 }, { "epoch": 0.1825581788090654, "grad_norm": 1.337477207183838, "learning_rate": 9.127754791860172e-06, "loss": 0.0234, "step": 10810 }, { "epoch": 0.18272705779038742, "grad_norm": 0.4931284487247467, "learning_rate": 9.136198598328128e-06, "loss": 0.024, "step": 10820 }, { "epoch": 0.1828959367717094, "grad_norm": 0.3226066529750824, "learning_rate": 9.144642404796082e-06, "loss": 0.027, "step": 10830 }, { "epoch": 0.18306481575303138, "grad_norm": 0.5521312952041626, "learning_rate": 9.153086211264038e-06, "loss": 0.0213, "step": 10840 }, { "epoch": 0.18323369473435336, "grad_norm": 0.6311039328575134, "learning_rate": 9.161530017731995e-06, "loss": 0.0267, "step": 10850 }, { "epoch": 0.18340257371567534, "grad_norm": 0.6132925152778625, "learning_rate": 9.16997382419995e-06, "loss": 0.0295, "step": 10860 }, { "epoch": 0.18357145269699734, "grad_norm": 0.4181976914405823, "learning_rate": 9.178417630667907e-06, "loss": 0.0203, "step": 10870 }, { "epoch": 0.18374033167831932, "grad_norm": 1.5171884298324585, "learning_rate": 9.18686143713586e-06, "loss": 0.0242, "step": 10880 }, { "epoch": 0.1839092106596413, "grad_norm": 0.604030430316925, "learning_rate": 9.195305243603816e-06, "loss": 0.0322, "step": 10890 }, { "epoch": 0.18407808964096328, "grad_norm": 0.5242132544517517, "learning_rate": 9.203749050071774e-06, "loss": 0.0227, "step": 10900 }, { "epoch": 0.18424696862228526, "grad_norm": 0.9391341209411621, "learning_rate": 9.21219285653973e-06, "loss": 0.0286, "step": 10910 }, { "epoch": 0.18441584760360724, "grad_norm": 0.8224199414253235, "learning_rate": 9.220636663007685e-06, "loss": 0.0186, "step": 10920 }, { "epoch": 0.18458472658492925, "grad_norm": 0.6385284662246704, "learning_rate": 9.22908046947564e-06, "loss": 0.0269, "step": 10930 }, { "epoch": 0.18475360556625123, "grad_norm": 0.8068597912788391, "learning_rate": 9.237524275943595e-06, "loss": 0.0255, "step": 10940 }, { "epoch": 0.1849224845475732, "grad_norm": 0.5114062428474426, "learning_rate": 9.245968082411552e-06, "loss": 0.0227, "step": 10950 }, { "epoch": 0.1850913635288952, "grad_norm": 1.249602198600769, "learning_rate": 9.254411888879508e-06, "loss": 0.0278, "step": 10960 }, { "epoch": 0.18526024251021717, "grad_norm": 0.5511909127235413, "learning_rate": 9.262855695347464e-06, "loss": 0.0224, "step": 10970 }, { "epoch": 0.18542912149153917, "grad_norm": 0.5877230763435364, "learning_rate": 9.27129950181542e-06, "loss": 0.026, "step": 10980 }, { "epoch": 0.18559800047286115, "grad_norm": 0.2088562250137329, "learning_rate": 9.279743308283375e-06, "loss": 0.0221, "step": 10990 }, { "epoch": 0.18576687945418313, "grad_norm": 0.6409210562705994, "learning_rate": 9.28818711475133e-06, "loss": 0.0256, "step": 11000 }, { "epoch": 0.1859357584355051, "grad_norm": 0.41283226013183594, "learning_rate": 9.296630921219287e-06, "loss": 0.0189, "step": 11010 }, { "epoch": 0.1861046374168271, "grad_norm": 0.6499667763710022, "learning_rate": 9.305074727687242e-06, "loss": 0.0251, "step": 11020 }, { "epoch": 0.1862735163981491, "grad_norm": 0.5403351187705994, "learning_rate": 9.313518534155198e-06, "loss": 0.0278, "step": 11030 }, { "epoch": 0.18644239537947108, "grad_norm": 0.768379271030426, "learning_rate": 9.321962340623154e-06, "loss": 0.031, "step": 11040 }, { "epoch": 0.18661127436079306, "grad_norm": 0.3782619833946228, "learning_rate": 9.33040614709111e-06, "loss": 0.0208, "step": 11050 }, { "epoch": 0.18678015334211504, "grad_norm": 0.7442289590835571, "learning_rate": 9.338849953559065e-06, "loss": 0.0181, "step": 11060 }, { "epoch": 0.18694903232343701, "grad_norm": 0.8636730909347534, "learning_rate": 9.347293760027021e-06, "loss": 0.0294, "step": 11070 }, { "epoch": 0.18711791130475902, "grad_norm": 0.8814787864685059, "learning_rate": 9.355737566494977e-06, "loss": 0.0201, "step": 11080 }, { "epoch": 0.187286790286081, "grad_norm": 0.815417468547821, "learning_rate": 9.364181372962932e-06, "loss": 0.0222, "step": 11090 }, { "epoch": 0.18745566926740298, "grad_norm": 0.9170165657997131, "learning_rate": 9.372625179430888e-06, "loss": 0.0237, "step": 11100 }, { "epoch": 0.18762454824872496, "grad_norm": 0.6802468299865723, "learning_rate": 9.381068985898844e-06, "loss": 0.0183, "step": 11110 }, { "epoch": 0.18779342723004694, "grad_norm": 0.8495076894760132, "learning_rate": 9.3895127923668e-06, "loss": 0.0255, "step": 11120 }, { "epoch": 0.18796230621136895, "grad_norm": 0.4068478047847748, "learning_rate": 9.397956598834755e-06, "loss": 0.0149, "step": 11130 }, { "epoch": 0.18813118519269093, "grad_norm": 0.50099116563797, "learning_rate": 9.406400405302711e-06, "loss": 0.0196, "step": 11140 }, { "epoch": 0.1883000641740129, "grad_norm": 0.8940195441246033, "learning_rate": 9.414844211770667e-06, "loss": 0.0335, "step": 11150 }, { "epoch": 0.18846894315533488, "grad_norm": 0.5865596532821655, "learning_rate": 9.423288018238624e-06, "loss": 0.0242, "step": 11160 }, { "epoch": 0.18863782213665686, "grad_norm": 0.6378818154335022, "learning_rate": 9.431731824706578e-06, "loss": 0.0181, "step": 11170 }, { "epoch": 0.18880670111797887, "grad_norm": 0.38039612770080566, "learning_rate": 9.440175631174534e-06, "loss": 0.0201, "step": 11180 }, { "epoch": 0.18897558009930085, "grad_norm": 0.22521519660949707, "learning_rate": 9.44861943764249e-06, "loss": 0.0214, "step": 11190 }, { "epoch": 0.18914445908062283, "grad_norm": 0.5852522253990173, "learning_rate": 9.457063244110445e-06, "loss": 0.0171, "step": 11200 }, { "epoch": 0.1893133380619448, "grad_norm": 0.883776068687439, "learning_rate": 9.465507050578403e-06, "loss": 0.0345, "step": 11210 }, { "epoch": 0.1894822170432668, "grad_norm": 0.7664034366607666, "learning_rate": 9.473950857046357e-06, "loss": 0.0176, "step": 11220 }, { "epoch": 0.18965109602458877, "grad_norm": 0.6304885149002075, "learning_rate": 9.482394663514312e-06, "loss": 0.0373, "step": 11230 }, { "epoch": 0.18981997500591077, "grad_norm": 0.39434248208999634, "learning_rate": 9.490838469982268e-06, "loss": 0.0349, "step": 11240 }, { "epoch": 0.18998885398723275, "grad_norm": 0.8853156566619873, "learning_rate": 9.499282276450224e-06, "loss": 0.0256, "step": 11250 }, { "epoch": 0.19015773296855473, "grad_norm": 0.38937607407569885, "learning_rate": 9.507726082918181e-06, "loss": 0.018, "step": 11260 }, { "epoch": 0.1903266119498767, "grad_norm": 0.6200795769691467, "learning_rate": 9.516169889386137e-06, "loss": 0.0256, "step": 11270 }, { "epoch": 0.1904954909311987, "grad_norm": 0.6941394209861755, "learning_rate": 9.524613695854091e-06, "loss": 0.0206, "step": 11280 }, { "epoch": 0.1906643699125207, "grad_norm": 0.6616683602333069, "learning_rate": 9.533057502322047e-06, "loss": 0.0269, "step": 11290 }, { "epoch": 0.19083324889384268, "grad_norm": 0.7434414625167847, "learning_rate": 9.541501308790004e-06, "loss": 0.0229, "step": 11300 }, { "epoch": 0.19100212787516466, "grad_norm": 0.7199409008026123, "learning_rate": 9.54994511525796e-06, "loss": 0.0278, "step": 11310 }, { "epoch": 0.19117100685648664, "grad_norm": 0.6231432557106018, "learning_rate": 9.558388921725915e-06, "loss": 0.0234, "step": 11320 }, { "epoch": 0.19133988583780862, "grad_norm": 0.6943575739860535, "learning_rate": 9.566832728193871e-06, "loss": 0.0232, "step": 11330 }, { "epoch": 0.19150876481913062, "grad_norm": 0.47736856341362, "learning_rate": 9.575276534661825e-06, "loss": 0.0277, "step": 11340 }, { "epoch": 0.1916776438004526, "grad_norm": 0.20426806807518005, "learning_rate": 9.583720341129783e-06, "loss": 0.0288, "step": 11350 }, { "epoch": 0.19184652278177458, "grad_norm": 0.7373768091201782, "learning_rate": 9.592164147597738e-06, "loss": 0.0175, "step": 11360 }, { "epoch": 0.19201540176309656, "grad_norm": 0.48403263092041016, "learning_rate": 9.600607954065694e-06, "loss": 0.0244, "step": 11370 }, { "epoch": 0.19218428074441854, "grad_norm": 0.7235735654830933, "learning_rate": 9.60905176053365e-06, "loss": 0.0256, "step": 11380 }, { "epoch": 0.19235315972574055, "grad_norm": 0.670491099357605, "learning_rate": 9.617495567001604e-06, "loss": 0.0263, "step": 11390 }, { "epoch": 0.19252203870706253, "grad_norm": 0.7391282916069031, "learning_rate": 9.625939373469561e-06, "loss": 0.0206, "step": 11400 }, { "epoch": 0.1926909176883845, "grad_norm": 0.4037543833255768, "learning_rate": 9.634383179937517e-06, "loss": 0.0258, "step": 11410 }, { "epoch": 0.19285979666970648, "grad_norm": 1.1110835075378418, "learning_rate": 9.642826986405473e-06, "loss": 0.0215, "step": 11420 }, { "epoch": 0.19302867565102846, "grad_norm": 0.4552389979362488, "learning_rate": 9.651270792873428e-06, "loss": 0.0265, "step": 11430 }, { "epoch": 0.19319755463235047, "grad_norm": 0.4980524480342865, "learning_rate": 9.659714599341384e-06, "loss": 0.0198, "step": 11440 }, { "epoch": 0.19336643361367245, "grad_norm": 0.9337989091873169, "learning_rate": 9.66815840580934e-06, "loss": 0.029, "step": 11450 }, { "epoch": 0.19353531259499443, "grad_norm": 0.21923384070396423, "learning_rate": 9.676602212277296e-06, "loss": 0.0251, "step": 11460 }, { "epoch": 0.1937041915763164, "grad_norm": 0.6393147110939026, "learning_rate": 9.685046018745251e-06, "loss": 0.022, "step": 11470 }, { "epoch": 0.1938730705576384, "grad_norm": 0.6046420335769653, "learning_rate": 9.693489825213207e-06, "loss": 0.0242, "step": 11480 }, { "epoch": 0.19404194953896037, "grad_norm": 0.8095200657844543, "learning_rate": 9.701933631681163e-06, "loss": 0.0258, "step": 11490 }, { "epoch": 0.19421082852028237, "grad_norm": 0.5620001554489136, "learning_rate": 9.710377438149118e-06, "loss": 0.025, "step": 11500 }, { "epoch": 0.19437970750160435, "grad_norm": 0.551791787147522, "learning_rate": 9.718821244617074e-06, "loss": 0.0181, "step": 11510 }, { "epoch": 0.19454858648292633, "grad_norm": 0.7764133810997009, "learning_rate": 9.72726505108503e-06, "loss": 0.0282, "step": 11520 }, { "epoch": 0.1947174654642483, "grad_norm": 0.35652756690979004, "learning_rate": 9.735708857552986e-06, "loss": 0.0185, "step": 11530 }, { "epoch": 0.1948863444455703, "grad_norm": 0.4888761639595032, "learning_rate": 9.744152664020941e-06, "loss": 0.0188, "step": 11540 }, { "epoch": 0.1950552234268923, "grad_norm": 0.9273265600204468, "learning_rate": 9.752596470488897e-06, "loss": 0.0221, "step": 11550 }, { "epoch": 0.19522410240821428, "grad_norm": 0.5575427412986755, "learning_rate": 9.761040276956853e-06, "loss": 0.022, "step": 11560 }, { "epoch": 0.19539298138953626, "grad_norm": 0.4084751307964325, "learning_rate": 9.769484083424808e-06, "loss": 0.022, "step": 11570 }, { "epoch": 0.19556186037085824, "grad_norm": 0.6172500252723694, "learning_rate": 9.777927889892764e-06, "loss": 0.0195, "step": 11580 }, { "epoch": 0.19573073935218022, "grad_norm": 0.8854919672012329, "learning_rate": 9.78637169636072e-06, "loss": 0.0218, "step": 11590 }, { "epoch": 0.19589961833350222, "grad_norm": 0.7149381637573242, "learning_rate": 9.794815502828676e-06, "loss": 0.018, "step": 11600 }, { "epoch": 0.1960684973148242, "grad_norm": 0.8019700050354004, "learning_rate": 9.803259309296633e-06, "loss": 0.0204, "step": 11610 }, { "epoch": 0.19623737629614618, "grad_norm": 0.6410493850708008, "learning_rate": 9.811703115764587e-06, "loss": 0.0256, "step": 11620 }, { "epoch": 0.19640625527746816, "grad_norm": 0.38607993721961975, "learning_rate": 9.820146922232543e-06, "loss": 0.0297, "step": 11630 }, { "epoch": 0.19657513425879014, "grad_norm": 0.783348798751831, "learning_rate": 9.828590728700498e-06, "loss": 0.0191, "step": 11640 }, { "epoch": 0.19674401324011215, "grad_norm": 0.502961277961731, "learning_rate": 9.837034535168454e-06, "loss": 0.0192, "step": 11650 }, { "epoch": 0.19691289222143413, "grad_norm": 0.7703133821487427, "learning_rate": 9.845478341636412e-06, "loss": 0.0221, "step": 11660 }, { "epoch": 0.1970817712027561, "grad_norm": 0.6097660064697266, "learning_rate": 9.853922148104367e-06, "loss": 0.0227, "step": 11670 }, { "epoch": 0.19725065018407809, "grad_norm": 0.7382051348686218, "learning_rate": 9.862365954572321e-06, "loss": 0.0211, "step": 11680 }, { "epoch": 0.19741952916540007, "grad_norm": 0.8009887337684631, "learning_rate": 9.870809761040277e-06, "loss": 0.0192, "step": 11690 }, { "epoch": 0.19758840814672207, "grad_norm": 0.6980581283569336, "learning_rate": 9.879253567508233e-06, "loss": 0.0207, "step": 11700 }, { "epoch": 0.19775728712804405, "grad_norm": 0.5944986939430237, "learning_rate": 9.88769737397619e-06, "loss": 0.0268, "step": 11710 }, { "epoch": 0.19792616610936603, "grad_norm": 0.22180627286434174, "learning_rate": 9.896141180444146e-06, "loss": 0.0207, "step": 11720 }, { "epoch": 0.198095045090688, "grad_norm": 0.24817654490470886, "learning_rate": 9.9045849869121e-06, "loss": 0.0243, "step": 11730 }, { "epoch": 0.19826392407201, "grad_norm": 0.8196826577186584, "learning_rate": 9.913028793380056e-06, "loss": 0.0212, "step": 11740 }, { "epoch": 0.19843280305333197, "grad_norm": 0.518610417842865, "learning_rate": 9.921472599848011e-06, "loss": 0.0176, "step": 11750 }, { "epoch": 0.19860168203465398, "grad_norm": 0.5892357230186462, "learning_rate": 9.929916406315969e-06, "loss": 0.0206, "step": 11760 }, { "epoch": 0.19877056101597596, "grad_norm": 0.71112060546875, "learning_rate": 9.938360212783924e-06, "loss": 0.0281, "step": 11770 }, { "epoch": 0.19893943999729793, "grad_norm": 0.553156852722168, "learning_rate": 9.94680401925188e-06, "loss": 0.0253, "step": 11780 }, { "epoch": 0.1991083189786199, "grad_norm": 0.8104478716850281, "learning_rate": 9.955247825719834e-06, "loss": 0.0258, "step": 11790 }, { "epoch": 0.1992771979599419, "grad_norm": 0.4921213388442993, "learning_rate": 9.963691632187792e-06, "loss": 0.0238, "step": 11800 }, { "epoch": 0.1994460769412639, "grad_norm": 0.7473612427711487, "learning_rate": 9.972135438655747e-06, "loss": 0.0275, "step": 11810 }, { "epoch": 0.19961495592258588, "grad_norm": 0.5946088433265686, "learning_rate": 9.980579245123703e-06, "loss": 0.0253, "step": 11820 }, { "epoch": 0.19978383490390786, "grad_norm": 0.5260125994682312, "learning_rate": 9.989023051591659e-06, "loss": 0.0243, "step": 11830 }, { "epoch": 0.19995271388522984, "grad_norm": 0.6882367730140686, "learning_rate": 9.997466858059614e-06, "loss": 0.0243, "step": 11840 }, { "epoch": 0.20012159286655182, "grad_norm": 0.2990383505821228, "learning_rate": 9.999999893575002e-06, "loss": 0.0226, "step": 11850 }, { "epoch": 0.20029047184787382, "grad_norm": 0.6635308265686035, "learning_rate": 9.999999372309713e-06, "loss": 0.023, "step": 11860 }, { "epoch": 0.2004593508291958, "grad_norm": 0.31979241967201233, "learning_rate": 9.99999841665673e-06, "loss": 0.0233, "step": 11870 }, { "epoch": 0.20062822981051778, "grad_norm": 0.5974593162536621, "learning_rate": 9.999997026616138e-06, "loss": 0.0195, "step": 11880 }, { "epoch": 0.20079710879183976, "grad_norm": 0.7368662357330322, "learning_rate": 9.999995202188056e-06, "loss": 0.0234, "step": 11890 }, { "epoch": 0.20096598777316174, "grad_norm": 0.4962439239025116, "learning_rate": 9.99999294337264e-06, "loss": 0.0233, "step": 11900 }, { "epoch": 0.20113486675448375, "grad_norm": 0.4079640209674835, "learning_rate": 9.999990250170093e-06, "loss": 0.0303, "step": 11910 }, { "epoch": 0.20130374573580573, "grad_norm": 0.6423386931419373, "learning_rate": 9.999987122580642e-06, "loss": 0.0218, "step": 11920 }, { "epoch": 0.2014726247171277, "grad_norm": 0.5372032523155212, "learning_rate": 9.999983560604563e-06, "loss": 0.0219, "step": 11930 }, { "epoch": 0.2016415036984497, "grad_norm": 0.4462243914604187, "learning_rate": 9.999979564242163e-06, "loss": 0.019, "step": 11940 }, { "epoch": 0.20181038267977167, "grad_norm": 0.29582729935646057, "learning_rate": 9.999975133493791e-06, "loss": 0.0272, "step": 11950 }, { "epoch": 0.20197926166109367, "grad_norm": 0.6830415725708008, "learning_rate": 9.999970268359832e-06, "loss": 0.0229, "step": 11960 }, { "epoch": 0.20214814064241565, "grad_norm": 0.5628382563591003, "learning_rate": 9.999964968840706e-06, "loss": 0.0286, "step": 11970 }, { "epoch": 0.20231701962373763, "grad_norm": 0.867768406867981, "learning_rate": 9.999959234936875e-06, "loss": 0.0255, "step": 11980 }, { "epoch": 0.2024858986050596, "grad_norm": 0.5641778111457825, "learning_rate": 9.99995306664884e-06, "loss": 0.0176, "step": 11990 }, { "epoch": 0.2026547775863816, "grad_norm": 0.9771850109100342, "learning_rate": 9.999946463977133e-06, "loss": 0.027, "step": 12000 }, { "epoch": 0.2028236565677036, "grad_norm": 0.37274765968322754, "learning_rate": 9.999939426922328e-06, "loss": 0.0268, "step": 12010 }, { "epoch": 0.20299253554902558, "grad_norm": 0.49503371119499207, "learning_rate": 9.999931955485039e-06, "loss": 0.0212, "step": 12020 }, { "epoch": 0.20316141453034756, "grad_norm": 0.9417015910148621, "learning_rate": 9.999924049665912e-06, "loss": 0.0319, "step": 12030 }, { "epoch": 0.20333029351166954, "grad_norm": 0.7954615950584412, "learning_rate": 9.999915709465637e-06, "loss": 0.0258, "step": 12040 }, { "epoch": 0.20349917249299151, "grad_norm": 0.6638253331184387, "learning_rate": 9.999906934884935e-06, "loss": 0.0152, "step": 12050 }, { "epoch": 0.2036680514743135, "grad_norm": 0.3444608151912689, "learning_rate": 9.999897725924572e-06, "loss": 0.021, "step": 12060 }, { "epoch": 0.2038369304556355, "grad_norm": 0.7522075772285461, "learning_rate": 9.999888082585344e-06, "loss": 0.0176, "step": 12070 }, { "epoch": 0.20400580943695748, "grad_norm": 0.9024222493171692, "learning_rate": 9.999878004868092e-06, "loss": 0.0231, "step": 12080 }, { "epoch": 0.20417468841827946, "grad_norm": 0.44545236229896545, "learning_rate": 9.999867492773691e-06, "loss": 0.0217, "step": 12090 }, { "epoch": 0.20434356739960144, "grad_norm": 0.8078463077545166, "learning_rate": 9.999856546303053e-06, "loss": 0.0186, "step": 12100 }, { "epoch": 0.20451244638092342, "grad_norm": 0.8430474996566772, "learning_rate": 9.999845165457129e-06, "loss": 0.0264, "step": 12110 }, { "epoch": 0.20468132536224543, "grad_norm": 0.8249558210372925, "learning_rate": 9.999833350236911e-06, "loss": 0.0244, "step": 12120 }, { "epoch": 0.2048502043435674, "grad_norm": 0.7176030278205872, "learning_rate": 9.999821100643421e-06, "loss": 0.0273, "step": 12130 }, { "epoch": 0.20501908332488938, "grad_norm": 0.5935543179512024, "learning_rate": 9.999808416677725e-06, "loss": 0.0176, "step": 12140 }, { "epoch": 0.20518796230621136, "grad_norm": 0.9513965845108032, "learning_rate": 9.999795298340928e-06, "loss": 0.0316, "step": 12150 }, { "epoch": 0.20535684128753334, "grad_norm": 0.44202563166618347, "learning_rate": 9.999781745634164e-06, "loss": 0.028, "step": 12160 }, { "epoch": 0.20552572026885535, "grad_norm": 0.6539182066917419, "learning_rate": 9.999767758558615e-06, "loss": 0.0253, "step": 12170 }, { "epoch": 0.20569459925017733, "grad_norm": 0.35216692090034485, "learning_rate": 9.999753337115491e-06, "loss": 0.026, "step": 12180 }, { "epoch": 0.2058634782314993, "grad_norm": 0.8939276933670044, "learning_rate": 9.999738481306053e-06, "loss": 0.0249, "step": 12190 }, { "epoch": 0.2060323572128213, "grad_norm": 0.43644335865974426, "learning_rate": 9.999723191131585e-06, "loss": 0.0179, "step": 12200 }, { "epoch": 0.20620123619414327, "grad_norm": 0.6035345792770386, "learning_rate": 9.999707466593418e-06, "loss": 0.022, "step": 12210 }, { "epoch": 0.20637011517546527, "grad_norm": 0.6664296984672546, "learning_rate": 9.999691307692916e-06, "loss": 0.0221, "step": 12220 }, { "epoch": 0.20653899415678725, "grad_norm": 0.4628756046295166, "learning_rate": 9.999674714431485e-06, "loss": 0.0229, "step": 12230 }, { "epoch": 0.20670787313810923, "grad_norm": 0.4419783055782318, "learning_rate": 9.999657686810566e-06, "loss": 0.0182, "step": 12240 }, { "epoch": 0.2068767521194312, "grad_norm": 0.6284075379371643, "learning_rate": 9.999640224831637e-06, "loss": 0.0164, "step": 12250 }, { "epoch": 0.2070456311007532, "grad_norm": 0.7496688961982727, "learning_rate": 9.999622328496218e-06, "loss": 0.0206, "step": 12260 }, { "epoch": 0.2072145100820752, "grad_norm": 0.49926644563674927, "learning_rate": 9.999603997805862e-06, "loss": 0.023, "step": 12270 }, { "epoch": 0.20738338906339718, "grad_norm": 0.2747437059879303, "learning_rate": 9.99958523276216e-06, "loss": 0.0203, "step": 12280 }, { "epoch": 0.20755226804471916, "grad_norm": 0.6081754565238953, "learning_rate": 9.999566033366746e-06, "loss": 0.0355, "step": 12290 }, { "epoch": 0.20772114702604114, "grad_norm": 0.5186958909034729, "learning_rate": 9.999546399621283e-06, "loss": 0.0156, "step": 12300 }, { "epoch": 0.20789002600736312, "grad_norm": 0.6928595900535583, "learning_rate": 9.999526331527483e-06, "loss": 0.0193, "step": 12310 }, { "epoch": 0.2080589049886851, "grad_norm": 0.5429145693778992, "learning_rate": 9.999505829087084e-06, "loss": 0.0265, "step": 12320 }, { "epoch": 0.2082277839700071, "grad_norm": 0.3580666780471802, "learning_rate": 9.99948489230187e-06, "loss": 0.025, "step": 12330 }, { "epoch": 0.20839666295132908, "grad_norm": 0.7779616117477417, "learning_rate": 9.999463521173659e-06, "loss": 0.0304, "step": 12340 }, { "epoch": 0.20856554193265106, "grad_norm": 0.7145877480506897, "learning_rate": 9.999441715704306e-06, "loss": 0.0278, "step": 12350 }, { "epoch": 0.20873442091397304, "grad_norm": 0.7262795567512512, "learning_rate": 9.999419475895708e-06, "loss": 0.022, "step": 12360 }, { "epoch": 0.20890329989529502, "grad_norm": 0.8629158735275269, "learning_rate": 9.999396801749797e-06, "loss": 0.0285, "step": 12370 }, { "epoch": 0.20907217887661703, "grad_norm": 0.794607400894165, "learning_rate": 9.999373693268543e-06, "loss": 0.02, "step": 12380 }, { "epoch": 0.209241057857939, "grad_norm": 0.5337771773338318, "learning_rate": 9.999350150453952e-06, "loss": 0.0261, "step": 12390 }, { "epoch": 0.20940993683926099, "grad_norm": 0.44297730922698975, "learning_rate": 9.999326173308071e-06, "loss": 0.0216, "step": 12400 }, { "epoch": 0.20957881582058296, "grad_norm": 0.37074369192123413, "learning_rate": 9.999301761832982e-06, "loss": 0.0259, "step": 12410 }, { "epoch": 0.20974769480190494, "grad_norm": 0.4414767920970917, "learning_rate": 9.999276916030806e-06, "loss": 0.0192, "step": 12420 }, { "epoch": 0.20991657378322695, "grad_norm": 0.32481127977371216, "learning_rate": 9.999251635903702e-06, "loss": 0.0185, "step": 12430 }, { "epoch": 0.21008545276454893, "grad_norm": 0.42931702733039856, "learning_rate": 9.999225921453864e-06, "loss": 0.0199, "step": 12440 }, { "epoch": 0.2102543317458709, "grad_norm": 0.33273544907569885, "learning_rate": 9.99919977268353e-06, "loss": 0.0236, "step": 12450 }, { "epoch": 0.2104232107271929, "grad_norm": 0.3540130853652954, "learning_rate": 9.999173189594968e-06, "loss": 0.0212, "step": 12460 }, { "epoch": 0.21059208970851487, "grad_norm": 0.41759201884269714, "learning_rate": 9.99914617219049e-06, "loss": 0.015, "step": 12470 }, { "epoch": 0.21076096868983688, "grad_norm": 0.6415428519248962, "learning_rate": 9.999118720472443e-06, "loss": 0.0203, "step": 12480 }, { "epoch": 0.21092984767115885, "grad_norm": 0.9386029839515686, "learning_rate": 9.99909083444321e-06, "loss": 0.0226, "step": 12490 }, { "epoch": 0.21109872665248083, "grad_norm": 0.6860324740409851, "learning_rate": 9.999062514105217e-06, "loss": 0.0256, "step": 12500 }, { "epoch": 0.2112676056338028, "grad_norm": 1.149688482284546, "learning_rate": 9.99903375946092e-06, "loss": 0.0184, "step": 12510 }, { "epoch": 0.2114364846151248, "grad_norm": 0.538566529750824, "learning_rate": 9.99900457051282e-06, "loss": 0.0242, "step": 12520 }, { "epoch": 0.2116053635964468, "grad_norm": 0.5071699619293213, "learning_rate": 9.998974947263453e-06, "loss": 0.0215, "step": 12530 }, { "epoch": 0.21177424257776878, "grad_norm": 0.8370416760444641, "learning_rate": 9.998944889715394e-06, "loss": 0.0151, "step": 12540 }, { "epoch": 0.21194312155909076, "grad_norm": 0.6351609826087952, "learning_rate": 9.998914397871248e-06, "loss": 0.0246, "step": 12550 }, { "epoch": 0.21211200054041274, "grad_norm": 0.42137032747268677, "learning_rate": 9.998883471733672e-06, "loss": 0.0184, "step": 12560 }, { "epoch": 0.21228087952173472, "grad_norm": 0.38347581028938293, "learning_rate": 9.998852111305346e-06, "loss": 0.016, "step": 12570 }, { "epoch": 0.2124497585030567, "grad_norm": 0.4378102123737335, "learning_rate": 9.998820316588999e-06, "loss": 0.0276, "step": 12580 }, { "epoch": 0.2126186374843787, "grad_norm": 0.7070825099945068, "learning_rate": 9.998788087587393e-06, "loss": 0.018, "step": 12590 }, { "epoch": 0.21278751646570068, "grad_norm": 0.5297057032585144, "learning_rate": 9.998755424303325e-06, "loss": 0.0238, "step": 12600 }, { "epoch": 0.21295639544702266, "grad_norm": 0.5860087275505066, "learning_rate": 9.998722326739635e-06, "loss": 0.0183, "step": 12610 }, { "epoch": 0.21312527442834464, "grad_norm": 0.5606216192245483, "learning_rate": 9.9986887948992e-06, "loss": 0.0254, "step": 12620 }, { "epoch": 0.21329415340966662, "grad_norm": 0.5790494084358215, "learning_rate": 9.998654828784929e-06, "loss": 0.0232, "step": 12630 }, { "epoch": 0.21346303239098863, "grad_norm": 0.5284299850463867, "learning_rate": 9.998620428399777e-06, "loss": 0.0261, "step": 12640 }, { "epoch": 0.2136319113723106, "grad_norm": 0.5943793058395386, "learning_rate": 9.998585593746728e-06, "loss": 0.019, "step": 12650 }, { "epoch": 0.21380079035363259, "grad_norm": 0.5194329619407654, "learning_rate": 9.998550324828813e-06, "loss": 0.0217, "step": 12660 }, { "epoch": 0.21396966933495457, "grad_norm": 0.4665532410144806, "learning_rate": 9.998514621649094e-06, "loss": 0.0256, "step": 12670 }, { "epoch": 0.21413854831627654, "grad_norm": 0.4193422496318817, "learning_rate": 9.998478484210673e-06, "loss": 0.0264, "step": 12680 }, { "epoch": 0.21430742729759855, "grad_norm": 0.597014307975769, "learning_rate": 9.998441912516688e-06, "loss": 0.0146, "step": 12690 }, { "epoch": 0.21447630627892053, "grad_norm": 0.47630682587623596, "learning_rate": 9.998404906570319e-06, "loss": 0.0174, "step": 12700 }, { "epoch": 0.2146451852602425, "grad_norm": 0.49711552262306213, "learning_rate": 9.99836746637478e-06, "loss": 0.0217, "step": 12710 }, { "epoch": 0.2148140642415645, "grad_norm": 0.48704996705055237, "learning_rate": 9.99832959193332e-06, "loss": 0.0169, "step": 12720 }, { "epoch": 0.21498294322288647, "grad_norm": 0.5877883434295654, "learning_rate": 9.998291283249235e-06, "loss": 0.0231, "step": 12730 }, { "epoch": 0.21515182220420848, "grad_norm": 0.5747621655464172, "learning_rate": 9.998252540325852e-06, "loss": 0.0212, "step": 12740 }, { "epoch": 0.21532070118553046, "grad_norm": 0.7880575656890869, "learning_rate": 9.998213363166536e-06, "loss": 0.0176, "step": 12750 }, { "epoch": 0.21548958016685243, "grad_norm": 0.5190985798835754, "learning_rate": 9.998173751774688e-06, "loss": 0.0221, "step": 12760 }, { "epoch": 0.21565845914817441, "grad_norm": 0.31717509031295776, "learning_rate": 9.998133706153754e-06, "loss": 0.0193, "step": 12770 }, { "epoch": 0.2158273381294964, "grad_norm": 0.4212071895599365, "learning_rate": 9.998093226307211e-06, "loss": 0.025, "step": 12780 }, { "epoch": 0.2159962171108184, "grad_norm": 0.5605604648590088, "learning_rate": 9.998052312238576e-06, "loss": 0.02, "step": 12790 }, { "epoch": 0.21616509609214038, "grad_norm": 0.580276608467102, "learning_rate": 9.998010963951403e-06, "loss": 0.0227, "step": 12800 }, { "epoch": 0.21633397507346236, "grad_norm": 0.4050631523132324, "learning_rate": 9.997969181449284e-06, "loss": 0.025, "step": 12810 }, { "epoch": 0.21650285405478434, "grad_norm": 0.46946343779563904, "learning_rate": 9.997926964735848e-06, "loss": 0.0171, "step": 12820 }, { "epoch": 0.21667173303610632, "grad_norm": 0.29595935344696045, "learning_rate": 9.997884313814766e-06, "loss": 0.0215, "step": 12830 }, { "epoch": 0.21684061201742832, "grad_norm": 0.7430428862571716, "learning_rate": 9.997841228689741e-06, "loss": 0.0191, "step": 12840 }, { "epoch": 0.2170094909987503, "grad_norm": 0.383340984582901, "learning_rate": 9.997797709364516e-06, "loss": 0.0301, "step": 12850 }, { "epoch": 0.21717836998007228, "grad_norm": 0.6155153512954712, "learning_rate": 9.997753755842873e-06, "loss": 0.0212, "step": 12860 }, { "epoch": 0.21734724896139426, "grad_norm": 0.7785362601280212, "learning_rate": 9.99770936812863e-06, "loss": 0.0236, "step": 12870 }, { "epoch": 0.21751612794271624, "grad_norm": 0.6694698929786682, "learning_rate": 9.997664546225644e-06, "loss": 0.0242, "step": 12880 }, { "epoch": 0.21768500692403822, "grad_norm": 0.7689952254295349, "learning_rate": 9.997619290137808e-06, "loss": 0.0245, "step": 12890 }, { "epoch": 0.21785388590536023, "grad_norm": 0.3865833282470703, "learning_rate": 9.997573599869054e-06, "loss": 0.0202, "step": 12900 }, { "epoch": 0.2180227648866822, "grad_norm": 0.47006192803382874, "learning_rate": 9.997527475423352e-06, "loss": 0.0266, "step": 12910 }, { "epoch": 0.2181916438680042, "grad_norm": 0.6935707330703735, "learning_rate": 9.997480916804707e-06, "loss": 0.0233, "step": 12920 }, { "epoch": 0.21836052284932617, "grad_norm": 0.30998989939689636, "learning_rate": 9.997433924017169e-06, "loss": 0.0233, "step": 12930 }, { "epoch": 0.21852940183064815, "grad_norm": 0.34223639965057373, "learning_rate": 9.997386497064814e-06, "loss": 0.024, "step": 12940 }, { "epoch": 0.21869828081197015, "grad_norm": 0.480033278465271, "learning_rate": 9.997338635951767e-06, "loss": 0.0151, "step": 12950 }, { "epoch": 0.21886715979329213, "grad_norm": 0.5518850684165955, "learning_rate": 9.997290340682183e-06, "loss": 0.0223, "step": 12960 }, { "epoch": 0.2190360387746141, "grad_norm": 0.5471957921981812, "learning_rate": 9.997241611260263e-06, "loss": 0.0264, "step": 12970 }, { "epoch": 0.2192049177559361, "grad_norm": 0.5571374893188477, "learning_rate": 9.997192447690234e-06, "loss": 0.0166, "step": 12980 }, { "epoch": 0.21937379673725807, "grad_norm": 1.0467559099197388, "learning_rate": 9.997142849976371e-06, "loss": 0.0226, "step": 12990 }, { "epoch": 0.21954267571858008, "grad_norm": 0.506353497505188, "learning_rate": 9.997092818122982e-06, "loss": 0.0251, "step": 13000 }, { "epoch": 0.21971155469990206, "grad_norm": 0.8406707048416138, "learning_rate": 9.997042352134413e-06, "loss": 0.0192, "step": 13010 }, { "epoch": 0.21988043368122404, "grad_norm": 0.30533769726753235, "learning_rate": 9.996991452015051e-06, "loss": 0.0226, "step": 13020 }, { "epoch": 0.22004931266254601, "grad_norm": 0.6905465722084045, "learning_rate": 9.996940117769313e-06, "loss": 0.0222, "step": 13030 }, { "epoch": 0.220218191643868, "grad_norm": 0.5504990816116333, "learning_rate": 9.996888349401666e-06, "loss": 0.021, "step": 13040 }, { "epoch": 0.22038707062519, "grad_norm": 0.31456801295280457, "learning_rate": 9.9968361469166e-06, "loss": 0.0191, "step": 13050 }, { "epoch": 0.22055594960651198, "grad_norm": 0.48716938495635986, "learning_rate": 9.996783510318656e-06, "loss": 0.0182, "step": 13060 }, { "epoch": 0.22072482858783396, "grad_norm": 0.3966607451438904, "learning_rate": 9.996730439612403e-06, "loss": 0.0225, "step": 13070 }, { "epoch": 0.22089370756915594, "grad_norm": 0.5541304349899292, "learning_rate": 9.996676934802454e-06, "loss": 0.0189, "step": 13080 }, { "epoch": 0.22106258655047792, "grad_norm": 0.6227548122406006, "learning_rate": 9.996622995893457e-06, "loss": 0.0198, "step": 13090 }, { "epoch": 0.22123146553179993, "grad_norm": 0.42270156741142273, "learning_rate": 9.996568622890097e-06, "loss": 0.0286, "step": 13100 }, { "epoch": 0.2214003445131219, "grad_norm": 0.4265011250972748, "learning_rate": 9.996513815797098e-06, "loss": 0.0205, "step": 13110 }, { "epoch": 0.22156922349444388, "grad_norm": 0.8292638659477234, "learning_rate": 9.996458574619223e-06, "loss": 0.0231, "step": 13120 }, { "epoch": 0.22173810247576586, "grad_norm": 0.3784141540527344, "learning_rate": 9.99640289936127e-06, "loss": 0.0201, "step": 13130 }, { "epoch": 0.22190698145708784, "grad_norm": 0.3135412037372589, "learning_rate": 9.996346790028076e-06, "loss": 0.018, "step": 13140 }, { "epoch": 0.22207586043840982, "grad_norm": 0.41473856568336487, "learning_rate": 9.996290246624516e-06, "loss": 0.0191, "step": 13150 }, { "epoch": 0.22224473941973183, "grad_norm": 0.43884238600730896, "learning_rate": 9.996233269155501e-06, "loss": 0.0194, "step": 13160 }, { "epoch": 0.2224136184010538, "grad_norm": 0.4485374093055725, "learning_rate": 9.996175857625985e-06, "loss": 0.0145, "step": 13170 }, { "epoch": 0.2225824973823758, "grad_norm": 0.2920628488063812, "learning_rate": 9.996118012040951e-06, "loss": 0.0254, "step": 13180 }, { "epoch": 0.22275137636369777, "grad_norm": 0.38315463066101074, "learning_rate": 9.996059732405427e-06, "loss": 0.0219, "step": 13190 }, { "epoch": 0.22292025534501975, "grad_norm": 1.1109602451324463, "learning_rate": 9.996001018724475e-06, "loss": 0.025, "step": 13200 }, { "epoch": 0.22308913432634175, "grad_norm": 0.30119943618774414, "learning_rate": 9.995941871003198e-06, "loss": 0.0185, "step": 13210 }, { "epoch": 0.22325801330766373, "grad_norm": 0.5722905993461609, "learning_rate": 9.995882289246733e-06, "loss": 0.0209, "step": 13220 }, { "epoch": 0.2234268922889857, "grad_norm": 0.6589497327804565, "learning_rate": 9.995822273460254e-06, "loss": 0.0268, "step": 13230 }, { "epoch": 0.2235957712703077, "grad_norm": 0.6521400213241577, "learning_rate": 9.99576182364898e-06, "loss": 0.0237, "step": 13240 }, { "epoch": 0.22376465025162967, "grad_norm": 0.626105546951294, "learning_rate": 9.99570093981816e-06, "loss": 0.0217, "step": 13250 }, { "epoch": 0.22393352923295168, "grad_norm": 0.5794259905815125, "learning_rate": 9.995639621973083e-06, "loss": 0.0284, "step": 13260 }, { "epoch": 0.22410240821427366, "grad_norm": 0.48004478216171265, "learning_rate": 9.995577870119076e-06, "loss": 0.0221, "step": 13270 }, { "epoch": 0.22427128719559564, "grad_norm": 0.537192165851593, "learning_rate": 9.995515684261506e-06, "loss": 0.023, "step": 13280 }, { "epoch": 0.22444016617691762, "grad_norm": 0.37853914499282837, "learning_rate": 9.995453064405774e-06, "loss": 0.018, "step": 13290 }, { "epoch": 0.2246090451582396, "grad_norm": 0.5441258549690247, "learning_rate": 9.99539001055732e-06, "loss": 0.0221, "step": 13300 }, { "epoch": 0.2247779241395616, "grad_norm": 0.4827272593975067, "learning_rate": 9.995326522721623e-06, "loss": 0.0206, "step": 13310 }, { "epoch": 0.22494680312088358, "grad_norm": 0.46701982617378235, "learning_rate": 9.995262600904199e-06, "loss": 0.025, "step": 13320 }, { "epoch": 0.22511568210220556, "grad_norm": 0.526961624622345, "learning_rate": 9.9951982451106e-06, "loss": 0.0182, "step": 13330 }, { "epoch": 0.22528456108352754, "grad_norm": 0.4834033250808716, "learning_rate": 9.995133455346417e-06, "loss": 0.0193, "step": 13340 }, { "epoch": 0.22545344006484952, "grad_norm": 0.39934253692626953, "learning_rate": 9.99506823161728e-06, "loss": 0.0182, "step": 13350 }, { "epoch": 0.22562231904617153, "grad_norm": 1.106377124786377, "learning_rate": 9.995002573928853e-06, "loss": 0.0251, "step": 13360 }, { "epoch": 0.2257911980274935, "grad_norm": 0.4155421853065491, "learning_rate": 9.994936482286843e-06, "loss": 0.0216, "step": 13370 }, { "epoch": 0.22596007700881549, "grad_norm": 0.538070023059845, "learning_rate": 9.994869956696992e-06, "loss": 0.0195, "step": 13380 }, { "epoch": 0.22612895599013746, "grad_norm": 0.5227135419845581, "learning_rate": 9.994802997165077e-06, "loss": 0.023, "step": 13390 }, { "epoch": 0.22629783497145944, "grad_norm": 0.29460328817367554, "learning_rate": 9.994735603696917e-06, "loss": 0.022, "step": 13400 }, { "epoch": 0.22646671395278142, "grad_norm": 0.34215784072875977, "learning_rate": 9.994667776298368e-06, "loss": 0.0199, "step": 13410 }, { "epoch": 0.22663559293410343, "grad_norm": 0.5711653232574463, "learning_rate": 9.99459951497532e-06, "loss": 0.0218, "step": 13420 }, { "epoch": 0.2268044719154254, "grad_norm": 0.4246247112751007, "learning_rate": 9.994530819733705e-06, "loss": 0.021, "step": 13430 }, { "epoch": 0.2269733508967474, "grad_norm": 0.407925009727478, "learning_rate": 9.994461690579491e-06, "loss": 0.0208, "step": 13440 }, { "epoch": 0.22714222987806937, "grad_norm": 0.5388248562812805, "learning_rate": 9.994392127518684e-06, "loss": 0.0254, "step": 13450 }, { "epoch": 0.22731110885939135, "grad_norm": 0.7013906240463257, "learning_rate": 9.994322130557327e-06, "loss": 0.0231, "step": 13460 }, { "epoch": 0.22747998784071335, "grad_norm": 0.44070202112197876, "learning_rate": 9.994251699701501e-06, "loss": 0.0187, "step": 13470 }, { "epoch": 0.22764886682203533, "grad_norm": 0.6252650618553162, "learning_rate": 9.994180834957326e-06, "loss": 0.0202, "step": 13480 }, { "epoch": 0.2278177458033573, "grad_norm": 0.41597700119018555, "learning_rate": 9.994109536330957e-06, "loss": 0.0204, "step": 13490 }, { "epoch": 0.2279866247846793, "grad_norm": 0.3650316596031189, "learning_rate": 9.994037803828587e-06, "loss": 0.0276, "step": 13500 }, { "epoch": 0.22815550376600127, "grad_norm": 0.8243926167488098, "learning_rate": 9.993965637456453e-06, "loss": 0.0258, "step": 13510 }, { "epoch": 0.22832438274732328, "grad_norm": 0.4992065131664276, "learning_rate": 9.99389303722082e-06, "loss": 0.0243, "step": 13520 }, { "epoch": 0.22849326172864526, "grad_norm": 0.21114574372768402, "learning_rate": 9.993820003127997e-06, "loss": 0.0158, "step": 13530 }, { "epoch": 0.22866214070996724, "grad_norm": 0.6005690097808838, "learning_rate": 9.993746535184328e-06, "loss": 0.0149, "step": 13540 }, { "epoch": 0.22883101969128922, "grad_norm": 0.3650211989879608, "learning_rate": 9.993672633396199e-06, "loss": 0.0199, "step": 13550 }, { "epoch": 0.2289998986726112, "grad_norm": 0.5062518119812012, "learning_rate": 9.993598297770026e-06, "loss": 0.0262, "step": 13560 }, { "epoch": 0.2291687776539332, "grad_norm": 0.4716508984565735, "learning_rate": 9.993523528312269e-06, "loss": 0.0233, "step": 13570 }, { "epoch": 0.22933765663525518, "grad_norm": 0.567931592464447, "learning_rate": 9.993448325029425e-06, "loss": 0.0179, "step": 13580 }, { "epoch": 0.22950653561657716, "grad_norm": 0.5805219411849976, "learning_rate": 9.993372687928025e-06, "loss": 0.0263, "step": 13590 }, { "epoch": 0.22967541459789914, "grad_norm": 0.4566687345504761, "learning_rate": 9.993296617014642e-06, "loss": 0.0163, "step": 13600 }, { "epoch": 0.22984429357922112, "grad_norm": 0.518731415271759, "learning_rate": 9.993220112295885e-06, "loss": 0.0203, "step": 13610 }, { "epoch": 0.23001317256054313, "grad_norm": 0.695753276348114, "learning_rate": 9.9931431737784e-06, "loss": 0.0218, "step": 13620 }, { "epoch": 0.2301820515418651, "grad_norm": 0.4565315246582031, "learning_rate": 9.99306580146887e-06, "loss": 0.0178, "step": 13630 }, { "epoch": 0.2303509305231871, "grad_norm": 0.4179416000843048, "learning_rate": 9.992987995374019e-06, "loss": 0.021, "step": 13640 }, { "epoch": 0.23051980950450907, "grad_norm": 0.47001731395721436, "learning_rate": 9.992909755500605e-06, "loss": 0.0214, "step": 13650 }, { "epoch": 0.23068868848583104, "grad_norm": 0.559281587600708, "learning_rate": 9.992831081855426e-06, "loss": 0.0141, "step": 13660 }, { "epoch": 0.23085756746715305, "grad_norm": 0.5146023631095886, "learning_rate": 9.992751974445318e-06, "loss": 0.0196, "step": 13670 }, { "epoch": 0.23102644644847503, "grad_norm": 1.0131707191467285, "learning_rate": 9.992672433277151e-06, "loss": 0.0236, "step": 13680 }, { "epoch": 0.231195325429797, "grad_norm": 0.6941403746604919, "learning_rate": 9.992592458357839e-06, "loss": 0.0177, "step": 13690 }, { "epoch": 0.231364204411119, "grad_norm": 0.3243723213672638, "learning_rate": 9.992512049694324e-06, "loss": 0.0162, "step": 13700 }, { "epoch": 0.23153308339244097, "grad_norm": 0.557278037071228, "learning_rate": 9.9924312072936e-06, "loss": 0.0236, "step": 13710 }, { "epoch": 0.23170196237376295, "grad_norm": 0.45139017701148987, "learning_rate": 9.992349931162684e-06, "loss": 0.0206, "step": 13720 }, { "epoch": 0.23187084135508496, "grad_norm": 0.6714091300964355, "learning_rate": 9.99226822130864e-06, "loss": 0.0225, "step": 13730 }, { "epoch": 0.23203972033640693, "grad_norm": 0.5997486114501953, "learning_rate": 9.992186077738565e-06, "loss": 0.0209, "step": 13740 }, { "epoch": 0.23220859931772891, "grad_norm": 0.4821280539035797, "learning_rate": 9.992103500459597e-06, "loss": 0.0195, "step": 13750 }, { "epoch": 0.2323774782990509, "grad_norm": 0.639990508556366, "learning_rate": 9.992020489478909e-06, "loss": 0.0182, "step": 13760 }, { "epoch": 0.23254635728037287, "grad_norm": 0.6128180027008057, "learning_rate": 9.991937044803712e-06, "loss": 0.0211, "step": 13770 }, { "epoch": 0.23271523626169488, "grad_norm": 0.8132696747779846, "learning_rate": 9.991853166441259e-06, "loss": 0.0218, "step": 13780 }, { "epoch": 0.23288411524301686, "grad_norm": 0.7176445722579956, "learning_rate": 9.991768854398833e-06, "loss": 0.0234, "step": 13790 }, { "epoch": 0.23305299422433884, "grad_norm": 0.4085693061351776, "learning_rate": 9.99168410868376e-06, "loss": 0.0163, "step": 13800 }, { "epoch": 0.23322187320566082, "grad_norm": 0.3417041003704071, "learning_rate": 9.991598929303405e-06, "loss": 0.0233, "step": 13810 }, { "epoch": 0.2333907521869828, "grad_norm": 0.4678489565849304, "learning_rate": 9.991513316265167e-06, "loss": 0.0196, "step": 13820 }, { "epoch": 0.2335596311683048, "grad_norm": 0.8273573517799377, "learning_rate": 9.991427269576482e-06, "loss": 0.0244, "step": 13830 }, { "epoch": 0.23372851014962678, "grad_norm": 0.7097352743148804, "learning_rate": 9.991340789244826e-06, "loss": 0.0277, "step": 13840 }, { "epoch": 0.23389738913094876, "grad_norm": 0.5307984352111816, "learning_rate": 9.991253875277713e-06, "loss": 0.0179, "step": 13850 }, { "epoch": 0.23406626811227074, "grad_norm": 0.6064855456352234, "learning_rate": 9.991166527682694e-06, "loss": 0.0232, "step": 13860 }, { "epoch": 0.23423514709359272, "grad_norm": 0.5978100895881653, "learning_rate": 9.991078746467358e-06, "loss": 0.0235, "step": 13870 }, { "epoch": 0.23440402607491473, "grad_norm": 0.5300696492195129, "learning_rate": 9.99099053163933e-06, "loss": 0.0175, "step": 13880 }, { "epoch": 0.2345729050562367, "grad_norm": 0.5673061609268188, "learning_rate": 9.990901883206274e-06, "loss": 0.0152, "step": 13890 }, { "epoch": 0.2347417840375587, "grad_norm": 0.6411839127540588, "learning_rate": 9.990812801175893e-06, "loss": 0.0257, "step": 13900 }, { "epoch": 0.23491066301888067, "grad_norm": 0.34212085604667664, "learning_rate": 9.990723285555925e-06, "loss": 0.0169, "step": 13910 }, { "epoch": 0.23507954200020265, "grad_norm": 0.49424660205841064, "learning_rate": 9.990633336354147e-06, "loss": 0.0237, "step": 13920 }, { "epoch": 0.23524842098152465, "grad_norm": 0.6221587061882019, "learning_rate": 9.990542953578372e-06, "loss": 0.0144, "step": 13930 }, { "epoch": 0.23541729996284663, "grad_norm": 0.9134078621864319, "learning_rate": 9.990452137236457e-06, "loss": 0.0208, "step": 13940 }, { "epoch": 0.2355861789441686, "grad_norm": 0.4055245816707611, "learning_rate": 9.990360887336285e-06, "loss": 0.0228, "step": 13950 }, { "epoch": 0.2357550579254906, "grad_norm": 0.296913743019104, "learning_rate": 9.99026920388579e-06, "loss": 0.0202, "step": 13960 }, { "epoch": 0.23592393690681257, "grad_norm": 0.9005787968635559, "learning_rate": 9.990177086892934e-06, "loss": 0.0192, "step": 13970 }, { "epoch": 0.23609281588813455, "grad_norm": 0.6659127473831177, "learning_rate": 9.99008453636572e-06, "loss": 0.022, "step": 13980 }, { "epoch": 0.23626169486945656, "grad_norm": 0.3509773910045624, "learning_rate": 9.98999155231219e-06, "loss": 0.0173, "step": 13990 }, { "epoch": 0.23643057385077854, "grad_norm": 0.31743955612182617, "learning_rate": 9.989898134740421e-06, "loss": 0.021, "step": 14000 }, { "epoch": 0.23659945283210052, "grad_norm": 0.3662709593772888, "learning_rate": 9.98980428365853e-06, "loss": 0.0252, "step": 14010 }, { "epoch": 0.2367683318134225, "grad_norm": 0.44568443298339844, "learning_rate": 9.989709999074668e-06, "loss": 0.02, "step": 14020 }, { "epoch": 0.23693721079474447, "grad_norm": 0.27915942668914795, "learning_rate": 9.989615280997028e-06, "loss": 0.0228, "step": 14030 }, { "epoch": 0.23710608977606648, "grad_norm": 0.34470582008361816, "learning_rate": 9.98952012943384e-06, "loss": 0.0218, "step": 14040 }, { "epoch": 0.23727496875738846, "grad_norm": 0.3908019959926605, "learning_rate": 9.98942454439337e-06, "loss": 0.0207, "step": 14050 }, { "epoch": 0.23744384773871044, "grad_norm": 0.5438292026519775, "learning_rate": 9.98932852588392e-06, "loss": 0.0241, "step": 14060 }, { "epoch": 0.23761272672003242, "grad_norm": 0.47010132670402527, "learning_rate": 9.989232073913833e-06, "loss": 0.0255, "step": 14070 }, { "epoch": 0.2377816057013544, "grad_norm": 0.3563394546508789, "learning_rate": 9.98913518849149e-06, "loss": 0.0245, "step": 14080 }, { "epoch": 0.2379504846826764, "grad_norm": 0.30855071544647217, "learning_rate": 9.989037869625306e-06, "loss": 0.023, "step": 14090 }, { "epoch": 0.23811936366399838, "grad_norm": 0.6422715783119202, "learning_rate": 9.98894011732374e-06, "loss": 0.0185, "step": 14100 }, { "epoch": 0.23828824264532036, "grad_norm": 0.38222169876098633, "learning_rate": 9.988841931595278e-06, "loss": 0.0247, "step": 14110 }, { "epoch": 0.23845712162664234, "grad_norm": 0.790664792060852, "learning_rate": 9.988743312448454e-06, "loss": 0.0304, "step": 14120 }, { "epoch": 0.23862600060796432, "grad_norm": 0.33638784289360046, "learning_rate": 9.988644259891836e-06, "loss": 0.0178, "step": 14130 }, { "epoch": 0.23879487958928633, "grad_norm": 0.5386788845062256, "learning_rate": 9.988544773934027e-06, "loss": 0.0273, "step": 14140 }, { "epoch": 0.2389637585706083, "grad_norm": 0.5503007769584656, "learning_rate": 9.988444854583675e-06, "loss": 0.0188, "step": 14150 }, { "epoch": 0.2391326375519303, "grad_norm": 0.46143990755081177, "learning_rate": 9.988344501849456e-06, "loss": 0.0213, "step": 14160 }, { "epoch": 0.23930151653325227, "grad_norm": 0.543230414390564, "learning_rate": 9.98824371574009e-06, "loss": 0.0185, "step": 14170 }, { "epoch": 0.23947039551457425, "grad_norm": 0.7487009763717651, "learning_rate": 9.988142496264332e-06, "loss": 0.018, "step": 14180 }, { "epoch": 0.23963927449589625, "grad_norm": 0.43244966864585876, "learning_rate": 9.98804084343098e-06, "loss": 0.0208, "step": 14190 }, { "epoch": 0.23980815347721823, "grad_norm": 0.3707679808139801, "learning_rate": 9.987938757248858e-06, "loss": 0.0177, "step": 14200 }, { "epoch": 0.2399770324585402, "grad_norm": 0.44960302114486694, "learning_rate": 9.987836237726842e-06, "loss": 0.0259, "step": 14210 }, { "epoch": 0.2401459114398622, "grad_norm": 0.5064738988876343, "learning_rate": 9.987733284873836e-06, "loss": 0.0176, "step": 14220 }, { "epoch": 0.24031479042118417, "grad_norm": 0.25783678889274597, "learning_rate": 9.98762989869878e-06, "loss": 0.0204, "step": 14230 }, { "epoch": 0.24048366940250615, "grad_norm": 0.37166646122932434, "learning_rate": 9.987526079210664e-06, "loss": 0.0179, "step": 14240 }, { "epoch": 0.24065254838382816, "grad_norm": 0.47617506980895996, "learning_rate": 9.987421826418503e-06, "loss": 0.0216, "step": 14250 }, { "epoch": 0.24082142736515014, "grad_norm": 0.4230816066265106, "learning_rate": 9.987317140331353e-06, "loss": 0.0237, "step": 14260 }, { "epoch": 0.24099030634647212, "grad_norm": 0.3620251715183258, "learning_rate": 9.987212020958312e-06, "loss": 0.025, "step": 14270 }, { "epoch": 0.2411591853277941, "grad_norm": 0.22503137588500977, "learning_rate": 9.987106468308512e-06, "loss": 0.0215, "step": 14280 }, { "epoch": 0.24132806430911607, "grad_norm": 0.1525031477212906, "learning_rate": 9.987000482391121e-06, "loss": 0.019, "step": 14290 }, { "epoch": 0.24149694329043808, "grad_norm": 0.5609692931175232, "learning_rate": 9.986894063215347e-06, "loss": 0.0243, "step": 14300 }, { "epoch": 0.24166582227176006, "grad_norm": 0.39346054196357727, "learning_rate": 9.986787210790439e-06, "loss": 0.0182, "step": 14310 }, { "epoch": 0.24183470125308204, "grad_norm": 0.6329065561294556, "learning_rate": 9.986679925125675e-06, "loss": 0.0176, "step": 14320 }, { "epoch": 0.24200358023440402, "grad_norm": 0.2376514971256256, "learning_rate": 9.98657220623038e-06, "loss": 0.016, "step": 14330 }, { "epoch": 0.242172459215726, "grad_norm": 0.5367241501808167, "learning_rate": 9.986464054113911e-06, "loss": 0.016, "step": 14340 }, { "epoch": 0.242341338197048, "grad_norm": 0.233290895819664, "learning_rate": 9.986355468785663e-06, "loss": 0.0244, "step": 14350 }, { "epoch": 0.24251021717836999, "grad_norm": 0.6817636489868164, "learning_rate": 9.986246450255071e-06, "loss": 0.0238, "step": 14360 }, { "epoch": 0.24267909615969196, "grad_norm": 0.5034674406051636, "learning_rate": 9.986136998531604e-06, "loss": 0.021, "step": 14370 }, { "epoch": 0.24284797514101394, "grad_norm": 0.5380590558052063, "learning_rate": 9.986027113624775e-06, "loss": 0.0209, "step": 14380 }, { "epoch": 0.24301685412233592, "grad_norm": 0.44196218252182007, "learning_rate": 9.985916795544126e-06, "loss": 0.0251, "step": 14390 }, { "epoch": 0.24318573310365793, "grad_norm": 0.6018614768981934, "learning_rate": 9.985806044299244e-06, "loss": 0.0209, "step": 14400 }, { "epoch": 0.2433546120849799, "grad_norm": 0.5888574123382568, "learning_rate": 9.985694859899749e-06, "loss": 0.0187, "step": 14410 }, { "epoch": 0.2435234910663019, "grad_norm": 0.5520173907279968, "learning_rate": 9.985583242355303e-06, "loss": 0.0176, "step": 14420 }, { "epoch": 0.24369237004762387, "grad_norm": 0.5106216073036194, "learning_rate": 9.985471191675599e-06, "loss": 0.0214, "step": 14430 }, { "epoch": 0.24386124902894585, "grad_norm": 0.5226804614067078, "learning_rate": 9.985358707870377e-06, "loss": 0.0274, "step": 14440 }, { "epoch": 0.24403012801026785, "grad_norm": 0.3097682297229767, "learning_rate": 9.985245790949405e-06, "loss": 0.0206, "step": 14450 }, { "epoch": 0.24419900699158983, "grad_norm": 0.31447046995162964, "learning_rate": 9.985132440922494e-06, "loss": 0.0137, "step": 14460 }, { "epoch": 0.2443678859729118, "grad_norm": 0.30227798223495483, "learning_rate": 9.985018657799493e-06, "loss": 0.0248, "step": 14470 }, { "epoch": 0.2445367649542338, "grad_norm": 0.43398672342300415, "learning_rate": 9.984904441590286e-06, "loss": 0.0212, "step": 14480 }, { "epoch": 0.24470564393555577, "grad_norm": 0.34564530849456787, "learning_rate": 9.984789792304795e-06, "loss": 0.0232, "step": 14490 }, { "epoch": 0.24487452291687775, "grad_norm": 0.46257296204566956, "learning_rate": 9.984674709952982e-06, "loss": 0.018, "step": 14500 }, { "epoch": 0.24504340189819976, "grad_norm": 0.6801007390022278, "learning_rate": 9.984559194544846e-06, "loss": 0.02, "step": 14510 }, { "epoch": 0.24521228087952174, "grad_norm": 0.49176621437072754, "learning_rate": 9.984443246090418e-06, "loss": 0.0174, "step": 14520 }, { "epoch": 0.24538115986084372, "grad_norm": 0.6392713785171509, "learning_rate": 9.984326864599777e-06, "loss": 0.0197, "step": 14530 }, { "epoch": 0.2455500388421657, "grad_norm": 0.43070656061172485, "learning_rate": 9.98421005008303e-06, "loss": 0.018, "step": 14540 }, { "epoch": 0.24571891782348768, "grad_norm": 0.3873124122619629, "learning_rate": 9.984092802550329e-06, "loss": 0.0151, "step": 14550 }, { "epoch": 0.24588779680480968, "grad_norm": 0.8746378421783447, "learning_rate": 9.983975122011857e-06, "loss": 0.0221, "step": 14560 }, { "epoch": 0.24605667578613166, "grad_norm": 0.34932076930999756, "learning_rate": 9.98385700847784e-06, "loss": 0.0171, "step": 14570 }, { "epoch": 0.24622555476745364, "grad_norm": 0.7810940146446228, "learning_rate": 9.983738461958537e-06, "loss": 0.015, "step": 14580 }, { "epoch": 0.24639443374877562, "grad_norm": 0.5140601992607117, "learning_rate": 9.983619482464248e-06, "loss": 0.0212, "step": 14590 }, { "epoch": 0.2465633127300976, "grad_norm": 0.5268415212631226, "learning_rate": 9.983500070005312e-06, "loss": 0.0147, "step": 14600 }, { "epoch": 0.2467321917114196, "grad_norm": 0.9163571000099182, "learning_rate": 9.9833802245921e-06, "loss": 0.0285, "step": 14610 }, { "epoch": 0.2469010706927416, "grad_norm": 0.7802188992500305, "learning_rate": 9.983259946235024e-06, "loss": 0.0207, "step": 14620 }, { "epoch": 0.24706994967406357, "grad_norm": 0.3059062957763672, "learning_rate": 9.983139234944537e-06, "loss": 0.0216, "step": 14630 }, { "epoch": 0.24723882865538555, "grad_norm": 0.5545188784599304, "learning_rate": 9.983018090731123e-06, "loss": 0.0152, "step": 14640 }, { "epoch": 0.24740770763670752, "grad_norm": 0.41944533586502075, "learning_rate": 9.982896513605308e-06, "loss": 0.0189, "step": 14650 }, { "epoch": 0.24757658661802953, "grad_norm": 0.3496347665786743, "learning_rate": 9.982774503577654e-06, "loss": 0.0238, "step": 14660 }, { "epoch": 0.2477454655993515, "grad_norm": 0.5184956789016724, "learning_rate": 9.982652060658759e-06, "loss": 0.0226, "step": 14670 }, { "epoch": 0.2479143445806735, "grad_norm": 0.461725115776062, "learning_rate": 9.982529184859265e-06, "loss": 0.0209, "step": 14680 }, { "epoch": 0.24808322356199547, "grad_norm": 0.3003922700881958, "learning_rate": 9.982405876189844e-06, "loss": 0.0249, "step": 14690 }, { "epoch": 0.24825210254331745, "grad_norm": 0.6940450072288513, "learning_rate": 9.982282134661208e-06, "loss": 0.0244, "step": 14700 }, { "epoch": 0.24842098152463946, "grad_norm": 0.22043921053409576, "learning_rate": 9.982157960284108e-06, "loss": 0.0144, "step": 14710 }, { "epoch": 0.24858986050596144, "grad_norm": 0.2820742726325989, "learning_rate": 9.982033353069334e-06, "loss": 0.019, "step": 14720 }, { "epoch": 0.24875873948728341, "grad_norm": 0.3318594694137573, "learning_rate": 9.98190831302771e-06, "loss": 0.0152, "step": 14730 }, { "epoch": 0.2489276184686054, "grad_norm": 0.7075830698013306, "learning_rate": 9.9817828401701e-06, "loss": 0.0216, "step": 14740 }, { "epoch": 0.24909649744992737, "grad_norm": 0.35692620277404785, "learning_rate": 9.981656934507403e-06, "loss": 0.0157, "step": 14750 }, { "epoch": 0.24926537643124938, "grad_norm": 1.1544647216796875, "learning_rate": 9.981530596050557e-06, "loss": 0.0226, "step": 14760 }, { "epoch": 0.24943425541257136, "grad_norm": 0.2920507788658142, "learning_rate": 9.981403824810542e-06, "loss": 0.0154, "step": 14770 }, { "epoch": 0.24960313439389334, "grad_norm": 0.5528314113616943, "learning_rate": 9.981276620798368e-06, "loss": 0.0244, "step": 14780 }, { "epoch": 0.24977201337521532, "grad_norm": 0.6581658124923706, "learning_rate": 9.981148984025088e-06, "loss": 0.0194, "step": 14790 }, { "epoch": 0.2499408923565373, "grad_norm": 0.5038837194442749, "learning_rate": 9.98102091450179e-06, "loss": 0.0233, "step": 14800 }, { "epoch": 0.2501097713378593, "grad_norm": 0.47944748401641846, "learning_rate": 9.980892412239598e-06, "loss": 0.0236, "step": 14810 }, { "epoch": 0.2502786503191813, "grad_norm": 0.6787019371986389, "learning_rate": 9.98076347724968e-06, "loss": 0.0211, "step": 14820 }, { "epoch": 0.25044752930050324, "grad_norm": 0.6708154678344727, "learning_rate": 9.980634109543234e-06, "loss": 0.018, "step": 14830 }, { "epoch": 0.25061640828182524, "grad_norm": 0.25312289595603943, "learning_rate": 9.980504309131504e-06, "loss": 0.0184, "step": 14840 }, { "epoch": 0.25078528726314725, "grad_norm": 0.49572768807411194, "learning_rate": 9.980374076025763e-06, "loss": 0.0231, "step": 14850 }, { "epoch": 0.2509541662444692, "grad_norm": 0.5475833415985107, "learning_rate": 9.980243410237325e-06, "loss": 0.0183, "step": 14860 }, { "epoch": 0.2511230452257912, "grad_norm": 0.47025546431541443, "learning_rate": 9.980112311777543e-06, "loss": 0.0233, "step": 14870 }, { "epoch": 0.25129192420711316, "grad_norm": 0.40052610635757446, "learning_rate": 9.979980780657807e-06, "loss": 0.0213, "step": 14880 }, { "epoch": 0.25146080318843517, "grad_norm": 0.26449790596961975, "learning_rate": 9.979848816889544e-06, "loss": 0.0148, "step": 14890 }, { "epoch": 0.2516296821697572, "grad_norm": 0.303157240152359, "learning_rate": 9.979716420484217e-06, "loss": 0.0187, "step": 14900 }, { "epoch": 0.2517985611510791, "grad_norm": 0.8671082854270935, "learning_rate": 9.97958359145333e-06, "loss": 0.0221, "step": 14910 }, { "epoch": 0.25196744013240113, "grad_norm": 0.45890510082244873, "learning_rate": 9.979450329808422e-06, "loss": 0.0249, "step": 14920 }, { "epoch": 0.2521363191137231, "grad_norm": 0.31333497166633606, "learning_rate": 9.979316635561072e-06, "loss": 0.0195, "step": 14930 }, { "epoch": 0.2523051980950451, "grad_norm": 0.39345693588256836, "learning_rate": 9.979182508722894e-06, "loss": 0.0247, "step": 14940 }, { "epoch": 0.2524740770763671, "grad_norm": 0.5027623176574707, "learning_rate": 9.97904794930554e-06, "loss": 0.0157, "step": 14950 }, { "epoch": 0.25264295605768905, "grad_norm": 0.691646158695221, "learning_rate": 9.9789129573207e-06, "loss": 0.0248, "step": 14960 }, { "epoch": 0.25281183503901106, "grad_norm": 0.5151656866073608, "learning_rate": 9.978777532780104e-06, "loss": 0.0199, "step": 14970 }, { "epoch": 0.252980714020333, "grad_norm": 0.4503122866153717, "learning_rate": 9.978641675695516e-06, "loss": 0.0287, "step": 14980 }, { "epoch": 0.253149593001655, "grad_norm": 0.7286814451217651, "learning_rate": 9.978505386078737e-06, "loss": 0.0317, "step": 14990 }, { "epoch": 0.253318471982977, "grad_norm": 0.2355549931526184, "learning_rate": 9.978368663941609e-06, "loss": 0.0254, "step": 15000 }, { "epoch": 0.253487350964299, "grad_norm": 0.649070680141449, "learning_rate": 9.978231509296012e-06, "loss": 0.0241, "step": 15010 }, { "epoch": 0.253656229945621, "grad_norm": 0.43430808186531067, "learning_rate": 9.97809392215386e-06, "loss": 0.0176, "step": 15020 }, { "epoch": 0.25382510892694293, "grad_norm": 0.5050315260887146, "learning_rate": 9.977955902527104e-06, "loss": 0.0204, "step": 15030 }, { "epoch": 0.25399398790826494, "grad_norm": 0.23612195253372192, "learning_rate": 9.977817450427738e-06, "loss": 0.0167, "step": 15040 }, { "epoch": 0.25416286688958695, "grad_norm": 0.4376524090766907, "learning_rate": 9.97767856586779e-06, "loss": 0.0164, "step": 15050 }, { "epoch": 0.2543317458709089, "grad_norm": 0.5730429887771606, "learning_rate": 9.977539248859326e-06, "loss": 0.0201, "step": 15060 }, { "epoch": 0.2545006248522309, "grad_norm": 0.5206154584884644, "learning_rate": 9.977399499414449e-06, "loss": 0.0188, "step": 15070 }, { "epoch": 0.25466950383355286, "grad_norm": 0.3005998134613037, "learning_rate": 9.977259317545298e-06, "loss": 0.0149, "step": 15080 }, { "epoch": 0.25483838281487486, "grad_norm": 0.6042799353599548, "learning_rate": 9.977118703264055e-06, "loss": 0.0197, "step": 15090 }, { "epoch": 0.25500726179619687, "grad_norm": 0.26405996084213257, "learning_rate": 9.976977656582935e-06, "loss": 0.0208, "step": 15100 }, { "epoch": 0.2551761407775188, "grad_norm": 0.36158326268196106, "learning_rate": 9.976836177514192e-06, "loss": 0.0142, "step": 15110 }, { "epoch": 0.25534501975884083, "grad_norm": 0.4845794141292572, "learning_rate": 9.976694266070116e-06, "loss": 0.0195, "step": 15120 }, { "epoch": 0.2555138987401628, "grad_norm": 0.40729570388793945, "learning_rate": 9.976551922263039e-06, "loss": 0.0191, "step": 15130 }, { "epoch": 0.2556827777214848, "grad_norm": 0.4112001061439514, "learning_rate": 9.976409146105322e-06, "loss": 0.0169, "step": 15140 }, { "epoch": 0.2558516567028068, "grad_norm": 0.46115824580192566, "learning_rate": 9.976265937609376e-06, "loss": 0.0185, "step": 15150 }, { "epoch": 0.25602053568412875, "grad_norm": 0.5301120281219482, "learning_rate": 9.976122296787635e-06, "loss": 0.0245, "step": 15160 }, { "epoch": 0.25618941466545075, "grad_norm": 0.3431488871574402, "learning_rate": 9.975978223652584e-06, "loss": 0.0217, "step": 15170 }, { "epoch": 0.2563582936467727, "grad_norm": 0.4446263313293457, "learning_rate": 9.97583371821674e-06, "loss": 0.0189, "step": 15180 }, { "epoch": 0.2565271726280947, "grad_norm": 0.5236457586288452, "learning_rate": 9.975688780492653e-06, "loss": 0.019, "step": 15190 }, { "epoch": 0.25669605160941666, "grad_norm": 0.536490261554718, "learning_rate": 9.975543410492919e-06, "loss": 0.0254, "step": 15200 }, { "epoch": 0.25686493059073867, "grad_norm": 0.6047970652580261, "learning_rate": 9.975397608230164e-06, "loss": 0.0197, "step": 15210 }, { "epoch": 0.2570338095720607, "grad_norm": 0.7294623851776123, "learning_rate": 9.975251373717057e-06, "loss": 0.0216, "step": 15220 }, { "epoch": 0.25720268855338263, "grad_norm": 0.45333272218704224, "learning_rate": 9.975104706966301e-06, "loss": 0.0227, "step": 15230 }, { "epoch": 0.25737156753470464, "grad_norm": 0.444168359041214, "learning_rate": 9.974957607990638e-06, "loss": 0.0187, "step": 15240 }, { "epoch": 0.2575404465160266, "grad_norm": 0.38153859972953796, "learning_rate": 9.974810076802851e-06, "loss": 0.0242, "step": 15250 }, { "epoch": 0.2577093254973486, "grad_norm": 0.5535459518432617, "learning_rate": 9.974662113415754e-06, "loss": 0.0259, "step": 15260 }, { "epoch": 0.2578782044786706, "grad_norm": 0.720681369304657, "learning_rate": 9.974513717842201e-06, "loss": 0.0155, "step": 15270 }, { "epoch": 0.25804708345999255, "grad_norm": 0.6066797971725464, "learning_rate": 9.974364890095088e-06, "loss": 0.018, "step": 15280 }, { "epoch": 0.25821596244131456, "grad_norm": 0.627778947353363, "learning_rate": 9.97421563018734e-06, "loss": 0.0296, "step": 15290 }, { "epoch": 0.2583848414226365, "grad_norm": 0.77975994348526, "learning_rate": 9.974065938131928e-06, "loss": 0.0245, "step": 15300 }, { "epoch": 0.2585537204039585, "grad_norm": 0.7298102974891663, "learning_rate": 9.973915813941854e-06, "loss": 0.0242, "step": 15310 }, { "epoch": 0.2587225993852805, "grad_norm": 0.40137696266174316, "learning_rate": 9.973765257630163e-06, "loss": 0.0164, "step": 15320 }, { "epoch": 0.2588914783666025, "grad_norm": 0.5265408158302307, "learning_rate": 9.973614269209936e-06, "loss": 0.0176, "step": 15330 }, { "epoch": 0.2590603573479245, "grad_norm": 0.42064565420150757, "learning_rate": 9.973462848694287e-06, "loss": 0.021, "step": 15340 }, { "epoch": 0.25922923632924644, "grad_norm": 0.3870995044708252, "learning_rate": 9.973310996096372e-06, "loss": 0.0161, "step": 15350 }, { "epoch": 0.25939811531056844, "grad_norm": 0.9742095470428467, "learning_rate": 9.973158711429384e-06, "loss": 0.0166, "step": 15360 }, { "epoch": 0.25956699429189045, "grad_norm": 0.637778103351593, "learning_rate": 9.973005994706555e-06, "loss": 0.0243, "step": 15370 }, { "epoch": 0.2597358732732124, "grad_norm": 0.8102396726608276, "learning_rate": 9.972852845941148e-06, "loss": 0.0183, "step": 15380 }, { "epoch": 0.2599047522545344, "grad_norm": 0.5176591277122498, "learning_rate": 9.972699265146474e-06, "loss": 0.031, "step": 15390 }, { "epoch": 0.26007363123585636, "grad_norm": 0.594653308391571, "learning_rate": 9.972545252335871e-06, "loss": 0.0188, "step": 15400 }, { "epoch": 0.26024251021717837, "grad_norm": 0.5402161478996277, "learning_rate": 9.972390807522723e-06, "loss": 0.0163, "step": 15410 }, { "epoch": 0.2604113891985004, "grad_norm": 0.7489143013954163, "learning_rate": 9.972235930720444e-06, "loss": 0.0219, "step": 15420 }, { "epoch": 0.2605802681798223, "grad_norm": 0.48383212089538574, "learning_rate": 9.97208062194249e-06, "loss": 0.0143, "step": 15430 }, { "epoch": 0.26074914716114433, "grad_norm": 0.7182400822639465, "learning_rate": 9.971924881202359e-06, "loss": 0.0275, "step": 15440 }, { "epoch": 0.2609180261424663, "grad_norm": 0.6321203708648682, "learning_rate": 9.971768708513574e-06, "loss": 0.0249, "step": 15450 }, { "epoch": 0.2610869051237883, "grad_norm": 0.7095159888267517, "learning_rate": 9.971612103889708e-06, "loss": 0.0139, "step": 15460 }, { "epoch": 0.2612557841051103, "grad_norm": 0.23490828275680542, "learning_rate": 9.971455067344364e-06, "loss": 0.0175, "step": 15470 }, { "epoch": 0.26142466308643225, "grad_norm": 0.3158451020717621, "learning_rate": 9.971297598891187e-06, "loss": 0.0254, "step": 15480 }, { "epoch": 0.26159354206775426, "grad_norm": 0.3854454755783081, "learning_rate": 9.971139698543855e-06, "loss": 0.0193, "step": 15490 }, { "epoch": 0.2617624210490762, "grad_norm": 0.3682965040206909, "learning_rate": 9.970981366316086e-06, "loss": 0.0133, "step": 15500 }, { "epoch": 0.2619313000303982, "grad_norm": 0.7961382865905762, "learning_rate": 9.97082260222164e-06, "loss": 0.0171, "step": 15510 }, { "epoch": 0.2621001790117202, "grad_norm": 0.496171772480011, "learning_rate": 9.970663406274303e-06, "loss": 0.0215, "step": 15520 }, { "epoch": 0.2622690579930422, "grad_norm": 0.5307033658027649, "learning_rate": 9.970503778487911e-06, "loss": 0.0207, "step": 15530 }, { "epoch": 0.2624379369743642, "grad_norm": 0.6386639475822449, "learning_rate": 9.97034371887633e-06, "loss": 0.022, "step": 15540 }, { "epoch": 0.26260681595568613, "grad_norm": 0.42235931754112244, "learning_rate": 9.970183227453466e-06, "loss": 0.0208, "step": 15550 }, { "epoch": 0.26277569493700814, "grad_norm": 0.6357032060623169, "learning_rate": 9.970022304233262e-06, "loss": 0.0188, "step": 15560 }, { "epoch": 0.26294457391833015, "grad_norm": 0.24795334041118622, "learning_rate": 9.9698609492297e-06, "loss": 0.0146, "step": 15570 }, { "epoch": 0.2631134528996521, "grad_norm": 0.5271705985069275, "learning_rate": 9.969699162456794e-06, "loss": 0.0127, "step": 15580 }, { "epoch": 0.2632823318809741, "grad_norm": 0.4081493616104126, "learning_rate": 9.969536943928604e-06, "loss": 0.021, "step": 15590 }, { "epoch": 0.26345121086229606, "grad_norm": 0.41462722420692444, "learning_rate": 9.969374293659222e-06, "loss": 0.0167, "step": 15600 }, { "epoch": 0.26362008984361807, "grad_norm": 0.47190189361572266, "learning_rate": 9.969211211662777e-06, "loss": 0.0238, "step": 15610 }, { "epoch": 0.2637889688249401, "grad_norm": 0.3420904278755188, "learning_rate": 9.969047697953438e-06, "loss": 0.018, "step": 15620 }, { "epoch": 0.263957847806262, "grad_norm": 0.8474419713020325, "learning_rate": 9.968883752545413e-06, "loss": 0.02, "step": 15630 }, { "epoch": 0.26412672678758403, "grad_norm": 0.5219714045524597, "learning_rate": 9.968719375452943e-06, "loss": 0.0198, "step": 15640 }, { "epoch": 0.264295605768906, "grad_norm": 0.6794513463973999, "learning_rate": 9.968554566690308e-06, "loss": 0.0202, "step": 15650 }, { "epoch": 0.264464484750228, "grad_norm": 0.47097939252853394, "learning_rate": 9.968389326271825e-06, "loss": 0.0189, "step": 15660 }, { "epoch": 0.26463336373155, "grad_norm": 0.4104780852794647, "learning_rate": 9.968223654211855e-06, "loss": 0.0258, "step": 15670 }, { "epoch": 0.26480224271287195, "grad_norm": 0.31443631649017334, "learning_rate": 9.968057550524786e-06, "loss": 0.0191, "step": 15680 }, { "epoch": 0.26497112169419396, "grad_norm": 0.35714051127433777, "learning_rate": 9.967891015225053e-06, "loss": 0.0188, "step": 15690 }, { "epoch": 0.2651400006755159, "grad_norm": 0.44018155336380005, "learning_rate": 9.96772404832712e-06, "loss": 0.0207, "step": 15700 }, { "epoch": 0.2653088796568379, "grad_norm": 0.5909832715988159, "learning_rate": 9.967556649845494e-06, "loss": 0.0159, "step": 15710 }, { "epoch": 0.2654777586381599, "grad_norm": 0.5652768611907959, "learning_rate": 9.967388819794718e-06, "loss": 0.023, "step": 15720 }, { "epoch": 0.2656466376194819, "grad_norm": 0.239408940076828, "learning_rate": 9.967220558189377e-06, "loss": 0.0154, "step": 15730 }, { "epoch": 0.2658155166008039, "grad_norm": 0.6039668321609497, "learning_rate": 9.967051865044082e-06, "loss": 0.0179, "step": 15740 }, { "epoch": 0.26598439558212583, "grad_norm": 0.4002627730369568, "learning_rate": 9.966882740373493e-06, "loss": 0.0181, "step": 15750 }, { "epoch": 0.26615327456344784, "grad_norm": 0.6226483583450317, "learning_rate": 9.966713184192303e-06, "loss": 0.0266, "step": 15760 }, { "epoch": 0.2663221535447698, "grad_norm": 0.14853042364120483, "learning_rate": 9.966543196515241e-06, "loss": 0.0155, "step": 15770 }, { "epoch": 0.2664910325260918, "grad_norm": 0.8942045569419861, "learning_rate": 9.966372777357076e-06, "loss": 0.0219, "step": 15780 }, { "epoch": 0.2666599115074138, "grad_norm": 0.43304285407066345, "learning_rate": 9.966201926732614e-06, "loss": 0.0194, "step": 15790 }, { "epoch": 0.26682879048873576, "grad_norm": 0.41091352701187134, "learning_rate": 9.966030644656698e-06, "loss": 0.0221, "step": 15800 }, { "epoch": 0.26699766947005776, "grad_norm": 0.40352919697761536, "learning_rate": 9.965858931144207e-06, "loss": 0.0193, "step": 15810 }, { "epoch": 0.2671665484513797, "grad_norm": 0.40477150678634644, "learning_rate": 9.965686786210061e-06, "loss": 0.0148, "step": 15820 }, { "epoch": 0.2673354274327017, "grad_norm": 0.38234567642211914, "learning_rate": 9.965514209869214e-06, "loss": 0.0256, "step": 15830 }, { "epoch": 0.26750430641402373, "grad_norm": 0.41817936301231384, "learning_rate": 9.96534120213666e-06, "loss": 0.0237, "step": 15840 }, { "epoch": 0.2676731853953457, "grad_norm": 0.146399587392807, "learning_rate": 9.965167763027432e-06, "loss": 0.02, "step": 15850 }, { "epoch": 0.2678420643766677, "grad_norm": 0.48173823952674866, "learning_rate": 9.964993892556592e-06, "loss": 0.0213, "step": 15860 }, { "epoch": 0.26801094335798964, "grad_norm": 0.32855135202407837, "learning_rate": 9.96481959073925e-06, "loss": 0.0131, "step": 15870 }, { "epoch": 0.26817982233931165, "grad_norm": 0.4687929153442383, "learning_rate": 9.964644857590549e-06, "loss": 0.0241, "step": 15880 }, { "epoch": 0.26834870132063365, "grad_norm": 0.26092514395713806, "learning_rate": 9.964469693125664e-06, "loss": 0.0143, "step": 15890 }, { "epoch": 0.2685175803019556, "grad_norm": 0.2810869812965393, "learning_rate": 9.964294097359821e-06, "loss": 0.0196, "step": 15900 }, { "epoch": 0.2686864592832776, "grad_norm": 0.805853009223938, "learning_rate": 9.964118070308269e-06, "loss": 0.0162, "step": 15910 }, { "epoch": 0.26885533826459956, "grad_norm": 0.5327790975570679, "learning_rate": 9.963941611986304e-06, "loss": 0.0191, "step": 15920 }, { "epoch": 0.26902421724592157, "grad_norm": 0.4684774577617645, "learning_rate": 9.963764722409255e-06, "loss": 0.0205, "step": 15930 }, { "epoch": 0.2691930962272436, "grad_norm": 0.35359323024749756, "learning_rate": 9.96358740159249e-06, "loss": 0.0208, "step": 15940 }, { "epoch": 0.26936197520856553, "grad_norm": 0.6772895455360413, "learning_rate": 9.963409649551415e-06, "loss": 0.0222, "step": 15950 }, { "epoch": 0.26953085418988754, "grad_norm": 0.5996107459068298, "learning_rate": 9.96323146630147e-06, "loss": 0.021, "step": 15960 }, { "epoch": 0.2696997331712095, "grad_norm": 0.6357703804969788, "learning_rate": 9.963052851858136e-06, "loss": 0.0184, "step": 15970 }, { "epoch": 0.2698686121525315, "grad_norm": 0.840653121471405, "learning_rate": 9.962873806236934e-06, "loss": 0.0159, "step": 15980 }, { "epoch": 0.2700374911338535, "grad_norm": 0.41944214701652527, "learning_rate": 9.962694329453418e-06, "loss": 0.0174, "step": 15990 }, { "epoch": 0.27020637011517545, "grad_norm": 0.8161709308624268, "learning_rate": 9.962514421523178e-06, "loss": 0.0162, "step": 16000 }, { "epoch": 0.27037524909649746, "grad_norm": 0.572853684425354, "learning_rate": 9.962334082461843e-06, "loss": 0.0168, "step": 16010 }, { "epoch": 0.2705441280778194, "grad_norm": 0.5249279141426086, "learning_rate": 9.962153312285083e-06, "loss": 0.0209, "step": 16020 }, { "epoch": 0.2707130070591414, "grad_norm": 0.2929364740848541, "learning_rate": 9.961972111008605e-06, "loss": 0.0159, "step": 16030 }, { "epoch": 0.2708818860404634, "grad_norm": 0.5418990850448608, "learning_rate": 9.961790478648145e-06, "loss": 0.0194, "step": 16040 }, { "epoch": 0.2710507650217854, "grad_norm": 0.49187004566192627, "learning_rate": 9.961608415219488e-06, "loss": 0.0203, "step": 16050 }, { "epoch": 0.2712196440031074, "grad_norm": 0.5167809128761292, "learning_rate": 9.961425920738452e-06, "loss": 0.0195, "step": 16060 }, { "epoch": 0.27138852298442934, "grad_norm": 0.6556298136711121, "learning_rate": 9.961242995220886e-06, "loss": 0.0271, "step": 16070 }, { "epoch": 0.27155740196575134, "grad_norm": 0.5527656078338623, "learning_rate": 9.961059638682686e-06, "loss": 0.0188, "step": 16080 }, { "epoch": 0.27172628094707335, "grad_norm": 0.6550527811050415, "learning_rate": 9.960875851139783e-06, "loss": 0.023, "step": 16090 }, { "epoch": 0.2718951599283953, "grad_norm": 0.5358278751373291, "learning_rate": 9.960691632608142e-06, "loss": 0.0213, "step": 16100 }, { "epoch": 0.2720640389097173, "grad_norm": 0.9688189625740051, "learning_rate": 9.960506983103766e-06, "loss": 0.0264, "step": 16110 }, { "epoch": 0.27223291789103926, "grad_norm": 0.39056456089019775, "learning_rate": 9.960321902642697e-06, "loss": 0.0295, "step": 16120 }, { "epoch": 0.27240179687236127, "grad_norm": 0.3406197726726532, "learning_rate": 9.960136391241018e-06, "loss": 0.0299, "step": 16130 }, { "epoch": 0.2725706758536833, "grad_norm": 0.5045730471611023, "learning_rate": 9.959950448914844e-06, "loss": 0.0253, "step": 16140 }, { "epoch": 0.2727395548350052, "grad_norm": 0.7192314863204956, "learning_rate": 9.959764075680328e-06, "loss": 0.024, "step": 16150 }, { "epoch": 0.27290843381632723, "grad_norm": 0.5880616307258606, "learning_rate": 9.959577271553663e-06, "loss": 0.0239, "step": 16160 }, { "epoch": 0.2730773127976492, "grad_norm": 0.40849769115448, "learning_rate": 9.959390036551075e-06, "loss": 0.0184, "step": 16170 }, { "epoch": 0.2732461917789712, "grad_norm": 0.5486689209938049, "learning_rate": 9.959202370688835e-06, "loss": 0.0174, "step": 16180 }, { "epoch": 0.2734150707602932, "grad_norm": 0.40428295731544495, "learning_rate": 9.959014273983245e-06, "loss": 0.0145, "step": 16190 }, { "epoch": 0.27358394974161515, "grad_norm": 0.3321518898010254, "learning_rate": 9.958825746450646e-06, "loss": 0.0138, "step": 16200 }, { "epoch": 0.27375282872293716, "grad_norm": 0.6309951543807983, "learning_rate": 9.958636788107416e-06, "loss": 0.0155, "step": 16210 }, { "epoch": 0.2739217077042591, "grad_norm": 0.717494010925293, "learning_rate": 9.958447398969975e-06, "loss": 0.023, "step": 16220 }, { "epoch": 0.2740905866855811, "grad_norm": 0.4899601638317108, "learning_rate": 9.958257579054772e-06, "loss": 0.0213, "step": 16230 }, { "epoch": 0.2742594656669031, "grad_norm": 1.075669765472412, "learning_rate": 9.9580673283783e-06, "loss": 0.0305, "step": 16240 }, { "epoch": 0.2744283446482251, "grad_norm": 0.35664352774620056, "learning_rate": 9.957876646957088e-06, "loss": 0.0204, "step": 16250 }, { "epoch": 0.2745972236295471, "grad_norm": 0.1813022494316101, "learning_rate": 9.957685534807702e-06, "loss": 0.0233, "step": 16260 }, { "epoch": 0.27476610261086903, "grad_norm": 0.7125722765922546, "learning_rate": 9.957493991946746e-06, "loss": 0.0253, "step": 16270 }, { "epoch": 0.27493498159219104, "grad_norm": 0.5755765438079834, "learning_rate": 9.957302018390858e-06, "loss": 0.0155, "step": 16280 }, { "epoch": 0.27510386057351305, "grad_norm": 0.39908039569854736, "learning_rate": 9.957109614156718e-06, "loss": 0.0182, "step": 16290 }, { "epoch": 0.275272739554835, "grad_norm": 0.487160325050354, "learning_rate": 9.956916779261041e-06, "loss": 0.0228, "step": 16300 }, { "epoch": 0.275441618536157, "grad_norm": 0.3758297264575958, "learning_rate": 9.956723513720584e-06, "loss": 0.0209, "step": 16310 }, { "epoch": 0.27561049751747896, "grad_norm": 0.26998332142829895, "learning_rate": 9.95652981755213e-06, "loss": 0.0156, "step": 16320 }, { "epoch": 0.27577937649880097, "grad_norm": 0.5673339366912842, "learning_rate": 9.956335690772511e-06, "loss": 0.0239, "step": 16330 }, { "epoch": 0.2759482554801229, "grad_norm": 0.5530702471733093, "learning_rate": 9.956141133398594e-06, "loss": 0.0222, "step": 16340 }, { "epoch": 0.2761171344614449, "grad_norm": 0.5020323395729065, "learning_rate": 9.95594614544728e-06, "loss": 0.0147, "step": 16350 }, { "epoch": 0.27628601344276693, "grad_norm": 0.4245317876338959, "learning_rate": 9.955750726935507e-06, "loss": 0.0182, "step": 16360 }, { "epoch": 0.2764548924240889, "grad_norm": 0.36643654108047485, "learning_rate": 9.955554877880255e-06, "loss": 0.0206, "step": 16370 }, { "epoch": 0.2766237714054109, "grad_norm": 0.39701759815216064, "learning_rate": 9.955358598298537e-06, "loss": 0.0172, "step": 16380 }, { "epoch": 0.27679265038673284, "grad_norm": 0.25150078535079956, "learning_rate": 9.95516188820741e-06, "loss": 0.0108, "step": 16390 }, { "epoch": 0.27696152936805485, "grad_norm": 0.6173630952835083, "learning_rate": 9.954964747623956e-06, "loss": 0.0196, "step": 16400 }, { "epoch": 0.27713040834937686, "grad_norm": 0.4626672565937042, "learning_rate": 9.954767176565307e-06, "loss": 0.0179, "step": 16410 }, { "epoch": 0.2772992873306988, "grad_norm": 0.7373744249343872, "learning_rate": 9.95456917504863e-06, "loss": 0.0205, "step": 16420 }, { "epoch": 0.2774681663120208, "grad_norm": 0.5412279963493347, "learning_rate": 9.954370743091119e-06, "loss": 0.0202, "step": 16430 }, { "epoch": 0.27763704529334277, "grad_norm": 0.4503740668296814, "learning_rate": 9.954171880710021e-06, "loss": 0.0206, "step": 16440 }, { "epoch": 0.2778059242746648, "grad_norm": 0.4846859276294708, "learning_rate": 9.953972587922609e-06, "loss": 0.0264, "step": 16450 }, { "epoch": 0.2779748032559868, "grad_norm": 0.3509608209133148, "learning_rate": 9.953772864746198e-06, "loss": 0.014, "step": 16460 }, { "epoch": 0.27814368223730873, "grad_norm": 0.733658492565155, "learning_rate": 9.953572711198138e-06, "loss": 0.0231, "step": 16470 }, { "epoch": 0.27831256121863074, "grad_norm": 0.3196578323841095, "learning_rate": 9.95337212729582e-06, "loss": 0.016, "step": 16480 }, { "epoch": 0.2784814401999527, "grad_norm": 0.6395419836044312, "learning_rate": 9.95317111305667e-06, "loss": 0.018, "step": 16490 }, { "epoch": 0.2786503191812747, "grad_norm": 0.38936924934387207, "learning_rate": 9.95296966849815e-06, "loss": 0.0272, "step": 16500 }, { "epoch": 0.2788191981625967, "grad_norm": 0.33032235503196716, "learning_rate": 9.952767793637761e-06, "loss": 0.0215, "step": 16510 }, { "epoch": 0.27898807714391866, "grad_norm": 0.38854411244392395, "learning_rate": 9.952565488493043e-06, "loss": 0.0204, "step": 16520 }, { "epoch": 0.27915695612524066, "grad_norm": 0.8301010131835938, "learning_rate": 9.95236275308157e-06, "loss": 0.0212, "step": 16530 }, { "epoch": 0.2793258351065626, "grad_norm": 0.4588996469974518, "learning_rate": 9.952159587420956e-06, "loss": 0.0161, "step": 16540 }, { "epoch": 0.2794947140878846, "grad_norm": 0.6009469628334045, "learning_rate": 9.951955991528853e-06, "loss": 0.022, "step": 16550 }, { "epoch": 0.27966359306920663, "grad_norm": 0.33491480350494385, "learning_rate": 9.951751965422947e-06, "loss": 0.0173, "step": 16560 }, { "epoch": 0.2798324720505286, "grad_norm": 0.38250643014907837, "learning_rate": 9.951547509120965e-06, "loss": 0.0196, "step": 16570 }, { "epoch": 0.2800013510318506, "grad_norm": 0.37367552518844604, "learning_rate": 9.951342622640667e-06, "loss": 0.0197, "step": 16580 }, { "epoch": 0.28017023001317254, "grad_norm": 0.33073481917381287, "learning_rate": 9.951137305999854e-06, "loss": 0.0166, "step": 16590 }, { "epoch": 0.28033910899449455, "grad_norm": 0.508078932762146, "learning_rate": 9.950931559216366e-06, "loss": 0.0186, "step": 16600 }, { "epoch": 0.28050798797581655, "grad_norm": 0.662084698677063, "learning_rate": 9.950725382308075e-06, "loss": 0.0209, "step": 16610 }, { "epoch": 0.2806768669571385, "grad_norm": 0.42937126755714417, "learning_rate": 9.950518775292894e-06, "loss": 0.0252, "step": 16620 }, { "epoch": 0.2808457459384605, "grad_norm": 0.4727373421192169, "learning_rate": 9.950311738188773e-06, "loss": 0.0196, "step": 16630 }, { "epoch": 0.28101462491978246, "grad_norm": 0.7033623456954956, "learning_rate": 9.950104271013696e-06, "loss": 0.0193, "step": 16640 }, { "epoch": 0.28118350390110447, "grad_norm": 0.32233497500419617, "learning_rate": 9.949896373785692e-06, "loss": 0.0181, "step": 16650 }, { "epoch": 0.2813523828824265, "grad_norm": 0.6565945148468018, "learning_rate": 9.94968804652282e-06, "loss": 0.0186, "step": 16660 }, { "epoch": 0.28152126186374843, "grad_norm": 0.25558826327323914, "learning_rate": 9.949479289243177e-06, "loss": 0.0196, "step": 16670 }, { "epoch": 0.28169014084507044, "grad_norm": 0.2668958306312561, "learning_rate": 9.949270101964902e-06, "loss": 0.0131, "step": 16680 }, { "epoch": 0.2818590198263924, "grad_norm": 0.4997486472129822, "learning_rate": 9.94906048470617e-06, "loss": 0.0288, "step": 16690 }, { "epoch": 0.2820278988077144, "grad_norm": 0.41545507311820984, "learning_rate": 9.94885043748519e-06, "loss": 0.0231, "step": 16700 }, { "epoch": 0.2821967777890364, "grad_norm": 0.6680389642715454, "learning_rate": 9.94863996032021e-06, "loss": 0.0164, "step": 16710 }, { "epoch": 0.28236565677035835, "grad_norm": 0.8060377240180969, "learning_rate": 9.948429053229516e-06, "loss": 0.0216, "step": 16720 }, { "epoch": 0.28253453575168036, "grad_norm": 0.7226446866989136, "learning_rate": 9.94821771623143e-06, "loss": 0.0248, "step": 16730 }, { "epoch": 0.2827034147330023, "grad_norm": 0.4156365394592285, "learning_rate": 9.948005949344314e-06, "loss": 0.0165, "step": 16740 }, { "epoch": 0.2828722937143243, "grad_norm": 0.46510371565818787, "learning_rate": 9.947793752586567e-06, "loss": 0.0171, "step": 16750 }, { "epoch": 0.2830411726956463, "grad_norm": 0.5080302357673645, "learning_rate": 9.947581125976624e-06, "loss": 0.0163, "step": 16760 }, { "epoch": 0.2832100516769683, "grad_norm": 0.21218693256378174, "learning_rate": 9.947368069532955e-06, "loss": 0.0209, "step": 16770 }, { "epoch": 0.2833789306582903, "grad_norm": 0.6322805881500244, "learning_rate": 9.94715458327407e-06, "loss": 0.0198, "step": 16780 }, { "epoch": 0.28354780963961224, "grad_norm": 0.4281499981880188, "learning_rate": 9.946940667218517e-06, "loss": 0.014, "step": 16790 }, { "epoch": 0.28371668862093424, "grad_norm": 0.4742284417152405, "learning_rate": 9.946726321384882e-06, "loss": 0.0191, "step": 16800 }, { "epoch": 0.28388556760225625, "grad_norm": 0.27764782309532166, "learning_rate": 9.946511545791785e-06, "loss": 0.0241, "step": 16810 }, { "epoch": 0.2840544465835782, "grad_norm": 0.7079959511756897, "learning_rate": 9.946296340457885e-06, "loss": 0.0272, "step": 16820 }, { "epoch": 0.2842233255649002, "grad_norm": 0.7235041260719299, "learning_rate": 9.946080705401881e-06, "loss": 0.0262, "step": 16830 }, { "epoch": 0.28439220454622216, "grad_norm": 0.6466995477676392, "learning_rate": 9.945864640642505e-06, "loss": 0.019, "step": 16840 }, { "epoch": 0.28456108352754417, "grad_norm": 0.6293975710868835, "learning_rate": 9.945648146198525e-06, "loss": 0.0142, "step": 16850 }, { "epoch": 0.2847299625088661, "grad_norm": 0.325697660446167, "learning_rate": 9.945431222088755e-06, "loss": 0.0138, "step": 16860 }, { "epoch": 0.2848988414901881, "grad_norm": 0.505759596824646, "learning_rate": 9.94521386833204e-06, "loss": 0.0244, "step": 16870 }, { "epoch": 0.28506772047151013, "grad_norm": 0.5668811202049255, "learning_rate": 9.94499608494726e-06, "loss": 0.0163, "step": 16880 }, { "epoch": 0.2852365994528321, "grad_norm": 0.5341402888298035, "learning_rate": 9.944777871953338e-06, "loss": 0.0182, "step": 16890 }, { "epoch": 0.2854054784341541, "grad_norm": 0.504335343837738, "learning_rate": 9.94455922936923e-06, "loss": 0.0236, "step": 16900 }, { "epoch": 0.28557435741547604, "grad_norm": 0.4566879868507385, "learning_rate": 9.944340157213931e-06, "loss": 0.0174, "step": 16910 }, { "epoch": 0.28574323639679805, "grad_norm": 0.8313616514205933, "learning_rate": 9.944120655506477e-06, "loss": 0.0206, "step": 16920 }, { "epoch": 0.28591211537812006, "grad_norm": 0.8023596405982971, "learning_rate": 9.943900724265936e-06, "loss": 0.0252, "step": 16930 }, { "epoch": 0.286080994359442, "grad_norm": 0.4826772212982178, "learning_rate": 9.943680363511412e-06, "loss": 0.0184, "step": 16940 }, { "epoch": 0.286249873340764, "grad_norm": 0.3287493884563446, "learning_rate": 9.943459573262055e-06, "loss": 0.0264, "step": 16950 }, { "epoch": 0.28641875232208597, "grad_norm": 0.5584782361984253, "learning_rate": 9.943238353537041e-06, "loss": 0.0273, "step": 16960 }, { "epoch": 0.286587631303408, "grad_norm": 0.48604336380958557, "learning_rate": 9.943016704355592e-06, "loss": 0.0181, "step": 16970 }, { "epoch": 0.28675651028473, "grad_norm": 0.3268612325191498, "learning_rate": 9.942794625736965e-06, "loss": 0.0155, "step": 16980 }, { "epoch": 0.28692538926605193, "grad_norm": 0.5803906917572021, "learning_rate": 9.942572117700451e-06, "loss": 0.0253, "step": 16990 }, { "epoch": 0.28709426824737394, "grad_norm": 0.4128732979297638, "learning_rate": 9.942349180265384e-06, "loss": 0.0178, "step": 17000 }, { "epoch": 0.2872631472286959, "grad_norm": 0.8912205100059509, "learning_rate": 9.942125813451131e-06, "loss": 0.0267, "step": 17010 }, { "epoch": 0.2874320262100179, "grad_norm": 0.3962225317955017, "learning_rate": 9.941902017277096e-06, "loss": 0.0216, "step": 17020 }, { "epoch": 0.2876009051913399, "grad_norm": 0.330261766910553, "learning_rate": 9.941677791762725e-06, "loss": 0.0149, "step": 17030 }, { "epoch": 0.28776978417266186, "grad_norm": 0.35532042384147644, "learning_rate": 9.941453136927495e-06, "loss": 0.0131, "step": 17040 }, { "epoch": 0.28793866315398386, "grad_norm": 0.483746200799942, "learning_rate": 9.941228052790926e-06, "loss": 0.0189, "step": 17050 }, { "epoch": 0.2881075421353058, "grad_norm": 0.4206216633319855, "learning_rate": 9.94100253937257e-06, "loss": 0.0182, "step": 17060 }, { "epoch": 0.2882764211166278, "grad_norm": 0.6069573760032654, "learning_rate": 9.940776596692022e-06, "loss": 0.017, "step": 17070 }, { "epoch": 0.28844530009794983, "grad_norm": 0.4840218424797058, "learning_rate": 9.94055022476891e-06, "loss": 0.0206, "step": 17080 }, { "epoch": 0.2886141790792718, "grad_norm": 0.47343623638153076, "learning_rate": 9.9403234236229e-06, "loss": 0.0259, "step": 17090 }, { "epoch": 0.2887830580605938, "grad_norm": 0.1708446890115738, "learning_rate": 9.940096193273697e-06, "loss": 0.0209, "step": 17100 }, { "epoch": 0.28895193704191574, "grad_norm": 0.3193197548389435, "learning_rate": 9.939868533741042e-06, "loss": 0.0183, "step": 17110 }, { "epoch": 0.28912081602323775, "grad_norm": 0.35388264060020447, "learning_rate": 9.939640445044712e-06, "loss": 0.0218, "step": 17120 }, { "epoch": 0.28928969500455975, "grad_norm": 0.7934678196907043, "learning_rate": 9.939411927204526e-06, "loss": 0.0167, "step": 17130 }, { "epoch": 0.2894585739858817, "grad_norm": 0.4737137258052826, "learning_rate": 9.939182980240334e-06, "loss": 0.0179, "step": 17140 }, { "epoch": 0.2896274529672037, "grad_norm": 0.7701609134674072, "learning_rate": 9.938953604172027e-06, "loss": 0.0188, "step": 17150 }, { "epoch": 0.28979633194852567, "grad_norm": 0.6444275379180908, "learning_rate": 9.938723799019533e-06, "loss": 0.0166, "step": 17160 }, { "epoch": 0.28996521092984767, "grad_norm": 0.6839781403541565, "learning_rate": 9.93849356480282e-06, "loss": 0.0161, "step": 17170 }, { "epoch": 0.2901340899111697, "grad_norm": 0.4692671298980713, "learning_rate": 9.938262901541884e-06, "loss": 0.0194, "step": 17180 }, { "epoch": 0.29030296889249163, "grad_norm": 0.8728505969047546, "learning_rate": 9.938031809256769e-06, "loss": 0.0205, "step": 17190 }, { "epoch": 0.29047184787381364, "grad_norm": 0.37854933738708496, "learning_rate": 9.937800287967551e-06, "loss": 0.0161, "step": 17200 }, { "epoch": 0.2906407268551356, "grad_norm": 0.429860919713974, "learning_rate": 9.937568337694344e-06, "loss": 0.0165, "step": 17210 }, { "epoch": 0.2908096058364576, "grad_norm": 0.3469194173812866, "learning_rate": 9.937335958457297e-06, "loss": 0.0292, "step": 17220 }, { "epoch": 0.2909784848177796, "grad_norm": 0.5373216271400452, "learning_rate": 9.937103150276601e-06, "loss": 0.0173, "step": 17230 }, { "epoch": 0.29114736379910156, "grad_norm": 0.2766892910003662, "learning_rate": 9.936869913172481e-06, "loss": 0.0209, "step": 17240 }, { "epoch": 0.29131624278042356, "grad_norm": 0.24636512994766235, "learning_rate": 9.9366362471652e-06, "loss": 0.0175, "step": 17250 }, { "epoch": 0.2914851217617455, "grad_norm": 0.3717285692691803, "learning_rate": 9.93640215227506e-06, "loss": 0.0189, "step": 17260 }, { "epoch": 0.2916540007430675, "grad_norm": 0.44437047839164734, "learning_rate": 9.936167628522394e-06, "loss": 0.0148, "step": 17270 }, { "epoch": 0.29182287972438953, "grad_norm": 0.312215119600296, "learning_rate": 9.935932675927582e-06, "loss": 0.0251, "step": 17280 }, { "epoch": 0.2919917587057115, "grad_norm": 0.38122665882110596, "learning_rate": 9.935697294511034e-06, "loss": 0.0192, "step": 17290 }, { "epoch": 0.2921606376870335, "grad_norm": 0.4787892997264862, "learning_rate": 9.935461484293198e-06, "loss": 0.0173, "step": 17300 }, { "epoch": 0.29232951666835544, "grad_norm": 0.4442560076713562, "learning_rate": 9.935225245294563e-06, "loss": 0.0142, "step": 17310 }, { "epoch": 0.29249839564967745, "grad_norm": 0.5321149826049805, "learning_rate": 9.934988577535652e-06, "loss": 0.015, "step": 17320 }, { "epoch": 0.29266727463099945, "grad_norm": 0.3176901936531067, "learning_rate": 9.934751481037025e-06, "loss": 0.0143, "step": 17330 }, { "epoch": 0.2928361536123214, "grad_norm": 0.7992257475852966, "learning_rate": 9.934513955819283e-06, "loss": 0.0127, "step": 17340 }, { "epoch": 0.2930050325936434, "grad_norm": 0.39807987213134766, "learning_rate": 9.934276001903058e-06, "loss": 0.0219, "step": 17350 }, { "epoch": 0.29317391157496536, "grad_norm": 0.4414093494415283, "learning_rate": 9.934037619309025e-06, "loss": 0.0288, "step": 17360 }, { "epoch": 0.29334279055628737, "grad_norm": 0.20186300575733185, "learning_rate": 9.933798808057894e-06, "loss": 0.0174, "step": 17370 }, { "epoch": 0.2935116695376094, "grad_norm": 0.5560017824172974, "learning_rate": 9.933559568170413e-06, "loss": 0.0211, "step": 17380 }, { "epoch": 0.29368054851893133, "grad_norm": 0.4020896255970001, "learning_rate": 9.933319899667367e-06, "loss": 0.0229, "step": 17390 }, { "epoch": 0.29384942750025334, "grad_norm": 0.4187105596065521, "learning_rate": 9.933079802569575e-06, "loss": 0.0197, "step": 17400 }, { "epoch": 0.2940183064815753, "grad_norm": 0.5327821373939514, "learning_rate": 9.932839276897897e-06, "loss": 0.0184, "step": 17410 }, { "epoch": 0.2941871854628973, "grad_norm": 0.26920777559280396, "learning_rate": 9.93259832267323e-06, "loss": 0.0149, "step": 17420 }, { "epoch": 0.29435606444421925, "grad_norm": 0.23836274445056915, "learning_rate": 9.932356939916508e-06, "loss": 0.0227, "step": 17430 }, { "epoch": 0.29452494342554125, "grad_norm": 0.44489356875419617, "learning_rate": 9.932115128648703e-06, "loss": 0.0347, "step": 17440 }, { "epoch": 0.29469382240686326, "grad_norm": 0.19159740209579468, "learning_rate": 9.931872888890819e-06, "loss": 0.0176, "step": 17450 }, { "epoch": 0.2948627013881852, "grad_norm": 0.18655534088611603, "learning_rate": 9.931630220663904e-06, "loss": 0.0184, "step": 17460 }, { "epoch": 0.2950315803695072, "grad_norm": 0.48356756567955017, "learning_rate": 9.931387123989037e-06, "loss": 0.0231, "step": 17470 }, { "epoch": 0.29520045935082917, "grad_norm": 0.6865365505218506, "learning_rate": 9.931143598887345e-06, "loss": 0.0161, "step": 17480 }, { "epoch": 0.2953693383321512, "grad_norm": 0.3123454451560974, "learning_rate": 9.930899645379979e-06, "loss": 0.0205, "step": 17490 }, { "epoch": 0.2955382173134732, "grad_norm": 0.5933753252029419, "learning_rate": 9.930655263488133e-06, "loss": 0.0169, "step": 17500 }, { "epoch": 0.29570709629479514, "grad_norm": 0.4239480197429657, "learning_rate": 9.930410453233039e-06, "loss": 0.0147, "step": 17510 }, { "epoch": 0.29587597527611714, "grad_norm": 0.4082532823085785, "learning_rate": 9.930165214635968e-06, "loss": 0.0148, "step": 17520 }, { "epoch": 0.2960448542574391, "grad_norm": 0.2968171238899231, "learning_rate": 9.929919547718222e-06, "loss": 0.0151, "step": 17530 }, { "epoch": 0.2962137332387611, "grad_norm": 1.0294702053070068, "learning_rate": 9.92967345250115e-06, "loss": 0.018, "step": 17540 }, { "epoch": 0.2963826122200831, "grad_norm": 0.5531472563743591, "learning_rate": 9.929426929006124e-06, "loss": 0.0211, "step": 17550 }, { "epoch": 0.29655149120140506, "grad_norm": 0.35614311695098877, "learning_rate": 9.929179977254567e-06, "loss": 0.02, "step": 17560 }, { "epoch": 0.29672037018272707, "grad_norm": 0.5983462333679199, "learning_rate": 9.92893259726793e-06, "loss": 0.0231, "step": 17570 }, { "epoch": 0.296889249164049, "grad_norm": 0.25889769196510315, "learning_rate": 9.92868478906771e-06, "loss": 0.0228, "step": 17580 }, { "epoch": 0.297058128145371, "grad_norm": 0.6001752018928528, "learning_rate": 9.92843655267543e-06, "loss": 0.0166, "step": 17590 }, { "epoch": 0.29722700712669303, "grad_norm": 0.49652528762817383, "learning_rate": 9.92818788811266e-06, "loss": 0.0244, "step": 17600 }, { "epoch": 0.297395886108015, "grad_norm": 0.4355519115924835, "learning_rate": 9.927938795401003e-06, "loss": 0.0186, "step": 17610 }, { "epoch": 0.297564765089337, "grad_norm": 0.32691168785095215, "learning_rate": 9.927689274562096e-06, "loss": 0.013, "step": 17620 }, { "epoch": 0.29773364407065894, "grad_norm": 0.43549874424934387, "learning_rate": 9.927439325617621e-06, "loss": 0.0169, "step": 17630 }, { "epoch": 0.29790252305198095, "grad_norm": 0.42445969581604004, "learning_rate": 9.927188948589293e-06, "loss": 0.0149, "step": 17640 }, { "epoch": 0.29807140203330296, "grad_norm": 0.39481642842292786, "learning_rate": 9.92693814349886e-06, "loss": 0.0232, "step": 17650 }, { "epoch": 0.2982402810146249, "grad_norm": 0.62065589427948, "learning_rate": 9.926686910368116e-06, "loss": 0.0172, "step": 17660 }, { "epoch": 0.2984091599959469, "grad_norm": 0.4284171760082245, "learning_rate": 9.926435249218885e-06, "loss": 0.0141, "step": 17670 }, { "epoch": 0.29857803897726887, "grad_norm": 0.48390981554985046, "learning_rate": 9.926183160073028e-06, "loss": 0.0142, "step": 17680 }, { "epoch": 0.2987469179585909, "grad_norm": 0.6760067939758301, "learning_rate": 9.925930642952452e-06, "loss": 0.0141, "step": 17690 }, { "epoch": 0.2989157969399129, "grad_norm": 0.46550559997558594, "learning_rate": 9.925677697879093e-06, "loss": 0.0259, "step": 17700 }, { "epoch": 0.29908467592123483, "grad_norm": 0.4758705198764801, "learning_rate": 9.925424324874924e-06, "loss": 0.0205, "step": 17710 }, { "epoch": 0.29925355490255684, "grad_norm": 0.3053137958049774, "learning_rate": 9.92517052396196e-06, "loss": 0.0153, "step": 17720 }, { "epoch": 0.2994224338838788, "grad_norm": 0.4718374013900757, "learning_rate": 9.924916295162248e-06, "loss": 0.0231, "step": 17730 }, { "epoch": 0.2995913128652008, "grad_norm": 0.425984263420105, "learning_rate": 9.924661638497877e-06, "loss": 0.0275, "step": 17740 }, { "epoch": 0.2997601918465228, "grad_norm": 0.33221670985221863, "learning_rate": 9.924406553990968e-06, "loss": 0.0187, "step": 17750 }, { "epoch": 0.29992907082784476, "grad_norm": 0.3464619219303131, "learning_rate": 9.924151041663687e-06, "loss": 0.0198, "step": 17760 }, { "epoch": 0.30009794980916676, "grad_norm": 0.2972230017185211, "learning_rate": 9.923895101538228e-06, "loss": 0.0157, "step": 17770 }, { "epoch": 0.3002668287904887, "grad_norm": 0.6621490120887756, "learning_rate": 9.923638733636828e-06, "loss": 0.0167, "step": 17780 }, { "epoch": 0.3004357077718107, "grad_norm": 0.3329199552536011, "learning_rate": 9.92338193798176e-06, "loss": 0.0123, "step": 17790 }, { "epoch": 0.30060458675313273, "grad_norm": 0.21089676022529602, "learning_rate": 9.923124714595333e-06, "loss": 0.0128, "step": 17800 }, { "epoch": 0.3007734657344547, "grad_norm": 0.7872081995010376, "learning_rate": 9.922867063499894e-06, "loss": 0.0158, "step": 17810 }, { "epoch": 0.3009423447157767, "grad_norm": 0.6057849526405334, "learning_rate": 9.92260898471783e-06, "loss": 0.0161, "step": 17820 }, { "epoch": 0.30111122369709864, "grad_norm": 0.5100249648094177, "learning_rate": 9.922350478271558e-06, "loss": 0.0172, "step": 17830 }, { "epoch": 0.30128010267842065, "grad_norm": 0.39213794469833374, "learning_rate": 9.922091544183537e-06, "loss": 0.0172, "step": 17840 }, { "epoch": 0.30144898165974265, "grad_norm": 0.4070068597793579, "learning_rate": 9.921832182476263e-06, "loss": 0.025, "step": 17850 }, { "epoch": 0.3016178606410646, "grad_norm": 0.34352415800094604, "learning_rate": 9.921572393172272e-06, "loss": 0.011, "step": 17860 }, { "epoch": 0.3017867396223866, "grad_norm": 0.43593162298202515, "learning_rate": 9.921312176294128e-06, "loss": 0.0188, "step": 17870 }, { "epoch": 0.30195561860370856, "grad_norm": 0.5427636504173279, "learning_rate": 9.921051531864446e-06, "loss": 0.0158, "step": 17880 }, { "epoch": 0.30212449758503057, "grad_norm": 0.6037964224815369, "learning_rate": 9.92079045990586e-06, "loss": 0.0206, "step": 17890 }, { "epoch": 0.3022933765663526, "grad_norm": 0.33470791578292847, "learning_rate": 9.920528960441061e-06, "loss": 0.0162, "step": 17900 }, { "epoch": 0.30246225554767453, "grad_norm": 0.458097368478775, "learning_rate": 9.92026703349276e-06, "loss": 0.0137, "step": 17910 }, { "epoch": 0.30263113452899654, "grad_norm": 0.43014979362487793, "learning_rate": 9.920004679083716e-06, "loss": 0.0221, "step": 17920 }, { "epoch": 0.3028000135103185, "grad_norm": 0.5025340914726257, "learning_rate": 9.919741897236724e-06, "loss": 0.017, "step": 17930 }, { "epoch": 0.3029688924916405, "grad_norm": 0.5514077544212341, "learning_rate": 9.919478687974609e-06, "loss": 0.0201, "step": 17940 }, { "epoch": 0.3031377714729625, "grad_norm": 0.5257657170295715, "learning_rate": 9.91921505132024e-06, "loss": 0.0205, "step": 17950 }, { "epoch": 0.30330665045428445, "grad_norm": 0.4434436559677124, "learning_rate": 9.91895098729652e-06, "loss": 0.02, "step": 17960 }, { "epoch": 0.30347552943560646, "grad_norm": 0.5572549104690552, "learning_rate": 9.918686495926393e-06, "loss": 0.0226, "step": 17970 }, { "epoch": 0.3036444084169284, "grad_norm": 0.3814947009086609, "learning_rate": 9.918421577232837e-06, "loss": 0.0185, "step": 17980 }, { "epoch": 0.3038132873982504, "grad_norm": 0.613476037979126, "learning_rate": 9.918156231238865e-06, "loss": 0.0231, "step": 17990 }, { "epoch": 0.30398216637957237, "grad_norm": 0.4853728711605072, "learning_rate": 9.917890457967531e-06, "loss": 0.0202, "step": 18000 }, { "epoch": 0.3041510453608944, "grad_norm": 0.2768503427505493, "learning_rate": 9.917624257441924e-06, "loss": 0.0164, "step": 18010 }, { "epoch": 0.3043199243422164, "grad_norm": 0.26165032386779785, "learning_rate": 9.917357629685174e-06, "loss": 0.0224, "step": 18020 }, { "epoch": 0.30448880332353834, "grad_norm": 0.4276423752307892, "learning_rate": 9.91709057472044e-06, "loss": 0.0212, "step": 18030 }, { "epoch": 0.30465768230486034, "grad_norm": 0.4715642035007477, "learning_rate": 9.916823092570927e-06, "loss": 0.0196, "step": 18040 }, { "epoch": 0.3048265612861823, "grad_norm": 0.6050187349319458, "learning_rate": 9.916555183259872e-06, "loss": 0.0196, "step": 18050 }, { "epoch": 0.3049954402675043, "grad_norm": 1.035692572593689, "learning_rate": 9.916286846810549e-06, "loss": 0.0176, "step": 18060 }, { "epoch": 0.3051643192488263, "grad_norm": 0.6224516034126282, "learning_rate": 9.916018083246272e-06, "loss": 0.0161, "step": 18070 }, { "epoch": 0.30533319823014826, "grad_norm": 0.2576322555541992, "learning_rate": 9.91574889259039e-06, "loss": 0.0176, "step": 18080 }, { "epoch": 0.30550207721147027, "grad_norm": 0.06671546399593353, "learning_rate": 9.915479274866289e-06, "loss": 0.0127, "step": 18090 }, { "epoch": 0.3056709561927922, "grad_norm": 0.3171045184135437, "learning_rate": 9.915209230097394e-06, "loss": 0.0166, "step": 18100 }, { "epoch": 0.3058398351741142, "grad_norm": 0.7173612117767334, "learning_rate": 9.914938758307165e-06, "loss": 0.0173, "step": 18110 }, { "epoch": 0.30600871415543623, "grad_norm": 0.3250857889652252, "learning_rate": 9.9146678595191e-06, "loss": 0.0198, "step": 18120 }, { "epoch": 0.3061775931367582, "grad_norm": 0.6886175274848938, "learning_rate": 9.914396533756734e-06, "loss": 0.0149, "step": 18130 }, { "epoch": 0.3063464721180802, "grad_norm": 0.5037250518798828, "learning_rate": 9.914124781043642e-06, "loss": 0.0224, "step": 18140 }, { "epoch": 0.30651535109940214, "grad_norm": 0.3597768247127533, "learning_rate": 9.913852601403427e-06, "loss": 0.0162, "step": 18150 }, { "epoch": 0.30668423008072415, "grad_norm": 0.44987696409225464, "learning_rate": 9.91357999485974e-06, "loss": 0.0178, "step": 18160 }, { "epoch": 0.30685310906204616, "grad_norm": 0.5935501456260681, "learning_rate": 9.913306961436262e-06, "loss": 0.0174, "step": 18170 }, { "epoch": 0.3070219880433681, "grad_norm": 0.5068091154098511, "learning_rate": 9.913033501156716e-06, "loss": 0.0136, "step": 18180 }, { "epoch": 0.3071908670246901, "grad_norm": 0.5425840616226196, "learning_rate": 9.912759614044858e-06, "loss": 0.019, "step": 18190 }, { "epoch": 0.30735974600601207, "grad_norm": 0.12163656204938889, "learning_rate": 9.912485300124484e-06, "loss": 0.0145, "step": 18200 }, { "epoch": 0.3075286249873341, "grad_norm": 0.41326960921287537, "learning_rate": 9.912210559419422e-06, "loss": 0.0273, "step": 18210 }, { "epoch": 0.3076975039686561, "grad_norm": 0.37264105677604675, "learning_rate": 9.911935391953544e-06, "loss": 0.0184, "step": 18220 }, { "epoch": 0.30786638294997803, "grad_norm": 0.6312577128410339, "learning_rate": 9.911659797750756e-06, "loss": 0.0194, "step": 18230 }, { "epoch": 0.30803526193130004, "grad_norm": 0.29896748065948486, "learning_rate": 9.911383776835e-06, "loss": 0.0192, "step": 18240 }, { "epoch": 0.308204140912622, "grad_norm": 0.3889026343822479, "learning_rate": 9.911107329230256e-06, "loss": 0.0162, "step": 18250 }, { "epoch": 0.308373019893944, "grad_norm": 0.44100889563560486, "learning_rate": 9.910830454960543e-06, "loss": 0.0209, "step": 18260 }, { "epoch": 0.308541898875266, "grad_norm": 0.37389400601387024, "learning_rate": 9.910553154049912e-06, "loss": 0.0183, "step": 18270 }, { "epoch": 0.30871077785658796, "grad_norm": 0.47064727544784546, "learning_rate": 9.910275426522455e-06, "loss": 0.0212, "step": 18280 }, { "epoch": 0.30887965683790997, "grad_norm": 0.763359785079956, "learning_rate": 9.909997272402301e-06, "loss": 0.0204, "step": 18290 }, { "epoch": 0.3090485358192319, "grad_norm": 0.5857645273208618, "learning_rate": 9.909718691713617e-06, "loss": 0.0191, "step": 18300 }, { "epoch": 0.3092174148005539, "grad_norm": 0.4882037341594696, "learning_rate": 9.909439684480601e-06, "loss": 0.023, "step": 18310 }, { "epoch": 0.30938629378187593, "grad_norm": 0.3817943334579468, "learning_rate": 9.909160250727497e-06, "loss": 0.0167, "step": 18320 }, { "epoch": 0.3095551727631979, "grad_norm": 0.7557612061500549, "learning_rate": 9.90888039047858e-06, "loss": 0.0228, "step": 18330 }, { "epoch": 0.3097240517445199, "grad_norm": 0.6548097133636475, "learning_rate": 9.908600103758163e-06, "loss": 0.0184, "step": 18340 }, { "epoch": 0.30989293072584184, "grad_norm": 0.7125250101089478, "learning_rate": 9.908319390590597e-06, "loss": 0.0168, "step": 18350 }, { "epoch": 0.31006180970716385, "grad_norm": 0.17690429091453552, "learning_rate": 9.90803825100027e-06, "loss": 0.0146, "step": 18360 }, { "epoch": 0.31023068868848586, "grad_norm": 0.3247373402118683, "learning_rate": 9.907756685011604e-06, "loss": 0.0151, "step": 18370 }, { "epoch": 0.3103995676698078, "grad_norm": 0.3593253791332245, "learning_rate": 9.907474692649065e-06, "loss": 0.0212, "step": 18380 }, { "epoch": 0.3105684466511298, "grad_norm": 0.19085606932640076, "learning_rate": 9.907192273937149e-06, "loss": 0.0132, "step": 18390 }, { "epoch": 0.31073732563245177, "grad_norm": 0.3573344945907593, "learning_rate": 9.906909428900392e-06, "loss": 0.0195, "step": 18400 }, { "epoch": 0.3109062046137738, "grad_norm": 0.46128079295158386, "learning_rate": 9.90662615756337e-06, "loss": 0.016, "step": 18410 }, { "epoch": 0.3110750835950958, "grad_norm": 0.6635055541992188, "learning_rate": 9.906342459950688e-06, "loss": 0.014, "step": 18420 }, { "epoch": 0.31124396257641773, "grad_norm": 0.22589139640331268, "learning_rate": 9.906058336086998e-06, "loss": 0.0181, "step": 18430 }, { "epoch": 0.31141284155773974, "grad_norm": 0.49778109788894653, "learning_rate": 9.90577378599698e-06, "loss": 0.0152, "step": 18440 }, { "epoch": 0.3115817205390617, "grad_norm": 0.4774090051651001, "learning_rate": 9.905488809705356e-06, "loss": 0.0152, "step": 18450 }, { "epoch": 0.3117505995203837, "grad_norm": 0.5595214366912842, "learning_rate": 9.905203407236884e-06, "loss": 0.022, "step": 18460 }, { "epoch": 0.3119194785017057, "grad_norm": 0.5681846141815186, "learning_rate": 9.904917578616362e-06, "loss": 0.0141, "step": 18470 }, { "epoch": 0.31208835748302766, "grad_norm": 0.569534420967102, "learning_rate": 9.904631323868619e-06, "loss": 0.0284, "step": 18480 }, { "epoch": 0.31225723646434966, "grad_norm": 0.5416630506515503, "learning_rate": 9.904344643018524e-06, "loss": 0.0189, "step": 18490 }, { "epoch": 0.3124261154456716, "grad_norm": 0.3578236699104309, "learning_rate": 9.904057536090982e-06, "loss": 0.0144, "step": 18500 }, { "epoch": 0.3125949944269936, "grad_norm": 0.2788947522640228, "learning_rate": 9.903770003110941e-06, "loss": 0.0133, "step": 18510 }, { "epoch": 0.3127638734083156, "grad_norm": 1.018109917640686, "learning_rate": 9.903482044103378e-06, "loss": 0.0265, "step": 18520 }, { "epoch": 0.3129327523896376, "grad_norm": 0.6169915199279785, "learning_rate": 9.903193659093309e-06, "loss": 0.0253, "step": 18530 }, { "epoch": 0.3131016313709596, "grad_norm": 0.3897062838077545, "learning_rate": 9.90290484810579e-06, "loss": 0.0164, "step": 18540 }, { "epoch": 0.31327051035228154, "grad_norm": 0.35018405318260193, "learning_rate": 9.902615611165912e-06, "loss": 0.0178, "step": 18550 }, { "epoch": 0.31343938933360355, "grad_norm": 0.4411773681640625, "learning_rate": 9.902325948298802e-06, "loss": 0.0146, "step": 18560 }, { "epoch": 0.3136082683149255, "grad_norm": 0.2760438024997711, "learning_rate": 9.902035859529628e-06, "loss": 0.0179, "step": 18570 }, { "epoch": 0.3137771472962475, "grad_norm": 0.39440369606018066, "learning_rate": 9.901745344883588e-06, "loss": 0.0133, "step": 18580 }, { "epoch": 0.3139460262775695, "grad_norm": 0.409431129693985, "learning_rate": 9.901454404385927e-06, "loss": 0.0144, "step": 18590 }, { "epoch": 0.31411490525889146, "grad_norm": 0.5547463893890381, "learning_rate": 9.901163038061913e-06, "loss": 0.0177, "step": 18600 }, { "epoch": 0.31428378424021347, "grad_norm": 0.46744078397750854, "learning_rate": 9.900871245936868e-06, "loss": 0.0242, "step": 18610 }, { "epoch": 0.3144526632215354, "grad_norm": 0.3766386806964874, "learning_rate": 9.900579028036137e-06, "loss": 0.0197, "step": 18620 }, { "epoch": 0.31462154220285743, "grad_norm": 0.451323926448822, "learning_rate": 9.900286384385107e-06, "loss": 0.0136, "step": 18630 }, { "epoch": 0.31479042118417944, "grad_norm": 0.668236255645752, "learning_rate": 9.899993315009206e-06, "loss": 0.0193, "step": 18640 }, { "epoch": 0.3149593001655014, "grad_norm": 0.32987180352211, "learning_rate": 9.89969981993389e-06, "loss": 0.0171, "step": 18650 }, { "epoch": 0.3151281791468234, "grad_norm": 0.7450850009918213, "learning_rate": 9.899405899184662e-06, "loss": 0.0199, "step": 18660 }, { "epoch": 0.31529705812814535, "grad_norm": 0.40514615178108215, "learning_rate": 9.899111552787052e-06, "loss": 0.013, "step": 18670 }, { "epoch": 0.31546593710946735, "grad_norm": 0.6169034838676453, "learning_rate": 9.898816780766639e-06, "loss": 0.0184, "step": 18680 }, { "epoch": 0.31563481609078936, "grad_norm": 1.0042823553085327, "learning_rate": 9.898521583149024e-06, "loss": 0.0206, "step": 18690 }, { "epoch": 0.3158036950721113, "grad_norm": 0.8167505264282227, "learning_rate": 9.898225959959859e-06, "loss": 0.0208, "step": 18700 }, { "epoch": 0.3159725740534333, "grad_norm": 0.38031846284866333, "learning_rate": 9.897929911224824e-06, "loss": 0.0214, "step": 18710 }, { "epoch": 0.31614145303475527, "grad_norm": 0.3951919376850128, "learning_rate": 9.89763343696964e-06, "loss": 0.0155, "step": 18720 }, { "epoch": 0.3163103320160773, "grad_norm": 0.5657620429992676, "learning_rate": 9.897336537220065e-06, "loss": 0.0143, "step": 18730 }, { "epoch": 0.3164792109973993, "grad_norm": 0.4349915385246277, "learning_rate": 9.897039212001893e-06, "loss": 0.0238, "step": 18740 }, { "epoch": 0.31664808997872124, "grad_norm": 0.24529661238193512, "learning_rate": 9.896741461340954e-06, "loss": 0.0166, "step": 18750 }, { "epoch": 0.31681696896004324, "grad_norm": 0.6066117882728577, "learning_rate": 9.896443285263114e-06, "loss": 0.017, "step": 18760 }, { "epoch": 0.3169858479413652, "grad_norm": 0.2662718892097473, "learning_rate": 9.89614468379428e-06, "loss": 0.0213, "step": 18770 }, { "epoch": 0.3171547269226872, "grad_norm": 0.46993115544319153, "learning_rate": 9.895845656960393e-06, "loss": 0.016, "step": 18780 }, { "epoch": 0.3173236059040092, "grad_norm": 0.4385437071323395, "learning_rate": 9.895546204787431e-06, "loss": 0.0188, "step": 18790 }, { "epoch": 0.31749248488533116, "grad_norm": 0.49428504705429077, "learning_rate": 9.895246327301412e-06, "loss": 0.0181, "step": 18800 }, { "epoch": 0.31766136386665317, "grad_norm": 0.4775525629520416, "learning_rate": 9.894946024528387e-06, "loss": 0.0171, "step": 18810 }, { "epoch": 0.3178302428479751, "grad_norm": 0.4769463539123535, "learning_rate": 9.894645296494445e-06, "loss": 0.0141, "step": 18820 }, { "epoch": 0.3179991218292971, "grad_norm": 0.2888689637184143, "learning_rate": 9.894344143225714e-06, "loss": 0.0219, "step": 18830 }, { "epoch": 0.31816800081061913, "grad_norm": 0.45865121483802795, "learning_rate": 9.894042564748359e-06, "loss": 0.0141, "step": 18840 }, { "epoch": 0.3183368797919411, "grad_norm": 0.2486175298690796, "learning_rate": 9.893740561088576e-06, "loss": 0.0168, "step": 18850 }, { "epoch": 0.3185057587732631, "grad_norm": 0.6669756174087524, "learning_rate": 9.893438132272604e-06, "loss": 0.0173, "step": 18860 }, { "epoch": 0.31867463775458504, "grad_norm": 0.3962286710739136, "learning_rate": 9.893135278326718e-06, "loss": 0.0137, "step": 18870 }, { "epoch": 0.31884351673590705, "grad_norm": 0.4624646008014679, "learning_rate": 9.89283199927723e-06, "loss": 0.0148, "step": 18880 }, { "epoch": 0.31901239571722906, "grad_norm": 0.42909497022628784, "learning_rate": 9.892528295150488e-06, "loss": 0.0188, "step": 18890 }, { "epoch": 0.319181274698551, "grad_norm": 0.7345790863037109, "learning_rate": 9.892224165972874e-06, "loss": 0.0168, "step": 18900 }, { "epoch": 0.319350153679873, "grad_norm": 0.700605034828186, "learning_rate": 9.891919611770815e-06, "loss": 0.0195, "step": 18910 }, { "epoch": 0.31951903266119497, "grad_norm": 0.4058918058872223, "learning_rate": 9.891614632570765e-06, "loss": 0.0161, "step": 18920 }, { "epoch": 0.319687911642517, "grad_norm": 0.35027775168418884, "learning_rate": 9.891309228399222e-06, "loss": 0.0201, "step": 18930 }, { "epoch": 0.319856790623839, "grad_norm": 0.560458779335022, "learning_rate": 9.89100339928272e-06, "loss": 0.0121, "step": 18940 }, { "epoch": 0.32002566960516093, "grad_norm": 0.570374608039856, "learning_rate": 9.890697145247826e-06, "loss": 0.0133, "step": 18950 }, { "epoch": 0.32019454858648294, "grad_norm": 0.26971012353897095, "learning_rate": 9.890390466321148e-06, "loss": 0.02, "step": 18960 }, { "epoch": 0.3203634275678049, "grad_norm": 0.35091519355773926, "learning_rate": 9.89008336252933e-06, "loss": 0.0157, "step": 18970 }, { "epoch": 0.3205323065491269, "grad_norm": 0.17395195364952087, "learning_rate": 9.889775833899054e-06, "loss": 0.0118, "step": 18980 }, { "epoch": 0.3207011855304489, "grad_norm": 0.7268196940422058, "learning_rate": 9.889467880457034e-06, "loss": 0.0191, "step": 18990 }, { "epoch": 0.32087006451177086, "grad_norm": 0.5469682216644287, "learning_rate": 9.889159502230024e-06, "loss": 0.0243, "step": 19000 }, { "epoch": 0.32103894349309287, "grad_norm": 0.5889578461647034, "learning_rate": 9.888850699244818e-06, "loss": 0.0176, "step": 19010 }, { "epoch": 0.3212078224744148, "grad_norm": 0.3695332109928131, "learning_rate": 9.888541471528243e-06, "loss": 0.0208, "step": 19020 }, { "epoch": 0.3213767014557368, "grad_norm": 0.40556612610816956, "learning_rate": 9.888231819107163e-06, "loss": 0.0247, "step": 19030 }, { "epoch": 0.32154558043705883, "grad_norm": 0.6787765622138977, "learning_rate": 9.887921742008481e-06, "loss": 0.0189, "step": 19040 }, { "epoch": 0.3217144594183808, "grad_norm": 0.6790021657943726, "learning_rate": 9.887611240259135e-06, "loss": 0.0189, "step": 19050 }, { "epoch": 0.3218833383997028, "grad_norm": 0.25870010256767273, "learning_rate": 9.887300313886103e-06, "loss": 0.0189, "step": 19060 }, { "epoch": 0.32205221738102474, "grad_norm": 0.39045703411102295, "learning_rate": 9.886988962916393e-06, "loss": 0.0117, "step": 19070 }, { "epoch": 0.32222109636234675, "grad_norm": 0.4802965223789215, "learning_rate": 9.886677187377058e-06, "loss": 0.013, "step": 19080 }, { "epoch": 0.3223899753436687, "grad_norm": 0.20160503685474396, "learning_rate": 9.886364987295183e-06, "loss": 0.0182, "step": 19090 }, { "epoch": 0.3225588543249907, "grad_norm": 0.4747060537338257, "learning_rate": 9.88605236269789e-06, "loss": 0.0203, "step": 19100 }, { "epoch": 0.3227277333063127, "grad_norm": 0.4694049656391144, "learning_rate": 9.885739313612342e-06, "loss": 0.0215, "step": 19110 }, { "epoch": 0.32289661228763467, "grad_norm": 0.29344022274017334, "learning_rate": 9.885425840065734e-06, "loss": 0.0206, "step": 19120 }, { "epoch": 0.3230654912689567, "grad_norm": 0.5369806289672852, "learning_rate": 9.885111942085299e-06, "loss": 0.0144, "step": 19130 }, { "epoch": 0.3232343702502786, "grad_norm": 0.39230239391326904, "learning_rate": 9.884797619698309e-06, "loss": 0.0161, "step": 19140 }, { "epoch": 0.32340324923160063, "grad_norm": 0.349448561668396, "learning_rate": 9.884482872932073e-06, "loss": 0.017, "step": 19150 }, { "epoch": 0.32357212821292264, "grad_norm": 0.4088936448097229, "learning_rate": 9.884167701813933e-06, "loss": 0.0176, "step": 19160 }, { "epoch": 0.3237410071942446, "grad_norm": 0.4803636372089386, "learning_rate": 9.88385210637127e-06, "loss": 0.0191, "step": 19170 }, { "epoch": 0.3239098861755666, "grad_norm": 0.39564934372901917, "learning_rate": 9.883536086631506e-06, "loss": 0.0138, "step": 19180 }, { "epoch": 0.32407876515688855, "grad_norm": 0.41683611273765564, "learning_rate": 9.88321964262209e-06, "loss": 0.0189, "step": 19190 }, { "epoch": 0.32424764413821056, "grad_norm": 0.20269539952278137, "learning_rate": 9.88290277437052e-06, "loss": 0.0146, "step": 19200 }, { "epoch": 0.32441652311953256, "grad_norm": 0.33310815691947937, "learning_rate": 9.882585481904321e-06, "loss": 0.0155, "step": 19210 }, { "epoch": 0.3245854021008545, "grad_norm": 0.34580206871032715, "learning_rate": 9.882267765251059e-06, "loss": 0.017, "step": 19220 }, { "epoch": 0.3247542810821765, "grad_norm": 0.2718508839607239, "learning_rate": 9.881949624438337e-06, "loss": 0.0178, "step": 19230 }, { "epoch": 0.3249231600634985, "grad_norm": 0.35183119773864746, "learning_rate": 9.881631059493795e-06, "loss": 0.0216, "step": 19240 }, { "epoch": 0.3250920390448205, "grad_norm": 0.3884856700897217, "learning_rate": 9.881312070445105e-06, "loss": 0.015, "step": 19250 }, { "epoch": 0.3252609180261425, "grad_norm": 0.8045526146888733, "learning_rate": 9.880992657319987e-06, "loss": 0.018, "step": 19260 }, { "epoch": 0.32542979700746444, "grad_norm": 0.5049144625663757, "learning_rate": 9.880672820146187e-06, "loss": 0.0208, "step": 19270 }, { "epoch": 0.32559867598878645, "grad_norm": 0.3806297779083252, "learning_rate": 9.880352558951492e-06, "loss": 0.0133, "step": 19280 }, { "epoch": 0.3257675549701084, "grad_norm": 0.4158436357975006, "learning_rate": 9.880031873763723e-06, "loss": 0.0138, "step": 19290 }, { "epoch": 0.3259364339514304, "grad_norm": 0.21957385540008545, "learning_rate": 9.879710764610745e-06, "loss": 0.0146, "step": 19300 }, { "epoch": 0.3261053129327524, "grad_norm": 0.3167566657066345, "learning_rate": 9.879389231520453e-06, "loss": 0.018, "step": 19310 }, { "epoch": 0.32627419191407436, "grad_norm": 0.28444838523864746, "learning_rate": 9.87906727452078e-06, "loss": 0.0116, "step": 19320 }, { "epoch": 0.32644307089539637, "grad_norm": 0.2900006175041199, "learning_rate": 9.878744893639696e-06, "loss": 0.0214, "step": 19330 }, { "epoch": 0.3266119498767183, "grad_norm": 0.11886059492826462, "learning_rate": 9.878422088905214e-06, "loss": 0.0226, "step": 19340 }, { "epoch": 0.32678082885804033, "grad_norm": 0.9278571605682373, "learning_rate": 9.878098860345372e-06, "loss": 0.0143, "step": 19350 }, { "epoch": 0.32694970783936234, "grad_norm": 0.9603517055511475, "learning_rate": 9.877775207988255e-06, "loss": 0.0245, "step": 19360 }, { "epoch": 0.3271185868206843, "grad_norm": 0.563686192035675, "learning_rate": 9.87745113186198e-06, "loss": 0.0212, "step": 19370 }, { "epoch": 0.3272874658020063, "grad_norm": 0.3838774263858795, "learning_rate": 9.877126631994704e-06, "loss": 0.0139, "step": 19380 }, { "epoch": 0.32745634478332825, "grad_norm": 0.5817590355873108, "learning_rate": 9.876801708414616e-06, "loss": 0.0164, "step": 19390 }, { "epoch": 0.32762522376465025, "grad_norm": 0.3220866918563843, "learning_rate": 9.876476361149945e-06, "loss": 0.0154, "step": 19400 }, { "epoch": 0.32779410274597226, "grad_norm": 0.4399472773075104, "learning_rate": 9.876150590228957e-06, "loss": 0.0175, "step": 19410 }, { "epoch": 0.3279629817272942, "grad_norm": 0.1290949285030365, "learning_rate": 9.875824395679954e-06, "loss": 0.0174, "step": 19420 }, { "epoch": 0.3281318607086162, "grad_norm": 0.2101457417011261, "learning_rate": 9.875497777531276e-06, "loss": 0.0182, "step": 19430 }, { "epoch": 0.32830073968993817, "grad_norm": 0.7624188661575317, "learning_rate": 9.875170735811297e-06, "loss": 0.0217, "step": 19440 }, { "epoch": 0.3284696186712602, "grad_norm": 0.2244545966386795, "learning_rate": 9.87484327054843e-06, "loss": 0.0164, "step": 19450 }, { "epoch": 0.3286384976525822, "grad_norm": 0.4283643364906311, "learning_rate": 9.874515381771125e-06, "loss": 0.0188, "step": 19460 }, { "epoch": 0.32880737663390414, "grad_norm": 0.21855807304382324, "learning_rate": 9.87418706950787e-06, "loss": 0.0152, "step": 19470 }, { "epoch": 0.32897625561522614, "grad_norm": 0.4774697721004486, "learning_rate": 9.873858333787186e-06, "loss": 0.0168, "step": 19480 }, { "epoch": 0.3291451345965481, "grad_norm": 0.26515135169029236, "learning_rate": 9.87352917463763e-06, "loss": 0.0169, "step": 19490 }, { "epoch": 0.3293140135778701, "grad_norm": 0.41530242562294006, "learning_rate": 9.873199592087803e-06, "loss": 0.0212, "step": 19500 }, { "epoch": 0.3294828925591921, "grad_norm": 0.4806460440158844, "learning_rate": 9.872869586166336e-06, "loss": 0.0206, "step": 19510 }, { "epoch": 0.32965177154051406, "grad_norm": 0.317101389169693, "learning_rate": 9.872539156901899e-06, "loss": 0.0186, "step": 19520 }, { "epoch": 0.32982065052183607, "grad_norm": 0.5362359285354614, "learning_rate": 9.8722083043232e-06, "loss": 0.0203, "step": 19530 }, { "epoch": 0.329989529503158, "grad_norm": 0.3100317716598511, "learning_rate": 9.871877028458986e-06, "loss": 0.0139, "step": 19540 }, { "epoch": 0.33015840848448, "grad_norm": 0.47785574197769165, "learning_rate": 9.87154532933803e-06, "loss": 0.0183, "step": 19550 }, { "epoch": 0.33032728746580203, "grad_norm": 0.47614917159080505, "learning_rate": 9.871213206989152e-06, "loss": 0.0182, "step": 19560 }, { "epoch": 0.330496166447124, "grad_norm": 0.30103957653045654, "learning_rate": 9.870880661441209e-06, "loss": 0.0148, "step": 19570 }, { "epoch": 0.330665045428446, "grad_norm": 0.37139344215393066, "learning_rate": 9.870547692723089e-06, "loss": 0.0181, "step": 19580 }, { "epoch": 0.33083392440976794, "grad_norm": 0.30953940749168396, "learning_rate": 9.87021430086372e-06, "loss": 0.0121, "step": 19590 }, { "epoch": 0.33100280339108995, "grad_norm": 0.6726409196853638, "learning_rate": 9.869880485892067e-06, "loss": 0.0292, "step": 19600 }, { "epoch": 0.3311716823724119, "grad_norm": 0.5317507982254028, "learning_rate": 9.86954624783713e-06, "loss": 0.0181, "step": 19610 }, { "epoch": 0.3313405613537339, "grad_norm": 0.6046273112297058, "learning_rate": 9.869211586727946e-06, "loss": 0.02, "step": 19620 }, { "epoch": 0.3315094403350559, "grad_norm": 0.6293184757232666, "learning_rate": 9.868876502593592e-06, "loss": 0.0164, "step": 19630 }, { "epoch": 0.33167831931637787, "grad_norm": 0.38960328698158264, "learning_rate": 9.868540995463179e-06, "loss": 0.0181, "step": 19640 }, { "epoch": 0.3318471982976999, "grad_norm": 0.42142921686172485, "learning_rate": 9.868205065365853e-06, "loss": 0.0158, "step": 19650 }, { "epoch": 0.3320160772790218, "grad_norm": 0.3168331980705261, "learning_rate": 9.867868712330799e-06, "loss": 0.0185, "step": 19660 }, { "epoch": 0.33218495626034383, "grad_norm": 0.4664633870124817, "learning_rate": 9.86753193638724e-06, "loss": 0.0173, "step": 19670 }, { "epoch": 0.33235383524166584, "grad_norm": 0.3101034164428711, "learning_rate": 9.867194737564433e-06, "loss": 0.0159, "step": 19680 }, { "epoch": 0.3325227142229878, "grad_norm": 0.5575663447380066, "learning_rate": 9.866857115891674e-06, "loss": 0.0149, "step": 19690 }, { "epoch": 0.3326915932043098, "grad_norm": 0.3817130923271179, "learning_rate": 9.866519071398296e-06, "loss": 0.0167, "step": 19700 }, { "epoch": 0.33286047218563175, "grad_norm": 0.33556482195854187, "learning_rate": 9.866180604113665e-06, "loss": 0.0136, "step": 19710 }, { "epoch": 0.33302935116695376, "grad_norm": 0.20700223743915558, "learning_rate": 9.865841714067186e-06, "loss": 0.014, "step": 19720 }, { "epoch": 0.33319823014827576, "grad_norm": 0.4168405830860138, "learning_rate": 9.865502401288302e-06, "loss": 0.0194, "step": 19730 }, { "epoch": 0.3333671091295977, "grad_norm": 0.16102193295955658, "learning_rate": 9.865162665806494e-06, "loss": 0.0177, "step": 19740 }, { "epoch": 0.3335359881109197, "grad_norm": 0.3381134271621704, "learning_rate": 9.864822507651273e-06, "loss": 0.0179, "step": 19750 }, { "epoch": 0.3337048670922417, "grad_norm": 0.4141623377799988, "learning_rate": 9.864481926852194e-06, "loss": 0.0139, "step": 19760 }, { "epoch": 0.3338737460735637, "grad_norm": 0.3810795545578003, "learning_rate": 9.864140923438845e-06, "loss": 0.0185, "step": 19770 }, { "epoch": 0.3340426250548857, "grad_norm": 0.43281999230384827, "learning_rate": 9.86379949744085e-06, "loss": 0.0165, "step": 19780 }, { "epoch": 0.33421150403620764, "grad_norm": 0.6133947372436523, "learning_rate": 9.863457648887874e-06, "loss": 0.0181, "step": 19790 }, { "epoch": 0.33438038301752965, "grad_norm": 0.33099985122680664, "learning_rate": 9.863115377809615e-06, "loss": 0.0222, "step": 19800 }, { "epoch": 0.3345492619988516, "grad_norm": 0.5183334946632385, "learning_rate": 9.862772684235808e-06, "loss": 0.0248, "step": 19810 }, { "epoch": 0.3347181409801736, "grad_norm": 0.5772638916969299, "learning_rate": 9.862429568196225e-06, "loss": 0.0136, "step": 19820 }, { "epoch": 0.3348870199614956, "grad_norm": 0.406947523355484, "learning_rate": 9.862086029720679e-06, "loss": 0.0138, "step": 19830 }, { "epoch": 0.33505589894281756, "grad_norm": 0.38760504126548767, "learning_rate": 9.86174206883901e-06, "loss": 0.0141, "step": 19840 }, { "epoch": 0.33522477792413957, "grad_norm": 0.4951607882976532, "learning_rate": 9.861397685581104e-06, "loss": 0.0142, "step": 19850 }, { "epoch": 0.3353936569054615, "grad_norm": 0.7375919222831726, "learning_rate": 9.861052879976877e-06, "loss": 0.0178, "step": 19860 }, { "epoch": 0.33556253588678353, "grad_norm": 0.3808887302875519, "learning_rate": 9.860707652056291e-06, "loss": 0.0163, "step": 19870 }, { "epoch": 0.33573141486810554, "grad_norm": 0.25889718532562256, "learning_rate": 9.860362001849333e-06, "loss": 0.0136, "step": 19880 }, { "epoch": 0.3359002938494275, "grad_norm": 0.49072712659835815, "learning_rate": 9.860015929386034e-06, "loss": 0.0222, "step": 19890 }, { "epoch": 0.3360691728307495, "grad_norm": 0.4197664260864258, "learning_rate": 9.85966943469646e-06, "loss": 0.0149, "step": 19900 }, { "epoch": 0.33623805181207145, "grad_norm": 0.29250746965408325, "learning_rate": 9.859322517810713e-06, "loss": 0.0193, "step": 19910 }, { "epoch": 0.33640693079339345, "grad_norm": 0.3284964859485626, "learning_rate": 9.858975178758934e-06, "loss": 0.012, "step": 19920 }, { "epoch": 0.33657580977471546, "grad_norm": 0.3526450991630554, "learning_rate": 9.858627417571296e-06, "loss": 0.0198, "step": 19930 }, { "epoch": 0.3367446887560374, "grad_norm": 0.5241876840591431, "learning_rate": 9.858279234278015e-06, "loss": 0.0208, "step": 19940 }, { "epoch": 0.3369135677373594, "grad_norm": 0.44886285066604614, "learning_rate": 9.857930628909337e-06, "loss": 0.0184, "step": 19950 }, { "epoch": 0.33708244671868137, "grad_norm": 0.3038310706615448, "learning_rate": 9.857581601495551e-06, "loss": 0.0113, "step": 19960 }, { "epoch": 0.3372513257000034, "grad_norm": 0.4701795279979706, "learning_rate": 9.85723215206698e-06, "loss": 0.0136, "step": 19970 }, { "epoch": 0.3374202046813254, "grad_norm": 0.3316556513309479, "learning_rate": 9.856882280653979e-06, "loss": 0.0209, "step": 19980 }, { "epoch": 0.33758908366264734, "grad_norm": 0.4121064245700836, "learning_rate": 9.856531987286947e-06, "loss": 0.02, "step": 19990 }, { "epoch": 0.33775796264396934, "grad_norm": 0.24513308703899384, "learning_rate": 9.856181271996317e-06, "loss": 0.0155, "step": 20000 }, { "epoch": 0.3379268416252913, "grad_norm": 0.5186970829963684, "learning_rate": 9.855830134812558e-06, "loss": 0.0182, "step": 20010 }, { "epoch": 0.3380957206066133, "grad_norm": 0.47879117727279663, "learning_rate": 9.855478575766176e-06, "loss": 0.0215, "step": 20020 }, { "epoch": 0.3382645995879353, "grad_norm": 0.7025765776634216, "learning_rate": 9.855126594887711e-06, "loss": 0.0147, "step": 20030 }, { "epoch": 0.33843347856925726, "grad_norm": 0.48808521032333374, "learning_rate": 9.854774192207745e-06, "loss": 0.0196, "step": 20040 }, { "epoch": 0.33860235755057927, "grad_norm": 0.5696502327919006, "learning_rate": 9.854421367756894e-06, "loss": 0.0151, "step": 20050 }, { "epoch": 0.3387712365319012, "grad_norm": 0.29748794436454773, "learning_rate": 9.854068121565808e-06, "loss": 0.0194, "step": 20060 }, { "epoch": 0.33894011551322323, "grad_norm": 0.30838513374328613, "learning_rate": 9.85371445366518e-06, "loss": 0.0152, "step": 20070 }, { "epoch": 0.33910899449454524, "grad_norm": 0.3632211983203888, "learning_rate": 9.853360364085732e-06, "loss": 0.0237, "step": 20080 }, { "epoch": 0.3392778734758672, "grad_norm": 0.387522429227829, "learning_rate": 9.853005852858229e-06, "loss": 0.0214, "step": 20090 }, { "epoch": 0.3394467524571892, "grad_norm": 0.3722478151321411, "learning_rate": 9.852650920013468e-06, "loss": 0.0212, "step": 20100 }, { "epoch": 0.33961563143851115, "grad_norm": 0.47967293858528137, "learning_rate": 9.852295565582288e-06, "loss": 0.0172, "step": 20110 }, { "epoch": 0.33978451041983315, "grad_norm": 0.4094510078430176, "learning_rate": 9.851939789595558e-06, "loss": 0.0187, "step": 20120 }, { "epoch": 0.33995338940115516, "grad_norm": 0.22125361859798431, "learning_rate": 9.851583592084188e-06, "loss": 0.0182, "step": 20130 }, { "epoch": 0.3401222683824771, "grad_norm": 0.3691854774951935, "learning_rate": 9.851226973079125e-06, "loss": 0.0146, "step": 20140 }, { "epoch": 0.3402911473637991, "grad_norm": 0.6479901671409607, "learning_rate": 9.850869932611348e-06, "loss": 0.0208, "step": 20150 }, { "epoch": 0.34046002634512107, "grad_norm": 0.44904616475105286, "learning_rate": 9.85051247071188e-06, "loss": 0.0189, "step": 20160 }, { "epoch": 0.3406289053264431, "grad_norm": 0.20872396230697632, "learning_rate": 9.85015458741177e-06, "loss": 0.021, "step": 20170 }, { "epoch": 0.34079778430776503, "grad_norm": 0.6545212864875793, "learning_rate": 9.84979628274212e-06, "loss": 0.0137, "step": 20180 }, { "epoch": 0.34096666328908704, "grad_norm": 0.3114277124404907, "learning_rate": 9.849437556734048e-06, "loss": 0.0198, "step": 20190 }, { "epoch": 0.34113554227040904, "grad_norm": 0.09733012318611145, "learning_rate": 9.849078409418727e-06, "loss": 0.0155, "step": 20200 }, { "epoch": 0.341304421251731, "grad_norm": 0.427557110786438, "learning_rate": 9.848718840827354e-06, "loss": 0.0192, "step": 20210 }, { "epoch": 0.341473300233053, "grad_norm": 0.25777140259742737, "learning_rate": 9.848358850991168e-06, "loss": 0.0202, "step": 20220 }, { "epoch": 0.34164217921437495, "grad_norm": 0.49640634655952454, "learning_rate": 9.847998439941448e-06, "loss": 0.0176, "step": 20230 }, { "epoch": 0.34181105819569696, "grad_norm": 0.27950742840766907, "learning_rate": 9.847637607709502e-06, "loss": 0.0184, "step": 20240 }, { "epoch": 0.34197993717701897, "grad_norm": 0.5282090306282043, "learning_rate": 9.84727635432668e-06, "loss": 0.0284, "step": 20250 }, { "epoch": 0.3421488161583409, "grad_norm": 0.26793909072875977, "learning_rate": 9.846914679824365e-06, "loss": 0.0158, "step": 20260 }, { "epoch": 0.3423176951396629, "grad_norm": 0.3348469138145447, "learning_rate": 9.846552584233979e-06, "loss": 0.017, "step": 20270 }, { "epoch": 0.3424865741209849, "grad_norm": 0.8974435329437256, "learning_rate": 9.84619006758698e-06, "loss": 0.0167, "step": 20280 }, { "epoch": 0.3426554531023069, "grad_norm": 0.4158783555030823, "learning_rate": 9.845827129914864e-06, "loss": 0.0151, "step": 20290 }, { "epoch": 0.3428243320836289, "grad_norm": 0.5957549810409546, "learning_rate": 9.845463771249162e-06, "loss": 0.0245, "step": 20300 }, { "epoch": 0.34299321106495084, "grad_norm": 0.5899356603622437, "learning_rate": 9.84509999162144e-06, "loss": 0.0194, "step": 20310 }, { "epoch": 0.34316209004627285, "grad_norm": 0.6160961389541626, "learning_rate": 9.844735791063302e-06, "loss": 0.0259, "step": 20320 }, { "epoch": 0.3433309690275948, "grad_norm": 0.2757844924926758, "learning_rate": 9.84437116960639e-06, "loss": 0.0159, "step": 20330 }, { "epoch": 0.3434998480089168, "grad_norm": 0.3917083442211151, "learning_rate": 9.844006127282383e-06, "loss": 0.0206, "step": 20340 }, { "epoch": 0.3436687269902388, "grad_norm": 0.6353252530097961, "learning_rate": 9.843640664122992e-06, "loss": 0.0244, "step": 20350 }, { "epoch": 0.34383760597156077, "grad_norm": 1.0132454633712769, "learning_rate": 9.843274780159969e-06, "loss": 0.0176, "step": 20360 }, { "epoch": 0.3440064849528828, "grad_norm": 0.542728841304779, "learning_rate": 9.842908475425101e-06, "loss": 0.0284, "step": 20370 }, { "epoch": 0.3441753639342047, "grad_norm": 0.24217545986175537, "learning_rate": 9.842541749950212e-06, "loss": 0.0136, "step": 20380 }, { "epoch": 0.34434424291552673, "grad_norm": 0.417770117521286, "learning_rate": 9.842174603767161e-06, "loss": 0.0155, "step": 20390 }, { "epoch": 0.34451312189684874, "grad_norm": 0.6287169456481934, "learning_rate": 9.841807036907845e-06, "loss": 0.0187, "step": 20400 }, { "epoch": 0.3446820008781707, "grad_norm": 0.5152618885040283, "learning_rate": 9.8414390494042e-06, "loss": 0.0163, "step": 20410 }, { "epoch": 0.3448508798594927, "grad_norm": 0.4426683187484741, "learning_rate": 9.841070641288191e-06, "loss": 0.0174, "step": 20420 }, { "epoch": 0.34501975884081465, "grad_norm": 0.2523270547389984, "learning_rate": 9.84070181259183e-06, "loss": 0.0196, "step": 20430 }, { "epoch": 0.34518863782213666, "grad_norm": 0.5795959234237671, "learning_rate": 9.840332563347155e-06, "loss": 0.0176, "step": 20440 }, { "epoch": 0.34535751680345866, "grad_norm": 0.938748836517334, "learning_rate": 9.839962893586247e-06, "loss": 0.0306, "step": 20450 }, { "epoch": 0.3455263957847806, "grad_norm": 0.4093405604362488, "learning_rate": 9.839592803341224e-06, "loss": 0.0172, "step": 20460 }, { "epoch": 0.3456952747661026, "grad_norm": 0.3624989092350006, "learning_rate": 9.839222292644238e-06, "loss": 0.0149, "step": 20470 }, { "epoch": 0.3458641537474246, "grad_norm": 0.6763572692871094, "learning_rate": 9.838851361527476e-06, "loss": 0.0168, "step": 20480 }, { "epoch": 0.3460330327287466, "grad_norm": 0.3277342617511749, "learning_rate": 9.838480010023166e-06, "loss": 0.0167, "step": 20490 }, { "epoch": 0.3462019117100686, "grad_norm": 0.6657400131225586, "learning_rate": 9.838108238163566e-06, "loss": 0.0232, "step": 20500 }, { "epoch": 0.34637079069139054, "grad_norm": 0.3744269013404846, "learning_rate": 9.83773604598098e-06, "loss": 0.0154, "step": 20510 }, { "epoch": 0.34653966967271255, "grad_norm": 0.8175145387649536, "learning_rate": 9.837363433507738e-06, "loss": 0.0144, "step": 20520 }, { "epoch": 0.3467085486540345, "grad_norm": 0.5065975785255432, "learning_rate": 9.836990400776216e-06, "loss": 0.0183, "step": 20530 }, { "epoch": 0.3468774276353565, "grad_norm": 0.5336281657218933, "learning_rate": 9.83661694781882e-06, "loss": 0.0154, "step": 20540 }, { "epoch": 0.3470463066166785, "grad_norm": 0.3848208785057068, "learning_rate": 9.836243074667996e-06, "loss": 0.0187, "step": 20550 }, { "epoch": 0.34721518559800046, "grad_norm": 0.3086855411529541, "learning_rate": 9.835868781356224e-06, "loss": 0.0151, "step": 20560 }, { "epoch": 0.34738406457932247, "grad_norm": 0.4785178601741791, "learning_rate": 9.835494067916021e-06, "loss": 0.0144, "step": 20570 }, { "epoch": 0.3475529435606444, "grad_norm": 0.5550265312194824, "learning_rate": 9.835118934379943e-06, "loss": 0.0153, "step": 20580 }, { "epoch": 0.34772182254196643, "grad_norm": 0.32151955366134644, "learning_rate": 9.834743380780578e-06, "loss": 0.0183, "step": 20590 }, { "epoch": 0.34789070152328844, "grad_norm": 0.7880455255508423, "learning_rate": 9.834367407150559e-06, "loss": 0.0118, "step": 20600 }, { "epoch": 0.3480595805046104, "grad_norm": 0.3355417549610138, "learning_rate": 9.833991013522542e-06, "loss": 0.018, "step": 20610 }, { "epoch": 0.3482284594859324, "grad_norm": 0.2467212826013565, "learning_rate": 9.83361419992923e-06, "loss": 0.0193, "step": 20620 }, { "epoch": 0.34839733846725435, "grad_norm": 0.33153533935546875, "learning_rate": 9.833236966403364e-06, "loss": 0.0117, "step": 20630 }, { "epoch": 0.34856621744857635, "grad_norm": 0.3466675579547882, "learning_rate": 9.832859312977712e-06, "loss": 0.0174, "step": 20640 }, { "epoch": 0.34873509642989836, "grad_norm": 0.4936138689517975, "learning_rate": 9.832481239685085e-06, "loss": 0.018, "step": 20650 }, { "epoch": 0.3489039754112203, "grad_norm": 0.5886813402175903, "learning_rate": 9.83210274655833e-06, "loss": 0.0215, "step": 20660 }, { "epoch": 0.3490728543925423, "grad_norm": 0.4772260785102844, "learning_rate": 9.831723833630329e-06, "loss": 0.0169, "step": 20670 }, { "epoch": 0.34924173337386427, "grad_norm": 0.5520090460777283, "learning_rate": 9.831344500934e-06, "loss": 0.019, "step": 20680 }, { "epoch": 0.3494106123551863, "grad_norm": 0.4441514313220978, "learning_rate": 9.830964748502298e-06, "loss": 0.0127, "step": 20690 }, { "epoch": 0.3495794913365083, "grad_norm": 0.4084551930427551, "learning_rate": 9.830584576368219e-06, "loss": 0.0204, "step": 20700 }, { "epoch": 0.34974837031783024, "grad_norm": 0.35008537769317627, "learning_rate": 9.830203984564788e-06, "loss": 0.0152, "step": 20710 }, { "epoch": 0.34991724929915224, "grad_norm": 0.36462506651878357, "learning_rate": 9.829822973125071e-06, "loss": 0.0182, "step": 20720 }, { "epoch": 0.3500861282804742, "grad_norm": 0.35957837104797363, "learning_rate": 9.829441542082167e-06, "loss": 0.0177, "step": 20730 }, { "epoch": 0.3502550072617962, "grad_norm": 0.3887561857700348, "learning_rate": 9.829059691469219e-06, "loss": 0.0172, "step": 20740 }, { "epoch": 0.35042388624311815, "grad_norm": 0.694828987121582, "learning_rate": 9.828677421319396e-06, "loss": 0.0185, "step": 20750 }, { "epoch": 0.35059276522444016, "grad_norm": 0.43039315938949585, "learning_rate": 9.82829473166591e-06, "loss": 0.0213, "step": 20760 }, { "epoch": 0.35076164420576217, "grad_norm": 0.2545764744281769, "learning_rate": 9.827911622542011e-06, "loss": 0.0156, "step": 20770 }, { "epoch": 0.3509305231870841, "grad_norm": 0.2639315128326416, "learning_rate": 9.827528093980977e-06, "loss": 0.0247, "step": 20780 }, { "epoch": 0.3510994021684061, "grad_norm": 0.42080315947532654, "learning_rate": 9.827144146016134e-06, "loss": 0.0293, "step": 20790 }, { "epoch": 0.3512682811497281, "grad_norm": 0.3015105426311493, "learning_rate": 9.826759778680836e-06, "loss": 0.0139, "step": 20800 }, { "epoch": 0.3514371601310501, "grad_norm": 0.6242576241493225, "learning_rate": 9.826374992008474e-06, "loss": 0.014, "step": 20810 }, { "epoch": 0.3516060391123721, "grad_norm": 0.4345533847808838, "learning_rate": 9.82598978603248e-06, "loss": 0.0157, "step": 20820 }, { "epoch": 0.35177491809369404, "grad_norm": 0.3599334955215454, "learning_rate": 9.825604160786319e-06, "loss": 0.0201, "step": 20830 }, { "epoch": 0.35194379707501605, "grad_norm": 0.45618486404418945, "learning_rate": 9.825218116303495e-06, "loss": 0.0156, "step": 20840 }, { "epoch": 0.352112676056338, "grad_norm": 0.5665951371192932, "learning_rate": 9.824831652617542e-06, "loss": 0.016, "step": 20850 }, { "epoch": 0.35228155503766, "grad_norm": 0.9064202904701233, "learning_rate": 9.824444769762039e-06, "loss": 0.0171, "step": 20860 }, { "epoch": 0.352450434018982, "grad_norm": 0.48538950085639954, "learning_rate": 9.824057467770596e-06, "loss": 0.0186, "step": 20870 }, { "epoch": 0.35261931300030397, "grad_norm": 0.40993863344192505, "learning_rate": 9.82366974667686e-06, "loss": 0.0165, "step": 20880 }, { "epoch": 0.352788191981626, "grad_norm": 0.3493928611278534, "learning_rate": 9.823281606514516e-06, "loss": 0.0142, "step": 20890 }, { "epoch": 0.3529570709629479, "grad_norm": 0.6281397938728333, "learning_rate": 9.822893047317286e-06, "loss": 0.0215, "step": 20900 }, { "epoch": 0.35312594994426993, "grad_norm": 0.19262272119522095, "learning_rate": 9.822504069118926e-06, "loss": 0.0115, "step": 20910 }, { "epoch": 0.35329482892559194, "grad_norm": 0.2661234736442566, "learning_rate": 9.82211467195323e-06, "loss": 0.0135, "step": 20920 }, { "epoch": 0.3534637079069139, "grad_norm": 0.535638689994812, "learning_rate": 9.821724855854026e-06, "loss": 0.0199, "step": 20930 }, { "epoch": 0.3536325868882359, "grad_norm": 0.31614384055137634, "learning_rate": 9.821334620855182e-06, "loss": 0.0213, "step": 20940 }, { "epoch": 0.35380146586955785, "grad_norm": 0.8506525158882141, "learning_rate": 9.8209439669906e-06, "loss": 0.0167, "step": 20950 }, { "epoch": 0.35397034485087986, "grad_norm": 0.855387270450592, "learning_rate": 9.82055289429422e-06, "loss": 0.0278, "step": 20960 }, { "epoch": 0.35413922383220187, "grad_norm": 0.5544021129608154, "learning_rate": 9.820161402800016e-06, "loss": 0.0192, "step": 20970 }, { "epoch": 0.3543081028135238, "grad_norm": 0.37924736738204956, "learning_rate": 9.819769492542e-06, "loss": 0.0157, "step": 20980 }, { "epoch": 0.3544769817948458, "grad_norm": 0.34623581171035767, "learning_rate": 9.819377163554222e-06, "loss": 0.0131, "step": 20990 }, { "epoch": 0.3546458607761678, "grad_norm": 0.22979938983917236, "learning_rate": 9.818984415870765e-06, "loss": 0.0121, "step": 21000 }, { "epoch": 0.3548147397574898, "grad_norm": 0.2599260210990906, "learning_rate": 9.81859124952575e-06, "loss": 0.0134, "step": 21010 }, { "epoch": 0.3549836187388118, "grad_norm": 0.33643224835395813, "learning_rate": 9.818197664553335e-06, "loss": 0.0155, "step": 21020 }, { "epoch": 0.35515249772013374, "grad_norm": 0.42654943466186523, "learning_rate": 9.817803660987713e-06, "loss": 0.0154, "step": 21030 }, { "epoch": 0.35532137670145575, "grad_norm": 0.4921734035015106, "learning_rate": 9.817409238863113e-06, "loss": 0.0172, "step": 21040 }, { "epoch": 0.3554902556827777, "grad_norm": 0.4060327112674713, "learning_rate": 9.817014398213805e-06, "loss": 0.0135, "step": 21050 }, { "epoch": 0.3556591346640997, "grad_norm": 0.2639528512954712, "learning_rate": 9.816619139074088e-06, "loss": 0.0144, "step": 21060 }, { "epoch": 0.3558280136454217, "grad_norm": 0.35626593232154846, "learning_rate": 9.816223461478302e-06, "loss": 0.0206, "step": 21070 }, { "epoch": 0.35599689262674367, "grad_norm": 0.5175348520278931, "learning_rate": 9.815827365460826e-06, "loss": 0.0117, "step": 21080 }, { "epoch": 0.3561657716080657, "grad_norm": 0.33635932207107544, "learning_rate": 9.815430851056067e-06, "loss": 0.0194, "step": 21090 }, { "epoch": 0.3563346505893876, "grad_norm": 0.6535633206367493, "learning_rate": 9.815033918298476e-06, "loss": 0.0255, "step": 21100 }, { "epoch": 0.35650352957070963, "grad_norm": 0.48357874155044556, "learning_rate": 9.814636567222536e-06, "loss": 0.0182, "step": 21110 }, { "epoch": 0.35667240855203164, "grad_norm": 0.4817354083061218, "learning_rate": 9.81423879786277e-06, "loss": 0.021, "step": 21120 }, { "epoch": 0.3568412875333536, "grad_norm": 0.4282674789428711, "learning_rate": 9.813840610253733e-06, "loss": 0.0182, "step": 21130 }, { "epoch": 0.3570101665146756, "grad_norm": 0.5560261011123657, "learning_rate": 9.813442004430021e-06, "loss": 0.009, "step": 21140 }, { "epoch": 0.35717904549599755, "grad_norm": 0.5548796057701111, "learning_rate": 9.81304298042626e-06, "loss": 0.0216, "step": 21150 }, { "epoch": 0.35734792447731956, "grad_norm": 0.8228307366371155, "learning_rate": 9.81264353827712e-06, "loss": 0.0161, "step": 21160 }, { "epoch": 0.35751680345864156, "grad_norm": 0.6285344958305359, "learning_rate": 9.8122436780173e-06, "loss": 0.0176, "step": 21170 }, { "epoch": 0.3576856824399635, "grad_norm": 0.5121346712112427, "learning_rate": 9.811843399681545e-06, "loss": 0.0204, "step": 21180 }, { "epoch": 0.3578545614212855, "grad_norm": 0.7280012369155884, "learning_rate": 9.811442703304623e-06, "loss": 0.0159, "step": 21190 }, { "epoch": 0.3580234404026075, "grad_norm": 0.34191441535949707, "learning_rate": 9.811041588921351e-06, "loss": 0.0154, "step": 21200 }, { "epoch": 0.3581923193839295, "grad_norm": 0.4376964271068573, "learning_rate": 9.810640056566575e-06, "loss": 0.0148, "step": 21210 }, { "epoch": 0.3583611983652515, "grad_norm": 0.4862738847732544, "learning_rate": 9.810238106275176e-06, "loss": 0.0138, "step": 21220 }, { "epoch": 0.35853007734657344, "grad_norm": 0.3721591532230377, "learning_rate": 9.809835738082079e-06, "loss": 0.0174, "step": 21230 }, { "epoch": 0.35869895632789545, "grad_norm": 0.2363503873348236, "learning_rate": 9.809432952022238e-06, "loss": 0.0116, "step": 21240 }, { "epoch": 0.3588678353092174, "grad_norm": 0.4770016372203827, "learning_rate": 9.809029748130648e-06, "loss": 0.0159, "step": 21250 }, { "epoch": 0.3590367142905394, "grad_norm": 0.4118897318840027, "learning_rate": 9.808626126442338e-06, "loss": 0.0193, "step": 21260 }, { "epoch": 0.35920559327186136, "grad_norm": 0.3197208642959595, "learning_rate": 9.808222086992371e-06, "loss": 0.0209, "step": 21270 }, { "epoch": 0.35937447225318336, "grad_norm": 0.4949062466621399, "learning_rate": 9.807817629815852e-06, "loss": 0.0177, "step": 21280 }, { "epoch": 0.35954335123450537, "grad_norm": 0.3787829279899597, "learning_rate": 9.807412754947918e-06, "loss": 0.0234, "step": 21290 }, { "epoch": 0.3597122302158273, "grad_norm": 0.5609737634658813, "learning_rate": 9.807007462423745e-06, "loss": 0.0205, "step": 21300 }, { "epoch": 0.35988110919714933, "grad_norm": 0.3775010406970978, "learning_rate": 9.806601752278542e-06, "loss": 0.0128, "step": 21310 }, { "epoch": 0.3600499881784713, "grad_norm": 0.4999432861804962, "learning_rate": 9.806195624547557e-06, "loss": 0.0156, "step": 21320 }, { "epoch": 0.3602188671597933, "grad_norm": 0.21927042305469513, "learning_rate": 9.805789079266072e-06, "loss": 0.0154, "step": 21330 }, { "epoch": 0.3603877461411153, "grad_norm": 0.28368058800697327, "learning_rate": 9.805382116469409e-06, "loss": 0.012, "step": 21340 }, { "epoch": 0.36055662512243725, "grad_norm": 0.5769209265708923, "learning_rate": 9.804974736192922e-06, "loss": 0.0158, "step": 21350 }, { "epoch": 0.36072550410375925, "grad_norm": 0.37719377875328064, "learning_rate": 9.804566938472005e-06, "loss": 0.0202, "step": 21360 }, { "epoch": 0.3608943830850812, "grad_norm": 0.21622665226459503, "learning_rate": 9.804158723342083e-06, "loss": 0.0156, "step": 21370 }, { "epoch": 0.3610632620664032, "grad_norm": 0.3651207387447357, "learning_rate": 9.803750090838625e-06, "loss": 0.0219, "step": 21380 }, { "epoch": 0.3612321410477252, "grad_norm": 0.3043971657752991, "learning_rate": 9.80334104099713e-06, "loss": 0.0215, "step": 21390 }, { "epoch": 0.36140102002904717, "grad_norm": 0.6069381237030029, "learning_rate": 9.802931573853135e-06, "loss": 0.0149, "step": 21400 }, { "epoch": 0.3615698990103692, "grad_norm": 0.5950382947921753, "learning_rate": 9.802521689442214e-06, "loss": 0.0197, "step": 21410 }, { "epoch": 0.36173877799169113, "grad_norm": 0.34684035181999207, "learning_rate": 9.802111387799977e-06, "loss": 0.0171, "step": 21420 }, { "epoch": 0.36190765697301314, "grad_norm": 0.3283781111240387, "learning_rate": 9.80170066896207e-06, "loss": 0.0142, "step": 21430 }, { "epoch": 0.36207653595433514, "grad_norm": 0.45307645201683044, "learning_rate": 9.801289532964173e-06, "loss": 0.0231, "step": 21440 }, { "epoch": 0.3622454149356571, "grad_norm": 0.4832206666469574, "learning_rate": 9.800877979842008e-06, "loss": 0.0167, "step": 21450 }, { "epoch": 0.3624142939169791, "grad_norm": 0.460737407207489, "learning_rate": 9.800466009631326e-06, "loss": 0.0185, "step": 21460 }, { "epoch": 0.36258317289830105, "grad_norm": 0.4438624083995819, "learning_rate": 9.800053622367922e-06, "loss": 0.0138, "step": 21470 }, { "epoch": 0.36275205187962306, "grad_norm": 0.5473164916038513, "learning_rate": 9.79964081808762e-06, "loss": 0.018, "step": 21480 }, { "epoch": 0.36292093086094507, "grad_norm": 0.2971699833869934, "learning_rate": 9.799227596826286e-06, "loss": 0.0285, "step": 21490 }, { "epoch": 0.363089809842267, "grad_norm": 0.5505785942077637, "learning_rate": 9.798813958619816e-06, "loss": 0.0138, "step": 21500 }, { "epoch": 0.363258688823589, "grad_norm": 0.44023093581199646, "learning_rate": 9.79839990350415e-06, "loss": 0.0167, "step": 21510 }, { "epoch": 0.363427567804911, "grad_norm": 0.38235345482826233, "learning_rate": 9.797985431515257e-06, "loss": 0.0207, "step": 21520 }, { "epoch": 0.363596446786233, "grad_norm": 0.3299282491207123, "learning_rate": 9.79757054268915e-06, "loss": 0.0205, "step": 21530 }, { "epoch": 0.363765325767555, "grad_norm": 0.30612167716026306, "learning_rate": 9.797155237061866e-06, "loss": 0.0172, "step": 21540 }, { "epoch": 0.36393420474887694, "grad_norm": 0.44309329986572266, "learning_rate": 9.796739514669491e-06, "loss": 0.0175, "step": 21550 }, { "epoch": 0.36410308373019895, "grad_norm": 0.3864046335220337, "learning_rate": 9.796323375548142e-06, "loss": 0.0231, "step": 21560 }, { "epoch": 0.3642719627115209, "grad_norm": 0.3056250512599945, "learning_rate": 9.79590681973397e-06, "loss": 0.0192, "step": 21570 }, { "epoch": 0.3644408416928429, "grad_norm": 0.7078748941421509, "learning_rate": 9.795489847263165e-06, "loss": 0.0139, "step": 21580 }, { "epoch": 0.3646097206741649, "grad_norm": 0.24005694687366486, "learning_rate": 9.795072458171953e-06, "loss": 0.0171, "step": 21590 }, { "epoch": 0.36477859965548687, "grad_norm": 0.4896635115146637, "learning_rate": 9.794654652496595e-06, "loss": 0.0165, "step": 21600 }, { "epoch": 0.3649474786368089, "grad_norm": 0.3534678518772125, "learning_rate": 9.794236430273391e-06, "loss": 0.0154, "step": 21610 }, { "epoch": 0.3651163576181308, "grad_norm": 0.5009052157402039, "learning_rate": 9.793817791538673e-06, "loss": 0.0184, "step": 21620 }, { "epoch": 0.36528523659945283, "grad_norm": 0.28350135684013367, "learning_rate": 9.793398736328812e-06, "loss": 0.0178, "step": 21630 }, { "epoch": 0.36545411558077484, "grad_norm": 0.3610731065273285, "learning_rate": 9.792979264680215e-06, "loss": 0.0129, "step": 21640 }, { "epoch": 0.3656229945620968, "grad_norm": 0.34933772683143616, "learning_rate": 9.792559376629322e-06, "loss": 0.0144, "step": 21650 }, { "epoch": 0.3657918735434188, "grad_norm": 0.2427641600370407, "learning_rate": 9.792139072212617e-06, "loss": 0.0147, "step": 21660 }, { "epoch": 0.36596075252474075, "grad_norm": 0.5853689908981323, "learning_rate": 9.791718351466608e-06, "loss": 0.0227, "step": 21670 }, { "epoch": 0.36612963150606276, "grad_norm": 0.358173131942749, "learning_rate": 9.791297214427853e-06, "loss": 0.0227, "step": 21680 }, { "epoch": 0.36629851048738477, "grad_norm": 0.6358740329742432, "learning_rate": 9.790875661132937e-06, "loss": 0.0141, "step": 21690 }, { "epoch": 0.3664673894687067, "grad_norm": 0.3776998519897461, "learning_rate": 9.790453691618482e-06, "loss": 0.0154, "step": 21700 }, { "epoch": 0.3666362684500287, "grad_norm": 0.49560365080833435, "learning_rate": 9.790031305921149e-06, "loss": 0.0216, "step": 21710 }, { "epoch": 0.3668051474313507, "grad_norm": 0.3538318872451782, "learning_rate": 9.789608504077632e-06, "loss": 0.0191, "step": 21720 }, { "epoch": 0.3669740264126727, "grad_norm": 0.7083860039710999, "learning_rate": 9.789185286124665e-06, "loss": 0.0172, "step": 21730 }, { "epoch": 0.3671429053939947, "grad_norm": 0.6066915392875671, "learning_rate": 9.788761652099018e-06, "loss": 0.0125, "step": 21740 }, { "epoch": 0.36731178437531664, "grad_norm": 0.5598469972610474, "learning_rate": 9.78833760203749e-06, "loss": 0.0192, "step": 21750 }, { "epoch": 0.36748066335663865, "grad_norm": 0.42715662717819214, "learning_rate": 9.787913135976925e-06, "loss": 0.0168, "step": 21760 }, { "epoch": 0.3676495423379606, "grad_norm": 0.42966675758361816, "learning_rate": 9.787488253954199e-06, "loss": 0.0223, "step": 21770 }, { "epoch": 0.3678184213192826, "grad_norm": 0.45446786284446716, "learning_rate": 9.787062956006225e-06, "loss": 0.0173, "step": 21780 }, { "epoch": 0.3679873003006046, "grad_norm": 0.5924254059791565, "learning_rate": 9.786637242169952e-06, "loss": 0.0128, "step": 21790 }, { "epoch": 0.36815617928192657, "grad_norm": 0.47334909439086914, "learning_rate": 9.786211112482363e-06, "loss": 0.0194, "step": 21800 }, { "epoch": 0.3683250582632486, "grad_norm": 0.3175113797187805, "learning_rate": 9.785784566980482e-06, "loss": 0.0168, "step": 21810 }, { "epoch": 0.3684939372445705, "grad_norm": 0.3763917088508606, "learning_rate": 9.785357605701365e-06, "loss": 0.0108, "step": 21820 }, { "epoch": 0.36866281622589253, "grad_norm": 0.43231987953186035, "learning_rate": 9.784930228682103e-06, "loss": 0.0203, "step": 21830 }, { "epoch": 0.3688316952072145, "grad_norm": 0.43787702918052673, "learning_rate": 9.78450243595983e-06, "loss": 0.0178, "step": 21840 }, { "epoch": 0.3690005741885365, "grad_norm": 0.5913658142089844, "learning_rate": 9.784074227571707e-06, "loss": 0.0128, "step": 21850 }, { "epoch": 0.3691694531698585, "grad_norm": 0.2787625193595886, "learning_rate": 9.78364560355494e-06, "loss": 0.0159, "step": 21860 }, { "epoch": 0.36933833215118045, "grad_norm": 0.29913416504859924, "learning_rate": 9.783216563946764e-06, "loss": 0.0177, "step": 21870 }, { "epoch": 0.36950721113250246, "grad_norm": 0.3665654957294464, "learning_rate": 9.782787108784453e-06, "loss": 0.0224, "step": 21880 }, { "epoch": 0.3696760901138244, "grad_norm": 0.23347605764865875, "learning_rate": 9.782357238105317e-06, "loss": 0.0184, "step": 21890 }, { "epoch": 0.3698449690951464, "grad_norm": 0.7177707552909851, "learning_rate": 9.781926951946704e-06, "loss": 0.0215, "step": 21900 }, { "epoch": 0.3700138480764684, "grad_norm": 0.34450864791870117, "learning_rate": 9.781496250345993e-06, "loss": 0.014, "step": 21910 }, { "epoch": 0.3701827270577904, "grad_norm": 0.3181737959384918, "learning_rate": 9.781065133340609e-06, "loss": 0.0209, "step": 21920 }, { "epoch": 0.3703516060391124, "grad_norm": 0.350014865398407, "learning_rate": 9.780633600967997e-06, "loss": 0.0185, "step": 21930 }, { "epoch": 0.37052048502043433, "grad_norm": 0.3037608563899994, "learning_rate": 9.780201653265652e-06, "loss": 0.0154, "step": 21940 }, { "epoch": 0.37068936400175634, "grad_norm": 0.7128810286521912, "learning_rate": 9.779769290271104e-06, "loss": 0.0263, "step": 21950 }, { "epoch": 0.37085824298307835, "grad_norm": 0.35242214798927307, "learning_rate": 9.779336512021911e-06, "loss": 0.0209, "step": 21960 }, { "epoch": 0.3710271219644003, "grad_norm": 0.4410250782966614, "learning_rate": 9.778903318555673e-06, "loss": 0.0116, "step": 21970 }, { "epoch": 0.3711960009457223, "grad_norm": 0.42611488699913025, "learning_rate": 9.778469709910024e-06, "loss": 0.026, "step": 21980 }, { "epoch": 0.37136487992704426, "grad_norm": 0.8493971824645996, "learning_rate": 9.778035686122636e-06, "loss": 0.02, "step": 21990 }, { "epoch": 0.37153375890836626, "grad_norm": 0.3203965425491333, "learning_rate": 9.777601247231215e-06, "loss": 0.0149, "step": 22000 }, { "epoch": 0.37170263788968827, "grad_norm": 0.32773715257644653, "learning_rate": 9.777166393273505e-06, "loss": 0.0272, "step": 22010 }, { "epoch": 0.3718715168710102, "grad_norm": 0.2526051700115204, "learning_rate": 9.776731124287285e-06, "loss": 0.0119, "step": 22020 }, { "epoch": 0.37204039585233223, "grad_norm": 0.23324017226696014, "learning_rate": 9.776295440310368e-06, "loss": 0.0159, "step": 22030 }, { "epoch": 0.3722092748336542, "grad_norm": 0.3381645381450653, "learning_rate": 9.775859341380609e-06, "loss": 0.0158, "step": 22040 }, { "epoch": 0.3723781538149762, "grad_norm": 0.27023372054100037, "learning_rate": 9.775422827535892e-06, "loss": 0.0142, "step": 22050 }, { "epoch": 0.3725470327962982, "grad_norm": 0.6556872725486755, "learning_rate": 9.774985898814143e-06, "loss": 0.0182, "step": 22060 }, { "epoch": 0.37271591177762015, "grad_norm": 0.6528377532958984, "learning_rate": 9.774548555253316e-06, "loss": 0.0234, "step": 22070 }, { "epoch": 0.37288479075894215, "grad_norm": 0.3434329926967621, "learning_rate": 9.774110796891413e-06, "loss": 0.0149, "step": 22080 }, { "epoch": 0.3730536697402641, "grad_norm": 0.5215985774993896, "learning_rate": 9.773672623766461e-06, "loss": 0.0182, "step": 22090 }, { "epoch": 0.3732225487215861, "grad_norm": 0.38725125789642334, "learning_rate": 9.773234035916531e-06, "loss": 0.019, "step": 22100 }, { "epoch": 0.3733914277029081, "grad_norm": 0.9325612187385559, "learning_rate": 9.77279503337972e-06, "loss": 0.013, "step": 22110 }, { "epoch": 0.37356030668423007, "grad_norm": 0.12974794209003448, "learning_rate": 9.772355616194175e-06, "loss": 0.0152, "step": 22120 }, { "epoch": 0.3737291856655521, "grad_norm": 0.4552879333496094, "learning_rate": 9.771915784398067e-06, "loss": 0.0198, "step": 22130 }, { "epoch": 0.37389806464687403, "grad_norm": 0.7050676941871643, "learning_rate": 9.771475538029608e-06, "loss": 0.0224, "step": 22140 }, { "epoch": 0.37406694362819604, "grad_norm": 0.39355698227882385, "learning_rate": 9.771034877127048e-06, "loss": 0.0175, "step": 22150 }, { "epoch": 0.37423582260951804, "grad_norm": 0.5119345784187317, "learning_rate": 9.770593801728667e-06, "loss": 0.0171, "step": 22160 }, { "epoch": 0.37440470159084, "grad_norm": 0.3860163390636444, "learning_rate": 9.770152311872787e-06, "loss": 0.0216, "step": 22170 }, { "epoch": 0.374573580572162, "grad_norm": 0.5593900680541992, "learning_rate": 9.769710407597763e-06, "loss": 0.0119, "step": 22180 }, { "epoch": 0.37474245955348395, "grad_norm": 1.8318053483963013, "learning_rate": 9.769268088941985e-06, "loss": 0.0203, "step": 22190 }, { "epoch": 0.37491133853480596, "grad_norm": 0.29452723264694214, "learning_rate": 9.768825355943884e-06, "loss": 0.0133, "step": 22200 }, { "epoch": 0.37508021751612797, "grad_norm": 0.39600658416748047, "learning_rate": 9.76838220864192e-06, "loss": 0.0143, "step": 22210 }, { "epoch": 0.3752490964974499, "grad_norm": 0.5099027752876282, "learning_rate": 9.767938647074595e-06, "loss": 0.0166, "step": 22220 }, { "epoch": 0.3754179754787719, "grad_norm": 0.5142956376075745, "learning_rate": 9.767494671280442e-06, "loss": 0.0176, "step": 22230 }, { "epoch": 0.3755868544600939, "grad_norm": 0.3845137357711792, "learning_rate": 9.767050281298036e-06, "loss": 0.0199, "step": 22240 }, { "epoch": 0.3757557334414159, "grad_norm": 0.10484258830547333, "learning_rate": 9.766605477165983e-06, "loss": 0.0194, "step": 22250 }, { "epoch": 0.3759246124227379, "grad_norm": 0.5566744208335876, "learning_rate": 9.766160258922924e-06, "loss": 0.0179, "step": 22260 }, { "epoch": 0.37609349140405984, "grad_norm": 0.460890531539917, "learning_rate": 9.765714626607541e-06, "loss": 0.019, "step": 22270 }, { "epoch": 0.37626237038538185, "grad_norm": 0.46294018626213074, "learning_rate": 9.76526858025855e-06, "loss": 0.014, "step": 22280 }, { "epoch": 0.3764312493667038, "grad_norm": 0.4763241708278656, "learning_rate": 9.7648221199147e-06, "loss": 0.0186, "step": 22290 }, { "epoch": 0.3766001283480258, "grad_norm": 0.48027992248535156, "learning_rate": 9.764375245614782e-06, "loss": 0.017, "step": 22300 }, { "epoch": 0.3767690073293478, "grad_norm": 0.37926045060157776, "learning_rate": 9.763927957397615e-06, "loss": 0.0191, "step": 22310 }, { "epoch": 0.37693788631066977, "grad_norm": 0.5101817846298218, "learning_rate": 9.763480255302062e-06, "loss": 0.0235, "step": 22320 }, { "epoch": 0.3771067652919918, "grad_norm": 0.41211873292922974, "learning_rate": 9.763032139367015e-06, "loss": 0.0188, "step": 22330 }, { "epoch": 0.3772756442733137, "grad_norm": 0.49001485109329224, "learning_rate": 9.762583609631409e-06, "loss": 0.0204, "step": 22340 }, { "epoch": 0.37744452325463573, "grad_norm": 0.7687841653823853, "learning_rate": 9.762134666134208e-06, "loss": 0.022, "step": 22350 }, { "epoch": 0.37761340223595774, "grad_norm": 0.4504268765449524, "learning_rate": 9.761685308914418e-06, "loss": 0.0179, "step": 22360 }, { "epoch": 0.3777822812172797, "grad_norm": 0.4810147285461426, "learning_rate": 9.761235538011074e-06, "loss": 0.0172, "step": 22370 }, { "epoch": 0.3779511601986017, "grad_norm": 0.3903251588344574, "learning_rate": 9.760785353463253e-06, "loss": 0.0134, "step": 22380 }, { "epoch": 0.37812003917992365, "grad_norm": 0.5014380216598511, "learning_rate": 9.760334755310069e-06, "loss": 0.0153, "step": 22390 }, { "epoch": 0.37828891816124566, "grad_norm": 0.34161311388015747, "learning_rate": 9.759883743590663e-06, "loss": 0.0171, "step": 22400 }, { "epoch": 0.3784577971425676, "grad_norm": 0.21212662756443024, "learning_rate": 9.759432318344222e-06, "loss": 0.0153, "step": 22410 }, { "epoch": 0.3786266761238896, "grad_norm": 1.2985177040100098, "learning_rate": 9.758980479609963e-06, "loss": 0.0217, "step": 22420 }, { "epoch": 0.3787955551052116, "grad_norm": 0.18940454721450806, "learning_rate": 9.758528227427142e-06, "loss": 0.0141, "step": 22430 }, { "epoch": 0.3789644340865336, "grad_norm": 0.9366602897644043, "learning_rate": 9.75807556183505e-06, "loss": 0.0179, "step": 22440 }, { "epoch": 0.3791333130678556, "grad_norm": 0.47498619556427, "learning_rate": 9.75762248287301e-06, "loss": 0.0253, "step": 22450 }, { "epoch": 0.37930219204917753, "grad_norm": 0.3297402858734131, "learning_rate": 9.757168990580388e-06, "loss": 0.0156, "step": 22460 }, { "epoch": 0.37947107103049954, "grad_norm": 0.3359331786632538, "learning_rate": 9.75671508499658e-06, "loss": 0.0148, "step": 22470 }, { "epoch": 0.37963995001182155, "grad_norm": 0.541685163974762, "learning_rate": 9.75626076616102e-06, "loss": 0.0122, "step": 22480 }, { "epoch": 0.3798088289931435, "grad_norm": 0.5401818752288818, "learning_rate": 9.755806034113182e-06, "loss": 0.0138, "step": 22490 }, { "epoch": 0.3799777079744655, "grad_norm": 0.3296861946582794, "learning_rate": 9.75535088889257e-06, "loss": 0.0156, "step": 22500 }, { "epoch": 0.38014658695578746, "grad_norm": 0.5594167709350586, "learning_rate": 9.754895330538722e-06, "loss": 0.0177, "step": 22510 }, { "epoch": 0.38031546593710946, "grad_norm": 0.32027384638786316, "learning_rate": 9.754439359091222e-06, "loss": 0.0136, "step": 22520 }, { "epoch": 0.38048434491843147, "grad_norm": 0.38266173005104065, "learning_rate": 9.753982974589678e-06, "loss": 0.0168, "step": 22530 }, { "epoch": 0.3806532238997534, "grad_norm": 0.7056057453155518, "learning_rate": 9.753526177073742e-06, "loss": 0.0225, "step": 22540 }, { "epoch": 0.38082210288107543, "grad_norm": 0.5446527600288391, "learning_rate": 9.753068966583102e-06, "loss": 0.0156, "step": 22550 }, { "epoch": 0.3809909818623974, "grad_norm": 0.30250805616378784, "learning_rate": 9.752611343157476e-06, "loss": 0.0104, "step": 22560 }, { "epoch": 0.3811598608437194, "grad_norm": 0.23227398097515106, "learning_rate": 9.75215330683662e-06, "loss": 0.016, "step": 22570 }, { "epoch": 0.3813287398250414, "grad_norm": 0.2864345610141754, "learning_rate": 9.751694857660331e-06, "loss": 0.0159, "step": 22580 }, { "epoch": 0.38149761880636335, "grad_norm": 0.31403395533561707, "learning_rate": 9.751235995668437e-06, "loss": 0.0138, "step": 22590 }, { "epoch": 0.38166649778768535, "grad_norm": 0.4713253378868103, "learning_rate": 9.750776720900803e-06, "loss": 0.0164, "step": 22600 }, { "epoch": 0.3818353767690073, "grad_norm": 0.5014458298683167, "learning_rate": 9.750317033397327e-06, "loss": 0.0219, "step": 22610 }, { "epoch": 0.3820042557503293, "grad_norm": 0.3133956789970398, "learning_rate": 9.749856933197946e-06, "loss": 0.0115, "step": 22620 }, { "epoch": 0.3821731347316513, "grad_norm": 1.327883005142212, "learning_rate": 9.749396420342635e-06, "loss": 0.0172, "step": 22630 }, { "epoch": 0.38234201371297327, "grad_norm": 0.4948015511035919, "learning_rate": 9.748935494871401e-06, "loss": 0.0146, "step": 22640 }, { "epoch": 0.3825108926942953, "grad_norm": 0.3330906629562378, "learning_rate": 9.748474156824288e-06, "loss": 0.0123, "step": 22650 }, { "epoch": 0.38267977167561723, "grad_norm": 0.45548489689826965, "learning_rate": 9.748012406241376e-06, "loss": 0.0176, "step": 22660 }, { "epoch": 0.38284865065693924, "grad_norm": 0.5009813904762268, "learning_rate": 9.747550243162779e-06, "loss": 0.017, "step": 22670 }, { "epoch": 0.38301752963826124, "grad_norm": 0.7579880356788635, "learning_rate": 9.747087667628651e-06, "loss": 0.0197, "step": 22680 }, { "epoch": 0.3831864086195832, "grad_norm": 0.8052708506584167, "learning_rate": 9.74662467967918e-06, "loss": 0.0177, "step": 22690 }, { "epoch": 0.3833552876009052, "grad_norm": 0.12275251001119614, "learning_rate": 9.746161279354585e-06, "loss": 0.0125, "step": 22700 }, { "epoch": 0.38352416658222716, "grad_norm": 0.47928428649902344, "learning_rate": 9.745697466695129e-06, "loss": 0.0136, "step": 22710 }, { "epoch": 0.38369304556354916, "grad_norm": 0.4594384431838989, "learning_rate": 9.745233241741107e-06, "loss": 0.0143, "step": 22720 }, { "epoch": 0.38386192454487117, "grad_norm": 0.7922074198722839, "learning_rate": 9.744768604532848e-06, "loss": 0.0202, "step": 22730 }, { "epoch": 0.3840308035261931, "grad_norm": 0.4283526837825775, "learning_rate": 9.744303555110719e-06, "loss": 0.0131, "step": 22740 }, { "epoch": 0.38419968250751513, "grad_norm": 0.21779507398605347, "learning_rate": 9.743838093515121e-06, "loss": 0.0148, "step": 22750 }, { "epoch": 0.3843685614888371, "grad_norm": 0.36451154947280884, "learning_rate": 9.743372219786495e-06, "loss": 0.0187, "step": 22760 }, { "epoch": 0.3845374404701591, "grad_norm": 0.6995262503623962, "learning_rate": 9.742905933965314e-06, "loss": 0.023, "step": 22770 }, { "epoch": 0.3847063194514811, "grad_norm": 0.2609751224517822, "learning_rate": 9.742439236092087e-06, "loss": 0.0122, "step": 22780 }, { "epoch": 0.38487519843280305, "grad_norm": 0.24718590080738068, "learning_rate": 9.74197212620736e-06, "loss": 0.0155, "step": 22790 }, { "epoch": 0.38504407741412505, "grad_norm": 0.4189963936805725, "learning_rate": 9.741504604351715e-06, "loss": 0.0185, "step": 22800 }, { "epoch": 0.385212956395447, "grad_norm": 0.37249478697776794, "learning_rate": 9.741036670565766e-06, "loss": 0.0172, "step": 22810 }, { "epoch": 0.385381835376769, "grad_norm": 0.41706204414367676, "learning_rate": 9.74056832489017e-06, "loss": 0.0135, "step": 22820 }, { "epoch": 0.385550714358091, "grad_norm": 0.44198983907699585, "learning_rate": 9.740099567365615e-06, "loss": 0.016, "step": 22830 }, { "epoch": 0.38571959333941297, "grad_norm": 1.0453261137008667, "learning_rate": 9.739630398032823e-06, "loss": 0.0216, "step": 22840 }, { "epoch": 0.385888472320735, "grad_norm": 0.5846461057662964, "learning_rate": 9.739160816932556e-06, "loss": 0.0149, "step": 22850 }, { "epoch": 0.38605735130205693, "grad_norm": 0.6384121179580688, "learning_rate": 9.738690824105612e-06, "loss": 0.0195, "step": 22860 }, { "epoch": 0.38622623028337894, "grad_norm": 0.4332720935344696, "learning_rate": 9.738220419592818e-06, "loss": 0.0142, "step": 22870 }, { "epoch": 0.38639510926470094, "grad_norm": 0.4854993224143982, "learning_rate": 9.737749603435046e-06, "loss": 0.0145, "step": 22880 }, { "epoch": 0.3865639882460229, "grad_norm": 0.5501821041107178, "learning_rate": 9.737278375673197e-06, "loss": 0.0172, "step": 22890 }, { "epoch": 0.3867328672273449, "grad_norm": 0.4993639290332794, "learning_rate": 9.736806736348212e-06, "loss": 0.0199, "step": 22900 }, { "epoch": 0.38690174620866685, "grad_norm": 0.29238012433052063, "learning_rate": 9.736334685501064e-06, "loss": 0.0166, "step": 22910 }, { "epoch": 0.38707062518998886, "grad_norm": 0.6288791298866272, "learning_rate": 9.735862223172763e-06, "loss": 0.017, "step": 22920 }, { "epoch": 0.3872395041713108, "grad_norm": 0.25586846470832825, "learning_rate": 9.735389349404359e-06, "loss": 0.0192, "step": 22930 }, { "epoch": 0.3874083831526328, "grad_norm": 0.3164590895175934, "learning_rate": 9.73491606423693e-06, "loss": 0.0137, "step": 22940 }, { "epoch": 0.3875772621339548, "grad_norm": 0.22296805679798126, "learning_rate": 9.734442367711596e-06, "loss": 0.0148, "step": 22950 }, { "epoch": 0.3877461411152768, "grad_norm": 0.421290785074234, "learning_rate": 9.733968259869511e-06, "loss": 0.0145, "step": 22960 }, { "epoch": 0.3879150200965988, "grad_norm": 0.24934794008731842, "learning_rate": 9.733493740751863e-06, "loss": 0.0187, "step": 22970 }, { "epoch": 0.38808389907792074, "grad_norm": 0.5577415823936462, "learning_rate": 9.733018810399878e-06, "loss": 0.0206, "step": 22980 }, { "epoch": 0.38825277805924274, "grad_norm": 0.8287709355354309, "learning_rate": 9.732543468854816e-06, "loss": 0.0206, "step": 22990 }, { "epoch": 0.38842165704056475, "grad_norm": 0.5232577323913574, "learning_rate": 9.732067716157974e-06, "loss": 0.0147, "step": 23000 }, { "epoch": 0.3885905360218867, "grad_norm": 0.39022311568260193, "learning_rate": 9.731591552350683e-06, "loss": 0.0124, "step": 23010 }, { "epoch": 0.3887594150032087, "grad_norm": 0.49773818254470825, "learning_rate": 9.731114977474314e-06, "loss": 0.0169, "step": 23020 }, { "epoch": 0.38892829398453066, "grad_norm": 0.6461411714553833, "learning_rate": 9.730637991570269e-06, "loss": 0.0189, "step": 23030 }, { "epoch": 0.38909717296585267, "grad_norm": 0.46539995074272156, "learning_rate": 9.730160594679987e-06, "loss": 0.0154, "step": 23040 }, { "epoch": 0.3892660519471747, "grad_norm": 0.163972869515419, "learning_rate": 9.729682786844941e-06, "loss": 0.0188, "step": 23050 }, { "epoch": 0.3894349309284966, "grad_norm": 0.4370203912258148, "learning_rate": 9.729204568106645e-06, "loss": 0.0157, "step": 23060 }, { "epoch": 0.38960380990981863, "grad_norm": 0.4843927323818207, "learning_rate": 9.728725938506646e-06, "loss": 0.0162, "step": 23070 }, { "epoch": 0.3897726888911406, "grad_norm": 0.44405436515808105, "learning_rate": 9.728246898086524e-06, "loss": 0.0169, "step": 23080 }, { "epoch": 0.3899415678724626, "grad_norm": 0.3906558156013489, "learning_rate": 9.727767446887896e-06, "loss": 0.0167, "step": 23090 }, { "epoch": 0.3901104468537846, "grad_norm": 0.17765837907791138, "learning_rate": 9.727287584952419e-06, "loss": 0.0129, "step": 23100 }, { "epoch": 0.39027932583510655, "grad_norm": 0.36335012316703796, "learning_rate": 9.726807312321777e-06, "loss": 0.0149, "step": 23110 }, { "epoch": 0.39044820481642856, "grad_norm": 0.4766117036342621, "learning_rate": 9.726326629037702e-06, "loss": 0.0194, "step": 23120 }, { "epoch": 0.3906170837977505, "grad_norm": 0.4459391236305237, "learning_rate": 9.725845535141949e-06, "loss": 0.0178, "step": 23130 }, { "epoch": 0.3907859627790725, "grad_norm": 0.35999611020088196, "learning_rate": 9.725364030676316e-06, "loss": 0.0123, "step": 23140 }, { "epoch": 0.3909548417603945, "grad_norm": 0.42273008823394775, "learning_rate": 9.724882115682635e-06, "loss": 0.0236, "step": 23150 }, { "epoch": 0.3911237207417165, "grad_norm": 0.2929926812648773, "learning_rate": 9.724399790202774e-06, "loss": 0.0158, "step": 23160 }, { "epoch": 0.3912925997230385, "grad_norm": 0.21037526428699493, "learning_rate": 9.723917054278635e-06, "loss": 0.0174, "step": 23170 }, { "epoch": 0.39146147870436043, "grad_norm": 0.32426974177360535, "learning_rate": 9.723433907952158e-06, "loss": 0.0127, "step": 23180 }, { "epoch": 0.39163035768568244, "grad_norm": 0.2708360552787781, "learning_rate": 9.722950351265317e-06, "loss": 0.018, "step": 23190 }, { "epoch": 0.39179923666700445, "grad_norm": 0.29087433218955994, "learning_rate": 9.722466384260123e-06, "loss": 0.0173, "step": 23200 }, { "epoch": 0.3919681156483264, "grad_norm": 0.43166258931159973, "learning_rate": 9.721982006978622e-06, "loss": 0.0182, "step": 23210 }, { "epoch": 0.3921369946296484, "grad_norm": 0.10801143944263458, "learning_rate": 9.721497219462893e-06, "loss": 0.0195, "step": 23220 }, { "epoch": 0.39230587361097036, "grad_norm": 0.4007529020309448, "learning_rate": 9.721012021755057e-06, "loss": 0.0178, "step": 23230 }, { "epoch": 0.39247475259229236, "grad_norm": 0.2499566376209259, "learning_rate": 9.720526413897263e-06, "loss": 0.0094, "step": 23240 }, { "epoch": 0.39264363157361437, "grad_norm": 0.45073428750038147, "learning_rate": 9.720040395931702e-06, "loss": 0.014, "step": 23250 }, { "epoch": 0.3928125105549363, "grad_norm": 0.21985448896884918, "learning_rate": 9.719553967900597e-06, "loss": 0.0149, "step": 23260 }, { "epoch": 0.39298138953625833, "grad_norm": 0.12867997586727142, "learning_rate": 9.71906712984621e-06, "loss": 0.0121, "step": 23270 }, { "epoch": 0.3931502685175803, "grad_norm": 0.3741895854473114, "learning_rate": 9.71857988181083e-06, "loss": 0.0129, "step": 23280 }, { "epoch": 0.3933191474989023, "grad_norm": 0.4201858937740326, "learning_rate": 9.718092223836795e-06, "loss": 0.0119, "step": 23290 }, { "epoch": 0.3934880264802243, "grad_norm": 0.40730318427085876, "learning_rate": 9.71760415596647e-06, "loss": 0.0209, "step": 23300 }, { "epoch": 0.39365690546154625, "grad_norm": 0.7310152649879456, "learning_rate": 9.717115678242252e-06, "loss": 0.0138, "step": 23310 }, { "epoch": 0.39382578444286825, "grad_norm": 0.36590275168418884, "learning_rate": 9.716626790706587e-06, "loss": 0.0184, "step": 23320 }, { "epoch": 0.3939946634241902, "grad_norm": 0.3334403336048126, "learning_rate": 9.71613749340194e-06, "loss": 0.0187, "step": 23330 }, { "epoch": 0.3941635424055122, "grad_norm": 0.35810187458992004, "learning_rate": 9.715647786370825e-06, "loss": 0.0181, "step": 23340 }, { "epoch": 0.3943324213868342, "grad_norm": 0.2715584635734558, "learning_rate": 9.715157669655787e-06, "loss": 0.0113, "step": 23350 }, { "epoch": 0.39450130036815617, "grad_norm": 0.43508756160736084, "learning_rate": 9.714667143299403e-06, "loss": 0.0179, "step": 23360 }, { "epoch": 0.3946701793494782, "grad_norm": 0.31983357667922974, "learning_rate": 9.71417620734429e-06, "loss": 0.0169, "step": 23370 }, { "epoch": 0.39483905833080013, "grad_norm": 0.7967411875724792, "learning_rate": 9.7136848618331e-06, "loss": 0.0135, "step": 23380 }, { "epoch": 0.39500793731212214, "grad_norm": 0.32563623785972595, "learning_rate": 9.713193106808522e-06, "loss": 0.0114, "step": 23390 }, { "epoch": 0.39517681629344414, "grad_norm": 0.3153434693813324, "learning_rate": 9.712700942313274e-06, "loss": 0.0152, "step": 23400 }, { "epoch": 0.3953456952747661, "grad_norm": 0.2344546765089035, "learning_rate": 9.712208368390114e-06, "loss": 0.0136, "step": 23410 }, { "epoch": 0.3955145742560881, "grad_norm": 0.37012577056884766, "learning_rate": 9.711715385081842e-06, "loss": 0.0177, "step": 23420 }, { "epoch": 0.39568345323741005, "grad_norm": 0.3650865852832794, "learning_rate": 9.711221992431279e-06, "loss": 0.0149, "step": 23430 }, { "epoch": 0.39585233221873206, "grad_norm": 0.3819652497768402, "learning_rate": 9.710728190481296e-06, "loss": 0.0134, "step": 23440 }, { "epoch": 0.39602121120005407, "grad_norm": 0.31904858350753784, "learning_rate": 9.71023397927479e-06, "loss": 0.0155, "step": 23450 }, { "epoch": 0.396190090181376, "grad_norm": 0.37781113386154175, "learning_rate": 9.709739358854696e-06, "loss": 0.0171, "step": 23460 }, { "epoch": 0.396358969162698, "grad_norm": 0.41012537479400635, "learning_rate": 9.70924432926399e-06, "loss": 0.0154, "step": 23470 }, { "epoch": 0.39652784814402, "grad_norm": 0.17497484385967255, "learning_rate": 9.708748890545674e-06, "loss": 0.0198, "step": 23480 }, { "epoch": 0.396696727125342, "grad_norm": 0.4153347313404083, "learning_rate": 9.708253042742792e-06, "loss": 0.0169, "step": 23490 }, { "epoch": 0.39686560610666394, "grad_norm": 1.1434961557388306, "learning_rate": 9.707756785898426e-06, "loss": 0.0163, "step": 23500 }, { "epoch": 0.39703448508798594, "grad_norm": 0.23761571943759918, "learning_rate": 9.707260120055683e-06, "loss": 0.0111, "step": 23510 }, { "epoch": 0.39720336406930795, "grad_norm": 0.2450111359357834, "learning_rate": 9.706763045257716e-06, "loss": 0.0178, "step": 23520 }, { "epoch": 0.3973722430506299, "grad_norm": 0.4549131691455841, "learning_rate": 9.706265561547709e-06, "loss": 0.0187, "step": 23530 }, { "epoch": 0.3975411220319519, "grad_norm": 0.4756292402744293, "learning_rate": 9.705767668968884e-06, "loss": 0.0123, "step": 23540 }, { "epoch": 0.39771000101327386, "grad_norm": 0.3301936686038971, "learning_rate": 9.705269367564493e-06, "loss": 0.019, "step": 23550 }, { "epoch": 0.39787887999459587, "grad_norm": 0.33013230562210083, "learning_rate": 9.704770657377829e-06, "loss": 0.0164, "step": 23560 }, { "epoch": 0.3980477589759179, "grad_norm": 0.35124915838241577, "learning_rate": 9.70427153845222e-06, "loss": 0.0182, "step": 23570 }, { "epoch": 0.3982166379572398, "grad_norm": 0.7453346848487854, "learning_rate": 9.703772010831026e-06, "loss": 0.0167, "step": 23580 }, { "epoch": 0.39838551693856183, "grad_norm": 0.1776258796453476, "learning_rate": 9.703272074557645e-06, "loss": 0.0162, "step": 23590 }, { "epoch": 0.3985543959198838, "grad_norm": 0.45356830954551697, "learning_rate": 9.702771729675511e-06, "loss": 0.0143, "step": 23600 }, { "epoch": 0.3987232749012058, "grad_norm": 0.27346834540367126, "learning_rate": 9.702270976228094e-06, "loss": 0.0138, "step": 23610 }, { "epoch": 0.3988921538825278, "grad_norm": 0.23003138601779938, "learning_rate": 9.701769814258897e-06, "loss": 0.0152, "step": 23620 }, { "epoch": 0.39906103286384975, "grad_norm": 0.12602171301841736, "learning_rate": 9.701268243811458e-06, "loss": 0.011, "step": 23630 }, { "epoch": 0.39922991184517176, "grad_norm": 0.3093409836292267, "learning_rate": 9.700766264929355e-06, "loss": 0.0168, "step": 23640 }, { "epoch": 0.3993987908264937, "grad_norm": 0.2556653320789337, "learning_rate": 9.700263877656197e-06, "loss": 0.0149, "step": 23650 }, { "epoch": 0.3995676698078157, "grad_norm": 0.19497515261173248, "learning_rate": 9.699761082035632e-06, "loss": 0.0149, "step": 23660 }, { "epoch": 0.3997365487891377, "grad_norm": 0.16307483613491058, "learning_rate": 9.69925787811134e-06, "loss": 0.0123, "step": 23670 }, { "epoch": 0.3999054277704597, "grad_norm": 0.48860231041908264, "learning_rate": 9.698754265927038e-06, "loss": 0.0203, "step": 23680 }, { "epoch": 0.4000743067517817, "grad_norm": 0.4133407771587372, "learning_rate": 9.698250245526477e-06, "loss": 0.0192, "step": 23690 }, { "epoch": 0.40024318573310363, "grad_norm": 0.34657129645347595, "learning_rate": 9.69774581695345e-06, "loss": 0.0138, "step": 23700 }, { "epoch": 0.40041206471442564, "grad_norm": 0.2926517426967621, "learning_rate": 9.697240980251777e-06, "loss": 0.0152, "step": 23710 }, { "epoch": 0.40058094369574765, "grad_norm": 0.37587255239486694, "learning_rate": 9.696735735465317e-06, "loss": 0.017, "step": 23720 }, { "epoch": 0.4007498226770696, "grad_norm": 0.4334907531738281, "learning_rate": 9.696230082637965e-06, "loss": 0.022, "step": 23730 }, { "epoch": 0.4009187016583916, "grad_norm": 0.43450799584388733, "learning_rate": 9.695724021813653e-06, "loss": 0.0103, "step": 23740 }, { "epoch": 0.40108758063971356, "grad_norm": 0.4646279811859131, "learning_rate": 9.695217553036342e-06, "loss": 0.0182, "step": 23750 }, { "epoch": 0.40125645962103557, "grad_norm": 0.26507309079170227, "learning_rate": 9.694710676350035e-06, "loss": 0.021, "step": 23760 }, { "epoch": 0.4014253386023576, "grad_norm": 0.5771531462669373, "learning_rate": 9.694203391798769e-06, "loss": 0.0227, "step": 23770 }, { "epoch": 0.4015942175836795, "grad_norm": 0.2096375823020935, "learning_rate": 9.693695699426616e-06, "loss": 0.0114, "step": 23780 }, { "epoch": 0.40176309656500153, "grad_norm": 0.3028094172477722, "learning_rate": 9.69318759927768e-06, "loss": 0.0191, "step": 23790 }, { "epoch": 0.4019319755463235, "grad_norm": 0.4812890887260437, "learning_rate": 9.692679091396107e-06, "loss": 0.0217, "step": 23800 }, { "epoch": 0.4021008545276455, "grad_norm": 0.34929928183555603, "learning_rate": 9.692170175826072e-06, "loss": 0.0226, "step": 23810 }, { "epoch": 0.4022697335089675, "grad_norm": 0.25628942251205444, "learning_rate": 9.69166085261179e-06, "loss": 0.0173, "step": 23820 }, { "epoch": 0.40243861249028945, "grad_norm": 0.6465237736701965, "learning_rate": 9.691151121797509e-06, "loss": 0.0201, "step": 23830 }, { "epoch": 0.40260749147161146, "grad_norm": 0.3589831292629242, "learning_rate": 9.690640983427515e-06, "loss": 0.0222, "step": 23840 }, { "epoch": 0.4027763704529334, "grad_norm": 0.6132829785346985, "learning_rate": 9.690130437546125e-06, "loss": 0.0153, "step": 23850 }, { "epoch": 0.4029452494342554, "grad_norm": 0.40982505679130554, "learning_rate": 9.689619484197697e-06, "loss": 0.0158, "step": 23860 }, { "epoch": 0.4031141284155774, "grad_norm": 0.2794440984725952, "learning_rate": 9.689108123426618e-06, "loss": 0.0133, "step": 23870 }, { "epoch": 0.4032830073968994, "grad_norm": 0.3437424898147583, "learning_rate": 9.688596355277316e-06, "loss": 0.0159, "step": 23880 }, { "epoch": 0.4034518863782214, "grad_norm": 0.29943037033081055, "learning_rate": 9.68808417979425e-06, "loss": 0.0188, "step": 23890 }, { "epoch": 0.40362076535954333, "grad_norm": 0.4781244695186615, "learning_rate": 9.687571597021919e-06, "loss": 0.0125, "step": 23900 }, { "epoch": 0.40378964434086534, "grad_norm": 1.2228811979293823, "learning_rate": 9.687058607004855e-06, "loss": 0.0163, "step": 23910 }, { "epoch": 0.40395852332218735, "grad_norm": 0.4860147535800934, "learning_rate": 9.686545209787624e-06, "loss": 0.0104, "step": 23920 }, { "epoch": 0.4041274023035093, "grad_norm": 0.28274208307266235, "learning_rate": 9.686031405414827e-06, "loss": 0.0143, "step": 23930 }, { "epoch": 0.4042962812848313, "grad_norm": 0.8340837955474854, "learning_rate": 9.685517193931106e-06, "loss": 0.0147, "step": 23940 }, { "epoch": 0.40446516026615326, "grad_norm": 0.46410560607910156, "learning_rate": 9.685002575381131e-06, "loss": 0.014, "step": 23950 }, { "epoch": 0.40463403924747526, "grad_norm": 0.30166009068489075, "learning_rate": 9.684487549809614e-06, "loss": 0.0098, "step": 23960 }, { "epoch": 0.40480291822879727, "grad_norm": 0.2995128929615021, "learning_rate": 9.683972117261296e-06, "loss": 0.0163, "step": 23970 }, { "epoch": 0.4049717972101192, "grad_norm": 0.204609677195549, "learning_rate": 9.683456277780958e-06, "loss": 0.0149, "step": 23980 }, { "epoch": 0.40514067619144123, "grad_norm": 0.34972476959228516, "learning_rate": 9.682940031413414e-06, "loss": 0.0136, "step": 23990 }, { "epoch": 0.4053095551727632, "grad_norm": 0.40783971548080444, "learning_rate": 9.682423378203516e-06, "loss": 0.0181, "step": 24000 }, { "epoch": 0.4054784341540852, "grad_norm": 0.2745494842529297, "learning_rate": 9.68190631819615e-06, "loss": 0.0174, "step": 24010 }, { "epoch": 0.4056473131354072, "grad_norm": 0.30295246839523315, "learning_rate": 9.681388851436233e-06, "loss": 0.0089, "step": 24020 }, { "epoch": 0.40581619211672915, "grad_norm": 0.3859677016735077, "learning_rate": 9.680870977968725e-06, "loss": 0.0172, "step": 24030 }, { "epoch": 0.40598507109805115, "grad_norm": 0.45684653520584106, "learning_rate": 9.680352697838617e-06, "loss": 0.0164, "step": 24040 }, { "epoch": 0.4061539500793731, "grad_norm": 0.6446325778961182, "learning_rate": 9.679834011090936e-06, "loss": 0.0178, "step": 24050 }, { "epoch": 0.4063228290606951, "grad_norm": 0.4529818594455719, "learning_rate": 9.679314917770741e-06, "loss": 0.0146, "step": 24060 }, { "epoch": 0.40649170804201706, "grad_norm": 0.22541095316410065, "learning_rate": 9.678795417923133e-06, "loss": 0.0153, "step": 24070 }, { "epoch": 0.40666058702333907, "grad_norm": 0.7736636996269226, "learning_rate": 9.678275511593245e-06, "loss": 0.0132, "step": 24080 }, { "epoch": 0.4068294660046611, "grad_norm": 0.40410229563713074, "learning_rate": 9.677755198826242e-06, "loss": 0.0148, "step": 24090 }, { "epoch": 0.40699834498598303, "grad_norm": 0.6350624561309814, "learning_rate": 9.677234479667332e-06, "loss": 0.0169, "step": 24100 }, { "epoch": 0.40716722396730504, "grad_norm": 0.31254684925079346, "learning_rate": 9.676713354161749e-06, "loss": 0.0131, "step": 24110 }, { "epoch": 0.407336102948627, "grad_norm": 0.2327323704957962, "learning_rate": 9.67619182235477e-06, "loss": 0.0125, "step": 24120 }, { "epoch": 0.407504981929949, "grad_norm": 0.4042397141456604, "learning_rate": 9.675669884291705e-06, "loss": 0.0106, "step": 24130 }, { "epoch": 0.407673860911271, "grad_norm": 0.38585007190704346, "learning_rate": 9.675147540017897e-06, "loss": 0.0159, "step": 24140 }, { "epoch": 0.40784273989259295, "grad_norm": 0.548667848110199, "learning_rate": 9.674624789578727e-06, "loss": 0.0193, "step": 24150 }, { "epoch": 0.40801161887391496, "grad_norm": 0.6874213218688965, "learning_rate": 9.67410163301961e-06, "loss": 0.0222, "step": 24160 }, { "epoch": 0.4081804978552369, "grad_norm": 0.7321102619171143, "learning_rate": 9.673578070385997e-06, "loss": 0.024, "step": 24170 }, { "epoch": 0.4083493768365589, "grad_norm": 0.1691504418849945, "learning_rate": 9.67305410172337e-06, "loss": 0.0158, "step": 24180 }, { "epoch": 0.4085182558178809, "grad_norm": 0.7273917198181152, "learning_rate": 9.672529727077256e-06, "loss": 0.0207, "step": 24190 }, { "epoch": 0.4086871347992029, "grad_norm": 0.2722936272621155, "learning_rate": 9.672004946493207e-06, "loss": 0.0179, "step": 24200 }, { "epoch": 0.4088560137805249, "grad_norm": 0.4724622368812561, "learning_rate": 9.671479760016819e-06, "loss": 0.0155, "step": 24210 }, { "epoch": 0.40902489276184684, "grad_norm": 0.6518704891204834, "learning_rate": 9.670954167693715e-06, "loss": 0.0158, "step": 24220 }, { "epoch": 0.40919377174316884, "grad_norm": 0.35924068093299866, "learning_rate": 9.670428169569558e-06, "loss": 0.0181, "step": 24230 }, { "epoch": 0.40936265072449085, "grad_norm": 0.2863636016845703, "learning_rate": 9.669901765690045e-06, "loss": 0.0168, "step": 24240 }, { "epoch": 0.4095315297058128, "grad_norm": 0.5262883901596069, "learning_rate": 9.669374956100912e-06, "loss": 0.0166, "step": 24250 }, { "epoch": 0.4097004086871348, "grad_norm": 0.4935372769832611, "learning_rate": 9.668847740847924e-06, "loss": 0.0172, "step": 24260 }, { "epoch": 0.40986928766845676, "grad_norm": 0.49839305877685547, "learning_rate": 9.668320119976884e-06, "loss": 0.0075, "step": 24270 }, { "epoch": 0.41003816664977877, "grad_norm": 0.23918119072914124, "learning_rate": 9.667792093533632e-06, "loss": 0.0108, "step": 24280 }, { "epoch": 0.4102070456311008, "grad_norm": 0.700244128704071, "learning_rate": 9.66726366156404e-06, "loss": 0.0239, "step": 24290 }, { "epoch": 0.4103759246124227, "grad_norm": 0.8955276012420654, "learning_rate": 9.666734824114018e-06, "loss": 0.0134, "step": 24300 }, { "epoch": 0.41054480359374473, "grad_norm": 0.2530248463153839, "learning_rate": 9.66620558122951e-06, "loss": 0.0158, "step": 24310 }, { "epoch": 0.4107136825750667, "grad_norm": 0.49742740392684937, "learning_rate": 9.665675932956494e-06, "loss": 0.0116, "step": 24320 }, { "epoch": 0.4108825615563887, "grad_norm": 0.38032102584838867, "learning_rate": 9.665145879340988e-06, "loss": 0.0147, "step": 24330 }, { "epoch": 0.4110514405377107, "grad_norm": 0.29685312509536743, "learning_rate": 9.664615420429035e-06, "loss": 0.0131, "step": 24340 }, { "epoch": 0.41122031951903265, "grad_norm": 0.5528354048728943, "learning_rate": 9.664084556266729e-06, "loss": 0.0138, "step": 24350 }, { "epoch": 0.41138919850035466, "grad_norm": 0.4828174114227295, "learning_rate": 9.663553286900183e-06, "loss": 0.0141, "step": 24360 }, { "epoch": 0.4115580774816766, "grad_norm": 0.4705331325531006, "learning_rate": 9.663021612375554e-06, "loss": 0.0095, "step": 24370 }, { "epoch": 0.4117269564629986, "grad_norm": 0.493099182844162, "learning_rate": 9.662489532739034e-06, "loss": 0.0135, "step": 24380 }, { "epoch": 0.4118958354443206, "grad_norm": 0.5081321597099304, "learning_rate": 9.66195704803685e-06, "loss": 0.0113, "step": 24390 }, { "epoch": 0.4120647144256426, "grad_norm": 0.4618995487689972, "learning_rate": 9.66142415831526e-06, "loss": 0.0162, "step": 24400 }, { "epoch": 0.4122335934069646, "grad_norm": 0.3890073895454407, "learning_rate": 9.660890863620561e-06, "loss": 0.0175, "step": 24410 }, { "epoch": 0.41240247238828653, "grad_norm": 0.2489846795797348, "learning_rate": 9.660357163999086e-06, "loss": 0.0152, "step": 24420 }, { "epoch": 0.41257135136960854, "grad_norm": 0.5647677183151245, "learning_rate": 9.6598230594972e-06, "loss": 0.0104, "step": 24430 }, { "epoch": 0.41274023035093055, "grad_norm": 0.3545629680156708, "learning_rate": 9.659288550161302e-06, "loss": 0.0184, "step": 24440 }, { "epoch": 0.4129091093322525, "grad_norm": 0.34002387523651123, "learning_rate": 9.658753636037836e-06, "loss": 0.0147, "step": 24450 }, { "epoch": 0.4130779883135745, "grad_norm": 0.3906351327896118, "learning_rate": 9.658218317173267e-06, "loss": 0.0131, "step": 24460 }, { "epoch": 0.41324686729489646, "grad_norm": 0.6003063321113586, "learning_rate": 9.657682593614106e-06, "loss": 0.0271, "step": 24470 }, { "epoch": 0.41341574627621847, "grad_norm": 0.20834201574325562, "learning_rate": 9.657146465406895e-06, "loss": 0.0198, "step": 24480 }, { "epoch": 0.4135846252575405, "grad_norm": 0.35313016176223755, "learning_rate": 9.65660993259821e-06, "loss": 0.0156, "step": 24490 }, { "epoch": 0.4137535042388624, "grad_norm": 0.45617398619651794, "learning_rate": 9.656072995234664e-06, "loss": 0.0242, "step": 24500 }, { "epoch": 0.41392238322018443, "grad_norm": 0.2694984972476959, "learning_rate": 9.655535653362906e-06, "loss": 0.0168, "step": 24510 }, { "epoch": 0.4140912622015064, "grad_norm": 0.3198021650314331, "learning_rate": 9.654997907029619e-06, "loss": 0.0116, "step": 24520 }, { "epoch": 0.4142601411828284, "grad_norm": 0.7759730219841003, "learning_rate": 9.654459756281517e-06, "loss": 0.0157, "step": 24530 }, { "epoch": 0.4144290201641504, "grad_norm": 0.4029983580112457, "learning_rate": 9.65392120116536e-06, "loss": 0.02, "step": 24540 }, { "epoch": 0.41459789914547235, "grad_norm": 0.4289673864841461, "learning_rate": 9.653382241727933e-06, "loss": 0.0206, "step": 24550 }, { "epoch": 0.41476677812679436, "grad_norm": 0.356523334980011, "learning_rate": 9.652842878016057e-06, "loss": 0.0159, "step": 24560 }, { "epoch": 0.4149356571081163, "grad_norm": 0.28883445262908936, "learning_rate": 9.652303110076596e-06, "loss": 0.0196, "step": 24570 }, { "epoch": 0.4151045360894383, "grad_norm": 0.6404523849487305, "learning_rate": 9.651762937956438e-06, "loss": 0.0186, "step": 24580 }, { "epoch": 0.41527341507076027, "grad_norm": 0.2393493354320526, "learning_rate": 9.651222361702515e-06, "loss": 0.0167, "step": 24590 }, { "epoch": 0.4154422940520823, "grad_norm": 0.1618395894765854, "learning_rate": 9.65068138136179e-06, "loss": 0.0175, "step": 24600 }, { "epoch": 0.4156111730334043, "grad_norm": 0.46062174439430237, "learning_rate": 9.650139996981265e-06, "loss": 0.0137, "step": 24610 }, { "epoch": 0.41578005201472623, "grad_norm": 0.33838951587677, "learning_rate": 9.649598208607968e-06, "loss": 0.0116, "step": 24620 }, { "epoch": 0.41594893099604824, "grad_norm": 0.6031703352928162, "learning_rate": 9.649056016288976e-06, "loss": 0.0259, "step": 24630 }, { "epoch": 0.4161178099773702, "grad_norm": 0.27602168917655945, "learning_rate": 9.648513420071385e-06, "loss": 0.0128, "step": 24640 }, { "epoch": 0.4162866889586922, "grad_norm": 0.21279796957969666, "learning_rate": 9.647970420002342e-06, "loss": 0.0145, "step": 24650 }, { "epoch": 0.4164555679400142, "grad_norm": 0.5717060565948486, "learning_rate": 9.647427016129016e-06, "loss": 0.0142, "step": 24660 }, { "epoch": 0.41662444692133616, "grad_norm": 0.47810429334640503, "learning_rate": 9.646883208498621e-06, "loss": 0.0172, "step": 24670 }, { "epoch": 0.41679332590265816, "grad_norm": 0.43147191405296326, "learning_rate": 9.646338997158397e-06, "loss": 0.0142, "step": 24680 }, { "epoch": 0.4169622048839801, "grad_norm": 0.3676070272922516, "learning_rate": 9.645794382155628e-06, "loss": 0.018, "step": 24690 }, { "epoch": 0.4171310838653021, "grad_norm": 0.5531920194625854, "learning_rate": 9.645249363537626e-06, "loss": 0.0201, "step": 24700 }, { "epoch": 0.41729996284662413, "grad_norm": 0.2375483512878418, "learning_rate": 9.644703941351742e-06, "loss": 0.0119, "step": 24710 }, { "epoch": 0.4174688418279461, "grad_norm": 0.5294892191886902, "learning_rate": 9.644158115645363e-06, "loss": 0.0178, "step": 24720 }, { "epoch": 0.4176377208092681, "grad_norm": 0.5997974276542664, "learning_rate": 9.643611886465904e-06, "loss": 0.0187, "step": 24730 }, { "epoch": 0.41780659979059004, "grad_norm": 0.5045328736305237, "learning_rate": 9.643065253860824e-06, "loss": 0.0153, "step": 24740 }, { "epoch": 0.41797547877191205, "grad_norm": 0.3756917417049408, "learning_rate": 9.642518217877612e-06, "loss": 0.0229, "step": 24750 }, { "epoch": 0.41814435775323405, "grad_norm": 0.550828218460083, "learning_rate": 9.641970778563793e-06, "loss": 0.0146, "step": 24760 }, { "epoch": 0.418313236734556, "grad_norm": 0.7653302550315857, "learning_rate": 9.641422935966927e-06, "loss": 0.0192, "step": 24770 }, { "epoch": 0.418482115715878, "grad_norm": 0.38451439142227173, "learning_rate": 9.64087469013461e-06, "loss": 0.0179, "step": 24780 }, { "epoch": 0.41865099469719996, "grad_norm": 0.3148078918457031, "learning_rate": 9.640326041114472e-06, "loss": 0.0138, "step": 24790 }, { "epoch": 0.41881987367852197, "grad_norm": 0.4631596505641937, "learning_rate": 9.639776988954176e-06, "loss": 0.0145, "step": 24800 }, { "epoch": 0.418988752659844, "grad_norm": 0.23304402828216553, "learning_rate": 9.639227533701428e-06, "loss": 0.0131, "step": 24810 }, { "epoch": 0.41915763164116593, "grad_norm": 0.21832923591136932, "learning_rate": 9.638677675403959e-06, "loss": 0.0148, "step": 24820 }, { "epoch": 0.41932651062248794, "grad_norm": 0.41779929399490356, "learning_rate": 9.638127414109536e-06, "loss": 0.0256, "step": 24830 }, { "epoch": 0.4194953896038099, "grad_norm": 0.3664639890193939, "learning_rate": 9.637576749865973e-06, "loss": 0.0125, "step": 24840 }, { "epoch": 0.4196642685851319, "grad_norm": 0.4572223126888275, "learning_rate": 9.637025682721105e-06, "loss": 0.025, "step": 24850 }, { "epoch": 0.4198331475664539, "grad_norm": 0.32947272062301636, "learning_rate": 9.636474212722806e-06, "loss": 0.0149, "step": 24860 }, { "epoch": 0.42000202654777585, "grad_norm": 0.19026227295398712, "learning_rate": 9.635922339918991e-06, "loss": 0.0123, "step": 24870 }, { "epoch": 0.42017090552909786, "grad_norm": 0.49509382247924805, "learning_rate": 9.635370064357601e-06, "loss": 0.0167, "step": 24880 }, { "epoch": 0.4203397845104198, "grad_norm": 1.7090132236480713, "learning_rate": 9.634817386086619e-06, "loss": 0.0111, "step": 24890 }, { "epoch": 0.4205086634917418, "grad_norm": 0.34428921341896057, "learning_rate": 9.634264305154058e-06, "loss": 0.0161, "step": 24900 }, { "epoch": 0.4206775424730638, "grad_norm": 0.49304527044296265, "learning_rate": 9.63371082160797e-06, "loss": 0.0188, "step": 24910 }, { "epoch": 0.4208464214543858, "grad_norm": 0.31858518719673157, "learning_rate": 9.633156935496444e-06, "loss": 0.0142, "step": 24920 }, { "epoch": 0.4210153004357078, "grad_norm": 0.4294981360435486, "learning_rate": 9.632602646867593e-06, "loss": 0.0189, "step": 24930 }, { "epoch": 0.42118417941702974, "grad_norm": 0.2408149093389511, "learning_rate": 9.632047955769578e-06, "loss": 0.0162, "step": 24940 }, { "epoch": 0.42135305839835174, "grad_norm": 0.44577348232269287, "learning_rate": 9.631492862250584e-06, "loss": 0.0205, "step": 24950 }, { "epoch": 0.42152193737967375, "grad_norm": 0.4394490420818329, "learning_rate": 9.63093736635884e-06, "loss": 0.0199, "step": 24960 }, { "epoch": 0.4216908163609957, "grad_norm": 0.30449995398521423, "learning_rate": 9.630381468142606e-06, "loss": 0.0112, "step": 24970 }, { "epoch": 0.4218596953423177, "grad_norm": 0.4941108226776123, "learning_rate": 9.629825167650176e-06, "loss": 0.0145, "step": 24980 }, { "epoch": 0.42202857432363966, "grad_norm": 0.3851694166660309, "learning_rate": 9.62926846492988e-06, "loss": 0.015, "step": 24990 }, { "epoch": 0.42219745330496167, "grad_norm": 0.3836834132671356, "learning_rate": 9.628711360030082e-06, "loss": 0.0183, "step": 25000 }, { "epoch": 0.4223663322862837, "grad_norm": 0.48191311955451965, "learning_rate": 9.628153852999185e-06, "loss": 0.0136, "step": 25010 }, { "epoch": 0.4225352112676056, "grad_norm": 0.5078688859939575, "learning_rate": 9.62759594388562e-06, "loss": 0.0188, "step": 25020 }, { "epoch": 0.42270409024892763, "grad_norm": 0.3381633460521698, "learning_rate": 9.62703763273786e-06, "loss": 0.0135, "step": 25030 }, { "epoch": 0.4228729692302496, "grad_norm": 0.4489785134792328, "learning_rate": 9.62647891960441e-06, "loss": 0.0192, "step": 25040 }, { "epoch": 0.4230418482115716, "grad_norm": 0.39820846915245056, "learning_rate": 9.625919804533805e-06, "loss": 0.0188, "step": 25050 }, { "epoch": 0.4232107271928936, "grad_norm": 0.24843569099903107, "learning_rate": 9.625360287574624e-06, "loss": 0.0184, "step": 25060 }, { "epoch": 0.42337960617421555, "grad_norm": 0.38274338841438293, "learning_rate": 9.624800368775475e-06, "loss": 0.0126, "step": 25070 }, { "epoch": 0.42354848515553756, "grad_norm": 0.395666241645813, "learning_rate": 9.624240048185004e-06, "loss": 0.0205, "step": 25080 }, { "epoch": 0.4237173641368595, "grad_norm": 0.3567342460155487, "learning_rate": 9.623679325851887e-06, "loss": 0.0171, "step": 25090 }, { "epoch": 0.4238862431181815, "grad_norm": 0.21834225952625275, "learning_rate": 9.62311820182484e-06, "loss": 0.0145, "step": 25100 }, { "epoch": 0.4240551220995035, "grad_norm": 0.3490518033504486, "learning_rate": 9.622556676152612e-06, "loss": 0.0177, "step": 25110 }, { "epoch": 0.4242240010808255, "grad_norm": 0.4509107172489166, "learning_rate": 9.621994748883988e-06, "loss": 0.0138, "step": 25120 }, { "epoch": 0.4243928800621475, "grad_norm": 0.28518128395080566, "learning_rate": 9.621432420067784e-06, "loss": 0.0213, "step": 25130 }, { "epoch": 0.42456175904346943, "grad_norm": 0.47598204016685486, "learning_rate": 9.620869689752856e-06, "loss": 0.0216, "step": 25140 }, { "epoch": 0.42473063802479144, "grad_norm": 0.16935618221759796, "learning_rate": 9.620306557988094e-06, "loss": 0.0118, "step": 25150 }, { "epoch": 0.4248995170061134, "grad_norm": 0.27743813395500183, "learning_rate": 9.619743024822417e-06, "loss": 0.0148, "step": 25160 }, { "epoch": 0.4250683959874354, "grad_norm": 0.3704206645488739, "learning_rate": 9.619179090304788e-06, "loss": 0.0147, "step": 25170 }, { "epoch": 0.4252372749687574, "grad_norm": 0.4358600676059723, "learning_rate": 9.6186147544842e-06, "loss": 0.0174, "step": 25180 }, { "epoch": 0.42540615395007936, "grad_norm": 0.16976341605186462, "learning_rate": 9.618050017409677e-06, "loss": 0.0165, "step": 25190 }, { "epoch": 0.42557503293140136, "grad_norm": 0.33002665638923645, "learning_rate": 9.617484879130285e-06, "loss": 0.0199, "step": 25200 }, { "epoch": 0.4257439119127233, "grad_norm": 0.2559696435928345, "learning_rate": 9.616919339695121e-06, "loss": 0.0138, "step": 25210 }, { "epoch": 0.4259127908940453, "grad_norm": 0.4592667818069458, "learning_rate": 9.616353399153317e-06, "loss": 0.0194, "step": 25220 }, { "epoch": 0.42608166987536733, "grad_norm": 0.47571998834609985, "learning_rate": 9.615787057554042e-06, "loss": 0.0145, "step": 25230 }, { "epoch": 0.4262505488566893, "grad_norm": 0.5817928314208984, "learning_rate": 9.6152203149465e-06, "loss": 0.0211, "step": 25240 }, { "epoch": 0.4264194278380113, "grad_norm": 0.44448742270469666, "learning_rate": 9.614653171379925e-06, "loss": 0.0175, "step": 25250 }, { "epoch": 0.42658830681933324, "grad_norm": 0.22929595410823822, "learning_rate": 9.61408562690359e-06, "loss": 0.0156, "step": 25260 }, { "epoch": 0.42675718580065525, "grad_norm": 0.3709302842617035, "learning_rate": 9.6135176815668e-06, "loss": 0.019, "step": 25270 }, { "epoch": 0.42692606478197725, "grad_norm": 0.3838314116001129, "learning_rate": 9.6129493354189e-06, "loss": 0.0138, "step": 25280 }, { "epoch": 0.4270949437632992, "grad_norm": 0.5013241767883301, "learning_rate": 9.612380588509268e-06, "loss": 0.0138, "step": 25290 }, { "epoch": 0.4272638227446212, "grad_norm": 0.3924833834171295, "learning_rate": 9.611811440887309e-06, "loss": 0.0131, "step": 25300 }, { "epoch": 0.42743270172594316, "grad_norm": 0.19775889813899994, "learning_rate": 9.611241892602474e-06, "loss": 0.0158, "step": 25310 }, { "epoch": 0.42760158070726517, "grad_norm": 0.3762648403644562, "learning_rate": 9.610671943704242e-06, "loss": 0.0128, "step": 25320 }, { "epoch": 0.4277704596885872, "grad_norm": 0.38934317231178284, "learning_rate": 9.61010159424213e-06, "loss": 0.0131, "step": 25330 }, { "epoch": 0.42793933866990913, "grad_norm": 0.2527620792388916, "learning_rate": 9.609530844265686e-06, "loss": 0.0144, "step": 25340 }, { "epoch": 0.42810821765123114, "grad_norm": 0.8289698958396912, "learning_rate": 9.6089596938245e-06, "loss": 0.0173, "step": 25350 }, { "epoch": 0.4282770966325531, "grad_norm": 0.6053575277328491, "learning_rate": 9.608388142968187e-06, "loss": 0.0122, "step": 25360 }, { "epoch": 0.4284459756138751, "grad_norm": 0.3709881007671356, "learning_rate": 9.607816191746408e-06, "loss": 0.0167, "step": 25370 }, { "epoch": 0.4286148545951971, "grad_norm": 0.43658751249313354, "learning_rate": 9.607243840208846e-06, "loss": 0.0168, "step": 25380 }, { "epoch": 0.42878373357651905, "grad_norm": 0.29854971170425415, "learning_rate": 9.60667108840523e-06, "loss": 0.0152, "step": 25390 }, { "epoch": 0.42895261255784106, "grad_norm": 0.4280261993408203, "learning_rate": 9.606097936385317e-06, "loss": 0.0169, "step": 25400 }, { "epoch": 0.429121491539163, "grad_norm": 0.3219040334224701, "learning_rate": 9.605524384198904e-06, "loss": 0.0183, "step": 25410 }, { "epoch": 0.429290370520485, "grad_norm": 0.215973362326622, "learning_rate": 9.604950431895815e-06, "loss": 0.0152, "step": 25420 }, { "epoch": 0.429459249501807, "grad_norm": 0.17481784522533417, "learning_rate": 9.60437607952592e-06, "loss": 0.0129, "step": 25430 }, { "epoch": 0.429628128483129, "grad_norm": 0.24251872301101685, "learning_rate": 9.603801327139109e-06, "loss": 0.0156, "step": 25440 }, { "epoch": 0.429797007464451, "grad_norm": 0.4238976240158081, "learning_rate": 9.603226174785322e-06, "loss": 0.0164, "step": 25450 }, { "epoch": 0.42996588644577294, "grad_norm": 0.5044638514518738, "learning_rate": 9.602650622514524e-06, "loss": 0.0204, "step": 25460 }, { "epoch": 0.43013476542709494, "grad_norm": 0.33847084641456604, "learning_rate": 9.60207467037672e-06, "loss": 0.018, "step": 25470 }, { "epoch": 0.43030364440841695, "grad_norm": 0.29615679383277893, "learning_rate": 9.601498318421944e-06, "loss": 0.0167, "step": 25480 }, { "epoch": 0.4304725233897389, "grad_norm": 0.8094171285629272, "learning_rate": 9.60092156670027e-06, "loss": 0.0118, "step": 25490 }, { "epoch": 0.4306414023710609, "grad_norm": 0.18629398941993713, "learning_rate": 9.600344415261804e-06, "loss": 0.0137, "step": 25500 }, { "epoch": 0.43081028135238286, "grad_norm": 0.7554500699043274, "learning_rate": 9.599766864156688e-06, "loss": 0.0136, "step": 25510 }, { "epoch": 0.43097916033370487, "grad_norm": 0.482835590839386, "learning_rate": 9.599188913435097e-06, "loss": 0.0157, "step": 25520 }, { "epoch": 0.4311480393150269, "grad_norm": 0.31098243594169617, "learning_rate": 9.598610563147243e-06, "loss": 0.0136, "step": 25530 }, { "epoch": 0.43131691829634883, "grad_norm": 0.9725282192230225, "learning_rate": 9.598031813343374e-06, "loss": 0.0152, "step": 25540 }, { "epoch": 0.43148579727767084, "grad_norm": 0.3006746470928192, "learning_rate": 9.597452664073766e-06, "loss": 0.0191, "step": 25550 }, { "epoch": 0.4316546762589928, "grad_norm": 0.4334999918937683, "learning_rate": 9.596873115388736e-06, "loss": 0.0176, "step": 25560 }, { "epoch": 0.4318235552403148, "grad_norm": 0.6019201874732971, "learning_rate": 9.596293167338632e-06, "loss": 0.0154, "step": 25570 }, { "epoch": 0.4319924342216368, "grad_norm": 0.922774076461792, "learning_rate": 9.595712819973844e-06, "loss": 0.019, "step": 25580 }, { "epoch": 0.43216131320295875, "grad_norm": 0.27463772892951965, "learning_rate": 9.595132073344786e-06, "loss": 0.0201, "step": 25590 }, { "epoch": 0.43233019218428076, "grad_norm": 0.39934563636779785, "learning_rate": 9.594550927501912e-06, "loss": 0.0206, "step": 25600 }, { "epoch": 0.4324990711656027, "grad_norm": 0.5695970058441162, "learning_rate": 9.593969382495713e-06, "loss": 0.0164, "step": 25610 }, { "epoch": 0.4326679501469247, "grad_norm": 0.16707806289196014, "learning_rate": 9.593387438376711e-06, "loss": 0.0184, "step": 25620 }, { "epoch": 0.4328368291282467, "grad_norm": 0.5363081693649292, "learning_rate": 9.592805095195462e-06, "loss": 0.0173, "step": 25630 }, { "epoch": 0.4330057081095687, "grad_norm": 0.2763568162918091, "learning_rate": 9.592222353002564e-06, "loss": 0.0107, "step": 25640 }, { "epoch": 0.4331745870908907, "grad_norm": 0.6010493636131287, "learning_rate": 9.591639211848637e-06, "loss": 0.0171, "step": 25650 }, { "epoch": 0.43334346607221264, "grad_norm": 0.34158486127853394, "learning_rate": 9.59105567178435e-06, "loss": 0.0177, "step": 25660 }, { "epoch": 0.43351234505353464, "grad_norm": 0.17368139326572418, "learning_rate": 9.590471732860393e-06, "loss": 0.0162, "step": 25670 }, { "epoch": 0.43368122403485665, "grad_norm": 0.39202624559402466, "learning_rate": 9.589887395127503e-06, "loss": 0.0149, "step": 25680 }, { "epoch": 0.4338501030161786, "grad_norm": 0.5997549295425415, "learning_rate": 9.589302658636442e-06, "loss": 0.0168, "step": 25690 }, { "epoch": 0.4340189819975006, "grad_norm": 0.33176904916763306, "learning_rate": 9.58871752343801e-06, "loss": 0.0196, "step": 25700 }, { "epoch": 0.43418786097882256, "grad_norm": 0.4946734309196472, "learning_rate": 9.588131989583045e-06, "loss": 0.0128, "step": 25710 }, { "epoch": 0.43435673996014457, "grad_norm": 0.2609650492668152, "learning_rate": 9.587546057122415e-06, "loss": 0.014, "step": 25720 }, { "epoch": 0.4345256189414665, "grad_norm": 0.6496738195419312, "learning_rate": 9.586959726107025e-06, "loss": 0.0181, "step": 25730 }, { "epoch": 0.4346944979227885, "grad_norm": 0.33499521017074585, "learning_rate": 9.586372996587814e-06, "loss": 0.0187, "step": 25740 }, { "epoch": 0.43486337690411053, "grad_norm": 0.31070011854171753, "learning_rate": 9.585785868615753e-06, "loss": 0.0167, "step": 25750 }, { "epoch": 0.4350322558854325, "grad_norm": 0.4163181781768799, "learning_rate": 9.585198342241855e-06, "loss": 0.0103, "step": 25760 }, { "epoch": 0.4352011348667545, "grad_norm": 0.4131595194339752, "learning_rate": 9.584610417517162e-06, "loss": 0.0167, "step": 25770 }, { "epoch": 0.43537001384807644, "grad_norm": 0.2861619293689728, "learning_rate": 9.584022094492746e-06, "loss": 0.0124, "step": 25780 }, { "epoch": 0.43553889282939845, "grad_norm": 0.6416223645210266, "learning_rate": 9.583433373219726e-06, "loss": 0.0152, "step": 25790 }, { "epoch": 0.43570777181072046, "grad_norm": 0.28100112080574036, "learning_rate": 9.582844253749244e-06, "loss": 0.0189, "step": 25800 }, { "epoch": 0.4358766507920424, "grad_norm": 0.8991510272026062, "learning_rate": 9.582254736132484e-06, "loss": 0.0244, "step": 25810 }, { "epoch": 0.4360455297733644, "grad_norm": 0.4054386615753174, "learning_rate": 9.581664820420661e-06, "loss": 0.0158, "step": 25820 }, { "epoch": 0.43621440875468637, "grad_norm": 0.35046905279159546, "learning_rate": 9.581074506665025e-06, "loss": 0.018, "step": 25830 }, { "epoch": 0.4363832877360084, "grad_norm": 0.1976778656244278, "learning_rate": 9.580483794916861e-06, "loss": 0.0207, "step": 25840 }, { "epoch": 0.4365521667173304, "grad_norm": 0.3825313448905945, "learning_rate": 9.579892685227487e-06, "loss": 0.0115, "step": 25850 }, { "epoch": 0.43672104569865233, "grad_norm": 0.4125708043575287, "learning_rate": 9.579301177648262e-06, "loss": 0.009, "step": 25860 }, { "epoch": 0.43688992467997434, "grad_norm": 0.46029266715049744, "learning_rate": 9.57870927223057e-06, "loss": 0.02, "step": 25870 }, { "epoch": 0.4370588036612963, "grad_norm": 0.4059244692325592, "learning_rate": 9.578116969025835e-06, "loss": 0.0107, "step": 25880 }, { "epoch": 0.4372276826426183, "grad_norm": 0.21608702838420868, "learning_rate": 9.577524268085518e-06, "loss": 0.0136, "step": 25890 }, { "epoch": 0.4373965616239403, "grad_norm": 0.6203617453575134, "learning_rate": 9.576931169461107e-06, "loss": 0.013, "step": 25900 }, { "epoch": 0.43756544060526226, "grad_norm": 0.41262543201446533, "learning_rate": 9.576337673204134e-06, "loss": 0.0125, "step": 25910 }, { "epoch": 0.43773431958658426, "grad_norm": 0.6154870390892029, "learning_rate": 9.575743779366156e-06, "loss": 0.0186, "step": 25920 }, { "epoch": 0.4379031985679062, "grad_norm": 0.16832052171230316, "learning_rate": 9.575149487998771e-06, "loss": 0.0154, "step": 25930 }, { "epoch": 0.4380720775492282, "grad_norm": 0.3754928410053253, "learning_rate": 9.57455479915361e-06, "loss": 0.0165, "step": 25940 }, { "epoch": 0.43824095653055023, "grad_norm": 0.41728511452674866, "learning_rate": 9.573959712882335e-06, "loss": 0.0153, "step": 25950 }, { "epoch": 0.4384098355118722, "grad_norm": 0.34822696447372437, "learning_rate": 9.57336422923665e-06, "loss": 0.0134, "step": 25960 }, { "epoch": 0.4385787144931942, "grad_norm": 0.6898432970046997, "learning_rate": 9.572768348268288e-06, "loss": 0.0191, "step": 25970 }, { "epoch": 0.43874759347451614, "grad_norm": 0.3125627040863037, "learning_rate": 9.572172070029016e-06, "loss": 0.0131, "step": 25980 }, { "epoch": 0.43891647245583815, "grad_norm": 0.3441774249076843, "learning_rate": 9.571575394570639e-06, "loss": 0.0165, "step": 25990 }, { "epoch": 0.43908535143716015, "grad_norm": 0.4995330274105072, "learning_rate": 9.57097832194499e-06, "loss": 0.0162, "step": 26000 }, { "epoch": 0.4392542304184821, "grad_norm": 0.16507858037948608, "learning_rate": 9.570380852203949e-06, "loss": 0.0116, "step": 26010 }, { "epoch": 0.4394231093998041, "grad_norm": 0.5003565549850464, "learning_rate": 9.569782985399417e-06, "loss": 0.0136, "step": 26020 }, { "epoch": 0.43959198838112606, "grad_norm": 0.35312649607658386, "learning_rate": 9.569184721583337e-06, "loss": 0.0216, "step": 26030 }, { "epoch": 0.43976086736244807, "grad_norm": 0.33359161019325256, "learning_rate": 9.568586060807686e-06, "loss": 0.016, "step": 26040 }, { "epoch": 0.4399297463437701, "grad_norm": 0.6949657201766968, "learning_rate": 9.56798700312447e-06, "loss": 0.0186, "step": 26050 }, { "epoch": 0.44009862532509203, "grad_norm": 0.3860141932964325, "learning_rate": 9.567387548585739e-06, "loss": 0.0171, "step": 26060 }, { "epoch": 0.44026750430641404, "grad_norm": 0.18792645633220673, "learning_rate": 9.566787697243569e-06, "loss": 0.0149, "step": 26070 }, { "epoch": 0.440436383287736, "grad_norm": 0.40768226981163025, "learning_rate": 9.566187449150075e-06, "loss": 0.0163, "step": 26080 }, { "epoch": 0.440605262269058, "grad_norm": 0.31715714931488037, "learning_rate": 9.565586804357403e-06, "loss": 0.0116, "step": 26090 }, { "epoch": 0.44077414125038, "grad_norm": 0.7426395416259766, "learning_rate": 9.564985762917737e-06, "loss": 0.0188, "step": 26100 }, { "epoch": 0.44094302023170195, "grad_norm": 0.4107365012168884, "learning_rate": 9.564384324883294e-06, "loss": 0.0188, "step": 26110 }, { "epoch": 0.44111189921302396, "grad_norm": 0.49685952067375183, "learning_rate": 9.563782490306325e-06, "loss": 0.0109, "step": 26120 }, { "epoch": 0.4412807781943459, "grad_norm": 0.6352092623710632, "learning_rate": 9.563180259239116e-06, "loss": 0.0157, "step": 26130 }, { "epoch": 0.4414496571756679, "grad_norm": 0.40890079736709595, "learning_rate": 9.562577631733988e-06, "loss": 0.018, "step": 26140 }, { "epoch": 0.4416185361569899, "grad_norm": 0.3420584201812744, "learning_rate": 9.561974607843296e-06, "loss": 0.0128, "step": 26150 }, { "epoch": 0.4417874151383119, "grad_norm": 0.27445024251937866, "learning_rate": 9.561371187619428e-06, "loss": 0.0115, "step": 26160 }, { "epoch": 0.4419562941196339, "grad_norm": 0.34742122888565063, "learning_rate": 9.560767371114809e-06, "loss": 0.0122, "step": 26170 }, { "epoch": 0.44212517310095584, "grad_norm": 0.26341545581817627, "learning_rate": 9.560163158381894e-06, "loss": 0.0157, "step": 26180 }, { "epoch": 0.44229405208227784, "grad_norm": 0.25384917855262756, "learning_rate": 9.55955854947318e-06, "loss": 0.0148, "step": 26190 }, { "epoch": 0.44246293106359985, "grad_norm": 0.3641592264175415, "learning_rate": 9.558953544441192e-06, "loss": 0.0132, "step": 26200 }, { "epoch": 0.4426318100449218, "grad_norm": 0.5477011203765869, "learning_rate": 9.55834814333849e-06, "loss": 0.0167, "step": 26210 }, { "epoch": 0.4428006890262438, "grad_norm": 0.35231783986091614, "learning_rate": 9.557742346217673e-06, "loss": 0.0109, "step": 26220 }, { "epoch": 0.44296956800756576, "grad_norm": 0.20018072426319122, "learning_rate": 9.557136153131369e-06, "loss": 0.014, "step": 26230 }, { "epoch": 0.44313844698888777, "grad_norm": 0.8295250535011292, "learning_rate": 9.55652956413224e-06, "loss": 0.0158, "step": 26240 }, { "epoch": 0.4433073259702097, "grad_norm": 0.16604158282279968, "learning_rate": 9.55592257927299e-06, "loss": 0.0142, "step": 26250 }, { "epoch": 0.4434762049515317, "grad_norm": 0.37955567240715027, "learning_rate": 9.555315198606347e-06, "loss": 0.0172, "step": 26260 }, { "epoch": 0.44364508393285373, "grad_norm": 0.3289549648761749, "learning_rate": 9.554707422185086e-06, "loss": 0.0157, "step": 26270 }, { "epoch": 0.4438139629141757, "grad_norm": 0.4728393256664276, "learning_rate": 9.554099250062003e-06, "loss": 0.0152, "step": 26280 }, { "epoch": 0.4439828418954977, "grad_norm": 0.3184094727039337, "learning_rate": 9.553490682289935e-06, "loss": 0.0104, "step": 26290 }, { "epoch": 0.44415172087681964, "grad_norm": 0.22938072681427002, "learning_rate": 9.552881718921756e-06, "loss": 0.0138, "step": 26300 }, { "epoch": 0.44432059985814165, "grad_norm": 0.22685837745666504, "learning_rate": 9.552272360010368e-06, "loss": 0.0243, "step": 26310 }, { "epoch": 0.44448947883946366, "grad_norm": 0.8993532657623291, "learning_rate": 9.551662605608714e-06, "loss": 0.0119, "step": 26320 }, { "epoch": 0.4446583578207856, "grad_norm": 0.33938002586364746, "learning_rate": 9.551052455769765e-06, "loss": 0.0106, "step": 26330 }, { "epoch": 0.4448272368021076, "grad_norm": 0.45894935727119446, "learning_rate": 9.55044191054653e-06, "loss": 0.0142, "step": 26340 }, { "epoch": 0.44499611578342957, "grad_norm": 0.5022491216659546, "learning_rate": 9.549830969992055e-06, "loss": 0.0167, "step": 26350 }, { "epoch": 0.4451649947647516, "grad_norm": 0.35338494181632996, "learning_rate": 9.54921963415941e-06, "loss": 0.0133, "step": 26360 }, { "epoch": 0.4453338737460736, "grad_norm": 0.6328821778297424, "learning_rate": 9.548607903101715e-06, "loss": 0.0101, "step": 26370 }, { "epoch": 0.44550275272739553, "grad_norm": 0.4093334972858429, "learning_rate": 9.547995776872107e-06, "loss": 0.0114, "step": 26380 }, { "epoch": 0.44567163170871754, "grad_norm": 0.24997510015964508, "learning_rate": 9.547383255523771e-06, "loss": 0.0124, "step": 26390 }, { "epoch": 0.4458405106900395, "grad_norm": 0.4333403408527374, "learning_rate": 9.546770339109923e-06, "loss": 0.0201, "step": 26400 }, { "epoch": 0.4460093896713615, "grad_norm": 0.42311176657676697, "learning_rate": 9.546157027683807e-06, "loss": 0.0121, "step": 26410 }, { "epoch": 0.4461782686526835, "grad_norm": 0.26926666498184204, "learning_rate": 9.545543321298709e-06, "loss": 0.0124, "step": 26420 }, { "epoch": 0.44634714763400546, "grad_norm": 0.5991809368133545, "learning_rate": 9.544929220007946e-06, "loss": 0.0149, "step": 26430 }, { "epoch": 0.44651602661532747, "grad_norm": 0.33058810234069824, "learning_rate": 9.544314723864868e-06, "loss": 0.0199, "step": 26440 }, { "epoch": 0.4466849055966494, "grad_norm": 0.6107132434844971, "learning_rate": 9.543699832922863e-06, "loss": 0.0146, "step": 26450 }, { "epoch": 0.4468537845779714, "grad_norm": 0.5208635926246643, "learning_rate": 9.543084547235349e-06, "loss": 0.0161, "step": 26460 }, { "epoch": 0.44702266355929343, "grad_norm": 0.23450714349746704, "learning_rate": 9.542468866855783e-06, "loss": 0.0143, "step": 26470 }, { "epoch": 0.4471915425406154, "grad_norm": 0.3144867420196533, "learning_rate": 9.54185279183765e-06, "loss": 0.0192, "step": 26480 }, { "epoch": 0.4473604215219374, "grad_norm": 0.6343750953674316, "learning_rate": 9.541236322234479e-06, "loss": 0.015, "step": 26490 }, { "epoch": 0.44752930050325934, "grad_norm": 0.30797815322875977, "learning_rate": 9.540619458099824e-06, "loss": 0.0138, "step": 26500 }, { "epoch": 0.44769817948458135, "grad_norm": 0.28496691584587097, "learning_rate": 9.540002199487275e-06, "loss": 0.0154, "step": 26510 }, { "epoch": 0.44786705846590336, "grad_norm": 0.741199791431427, "learning_rate": 9.539384546450462e-06, "loss": 0.0133, "step": 26520 }, { "epoch": 0.4480359374472253, "grad_norm": 0.18239901959896088, "learning_rate": 9.53876649904304e-06, "loss": 0.0108, "step": 26530 }, { "epoch": 0.4482048164285473, "grad_norm": 0.2890654504299164, "learning_rate": 9.538148057318708e-06, "loss": 0.011, "step": 26540 }, { "epoch": 0.44837369540986927, "grad_norm": 0.6393955945968628, "learning_rate": 9.537529221331192e-06, "loss": 0.022, "step": 26550 }, { "epoch": 0.4485425743911913, "grad_norm": 0.24101217091083527, "learning_rate": 9.536909991134256e-06, "loss": 0.0136, "step": 26560 }, { "epoch": 0.4487114533725133, "grad_norm": 0.42668139934539795, "learning_rate": 9.536290366781697e-06, "loss": 0.0219, "step": 26570 }, { "epoch": 0.44888033235383523, "grad_norm": 0.2818431556224823, "learning_rate": 9.535670348327347e-06, "loss": 0.0158, "step": 26580 }, { "epoch": 0.44904921133515724, "grad_norm": 0.21462874114513397, "learning_rate": 9.535049935825072e-06, "loss": 0.0152, "step": 26590 }, { "epoch": 0.4492180903164792, "grad_norm": 0.35662850737571716, "learning_rate": 9.534429129328769e-06, "loss": 0.0159, "step": 26600 }, { "epoch": 0.4493869692978012, "grad_norm": 0.8232488632202148, "learning_rate": 9.533807928892376e-06, "loss": 0.0193, "step": 26610 }, { "epoch": 0.4495558482791232, "grad_norm": 0.3893921971321106, "learning_rate": 9.53318633456986e-06, "loss": 0.0154, "step": 26620 }, { "epoch": 0.44972472726044516, "grad_norm": 0.33084994554519653, "learning_rate": 9.53256434641522e-06, "loss": 0.0151, "step": 26630 }, { "epoch": 0.44989360624176716, "grad_norm": 0.3978137671947479, "learning_rate": 9.5319419644825e-06, "loss": 0.0175, "step": 26640 }, { "epoch": 0.4500624852230891, "grad_norm": 0.6095555424690247, "learning_rate": 9.531319188825765e-06, "loss": 0.0171, "step": 26650 }, { "epoch": 0.4502313642044111, "grad_norm": 0.6134287714958191, "learning_rate": 9.530696019499124e-06, "loss": 0.0128, "step": 26660 }, { "epoch": 0.45040024318573313, "grad_norm": 0.38799166679382324, "learning_rate": 9.530072456556714e-06, "loss": 0.0175, "step": 26670 }, { "epoch": 0.4505691221670551, "grad_norm": 0.5400074124336243, "learning_rate": 9.529448500052707e-06, "loss": 0.0165, "step": 26680 }, { "epoch": 0.4507380011483771, "grad_norm": 0.4050469994544983, "learning_rate": 9.528824150041316e-06, "loss": 0.0135, "step": 26690 }, { "epoch": 0.45090688012969904, "grad_norm": 0.5345562696456909, "learning_rate": 9.528199406576779e-06, "loss": 0.013, "step": 26700 }, { "epoch": 0.45107575911102105, "grad_norm": 0.2422999143600464, "learning_rate": 9.527574269713373e-06, "loss": 0.0162, "step": 26710 }, { "epoch": 0.45124463809234305, "grad_norm": 0.37807419896125793, "learning_rate": 9.526948739505411e-06, "loss": 0.0223, "step": 26720 }, { "epoch": 0.451413517073665, "grad_norm": 0.2656182050704956, "learning_rate": 9.526322816007232e-06, "loss": 0.011, "step": 26730 }, { "epoch": 0.451582396054987, "grad_norm": 0.3770647943019867, "learning_rate": 9.52569649927322e-06, "loss": 0.0108, "step": 26740 }, { "epoch": 0.45175127503630896, "grad_norm": 0.6738564968109131, "learning_rate": 9.525069789357786e-06, "loss": 0.0205, "step": 26750 }, { "epoch": 0.45192015401763097, "grad_norm": 0.31082624197006226, "learning_rate": 9.524442686315376e-06, "loss": 0.0163, "step": 26760 }, { "epoch": 0.452089032998953, "grad_norm": 0.20740343630313873, "learning_rate": 9.523815190200472e-06, "loss": 0.0098, "step": 26770 }, { "epoch": 0.45225791198027493, "grad_norm": 0.22319597005844116, "learning_rate": 9.523187301067588e-06, "loss": 0.0167, "step": 26780 }, { "epoch": 0.45242679096159694, "grad_norm": 0.37125077843666077, "learning_rate": 9.522559018971279e-06, "loss": 0.0113, "step": 26790 }, { "epoch": 0.4525956699429189, "grad_norm": 0.5613295435905457, "learning_rate": 9.521930343966119e-06, "loss": 0.0169, "step": 26800 }, { "epoch": 0.4527645489242409, "grad_norm": 0.22383475303649902, "learning_rate": 9.521301276106734e-06, "loss": 0.0133, "step": 26810 }, { "epoch": 0.45293342790556285, "grad_norm": 0.3214682340621948, "learning_rate": 9.520671815447772e-06, "loss": 0.0167, "step": 26820 }, { "epoch": 0.45310230688688485, "grad_norm": 0.36937302350997925, "learning_rate": 9.520041962043922e-06, "loss": 0.017, "step": 26830 }, { "epoch": 0.45327118586820686, "grad_norm": 0.32778021693229675, "learning_rate": 9.519411715949902e-06, "loss": 0.0153, "step": 26840 }, { "epoch": 0.4534400648495288, "grad_norm": 0.2544723451137543, "learning_rate": 9.518781077220464e-06, "loss": 0.0177, "step": 26850 }, { "epoch": 0.4536089438308508, "grad_norm": 0.22988148033618927, "learning_rate": 9.5181500459104e-06, "loss": 0.02, "step": 26860 }, { "epoch": 0.45377782281217277, "grad_norm": 0.5068885684013367, "learning_rate": 9.517518622074531e-06, "loss": 0.0125, "step": 26870 }, { "epoch": 0.4539467017934948, "grad_norm": 0.4262082576751709, "learning_rate": 9.516886805767715e-06, "loss": 0.0123, "step": 26880 }, { "epoch": 0.4541155807748168, "grad_norm": 0.17654213309288025, "learning_rate": 9.516254597044839e-06, "loss": 0.0114, "step": 26890 }, { "epoch": 0.45428445975613874, "grad_norm": 0.3569313883781433, "learning_rate": 9.515621995960831e-06, "loss": 0.0155, "step": 26900 }, { "epoch": 0.45445333873746074, "grad_norm": 0.4380335509777069, "learning_rate": 9.51498900257065e-06, "loss": 0.0158, "step": 26910 }, { "epoch": 0.4546222177187827, "grad_norm": 0.5103161334991455, "learning_rate": 9.514355616929286e-06, "loss": 0.0136, "step": 26920 }, { "epoch": 0.4547910967001047, "grad_norm": 0.2795758843421936, "learning_rate": 9.51372183909177e-06, "loss": 0.0107, "step": 26930 }, { "epoch": 0.4549599756814267, "grad_norm": 0.3956620693206787, "learning_rate": 9.513087669113162e-06, "loss": 0.0127, "step": 26940 }, { "epoch": 0.45512885466274866, "grad_norm": 0.34048035740852356, "learning_rate": 9.512453107048553e-06, "loss": 0.0109, "step": 26950 }, { "epoch": 0.45529773364407067, "grad_norm": 0.3299981951713562, "learning_rate": 9.511818152953076e-06, "loss": 0.0114, "step": 26960 }, { "epoch": 0.4554666126253926, "grad_norm": 0.5780040621757507, "learning_rate": 9.511182806881894e-06, "loss": 0.0247, "step": 26970 }, { "epoch": 0.4556354916067146, "grad_norm": 0.21386948227882385, "learning_rate": 9.510547068890203e-06, "loss": 0.0225, "step": 26980 }, { "epoch": 0.45580437058803663, "grad_norm": 0.2590557932853699, "learning_rate": 9.509910939033235e-06, "loss": 0.0125, "step": 26990 }, { "epoch": 0.4559732495693586, "grad_norm": 0.5035120248794556, "learning_rate": 9.509274417366258e-06, "loss": 0.015, "step": 27000 }, { "epoch": 0.4561421285506806, "grad_norm": 0.42010200023651123, "learning_rate": 9.508637503944566e-06, "loss": 0.02, "step": 27010 }, { "epoch": 0.45631100753200254, "grad_norm": 0.3305145502090454, "learning_rate": 9.508000198823498e-06, "loss": 0.0161, "step": 27020 }, { "epoch": 0.45647988651332455, "grad_norm": 0.5855815410614014, "learning_rate": 9.507362502058418e-06, "loss": 0.0137, "step": 27030 }, { "epoch": 0.45664876549464656, "grad_norm": 0.4518992602825165, "learning_rate": 9.50672441370473e-06, "loss": 0.0147, "step": 27040 }, { "epoch": 0.4568176444759685, "grad_norm": 0.4041357636451721, "learning_rate": 9.506085933817865e-06, "loss": 0.0116, "step": 27050 }, { "epoch": 0.4569865234572905, "grad_norm": 0.4744827151298523, "learning_rate": 9.505447062453298e-06, "loss": 0.0127, "step": 27060 }, { "epoch": 0.45715540243861247, "grad_norm": 0.25208234786987305, "learning_rate": 9.50480779966653e-06, "loss": 0.0118, "step": 27070 }, { "epoch": 0.4573242814199345, "grad_norm": 0.20936328172683716, "learning_rate": 9.504168145513098e-06, "loss": 0.0121, "step": 27080 }, { "epoch": 0.4574931604012565, "grad_norm": 0.43500736355781555, "learning_rate": 9.503528100048575e-06, "loss": 0.0139, "step": 27090 }, { "epoch": 0.45766203938257843, "grad_norm": 0.4873402416706085, "learning_rate": 9.502887663328567e-06, "loss": 0.0121, "step": 27100 }, { "epoch": 0.45783091836390044, "grad_norm": 0.2665565609931946, "learning_rate": 9.50224683540871e-06, "loss": 0.0118, "step": 27110 }, { "epoch": 0.4579997973452224, "grad_norm": 0.6513916850090027, "learning_rate": 9.50160561634468e-06, "loss": 0.0174, "step": 27120 }, { "epoch": 0.4581686763265444, "grad_norm": 0.10422198474407196, "learning_rate": 9.500964006192188e-06, "loss": 0.0137, "step": 27130 }, { "epoch": 0.4583375553078664, "grad_norm": 0.29074323177337646, "learning_rate": 9.50032200500697e-06, "loss": 0.0189, "step": 27140 }, { "epoch": 0.45850643428918836, "grad_norm": 0.41039010882377625, "learning_rate": 9.499679612844803e-06, "loss": 0.0146, "step": 27150 }, { "epoch": 0.45867531327051037, "grad_norm": 0.7159669995307922, "learning_rate": 9.499036829761498e-06, "loss": 0.01, "step": 27160 }, { "epoch": 0.4588441922518323, "grad_norm": 0.24955511093139648, "learning_rate": 9.498393655812899e-06, "loss": 0.0104, "step": 27170 }, { "epoch": 0.4590130712331543, "grad_norm": 0.3594813048839569, "learning_rate": 9.49775009105488e-06, "loss": 0.0208, "step": 27180 }, { "epoch": 0.45918195021447633, "grad_norm": 0.6040303707122803, "learning_rate": 9.497106135543356e-06, "loss": 0.0139, "step": 27190 }, { "epoch": 0.4593508291957983, "grad_norm": 0.43644312024116516, "learning_rate": 9.496461789334269e-06, "loss": 0.0112, "step": 27200 }, { "epoch": 0.4595197081771203, "grad_norm": 0.3304331600666046, "learning_rate": 9.495817052483601e-06, "loss": 0.0132, "step": 27210 }, { "epoch": 0.45968858715844224, "grad_norm": 0.43185150623321533, "learning_rate": 9.495171925047363e-06, "loss": 0.0153, "step": 27220 }, { "epoch": 0.45985746613976425, "grad_norm": 0.24982045590877533, "learning_rate": 9.494526407081603e-06, "loss": 0.0149, "step": 27230 }, { "epoch": 0.46002634512108626, "grad_norm": 0.4698870778083801, "learning_rate": 9.493880498642403e-06, "loss": 0.0159, "step": 27240 }, { "epoch": 0.4601952241024082, "grad_norm": 0.273215115070343, "learning_rate": 9.493234199785874e-06, "loss": 0.0264, "step": 27250 }, { "epoch": 0.4603641030837302, "grad_norm": 0.35807961225509644, "learning_rate": 9.492587510568171e-06, "loss": 0.0157, "step": 27260 }, { "epoch": 0.46053298206505217, "grad_norm": 0.45991265773773193, "learning_rate": 9.491940431045472e-06, "loss": 0.0156, "step": 27270 }, { "epoch": 0.4607018610463742, "grad_norm": 0.10596639662981033, "learning_rate": 9.491292961273997e-06, "loss": 0.0098, "step": 27280 }, { "epoch": 0.4608707400276962, "grad_norm": 0.49324339628219604, "learning_rate": 9.490645101309994e-06, "loss": 0.0148, "step": 27290 }, { "epoch": 0.46103961900901813, "grad_norm": 0.12409268319606781, "learning_rate": 9.489996851209748e-06, "loss": 0.0185, "step": 27300 }, { "epoch": 0.46120849799034014, "grad_norm": 0.4632353186607361, "learning_rate": 9.489348211029577e-06, "loss": 0.0164, "step": 27310 }, { "epoch": 0.4613773769716621, "grad_norm": 0.2607763707637787, "learning_rate": 9.488699180825835e-06, "loss": 0.0184, "step": 27320 }, { "epoch": 0.4615462559529841, "grad_norm": 0.4520711898803711, "learning_rate": 9.488049760654908e-06, "loss": 0.0156, "step": 27330 }, { "epoch": 0.4617151349343061, "grad_norm": 0.4955592751502991, "learning_rate": 9.487399950573213e-06, "loss": 0.017, "step": 27340 }, { "epoch": 0.46188401391562806, "grad_norm": 0.3537103533744812, "learning_rate": 9.486749750637207e-06, "loss": 0.0162, "step": 27350 }, { "epoch": 0.46205289289695006, "grad_norm": 0.5241483449935913, "learning_rate": 9.486099160903378e-06, "loss": 0.0171, "step": 27360 }, { "epoch": 0.462221771878272, "grad_norm": 0.47511473298072815, "learning_rate": 9.485448181428245e-06, "loss": 0.017, "step": 27370 }, { "epoch": 0.462390650859594, "grad_norm": 0.3332519233226776, "learning_rate": 9.484796812268367e-06, "loss": 0.0146, "step": 27380 }, { "epoch": 0.462559529840916, "grad_norm": 0.3343372046947479, "learning_rate": 9.48414505348033e-06, "loss": 0.0157, "step": 27390 }, { "epoch": 0.462728408822238, "grad_norm": 0.35049691796302795, "learning_rate": 9.483492905120759e-06, "loss": 0.0128, "step": 27400 }, { "epoch": 0.46289728780356, "grad_norm": 0.45943087339401245, "learning_rate": 9.48284036724631e-06, "loss": 0.0161, "step": 27410 }, { "epoch": 0.46306616678488194, "grad_norm": 0.29565444588661194, "learning_rate": 9.482187439913674e-06, "loss": 0.0125, "step": 27420 }, { "epoch": 0.46323504576620395, "grad_norm": 0.4835597276687622, "learning_rate": 9.481534123179579e-06, "loss": 0.0138, "step": 27430 }, { "epoch": 0.4634039247475259, "grad_norm": 0.3447718024253845, "learning_rate": 9.480880417100778e-06, "loss": 0.0139, "step": 27440 }, { "epoch": 0.4635728037288479, "grad_norm": 0.22290189564228058, "learning_rate": 9.480226321734068e-06, "loss": 0.0136, "step": 27450 }, { "epoch": 0.4637416827101699, "grad_norm": 0.43101367354393005, "learning_rate": 9.479571837136272e-06, "loss": 0.0177, "step": 27460 }, { "epoch": 0.46391056169149186, "grad_norm": 0.32157760858535767, "learning_rate": 9.478916963364253e-06, "loss": 0.0159, "step": 27470 }, { "epoch": 0.46407944067281387, "grad_norm": 0.29373571276664734, "learning_rate": 9.4782617004749e-06, "loss": 0.0155, "step": 27480 }, { "epoch": 0.4642483196541358, "grad_norm": 0.35171744227409363, "learning_rate": 9.477606048525147e-06, "loss": 0.0145, "step": 27490 }, { "epoch": 0.46441719863545783, "grad_norm": 0.4433634877204895, "learning_rate": 9.476950007571952e-06, "loss": 0.0176, "step": 27500 }, { "epoch": 0.46458607761677984, "grad_norm": 0.5092090368270874, "learning_rate": 9.47629357767231e-06, "loss": 0.018, "step": 27510 }, { "epoch": 0.4647549565981018, "grad_norm": 0.7164885401725769, "learning_rate": 9.475636758883251e-06, "loss": 0.0145, "step": 27520 }, { "epoch": 0.4649238355794238, "grad_norm": 0.2787405848503113, "learning_rate": 9.474979551261836e-06, "loss": 0.0151, "step": 27530 }, { "epoch": 0.46509271456074575, "grad_norm": 0.23243039846420288, "learning_rate": 9.474321954865164e-06, "loss": 0.0206, "step": 27540 }, { "epoch": 0.46526159354206775, "grad_norm": 0.598538339138031, "learning_rate": 9.473663969750363e-06, "loss": 0.018, "step": 27550 }, { "epoch": 0.46543047252338976, "grad_norm": 0.5213698148727417, "learning_rate": 9.473005595974602e-06, "loss": 0.0192, "step": 27560 }, { "epoch": 0.4655993515047117, "grad_norm": 0.16841505467891693, "learning_rate": 9.472346833595073e-06, "loss": 0.0105, "step": 27570 }, { "epoch": 0.4657682304860337, "grad_norm": 0.2888849675655365, "learning_rate": 9.47168768266901e-06, "loss": 0.0138, "step": 27580 }, { "epoch": 0.46593710946735567, "grad_norm": 0.3202715516090393, "learning_rate": 9.471028143253678e-06, "loss": 0.0158, "step": 27590 }, { "epoch": 0.4661059884486777, "grad_norm": 0.420989453792572, "learning_rate": 9.470368215406378e-06, "loss": 0.0151, "step": 27600 }, { "epoch": 0.4662748674299997, "grad_norm": 0.36819949746131897, "learning_rate": 9.46970789918444e-06, "loss": 0.0201, "step": 27610 }, { "epoch": 0.46644374641132164, "grad_norm": 0.34218302369117737, "learning_rate": 9.469047194645233e-06, "loss": 0.0129, "step": 27620 }, { "epoch": 0.46661262539264364, "grad_norm": 0.3303963243961334, "learning_rate": 9.468386101846156e-06, "loss": 0.0203, "step": 27630 }, { "epoch": 0.4667815043739656, "grad_norm": 0.45301395654678345, "learning_rate": 9.467724620844645e-06, "loss": 0.0194, "step": 27640 }, { "epoch": 0.4669503833552876, "grad_norm": 0.21728995442390442, "learning_rate": 9.467062751698165e-06, "loss": 0.0139, "step": 27650 }, { "epoch": 0.4671192623366096, "grad_norm": 0.313338965177536, "learning_rate": 9.466400494464219e-06, "loss": 0.0194, "step": 27660 }, { "epoch": 0.46728814131793156, "grad_norm": 0.3941400945186615, "learning_rate": 9.465737849200343e-06, "loss": 0.0172, "step": 27670 }, { "epoch": 0.46745702029925357, "grad_norm": 0.19455619156360626, "learning_rate": 9.465074815964106e-06, "loss": 0.0145, "step": 27680 }, { "epoch": 0.4676258992805755, "grad_norm": 0.27325284481048584, "learning_rate": 9.46441139481311e-06, "loss": 0.0127, "step": 27690 }, { "epoch": 0.4677947782618975, "grad_norm": 0.35949215292930603, "learning_rate": 9.463747585804992e-06, "loss": 0.0186, "step": 27700 }, { "epoch": 0.46796365724321953, "grad_norm": 0.6025969982147217, "learning_rate": 9.46308338899742e-06, "loss": 0.0171, "step": 27710 }, { "epoch": 0.4681325362245415, "grad_norm": 0.416098952293396, "learning_rate": 9.4624188044481e-06, "loss": 0.0122, "step": 27720 }, { "epoch": 0.4683014152058635, "grad_norm": 0.3665541112422943, "learning_rate": 9.461753832214768e-06, "loss": 0.0151, "step": 27730 }, { "epoch": 0.46847029418718544, "grad_norm": 1.14806067943573, "learning_rate": 9.461088472355198e-06, "loss": 0.0144, "step": 27740 }, { "epoch": 0.46863917316850745, "grad_norm": 0.4330260157585144, "learning_rate": 9.460422724927192e-06, "loss": 0.0171, "step": 27750 }, { "epoch": 0.46880805214982946, "grad_norm": 0.34220534563064575, "learning_rate": 9.459756589988589e-06, "loss": 0.017, "step": 27760 }, { "epoch": 0.4689769311311514, "grad_norm": 0.22302955389022827, "learning_rate": 9.459090067597263e-06, "loss": 0.0147, "step": 27770 }, { "epoch": 0.4691458101124734, "grad_norm": 0.4062312841415405, "learning_rate": 9.458423157811117e-06, "loss": 0.0208, "step": 27780 }, { "epoch": 0.46931468909379537, "grad_norm": 0.2345236986875534, "learning_rate": 9.457755860688092e-06, "loss": 0.0156, "step": 27790 }, { "epoch": 0.4694835680751174, "grad_norm": 0.25078344345092773, "learning_rate": 9.45708817628616e-06, "loss": 0.0112, "step": 27800 }, { "epoch": 0.4696524470564394, "grad_norm": 0.3568890392780304, "learning_rate": 9.45642010466333e-06, "loss": 0.0136, "step": 27810 }, { "epoch": 0.46982132603776133, "grad_norm": 0.1950414627790451, "learning_rate": 9.455751645877641e-06, "loss": 0.0141, "step": 27820 }, { "epoch": 0.46999020501908334, "grad_norm": 0.23906779289245605, "learning_rate": 9.455082799987168e-06, "loss": 0.0156, "step": 27830 }, { "epoch": 0.4701590840004053, "grad_norm": 0.4416484236717224, "learning_rate": 9.454413567050016e-06, "loss": 0.0091, "step": 27840 }, { "epoch": 0.4703279629817273, "grad_norm": 0.6680873036384583, "learning_rate": 9.45374394712433e-06, "loss": 0.014, "step": 27850 }, { "epoch": 0.4704968419630493, "grad_norm": 0.42486727237701416, "learning_rate": 9.453073940268282e-06, "loss": 0.0218, "step": 27860 }, { "epoch": 0.47066572094437126, "grad_norm": 0.37532809376716614, "learning_rate": 9.452403546540084e-06, "loss": 0.0146, "step": 27870 }, { "epoch": 0.47083459992569326, "grad_norm": 0.3401181995868683, "learning_rate": 9.451732765997975e-06, "loss": 0.0141, "step": 27880 }, { "epoch": 0.4710034789070152, "grad_norm": 0.34011077880859375, "learning_rate": 9.45106159870023e-06, "loss": 0.0136, "step": 27890 }, { "epoch": 0.4711723578883372, "grad_norm": 0.07639192044734955, "learning_rate": 9.450390044705162e-06, "loss": 0.0188, "step": 27900 }, { "epoch": 0.4713412368696592, "grad_norm": 0.26928645372390747, "learning_rate": 9.449718104071111e-06, "loss": 0.0135, "step": 27910 }, { "epoch": 0.4715101158509812, "grad_norm": 0.21892300248146057, "learning_rate": 9.449045776856454e-06, "loss": 0.0128, "step": 27920 }, { "epoch": 0.4716789948323032, "grad_norm": 0.5342828631401062, "learning_rate": 9.448373063119603e-06, "loss": 0.0117, "step": 27930 }, { "epoch": 0.47184787381362514, "grad_norm": 0.37037393450737, "learning_rate": 9.447699962919e-06, "loss": 0.0184, "step": 27940 }, { "epoch": 0.47201675279494715, "grad_norm": 0.3148495852947235, "learning_rate": 9.447026476313122e-06, "loss": 0.0128, "step": 27950 }, { "epoch": 0.4721856317762691, "grad_norm": 0.22318750619888306, "learning_rate": 9.446352603360483e-06, "loss": 0.0167, "step": 27960 }, { "epoch": 0.4723545107575911, "grad_norm": 0.4411354959011078, "learning_rate": 9.445678344119622e-06, "loss": 0.0156, "step": 27970 }, { "epoch": 0.4725233897389131, "grad_norm": 0.12262539565563202, "learning_rate": 9.445003698649123e-06, "loss": 0.0125, "step": 27980 }, { "epoch": 0.47269226872023506, "grad_norm": 0.28029167652130127, "learning_rate": 9.444328667007592e-06, "loss": 0.0163, "step": 27990 }, { "epoch": 0.47286114770155707, "grad_norm": 0.3951111435890198, "learning_rate": 9.443653249253677e-06, "loss": 0.0163, "step": 28000 }, { "epoch": 0.473030026682879, "grad_norm": 0.3622283339500427, "learning_rate": 9.442977445446058e-06, "loss": 0.0149, "step": 28010 }, { "epoch": 0.47319890566420103, "grad_norm": 0.4707929790019989, "learning_rate": 9.442301255643445e-06, "loss": 0.0157, "step": 28020 }, { "epoch": 0.47336778464552304, "grad_norm": 0.1744052767753601, "learning_rate": 9.441624679904584e-06, "loss": 0.0125, "step": 28030 }, { "epoch": 0.473536663626845, "grad_norm": 0.39539748430252075, "learning_rate": 9.440947718288254e-06, "loss": 0.0131, "step": 28040 }, { "epoch": 0.473705542608167, "grad_norm": 0.33664727210998535, "learning_rate": 9.440270370853269e-06, "loss": 0.0174, "step": 28050 }, { "epoch": 0.47387442158948895, "grad_norm": 0.5080174803733826, "learning_rate": 9.439592637658473e-06, "loss": 0.0165, "step": 28060 }, { "epoch": 0.47404330057081095, "grad_norm": 0.3285943269729614, "learning_rate": 9.438914518762747e-06, "loss": 0.0211, "step": 28070 }, { "epoch": 0.47421217955213296, "grad_norm": 0.45568224787712097, "learning_rate": 9.438236014225006e-06, "loss": 0.0164, "step": 28080 }, { "epoch": 0.4743810585334549, "grad_norm": 0.367414265871048, "learning_rate": 9.437557124104196e-06, "loss": 0.0125, "step": 28090 }, { "epoch": 0.4745499375147769, "grad_norm": 0.21083134412765503, "learning_rate": 9.436877848459296e-06, "loss": 0.0126, "step": 28100 }, { "epoch": 0.47471881649609887, "grad_norm": 0.32897481322288513, "learning_rate": 9.436198187349321e-06, "loss": 0.0193, "step": 28110 }, { "epoch": 0.4748876954774209, "grad_norm": 0.13104422390460968, "learning_rate": 9.435518140833318e-06, "loss": 0.0162, "step": 28120 }, { "epoch": 0.4750565744587429, "grad_norm": 0.414958655834198, "learning_rate": 9.434837708970366e-06, "loss": 0.0158, "step": 28130 }, { "epoch": 0.47522545344006484, "grad_norm": 0.41889986395835876, "learning_rate": 9.434156891819582e-06, "loss": 0.014, "step": 28140 }, { "epoch": 0.47539433242138684, "grad_norm": 0.2821134626865387, "learning_rate": 9.433475689440113e-06, "loss": 0.0165, "step": 28150 }, { "epoch": 0.4755632114027088, "grad_norm": 0.28008487820625305, "learning_rate": 9.432794101891139e-06, "loss": 0.0159, "step": 28160 }, { "epoch": 0.4757320903840308, "grad_norm": 0.31518012285232544, "learning_rate": 9.432112129231876e-06, "loss": 0.0115, "step": 28170 }, { "epoch": 0.4759009693653528, "grad_norm": 0.7182185053825378, "learning_rate": 9.431429771521571e-06, "loss": 0.0154, "step": 28180 }, { "epoch": 0.47606984834667476, "grad_norm": 0.26181715726852417, "learning_rate": 9.430747028819506e-06, "loss": 0.0165, "step": 28190 }, { "epoch": 0.47623872732799677, "grad_norm": 0.25920066237449646, "learning_rate": 9.430063901184995e-06, "loss": 0.0174, "step": 28200 }, { "epoch": 0.4764076063093187, "grad_norm": 0.313600093126297, "learning_rate": 9.42938038867739e-06, "loss": 0.0148, "step": 28210 }, { "epoch": 0.47657648529064073, "grad_norm": 0.4897025227546692, "learning_rate": 9.428696491356072e-06, "loss": 0.0128, "step": 28220 }, { "epoch": 0.47674536427196273, "grad_norm": 0.3904273509979248, "learning_rate": 9.42801220928045e-06, "loss": 0.0117, "step": 28230 }, { "epoch": 0.4769142432532847, "grad_norm": 0.30740317702293396, "learning_rate": 9.42732754250998e-06, "loss": 0.0129, "step": 28240 }, { "epoch": 0.4770831222346067, "grad_norm": 0.5258448123931885, "learning_rate": 9.426642491104142e-06, "loss": 0.0113, "step": 28250 }, { "epoch": 0.47725200121592865, "grad_norm": 0.4636279344558716, "learning_rate": 9.425957055122451e-06, "loss": 0.0163, "step": 28260 }, { "epoch": 0.47742088019725065, "grad_norm": 0.4731503129005432, "learning_rate": 9.425271234624455e-06, "loss": 0.0142, "step": 28270 }, { "epoch": 0.47758975917857266, "grad_norm": 0.27981793880462646, "learning_rate": 9.42458502966974e-06, "loss": 0.0125, "step": 28280 }, { "epoch": 0.4777586381598946, "grad_norm": 0.2569150924682617, "learning_rate": 9.423898440317919e-06, "loss": 0.0184, "step": 28290 }, { "epoch": 0.4779275171412166, "grad_norm": 0.27429184317588806, "learning_rate": 9.423211466628641e-06, "loss": 0.0151, "step": 28300 }, { "epoch": 0.47809639612253857, "grad_norm": 0.3728543817996979, "learning_rate": 9.42252410866159e-06, "loss": 0.0127, "step": 28310 }, { "epoch": 0.4782652751038606, "grad_norm": 0.39093658328056335, "learning_rate": 9.42183636647648e-06, "loss": 0.0149, "step": 28320 }, { "epoch": 0.4784341540851826, "grad_norm": 0.23667994141578674, "learning_rate": 9.421148240133063e-06, "loss": 0.0094, "step": 28330 }, { "epoch": 0.47860303306650454, "grad_norm": 0.29413530230522156, "learning_rate": 9.420459729691119e-06, "loss": 0.0194, "step": 28340 }, { "epoch": 0.47877191204782654, "grad_norm": 0.2955327033996582, "learning_rate": 9.419770835210467e-06, "loss": 0.0174, "step": 28350 }, { "epoch": 0.4789407910291485, "grad_norm": 0.30983036756515503, "learning_rate": 9.419081556750954e-06, "loss": 0.0155, "step": 28360 }, { "epoch": 0.4791096700104705, "grad_norm": 0.37345126271247864, "learning_rate": 9.418391894372463e-06, "loss": 0.0136, "step": 28370 }, { "epoch": 0.4792785489917925, "grad_norm": 0.2864719033241272, "learning_rate": 9.417701848134912e-06, "loss": 0.0154, "step": 28380 }, { "epoch": 0.47944742797311446, "grad_norm": 0.40875864028930664, "learning_rate": 9.41701141809825e-06, "loss": 0.0142, "step": 28390 }, { "epoch": 0.47961630695443647, "grad_norm": 0.26205602288246155, "learning_rate": 9.416320604322458e-06, "loss": 0.0094, "step": 28400 }, { "epoch": 0.4797851859357584, "grad_norm": 0.1287785768508911, "learning_rate": 9.415629406867554e-06, "loss": 0.0115, "step": 28410 }, { "epoch": 0.4799540649170804, "grad_norm": 0.4243737459182739, "learning_rate": 9.414937825793586e-06, "loss": 0.025, "step": 28420 }, { "epoch": 0.48012294389840243, "grad_norm": 0.15742017328739166, "learning_rate": 9.41424586116064e-06, "loss": 0.0149, "step": 28430 }, { "epoch": 0.4802918228797244, "grad_norm": 0.9734819531440735, "learning_rate": 9.41355351302883e-06, "loss": 0.016, "step": 28440 }, { "epoch": 0.4804607018610464, "grad_norm": 0.19299717247486115, "learning_rate": 9.412860781458304e-06, "loss": 0.0215, "step": 28450 }, { "epoch": 0.48062958084236834, "grad_norm": 0.37051111459732056, "learning_rate": 9.41216766650925e-06, "loss": 0.0139, "step": 28460 }, { "epoch": 0.48079845982369035, "grad_norm": 0.3610552251338959, "learning_rate": 9.411474168241877e-06, "loss": 0.0176, "step": 28470 }, { "epoch": 0.4809673388050123, "grad_norm": 0.6716757416725159, "learning_rate": 9.410780286716438e-06, "loss": 0.0144, "step": 28480 }, { "epoch": 0.4811362177863343, "grad_norm": 0.1809031367301941, "learning_rate": 9.410086021993217e-06, "loss": 0.0113, "step": 28490 }, { "epoch": 0.4813050967676563, "grad_norm": 0.22418951988220215, "learning_rate": 9.409391374132529e-06, "loss": 0.0105, "step": 28500 }, { "epoch": 0.48147397574897827, "grad_norm": 0.5349839329719543, "learning_rate": 9.408696343194722e-06, "loss": 0.0189, "step": 28510 }, { "epoch": 0.4816428547303003, "grad_norm": 0.08435141295194626, "learning_rate": 9.40800092924018e-06, "loss": 0.0099, "step": 28520 }, { "epoch": 0.4818117337116222, "grad_norm": 0.41056007146835327, "learning_rate": 9.407305132329319e-06, "loss": 0.0157, "step": 28530 }, { "epoch": 0.48198061269294423, "grad_norm": 0.43929946422576904, "learning_rate": 9.406608952522587e-06, "loss": 0.0143, "step": 28540 }, { "epoch": 0.48214949167426624, "grad_norm": 0.38167956471443176, "learning_rate": 9.405912389880469e-06, "loss": 0.0162, "step": 28550 }, { "epoch": 0.4823183706555882, "grad_norm": 0.3792663812637329, "learning_rate": 9.405215444463475e-06, "loss": 0.0199, "step": 28560 }, { "epoch": 0.4824872496369102, "grad_norm": 0.39999693632125854, "learning_rate": 9.404518116332163e-06, "loss": 0.0163, "step": 28570 }, { "epoch": 0.48265612861823215, "grad_norm": 0.36150676012039185, "learning_rate": 9.403820405547104e-06, "loss": 0.0131, "step": 28580 }, { "epoch": 0.48282500759955416, "grad_norm": 0.1631571501493454, "learning_rate": 9.403122312168922e-06, "loss": 0.0152, "step": 28590 }, { "epoch": 0.48299388658087616, "grad_norm": 0.29915836453437805, "learning_rate": 9.402423836258262e-06, "loss": 0.0125, "step": 28600 }, { "epoch": 0.4831627655621981, "grad_norm": 0.42582517862319946, "learning_rate": 9.401724977875808e-06, "loss": 0.0124, "step": 28610 }, { "epoch": 0.4833316445435201, "grad_norm": 0.4201221168041229, "learning_rate": 9.401025737082274e-06, "loss": 0.0117, "step": 28620 }, { "epoch": 0.4835005235248421, "grad_norm": 0.48225638270378113, "learning_rate": 9.400326113938406e-06, "loss": 0.0129, "step": 28630 }, { "epoch": 0.4836694025061641, "grad_norm": 0.42771637439727783, "learning_rate": 9.39962610850499e-06, "loss": 0.0119, "step": 28640 }, { "epoch": 0.4838382814874861, "grad_norm": 0.456990510225296, "learning_rate": 9.398925720842837e-06, "loss": 0.015, "step": 28650 }, { "epoch": 0.48400716046880804, "grad_norm": 0.753706157207489, "learning_rate": 9.398224951012797e-06, "loss": 0.015, "step": 28660 }, { "epoch": 0.48417603945013005, "grad_norm": 0.508696973323822, "learning_rate": 9.39752379907575e-06, "loss": 0.0156, "step": 28670 }, { "epoch": 0.484344918431452, "grad_norm": 0.48223522305488586, "learning_rate": 9.396822265092611e-06, "loss": 0.0169, "step": 28680 }, { "epoch": 0.484513797412774, "grad_norm": 0.5040777921676636, "learning_rate": 9.396120349124327e-06, "loss": 0.0196, "step": 28690 }, { "epoch": 0.484682676394096, "grad_norm": 0.3110726475715637, "learning_rate": 9.395418051231882e-06, "loss": 0.0126, "step": 28700 }, { "epoch": 0.48485155537541796, "grad_norm": 0.5923696756362915, "learning_rate": 9.394715371476283e-06, "loss": 0.0121, "step": 28710 }, { "epoch": 0.48502043435673997, "grad_norm": 0.4792080521583557, "learning_rate": 9.394012309918584e-06, "loss": 0.0138, "step": 28720 }, { "epoch": 0.4851893133380619, "grad_norm": 0.4469783902168274, "learning_rate": 9.393308866619861e-06, "loss": 0.0196, "step": 28730 }, { "epoch": 0.48535819231938393, "grad_norm": 0.3940734565258026, "learning_rate": 9.39260504164123e-06, "loss": 0.0171, "step": 28740 }, { "epoch": 0.48552707130070594, "grad_norm": 0.297273725271225, "learning_rate": 9.391900835043837e-06, "loss": 0.0124, "step": 28750 }, { "epoch": 0.4856959502820279, "grad_norm": 0.3750153183937073, "learning_rate": 9.39119624688886e-06, "loss": 0.0124, "step": 28760 }, { "epoch": 0.4858648292633499, "grad_norm": 0.4958670735359192, "learning_rate": 9.390491277237514e-06, "loss": 0.0122, "step": 28770 }, { "epoch": 0.48603370824467185, "grad_norm": 0.11462567001581192, "learning_rate": 9.389785926151042e-06, "loss": 0.0099, "step": 28780 }, { "epoch": 0.48620258722599385, "grad_norm": 0.4603767395019531, "learning_rate": 9.389080193690727e-06, "loss": 0.0154, "step": 28790 }, { "epoch": 0.48637146620731586, "grad_norm": 0.5498762726783752, "learning_rate": 9.38837407991788e-06, "loss": 0.0161, "step": 28800 }, { "epoch": 0.4865403451886378, "grad_norm": 0.43908488750457764, "learning_rate": 9.387667584893848e-06, "loss": 0.0131, "step": 28810 }, { "epoch": 0.4867092241699598, "grad_norm": 0.35526782274246216, "learning_rate": 9.386960708680004e-06, "loss": 0.0138, "step": 28820 }, { "epoch": 0.48687810315128177, "grad_norm": 0.297313928604126, "learning_rate": 9.386253451337765e-06, "loss": 0.0122, "step": 28830 }, { "epoch": 0.4870469821326038, "grad_norm": 0.7288819551467896, "learning_rate": 9.385545812928573e-06, "loss": 0.0188, "step": 28840 }, { "epoch": 0.4872158611139258, "grad_norm": 0.35737985372543335, "learning_rate": 9.384837793513909e-06, "loss": 0.0155, "step": 28850 }, { "epoch": 0.48738474009524774, "grad_norm": 0.22239923477172852, "learning_rate": 9.38412939315528e-06, "loss": 0.0179, "step": 28860 }, { "epoch": 0.48755361907656974, "grad_norm": 0.33121854066848755, "learning_rate": 9.383420611914233e-06, "loss": 0.0165, "step": 28870 }, { "epoch": 0.4877224980578917, "grad_norm": 0.44264379143714905, "learning_rate": 9.382711449852345e-06, "loss": 0.0146, "step": 28880 }, { "epoch": 0.4878913770392137, "grad_norm": 0.7240524291992188, "learning_rate": 9.382001907031227e-06, "loss": 0.0103, "step": 28890 }, { "epoch": 0.4880602560205357, "grad_norm": 0.31083106994628906, "learning_rate": 9.381291983512516e-06, "loss": 0.0151, "step": 28900 }, { "epoch": 0.48822913500185766, "grad_norm": 0.27454864978790283, "learning_rate": 9.380581679357896e-06, "loss": 0.0173, "step": 28910 }, { "epoch": 0.48839801398317967, "grad_norm": 0.5086013674736023, "learning_rate": 9.379870994629075e-06, "loss": 0.013, "step": 28920 }, { "epoch": 0.4885668929645016, "grad_norm": 0.3493403196334839, "learning_rate": 9.379159929387793e-06, "loss": 0.0156, "step": 28930 }, { "epoch": 0.4887357719458236, "grad_norm": 0.28934091329574585, "learning_rate": 9.378448483695826e-06, "loss": 0.0175, "step": 28940 }, { "epoch": 0.48890465092714563, "grad_norm": 0.3273850679397583, "learning_rate": 9.377736657614986e-06, "loss": 0.012, "step": 28950 }, { "epoch": 0.4890735299084676, "grad_norm": 0.30846327543258667, "learning_rate": 9.377024451207111e-06, "loss": 0.0183, "step": 28960 }, { "epoch": 0.4892424088897896, "grad_norm": 0.2935860753059387, "learning_rate": 9.376311864534075e-06, "loss": 0.0145, "step": 28970 }, { "epoch": 0.48941128787111154, "grad_norm": 0.44620949029922485, "learning_rate": 9.37559889765779e-06, "loss": 0.0114, "step": 28980 }, { "epoch": 0.48958016685243355, "grad_norm": 0.2991151511669159, "learning_rate": 9.374885550640194e-06, "loss": 0.0176, "step": 28990 }, { "epoch": 0.4897490458337555, "grad_norm": 0.5737714767456055, "learning_rate": 9.374171823543263e-06, "loss": 0.0179, "step": 29000 }, { "epoch": 0.4899179248150775, "grad_norm": 0.14395126700401306, "learning_rate": 9.373457716429001e-06, "loss": 0.013, "step": 29010 }, { "epoch": 0.4900868037963995, "grad_norm": 0.4268141984939575, "learning_rate": 9.37274322935945e-06, "loss": 0.0137, "step": 29020 }, { "epoch": 0.49025568277772147, "grad_norm": 0.4408770203590393, "learning_rate": 9.37202836239668e-06, "loss": 0.0152, "step": 29030 }, { "epoch": 0.4904245617590435, "grad_norm": 0.384721040725708, "learning_rate": 9.371313115602801e-06, "loss": 0.0112, "step": 29040 }, { "epoch": 0.4905934407403654, "grad_norm": 0.2296638935804367, "learning_rate": 9.370597489039949e-06, "loss": 0.0109, "step": 29050 }, { "epoch": 0.49076231972168743, "grad_norm": 0.5707676410675049, "learning_rate": 9.369881482770297e-06, "loss": 0.0137, "step": 29060 }, { "epoch": 0.49093119870300944, "grad_norm": 0.313325971364975, "learning_rate": 9.36916509685605e-06, "loss": 0.0149, "step": 29070 }, { "epoch": 0.4911000776843314, "grad_norm": 0.23774565756320953, "learning_rate": 9.368448331359444e-06, "loss": 0.0121, "step": 29080 }, { "epoch": 0.4912689566656534, "grad_norm": 0.4295215904712677, "learning_rate": 9.367731186342752e-06, "loss": 0.0125, "step": 29090 }, { "epoch": 0.49143783564697535, "grad_norm": 0.3850870728492737, "learning_rate": 9.367013661868278e-06, "loss": 0.0111, "step": 29100 }, { "epoch": 0.49160671462829736, "grad_norm": 0.2887423038482666, "learning_rate": 9.366295757998358e-06, "loss": 0.0171, "step": 29110 }, { "epoch": 0.49177559360961937, "grad_norm": 0.25777414441108704, "learning_rate": 9.36557747479536e-06, "loss": 0.0122, "step": 29120 }, { "epoch": 0.4919444725909413, "grad_norm": 0.2789154052734375, "learning_rate": 9.36485881232169e-06, "loss": 0.0106, "step": 29130 }, { "epoch": 0.4921133515722633, "grad_norm": 0.256022572517395, "learning_rate": 9.36413977063978e-06, "loss": 0.0127, "step": 29140 }, { "epoch": 0.4922822305535853, "grad_norm": 0.3101920783519745, "learning_rate": 9.363420349812102e-06, "loss": 0.015, "step": 29150 }, { "epoch": 0.4924511095349073, "grad_norm": 0.143906369805336, "learning_rate": 9.362700549901156e-06, "loss": 0.014, "step": 29160 }, { "epoch": 0.4926199885162293, "grad_norm": 0.2753553092479706, "learning_rate": 9.361980370969477e-06, "loss": 0.0115, "step": 29170 }, { "epoch": 0.49278886749755124, "grad_norm": 0.4196864068508148, "learning_rate": 9.36125981307963e-06, "loss": 0.0161, "step": 29180 }, { "epoch": 0.49295774647887325, "grad_norm": 0.24024438858032227, "learning_rate": 9.360538876294219e-06, "loss": 0.0209, "step": 29190 }, { "epoch": 0.4931266254601952, "grad_norm": 0.2085026204586029, "learning_rate": 9.359817560675875e-06, "loss": 0.0123, "step": 29200 }, { "epoch": 0.4932955044415172, "grad_norm": 0.6031296253204346, "learning_rate": 9.359095866287264e-06, "loss": 0.0135, "step": 29210 }, { "epoch": 0.4934643834228392, "grad_norm": 0.25162023305892944, "learning_rate": 9.358373793191086e-06, "loss": 0.0155, "step": 29220 }, { "epoch": 0.49363326240416117, "grad_norm": 0.2952132225036621, "learning_rate": 9.357651341450072e-06, "loss": 0.0138, "step": 29230 }, { "epoch": 0.4938021413854832, "grad_norm": 0.598017692565918, "learning_rate": 9.356928511126987e-06, "loss": 0.019, "step": 29240 }, { "epoch": 0.4939710203668051, "grad_norm": 0.17065562307834625, "learning_rate": 9.35620530228463e-06, "loss": 0.0122, "step": 29250 }, { "epoch": 0.49413989934812713, "grad_norm": 0.4805447459220886, "learning_rate": 9.35548171498583e-06, "loss": 0.0175, "step": 29260 }, { "epoch": 0.49430877832944914, "grad_norm": 0.24006924033164978, "learning_rate": 9.354757749293451e-06, "loss": 0.0144, "step": 29270 }, { "epoch": 0.4944776573107711, "grad_norm": 1.0004881620407104, "learning_rate": 9.35403340527039e-06, "loss": 0.016, "step": 29280 }, { "epoch": 0.4946465362920931, "grad_norm": 0.5999182462692261, "learning_rate": 9.353308682979576e-06, "loss": 0.0127, "step": 29290 }, { "epoch": 0.49481541527341505, "grad_norm": 0.24101899564266205, "learning_rate": 9.352583582483971e-06, "loss": 0.019, "step": 29300 }, { "epoch": 0.49498429425473706, "grad_norm": 0.3995469808578491, "learning_rate": 9.351858103846569e-06, "loss": 0.0117, "step": 29310 }, { "epoch": 0.49515317323605906, "grad_norm": 0.5472136735916138, "learning_rate": 9.351132247130399e-06, "loss": 0.0148, "step": 29320 }, { "epoch": 0.495322052217381, "grad_norm": 0.18047958612442017, "learning_rate": 9.35040601239852e-06, "loss": 0.0135, "step": 29330 }, { "epoch": 0.495490931198703, "grad_norm": 0.2406819760799408, "learning_rate": 9.349679399714028e-06, "loss": 0.0189, "step": 29340 }, { "epoch": 0.495659810180025, "grad_norm": 0.5741304159164429, "learning_rate": 9.348952409140048e-06, "loss": 0.0137, "step": 29350 }, { "epoch": 0.495828689161347, "grad_norm": 0.44862937927246094, "learning_rate": 9.34822504073974e-06, "loss": 0.0114, "step": 29360 }, { "epoch": 0.495997568142669, "grad_norm": 0.4523482620716095, "learning_rate": 9.347497294576295e-06, "loss": 0.0148, "step": 29370 }, { "epoch": 0.49616644712399094, "grad_norm": 0.3486880362033844, "learning_rate": 9.346769170712936e-06, "loss": 0.0175, "step": 29380 }, { "epoch": 0.49633532610531295, "grad_norm": 0.4095533490180969, "learning_rate": 9.346040669212926e-06, "loss": 0.0146, "step": 29390 }, { "epoch": 0.4965042050866349, "grad_norm": 0.43609583377838135, "learning_rate": 9.34531179013955e-06, "loss": 0.0117, "step": 29400 }, { "epoch": 0.4966730840679569, "grad_norm": 0.21987412869930267, "learning_rate": 9.344582533556134e-06, "loss": 0.0196, "step": 29410 }, { "epoch": 0.4968419630492789, "grad_norm": 0.3992569148540497, "learning_rate": 9.343852899526033e-06, "loss": 0.0135, "step": 29420 }, { "epoch": 0.49701084203060086, "grad_norm": 0.19775770604610443, "learning_rate": 9.343122888112636e-06, "loss": 0.0108, "step": 29430 }, { "epoch": 0.49717972101192287, "grad_norm": 0.14553967118263245, "learning_rate": 9.342392499379365e-06, "loss": 0.0109, "step": 29440 }, { "epoch": 0.4973485999932448, "grad_norm": 0.43086230754852295, "learning_rate": 9.341661733389675e-06, "loss": 0.0167, "step": 29450 }, { "epoch": 0.49751747897456683, "grad_norm": 2.3452517986297607, "learning_rate": 9.340930590207051e-06, "loss": 0.0114, "step": 29460 }, { "epoch": 0.49768635795588884, "grad_norm": 0.35918447375297546, "learning_rate": 9.340199069895016e-06, "loss": 0.0149, "step": 29470 }, { "epoch": 0.4978552369372108, "grad_norm": 0.2726699411869049, "learning_rate": 9.339467172517119e-06, "loss": 0.0164, "step": 29480 }, { "epoch": 0.4980241159185328, "grad_norm": 0.3156254291534424, "learning_rate": 9.338734898136948e-06, "loss": 0.0128, "step": 29490 }, { "epoch": 0.49819299489985475, "grad_norm": 0.2334623783826828, "learning_rate": 9.33800224681812e-06, "loss": 0.0143, "step": 29500 }, { "epoch": 0.49836187388117675, "grad_norm": 0.3837800920009613, "learning_rate": 9.337269218624287e-06, "loss": 0.0129, "step": 29510 }, { "epoch": 0.49853075286249876, "grad_norm": 0.23565301299095154, "learning_rate": 9.336535813619132e-06, "loss": 0.0162, "step": 29520 }, { "epoch": 0.4986996318438207, "grad_norm": 0.39907604455947876, "learning_rate": 9.335802031866372e-06, "loss": 0.0127, "step": 29530 }, { "epoch": 0.4988685108251427, "grad_norm": 0.33353081345558167, "learning_rate": 9.335067873429754e-06, "loss": 0.0103, "step": 29540 }, { "epoch": 0.49903738980646467, "grad_norm": 0.24564525485038757, "learning_rate": 9.334333338373066e-06, "loss": 0.0095, "step": 29550 }, { "epoch": 0.4992062687877867, "grad_norm": 0.30693161487579346, "learning_rate": 9.333598426760116e-06, "loss": 0.0168, "step": 29560 }, { "epoch": 0.49937514776910863, "grad_norm": 0.49876871705055237, "learning_rate": 9.332863138654752e-06, "loss": 0.0222, "step": 29570 }, { "epoch": 0.49954402675043064, "grad_norm": 0.6496544480323792, "learning_rate": 9.332127474120858e-06, "loss": 0.0141, "step": 29580 }, { "epoch": 0.49971290573175264, "grad_norm": 0.3440602719783783, "learning_rate": 9.331391433222343e-06, "loss": 0.0104, "step": 29590 }, { "epoch": 0.4998817847130746, "grad_norm": 0.4481584131717682, "learning_rate": 9.330655016023153e-06, "loss": 0.0127, "step": 29600 }, { "epoch": 0.5000506636943965, "grad_norm": 0.3733558654785156, "learning_rate": 9.329918222587268e-06, "loss": 0.0127, "step": 29610 }, { "epoch": 0.5002195426757186, "grad_norm": 0.26031407713890076, "learning_rate": 9.329181052978699e-06, "loss": 0.0102, "step": 29620 }, { "epoch": 0.5003884216570406, "grad_norm": 0.11726045608520508, "learning_rate": 9.328443507261486e-06, "loss": 0.0086, "step": 29630 }, { "epoch": 0.5005573006383626, "grad_norm": 0.49254775047302246, "learning_rate": 9.32770558549971e-06, "loss": 0.0131, "step": 29640 }, { "epoch": 0.5007261796196846, "grad_norm": 0.36943358182907104, "learning_rate": 9.326967287757476e-06, "loss": 0.0146, "step": 29650 }, { "epoch": 0.5008950586010065, "grad_norm": 0.2441185712814331, "learning_rate": 9.326228614098926e-06, "loss": 0.013, "step": 29660 }, { "epoch": 0.5010639375823285, "grad_norm": 0.34175777435302734, "learning_rate": 9.325489564588234e-06, "loss": 0.0126, "step": 29670 }, { "epoch": 0.5012328165636505, "grad_norm": 0.3529607355594635, "learning_rate": 9.32475013928961e-06, "loss": 0.015, "step": 29680 }, { "epoch": 0.5014016955449725, "grad_norm": 0.3955096900463104, "learning_rate": 9.324010338267291e-06, "loss": 0.0191, "step": 29690 }, { "epoch": 0.5015705745262945, "grad_norm": 0.31022563576698303, "learning_rate": 9.323270161585549e-06, "loss": 0.0157, "step": 29700 }, { "epoch": 0.5017394535076164, "grad_norm": 0.2583446204662323, "learning_rate": 9.322529609308689e-06, "loss": 0.0134, "step": 29710 }, { "epoch": 0.5019083324889384, "grad_norm": 0.3616587519645691, "learning_rate": 9.321788681501047e-06, "loss": 0.014, "step": 29720 }, { "epoch": 0.5020772114702604, "grad_norm": 0.32924264669418335, "learning_rate": 9.321047378226995e-06, "loss": 0.013, "step": 29730 }, { "epoch": 0.5022460904515824, "grad_norm": 0.14346452057361603, "learning_rate": 9.320305699550935e-06, "loss": 0.0151, "step": 29740 }, { "epoch": 0.5024149694329044, "grad_norm": 0.14449605345726013, "learning_rate": 9.319563645537302e-06, "loss": 0.0103, "step": 29750 }, { "epoch": 0.5025838484142263, "grad_norm": 0.6154966950416565, "learning_rate": 9.318821216250565e-06, "loss": 0.0192, "step": 29760 }, { "epoch": 0.5027527273955483, "grad_norm": 0.26221534609794617, "learning_rate": 9.318078411755223e-06, "loss": 0.0173, "step": 29770 }, { "epoch": 0.5029216063768703, "grad_norm": 0.19222542643547058, "learning_rate": 9.31733523211581e-06, "loss": 0.0127, "step": 29780 }, { "epoch": 0.5030904853581923, "grad_norm": 0.3713700473308563, "learning_rate": 9.316591677396888e-06, "loss": 0.0142, "step": 29790 }, { "epoch": 0.5032593643395143, "grad_norm": 0.17332643270492554, "learning_rate": 9.315847747663062e-06, "loss": 0.0218, "step": 29800 }, { "epoch": 0.5034282433208362, "grad_norm": 0.3924306035041809, "learning_rate": 9.315103442978958e-06, "loss": 0.0191, "step": 29810 }, { "epoch": 0.5035971223021583, "grad_norm": 0.2656623423099518, "learning_rate": 9.31435876340924e-06, "loss": 0.0144, "step": 29820 }, { "epoch": 0.5037660012834803, "grad_norm": 0.3251796066761017, "learning_rate": 9.313613709018606e-06, "loss": 0.0109, "step": 29830 }, { "epoch": 0.5039348802648023, "grad_norm": 0.3587132394313812, "learning_rate": 9.31286827987178e-06, "loss": 0.0121, "step": 29840 }, { "epoch": 0.5041037592461243, "grad_norm": 0.23871827125549316, "learning_rate": 9.31212247603353e-06, "loss": 0.0086, "step": 29850 }, { "epoch": 0.5042726382274462, "grad_norm": 0.31967130303382874, "learning_rate": 9.311376297568642e-06, "loss": 0.0199, "step": 29860 }, { "epoch": 0.5044415172087682, "grad_norm": 0.47919902205467224, "learning_rate": 9.310629744541948e-06, "loss": 0.0123, "step": 29870 }, { "epoch": 0.5046103961900902, "grad_norm": 0.42347270250320435, "learning_rate": 9.309882817018304e-06, "loss": 0.0135, "step": 29880 }, { "epoch": 0.5047792751714122, "grad_norm": 0.4722088575363159, "learning_rate": 9.3091355150626e-06, "loss": 0.0128, "step": 29890 }, { "epoch": 0.5049481541527342, "grad_norm": 0.28346312046051025, "learning_rate": 9.308387838739763e-06, "loss": 0.0166, "step": 29900 }, { "epoch": 0.5051170331340561, "grad_norm": 0.11988615244626999, "learning_rate": 9.307639788114747e-06, "loss": 0.0119, "step": 29910 }, { "epoch": 0.5052859121153781, "grad_norm": 0.33640098571777344, "learning_rate": 9.306891363252542e-06, "loss": 0.0128, "step": 29920 }, { "epoch": 0.5054547910967001, "grad_norm": 0.40779417753219604, "learning_rate": 9.306142564218169e-06, "loss": 0.0143, "step": 29930 }, { "epoch": 0.5056236700780221, "grad_norm": 0.2447594553232193, "learning_rate": 9.305393391076682e-06, "loss": 0.015, "step": 29940 }, { "epoch": 0.5057925490593441, "grad_norm": 0.34052449464797974, "learning_rate": 9.304643843893167e-06, "loss": 0.0164, "step": 29950 }, { "epoch": 0.505961428040666, "grad_norm": 0.4789716899394989, "learning_rate": 9.30389392273274e-06, "loss": 0.0164, "step": 29960 }, { "epoch": 0.506130307021988, "grad_norm": 0.3300081193447113, "learning_rate": 9.303143627660559e-06, "loss": 0.0155, "step": 29970 }, { "epoch": 0.50629918600331, "grad_norm": 0.35322800278663635, "learning_rate": 9.302392958741801e-06, "loss": 0.0124, "step": 29980 }, { "epoch": 0.506468064984632, "grad_norm": 0.4541436433792114, "learning_rate": 9.301641916041688e-06, "loss": 0.0145, "step": 29990 }, { "epoch": 0.506636943965954, "grad_norm": 0.37867382168769836, "learning_rate": 9.300890499625464e-06, "loss": 0.015, "step": 30000 }, { "epoch": 0.5068058229472759, "grad_norm": 0.24657270312309265, "learning_rate": 9.300138709558412e-06, "loss": 0.0091, "step": 30010 }, { "epoch": 0.506974701928598, "grad_norm": 0.4337073266506195, "learning_rate": 9.29938654590585e-06, "loss": 0.0098, "step": 30020 }, { "epoch": 0.50714358090992, "grad_norm": 0.2574459910392761, "learning_rate": 9.298634008733115e-06, "loss": 0.0153, "step": 30030 }, { "epoch": 0.507312459891242, "grad_norm": 0.27473774552345276, "learning_rate": 9.297881098105593e-06, "loss": 0.0188, "step": 30040 }, { "epoch": 0.507481338872564, "grad_norm": 0.37280982732772827, "learning_rate": 9.297127814088692e-06, "loss": 0.0163, "step": 30050 }, { "epoch": 0.5076502178538859, "grad_norm": 0.6914968490600586, "learning_rate": 9.296374156747855e-06, "loss": 0.0129, "step": 30060 }, { "epoch": 0.5078190968352079, "grad_norm": 0.2348087728023529, "learning_rate": 9.295620126148559e-06, "loss": 0.0109, "step": 30070 }, { "epoch": 0.5079879758165299, "grad_norm": 0.3751346170902252, "learning_rate": 9.294865722356315e-06, "loss": 0.0159, "step": 30080 }, { "epoch": 0.5081568547978519, "grad_norm": 0.27336037158966064, "learning_rate": 9.294110945436658e-06, "loss": 0.0101, "step": 30090 }, { "epoch": 0.5083257337791739, "grad_norm": 0.3609543740749359, "learning_rate": 9.293355795455166e-06, "loss": 0.0174, "step": 30100 }, { "epoch": 0.5084946127604958, "grad_norm": 0.27312931418418884, "learning_rate": 9.292600272477443e-06, "loss": 0.0087, "step": 30110 }, { "epoch": 0.5086634917418178, "grad_norm": 0.5354933142662048, "learning_rate": 9.291844376569125e-06, "loss": 0.0154, "step": 30120 }, { "epoch": 0.5088323707231398, "grad_norm": 0.3058169484138489, "learning_rate": 9.291088107795887e-06, "loss": 0.0141, "step": 30130 }, { "epoch": 0.5090012497044618, "grad_norm": 0.1657126098871231, "learning_rate": 9.290331466223427e-06, "loss": 0.0154, "step": 30140 }, { "epoch": 0.5091701286857838, "grad_norm": 0.34583884477615356, "learning_rate": 9.289574451917482e-06, "loss": 0.0155, "step": 30150 }, { "epoch": 0.5093390076671057, "grad_norm": 0.25139284133911133, "learning_rate": 9.28881706494382e-06, "loss": 0.0179, "step": 30160 }, { "epoch": 0.5095078866484277, "grad_norm": 0.419965535402298, "learning_rate": 9.288059305368242e-06, "loss": 0.0134, "step": 30170 }, { "epoch": 0.5096767656297497, "grad_norm": 0.38628333806991577, "learning_rate": 9.287301173256577e-06, "loss": 0.0184, "step": 30180 }, { "epoch": 0.5098456446110717, "grad_norm": 0.3726312518119812, "learning_rate": 9.286542668674692e-06, "loss": 0.0096, "step": 30190 }, { "epoch": 0.5100145235923937, "grad_norm": 0.2589775025844574, "learning_rate": 9.285783791688484e-06, "loss": 0.0147, "step": 30200 }, { "epoch": 0.5101834025737156, "grad_norm": 0.3010457158088684, "learning_rate": 9.285024542363882e-06, "loss": 0.0121, "step": 30210 }, { "epoch": 0.5103522815550376, "grad_norm": 1.1520979404449463, "learning_rate": 9.284264920766849e-06, "loss": 0.0116, "step": 30220 }, { "epoch": 0.5105211605363597, "grad_norm": 0.21618935465812683, "learning_rate": 9.283504926963375e-06, "loss": 0.0086, "step": 30230 }, { "epoch": 0.5106900395176817, "grad_norm": 0.22205866873264313, "learning_rate": 9.28274456101949e-06, "loss": 0.0122, "step": 30240 }, { "epoch": 0.5108589184990037, "grad_norm": 0.5822135806083679, "learning_rate": 9.281983823001252e-06, "loss": 0.0142, "step": 30250 }, { "epoch": 0.5110277974803256, "grad_norm": 0.3795391917228699, "learning_rate": 9.281222712974752e-06, "loss": 0.0129, "step": 30260 }, { "epoch": 0.5111966764616476, "grad_norm": 0.5903266668319702, "learning_rate": 9.280461231006114e-06, "loss": 0.0175, "step": 30270 }, { "epoch": 0.5113655554429696, "grad_norm": 0.4451376497745514, "learning_rate": 9.279699377161492e-06, "loss": 0.0173, "step": 30280 }, { "epoch": 0.5115344344242916, "grad_norm": 0.3659999966621399, "learning_rate": 9.278937151507075e-06, "loss": 0.0109, "step": 30290 }, { "epoch": 0.5117033134056136, "grad_norm": 0.4765652120113373, "learning_rate": 9.278174554109085e-06, "loss": 0.0176, "step": 30300 }, { "epoch": 0.5118721923869355, "grad_norm": 0.36840739846229553, "learning_rate": 9.277411585033772e-06, "loss": 0.0159, "step": 30310 }, { "epoch": 0.5120410713682575, "grad_norm": 0.28574371337890625, "learning_rate": 9.276648244347421e-06, "loss": 0.0151, "step": 30320 }, { "epoch": 0.5122099503495795, "grad_norm": 0.39012137055397034, "learning_rate": 9.27588453211635e-06, "loss": 0.0121, "step": 30330 }, { "epoch": 0.5123788293309015, "grad_norm": 0.23647305369377136, "learning_rate": 9.275120448406908e-06, "loss": 0.0141, "step": 30340 }, { "epoch": 0.5125477083122235, "grad_norm": 0.3505719304084778, "learning_rate": 9.274355993285477e-06, "loss": 0.0139, "step": 30350 }, { "epoch": 0.5127165872935454, "grad_norm": 0.30204328894615173, "learning_rate": 9.273591166818473e-06, "loss": 0.0203, "step": 30360 }, { "epoch": 0.5128854662748674, "grad_norm": 0.25394129753112793, "learning_rate": 9.272825969072338e-06, "loss": 0.0113, "step": 30370 }, { "epoch": 0.5130543452561894, "grad_norm": 0.23150768876075745, "learning_rate": 9.272060400113554e-06, "loss": 0.0127, "step": 30380 }, { "epoch": 0.5132232242375114, "grad_norm": 0.2907000482082367, "learning_rate": 9.27129446000863e-06, "loss": 0.0124, "step": 30390 }, { "epoch": 0.5133921032188333, "grad_norm": 0.4923355281352997, "learning_rate": 9.270528148824111e-06, "loss": 0.0183, "step": 30400 }, { "epoch": 0.5135609822001553, "grad_norm": 0.19186772406101227, "learning_rate": 9.26976146662657e-06, "loss": 0.0105, "step": 30410 }, { "epoch": 0.5137298611814773, "grad_norm": 0.18947818875312805, "learning_rate": 9.268994413482615e-06, "loss": 0.0164, "step": 30420 }, { "epoch": 0.5138987401627994, "grad_norm": 0.22649823129177094, "learning_rate": 9.268226989458886e-06, "loss": 0.0154, "step": 30430 }, { "epoch": 0.5140676191441214, "grad_norm": 0.4494793117046356, "learning_rate": 9.267459194622056e-06, "loss": 0.0174, "step": 30440 }, { "epoch": 0.5142364981254433, "grad_norm": 0.4076986610889435, "learning_rate": 9.266691029038826e-06, "loss": 0.0148, "step": 30450 }, { "epoch": 0.5144053771067653, "grad_norm": 0.7865347862243652, "learning_rate": 9.265922492775936e-06, "loss": 0.0203, "step": 30460 }, { "epoch": 0.5145742560880873, "grad_norm": 0.42406103014945984, "learning_rate": 9.265153585900152e-06, "loss": 0.0159, "step": 30470 }, { "epoch": 0.5147431350694093, "grad_norm": 0.24143293499946594, "learning_rate": 9.264384308478276e-06, "loss": 0.0102, "step": 30480 }, { "epoch": 0.5149120140507313, "grad_norm": 0.28295716643333435, "learning_rate": 9.263614660577142e-06, "loss": 0.0158, "step": 30490 }, { "epoch": 0.5150808930320532, "grad_norm": 0.4748977720737457, "learning_rate": 9.262844642263612e-06, "loss": 0.0146, "step": 30500 }, { "epoch": 0.5152497720133752, "grad_norm": 0.3570065200328827, "learning_rate": 9.262074253604586e-06, "loss": 0.0159, "step": 30510 }, { "epoch": 0.5154186509946972, "grad_norm": 0.43000319600105286, "learning_rate": 9.261303494666991e-06, "loss": 0.0249, "step": 30520 }, { "epoch": 0.5155875299760192, "grad_norm": 0.2546686828136444, "learning_rate": 9.260532365517791e-06, "loss": 0.0167, "step": 30530 }, { "epoch": 0.5157564089573412, "grad_norm": 0.39924725890159607, "learning_rate": 9.259760866223981e-06, "loss": 0.0157, "step": 30540 }, { "epoch": 0.5159252879386631, "grad_norm": 0.26183950901031494, "learning_rate": 9.258988996852582e-06, "loss": 0.0109, "step": 30550 }, { "epoch": 0.5160941669199851, "grad_norm": 0.27430590987205505, "learning_rate": 9.258216757470658e-06, "loss": 0.0155, "step": 30560 }, { "epoch": 0.5162630459013071, "grad_norm": 0.794164776802063, "learning_rate": 9.257444148145294e-06, "loss": 0.0161, "step": 30570 }, { "epoch": 0.5164319248826291, "grad_norm": 0.3845657706260681, "learning_rate": 9.256671168943614e-06, "loss": 0.018, "step": 30580 }, { "epoch": 0.5166008038639511, "grad_norm": 0.2951459586620331, "learning_rate": 9.255897819932776e-06, "loss": 0.0143, "step": 30590 }, { "epoch": 0.516769682845273, "grad_norm": 0.25204330682754517, "learning_rate": 9.255124101179962e-06, "loss": 0.0111, "step": 30600 }, { "epoch": 0.516938561826595, "grad_norm": 0.2234216034412384, "learning_rate": 9.254350012752393e-06, "loss": 0.0208, "step": 30610 }, { "epoch": 0.517107440807917, "grad_norm": 0.5230227708816528, "learning_rate": 9.25357555471732e-06, "loss": 0.0124, "step": 30620 }, { "epoch": 0.517276319789239, "grad_norm": 0.2835043966770172, "learning_rate": 9.252800727142026e-06, "loss": 0.0156, "step": 30630 }, { "epoch": 0.517445198770561, "grad_norm": 0.3819841742515564, "learning_rate": 9.252025530093826e-06, "loss": 0.0144, "step": 30640 }, { "epoch": 0.517614077751883, "grad_norm": 0.348182737827301, "learning_rate": 9.251249963640066e-06, "loss": 0.0196, "step": 30650 }, { "epoch": 0.517782956733205, "grad_norm": 0.32966944575309753, "learning_rate": 9.250474027848127e-06, "loss": 0.0127, "step": 30660 }, { "epoch": 0.517951835714527, "grad_norm": 0.5100705027580261, "learning_rate": 9.249697722785419e-06, "loss": 0.013, "step": 30670 }, { "epoch": 0.518120714695849, "grad_norm": 0.3094395697116852, "learning_rate": 9.248921048519387e-06, "loss": 0.02, "step": 30680 }, { "epoch": 0.518289593677171, "grad_norm": 0.3979929983615875, "learning_rate": 9.248144005117505e-06, "loss": 0.0194, "step": 30690 }, { "epoch": 0.5184584726584929, "grad_norm": 0.37635311484336853, "learning_rate": 9.247366592647282e-06, "loss": 0.0108, "step": 30700 }, { "epoch": 0.5186273516398149, "grad_norm": 0.3201894164085388, "learning_rate": 9.246588811176256e-06, "loss": 0.0167, "step": 30710 }, { "epoch": 0.5187962306211369, "grad_norm": 0.6751912832260132, "learning_rate": 9.245810660772e-06, "loss": 0.0167, "step": 30720 }, { "epoch": 0.5189651096024589, "grad_norm": 0.3691496253013611, "learning_rate": 9.245032141502119e-06, "loss": 0.0255, "step": 30730 }, { "epoch": 0.5191339885837809, "grad_norm": 0.4814707934856415, "learning_rate": 9.244253253434247e-06, "loss": 0.0119, "step": 30740 }, { "epoch": 0.5193028675651028, "grad_norm": 0.2586955428123474, "learning_rate": 9.243473996636054e-06, "loss": 0.0118, "step": 30750 }, { "epoch": 0.5194717465464248, "grad_norm": 0.2969575822353363, "learning_rate": 9.242694371175236e-06, "loss": 0.0172, "step": 30760 }, { "epoch": 0.5196406255277468, "grad_norm": 0.28661611676216125, "learning_rate": 9.241914377119528e-06, "loss": 0.0158, "step": 30770 }, { "epoch": 0.5198095045090688, "grad_norm": 0.178216353058815, "learning_rate": 9.241134014536693e-06, "loss": 0.012, "step": 30780 }, { "epoch": 0.5199783834903908, "grad_norm": 0.5145743489265442, "learning_rate": 9.240353283494525e-06, "loss": 0.0142, "step": 30790 }, { "epoch": 0.5201472624717127, "grad_norm": 0.5103529095649719, "learning_rate": 9.239572184060856e-06, "loss": 0.0155, "step": 30800 }, { "epoch": 0.5203161414530347, "grad_norm": 0.25168853998184204, "learning_rate": 9.238790716303545e-06, "loss": 0.0141, "step": 30810 }, { "epoch": 0.5204850204343567, "grad_norm": 0.26282262802124023, "learning_rate": 9.238008880290484e-06, "loss": 0.0138, "step": 30820 }, { "epoch": 0.5206538994156787, "grad_norm": 0.25692182779312134, "learning_rate": 9.237226676089594e-06, "loss": 0.018, "step": 30830 }, { "epoch": 0.5208227783970008, "grad_norm": 0.5828396677970886, "learning_rate": 9.236444103768833e-06, "loss": 0.0129, "step": 30840 }, { "epoch": 0.5209916573783226, "grad_norm": 0.3552342653274536, "learning_rate": 9.23566116339619e-06, "loss": 0.0113, "step": 30850 }, { "epoch": 0.5211605363596447, "grad_norm": 0.30298686027526855, "learning_rate": 9.234877855039684e-06, "loss": 0.0132, "step": 30860 }, { "epoch": 0.5213294153409667, "grad_norm": 0.13478034734725952, "learning_rate": 9.234094178767368e-06, "loss": 0.022, "step": 30870 }, { "epoch": 0.5214982943222887, "grad_norm": 0.509110152721405, "learning_rate": 9.233310134647324e-06, "loss": 0.0141, "step": 30880 }, { "epoch": 0.5216671733036107, "grad_norm": 0.20238731801509857, "learning_rate": 9.232525722747668e-06, "loss": 0.0183, "step": 30890 }, { "epoch": 0.5218360522849326, "grad_norm": 0.3296874463558197, "learning_rate": 9.23174094313655e-06, "loss": 0.0151, "step": 30900 }, { "epoch": 0.5220049312662546, "grad_norm": 0.4694978594779968, "learning_rate": 9.230955795882146e-06, "loss": 0.0142, "step": 30910 }, { "epoch": 0.5221738102475766, "grad_norm": 0.4024386703968048, "learning_rate": 9.230170281052672e-06, "loss": 0.0149, "step": 30920 }, { "epoch": 0.5223426892288986, "grad_norm": 0.43894240260124207, "learning_rate": 9.229384398716368e-06, "loss": 0.0115, "step": 30930 }, { "epoch": 0.5225115682102206, "grad_norm": 0.29999879002571106, "learning_rate": 9.228598148941512e-06, "loss": 0.0116, "step": 30940 }, { "epoch": 0.5226804471915425, "grad_norm": 0.35205358266830444, "learning_rate": 9.22781153179641e-06, "loss": 0.0179, "step": 30950 }, { "epoch": 0.5228493261728645, "grad_norm": 0.4561704099178314, "learning_rate": 9.227024547349403e-06, "loss": 0.0159, "step": 30960 }, { "epoch": 0.5230182051541865, "grad_norm": 0.5980950593948364, "learning_rate": 9.22623719566886e-06, "loss": 0.0173, "step": 30970 }, { "epoch": 0.5231870841355085, "grad_norm": 0.7728056311607361, "learning_rate": 9.225449476823185e-06, "loss": 0.0226, "step": 30980 }, { "epoch": 0.5233559631168305, "grad_norm": 0.3436724543571472, "learning_rate": 9.224661390880814e-06, "loss": 0.0114, "step": 30990 }, { "epoch": 0.5235248420981524, "grad_norm": 0.4754765033721924, "learning_rate": 9.223872937910214e-06, "loss": 0.0178, "step": 31000 }, { "epoch": 0.5236937210794744, "grad_norm": 0.23653709888458252, "learning_rate": 9.223084117979883e-06, "loss": 0.0133, "step": 31010 }, { "epoch": 0.5238626000607964, "grad_norm": 0.315319299697876, "learning_rate": 9.22229493115835e-06, "loss": 0.0147, "step": 31020 }, { "epoch": 0.5240314790421184, "grad_norm": 0.28836047649383545, "learning_rate": 9.221505377514182e-06, "loss": 0.0113, "step": 31030 }, { "epoch": 0.5242003580234404, "grad_norm": 0.2774013578891754, "learning_rate": 9.220715457115969e-06, "loss": 0.013, "step": 31040 }, { "epoch": 0.5243692370047623, "grad_norm": 0.3629789650440216, "learning_rate": 9.219925170032341e-06, "loss": 0.0105, "step": 31050 }, { "epoch": 0.5245381159860844, "grad_norm": 0.41096028685569763, "learning_rate": 9.219134516331955e-06, "loss": 0.018, "step": 31060 }, { "epoch": 0.5247069949674064, "grad_norm": 0.30309590697288513, "learning_rate": 9.2183434960835e-06, "loss": 0.0142, "step": 31070 }, { "epoch": 0.5248758739487284, "grad_norm": 0.3561359941959381, "learning_rate": 9.217552109355699e-06, "loss": 0.0076, "step": 31080 }, { "epoch": 0.5250447529300504, "grad_norm": 0.2163129448890686, "learning_rate": 9.216760356217304e-06, "loss": 0.0171, "step": 31090 }, { "epoch": 0.5252136319113723, "grad_norm": 0.35220658779144287, "learning_rate": 9.215968236737103e-06, "loss": 0.013, "step": 31100 }, { "epoch": 0.5253825108926943, "grad_norm": 0.3723348379135132, "learning_rate": 9.215175750983912e-06, "loss": 0.0126, "step": 31110 }, { "epoch": 0.5255513898740163, "grad_norm": 0.34597912430763245, "learning_rate": 9.21438289902658e-06, "loss": 0.0197, "step": 31120 }, { "epoch": 0.5257202688553383, "grad_norm": 0.47360140085220337, "learning_rate": 9.213589680933989e-06, "loss": 0.014, "step": 31130 }, { "epoch": 0.5258891478366603, "grad_norm": 0.36407822370529175, "learning_rate": 9.212796096775052e-06, "loss": 0.011, "step": 31140 }, { "epoch": 0.5260580268179822, "grad_norm": 0.6137476563453674, "learning_rate": 9.212002146618711e-06, "loss": 0.0102, "step": 31150 }, { "epoch": 0.5262269057993042, "grad_norm": 0.3650795817375183, "learning_rate": 9.211207830533947e-06, "loss": 0.0129, "step": 31160 }, { "epoch": 0.5263957847806262, "grad_norm": 0.1744905561208725, "learning_rate": 9.210413148589763e-06, "loss": 0.0094, "step": 31170 }, { "epoch": 0.5265646637619482, "grad_norm": 0.4177727997303009, "learning_rate": 9.209618100855205e-06, "loss": 0.0172, "step": 31180 }, { "epoch": 0.5267335427432702, "grad_norm": 0.31150588393211365, "learning_rate": 9.208822687399338e-06, "loss": 0.015, "step": 31190 }, { "epoch": 0.5269024217245921, "grad_norm": 0.1474548876285553, "learning_rate": 9.208026908291271e-06, "loss": 0.0148, "step": 31200 }, { "epoch": 0.5270713007059141, "grad_norm": 0.15506739914417267, "learning_rate": 9.207230763600139e-06, "loss": 0.0161, "step": 31210 }, { "epoch": 0.5272401796872361, "grad_norm": 0.37107545137405396, "learning_rate": 9.206434253395107e-06, "loss": 0.0162, "step": 31220 }, { "epoch": 0.5274090586685581, "grad_norm": 0.20294900238513947, "learning_rate": 9.205637377745373e-06, "loss": 0.0169, "step": 31230 }, { "epoch": 0.5275779376498801, "grad_norm": 0.19080664217472076, "learning_rate": 9.204840136720169e-06, "loss": 0.0152, "step": 31240 }, { "epoch": 0.527746816631202, "grad_norm": 0.17738643288612366, "learning_rate": 9.204042530388757e-06, "loss": 0.0064, "step": 31250 }, { "epoch": 0.527915695612524, "grad_norm": 0.19939658045768738, "learning_rate": 9.203244558820433e-06, "loss": 0.0125, "step": 31260 }, { "epoch": 0.5280845745938461, "grad_norm": 0.6183249950408936, "learning_rate": 9.20244622208452e-06, "loss": 0.0165, "step": 31270 }, { "epoch": 0.5282534535751681, "grad_norm": 0.8269907832145691, "learning_rate": 9.20164752025038e-06, "loss": 0.014, "step": 31280 }, { "epoch": 0.5284223325564901, "grad_norm": 0.6353688836097717, "learning_rate": 9.200848453387395e-06, "loss": 0.0131, "step": 31290 }, { "epoch": 0.528591211537812, "grad_norm": 0.4062422513961792, "learning_rate": 9.20004902156499e-06, "loss": 0.0132, "step": 31300 }, { "epoch": 0.528760090519134, "grad_norm": 0.6156937479972839, "learning_rate": 9.19924922485262e-06, "loss": 0.0137, "step": 31310 }, { "epoch": 0.528928969500456, "grad_norm": 0.3136243224143982, "learning_rate": 9.198449063319766e-06, "loss": 0.0093, "step": 31320 }, { "epoch": 0.529097848481778, "grad_norm": 0.2469533383846283, "learning_rate": 9.197648537035946e-06, "loss": 0.0112, "step": 31330 }, { "epoch": 0.5292667274631, "grad_norm": 0.3834691047668457, "learning_rate": 9.196847646070706e-06, "loss": 0.0145, "step": 31340 }, { "epoch": 0.5294356064444219, "grad_norm": 0.44823533296585083, "learning_rate": 9.196046390493626e-06, "loss": 0.0133, "step": 31350 }, { "epoch": 0.5296044854257439, "grad_norm": 0.27802425622940063, "learning_rate": 9.19524477037432e-06, "loss": 0.0111, "step": 31360 }, { "epoch": 0.5297733644070659, "grad_norm": 0.4278354346752167, "learning_rate": 9.194442785782426e-06, "loss": 0.0133, "step": 31370 }, { "epoch": 0.5299422433883879, "grad_norm": 0.3176576793193817, "learning_rate": 9.193640436787619e-06, "loss": 0.013, "step": 31380 }, { "epoch": 0.5301111223697099, "grad_norm": 0.5662391185760498, "learning_rate": 9.19283772345961e-06, "loss": 0.0157, "step": 31390 }, { "epoch": 0.5302800013510318, "grad_norm": 0.44036537408828735, "learning_rate": 9.19203464586813e-06, "loss": 0.0112, "step": 31400 }, { "epoch": 0.5304488803323538, "grad_norm": 0.2594541013240814, "learning_rate": 9.191231204082955e-06, "loss": 0.0102, "step": 31410 }, { "epoch": 0.5306177593136758, "grad_norm": 0.40400993824005127, "learning_rate": 9.190427398173885e-06, "loss": 0.0102, "step": 31420 }, { "epoch": 0.5307866382949978, "grad_norm": 0.5587798953056335, "learning_rate": 9.189623228210746e-06, "loss": 0.0183, "step": 31430 }, { "epoch": 0.5309555172763198, "grad_norm": 0.3522118628025055, "learning_rate": 9.18881869426341e-06, "loss": 0.0135, "step": 31440 }, { "epoch": 0.5311243962576417, "grad_norm": 0.5850970149040222, "learning_rate": 9.18801379640177e-06, "loss": 0.0122, "step": 31450 }, { "epoch": 0.5312932752389637, "grad_norm": 0.27807551622390747, "learning_rate": 9.187208534695753e-06, "loss": 0.0168, "step": 31460 }, { "epoch": 0.5314621542202858, "grad_norm": 0.2982378900051117, "learning_rate": 9.18640290921532e-06, "loss": 0.0182, "step": 31470 }, { "epoch": 0.5316310332016078, "grad_norm": 0.44534677267074585, "learning_rate": 9.185596920030458e-06, "loss": 0.013, "step": 31480 }, { "epoch": 0.5317999121829297, "grad_norm": 0.7885233163833618, "learning_rate": 9.184790567211193e-06, "loss": 0.0149, "step": 31490 }, { "epoch": 0.5319687911642517, "grad_norm": 0.2723133862018585, "learning_rate": 9.183983850827578e-06, "loss": 0.0095, "step": 31500 }, { "epoch": 0.5321376701455737, "grad_norm": 0.20422331988811493, "learning_rate": 9.183176770949698e-06, "loss": 0.0125, "step": 31510 }, { "epoch": 0.5323065491268957, "grad_norm": 0.2284206598997116, "learning_rate": 9.18236932764767e-06, "loss": 0.0115, "step": 31520 }, { "epoch": 0.5324754281082177, "grad_norm": 0.5572451949119568, "learning_rate": 9.181561520991645e-06, "loss": 0.0204, "step": 31530 }, { "epoch": 0.5326443070895396, "grad_norm": 0.5859349966049194, "learning_rate": 9.1807533510518e-06, "loss": 0.0162, "step": 31540 }, { "epoch": 0.5328131860708616, "grad_norm": 0.7416244745254517, "learning_rate": 9.17994481789835e-06, "loss": 0.0135, "step": 31550 }, { "epoch": 0.5329820650521836, "grad_norm": 0.479188472032547, "learning_rate": 9.179135921601536e-06, "loss": 0.0165, "step": 31560 }, { "epoch": 0.5331509440335056, "grad_norm": 0.18544001877307892, "learning_rate": 9.178326662231632e-06, "loss": 0.0106, "step": 31570 }, { "epoch": 0.5333198230148276, "grad_norm": 0.6648015379905701, "learning_rate": 9.177517039858948e-06, "loss": 0.0131, "step": 31580 }, { "epoch": 0.5334887019961495, "grad_norm": 0.4454619288444519, "learning_rate": 9.176707054553817e-06, "loss": 0.0161, "step": 31590 }, { "epoch": 0.5336575809774715, "grad_norm": 0.4183807969093323, "learning_rate": 9.175896706386615e-06, "loss": 0.0119, "step": 31600 }, { "epoch": 0.5338264599587935, "grad_norm": 0.27445173263549805, "learning_rate": 9.175085995427738e-06, "loss": 0.0117, "step": 31610 }, { "epoch": 0.5339953389401155, "grad_norm": 0.21824197471141815, "learning_rate": 9.174274921747621e-06, "loss": 0.0115, "step": 31620 }, { "epoch": 0.5341642179214375, "grad_norm": 0.38134610652923584, "learning_rate": 9.173463485416727e-06, "loss": 0.0144, "step": 31630 }, { "epoch": 0.5343330969027594, "grad_norm": 0.2050897181034088, "learning_rate": 9.172651686505552e-06, "loss": 0.0124, "step": 31640 }, { "epoch": 0.5345019758840814, "grad_norm": 0.3943819999694824, "learning_rate": 9.171839525084622e-06, "loss": 0.0155, "step": 31650 }, { "epoch": 0.5346708548654034, "grad_norm": 0.3804088234901428, "learning_rate": 9.171027001224498e-06, "loss": 0.0142, "step": 31660 }, { "epoch": 0.5348397338467255, "grad_norm": 0.47763702273368835, "learning_rate": 9.170214114995769e-06, "loss": 0.0151, "step": 31670 }, { "epoch": 0.5350086128280475, "grad_norm": 0.2652091383934021, "learning_rate": 9.169400866469054e-06, "loss": 0.0159, "step": 31680 }, { "epoch": 0.5351774918093694, "grad_norm": 0.5795924067497253, "learning_rate": 9.16858725571501e-06, "loss": 0.0101, "step": 31690 }, { "epoch": 0.5353463707906914, "grad_norm": 0.4437442123889923, "learning_rate": 9.16777328280432e-06, "loss": 0.0111, "step": 31700 }, { "epoch": 0.5355152497720134, "grad_norm": 0.12674549221992493, "learning_rate": 9.1669589478077e-06, "loss": 0.013, "step": 31710 }, { "epoch": 0.5356841287533354, "grad_norm": 0.49171286821365356, "learning_rate": 9.166144250795897e-06, "loss": 0.0181, "step": 31720 }, { "epoch": 0.5358530077346574, "grad_norm": 0.3093448877334595, "learning_rate": 9.165329191839689e-06, "loss": 0.0172, "step": 31730 }, { "epoch": 0.5360218867159793, "grad_norm": 0.7231780290603638, "learning_rate": 9.16451377100989e-06, "loss": 0.0157, "step": 31740 }, { "epoch": 0.5361907656973013, "grad_norm": 0.46727633476257324, "learning_rate": 9.163697988377335e-06, "loss": 0.0232, "step": 31750 }, { "epoch": 0.5363596446786233, "grad_norm": 0.31842392683029175, "learning_rate": 9.162881844012903e-06, "loss": 0.0158, "step": 31760 }, { "epoch": 0.5365285236599453, "grad_norm": 0.25221991539001465, "learning_rate": 9.162065337987499e-06, "loss": 0.0108, "step": 31770 }, { "epoch": 0.5366974026412673, "grad_norm": 0.2623094916343689, "learning_rate": 9.161248470372055e-06, "loss": 0.017, "step": 31780 }, { "epoch": 0.5368662816225892, "grad_norm": 0.4803834557533264, "learning_rate": 9.160431241237542e-06, "loss": 0.0204, "step": 31790 }, { "epoch": 0.5370351606039112, "grad_norm": 0.33160287141799927, "learning_rate": 9.159613650654957e-06, "loss": 0.0123, "step": 31800 }, { "epoch": 0.5372040395852332, "grad_norm": 0.6653539538383484, "learning_rate": 9.15879569869533e-06, "loss": 0.0152, "step": 31810 }, { "epoch": 0.5373729185665552, "grad_norm": 0.3028746247291565, "learning_rate": 9.157977385429723e-06, "loss": 0.0133, "step": 31820 }, { "epoch": 0.5375417975478772, "grad_norm": 0.2905671298503876, "learning_rate": 9.15715871092923e-06, "loss": 0.013, "step": 31830 }, { "epoch": 0.5377106765291991, "grad_norm": 0.35846051573753357, "learning_rate": 9.156339675264976e-06, "loss": 0.0145, "step": 31840 }, { "epoch": 0.5378795555105211, "grad_norm": 0.5054438710212708, "learning_rate": 9.155520278508115e-06, "loss": 0.0186, "step": 31850 }, { "epoch": 0.5380484344918431, "grad_norm": 0.27032336592674255, "learning_rate": 9.154700520729834e-06, "loss": 0.0108, "step": 31860 }, { "epoch": 0.5382173134731651, "grad_norm": 0.17470356822013855, "learning_rate": 9.153880402001351e-06, "loss": 0.0121, "step": 31870 }, { "epoch": 0.5383861924544872, "grad_norm": 0.2616274952888489, "learning_rate": 9.15305992239392e-06, "loss": 0.0166, "step": 31880 }, { "epoch": 0.538555071435809, "grad_norm": 0.3361271619796753, "learning_rate": 9.152239081978817e-06, "loss": 0.0103, "step": 31890 }, { "epoch": 0.5387239504171311, "grad_norm": 0.3247832953929901, "learning_rate": 9.15141788082736e-06, "loss": 0.0187, "step": 31900 }, { "epoch": 0.5388928293984531, "grad_norm": 0.33589720726013184, "learning_rate": 9.150596319010888e-06, "loss": 0.0139, "step": 31910 }, { "epoch": 0.5390617083797751, "grad_norm": 0.33797749876976013, "learning_rate": 9.14977439660078e-06, "loss": 0.0166, "step": 31920 }, { "epoch": 0.5392305873610971, "grad_norm": 0.41640347242355347, "learning_rate": 9.148952113668439e-06, "loss": 0.0175, "step": 31930 }, { "epoch": 0.539399466342419, "grad_norm": 0.5657302737236023, "learning_rate": 9.148129470285305e-06, "loss": 0.0146, "step": 31940 }, { "epoch": 0.539568345323741, "grad_norm": 0.2652813792228699, "learning_rate": 9.147306466522848e-06, "loss": 0.0108, "step": 31950 }, { "epoch": 0.539737224305063, "grad_norm": 0.3112548291683197, "learning_rate": 9.146483102452567e-06, "loss": 0.0132, "step": 31960 }, { "epoch": 0.539906103286385, "grad_norm": 0.35957586765289307, "learning_rate": 9.145659378145995e-06, "loss": 0.015, "step": 31970 }, { "epoch": 0.540074982267707, "grad_norm": 0.26101934909820557, "learning_rate": 9.144835293674695e-06, "loss": 0.0129, "step": 31980 }, { "epoch": 0.5402438612490289, "grad_norm": 0.36113834381103516, "learning_rate": 9.14401084911026e-06, "loss": 0.0111, "step": 31990 }, { "epoch": 0.5404127402303509, "grad_norm": 0.2403276264667511, "learning_rate": 9.143186044524317e-06, "loss": 0.0089, "step": 32000 }, { "epoch": 0.5405816192116729, "grad_norm": 0.428151398897171, "learning_rate": 9.142360879988524e-06, "loss": 0.0124, "step": 32010 }, { "epoch": 0.5407504981929949, "grad_norm": 0.27429208159446716, "learning_rate": 9.141535355574566e-06, "loss": 0.0127, "step": 32020 }, { "epoch": 0.5409193771743169, "grad_norm": 0.41957202553749084, "learning_rate": 9.140709471354165e-06, "loss": 0.0225, "step": 32030 }, { "epoch": 0.5410882561556388, "grad_norm": 0.201284259557724, "learning_rate": 9.139883227399073e-06, "loss": 0.0105, "step": 32040 }, { "epoch": 0.5412571351369608, "grad_norm": 0.20520606637001038, "learning_rate": 9.139056623781071e-06, "loss": 0.0175, "step": 32050 }, { "epoch": 0.5414260141182828, "grad_norm": 0.2689041197299957, "learning_rate": 9.138229660571969e-06, "loss": 0.0101, "step": 32060 }, { "epoch": 0.5415948930996048, "grad_norm": 0.5616424679756165, "learning_rate": 9.137402337843618e-06, "loss": 0.0118, "step": 32070 }, { "epoch": 0.5417637720809269, "grad_norm": 0.16816022992134094, "learning_rate": 9.136574655667886e-06, "loss": 0.0157, "step": 32080 }, { "epoch": 0.5419326510622487, "grad_norm": 0.5674344897270203, "learning_rate": 9.135746614116687e-06, "loss": 0.0185, "step": 32090 }, { "epoch": 0.5421015300435708, "grad_norm": 0.1665664166212082, "learning_rate": 9.134918213261955e-06, "loss": 0.0131, "step": 32100 }, { "epoch": 0.5422704090248928, "grad_norm": 0.15817023813724518, "learning_rate": 9.13408945317566e-06, "loss": 0.0142, "step": 32110 }, { "epoch": 0.5424392880062148, "grad_norm": 0.1563226729631424, "learning_rate": 9.133260333929805e-06, "loss": 0.0176, "step": 32120 }, { "epoch": 0.5426081669875368, "grad_norm": 0.38665613532066345, "learning_rate": 9.13243085559642e-06, "loss": 0.0129, "step": 32130 }, { "epoch": 0.5427770459688587, "grad_norm": 0.4528919756412506, "learning_rate": 9.131601018247567e-06, "loss": 0.0187, "step": 32140 }, { "epoch": 0.5429459249501807, "grad_norm": 0.35331833362579346, "learning_rate": 9.130770821955341e-06, "loss": 0.0179, "step": 32150 }, { "epoch": 0.5431148039315027, "grad_norm": 0.18659202754497528, "learning_rate": 9.12994026679187e-06, "loss": 0.0117, "step": 32160 }, { "epoch": 0.5432836829128247, "grad_norm": 0.4041014611721039, "learning_rate": 9.12910935282931e-06, "loss": 0.0131, "step": 32170 }, { "epoch": 0.5434525618941467, "grad_norm": 0.3345877528190613, "learning_rate": 9.128278080139842e-06, "loss": 0.0136, "step": 32180 }, { "epoch": 0.5436214408754686, "grad_norm": 0.2951003611087799, "learning_rate": 9.127446448795695e-06, "loss": 0.0149, "step": 32190 }, { "epoch": 0.5437903198567906, "grad_norm": 0.20852497220039368, "learning_rate": 9.126614458869111e-06, "loss": 0.0146, "step": 32200 }, { "epoch": 0.5439591988381126, "grad_norm": 0.322308748960495, "learning_rate": 9.125782110432377e-06, "loss": 0.0145, "step": 32210 }, { "epoch": 0.5441280778194346, "grad_norm": 0.37847623229026794, "learning_rate": 9.124949403557803e-06, "loss": 0.0196, "step": 32220 }, { "epoch": 0.5442969568007566, "grad_norm": 0.2811872661113739, "learning_rate": 9.12411633831773e-06, "loss": 0.0111, "step": 32230 }, { "epoch": 0.5444658357820785, "grad_norm": 0.325208842754364, "learning_rate": 9.123282914784536e-06, "loss": 0.0089, "step": 32240 }, { "epoch": 0.5446347147634005, "grad_norm": 0.2446177899837494, "learning_rate": 9.122449133030629e-06, "loss": 0.011, "step": 32250 }, { "epoch": 0.5448035937447225, "grad_norm": 0.16447791457176208, "learning_rate": 9.12161499312844e-06, "loss": 0.0217, "step": 32260 }, { "epoch": 0.5449724727260445, "grad_norm": 0.33406519889831543, "learning_rate": 9.12078049515044e-06, "loss": 0.014, "step": 32270 }, { "epoch": 0.5451413517073666, "grad_norm": 0.30026212334632874, "learning_rate": 9.11994563916913e-06, "loss": 0.0108, "step": 32280 }, { "epoch": 0.5453102306886884, "grad_norm": 0.6954370737075806, "learning_rate": 9.119110425257036e-06, "loss": 0.01, "step": 32290 }, { "epoch": 0.5454791096700105, "grad_norm": 0.3958251476287842, "learning_rate": 9.118274853486721e-06, "loss": 0.0155, "step": 32300 }, { "epoch": 0.5456479886513325, "grad_norm": 0.37320205569267273, "learning_rate": 9.117438923930782e-06, "loss": 0.013, "step": 32310 }, { "epoch": 0.5458168676326545, "grad_norm": 0.46532702445983887, "learning_rate": 9.116602636661836e-06, "loss": 0.0106, "step": 32320 }, { "epoch": 0.5459857466139765, "grad_norm": 0.5406472682952881, "learning_rate": 9.115765991752539e-06, "loss": 0.0131, "step": 32330 }, { "epoch": 0.5461546255952984, "grad_norm": 0.35302600264549255, "learning_rate": 9.11492898927558e-06, "loss": 0.02, "step": 32340 }, { "epoch": 0.5463235045766204, "grad_norm": 0.5102949142456055, "learning_rate": 9.114091629303672e-06, "loss": 0.0211, "step": 32350 }, { "epoch": 0.5464923835579424, "grad_norm": 0.4585028290748596, "learning_rate": 9.113253911909565e-06, "loss": 0.0142, "step": 32360 }, { "epoch": 0.5466612625392644, "grad_norm": 0.31934958696365356, "learning_rate": 9.112415837166037e-06, "loss": 0.0139, "step": 32370 }, { "epoch": 0.5468301415205864, "grad_norm": 0.542619526386261, "learning_rate": 9.111577405145897e-06, "loss": 0.0107, "step": 32380 }, { "epoch": 0.5469990205019083, "grad_norm": 0.439595103263855, "learning_rate": 9.110738615921988e-06, "loss": 0.0165, "step": 32390 }, { "epoch": 0.5471678994832303, "grad_norm": 0.3500678837299347, "learning_rate": 9.109899469567181e-06, "loss": 0.015, "step": 32400 }, { "epoch": 0.5473367784645523, "grad_norm": 0.3669789135456085, "learning_rate": 9.109059966154379e-06, "loss": 0.0113, "step": 32410 }, { "epoch": 0.5475056574458743, "grad_norm": 0.23350511491298676, "learning_rate": 9.108220105756515e-06, "loss": 0.0137, "step": 32420 }, { "epoch": 0.5476745364271963, "grad_norm": 0.5858970880508423, "learning_rate": 9.107379888446558e-06, "loss": 0.0192, "step": 32430 }, { "epoch": 0.5478434154085182, "grad_norm": 0.33348575234413147, "learning_rate": 9.106539314297498e-06, "loss": 0.0142, "step": 32440 }, { "epoch": 0.5480122943898402, "grad_norm": 0.3540392816066742, "learning_rate": 9.105698383382365e-06, "loss": 0.0136, "step": 32450 }, { "epoch": 0.5481811733711622, "grad_norm": 0.31468719244003296, "learning_rate": 9.104857095774216e-06, "loss": 0.009, "step": 32460 }, { "epoch": 0.5483500523524842, "grad_norm": 0.29871195554733276, "learning_rate": 9.104015451546141e-06, "loss": 0.0093, "step": 32470 }, { "epoch": 0.5485189313338062, "grad_norm": 0.5343881845474243, "learning_rate": 9.103173450771264e-06, "loss": 0.0165, "step": 32480 }, { "epoch": 0.5486878103151281, "grad_norm": 0.4731055200099945, "learning_rate": 9.102331093522728e-06, "loss": 0.0146, "step": 32490 }, { "epoch": 0.5488566892964502, "grad_norm": 0.1812642514705658, "learning_rate": 9.101488379873719e-06, "loss": 0.0116, "step": 32500 }, { "epoch": 0.5490255682777722, "grad_norm": 0.27014797925949097, "learning_rate": 9.100645309897451e-06, "loss": 0.0102, "step": 32510 }, { "epoch": 0.5491944472590942, "grad_norm": 0.36497506499290466, "learning_rate": 9.099801883667168e-06, "loss": 0.012, "step": 32520 }, { "epoch": 0.5493633262404162, "grad_norm": 0.2151443511247635, "learning_rate": 9.098958101256142e-06, "loss": 0.0152, "step": 32530 }, { "epoch": 0.5495322052217381, "grad_norm": 0.392543762922287, "learning_rate": 9.098113962737677e-06, "loss": 0.0135, "step": 32540 }, { "epoch": 0.5497010842030601, "grad_norm": 0.22485847771167755, "learning_rate": 9.097269468185117e-06, "loss": 0.0159, "step": 32550 }, { "epoch": 0.5498699631843821, "grad_norm": 0.3524610698223114, "learning_rate": 9.096424617671825e-06, "loss": 0.0094, "step": 32560 }, { "epoch": 0.5500388421657041, "grad_norm": 0.231504425406456, "learning_rate": 9.095579411271199e-06, "loss": 0.0117, "step": 32570 }, { "epoch": 0.5502077211470261, "grad_norm": 0.25734010338783264, "learning_rate": 9.09473384905667e-06, "loss": 0.0116, "step": 32580 }, { "epoch": 0.550376600128348, "grad_norm": 0.4006299376487732, "learning_rate": 9.093887931101696e-06, "loss": 0.0124, "step": 32590 }, { "epoch": 0.55054547910967, "grad_norm": 0.5349488854408264, "learning_rate": 9.09304165747977e-06, "loss": 0.015, "step": 32600 }, { "epoch": 0.550714358090992, "grad_norm": 0.3768586218357086, "learning_rate": 9.092195028264416e-06, "loss": 0.0145, "step": 32610 }, { "epoch": 0.550883237072314, "grad_norm": 0.417034387588501, "learning_rate": 9.091348043529186e-06, "loss": 0.0122, "step": 32620 }, { "epoch": 0.5510521160536359, "grad_norm": 0.3205206096172333, "learning_rate": 9.090500703347662e-06, "loss": 0.0116, "step": 32630 }, { "epoch": 0.5512209950349579, "grad_norm": 0.18757414817810059, "learning_rate": 9.089653007793462e-06, "loss": 0.0124, "step": 32640 }, { "epoch": 0.5513898740162799, "grad_norm": 0.27058523893356323, "learning_rate": 9.088804956940227e-06, "loss": 0.0083, "step": 32650 }, { "epoch": 0.5515587529976019, "grad_norm": 0.36923667788505554, "learning_rate": 9.087956550861638e-06, "loss": 0.0203, "step": 32660 }, { "epoch": 0.5517276319789239, "grad_norm": 0.3422497808933258, "learning_rate": 9.087107789631402e-06, "loss": 0.0099, "step": 32670 }, { "epoch": 0.5518965109602458, "grad_norm": 0.2883071303367615, "learning_rate": 9.086258673323254e-06, "loss": 0.0157, "step": 32680 }, { "epoch": 0.5520653899415678, "grad_norm": 0.2524901032447815, "learning_rate": 9.085409202010967e-06, "loss": 0.0092, "step": 32690 }, { "epoch": 0.5522342689228898, "grad_norm": 0.8117314577102661, "learning_rate": 9.084559375768338e-06, "loss": 0.0209, "step": 32700 }, { "epoch": 0.5524031479042119, "grad_norm": 0.3688042461872101, "learning_rate": 9.0837091946692e-06, "loss": 0.0151, "step": 32710 }, { "epoch": 0.5525720268855339, "grad_norm": 0.3001657724380493, "learning_rate": 9.082858658787414e-06, "loss": 0.0154, "step": 32720 }, { "epoch": 0.5527409058668558, "grad_norm": 0.30820679664611816, "learning_rate": 9.082007768196871e-06, "loss": 0.0137, "step": 32730 }, { "epoch": 0.5529097848481778, "grad_norm": 0.3566303849220276, "learning_rate": 9.081156522971498e-06, "loss": 0.0172, "step": 32740 }, { "epoch": 0.5530786638294998, "grad_norm": 0.7636209726333618, "learning_rate": 9.080304923185244e-06, "loss": 0.0145, "step": 32750 }, { "epoch": 0.5532475428108218, "grad_norm": 0.4156229794025421, "learning_rate": 9.079452968912097e-06, "loss": 0.0133, "step": 32760 }, { "epoch": 0.5534164217921438, "grad_norm": 0.41480058431625366, "learning_rate": 9.078600660226073e-06, "loss": 0.0168, "step": 32770 }, { "epoch": 0.5535853007734657, "grad_norm": 0.13904574513435364, "learning_rate": 9.077747997201218e-06, "loss": 0.0096, "step": 32780 }, { "epoch": 0.5537541797547877, "grad_norm": 0.6296315789222717, "learning_rate": 9.076894979911608e-06, "loss": 0.0155, "step": 32790 }, { "epoch": 0.5539230587361097, "grad_norm": 0.49356287717819214, "learning_rate": 9.076041608431353e-06, "loss": 0.0164, "step": 32800 }, { "epoch": 0.5540919377174317, "grad_norm": 0.4117225110530853, "learning_rate": 9.07518788283459e-06, "loss": 0.0138, "step": 32810 }, { "epoch": 0.5542608166987537, "grad_norm": 0.6349764466285706, "learning_rate": 9.07433380319549e-06, "loss": 0.0146, "step": 32820 }, { "epoch": 0.5544296956800756, "grad_norm": 0.19186539947986603, "learning_rate": 9.073479369588252e-06, "loss": 0.0113, "step": 32830 }, { "epoch": 0.5545985746613976, "grad_norm": 0.45612621307373047, "learning_rate": 9.07262458208711e-06, "loss": 0.013, "step": 32840 }, { "epoch": 0.5547674536427196, "grad_norm": 0.4856705069541931, "learning_rate": 9.071769440766322e-06, "loss": 0.0118, "step": 32850 }, { "epoch": 0.5549363326240416, "grad_norm": 0.256145715713501, "learning_rate": 9.070913945700184e-06, "loss": 0.0188, "step": 32860 }, { "epoch": 0.5551052116053636, "grad_norm": 0.3223809003829956, "learning_rate": 9.070058096963014e-06, "loss": 0.0102, "step": 32870 }, { "epoch": 0.5552740905866855, "grad_norm": 0.40519246459007263, "learning_rate": 9.069201894629174e-06, "loss": 0.0103, "step": 32880 }, { "epoch": 0.5554429695680075, "grad_norm": 0.35551002621650696, "learning_rate": 9.068345338773042e-06, "loss": 0.0088, "step": 32890 }, { "epoch": 0.5556118485493295, "grad_norm": 0.1695539653301239, "learning_rate": 9.067488429469036e-06, "loss": 0.012, "step": 32900 }, { "epoch": 0.5557807275306516, "grad_norm": 0.41220182180404663, "learning_rate": 9.066631166791604e-06, "loss": 0.0131, "step": 32910 }, { "epoch": 0.5559496065119736, "grad_norm": 0.5393982529640198, "learning_rate": 9.06577355081522e-06, "loss": 0.0173, "step": 32920 }, { "epoch": 0.5561184854932955, "grad_norm": 0.3787648677825928, "learning_rate": 9.06491558161439e-06, "loss": 0.0168, "step": 32930 }, { "epoch": 0.5562873644746175, "grad_norm": 0.3659694492816925, "learning_rate": 9.064057259263659e-06, "loss": 0.0123, "step": 32940 }, { "epoch": 0.5564562434559395, "grad_norm": 0.3476395010948181, "learning_rate": 9.063198583837589e-06, "loss": 0.0113, "step": 32950 }, { "epoch": 0.5566251224372615, "grad_norm": 0.30171528458595276, "learning_rate": 9.062339555410784e-06, "loss": 0.0146, "step": 32960 }, { "epoch": 0.5567940014185835, "grad_norm": 0.23577791452407837, "learning_rate": 9.061480174057873e-06, "loss": 0.0117, "step": 32970 }, { "epoch": 0.5569628803999054, "grad_norm": 0.4989687204360962, "learning_rate": 9.060620439853514e-06, "loss": 0.0149, "step": 32980 }, { "epoch": 0.5571317593812274, "grad_norm": 0.6547173261642456, "learning_rate": 9.059760352872403e-06, "loss": 0.0127, "step": 32990 }, { "epoch": 0.5573006383625494, "grad_norm": 0.3522737920284271, "learning_rate": 9.05889991318926e-06, "loss": 0.0138, "step": 33000 }, { "epoch": 0.5574695173438714, "grad_norm": 0.6529216766357422, "learning_rate": 9.058039120878839e-06, "loss": 0.0163, "step": 33010 }, { "epoch": 0.5576383963251934, "grad_norm": 0.2663683593273163, "learning_rate": 9.057177976015923e-06, "loss": 0.0095, "step": 33020 }, { "epoch": 0.5578072753065153, "grad_norm": 0.23393034934997559, "learning_rate": 9.056316478675326e-06, "loss": 0.0131, "step": 33030 }, { "epoch": 0.5579761542878373, "grad_norm": 0.42181453108787537, "learning_rate": 9.055454628931893e-06, "loss": 0.0124, "step": 33040 }, { "epoch": 0.5581450332691593, "grad_norm": 0.22560563683509827, "learning_rate": 9.054592426860498e-06, "loss": 0.0114, "step": 33050 }, { "epoch": 0.5583139122504813, "grad_norm": 0.2737715542316437, "learning_rate": 9.053729872536048e-06, "loss": 0.0115, "step": 33060 }, { "epoch": 0.5584827912318033, "grad_norm": 0.21776412427425385, "learning_rate": 9.052866966033482e-06, "loss": 0.0139, "step": 33070 }, { "epoch": 0.5586516702131252, "grad_norm": 0.49655354022979736, "learning_rate": 9.052003707427763e-06, "loss": 0.0157, "step": 33080 }, { "epoch": 0.5588205491944472, "grad_norm": 0.34138134121894836, "learning_rate": 9.05114009679389e-06, "loss": 0.0118, "step": 33090 }, { "epoch": 0.5589894281757692, "grad_norm": 0.1872914880514145, "learning_rate": 9.050276134206895e-06, "loss": 0.0103, "step": 33100 }, { "epoch": 0.5591583071570912, "grad_norm": 0.42276179790496826, "learning_rate": 9.049411819741834e-06, "loss": 0.0142, "step": 33110 }, { "epoch": 0.5593271861384133, "grad_norm": 0.3550369441509247, "learning_rate": 9.048547153473795e-06, "loss": 0.0136, "step": 33120 }, { "epoch": 0.5594960651197352, "grad_norm": 0.35435038805007935, "learning_rate": 9.0476821354779e-06, "loss": 0.0148, "step": 33130 }, { "epoch": 0.5596649441010572, "grad_norm": 0.6611677408218384, "learning_rate": 9.0468167658293e-06, "loss": 0.0085, "step": 33140 }, { "epoch": 0.5598338230823792, "grad_norm": 0.3018876016139984, "learning_rate": 9.045951044603175e-06, "loss": 0.0171, "step": 33150 }, { "epoch": 0.5600027020637012, "grad_norm": 0.32584285736083984, "learning_rate": 9.045084971874738e-06, "loss": 0.0159, "step": 33160 }, { "epoch": 0.5601715810450232, "grad_norm": 0.20887382328510284, "learning_rate": 9.04421854771923e-06, "loss": 0.0097, "step": 33170 }, { "epoch": 0.5603404600263451, "grad_norm": 0.566470742225647, "learning_rate": 9.043351772211926e-06, "loss": 0.0142, "step": 33180 }, { "epoch": 0.5605093390076671, "grad_norm": 0.38047656416893005, "learning_rate": 9.042484645428126e-06, "loss": 0.0174, "step": 33190 }, { "epoch": 0.5606782179889891, "grad_norm": 0.31474313139915466, "learning_rate": 9.041617167443167e-06, "loss": 0.0113, "step": 33200 }, { "epoch": 0.5608470969703111, "grad_norm": 0.6456449627876282, "learning_rate": 9.04074933833241e-06, "loss": 0.0189, "step": 33210 }, { "epoch": 0.5610159759516331, "grad_norm": 0.5589651465415955, "learning_rate": 9.039881158171252e-06, "loss": 0.0165, "step": 33220 }, { "epoch": 0.561184854932955, "grad_norm": 0.2653728127479553, "learning_rate": 9.03901262703512e-06, "loss": 0.0125, "step": 33230 }, { "epoch": 0.561353733914277, "grad_norm": 0.548275351524353, "learning_rate": 9.038143744999467e-06, "loss": 0.0188, "step": 33240 }, { "epoch": 0.561522612895599, "grad_norm": 0.2459510713815689, "learning_rate": 9.03727451213978e-06, "loss": 0.0161, "step": 33250 }, { "epoch": 0.561691491876921, "grad_norm": 0.2616035044193268, "learning_rate": 9.036404928531577e-06, "loss": 0.0179, "step": 33260 }, { "epoch": 0.561860370858243, "grad_norm": 0.4076712727546692, "learning_rate": 9.035534994250406e-06, "loss": 0.0135, "step": 33270 }, { "epoch": 0.5620292498395649, "grad_norm": 0.7773197293281555, "learning_rate": 9.034664709371841e-06, "loss": 0.0129, "step": 33280 }, { "epoch": 0.5621981288208869, "grad_norm": 0.23376522958278656, "learning_rate": 9.033794073971492e-06, "loss": 0.0151, "step": 33290 }, { "epoch": 0.5623670078022089, "grad_norm": 0.387561559677124, "learning_rate": 9.032923088125e-06, "loss": 0.0168, "step": 33300 }, { "epoch": 0.562535886783531, "grad_norm": 0.33522069454193115, "learning_rate": 9.03205175190803e-06, "loss": 0.0128, "step": 33310 }, { "epoch": 0.562704765764853, "grad_norm": 0.7434704303741455, "learning_rate": 9.031180065396286e-06, "loss": 0.0169, "step": 33320 }, { "epoch": 0.5628736447461749, "grad_norm": 0.3392420709133148, "learning_rate": 9.030308028665495e-06, "loss": 0.016, "step": 33330 }, { "epoch": 0.5630425237274969, "grad_norm": 0.34404444694519043, "learning_rate": 9.02943564179142e-06, "loss": 0.0184, "step": 33340 }, { "epoch": 0.5632114027088189, "grad_norm": 0.4486728608608246, "learning_rate": 9.028562904849847e-06, "loss": 0.0111, "step": 33350 }, { "epoch": 0.5633802816901409, "grad_norm": 0.200127974152565, "learning_rate": 9.027689817916603e-06, "loss": 0.0113, "step": 33360 }, { "epoch": 0.5635491606714629, "grad_norm": 0.320716917514801, "learning_rate": 9.026816381067536e-06, "loss": 0.013, "step": 33370 }, { "epoch": 0.5637180396527848, "grad_norm": 0.3715952932834625, "learning_rate": 9.025942594378528e-06, "loss": 0.0159, "step": 33380 }, { "epoch": 0.5638869186341068, "grad_norm": 0.378153920173645, "learning_rate": 9.025068457925493e-06, "loss": 0.013, "step": 33390 }, { "epoch": 0.5640557976154288, "grad_norm": 0.15989789366722107, "learning_rate": 9.024193971784375e-06, "loss": 0.0118, "step": 33400 }, { "epoch": 0.5642246765967508, "grad_norm": 0.33813712000846863, "learning_rate": 9.023319136031144e-06, "loss": 0.0173, "step": 33410 }, { "epoch": 0.5643935555780728, "grad_norm": 0.5213518738746643, "learning_rate": 9.022443950741804e-06, "loss": 0.0114, "step": 33420 }, { "epoch": 0.5645624345593947, "grad_norm": 0.3260529041290283, "learning_rate": 9.021568415992393e-06, "loss": 0.0128, "step": 33430 }, { "epoch": 0.5647313135407167, "grad_norm": 0.25418972969055176, "learning_rate": 9.020692531858968e-06, "loss": 0.0113, "step": 33440 }, { "epoch": 0.5649001925220387, "grad_norm": 0.27780628204345703, "learning_rate": 9.019816298417632e-06, "loss": 0.0123, "step": 33450 }, { "epoch": 0.5650690715033607, "grad_norm": 0.5685291886329651, "learning_rate": 9.018939715744505e-06, "loss": 0.013, "step": 33460 }, { "epoch": 0.5652379504846827, "grad_norm": 0.20036561787128448, "learning_rate": 9.018062783915742e-06, "loss": 0.0146, "step": 33470 }, { "epoch": 0.5654068294660046, "grad_norm": 0.4739370048046112, "learning_rate": 9.017185503007532e-06, "loss": 0.0133, "step": 33480 }, { "epoch": 0.5655757084473266, "grad_norm": 0.39744237065315247, "learning_rate": 9.016307873096088e-06, "loss": 0.0117, "step": 33490 }, { "epoch": 0.5657445874286486, "grad_norm": 0.6176339387893677, "learning_rate": 9.015429894257658e-06, "loss": 0.0158, "step": 33500 }, { "epoch": 0.5659134664099706, "grad_norm": 0.35638824105262756, "learning_rate": 9.014551566568517e-06, "loss": 0.0086, "step": 33510 }, { "epoch": 0.5660823453912927, "grad_norm": 0.36074551939964294, "learning_rate": 9.013672890104974e-06, "loss": 0.0162, "step": 33520 }, { "epoch": 0.5662512243726145, "grad_norm": 0.2309679538011551, "learning_rate": 9.012793864943366e-06, "loss": 0.0117, "step": 33530 }, { "epoch": 0.5664201033539366, "grad_norm": 0.4988129734992981, "learning_rate": 9.011914491160058e-06, "loss": 0.0179, "step": 33540 }, { "epoch": 0.5665889823352586, "grad_norm": 0.3919214606285095, "learning_rate": 9.01103476883145e-06, "loss": 0.0168, "step": 33550 }, { "epoch": 0.5667578613165806, "grad_norm": 0.12818431854248047, "learning_rate": 9.010154698033973e-06, "loss": 0.0146, "step": 33560 }, { "epoch": 0.5669267402979026, "grad_norm": 0.2996361255645752, "learning_rate": 9.009274278844077e-06, "loss": 0.0129, "step": 33570 }, { "epoch": 0.5670956192792245, "grad_norm": 0.7740024328231812, "learning_rate": 9.00839351133826e-06, "loss": 0.0176, "step": 33580 }, { "epoch": 0.5672644982605465, "grad_norm": 0.18302391469478607, "learning_rate": 9.007512395593036e-06, "loss": 0.0184, "step": 33590 }, { "epoch": 0.5674333772418685, "grad_norm": 0.2744663655757904, "learning_rate": 9.006630931684954e-06, "loss": 0.0115, "step": 33600 }, { "epoch": 0.5676022562231905, "grad_norm": 0.4681263566017151, "learning_rate": 9.005749119690594e-06, "loss": 0.0126, "step": 33610 }, { "epoch": 0.5677711352045125, "grad_norm": 0.19204381108283997, "learning_rate": 9.004866959686569e-06, "loss": 0.0176, "step": 33620 }, { "epoch": 0.5679400141858344, "grad_norm": 0.2825409471988678, "learning_rate": 9.003984451749512e-06, "loss": 0.0186, "step": 33630 }, { "epoch": 0.5681088931671564, "grad_norm": 0.34315797686576843, "learning_rate": 9.0031015959561e-06, "loss": 0.0151, "step": 33640 }, { "epoch": 0.5682777721484784, "grad_norm": 0.2925620973110199, "learning_rate": 9.002218392383028e-06, "loss": 0.0111, "step": 33650 }, { "epoch": 0.5684466511298004, "grad_norm": 0.2545560896396637, "learning_rate": 9.001334841107033e-06, "loss": 0.0125, "step": 33660 }, { "epoch": 0.5686155301111224, "grad_norm": 0.383556067943573, "learning_rate": 9.000450942204867e-06, "loss": 0.0113, "step": 33670 }, { "epoch": 0.5687844090924443, "grad_norm": 0.21399599313735962, "learning_rate": 8.999566695753328e-06, "loss": 0.0117, "step": 33680 }, { "epoch": 0.5689532880737663, "grad_norm": 0.23803631961345673, "learning_rate": 8.998682101829235e-06, "loss": 0.0139, "step": 33690 }, { "epoch": 0.5691221670550883, "grad_norm": 0.11417369544506073, "learning_rate": 8.99779716050944e-06, "loss": 0.0105, "step": 33700 }, { "epoch": 0.5692910460364103, "grad_norm": 0.256244421005249, "learning_rate": 8.996911871870823e-06, "loss": 0.0104, "step": 33710 }, { "epoch": 0.5694599250177322, "grad_norm": 0.22586345672607422, "learning_rate": 8.996026235990295e-06, "loss": 0.0104, "step": 33720 }, { "epoch": 0.5696288039990542, "grad_norm": 0.3727395534515381, "learning_rate": 8.995140252944803e-06, "loss": 0.0122, "step": 33730 }, { "epoch": 0.5697976829803763, "grad_norm": 0.3635350167751312, "learning_rate": 8.994253922811313e-06, "loss": 0.0117, "step": 33740 }, { "epoch": 0.5699665619616983, "grad_norm": 0.2900923490524292, "learning_rate": 8.993367245666828e-06, "loss": 0.0094, "step": 33750 }, { "epoch": 0.5701354409430203, "grad_norm": 0.29732850193977356, "learning_rate": 8.992480221588384e-06, "loss": 0.0179, "step": 33760 }, { "epoch": 0.5703043199243422, "grad_norm": 0.41331636905670166, "learning_rate": 8.991592850653041e-06, "loss": 0.0126, "step": 33770 }, { "epoch": 0.5704731989056642, "grad_norm": 0.3617541491985321, "learning_rate": 8.990705132937893e-06, "loss": 0.0138, "step": 33780 }, { "epoch": 0.5706420778869862, "grad_norm": 0.8432496190071106, "learning_rate": 8.98981706852006e-06, "loss": 0.0161, "step": 33790 }, { "epoch": 0.5708109568683082, "grad_norm": 0.24652421474456787, "learning_rate": 8.988928657476699e-06, "loss": 0.0084, "step": 33800 }, { "epoch": 0.5709798358496302, "grad_norm": 0.30083340406417847, "learning_rate": 8.988039899884989e-06, "loss": 0.0149, "step": 33810 }, { "epoch": 0.5711487148309521, "grad_norm": 0.5609596967697144, "learning_rate": 8.987150795822146e-06, "loss": 0.0109, "step": 33820 }, { "epoch": 0.5713175938122741, "grad_norm": 0.3378518521785736, "learning_rate": 8.98626134536541e-06, "loss": 0.0129, "step": 33830 }, { "epoch": 0.5714864727935961, "grad_norm": 0.39024966955184937, "learning_rate": 8.985371548592057e-06, "loss": 0.0141, "step": 33840 }, { "epoch": 0.5716553517749181, "grad_norm": 0.32702741026878357, "learning_rate": 8.984481405579389e-06, "loss": 0.012, "step": 33850 }, { "epoch": 0.5718242307562401, "grad_norm": 0.7394389510154724, "learning_rate": 8.983590916404741e-06, "loss": 0.0089, "step": 33860 }, { "epoch": 0.571993109737562, "grad_norm": 0.3861747980117798, "learning_rate": 8.982700081145475e-06, "loss": 0.0157, "step": 33870 }, { "epoch": 0.572161988718884, "grad_norm": 0.27691277861595154, "learning_rate": 8.981808899878985e-06, "loss": 0.0134, "step": 33880 }, { "epoch": 0.572330867700206, "grad_norm": 0.4107409417629242, "learning_rate": 8.980917372682693e-06, "loss": 0.016, "step": 33890 }, { "epoch": 0.572499746681528, "grad_norm": 0.30998775362968445, "learning_rate": 8.980025499634058e-06, "loss": 0.011, "step": 33900 }, { "epoch": 0.57266862566285, "grad_norm": 0.18609938025474548, "learning_rate": 8.979133280810557e-06, "loss": 0.0123, "step": 33910 }, { "epoch": 0.5728375046441719, "grad_norm": 0.22462032735347748, "learning_rate": 8.978240716289708e-06, "loss": 0.0144, "step": 33920 }, { "epoch": 0.5730063836254939, "grad_norm": 0.364920973777771, "learning_rate": 8.977347806149055e-06, "loss": 0.0151, "step": 33930 }, { "epoch": 0.573175262606816, "grad_norm": 0.5219033360481262, "learning_rate": 8.976454550466169e-06, "loss": 0.0138, "step": 33940 }, { "epoch": 0.573344141588138, "grad_norm": 0.5134621858596802, "learning_rate": 8.975560949318655e-06, "loss": 0.013, "step": 33950 }, { "epoch": 0.57351302056946, "grad_norm": 0.5141118764877319, "learning_rate": 8.97466700278415e-06, "loss": 0.0141, "step": 33960 }, { "epoch": 0.5736818995507819, "grad_norm": 0.49783238768577576, "learning_rate": 8.973772710940313e-06, "loss": 0.0157, "step": 33970 }, { "epoch": 0.5738507785321039, "grad_norm": 0.3783746063709259, "learning_rate": 8.97287807386484e-06, "loss": 0.0151, "step": 33980 }, { "epoch": 0.5740196575134259, "grad_norm": 0.5560950040817261, "learning_rate": 8.971983091635456e-06, "loss": 0.0111, "step": 33990 }, { "epoch": 0.5741885364947479, "grad_norm": 0.37697237730026245, "learning_rate": 8.971087764329913e-06, "loss": 0.0155, "step": 34000 }, { "epoch": 0.5743574154760699, "grad_norm": 0.23552700877189636, "learning_rate": 8.970192092025995e-06, "loss": 0.0097, "step": 34010 }, { "epoch": 0.5745262944573918, "grad_norm": 0.24793054163455963, "learning_rate": 8.969296074801518e-06, "loss": 0.0134, "step": 34020 }, { "epoch": 0.5746951734387138, "grad_norm": 0.3029188811779022, "learning_rate": 8.968399712734324e-06, "loss": 0.0142, "step": 34030 }, { "epoch": 0.5748640524200358, "grad_norm": 0.2985615134239197, "learning_rate": 8.967503005902286e-06, "loss": 0.0113, "step": 34040 }, { "epoch": 0.5750329314013578, "grad_norm": 0.33892694115638733, "learning_rate": 8.966605954383311e-06, "loss": 0.0183, "step": 34050 }, { "epoch": 0.5752018103826798, "grad_norm": 0.532491147518158, "learning_rate": 8.965708558255329e-06, "loss": 0.0138, "step": 34060 }, { "epoch": 0.5753706893640017, "grad_norm": 0.19450317323207855, "learning_rate": 8.964810817596306e-06, "loss": 0.0105, "step": 34070 }, { "epoch": 0.5755395683453237, "grad_norm": 0.39776158332824707, "learning_rate": 8.963912732484232e-06, "loss": 0.0175, "step": 34080 }, { "epoch": 0.5757084473266457, "grad_norm": 0.5135356783866882, "learning_rate": 8.963014302997135e-06, "loss": 0.0133, "step": 34090 }, { "epoch": 0.5758773263079677, "grad_norm": 0.5112438797950745, "learning_rate": 8.962115529213064e-06, "loss": 0.0161, "step": 34100 }, { "epoch": 0.5760462052892897, "grad_norm": 0.67034912109375, "learning_rate": 8.961216411210107e-06, "loss": 0.0169, "step": 34110 }, { "epoch": 0.5762150842706116, "grad_norm": 0.21989841759204865, "learning_rate": 8.960316949066375e-06, "loss": 0.0094, "step": 34120 }, { "epoch": 0.5763839632519336, "grad_norm": 0.26306477189064026, "learning_rate": 8.959417142860007e-06, "loss": 0.0108, "step": 34130 }, { "epoch": 0.5765528422332556, "grad_norm": 0.38999733328819275, "learning_rate": 8.958516992669183e-06, "loss": 0.0144, "step": 34140 }, { "epoch": 0.5767217212145777, "grad_norm": 0.33775269985198975, "learning_rate": 8.957616498572101e-06, "loss": 0.0167, "step": 34150 }, { "epoch": 0.5768906001958997, "grad_norm": 0.22331936657428741, "learning_rate": 8.956715660646996e-06, "loss": 0.0081, "step": 34160 }, { "epoch": 0.5770594791772216, "grad_norm": 0.3733069896697998, "learning_rate": 8.95581447897213e-06, "loss": 0.0164, "step": 34170 }, { "epoch": 0.5772283581585436, "grad_norm": 0.37304946780204773, "learning_rate": 8.954912953625795e-06, "loss": 0.0157, "step": 34180 }, { "epoch": 0.5773972371398656, "grad_norm": 0.584617555141449, "learning_rate": 8.954011084686315e-06, "loss": 0.0155, "step": 34190 }, { "epoch": 0.5775661161211876, "grad_norm": 0.66473388671875, "learning_rate": 8.953108872232038e-06, "loss": 0.0173, "step": 34200 }, { "epoch": 0.5777349951025096, "grad_norm": 0.22316719591617584, "learning_rate": 8.95220631634135e-06, "loss": 0.0096, "step": 34210 }, { "epoch": 0.5779038740838315, "grad_norm": 0.28325214982032776, "learning_rate": 8.951303417092662e-06, "loss": 0.0126, "step": 34220 }, { "epoch": 0.5780727530651535, "grad_norm": 0.4342688322067261, "learning_rate": 8.950400174564415e-06, "loss": 0.0188, "step": 34230 }, { "epoch": 0.5782416320464755, "grad_norm": 0.017115410417318344, "learning_rate": 8.94949658883508e-06, "loss": 0.012, "step": 34240 }, { "epoch": 0.5784105110277975, "grad_norm": 0.575115978717804, "learning_rate": 8.94859265998316e-06, "loss": 0.0239, "step": 34250 }, { "epoch": 0.5785793900091195, "grad_norm": 0.28768643736839294, "learning_rate": 8.947688388087186e-06, "loss": 0.0144, "step": 34260 }, { "epoch": 0.5787482689904414, "grad_norm": 0.4072001278400421, "learning_rate": 8.946783773225717e-06, "loss": 0.0129, "step": 34270 }, { "epoch": 0.5789171479717634, "grad_norm": 0.3251725733280182, "learning_rate": 8.945878815477345e-06, "loss": 0.0104, "step": 34280 }, { "epoch": 0.5790860269530854, "grad_norm": 0.4600308835506439, "learning_rate": 8.944973514920691e-06, "loss": 0.0161, "step": 34290 }, { "epoch": 0.5792549059344074, "grad_norm": 0.3207756280899048, "learning_rate": 8.944067871634404e-06, "loss": 0.0158, "step": 34300 }, { "epoch": 0.5794237849157294, "grad_norm": 0.428150475025177, "learning_rate": 8.943161885697165e-06, "loss": 0.0185, "step": 34310 }, { "epoch": 0.5795926638970513, "grad_norm": 0.295407772064209, "learning_rate": 8.942255557187684e-06, "loss": 0.0206, "step": 34320 }, { "epoch": 0.5797615428783733, "grad_norm": 0.2532251477241516, "learning_rate": 8.9413488861847e-06, "loss": 0.0136, "step": 34330 }, { "epoch": 0.5799304218596953, "grad_norm": 0.4959830045700073, "learning_rate": 8.940441872766981e-06, "loss": 0.0104, "step": 34340 }, { "epoch": 0.5800993008410174, "grad_norm": 0.35530513525009155, "learning_rate": 8.93953451701333e-06, "loss": 0.0113, "step": 34350 }, { "epoch": 0.5802681798223394, "grad_norm": 0.4104394316673279, "learning_rate": 8.938626819002571e-06, "loss": 0.0123, "step": 34360 }, { "epoch": 0.5804370588036613, "grad_norm": 0.4915297031402588, "learning_rate": 8.937718778813568e-06, "loss": 0.0128, "step": 34370 }, { "epoch": 0.5806059377849833, "grad_norm": 0.3764171898365021, "learning_rate": 8.936810396525204e-06, "loss": 0.0169, "step": 34380 }, { "epoch": 0.5807748167663053, "grad_norm": 0.3386439383029938, "learning_rate": 8.9359016722164e-06, "loss": 0.0109, "step": 34390 }, { "epoch": 0.5809436957476273, "grad_norm": 0.48123499751091003, "learning_rate": 8.934992605966105e-06, "loss": 0.0101, "step": 34400 }, { "epoch": 0.5811125747289493, "grad_norm": 0.32396143674850464, "learning_rate": 8.934083197853293e-06, "loss": 0.0114, "step": 34410 }, { "epoch": 0.5812814537102712, "grad_norm": 0.26534754037857056, "learning_rate": 8.933173447956973e-06, "loss": 0.0156, "step": 34420 }, { "epoch": 0.5814503326915932, "grad_norm": 0.5500498414039612, "learning_rate": 8.932263356356181e-06, "loss": 0.0215, "step": 34430 }, { "epoch": 0.5816192116729152, "grad_norm": 0.6007277369499207, "learning_rate": 8.931352923129985e-06, "loss": 0.012, "step": 34440 }, { "epoch": 0.5817880906542372, "grad_norm": 0.4230002760887146, "learning_rate": 8.93044214835748e-06, "loss": 0.0176, "step": 34450 }, { "epoch": 0.5819569696355592, "grad_norm": 0.5334902405738831, "learning_rate": 8.929531032117792e-06, "loss": 0.0121, "step": 34460 }, { "epoch": 0.5821258486168811, "grad_norm": 0.5950012803077698, "learning_rate": 8.928619574490076e-06, "loss": 0.0146, "step": 34470 }, { "epoch": 0.5822947275982031, "grad_norm": 0.21652959287166595, "learning_rate": 8.927707775553519e-06, "loss": 0.0094, "step": 34480 }, { "epoch": 0.5824636065795251, "grad_norm": 0.27369582653045654, "learning_rate": 8.926795635387336e-06, "loss": 0.013, "step": 34490 }, { "epoch": 0.5826324855608471, "grad_norm": 0.2634276747703552, "learning_rate": 8.925883154070769e-06, "loss": 0.0131, "step": 34500 }, { "epoch": 0.5828013645421691, "grad_norm": 0.2805032432079315, "learning_rate": 8.924970331683092e-06, "loss": 0.0172, "step": 34510 }, { "epoch": 0.582970243523491, "grad_norm": 0.27422329783439636, "learning_rate": 8.92405716830361e-06, "loss": 0.011, "step": 34520 }, { "epoch": 0.583139122504813, "grad_norm": 0.4107745289802551, "learning_rate": 8.92314366401166e-06, "loss": 0.0141, "step": 34530 }, { "epoch": 0.583308001486135, "grad_norm": 0.30471155047416687, "learning_rate": 8.922229818886602e-06, "loss": 0.0103, "step": 34540 }, { "epoch": 0.583476880467457, "grad_norm": 0.23070016503334045, "learning_rate": 8.921315633007825e-06, "loss": 0.0086, "step": 34550 }, { "epoch": 0.5836457594487791, "grad_norm": 0.4586946368217468, "learning_rate": 8.920401106454755e-06, "loss": 0.0134, "step": 34560 }, { "epoch": 0.583814638430101, "grad_norm": 0.27155807614326477, "learning_rate": 8.919486239306845e-06, "loss": 0.0151, "step": 34570 }, { "epoch": 0.583983517411423, "grad_norm": 0.6994673609733582, "learning_rate": 8.918571031643572e-06, "loss": 0.0135, "step": 34580 }, { "epoch": 0.584152396392745, "grad_norm": 0.2086217850446701, "learning_rate": 8.917655483544451e-06, "loss": 0.0126, "step": 34590 }, { "epoch": 0.584321275374067, "grad_norm": 0.7660681009292603, "learning_rate": 8.916739595089023e-06, "loss": 0.0098, "step": 34600 }, { "epoch": 0.584490154355389, "grad_norm": 0.26405754685401917, "learning_rate": 8.915823366356856e-06, "loss": 0.015, "step": 34610 }, { "epoch": 0.5846590333367109, "grad_norm": 0.18680161237716675, "learning_rate": 8.91490679742755e-06, "loss": 0.0105, "step": 34620 }, { "epoch": 0.5848279123180329, "grad_norm": 0.45336055755615234, "learning_rate": 8.913989888380733e-06, "loss": 0.0105, "step": 34630 }, { "epoch": 0.5849967912993549, "grad_norm": 0.3167678415775299, "learning_rate": 8.913072639296065e-06, "loss": 0.0131, "step": 34640 }, { "epoch": 0.5851656702806769, "grad_norm": 0.49092718958854675, "learning_rate": 8.912155050253238e-06, "loss": 0.0123, "step": 34650 }, { "epoch": 0.5853345492619989, "grad_norm": 0.3791387975215912, "learning_rate": 8.911237121331962e-06, "loss": 0.0168, "step": 34660 }, { "epoch": 0.5855034282433208, "grad_norm": 0.21681010723114014, "learning_rate": 8.910318852611992e-06, "loss": 0.0129, "step": 34670 }, { "epoch": 0.5856723072246428, "grad_norm": 0.2903102934360504, "learning_rate": 8.909400244173102e-06, "loss": 0.0106, "step": 34680 }, { "epoch": 0.5858411862059648, "grad_norm": 0.32834547758102417, "learning_rate": 8.908481296095096e-06, "loss": 0.0127, "step": 34690 }, { "epoch": 0.5860100651872868, "grad_norm": 0.32214128971099854, "learning_rate": 8.907562008457812e-06, "loss": 0.0125, "step": 34700 }, { "epoch": 0.5861789441686088, "grad_norm": 0.2324737012386322, "learning_rate": 8.906642381341118e-06, "loss": 0.0141, "step": 34710 }, { "epoch": 0.5863478231499307, "grad_norm": 0.23856808245182037, "learning_rate": 8.905722414824906e-06, "loss": 0.0158, "step": 34720 }, { "epoch": 0.5865167021312527, "grad_norm": 0.1706516295671463, "learning_rate": 8.904802108989102e-06, "loss": 0.0119, "step": 34730 }, { "epoch": 0.5866855811125747, "grad_norm": 0.3160979449748993, "learning_rate": 8.903881463913658e-06, "loss": 0.0132, "step": 34740 }, { "epoch": 0.5868544600938967, "grad_norm": 0.44641515612602234, "learning_rate": 8.902960479678558e-06, "loss": 0.0106, "step": 34750 }, { "epoch": 0.5870233390752188, "grad_norm": 0.35576921701431274, "learning_rate": 8.902039156363814e-06, "loss": 0.0127, "step": 34760 }, { "epoch": 0.5871922180565406, "grad_norm": 0.2899986803531647, "learning_rate": 8.901117494049471e-06, "loss": 0.0151, "step": 34770 }, { "epoch": 0.5873610970378627, "grad_norm": 0.12945765256881714, "learning_rate": 8.9001954928156e-06, "loss": 0.0121, "step": 34780 }, { "epoch": 0.5875299760191847, "grad_norm": 0.29490843415260315, "learning_rate": 8.899273152742301e-06, "loss": 0.0105, "step": 34790 }, { "epoch": 0.5876988550005067, "grad_norm": 0.3057744801044464, "learning_rate": 8.898350473909705e-06, "loss": 0.0164, "step": 34800 }, { "epoch": 0.5878677339818286, "grad_norm": 0.6646566987037659, "learning_rate": 8.897427456397972e-06, "loss": 0.0155, "step": 34810 }, { "epoch": 0.5880366129631506, "grad_norm": 0.3042181134223938, "learning_rate": 8.896504100287293e-06, "loss": 0.0167, "step": 34820 }, { "epoch": 0.5882054919444726, "grad_norm": 0.3604864776134491, "learning_rate": 8.895580405657884e-06, "loss": 0.0159, "step": 34830 }, { "epoch": 0.5883743709257946, "grad_norm": 0.4550817906856537, "learning_rate": 8.894656372589996e-06, "loss": 0.0159, "step": 34840 }, { "epoch": 0.5885432499071166, "grad_norm": 0.24346275627613068, "learning_rate": 8.893732001163905e-06, "loss": 0.0147, "step": 34850 }, { "epoch": 0.5887121288884385, "grad_norm": 0.3486877381801605, "learning_rate": 8.892807291459919e-06, "loss": 0.0139, "step": 34860 }, { "epoch": 0.5888810078697605, "grad_norm": 0.2704324722290039, "learning_rate": 8.891882243558374e-06, "loss": 0.0121, "step": 34870 }, { "epoch": 0.5890498868510825, "grad_norm": 0.4430345594882965, "learning_rate": 8.890956857539637e-06, "loss": 0.0127, "step": 34880 }, { "epoch": 0.5892187658324045, "grad_norm": 0.26114949584007263, "learning_rate": 8.890031133484103e-06, "loss": 0.0134, "step": 34890 }, { "epoch": 0.5893876448137265, "grad_norm": 0.31391459703445435, "learning_rate": 8.889105071472194e-06, "loss": 0.0115, "step": 34900 }, { "epoch": 0.5895565237950484, "grad_norm": 0.3124453127384186, "learning_rate": 8.888178671584369e-06, "loss": 0.0134, "step": 34910 }, { "epoch": 0.5897254027763704, "grad_norm": 0.3871319591999054, "learning_rate": 8.887251933901107e-06, "loss": 0.0131, "step": 34920 }, { "epoch": 0.5898942817576924, "grad_norm": 0.34666624665260315, "learning_rate": 8.886324858502923e-06, "loss": 0.0173, "step": 34930 }, { "epoch": 0.5900631607390144, "grad_norm": 0.20694296061992645, "learning_rate": 8.885397445470358e-06, "loss": 0.0121, "step": 34940 }, { "epoch": 0.5902320397203364, "grad_norm": 0.28022637963294983, "learning_rate": 8.884469694883981e-06, "loss": 0.0138, "step": 34950 }, { "epoch": 0.5904009187016583, "grad_norm": 0.3608654737472534, "learning_rate": 8.8835416068244e-06, "loss": 0.0174, "step": 34960 }, { "epoch": 0.5905697976829803, "grad_norm": 0.5648422837257385, "learning_rate": 8.882613181372237e-06, "loss": 0.0179, "step": 34970 }, { "epoch": 0.5907386766643024, "grad_norm": 0.35864126682281494, "learning_rate": 8.881684418608154e-06, "loss": 0.0145, "step": 34980 }, { "epoch": 0.5909075556456244, "grad_norm": 0.2777981460094452, "learning_rate": 8.880755318612844e-06, "loss": 0.0155, "step": 34990 }, { "epoch": 0.5910764346269464, "grad_norm": 0.23029805719852448, "learning_rate": 8.879825881467018e-06, "loss": 0.0093, "step": 35000 }, { "epoch": 0.5912453136082683, "grad_norm": 0.3533613979816437, "learning_rate": 8.878896107251428e-06, "loss": 0.0123, "step": 35010 }, { "epoch": 0.5914141925895903, "grad_norm": 0.21461203694343567, "learning_rate": 8.877965996046849e-06, "loss": 0.0153, "step": 35020 }, { "epoch": 0.5915830715709123, "grad_norm": 0.18956726789474487, "learning_rate": 8.877035547934086e-06, "loss": 0.0152, "step": 35030 }, { "epoch": 0.5917519505522343, "grad_norm": 0.15679365396499634, "learning_rate": 8.876104762993974e-06, "loss": 0.0094, "step": 35040 }, { "epoch": 0.5919208295335563, "grad_norm": 0.16133059561252594, "learning_rate": 8.87517364130738e-06, "loss": 0.0091, "step": 35050 }, { "epoch": 0.5920897085148782, "grad_norm": 0.23049746453762054, "learning_rate": 8.874242182955194e-06, "loss": 0.0118, "step": 35060 }, { "epoch": 0.5922585874962002, "grad_norm": 0.46959057450294495, "learning_rate": 8.873310388018342e-06, "loss": 0.0123, "step": 35070 }, { "epoch": 0.5924274664775222, "grad_norm": 0.5337226986885071, "learning_rate": 8.872378256577774e-06, "loss": 0.0169, "step": 35080 }, { "epoch": 0.5925963454588442, "grad_norm": 0.2498755156993866, "learning_rate": 8.87144578871447e-06, "loss": 0.0147, "step": 35090 }, { "epoch": 0.5927652244401662, "grad_norm": 0.20205648243427277, "learning_rate": 8.870512984509444e-06, "loss": 0.0113, "step": 35100 }, { "epoch": 0.5929341034214881, "grad_norm": 0.4444373846054077, "learning_rate": 8.869579844043736e-06, "loss": 0.0182, "step": 35110 }, { "epoch": 0.5931029824028101, "grad_norm": 0.34532982110977173, "learning_rate": 8.86864636739841e-06, "loss": 0.017, "step": 35120 }, { "epoch": 0.5932718613841321, "grad_norm": 0.2567788064479828, "learning_rate": 8.867712554654568e-06, "loss": 0.011, "step": 35130 }, { "epoch": 0.5934407403654541, "grad_norm": 0.339938759803772, "learning_rate": 8.866778405893338e-06, "loss": 0.0099, "step": 35140 }, { "epoch": 0.5936096193467761, "grad_norm": 0.24847541749477386, "learning_rate": 8.865843921195874e-06, "loss": 0.0133, "step": 35150 }, { "epoch": 0.593778498328098, "grad_norm": 0.25056397914886475, "learning_rate": 8.864909100643362e-06, "loss": 0.0095, "step": 35160 }, { "epoch": 0.59394737730942, "grad_norm": 0.5740976929664612, "learning_rate": 8.86397394431702e-06, "loss": 0.0107, "step": 35170 }, { "epoch": 0.594116256290742, "grad_norm": 0.3389330506324768, "learning_rate": 8.863038452298088e-06, "loss": 0.0141, "step": 35180 }, { "epoch": 0.5942851352720641, "grad_norm": 0.4372316002845764, "learning_rate": 8.862102624667844e-06, "loss": 0.0205, "step": 35190 }, { "epoch": 0.5944540142533861, "grad_norm": 0.26352471113204956, "learning_rate": 8.861166461507585e-06, "loss": 0.0138, "step": 35200 }, { "epoch": 0.594622893234708, "grad_norm": 0.3727920353412628, "learning_rate": 8.860229962898645e-06, "loss": 0.0148, "step": 35210 }, { "epoch": 0.59479177221603, "grad_norm": 0.11757022142410278, "learning_rate": 8.859293128922386e-06, "loss": 0.0101, "step": 35220 }, { "epoch": 0.594960651197352, "grad_norm": 0.2992539405822754, "learning_rate": 8.858355959660196e-06, "loss": 0.0103, "step": 35230 }, { "epoch": 0.595129530178674, "grad_norm": 0.5973184108734131, "learning_rate": 8.857418455193493e-06, "loss": 0.0158, "step": 35240 }, { "epoch": 0.595298409159996, "grad_norm": 0.4014204144477844, "learning_rate": 8.85648061560373e-06, "loss": 0.0129, "step": 35250 }, { "epoch": 0.5954672881413179, "grad_norm": 0.7198635935783386, "learning_rate": 8.85554244097238e-06, "loss": 0.0122, "step": 35260 }, { "epoch": 0.5956361671226399, "grad_norm": 0.4030918776988983, "learning_rate": 8.854603931380949e-06, "loss": 0.0135, "step": 35270 }, { "epoch": 0.5958050461039619, "grad_norm": 0.2576174736022949, "learning_rate": 8.853665086910975e-06, "loss": 0.0233, "step": 35280 }, { "epoch": 0.5959739250852839, "grad_norm": 0.35581570863723755, "learning_rate": 8.85272590764402e-06, "loss": 0.0109, "step": 35290 }, { "epoch": 0.5961428040666059, "grad_norm": 0.2627013027667999, "learning_rate": 8.851786393661678e-06, "loss": 0.0124, "step": 35300 }, { "epoch": 0.5963116830479278, "grad_norm": 0.3657463788986206, "learning_rate": 8.850846545045575e-06, "loss": 0.0119, "step": 35310 }, { "epoch": 0.5964805620292498, "grad_norm": 0.2572197914123535, "learning_rate": 8.849906361877359e-06, "loss": 0.013, "step": 35320 }, { "epoch": 0.5966494410105718, "grad_norm": 0.30862900614738464, "learning_rate": 8.848965844238711e-06, "loss": 0.0123, "step": 35330 }, { "epoch": 0.5968183199918938, "grad_norm": 0.2943691611289978, "learning_rate": 8.848024992211343e-06, "loss": 0.0137, "step": 35340 }, { "epoch": 0.5969871989732158, "grad_norm": 0.20602825284004211, "learning_rate": 8.847083805876992e-06, "loss": 0.0106, "step": 35350 }, { "epoch": 0.5971560779545377, "grad_norm": 0.15277300775051117, "learning_rate": 8.846142285317429e-06, "loss": 0.0118, "step": 35360 }, { "epoch": 0.5973249569358597, "grad_norm": 0.4201198220252991, "learning_rate": 8.845200430614447e-06, "loss": 0.0153, "step": 35370 }, { "epoch": 0.5974938359171817, "grad_norm": 0.27402162551879883, "learning_rate": 8.844258241849873e-06, "loss": 0.0118, "step": 35380 }, { "epoch": 0.5976627148985038, "grad_norm": 0.509698212146759, "learning_rate": 8.843315719105564e-06, "loss": 0.0126, "step": 35390 }, { "epoch": 0.5978315938798258, "grad_norm": 0.3371243476867676, "learning_rate": 8.842372862463403e-06, "loss": 0.0173, "step": 35400 }, { "epoch": 0.5980004728611477, "grad_norm": 0.3160133957862854, "learning_rate": 8.841429672005302e-06, "loss": 0.0126, "step": 35410 }, { "epoch": 0.5981693518424697, "grad_norm": 0.15287983417510986, "learning_rate": 8.840486147813205e-06, "loss": 0.0102, "step": 35420 }, { "epoch": 0.5983382308237917, "grad_norm": 0.2878815829753876, "learning_rate": 8.83954228996908e-06, "loss": 0.0107, "step": 35430 }, { "epoch": 0.5985071098051137, "grad_norm": 0.372206449508667, "learning_rate": 8.838598098554931e-06, "loss": 0.0101, "step": 35440 }, { "epoch": 0.5986759887864357, "grad_norm": 0.3006468117237091, "learning_rate": 8.837653573652787e-06, "loss": 0.0117, "step": 35450 }, { "epoch": 0.5988448677677576, "grad_norm": 0.28710126876831055, "learning_rate": 8.836708715344703e-06, "loss": 0.0097, "step": 35460 }, { "epoch": 0.5990137467490796, "grad_norm": 0.2625959813594818, "learning_rate": 8.835763523712765e-06, "loss": 0.0125, "step": 35470 }, { "epoch": 0.5991826257304016, "grad_norm": 0.536334753036499, "learning_rate": 8.834817998839092e-06, "loss": 0.0122, "step": 35480 }, { "epoch": 0.5993515047117236, "grad_norm": 0.5782738327980042, "learning_rate": 8.833872140805828e-06, "loss": 0.0124, "step": 35490 }, { "epoch": 0.5995203836930456, "grad_norm": 0.20416350662708282, "learning_rate": 8.832925949695148e-06, "loss": 0.0109, "step": 35500 }, { "epoch": 0.5996892626743675, "grad_norm": 0.3872024416923523, "learning_rate": 8.831979425589253e-06, "loss": 0.0154, "step": 35510 }, { "epoch": 0.5998581416556895, "grad_norm": 0.17295877635478973, "learning_rate": 8.831032568570374e-06, "loss": 0.0123, "step": 35520 }, { "epoch": 0.6000270206370115, "grad_norm": 0.25782379508018494, "learning_rate": 8.830085378720775e-06, "loss": 0.0142, "step": 35530 }, { "epoch": 0.6001958996183335, "grad_norm": 0.41551050543785095, "learning_rate": 8.829137856122742e-06, "loss": 0.0154, "step": 35540 }, { "epoch": 0.6003647785996555, "grad_norm": 0.5353936553001404, "learning_rate": 8.828190000858596e-06, "loss": 0.0135, "step": 35550 }, { "epoch": 0.6005336575809774, "grad_norm": 0.3239074945449829, "learning_rate": 8.827241813010683e-06, "loss": 0.013, "step": 35560 }, { "epoch": 0.6007025365622994, "grad_norm": 0.27426329255104065, "learning_rate": 8.826293292661379e-06, "loss": 0.0163, "step": 35570 }, { "epoch": 0.6008714155436214, "grad_norm": 0.24393023550510406, "learning_rate": 8.82534443989309e-06, "loss": 0.0138, "step": 35580 }, { "epoch": 0.6010402945249435, "grad_norm": 0.22658607363700867, "learning_rate": 8.82439525478825e-06, "loss": 0.0104, "step": 35590 }, { "epoch": 0.6012091735062655, "grad_norm": 0.196907639503479, "learning_rate": 8.823445737429319e-06, "loss": 0.0119, "step": 35600 }, { "epoch": 0.6013780524875874, "grad_norm": 0.1250912845134735, "learning_rate": 8.822495887898794e-06, "loss": 0.0168, "step": 35610 }, { "epoch": 0.6015469314689094, "grad_norm": 0.5971598029136658, "learning_rate": 8.821545706279192e-06, "loss": 0.0154, "step": 35620 }, { "epoch": 0.6017158104502314, "grad_norm": 0.3286975920200348, "learning_rate": 8.820595192653063e-06, "loss": 0.0136, "step": 35630 }, { "epoch": 0.6018846894315534, "grad_norm": 0.4142327308654785, "learning_rate": 8.819644347102985e-06, "loss": 0.0143, "step": 35640 }, { "epoch": 0.6020535684128754, "grad_norm": 0.0911472886800766, "learning_rate": 8.818693169711567e-06, "loss": 0.0121, "step": 35650 }, { "epoch": 0.6022224473941973, "grad_norm": 0.1978856325149536, "learning_rate": 8.817741660561443e-06, "loss": 0.0132, "step": 35660 }, { "epoch": 0.6023913263755193, "grad_norm": 0.3920150697231293, "learning_rate": 8.81678981973528e-06, "loss": 0.0104, "step": 35670 }, { "epoch": 0.6025602053568413, "grad_norm": 0.37247732281684875, "learning_rate": 8.815837647315767e-06, "loss": 0.0107, "step": 35680 }, { "epoch": 0.6027290843381633, "grad_norm": 0.32894349098205566, "learning_rate": 8.81488514338563e-06, "loss": 0.011, "step": 35690 }, { "epoch": 0.6028979633194853, "grad_norm": 0.34249183535575867, "learning_rate": 8.813932308027622e-06, "loss": 0.0128, "step": 35700 }, { "epoch": 0.6030668423008072, "grad_norm": 0.4146033227443695, "learning_rate": 8.812979141324518e-06, "loss": 0.0111, "step": 35710 }, { "epoch": 0.6032357212821292, "grad_norm": 0.3750793933868408, "learning_rate": 8.81202564335913e-06, "loss": 0.0124, "step": 35720 }, { "epoch": 0.6034046002634512, "grad_norm": 0.3861348032951355, "learning_rate": 8.811071814214297e-06, "loss": 0.0108, "step": 35730 }, { "epoch": 0.6035734792447732, "grad_norm": 0.32238584756851196, "learning_rate": 8.81011765397288e-06, "loss": 0.019, "step": 35740 }, { "epoch": 0.6037423582260952, "grad_norm": 0.29884716868400574, "learning_rate": 8.80916316271778e-06, "loss": 0.0112, "step": 35750 }, { "epoch": 0.6039112372074171, "grad_norm": 0.2990702986717224, "learning_rate": 8.808208340531919e-06, "loss": 0.0116, "step": 35760 }, { "epoch": 0.6040801161887391, "grad_norm": 0.5690932869911194, "learning_rate": 8.807253187498247e-06, "loss": 0.0108, "step": 35770 }, { "epoch": 0.6042489951700611, "grad_norm": 0.24786603450775146, "learning_rate": 8.806297703699747e-06, "loss": 0.0203, "step": 35780 }, { "epoch": 0.6044178741513831, "grad_norm": 0.19200380146503448, "learning_rate": 8.80534188921943e-06, "loss": 0.0125, "step": 35790 }, { "epoch": 0.6045867531327052, "grad_norm": 0.40764370560646057, "learning_rate": 8.804385744140335e-06, "loss": 0.0132, "step": 35800 }, { "epoch": 0.604755632114027, "grad_norm": 0.495195597410202, "learning_rate": 8.803429268545528e-06, "loss": 0.0103, "step": 35810 }, { "epoch": 0.6049245110953491, "grad_norm": 0.2215053290128708, "learning_rate": 8.802472462518105e-06, "loss": 0.0137, "step": 35820 }, { "epoch": 0.6050933900766711, "grad_norm": 0.2625499367713928, "learning_rate": 8.801515326141195e-06, "loss": 0.0133, "step": 35830 }, { "epoch": 0.6052622690579931, "grad_norm": 0.29674509167671204, "learning_rate": 8.800557859497946e-06, "loss": 0.0124, "step": 35840 }, { "epoch": 0.6054311480393151, "grad_norm": 0.38389790058135986, "learning_rate": 8.799600062671542e-06, "loss": 0.0191, "step": 35850 }, { "epoch": 0.605600027020637, "grad_norm": 0.3131234347820282, "learning_rate": 8.798641935745198e-06, "loss": 0.0178, "step": 35860 }, { "epoch": 0.605768906001959, "grad_norm": 0.38704124093055725, "learning_rate": 8.797683478802149e-06, "loss": 0.0177, "step": 35870 }, { "epoch": 0.605937784983281, "grad_norm": 0.4257465898990631, "learning_rate": 8.796724691925664e-06, "loss": 0.0162, "step": 35880 }, { "epoch": 0.606106663964603, "grad_norm": 0.2436385154724121, "learning_rate": 8.795765575199045e-06, "loss": 0.0098, "step": 35890 }, { "epoch": 0.606275542945925, "grad_norm": 0.3277183175086975, "learning_rate": 8.79480612870561e-06, "loss": 0.0073, "step": 35900 }, { "epoch": 0.6064444219272469, "grad_norm": 0.34780991077423096, "learning_rate": 8.793846352528719e-06, "loss": 0.0194, "step": 35910 }, { "epoch": 0.6066133009085689, "grad_norm": 0.3609021008014679, "learning_rate": 8.792886246751753e-06, "loss": 0.0109, "step": 35920 }, { "epoch": 0.6067821798898909, "grad_norm": 0.32332998514175415, "learning_rate": 8.791925811458124e-06, "loss": 0.0157, "step": 35930 }, { "epoch": 0.6069510588712129, "grad_norm": 0.5418674945831299, "learning_rate": 8.79096504673127e-06, "loss": 0.0095, "step": 35940 }, { "epoch": 0.6071199378525348, "grad_norm": 0.26197370886802673, "learning_rate": 8.790003952654665e-06, "loss": 0.012, "step": 35950 }, { "epoch": 0.6072888168338568, "grad_norm": 0.4449974000453949, "learning_rate": 8.789042529311802e-06, "loss": 0.0122, "step": 35960 }, { "epoch": 0.6074576958151788, "grad_norm": 0.30168506503105164, "learning_rate": 8.78808077678621e-06, "loss": 0.0151, "step": 35970 }, { "epoch": 0.6076265747965008, "grad_norm": 0.3409970700740814, "learning_rate": 8.787118695161441e-06, "loss": 0.021, "step": 35980 }, { "epoch": 0.6077954537778228, "grad_norm": 0.6410372257232666, "learning_rate": 8.78615628452108e-06, "loss": 0.0131, "step": 35990 }, { "epoch": 0.6079643327591447, "grad_norm": 0.3142605125904083, "learning_rate": 8.785193544948738e-06, "loss": 0.0129, "step": 36000 }, { "epoch": 0.6081332117404668, "grad_norm": 0.19575126469135284, "learning_rate": 8.784230476528058e-06, "loss": 0.0121, "step": 36010 }, { "epoch": 0.6083020907217888, "grad_norm": 0.30776897072792053, "learning_rate": 8.783267079342705e-06, "loss": 0.0081, "step": 36020 }, { "epoch": 0.6084709697031108, "grad_norm": 0.45295339822769165, "learning_rate": 8.782303353476378e-06, "loss": 0.0157, "step": 36030 }, { "epoch": 0.6086398486844328, "grad_norm": 0.25764110684394836, "learning_rate": 8.781339299012805e-06, "loss": 0.0087, "step": 36040 }, { "epoch": 0.6088087276657547, "grad_norm": 0.34225156903266907, "learning_rate": 8.78037491603574e-06, "loss": 0.0108, "step": 36050 }, { "epoch": 0.6089776066470767, "grad_norm": 0.2366352379322052, "learning_rate": 8.779410204628964e-06, "loss": 0.0138, "step": 36060 }, { "epoch": 0.6091464856283987, "grad_norm": 0.20184089243412018, "learning_rate": 8.778445164876292e-06, "loss": 0.0113, "step": 36070 }, { "epoch": 0.6093153646097207, "grad_norm": 0.18498098850250244, "learning_rate": 8.77747979686156e-06, "loss": 0.009, "step": 36080 }, { "epoch": 0.6094842435910427, "grad_norm": 0.4813518822193146, "learning_rate": 8.776514100668642e-06, "loss": 0.0109, "step": 36090 }, { "epoch": 0.6096531225723646, "grad_norm": 0.3513389825820923, "learning_rate": 8.775548076381432e-06, "loss": 0.0152, "step": 36100 }, { "epoch": 0.6098220015536866, "grad_norm": 0.30403274297714233, "learning_rate": 8.774581724083857e-06, "loss": 0.017, "step": 36110 }, { "epoch": 0.6099908805350086, "grad_norm": 0.3054022490978241, "learning_rate": 8.77361504385987e-06, "loss": 0.014, "step": 36120 }, { "epoch": 0.6101597595163306, "grad_norm": 0.22599956393241882, "learning_rate": 8.772648035793457e-06, "loss": 0.0113, "step": 36130 }, { "epoch": 0.6103286384976526, "grad_norm": 0.4767861068248749, "learning_rate": 8.771680699968625e-06, "loss": 0.012, "step": 36140 }, { "epoch": 0.6104975174789745, "grad_norm": 0.24261638522148132, "learning_rate": 8.770713036469416e-06, "loss": 0.0131, "step": 36150 }, { "epoch": 0.6106663964602965, "grad_norm": 0.28834646940231323, "learning_rate": 8.7697450453799e-06, "loss": 0.0155, "step": 36160 }, { "epoch": 0.6108352754416185, "grad_norm": 0.22376200556755066, "learning_rate": 8.76877672678417e-06, "loss": 0.0089, "step": 36170 }, { "epoch": 0.6110041544229405, "grad_norm": 0.4729090929031372, "learning_rate": 8.767808080766353e-06, "loss": 0.0084, "step": 36180 }, { "epoch": 0.6111730334042625, "grad_norm": 0.29605820775032043, "learning_rate": 8.766839107410602e-06, "loss": 0.0119, "step": 36190 }, { "epoch": 0.6113419123855844, "grad_norm": 0.29798823595046997, "learning_rate": 8.7658698068011e-06, "loss": 0.0131, "step": 36200 }, { "epoch": 0.6115107913669064, "grad_norm": 0.3014447093009949, "learning_rate": 8.764900179022056e-06, "loss": 0.0141, "step": 36210 }, { "epoch": 0.6116796703482285, "grad_norm": 0.2779231667518616, "learning_rate": 8.763930224157711e-06, "loss": 0.0125, "step": 36220 }, { "epoch": 0.6118485493295505, "grad_norm": 0.7193223237991333, "learning_rate": 8.762959942292331e-06, "loss": 0.0113, "step": 36230 }, { "epoch": 0.6120174283108725, "grad_norm": 0.5410144329071045, "learning_rate": 8.761989333510212e-06, "loss": 0.0194, "step": 36240 }, { "epoch": 0.6121863072921944, "grad_norm": 0.29608097672462463, "learning_rate": 8.761018397895679e-06, "loss": 0.0133, "step": 36250 }, { "epoch": 0.6123551862735164, "grad_norm": 0.13617070019245148, "learning_rate": 8.760047135533082e-06, "loss": 0.0209, "step": 36260 }, { "epoch": 0.6125240652548384, "grad_norm": 0.32375144958496094, "learning_rate": 8.759075546506803e-06, "loss": 0.0101, "step": 36270 }, { "epoch": 0.6126929442361604, "grad_norm": 0.6839095950126648, "learning_rate": 8.758103630901254e-06, "loss": 0.0171, "step": 36280 }, { "epoch": 0.6128618232174824, "grad_norm": 0.23742537200450897, "learning_rate": 8.757131388800868e-06, "loss": 0.0157, "step": 36290 }, { "epoch": 0.6130307021988043, "grad_norm": 0.4480851888656616, "learning_rate": 8.756158820290114e-06, "loss": 0.01, "step": 36300 }, { "epoch": 0.6131995811801263, "grad_norm": 0.2584850490093231, "learning_rate": 8.755185925453486e-06, "loss": 0.018, "step": 36310 }, { "epoch": 0.6133684601614483, "grad_norm": 0.31085896492004395, "learning_rate": 8.754212704375507e-06, "loss": 0.0142, "step": 36320 }, { "epoch": 0.6135373391427703, "grad_norm": 0.6149510741233826, "learning_rate": 8.753239157140725e-06, "loss": 0.0179, "step": 36330 }, { "epoch": 0.6137062181240923, "grad_norm": 0.2767481803894043, "learning_rate": 8.752265283833724e-06, "loss": 0.015, "step": 36340 }, { "epoch": 0.6138750971054142, "grad_norm": 0.2944318950176239, "learning_rate": 8.751291084539108e-06, "loss": 0.0122, "step": 36350 }, { "epoch": 0.6140439760867362, "grad_norm": 0.36013463139533997, "learning_rate": 8.750316559341516e-06, "loss": 0.0159, "step": 36360 }, { "epoch": 0.6142128550680582, "grad_norm": 0.1766245812177658, "learning_rate": 8.749341708325612e-06, "loss": 0.0106, "step": 36370 }, { "epoch": 0.6143817340493802, "grad_norm": 0.30829694867134094, "learning_rate": 8.748366531576086e-06, "loss": 0.0135, "step": 36380 }, { "epoch": 0.6145506130307022, "grad_norm": 0.5767016410827637, "learning_rate": 8.747391029177662e-06, "loss": 0.0128, "step": 36390 }, { "epoch": 0.6147194920120241, "grad_norm": 0.7155065536499023, "learning_rate": 8.746415201215086e-06, "loss": 0.0139, "step": 36400 }, { "epoch": 0.6148883709933461, "grad_norm": 0.7512149214744568, "learning_rate": 8.745439047773139e-06, "loss": 0.0183, "step": 36410 }, { "epoch": 0.6150572499746682, "grad_norm": 0.3000109791755676, "learning_rate": 8.744462568936625e-06, "loss": 0.0081, "step": 36420 }, { "epoch": 0.6152261289559902, "grad_norm": 0.3139452636241913, "learning_rate": 8.743485764790379e-06, "loss": 0.0181, "step": 36430 }, { "epoch": 0.6153950079373122, "grad_norm": 0.36900973320007324, "learning_rate": 8.74250863541926e-06, "loss": 0.014, "step": 36440 }, { "epoch": 0.6155638869186341, "grad_norm": 0.13224677741527557, "learning_rate": 8.741531180908163e-06, "loss": 0.0092, "step": 36450 }, { "epoch": 0.6157327658999561, "grad_norm": 0.2671251893043518, "learning_rate": 8.740553401342005e-06, "loss": 0.011, "step": 36460 }, { "epoch": 0.6159016448812781, "grad_norm": 0.48352935910224915, "learning_rate": 8.739575296805734e-06, "loss": 0.0203, "step": 36470 }, { "epoch": 0.6160705238626001, "grad_norm": 0.24340324103832245, "learning_rate": 8.738596867384324e-06, "loss": 0.0131, "step": 36480 }, { "epoch": 0.6162394028439221, "grad_norm": 0.1293298751115799, "learning_rate": 8.737618113162779e-06, "loss": 0.0162, "step": 36490 }, { "epoch": 0.616408281825244, "grad_norm": 0.1638811081647873, "learning_rate": 8.73663903422613e-06, "loss": 0.0136, "step": 36500 }, { "epoch": 0.616577160806566, "grad_norm": 0.6742410659790039, "learning_rate": 8.73565963065944e-06, "loss": 0.0126, "step": 36510 }, { "epoch": 0.616746039787888, "grad_norm": 0.27054479718208313, "learning_rate": 8.734679902547794e-06, "loss": 0.0089, "step": 36520 }, { "epoch": 0.61691491876921, "grad_norm": 0.36976495385169983, "learning_rate": 8.733699849976308e-06, "loss": 0.0158, "step": 36530 }, { "epoch": 0.617083797750532, "grad_norm": 0.2841840386390686, "learning_rate": 8.73271947303013e-06, "loss": 0.0104, "step": 36540 }, { "epoch": 0.6172526767318539, "grad_norm": 0.25464752316474915, "learning_rate": 8.731738771794431e-06, "loss": 0.0105, "step": 36550 }, { "epoch": 0.6174215557131759, "grad_norm": 0.3841431140899658, "learning_rate": 8.73075774635441e-06, "loss": 0.0136, "step": 36560 }, { "epoch": 0.6175904346944979, "grad_norm": 0.7134889960289001, "learning_rate": 8.7297763967953e-06, "loss": 0.0151, "step": 36570 }, { "epoch": 0.6177593136758199, "grad_norm": 0.20459185540676117, "learning_rate": 8.728794723202353e-06, "loss": 0.0177, "step": 36580 }, { "epoch": 0.6179281926571419, "grad_norm": 0.24054524302482605, "learning_rate": 8.72781272566086e-06, "loss": 0.0187, "step": 36590 }, { "epoch": 0.6180970716384638, "grad_norm": 0.2633349895477295, "learning_rate": 8.72683040425613e-06, "loss": 0.0082, "step": 36600 }, { "epoch": 0.6182659506197858, "grad_norm": 0.5847419500350952, "learning_rate": 8.725847759073508e-06, "loss": 0.017, "step": 36610 }, { "epoch": 0.6184348296011078, "grad_norm": 0.24189765751361847, "learning_rate": 8.724864790198361e-06, "loss": 0.0156, "step": 36620 }, { "epoch": 0.6186037085824299, "grad_norm": 0.5842215418815613, "learning_rate": 8.72388149771609e-06, "loss": 0.0143, "step": 36630 }, { "epoch": 0.6187725875637519, "grad_norm": 0.4609352946281433, "learning_rate": 8.722897881712119e-06, "loss": 0.0108, "step": 36640 }, { "epoch": 0.6189414665450738, "grad_norm": 0.4770306348800659, "learning_rate": 8.721913942271903e-06, "loss": 0.0173, "step": 36650 }, { "epoch": 0.6191103455263958, "grad_norm": 0.2097359001636505, "learning_rate": 8.720929679480922e-06, "loss": 0.0146, "step": 36660 }, { "epoch": 0.6192792245077178, "grad_norm": 0.7630535364151001, "learning_rate": 8.719945093424689e-06, "loss": 0.0157, "step": 36670 }, { "epoch": 0.6194481034890398, "grad_norm": 0.2714206874370575, "learning_rate": 8.718960184188742e-06, "loss": 0.0146, "step": 36680 }, { "epoch": 0.6196169824703618, "grad_norm": 0.1898977905511856, "learning_rate": 8.717974951858645e-06, "loss": 0.0152, "step": 36690 }, { "epoch": 0.6197858614516837, "grad_norm": 0.3235757350921631, "learning_rate": 8.716989396519997e-06, "loss": 0.0102, "step": 36700 }, { "epoch": 0.6199547404330057, "grad_norm": 0.2065633088350296, "learning_rate": 8.716003518258417e-06, "loss": 0.0111, "step": 36710 }, { "epoch": 0.6201236194143277, "grad_norm": 0.14227710664272308, "learning_rate": 8.715017317159558e-06, "loss": 0.0171, "step": 36720 }, { "epoch": 0.6202924983956497, "grad_norm": 0.16530486941337585, "learning_rate": 8.714030793309097e-06, "loss": 0.01, "step": 36730 }, { "epoch": 0.6204613773769717, "grad_norm": 0.2735306918621063, "learning_rate": 8.713043946792743e-06, "loss": 0.0136, "step": 36740 }, { "epoch": 0.6206302563582936, "grad_norm": 0.19315029680728912, "learning_rate": 8.712056777696227e-06, "loss": 0.0135, "step": 36750 }, { "epoch": 0.6207991353396156, "grad_norm": 0.47587963938713074, "learning_rate": 8.711069286105314e-06, "loss": 0.0122, "step": 36760 }, { "epoch": 0.6209680143209376, "grad_norm": 0.25627315044403076, "learning_rate": 8.710081472105796e-06, "loss": 0.0124, "step": 36770 }, { "epoch": 0.6211368933022596, "grad_norm": 0.3066911995410919, "learning_rate": 8.709093335783491e-06, "loss": 0.0144, "step": 36780 }, { "epoch": 0.6213057722835816, "grad_norm": 0.2959972620010376, "learning_rate": 8.708104877224246e-06, "loss": 0.0124, "step": 36790 }, { "epoch": 0.6214746512649035, "grad_norm": 0.39057856798171997, "learning_rate": 8.707116096513935e-06, "loss": 0.0132, "step": 36800 }, { "epoch": 0.6216435302462255, "grad_norm": 1.7104684114456177, "learning_rate": 8.70612699373846e-06, "loss": 0.017, "step": 36810 }, { "epoch": 0.6218124092275475, "grad_norm": 0.28709834814071655, "learning_rate": 8.705137568983754e-06, "loss": 0.0143, "step": 36820 }, { "epoch": 0.6219812882088696, "grad_norm": 0.4411291480064392, "learning_rate": 8.704147822335776e-06, "loss": 0.0126, "step": 36830 }, { "epoch": 0.6221501671901916, "grad_norm": 0.17485707998275757, "learning_rate": 8.703157753880511e-06, "loss": 0.0103, "step": 36840 }, { "epoch": 0.6223190461715135, "grad_norm": 0.2565169036388397, "learning_rate": 8.702167363703975e-06, "loss": 0.0117, "step": 36850 }, { "epoch": 0.6224879251528355, "grad_norm": 0.32308369874954224, "learning_rate": 8.70117665189221e-06, "loss": 0.0142, "step": 36860 }, { "epoch": 0.6226568041341575, "grad_norm": 0.20571167767047882, "learning_rate": 8.700185618531286e-06, "loss": 0.0113, "step": 36870 }, { "epoch": 0.6228256831154795, "grad_norm": 0.29156526923179626, "learning_rate": 8.699194263707303e-06, "loss": 0.0117, "step": 36880 }, { "epoch": 0.6229945620968015, "grad_norm": 0.14705628156661987, "learning_rate": 8.698202587506385e-06, "loss": 0.0112, "step": 36890 }, { "epoch": 0.6231634410781234, "grad_norm": 0.4012943208217621, "learning_rate": 8.69721059001469e-06, "loss": 0.0126, "step": 36900 }, { "epoch": 0.6233323200594454, "grad_norm": 0.33518439531326294, "learning_rate": 8.696218271318398e-06, "loss": 0.0091, "step": 36910 }, { "epoch": 0.6235011990407674, "grad_norm": 0.4512496888637543, "learning_rate": 8.695225631503721e-06, "loss": 0.0131, "step": 36920 }, { "epoch": 0.6236700780220894, "grad_norm": 0.47511911392211914, "learning_rate": 8.694232670656894e-06, "loss": 0.0152, "step": 36930 }, { "epoch": 0.6238389570034114, "grad_norm": 0.32702356576919556, "learning_rate": 8.693239388864185e-06, "loss": 0.014, "step": 36940 }, { "epoch": 0.6240078359847333, "grad_norm": 0.5963748693466187, "learning_rate": 8.692245786211888e-06, "loss": 0.0133, "step": 36950 }, { "epoch": 0.6241767149660553, "grad_norm": 0.2996255159378052, "learning_rate": 8.691251862786325e-06, "loss": 0.0187, "step": 36960 }, { "epoch": 0.6243455939473773, "grad_norm": 0.3139644265174866, "learning_rate": 8.690257618673845e-06, "loss": 0.0133, "step": 36970 }, { "epoch": 0.6245144729286993, "grad_norm": 0.2940963804721832, "learning_rate": 8.689263053960826e-06, "loss": 0.0106, "step": 36980 }, { "epoch": 0.6246833519100213, "grad_norm": 0.49121859669685364, "learning_rate": 8.688268168733673e-06, "loss": 0.0171, "step": 36990 }, { "epoch": 0.6248522308913432, "grad_norm": 0.3564804792404175, "learning_rate": 8.687272963078818e-06, "loss": 0.0131, "step": 37000 }, { "epoch": 0.6250211098726652, "grad_norm": 0.45515018701553345, "learning_rate": 8.686277437082723e-06, "loss": 0.0219, "step": 37010 }, { "epoch": 0.6251899888539872, "grad_norm": 0.21793796122074127, "learning_rate": 8.685281590831877e-06, "loss": 0.0095, "step": 37020 }, { "epoch": 0.6253588678353093, "grad_norm": 0.43722015619277954, "learning_rate": 8.684285424412799e-06, "loss": 0.011, "step": 37030 }, { "epoch": 0.6255277468166311, "grad_norm": 0.17248278856277466, "learning_rate": 8.68328893791203e-06, "loss": 0.0138, "step": 37040 }, { "epoch": 0.6256966257979532, "grad_norm": 0.23726725578308105, "learning_rate": 8.682292131416142e-06, "loss": 0.0156, "step": 37050 }, { "epoch": 0.6258655047792752, "grad_norm": 0.19464842975139618, "learning_rate": 8.681295005011739e-06, "loss": 0.0183, "step": 37060 }, { "epoch": 0.6260343837605972, "grad_norm": 0.231507807970047, "learning_rate": 8.680297558785444e-06, "loss": 0.0143, "step": 37070 }, { "epoch": 0.6262032627419192, "grad_norm": 0.4537944197654724, "learning_rate": 8.679299792823917e-06, "loss": 0.0193, "step": 37080 }, { "epoch": 0.6263721417232411, "grad_norm": 0.4318271279335022, "learning_rate": 8.67830170721384e-06, "loss": 0.0128, "step": 37090 }, { "epoch": 0.6265410207045631, "grad_norm": 0.36371421813964844, "learning_rate": 8.677303302041922e-06, "loss": 0.0134, "step": 37100 }, { "epoch": 0.6267098996858851, "grad_norm": 0.23517626523971558, "learning_rate": 8.676304577394906e-06, "loss": 0.0076, "step": 37110 }, { "epoch": 0.6268787786672071, "grad_norm": 0.25445717573165894, "learning_rate": 8.675305533359556e-06, "loss": 0.011, "step": 37120 }, { "epoch": 0.6270476576485291, "grad_norm": 0.3912562131881714, "learning_rate": 8.674306170022666e-06, "loss": 0.0107, "step": 37130 }, { "epoch": 0.627216536629851, "grad_norm": 0.37826213240623474, "learning_rate": 8.67330648747106e-06, "loss": 0.0127, "step": 37140 }, { "epoch": 0.627385415611173, "grad_norm": 0.21439026296138763, "learning_rate": 8.672306485791586e-06, "loss": 0.0144, "step": 37150 }, { "epoch": 0.627554294592495, "grad_norm": 0.2711268961429596, "learning_rate": 8.671306165071125e-06, "loss": 0.0192, "step": 37160 }, { "epoch": 0.627723173573817, "grad_norm": 0.3755973279476166, "learning_rate": 8.670305525396581e-06, "loss": 0.0118, "step": 37170 }, { "epoch": 0.627892052555139, "grad_norm": 0.20100459456443787, "learning_rate": 8.669304566854883e-06, "loss": 0.0163, "step": 37180 }, { "epoch": 0.6280609315364609, "grad_norm": 0.28272828459739685, "learning_rate": 8.668303289533e-06, "loss": 0.017, "step": 37190 }, { "epoch": 0.6282298105177829, "grad_norm": 0.24809302389621735, "learning_rate": 8.667301693517912e-06, "loss": 0.0101, "step": 37200 }, { "epoch": 0.6283986894991049, "grad_norm": 0.37955379486083984, "learning_rate": 8.666299778896642e-06, "loss": 0.0108, "step": 37210 }, { "epoch": 0.6285675684804269, "grad_norm": 0.27815333008766174, "learning_rate": 8.66529754575623e-06, "loss": 0.0129, "step": 37220 }, { "epoch": 0.628736447461749, "grad_norm": 0.4643027186393738, "learning_rate": 8.664294994183749e-06, "loss": 0.0123, "step": 37230 }, { "epoch": 0.6289053264430708, "grad_norm": 0.411923348903656, "learning_rate": 8.663292124266297e-06, "loss": 0.0137, "step": 37240 }, { "epoch": 0.6290742054243929, "grad_norm": 0.14192308485507965, "learning_rate": 8.662288936091002e-06, "loss": 0.0138, "step": 37250 }, { "epoch": 0.6292430844057149, "grad_norm": 0.5402505397796631, "learning_rate": 8.661285429745019e-06, "loss": 0.0137, "step": 37260 }, { "epoch": 0.6294119633870369, "grad_norm": 0.32625722885131836, "learning_rate": 8.660281605315528e-06, "loss": 0.011, "step": 37270 }, { "epoch": 0.6295808423683589, "grad_norm": 0.2858527898788452, "learning_rate": 8.659277462889742e-06, "loss": 0.0122, "step": 37280 }, { "epoch": 0.6297497213496808, "grad_norm": 0.2738531827926636, "learning_rate": 8.658273002554893e-06, "loss": 0.0111, "step": 37290 }, { "epoch": 0.6299186003310028, "grad_norm": 0.41171902418136597, "learning_rate": 8.657268224398254e-06, "loss": 0.0167, "step": 37300 }, { "epoch": 0.6300874793123248, "grad_norm": 0.3307449221611023, "learning_rate": 8.656263128507112e-06, "loss": 0.0158, "step": 37310 }, { "epoch": 0.6302563582936468, "grad_norm": 0.31655487418174744, "learning_rate": 8.655257714968787e-06, "loss": 0.0092, "step": 37320 }, { "epoch": 0.6304252372749688, "grad_norm": 0.5419958829879761, "learning_rate": 8.654251983870629e-06, "loss": 0.0157, "step": 37330 }, { "epoch": 0.6305941162562907, "grad_norm": 0.26532331109046936, "learning_rate": 8.653245935300015e-06, "loss": 0.0109, "step": 37340 }, { "epoch": 0.6307629952376127, "grad_norm": 0.4453965723514557, "learning_rate": 8.652239569344342e-06, "loss": 0.0097, "step": 37350 }, { "epoch": 0.6309318742189347, "grad_norm": 0.35132864117622375, "learning_rate": 8.651232886091048e-06, "loss": 0.011, "step": 37360 }, { "epoch": 0.6311007532002567, "grad_norm": 0.4223516583442688, "learning_rate": 8.650225885627587e-06, "loss": 0.0099, "step": 37370 }, { "epoch": 0.6312696321815787, "grad_norm": 0.41193634271621704, "learning_rate": 8.649218568041446e-06, "loss": 0.0116, "step": 37380 }, { "epoch": 0.6314385111629006, "grad_norm": 0.25965234637260437, "learning_rate": 8.648210933420135e-06, "loss": 0.0093, "step": 37390 }, { "epoch": 0.6316073901442226, "grad_norm": 0.2371969074010849, "learning_rate": 8.647202981851202e-06, "loss": 0.0184, "step": 37400 }, { "epoch": 0.6317762691255446, "grad_norm": 0.42637625336647034, "learning_rate": 8.646194713422206e-06, "loss": 0.0131, "step": 37410 }, { "epoch": 0.6319451481068666, "grad_norm": 0.29317814111709595, "learning_rate": 8.645186128220752e-06, "loss": 0.018, "step": 37420 }, { "epoch": 0.6321140270881886, "grad_norm": 0.4829319417476654, "learning_rate": 8.644177226334459e-06, "loss": 0.012, "step": 37430 }, { "epoch": 0.6322829060695105, "grad_norm": 0.49588218331336975, "learning_rate": 8.643168007850975e-06, "loss": 0.0103, "step": 37440 }, { "epoch": 0.6324517850508325, "grad_norm": 0.8827278017997742, "learning_rate": 8.642158472857984e-06, "loss": 0.0186, "step": 37450 }, { "epoch": 0.6326206640321546, "grad_norm": 0.3235566318035126, "learning_rate": 8.64114862144319e-06, "loss": 0.0115, "step": 37460 }, { "epoch": 0.6327895430134766, "grad_norm": 0.3982635736465454, "learning_rate": 8.640138453694324e-06, "loss": 0.0129, "step": 37470 }, { "epoch": 0.6329584219947986, "grad_norm": 0.2814439535140991, "learning_rate": 8.63912796969915e-06, "loss": 0.0094, "step": 37480 }, { "epoch": 0.6331273009761205, "grad_norm": 0.6687512397766113, "learning_rate": 8.638117169545455e-06, "loss": 0.0115, "step": 37490 }, { "epoch": 0.6332961799574425, "grad_norm": 0.44237229228019714, "learning_rate": 8.637106053321053e-06, "loss": 0.012, "step": 37500 }, { "epoch": 0.6334650589387645, "grad_norm": 0.2573307454586029, "learning_rate": 8.636094621113794e-06, "loss": 0.0121, "step": 37510 }, { "epoch": 0.6336339379200865, "grad_norm": 0.6105567812919617, "learning_rate": 8.63508287301154e-06, "loss": 0.0105, "step": 37520 }, { "epoch": 0.6338028169014085, "grad_norm": 0.7206735610961914, "learning_rate": 8.634070809102196e-06, "loss": 0.0202, "step": 37530 }, { "epoch": 0.6339716958827304, "grad_norm": 0.3728944957256317, "learning_rate": 8.633058429473682e-06, "loss": 0.0119, "step": 37540 }, { "epoch": 0.6341405748640524, "grad_norm": 0.43207770586013794, "learning_rate": 8.632045734213957e-06, "loss": 0.0115, "step": 37550 }, { "epoch": 0.6343094538453744, "grad_norm": 0.5599718689918518, "learning_rate": 8.631032723410998e-06, "loss": 0.0102, "step": 37560 }, { "epoch": 0.6344783328266964, "grad_norm": 0.46625831723213196, "learning_rate": 8.630019397152811e-06, "loss": 0.0145, "step": 37570 }, { "epoch": 0.6346472118080184, "grad_norm": 0.28533807396888733, "learning_rate": 8.629005755527436e-06, "loss": 0.0139, "step": 37580 }, { "epoch": 0.6348160907893403, "grad_norm": 0.3656749129295349, "learning_rate": 8.627991798622932e-06, "loss": 0.0131, "step": 37590 }, { "epoch": 0.6349849697706623, "grad_norm": 0.4012068510055542, "learning_rate": 8.626977526527392e-06, "loss": 0.0111, "step": 37600 }, { "epoch": 0.6351538487519843, "grad_norm": 0.22687885165214539, "learning_rate": 8.625962939328931e-06, "loss": 0.0136, "step": 37610 }, { "epoch": 0.6353227277333063, "grad_norm": 0.4902939796447754, "learning_rate": 8.624948037115695e-06, "loss": 0.0117, "step": 37620 }, { "epoch": 0.6354916067146283, "grad_norm": 0.46686720848083496, "learning_rate": 8.623932819975855e-06, "loss": 0.0138, "step": 37630 }, { "epoch": 0.6356604856959502, "grad_norm": 0.2897970676422119, "learning_rate": 8.622917287997612e-06, "loss": 0.0137, "step": 37640 }, { "epoch": 0.6358293646772722, "grad_norm": 0.16473078727722168, "learning_rate": 8.621901441269194e-06, "loss": 0.0135, "step": 37650 }, { "epoch": 0.6359982436585943, "grad_norm": 0.20700731873512268, "learning_rate": 8.620885279878852e-06, "loss": 0.0094, "step": 37660 }, { "epoch": 0.6361671226399163, "grad_norm": 0.13878686726093292, "learning_rate": 8.61986880391487e-06, "loss": 0.0109, "step": 37670 }, { "epoch": 0.6363360016212383, "grad_norm": 0.2550697326660156, "learning_rate": 8.618852013465557e-06, "loss": 0.012, "step": 37680 }, { "epoch": 0.6365048806025602, "grad_norm": 0.3606637120246887, "learning_rate": 8.617834908619248e-06, "loss": 0.0132, "step": 37690 }, { "epoch": 0.6366737595838822, "grad_norm": 0.6950335502624512, "learning_rate": 8.61681748946431e-06, "loss": 0.0133, "step": 37700 }, { "epoch": 0.6368426385652042, "grad_norm": 0.19572758674621582, "learning_rate": 8.615799756089126e-06, "loss": 0.0142, "step": 37710 }, { "epoch": 0.6370115175465262, "grad_norm": 0.27191850543022156, "learning_rate": 8.614781708582121e-06, "loss": 0.0193, "step": 37720 }, { "epoch": 0.6371803965278482, "grad_norm": 0.16277171671390533, "learning_rate": 8.61376334703174e-06, "loss": 0.0175, "step": 37730 }, { "epoch": 0.6373492755091701, "grad_norm": 0.2902902662754059, "learning_rate": 8.612744671526454e-06, "loss": 0.0107, "step": 37740 }, { "epoch": 0.6375181544904921, "grad_norm": 0.6800429224967957, "learning_rate": 8.611725682154763e-06, "loss": 0.0181, "step": 37750 }, { "epoch": 0.6376870334718141, "grad_norm": 0.32164233922958374, "learning_rate": 8.610706379005196e-06, "loss": 0.0095, "step": 37760 }, { "epoch": 0.6378559124531361, "grad_norm": 0.5322725176811218, "learning_rate": 8.609686762166306e-06, "loss": 0.017, "step": 37770 }, { "epoch": 0.6380247914344581, "grad_norm": 0.4601190686225891, "learning_rate": 8.608666831726675e-06, "loss": 0.0133, "step": 37780 }, { "epoch": 0.63819367041578, "grad_norm": 0.2594170868396759, "learning_rate": 8.607646587774912e-06, "loss": 0.0154, "step": 37790 }, { "epoch": 0.638362549397102, "grad_norm": 0.23163098096847534, "learning_rate": 8.606626030399653e-06, "loss": 0.0113, "step": 37800 }, { "epoch": 0.638531428378424, "grad_norm": 0.39834892749786377, "learning_rate": 8.605605159689563e-06, "loss": 0.0131, "step": 37810 }, { "epoch": 0.638700307359746, "grad_norm": 0.3297876715660095, "learning_rate": 8.604583975733331e-06, "loss": 0.0198, "step": 37820 }, { "epoch": 0.638869186341068, "grad_norm": 0.20426252484321594, "learning_rate": 8.603562478619676e-06, "loss": 0.0124, "step": 37830 }, { "epoch": 0.6390380653223899, "grad_norm": 0.3862072229385376, "learning_rate": 8.602540668437343e-06, "loss": 0.01, "step": 37840 }, { "epoch": 0.6392069443037119, "grad_norm": 0.19004659354686737, "learning_rate": 8.601518545275105e-06, "loss": 0.0143, "step": 37850 }, { "epoch": 0.639375823285034, "grad_norm": 0.16592803597450256, "learning_rate": 8.60049610922176e-06, "loss": 0.0116, "step": 37860 }, { "epoch": 0.639544702266356, "grad_norm": 0.16895180940628052, "learning_rate": 8.599473360366138e-06, "loss": 0.014, "step": 37870 }, { "epoch": 0.639713581247678, "grad_norm": 0.33063530921936035, "learning_rate": 8.598450298797087e-06, "loss": 0.0105, "step": 37880 }, { "epoch": 0.6398824602289999, "grad_norm": 0.21470004320144653, "learning_rate": 8.597426924603494e-06, "loss": 0.0108, "step": 37890 }, { "epoch": 0.6400513392103219, "grad_norm": 0.4957345128059387, "learning_rate": 8.596403237874263e-06, "loss": 0.0168, "step": 37900 }, { "epoch": 0.6402202181916439, "grad_norm": 0.3953581154346466, "learning_rate": 8.595379238698331e-06, "loss": 0.008, "step": 37910 }, { "epoch": 0.6403890971729659, "grad_norm": 0.5112695097923279, "learning_rate": 8.594354927164664e-06, "loss": 0.0142, "step": 37920 }, { "epoch": 0.6405579761542879, "grad_norm": 0.5843388438224792, "learning_rate": 8.593330303362245e-06, "loss": 0.0109, "step": 37930 }, { "epoch": 0.6407268551356098, "grad_norm": 0.2294318526983261, "learning_rate": 8.592305367380096e-06, "loss": 0.0098, "step": 37940 }, { "epoch": 0.6408957341169318, "grad_norm": 0.39706459641456604, "learning_rate": 8.59128011930726e-06, "loss": 0.007, "step": 37950 }, { "epoch": 0.6410646130982538, "grad_norm": 0.3869457244873047, "learning_rate": 8.590254559232804e-06, "loss": 0.0125, "step": 37960 }, { "epoch": 0.6412334920795758, "grad_norm": 0.41509753465652466, "learning_rate": 8.589228687245832e-06, "loss": 0.0139, "step": 37970 }, { "epoch": 0.6414023710608978, "grad_norm": 0.4624182879924774, "learning_rate": 8.588202503435466e-06, "loss": 0.0152, "step": 37980 }, { "epoch": 0.6415712500422197, "grad_norm": 0.3344597816467285, "learning_rate": 8.587176007890858e-06, "loss": 0.014, "step": 37990 }, { "epoch": 0.6417401290235417, "grad_norm": 0.28660210967063904, "learning_rate": 8.58614920070119e-06, "loss": 0.01, "step": 38000 }, { "epoch": 0.6419090080048637, "grad_norm": 0.21404990553855896, "learning_rate": 8.585122081955666e-06, "loss": 0.0133, "step": 38010 }, { "epoch": 0.6420778869861857, "grad_norm": 0.17201706767082214, "learning_rate": 8.584094651743521e-06, "loss": 0.0097, "step": 38020 }, { "epoch": 0.6422467659675077, "grad_norm": 0.3942388892173767, "learning_rate": 8.583066910154015e-06, "loss": 0.0119, "step": 38030 }, { "epoch": 0.6424156449488296, "grad_norm": 0.24716559052467346, "learning_rate": 8.582038857276434e-06, "loss": 0.0132, "step": 38040 }, { "epoch": 0.6425845239301516, "grad_norm": 0.31735286116600037, "learning_rate": 8.581010493200096e-06, "loss": 0.0114, "step": 38050 }, { "epoch": 0.6427534029114736, "grad_norm": 0.2069772183895111, "learning_rate": 8.579981818014341e-06, "loss": 0.0078, "step": 38060 }, { "epoch": 0.6429222818927957, "grad_norm": 0.3262009918689728, "learning_rate": 8.578952831808539e-06, "loss": 0.0131, "step": 38070 }, { "epoch": 0.6430911608741177, "grad_norm": 0.33835676312446594, "learning_rate": 8.577923534672081e-06, "loss": 0.0118, "step": 38080 }, { "epoch": 0.6432600398554396, "grad_norm": 0.291413277387619, "learning_rate": 8.576893926694397e-06, "loss": 0.0178, "step": 38090 }, { "epoch": 0.6434289188367616, "grad_norm": 0.12457141280174255, "learning_rate": 8.575864007964932e-06, "loss": 0.0131, "step": 38100 }, { "epoch": 0.6435977978180836, "grad_norm": 0.3811846375465393, "learning_rate": 8.574833778573164e-06, "loss": 0.0133, "step": 38110 }, { "epoch": 0.6437666767994056, "grad_norm": 0.36235663294792175, "learning_rate": 8.5738032386086e-06, "loss": 0.0148, "step": 38120 }, { "epoch": 0.6439355557807275, "grad_norm": 0.33182623982429504, "learning_rate": 8.572772388160763e-06, "loss": 0.0134, "step": 38130 }, { "epoch": 0.6441044347620495, "grad_norm": 0.28978708386421204, "learning_rate": 8.57174122731922e-06, "loss": 0.0096, "step": 38140 }, { "epoch": 0.6442733137433715, "grad_norm": 0.18557409942150116, "learning_rate": 8.570709756173548e-06, "loss": 0.0123, "step": 38150 }, { "epoch": 0.6444421927246935, "grad_norm": 0.4751642942428589, "learning_rate": 8.569677974813364e-06, "loss": 0.0144, "step": 38160 }, { "epoch": 0.6446110717060155, "grad_norm": 0.2356078177690506, "learning_rate": 8.568645883328303e-06, "loss": 0.0097, "step": 38170 }, { "epoch": 0.6447799506873374, "grad_norm": 0.2032170295715332, "learning_rate": 8.567613481808034e-06, "loss": 0.0139, "step": 38180 }, { "epoch": 0.6449488296686594, "grad_norm": 0.26225048303604126, "learning_rate": 8.566580770342246e-06, "loss": 0.0096, "step": 38190 }, { "epoch": 0.6451177086499814, "grad_norm": 0.40109169483184814, "learning_rate": 8.565547749020662e-06, "loss": 0.0143, "step": 38200 }, { "epoch": 0.6452865876313034, "grad_norm": 0.19793319702148438, "learning_rate": 8.564514417933025e-06, "loss": 0.0136, "step": 38210 }, { "epoch": 0.6454554666126254, "grad_norm": 0.785406231880188, "learning_rate": 8.563480777169108e-06, "loss": 0.0205, "step": 38220 }, { "epoch": 0.6456243455939473, "grad_norm": 0.4174165427684784, "learning_rate": 8.562446826818716e-06, "loss": 0.0148, "step": 38230 }, { "epoch": 0.6457932245752693, "grad_norm": 0.35657525062561035, "learning_rate": 8.561412566971671e-06, "loss": 0.013, "step": 38240 }, { "epoch": 0.6459621035565913, "grad_norm": 0.1661452353000641, "learning_rate": 8.56037799771783e-06, "loss": 0.0149, "step": 38250 }, { "epoch": 0.6461309825379133, "grad_norm": 0.2798537313938141, "learning_rate": 8.559343119147075e-06, "loss": 0.0107, "step": 38260 }, { "epoch": 0.6462998615192354, "grad_norm": 0.269522488117218, "learning_rate": 8.558307931349309e-06, "loss": 0.0103, "step": 38270 }, { "epoch": 0.6464687405005572, "grad_norm": 0.6430035829544067, "learning_rate": 8.55727243441447e-06, "loss": 0.0134, "step": 38280 }, { "epoch": 0.6466376194818793, "grad_norm": 0.37135428190231323, "learning_rate": 8.556236628432517e-06, "loss": 0.0159, "step": 38290 }, { "epoch": 0.6468064984632013, "grad_norm": 0.30446192622184753, "learning_rate": 8.555200513493442e-06, "loss": 0.0132, "step": 38300 }, { "epoch": 0.6469753774445233, "grad_norm": 0.30114951729774475, "learning_rate": 8.554164089687255e-06, "loss": 0.0097, "step": 38310 }, { "epoch": 0.6471442564258453, "grad_norm": 0.31075048446655273, "learning_rate": 8.553127357104003e-06, "loss": 0.0121, "step": 38320 }, { "epoch": 0.6473131354071672, "grad_norm": 0.2948855459690094, "learning_rate": 8.552090315833753e-06, "loss": 0.014, "step": 38330 }, { "epoch": 0.6474820143884892, "grad_norm": 0.6233791708946228, "learning_rate": 8.551052965966599e-06, "loss": 0.0091, "step": 38340 }, { "epoch": 0.6476508933698112, "grad_norm": 0.4202510714530945, "learning_rate": 8.550015307592666e-06, "loss": 0.0094, "step": 38350 }, { "epoch": 0.6478197723511332, "grad_norm": 0.20123396813869476, "learning_rate": 8.548977340802103e-06, "loss": 0.0127, "step": 38360 }, { "epoch": 0.6479886513324552, "grad_norm": 0.5492796897888184, "learning_rate": 8.547939065685082e-06, "loss": 0.0158, "step": 38370 }, { "epoch": 0.6481575303137771, "grad_norm": 0.3745957911014557, "learning_rate": 8.54690048233181e-06, "loss": 0.0115, "step": 38380 }, { "epoch": 0.6483264092950991, "grad_norm": 0.5333303809165955, "learning_rate": 8.545861590832515e-06, "loss": 0.0232, "step": 38390 }, { "epoch": 0.6484952882764211, "grad_norm": 0.2545996606349945, "learning_rate": 8.544822391277454e-06, "loss": 0.0136, "step": 38400 }, { "epoch": 0.6486641672577431, "grad_norm": 0.32117709517478943, "learning_rate": 8.54378288375691e-06, "loss": 0.0096, "step": 38410 }, { "epoch": 0.6488330462390651, "grad_norm": 0.14981786906719208, "learning_rate": 8.542743068361195e-06, "loss": 0.018, "step": 38420 }, { "epoch": 0.649001925220387, "grad_norm": 0.32619595527648926, "learning_rate": 8.541702945180639e-06, "loss": 0.0093, "step": 38430 }, { "epoch": 0.649170804201709, "grad_norm": 0.40606358647346497, "learning_rate": 8.540662514305612e-06, "loss": 0.0149, "step": 38440 }, { "epoch": 0.649339683183031, "grad_norm": 0.37711378931999207, "learning_rate": 8.5396217758265e-06, "loss": 0.0107, "step": 38450 }, { "epoch": 0.649508562164353, "grad_norm": 0.34101343154907227, "learning_rate": 8.538580729833723e-06, "loss": 0.011, "step": 38460 }, { "epoch": 0.649677441145675, "grad_norm": 0.265125036239624, "learning_rate": 8.537539376417722e-06, "loss": 0.0166, "step": 38470 }, { "epoch": 0.649846320126997, "grad_norm": 0.5395755171775818, "learning_rate": 8.536497715668971e-06, "loss": 0.012, "step": 38480 }, { "epoch": 0.650015199108319, "grad_norm": 0.35794007778167725, "learning_rate": 8.535455747677961e-06, "loss": 0.0135, "step": 38490 }, { "epoch": 0.650184078089641, "grad_norm": 0.31857186555862427, "learning_rate": 8.534413472535222e-06, "loss": 0.0122, "step": 38500 }, { "epoch": 0.650352957070963, "grad_norm": 0.38044875860214233, "learning_rate": 8.533370890331298e-06, "loss": 0.0191, "step": 38510 }, { "epoch": 0.650521836052285, "grad_norm": 0.3679791986942291, "learning_rate": 8.53232800115677e-06, "loss": 0.0093, "step": 38520 }, { "epoch": 0.6506907150336069, "grad_norm": 0.2929641604423523, "learning_rate": 8.531284805102242e-06, "loss": 0.011, "step": 38530 }, { "epoch": 0.6508595940149289, "grad_norm": 0.25449374318122864, "learning_rate": 8.530241302258343e-06, "loss": 0.009, "step": 38540 }, { "epoch": 0.6510284729962509, "grad_norm": 0.2652078866958618, "learning_rate": 8.529197492715728e-06, "loss": 0.0134, "step": 38550 }, { "epoch": 0.6511973519775729, "grad_norm": 0.38325434923171997, "learning_rate": 8.528153376565084e-06, "loss": 0.0109, "step": 38560 }, { "epoch": 0.6513662309588949, "grad_norm": 0.3753387928009033, "learning_rate": 8.527108953897121e-06, "loss": 0.0157, "step": 38570 }, { "epoch": 0.6515351099402168, "grad_norm": 0.15534073114395142, "learning_rate": 8.526064224802572e-06, "loss": 0.0178, "step": 38580 }, { "epoch": 0.6517039889215388, "grad_norm": 0.28162238001823425, "learning_rate": 8.525019189372205e-06, "loss": 0.0099, "step": 38590 }, { "epoch": 0.6518728679028608, "grad_norm": 0.4760454297065735, "learning_rate": 8.523973847696809e-06, "loss": 0.0109, "step": 38600 }, { "epoch": 0.6520417468841828, "grad_norm": 0.37654367089271545, "learning_rate": 8.5229281998672e-06, "loss": 0.0138, "step": 38610 }, { "epoch": 0.6522106258655048, "grad_norm": 0.3962853252887726, "learning_rate": 8.52188224597422e-06, "loss": 0.008, "step": 38620 }, { "epoch": 0.6523795048468267, "grad_norm": 0.254077285528183, "learning_rate": 8.520835986108743e-06, "loss": 0.0089, "step": 38630 }, { "epoch": 0.6525483838281487, "grad_norm": 0.31678852438926697, "learning_rate": 8.519789420361661e-06, "loss": 0.0105, "step": 38640 }, { "epoch": 0.6527172628094707, "grad_norm": 0.22203032672405243, "learning_rate": 8.518742548823899e-06, "loss": 0.0102, "step": 38650 }, { "epoch": 0.6528861417907927, "grad_norm": 0.6165353655815125, "learning_rate": 8.517695371586408e-06, "loss": 0.0136, "step": 38660 }, { "epoch": 0.6530550207721147, "grad_norm": 0.2529297471046448, "learning_rate": 8.516647888740161e-06, "loss": 0.011, "step": 38670 }, { "epoch": 0.6532238997534366, "grad_norm": 0.2952573299407959, "learning_rate": 8.515600100376164e-06, "loss": 0.0103, "step": 38680 }, { "epoch": 0.6533927787347587, "grad_norm": 0.26103663444519043, "learning_rate": 8.514552006585444e-06, "loss": 0.0138, "step": 38690 }, { "epoch": 0.6535616577160807, "grad_norm": 0.4808572828769684, "learning_rate": 8.513503607459057e-06, "loss": 0.0131, "step": 38700 }, { "epoch": 0.6537305366974027, "grad_norm": 0.24693365395069122, "learning_rate": 8.512454903088088e-06, "loss": 0.0113, "step": 38710 }, { "epoch": 0.6538994156787247, "grad_norm": 0.296301931142807, "learning_rate": 8.511405893563642e-06, "loss": 0.0167, "step": 38720 }, { "epoch": 0.6540682946600466, "grad_norm": 0.3051263689994812, "learning_rate": 8.510356578976857e-06, "loss": 0.0166, "step": 38730 }, { "epoch": 0.6542371736413686, "grad_norm": 0.3517211079597473, "learning_rate": 8.509306959418895e-06, "loss": 0.0103, "step": 38740 }, { "epoch": 0.6544060526226906, "grad_norm": 0.06324566900730133, "learning_rate": 8.508257034980941e-06, "loss": 0.0156, "step": 38750 }, { "epoch": 0.6545749316040126, "grad_norm": 0.4400292932987213, "learning_rate": 8.507206805754215e-06, "loss": 0.014, "step": 38760 }, { "epoch": 0.6547438105853346, "grad_norm": 0.15270347893238068, "learning_rate": 8.506156271829954e-06, "loss": 0.0182, "step": 38770 }, { "epoch": 0.6549126895666565, "grad_norm": 0.46321970224380493, "learning_rate": 8.505105433299428e-06, "loss": 0.0145, "step": 38780 }, { "epoch": 0.6550815685479785, "grad_norm": 0.26478657126426697, "learning_rate": 8.504054290253932e-06, "loss": 0.0104, "step": 38790 }, { "epoch": 0.6552504475293005, "grad_norm": 0.5245122313499451, "learning_rate": 8.503002842784782e-06, "loss": 0.0167, "step": 38800 }, { "epoch": 0.6554193265106225, "grad_norm": 0.43785202503204346, "learning_rate": 8.501951090983331e-06, "loss": 0.0071, "step": 38810 }, { "epoch": 0.6555882054919445, "grad_norm": 0.47871965169906616, "learning_rate": 8.500899034940952e-06, "loss": 0.0102, "step": 38820 }, { "epoch": 0.6557570844732664, "grad_norm": 0.2065676748752594, "learning_rate": 8.49984667474904e-06, "loss": 0.0092, "step": 38830 }, { "epoch": 0.6559259634545884, "grad_norm": 0.5593289732933044, "learning_rate": 8.498794010499027e-06, "loss": 0.0179, "step": 38840 }, { "epoch": 0.6560948424359104, "grad_norm": 0.2450961023569107, "learning_rate": 8.497741042282364e-06, "loss": 0.0078, "step": 38850 }, { "epoch": 0.6562637214172324, "grad_norm": 0.39785000681877136, "learning_rate": 8.496687770190529e-06, "loss": 0.0133, "step": 38860 }, { "epoch": 0.6564326003985544, "grad_norm": 0.4620729088783264, "learning_rate": 8.495634194315028e-06, "loss": 0.0128, "step": 38870 }, { "epoch": 0.6566014793798763, "grad_norm": 0.7051069736480713, "learning_rate": 8.494580314747396e-06, "loss": 0.0142, "step": 38880 }, { "epoch": 0.6567703583611983, "grad_norm": 0.3605963885784149, "learning_rate": 8.493526131579188e-06, "loss": 0.0094, "step": 38890 }, { "epoch": 0.6569392373425204, "grad_norm": 0.20349712669849396, "learning_rate": 8.49247164490199e-06, "loss": 0.0199, "step": 38900 }, { "epoch": 0.6571081163238424, "grad_norm": 0.27782541513442993, "learning_rate": 8.491416854807412e-06, "loss": 0.0148, "step": 38910 }, { "epoch": 0.6572769953051644, "grad_norm": 0.31796473264694214, "learning_rate": 8.490361761387095e-06, "loss": 0.0099, "step": 38920 }, { "epoch": 0.6574458742864863, "grad_norm": 1.0125811100006104, "learning_rate": 8.4893063647327e-06, "loss": 0.012, "step": 38930 }, { "epoch": 0.6576147532678083, "grad_norm": 0.28303298354148865, "learning_rate": 8.48825066493592e-06, "loss": 0.016, "step": 38940 }, { "epoch": 0.6577836322491303, "grad_norm": 0.36916378140449524, "learning_rate": 8.487194662088468e-06, "loss": 0.0116, "step": 38950 }, { "epoch": 0.6579525112304523, "grad_norm": 0.19140088558197021, "learning_rate": 8.48613835628209e-06, "loss": 0.0131, "step": 38960 }, { "epoch": 0.6581213902117743, "grad_norm": 0.5260114073753357, "learning_rate": 8.485081747608553e-06, "loss": 0.0115, "step": 38970 }, { "epoch": 0.6582902691930962, "grad_norm": 0.4645565152168274, "learning_rate": 8.484024836159652e-06, "loss": 0.011, "step": 38980 }, { "epoch": 0.6584591481744182, "grad_norm": 0.21117472648620605, "learning_rate": 8.482967622027211e-06, "loss": 0.013, "step": 38990 }, { "epoch": 0.6586280271557402, "grad_norm": 0.14484405517578125, "learning_rate": 8.48191010530308e-06, "loss": 0.0143, "step": 39000 }, { "epoch": 0.6587969061370622, "grad_norm": 0.06575613468885422, "learning_rate": 8.48085228607913e-06, "loss": 0.0114, "step": 39010 }, { "epoch": 0.6589657851183842, "grad_norm": 1.0866694450378418, "learning_rate": 8.47979416444726e-06, "loss": 0.0102, "step": 39020 }, { "epoch": 0.6591346640997061, "grad_norm": 0.1429176777601242, "learning_rate": 8.478735740499402e-06, "loss": 0.0106, "step": 39030 }, { "epoch": 0.6593035430810281, "grad_norm": 0.3620598316192627, "learning_rate": 8.477677014327507e-06, "loss": 0.0148, "step": 39040 }, { "epoch": 0.6594724220623501, "grad_norm": 0.45246627926826477, "learning_rate": 8.476617986023555e-06, "loss": 0.0122, "step": 39050 }, { "epoch": 0.6596413010436721, "grad_norm": 0.24875281751155853, "learning_rate": 8.475558655679552e-06, "loss": 0.0129, "step": 39060 }, { "epoch": 0.6598101800249941, "grad_norm": 0.23008233308792114, "learning_rate": 8.474499023387527e-06, "loss": 0.012, "step": 39070 }, { "epoch": 0.659979059006316, "grad_norm": 0.18534572422504425, "learning_rate": 8.473439089239543e-06, "loss": 0.0139, "step": 39080 }, { "epoch": 0.660147937987638, "grad_norm": 0.1933961659669876, "learning_rate": 8.472378853327681e-06, "loss": 0.0067, "step": 39090 }, { "epoch": 0.66031681696896, "grad_norm": 0.21397151052951813, "learning_rate": 8.471318315744054e-06, "loss": 0.0187, "step": 39100 }, { "epoch": 0.6604856959502821, "grad_norm": 0.2184857875108719, "learning_rate": 8.470257476580797e-06, "loss": 0.0133, "step": 39110 }, { "epoch": 0.6606545749316041, "grad_norm": 0.2728938162326813, "learning_rate": 8.469196335930073e-06, "loss": 0.0078, "step": 39120 }, { "epoch": 0.660823453912926, "grad_norm": 0.1932516098022461, "learning_rate": 8.468134893884072e-06, "loss": 0.0101, "step": 39130 }, { "epoch": 0.660992332894248, "grad_norm": 0.37269169092178345, "learning_rate": 8.467073150535012e-06, "loss": 0.0151, "step": 39140 }, { "epoch": 0.66116121187557, "grad_norm": 0.36518046259880066, "learning_rate": 8.46601110597513e-06, "loss": 0.0109, "step": 39150 }, { "epoch": 0.661330090856892, "grad_norm": 0.1544988751411438, "learning_rate": 8.464948760296698e-06, "loss": 0.0124, "step": 39160 }, { "epoch": 0.661498969838214, "grad_norm": 0.17972524464130402, "learning_rate": 8.463886113592008e-06, "loss": 0.0112, "step": 39170 }, { "epoch": 0.6616678488195359, "grad_norm": 0.42887672781944275, "learning_rate": 8.462823165953378e-06, "loss": 0.017, "step": 39180 }, { "epoch": 0.6618367278008579, "grad_norm": 0.1743101328611374, "learning_rate": 8.461759917473158e-06, "loss": 0.0111, "step": 39190 }, { "epoch": 0.6620056067821799, "grad_norm": 0.22840259969234467, "learning_rate": 8.460696368243719e-06, "loss": 0.0088, "step": 39200 }, { "epoch": 0.6621744857635019, "grad_norm": 0.18783634901046753, "learning_rate": 8.459632518357459e-06, "loss": 0.0184, "step": 39210 }, { "epoch": 0.6623433647448238, "grad_norm": 0.20934629440307617, "learning_rate": 8.458568367906804e-06, "loss": 0.0083, "step": 39220 }, { "epoch": 0.6625122437261458, "grad_norm": 0.3193424940109253, "learning_rate": 8.457503916984204e-06, "loss": 0.01, "step": 39230 }, { "epoch": 0.6626811227074678, "grad_norm": 0.5041815042495728, "learning_rate": 8.456439165682136e-06, "loss": 0.0121, "step": 39240 }, { "epoch": 0.6628500016887898, "grad_norm": 0.5530292987823486, "learning_rate": 8.455374114093103e-06, "loss": 0.0115, "step": 39250 }, { "epoch": 0.6630188806701118, "grad_norm": 0.46410438418388367, "learning_rate": 8.454308762309632e-06, "loss": 0.0107, "step": 39260 }, { "epoch": 0.6631877596514337, "grad_norm": 0.3483157157897949, "learning_rate": 8.45324311042428e-06, "loss": 0.0104, "step": 39270 }, { "epoch": 0.6633566386327557, "grad_norm": 0.23933722078800201, "learning_rate": 8.452177158529632e-06, "loss": 0.0131, "step": 39280 }, { "epoch": 0.6635255176140777, "grad_norm": 0.5357312560081482, "learning_rate": 8.451110906718289e-06, "loss": 0.016, "step": 39290 }, { "epoch": 0.6636943965953997, "grad_norm": 0.2685016989707947, "learning_rate": 8.450044355082887e-06, "loss": 0.0124, "step": 39300 }, { "epoch": 0.6638632755767218, "grad_norm": 0.13302215933799744, "learning_rate": 8.448977503716084e-06, "loss": 0.0101, "step": 39310 }, { "epoch": 0.6640321545580437, "grad_norm": 0.1892687827348709, "learning_rate": 8.447910352710569e-06, "loss": 0.0129, "step": 39320 }, { "epoch": 0.6642010335393657, "grad_norm": 0.6670637130737305, "learning_rate": 8.446842902159051e-06, "loss": 0.021, "step": 39330 }, { "epoch": 0.6643699125206877, "grad_norm": 0.27414077520370483, "learning_rate": 8.445775152154268e-06, "loss": 0.0107, "step": 39340 }, { "epoch": 0.6645387915020097, "grad_norm": 0.4842005670070648, "learning_rate": 8.444707102788983e-06, "loss": 0.0102, "step": 39350 }, { "epoch": 0.6647076704833317, "grad_norm": 0.45098644495010376, "learning_rate": 8.443638754155985e-06, "loss": 0.0154, "step": 39360 }, { "epoch": 0.6648765494646536, "grad_norm": 0.3472047746181488, "learning_rate": 8.44257010634809e-06, "loss": 0.0112, "step": 39370 }, { "epoch": 0.6650454284459756, "grad_norm": 0.4302093982696533, "learning_rate": 8.441501159458142e-06, "loss": 0.0171, "step": 39380 }, { "epoch": 0.6652143074272976, "grad_norm": 0.3246120512485504, "learning_rate": 8.440431913579003e-06, "loss": 0.0105, "step": 39390 }, { "epoch": 0.6653831864086196, "grad_norm": 0.3183690011501312, "learning_rate": 8.43936236880357e-06, "loss": 0.0117, "step": 39400 }, { "epoch": 0.6655520653899416, "grad_norm": 0.4343954622745514, "learning_rate": 8.438292525224764e-06, "loss": 0.0086, "step": 39410 }, { "epoch": 0.6657209443712635, "grad_norm": 0.13204225897789001, "learning_rate": 8.437222382935528e-06, "loss": 0.0149, "step": 39420 }, { "epoch": 0.6658898233525855, "grad_norm": 0.21170683205127716, "learning_rate": 8.436151942028834e-06, "loss": 0.0134, "step": 39430 }, { "epoch": 0.6660587023339075, "grad_norm": 0.3138885498046875, "learning_rate": 8.435081202597678e-06, "loss": 0.0098, "step": 39440 }, { "epoch": 0.6662275813152295, "grad_norm": 0.3321925401687622, "learning_rate": 8.434010164735086e-06, "loss": 0.02, "step": 39450 }, { "epoch": 0.6663964602965515, "grad_norm": 0.5843718647956848, "learning_rate": 8.432938828534104e-06, "loss": 0.0112, "step": 39460 }, { "epoch": 0.6665653392778734, "grad_norm": 0.20243676006793976, "learning_rate": 8.431867194087811e-06, "loss": 0.0077, "step": 39470 }, { "epoch": 0.6667342182591954, "grad_norm": 0.9229133725166321, "learning_rate": 8.430795261489302e-06, "loss": 0.0158, "step": 39480 }, { "epoch": 0.6669030972405174, "grad_norm": 0.22384405136108398, "learning_rate": 8.429723030831709e-06, "loss": 0.017, "step": 39490 }, { "epoch": 0.6670719762218394, "grad_norm": 0.5781461596488953, "learning_rate": 8.428650502208185e-06, "loss": 0.0163, "step": 39500 }, { "epoch": 0.6672408552031615, "grad_norm": 0.31292980909347534, "learning_rate": 8.427577675711905e-06, "loss": 0.0118, "step": 39510 }, { "epoch": 0.6674097341844833, "grad_norm": 0.22406470775604248, "learning_rate": 8.426504551436074e-06, "loss": 0.0154, "step": 39520 }, { "epoch": 0.6675786131658054, "grad_norm": 0.402627557516098, "learning_rate": 8.425431129473926e-06, "loss": 0.0135, "step": 39530 }, { "epoch": 0.6677474921471274, "grad_norm": 0.4167880415916443, "learning_rate": 8.424357409918714e-06, "loss": 0.0137, "step": 39540 }, { "epoch": 0.6679163711284494, "grad_norm": 0.18556590378284454, "learning_rate": 8.423283392863721e-06, "loss": 0.0102, "step": 39550 }, { "epoch": 0.6680852501097714, "grad_norm": 0.21292981505393982, "learning_rate": 8.422209078402257e-06, "loss": 0.0109, "step": 39560 }, { "epoch": 0.6682541290910933, "grad_norm": 0.7037652730941772, "learning_rate": 8.421134466627653e-06, "loss": 0.0191, "step": 39570 }, { "epoch": 0.6684230080724153, "grad_norm": 0.5781016945838928, "learning_rate": 8.420059557633269e-06, "loss": 0.0141, "step": 39580 }, { "epoch": 0.6685918870537373, "grad_norm": 0.4194015860557556, "learning_rate": 8.418984351512491e-06, "loss": 0.0172, "step": 39590 }, { "epoch": 0.6687607660350593, "grad_norm": 0.504652202129364, "learning_rate": 8.417908848358731e-06, "loss": 0.0137, "step": 39600 }, { "epoch": 0.6689296450163813, "grad_norm": 0.34235796332359314, "learning_rate": 8.416833048265425e-06, "loss": 0.0134, "step": 39610 }, { "epoch": 0.6690985239977032, "grad_norm": 0.49511024355888367, "learning_rate": 8.415756951326037e-06, "loss": 0.0087, "step": 39620 }, { "epoch": 0.6692674029790252, "grad_norm": 0.3983369767665863, "learning_rate": 8.414680557634054e-06, "loss": 0.016, "step": 39630 }, { "epoch": 0.6694362819603472, "grad_norm": 0.753752589225769, "learning_rate": 8.413603867282991e-06, "loss": 0.0167, "step": 39640 }, { "epoch": 0.6696051609416692, "grad_norm": 0.4461466073989868, "learning_rate": 8.41252688036639e-06, "loss": 0.0169, "step": 39650 }, { "epoch": 0.6697740399229912, "grad_norm": 0.5036262273788452, "learning_rate": 8.411449596977814e-06, "loss": 0.012, "step": 39660 }, { "epoch": 0.6699429189043131, "grad_norm": 0.27077218890190125, "learning_rate": 8.410372017210859e-06, "loss": 0.0109, "step": 39670 }, { "epoch": 0.6701117978856351, "grad_norm": 0.30919894576072693, "learning_rate": 8.409294141159138e-06, "loss": 0.0145, "step": 39680 }, { "epoch": 0.6702806768669571, "grad_norm": 0.37967634201049805, "learning_rate": 8.408215968916295e-06, "loss": 0.0135, "step": 39690 }, { "epoch": 0.6704495558482791, "grad_norm": 1.5243675708770752, "learning_rate": 8.407137500576003e-06, "loss": 0.0131, "step": 39700 }, { "epoch": 0.6706184348296012, "grad_norm": 0.5391483306884766, "learning_rate": 8.406058736231952e-06, "loss": 0.0101, "step": 39710 }, { "epoch": 0.670787313810923, "grad_norm": 0.3526119887828827, "learning_rate": 8.404979675977863e-06, "loss": 0.0139, "step": 39720 }, { "epoch": 0.670956192792245, "grad_norm": 0.2666623592376709, "learning_rate": 8.403900319907484e-06, "loss": 0.0108, "step": 39730 }, { "epoch": 0.6711250717735671, "grad_norm": 0.40423211455345154, "learning_rate": 8.402820668114588e-06, "loss": 0.0094, "step": 39740 }, { "epoch": 0.6712939507548891, "grad_norm": 0.37170472741127014, "learning_rate": 8.401740720692968e-06, "loss": 0.016, "step": 39750 }, { "epoch": 0.6714628297362111, "grad_norm": 0.446287602186203, "learning_rate": 8.400660477736452e-06, "loss": 0.0145, "step": 39760 }, { "epoch": 0.671631708717533, "grad_norm": 0.1870456337928772, "learning_rate": 8.399579939338884e-06, "loss": 0.0112, "step": 39770 }, { "epoch": 0.671800587698855, "grad_norm": 0.18409638106822968, "learning_rate": 8.398499105594143e-06, "loss": 0.0112, "step": 39780 }, { "epoch": 0.671969466680177, "grad_norm": 0.23124592006206512, "learning_rate": 8.397417976596125e-06, "loss": 0.0101, "step": 39790 }, { "epoch": 0.672138345661499, "grad_norm": 0.4410819411277771, "learning_rate": 8.39633655243876e-06, "loss": 0.0136, "step": 39800 }, { "epoch": 0.672307224642821, "grad_norm": 0.5888890027999878, "learning_rate": 8.395254833215998e-06, "loss": 0.0118, "step": 39810 }, { "epoch": 0.6724761036241429, "grad_norm": 0.332856684923172, "learning_rate": 8.394172819021815e-06, "loss": 0.0155, "step": 39820 }, { "epoch": 0.6726449826054649, "grad_norm": 0.35429564118385315, "learning_rate": 8.393090509950215e-06, "loss": 0.0082, "step": 39830 }, { "epoch": 0.6728138615867869, "grad_norm": 0.6555919051170349, "learning_rate": 8.392007906095226e-06, "loss": 0.0108, "step": 39840 }, { "epoch": 0.6729827405681089, "grad_norm": 0.21912232041358948, "learning_rate": 8.3909250075509e-06, "loss": 0.0148, "step": 39850 }, { "epoch": 0.6731516195494309, "grad_norm": 0.23575979471206665, "learning_rate": 8.389841814411321e-06, "loss": 0.0135, "step": 39860 }, { "epoch": 0.6733204985307528, "grad_norm": 0.4170130789279938, "learning_rate": 8.38875832677059e-06, "loss": 0.012, "step": 39870 }, { "epoch": 0.6734893775120748, "grad_norm": 0.27534595131874084, "learning_rate": 8.38767454472284e-06, "loss": 0.0131, "step": 39880 }, { "epoch": 0.6736582564933968, "grad_norm": 0.3929915130138397, "learning_rate": 8.386590468362226e-06, "loss": 0.012, "step": 39890 }, { "epoch": 0.6738271354747188, "grad_norm": 0.36869174242019653, "learning_rate": 8.38550609778293e-06, "loss": 0.0132, "step": 39900 }, { "epoch": 0.6739960144560408, "grad_norm": 0.35409650206565857, "learning_rate": 8.38442143307916e-06, "loss": 0.0096, "step": 39910 }, { "epoch": 0.6741648934373627, "grad_norm": 0.27569442987442017, "learning_rate": 8.383336474345149e-06, "loss": 0.0127, "step": 39920 }, { "epoch": 0.6743337724186848, "grad_norm": 0.39278045296669006, "learning_rate": 8.382251221675157e-06, "loss": 0.0159, "step": 39930 }, { "epoch": 0.6745026514000068, "grad_norm": 0.26019129157066345, "learning_rate": 8.381165675163467e-06, "loss": 0.012, "step": 39940 }, { "epoch": 0.6746715303813288, "grad_norm": 0.47749000787734985, "learning_rate": 8.380079834904386e-06, "loss": 0.0148, "step": 39950 }, { "epoch": 0.6748404093626508, "grad_norm": 0.30610302090644836, "learning_rate": 8.378993700992253e-06, "loss": 0.0123, "step": 39960 }, { "epoch": 0.6750092883439727, "grad_norm": 0.4464901089668274, "learning_rate": 8.377907273521427e-06, "loss": 0.0135, "step": 39970 }, { "epoch": 0.6751781673252947, "grad_norm": 0.1559576690196991, "learning_rate": 8.376820552586292e-06, "loss": 0.017, "step": 39980 }, { "epoch": 0.6753470463066167, "grad_norm": 0.2691028416156769, "learning_rate": 8.375733538281266e-06, "loss": 0.0105, "step": 39990 }, { "epoch": 0.6755159252879387, "grad_norm": 0.2629511058330536, "learning_rate": 8.374646230700779e-06, "loss": 0.0099, "step": 40000 }, { "epoch": 0.6756848042692607, "grad_norm": 0.24541381001472473, "learning_rate": 8.373558629939296e-06, "loss": 0.0124, "step": 40010 }, { "epoch": 0.6758536832505826, "grad_norm": 0.36522239446640015, "learning_rate": 8.372470736091307e-06, "loss": 0.0149, "step": 40020 }, { "epoch": 0.6760225622319046, "grad_norm": 0.2273145616054535, "learning_rate": 8.371382549251325e-06, "loss": 0.0136, "step": 40030 }, { "epoch": 0.6761914412132266, "grad_norm": 0.3691844642162323, "learning_rate": 8.370294069513889e-06, "loss": 0.0144, "step": 40040 }, { "epoch": 0.6763603201945486, "grad_norm": 0.28501424193382263, "learning_rate": 8.36920529697356e-06, "loss": 0.0086, "step": 40050 }, { "epoch": 0.6765291991758706, "grad_norm": 0.30749037861824036, "learning_rate": 8.368116231724932e-06, "loss": 0.0126, "step": 40060 }, { "epoch": 0.6766980781571925, "grad_norm": 0.27632206678390503, "learning_rate": 8.367026873862618e-06, "loss": 0.0101, "step": 40070 }, { "epoch": 0.6768669571385145, "grad_norm": 0.3383098840713501, "learning_rate": 8.365937223481261e-06, "loss": 0.0087, "step": 40080 }, { "epoch": 0.6770358361198365, "grad_norm": 0.3614688515663147, "learning_rate": 8.364847280675526e-06, "loss": 0.0133, "step": 40090 }, { "epoch": 0.6772047151011585, "grad_norm": 0.19998450577259064, "learning_rate": 8.363757045540103e-06, "loss": 0.0137, "step": 40100 }, { "epoch": 0.6773735940824805, "grad_norm": 0.45603862404823303, "learning_rate": 8.362666518169711e-06, "loss": 0.0107, "step": 40110 }, { "epoch": 0.6775424730638024, "grad_norm": 0.33199068903923035, "learning_rate": 8.36157569865909e-06, "loss": 0.013, "step": 40120 }, { "epoch": 0.6777113520451244, "grad_norm": 0.5434440970420837, "learning_rate": 8.360484587103014e-06, "loss": 0.0192, "step": 40130 }, { "epoch": 0.6778802310264465, "grad_norm": 0.3008168041706085, "learning_rate": 8.359393183596267e-06, "loss": 0.0113, "step": 40140 }, { "epoch": 0.6780491100077685, "grad_norm": 0.4576249122619629, "learning_rate": 8.358301488233674e-06, "loss": 0.0122, "step": 40150 }, { "epoch": 0.6782179889890905, "grad_norm": 0.4599747359752655, "learning_rate": 8.357209501110075e-06, "loss": 0.0106, "step": 40160 }, { "epoch": 0.6783868679704124, "grad_norm": 0.18459255993366241, "learning_rate": 8.356117222320344e-06, "loss": 0.0137, "step": 40170 }, { "epoch": 0.6785557469517344, "grad_norm": 0.29364830255508423, "learning_rate": 8.35502465195937e-06, "loss": 0.0108, "step": 40180 }, { "epoch": 0.6787246259330564, "grad_norm": 0.262579083442688, "learning_rate": 8.353931790122077e-06, "loss": 0.0085, "step": 40190 }, { "epoch": 0.6788935049143784, "grad_norm": 0.20680271089076996, "learning_rate": 8.352838636903407e-06, "loss": 0.0089, "step": 40200 }, { "epoch": 0.6790623838957004, "grad_norm": 0.1917966902256012, "learning_rate": 8.351745192398333e-06, "loss": 0.0127, "step": 40210 }, { "epoch": 0.6792312628770223, "grad_norm": 0.21906068921089172, "learning_rate": 8.35065145670185e-06, "loss": 0.0113, "step": 40220 }, { "epoch": 0.6794001418583443, "grad_norm": 0.6496953368186951, "learning_rate": 8.349557429908977e-06, "loss": 0.0099, "step": 40230 }, { "epoch": 0.6795690208396663, "grad_norm": 0.5130422115325928, "learning_rate": 8.348463112114763e-06, "loss": 0.014, "step": 40240 }, { "epoch": 0.6797378998209883, "grad_norm": 0.43357259035110474, "learning_rate": 8.34736850341428e-06, "loss": 0.0119, "step": 40250 }, { "epoch": 0.6799067788023103, "grad_norm": 0.3624105155467987, "learning_rate": 8.346273603902622e-06, "loss": 0.014, "step": 40260 }, { "epoch": 0.6800756577836322, "grad_norm": 0.2143857330083847, "learning_rate": 8.345178413674916e-06, "loss": 0.0123, "step": 40270 }, { "epoch": 0.6802445367649542, "grad_norm": 0.5371170043945312, "learning_rate": 8.344082932826303e-06, "loss": 0.0162, "step": 40280 }, { "epoch": 0.6804134157462762, "grad_norm": 0.20867757499217987, "learning_rate": 8.342987161451958e-06, "loss": 0.0126, "step": 40290 }, { "epoch": 0.6805822947275982, "grad_norm": 0.4553457200527191, "learning_rate": 8.341891099647083e-06, "loss": 0.0135, "step": 40300 }, { "epoch": 0.6807511737089202, "grad_norm": 0.3190728724002838, "learning_rate": 8.340794747506898e-06, "loss": 0.0156, "step": 40310 }, { "epoch": 0.6809200526902421, "grad_norm": 0.22667360305786133, "learning_rate": 8.33969810512665e-06, "loss": 0.0151, "step": 40320 }, { "epoch": 0.6810889316715641, "grad_norm": 0.3281976878643036, "learning_rate": 8.338601172601616e-06, "loss": 0.0133, "step": 40330 }, { "epoch": 0.6812578106528862, "grad_norm": 0.15649119019508362, "learning_rate": 8.337503950027091e-06, "loss": 0.016, "step": 40340 }, { "epoch": 0.6814266896342082, "grad_norm": 0.24130329489707947, "learning_rate": 8.336406437498403e-06, "loss": 0.0145, "step": 40350 }, { "epoch": 0.6815955686155301, "grad_norm": 0.3593396842479706, "learning_rate": 8.335308635110898e-06, "loss": 0.0141, "step": 40360 }, { "epoch": 0.6817644475968521, "grad_norm": 0.23807735741138458, "learning_rate": 8.33421054295995e-06, "loss": 0.0099, "step": 40370 }, { "epoch": 0.6819333265781741, "grad_norm": 0.3708740174770355, "learning_rate": 8.333112161140963e-06, "loss": 0.0108, "step": 40380 }, { "epoch": 0.6821022055594961, "grad_norm": 0.18727350234985352, "learning_rate": 8.332013489749357e-06, "loss": 0.0106, "step": 40390 }, { "epoch": 0.6822710845408181, "grad_norm": 0.2441655695438385, "learning_rate": 8.330914528880584e-06, "loss": 0.0121, "step": 40400 }, { "epoch": 0.68243996352214, "grad_norm": 0.33972615003585815, "learning_rate": 8.32981527863012e-06, "loss": 0.011, "step": 40410 }, { "epoch": 0.682608842503462, "grad_norm": 0.2547035813331604, "learning_rate": 8.328715739093462e-06, "loss": 0.0121, "step": 40420 }, { "epoch": 0.682777721484784, "grad_norm": 0.5147927403450012, "learning_rate": 8.327615910366138e-06, "loss": 0.0142, "step": 40430 }, { "epoch": 0.682946600466106, "grad_norm": 0.5657573938369751, "learning_rate": 8.326515792543697e-06, "loss": 0.0183, "step": 40440 }, { "epoch": 0.683115479447428, "grad_norm": 0.7611470222473145, "learning_rate": 8.325415385721715e-06, "loss": 0.0192, "step": 40450 }, { "epoch": 0.6832843584287499, "grad_norm": 0.43450433015823364, "learning_rate": 8.324314689995794e-06, "loss": 0.0117, "step": 40460 }, { "epoch": 0.6834532374100719, "grad_norm": 0.6663591861724854, "learning_rate": 8.323213705461559e-06, "loss": 0.0105, "step": 40470 }, { "epoch": 0.6836221163913939, "grad_norm": 0.08053232729434967, "learning_rate": 8.322112432214659e-06, "loss": 0.0145, "step": 40480 }, { "epoch": 0.6837909953727159, "grad_norm": 0.278033971786499, "learning_rate": 8.321010870350773e-06, "loss": 0.0133, "step": 40490 }, { "epoch": 0.6839598743540379, "grad_norm": 0.260314404964447, "learning_rate": 8.319909019965598e-06, "loss": 0.0148, "step": 40500 }, { "epoch": 0.6841287533353598, "grad_norm": 0.23567607998847961, "learning_rate": 8.318806881154864e-06, "loss": 0.0101, "step": 40510 }, { "epoch": 0.6842976323166818, "grad_norm": 0.30972281098365784, "learning_rate": 8.31770445401432e-06, "loss": 0.015, "step": 40520 }, { "epoch": 0.6844665112980038, "grad_norm": 0.24491360783576965, "learning_rate": 8.316601738639742e-06, "loss": 0.0104, "step": 40530 }, { "epoch": 0.6846353902793259, "grad_norm": 0.6555915474891663, "learning_rate": 8.315498735126932e-06, "loss": 0.0166, "step": 40540 }, { "epoch": 0.6848042692606479, "grad_norm": 0.5342641472816467, "learning_rate": 8.314395443571716e-06, "loss": 0.0122, "step": 40550 }, { "epoch": 0.6849731482419698, "grad_norm": 0.39063313603401184, "learning_rate": 8.313291864069947e-06, "loss": 0.0157, "step": 40560 }, { "epoch": 0.6851420272232918, "grad_norm": 0.20303210616111755, "learning_rate": 8.312187996717499e-06, "loss": 0.0084, "step": 40570 }, { "epoch": 0.6853109062046138, "grad_norm": 0.2854264974594116, "learning_rate": 8.311083841610272e-06, "loss": 0.0128, "step": 40580 }, { "epoch": 0.6854797851859358, "grad_norm": 0.19092638790607452, "learning_rate": 8.309979398844197e-06, "loss": 0.0153, "step": 40590 }, { "epoch": 0.6856486641672578, "grad_norm": 0.8534296751022339, "learning_rate": 8.30887466851522e-06, "loss": 0.0235, "step": 40600 }, { "epoch": 0.6858175431485797, "grad_norm": 0.44900572299957275, "learning_rate": 8.307769650719322e-06, "loss": 0.0128, "step": 40610 }, { "epoch": 0.6859864221299017, "grad_norm": 0.14883385598659515, "learning_rate": 8.306664345552501e-06, "loss": 0.0158, "step": 40620 }, { "epoch": 0.6861553011112237, "grad_norm": 0.2662615478038788, "learning_rate": 8.305558753110782e-06, "loss": 0.0111, "step": 40630 }, { "epoch": 0.6863241800925457, "grad_norm": 0.27925819158554077, "learning_rate": 8.304452873490223e-06, "loss": 0.0108, "step": 40640 }, { "epoch": 0.6864930590738677, "grad_norm": 0.41528403759002686, "learning_rate": 8.303346706786891e-06, "loss": 0.0136, "step": 40650 }, { "epoch": 0.6866619380551896, "grad_norm": 0.33822643756866455, "learning_rate": 8.302240253096896e-06, "loss": 0.0144, "step": 40660 }, { "epoch": 0.6868308170365116, "grad_norm": 0.33300212025642395, "learning_rate": 8.301133512516358e-06, "loss": 0.0171, "step": 40670 }, { "epoch": 0.6869996960178336, "grad_norm": 0.21889641880989075, "learning_rate": 8.300026485141429e-06, "loss": 0.0144, "step": 40680 }, { "epoch": 0.6871685749991556, "grad_norm": 0.3641320765018463, "learning_rate": 8.298919171068283e-06, "loss": 0.0119, "step": 40690 }, { "epoch": 0.6873374539804776, "grad_norm": 0.387766569852829, "learning_rate": 8.297811570393126e-06, "loss": 0.0116, "step": 40700 }, { "epoch": 0.6875063329617995, "grad_norm": 0.5651323795318604, "learning_rate": 8.296703683212179e-06, "loss": 0.0131, "step": 40710 }, { "epoch": 0.6876752119431215, "grad_norm": 0.4242200255393982, "learning_rate": 8.295595509621695e-06, "loss": 0.0183, "step": 40720 }, { "epoch": 0.6878440909244435, "grad_norm": 0.5297519564628601, "learning_rate": 8.294487049717947e-06, "loss": 0.0218, "step": 40730 }, { "epoch": 0.6880129699057655, "grad_norm": 0.18419508635997772, "learning_rate": 8.29337830359724e-06, "loss": 0.0105, "step": 40740 }, { "epoch": 0.6881818488870876, "grad_norm": 0.35069385170936584, "learning_rate": 8.292269271355892e-06, "loss": 0.0108, "step": 40750 }, { "epoch": 0.6883507278684095, "grad_norm": 0.6460521817207336, "learning_rate": 8.29115995309026e-06, "loss": 0.0156, "step": 40760 }, { "epoch": 0.6885196068497315, "grad_norm": 0.25035029649734497, "learning_rate": 8.290050348896717e-06, "loss": 0.0065, "step": 40770 }, { "epoch": 0.6886884858310535, "grad_norm": 0.3722369074821472, "learning_rate": 8.28894045887166e-06, "loss": 0.0106, "step": 40780 }, { "epoch": 0.6888573648123755, "grad_norm": 0.7604500651359558, "learning_rate": 8.287830283111514e-06, "loss": 0.0134, "step": 40790 }, { "epoch": 0.6890262437936975, "grad_norm": 0.34528860449790955, "learning_rate": 8.28671982171273e-06, "loss": 0.0142, "step": 40800 }, { "epoch": 0.6891951227750194, "grad_norm": 0.14592565596103668, "learning_rate": 8.285609074771781e-06, "loss": 0.0153, "step": 40810 }, { "epoch": 0.6893640017563414, "grad_norm": 0.18766401708126068, "learning_rate": 8.284498042385167e-06, "loss": 0.014, "step": 40820 }, { "epoch": 0.6895328807376634, "grad_norm": 0.4808811843395233, "learning_rate": 8.283386724649412e-06, "loss": 0.0151, "step": 40830 }, { "epoch": 0.6897017597189854, "grad_norm": 0.5595186948776245, "learning_rate": 8.282275121661064e-06, "loss": 0.0175, "step": 40840 }, { "epoch": 0.6898706387003074, "grad_norm": 0.37195688486099243, "learning_rate": 8.281163233516696e-06, "loss": 0.0133, "step": 40850 }, { "epoch": 0.6900395176816293, "grad_norm": 0.34146183729171753, "learning_rate": 8.280051060312905e-06, "loss": 0.0138, "step": 40860 }, { "epoch": 0.6902083966629513, "grad_norm": 0.2693497836589813, "learning_rate": 8.278938602146315e-06, "loss": 0.012, "step": 40870 }, { "epoch": 0.6903772756442733, "grad_norm": 0.29639309644699097, "learning_rate": 8.277825859113577e-06, "loss": 0.0119, "step": 40880 }, { "epoch": 0.6905461546255953, "grad_norm": 0.3384367823600769, "learning_rate": 8.276712831311358e-06, "loss": 0.0104, "step": 40890 }, { "epoch": 0.6907150336069173, "grad_norm": 0.26972222328186035, "learning_rate": 8.275599518836357e-06, "loss": 0.0131, "step": 40900 }, { "epoch": 0.6908839125882392, "grad_norm": 0.35366690158843994, "learning_rate": 8.274485921785298e-06, "loss": 0.0146, "step": 40910 }, { "epoch": 0.6910527915695612, "grad_norm": 0.17956729233264923, "learning_rate": 8.273372040254925e-06, "loss": 0.0168, "step": 40920 }, { "epoch": 0.6912216705508832, "grad_norm": 0.22357797622680664, "learning_rate": 8.272257874342009e-06, "loss": 0.0135, "step": 40930 }, { "epoch": 0.6913905495322052, "grad_norm": 0.2913973331451416, "learning_rate": 8.271143424143348e-06, "loss": 0.0121, "step": 40940 }, { "epoch": 0.6915594285135273, "grad_norm": 0.5312107801437378, "learning_rate": 8.270028689755763e-06, "loss": 0.0119, "step": 40950 }, { "epoch": 0.6917283074948491, "grad_norm": 0.3923189640045166, "learning_rate": 8.268913671276098e-06, "loss": 0.012, "step": 40960 }, { "epoch": 0.6918971864761712, "grad_norm": 0.2990339696407318, "learning_rate": 8.26779836880122e-06, "loss": 0.012, "step": 40970 }, { "epoch": 0.6920660654574932, "grad_norm": 0.3210398852825165, "learning_rate": 8.26668278242803e-06, "loss": 0.0102, "step": 40980 }, { "epoch": 0.6922349444388152, "grad_norm": 0.16870665550231934, "learning_rate": 8.265566912253444e-06, "loss": 0.0119, "step": 40990 }, { "epoch": 0.6924038234201372, "grad_norm": 0.538878858089447, "learning_rate": 8.264450758374407e-06, "loss": 0.0126, "step": 41000 }, { "epoch": 0.6925727024014591, "grad_norm": 0.24902203679084778, "learning_rate": 8.263334320887887e-06, "loss": 0.0097, "step": 41010 }, { "epoch": 0.6927415813827811, "grad_norm": 0.4097386300563812, "learning_rate": 8.262217599890876e-06, "loss": 0.0151, "step": 41020 }, { "epoch": 0.6929104603641031, "grad_norm": 0.4117305278778076, "learning_rate": 8.261100595480396e-06, "loss": 0.0141, "step": 41030 }, { "epoch": 0.6930793393454251, "grad_norm": 0.28312617540359497, "learning_rate": 8.259983307753486e-06, "loss": 0.0114, "step": 41040 }, { "epoch": 0.6932482183267471, "grad_norm": 0.21548573672771454, "learning_rate": 8.258865736807216e-06, "loss": 0.0074, "step": 41050 }, { "epoch": 0.693417097308069, "grad_norm": 0.2950454652309418, "learning_rate": 8.257747882738673e-06, "loss": 0.0102, "step": 41060 }, { "epoch": 0.693585976289391, "grad_norm": 0.20469088852405548, "learning_rate": 8.25662974564498e-06, "loss": 0.0094, "step": 41070 }, { "epoch": 0.693754855270713, "grad_norm": 0.13972753286361694, "learning_rate": 8.255511325623273e-06, "loss": 0.0094, "step": 41080 }, { "epoch": 0.693923734252035, "grad_norm": 0.38794174790382385, "learning_rate": 8.25439262277072e-06, "loss": 0.01, "step": 41090 }, { "epoch": 0.694092613233357, "grad_norm": 0.2593320906162262, "learning_rate": 8.25327363718451e-06, "loss": 0.0122, "step": 41100 }, { "epoch": 0.6942614922146789, "grad_norm": 0.4119921624660492, "learning_rate": 8.252154368961856e-06, "loss": 0.0109, "step": 41110 }, { "epoch": 0.6944303711960009, "grad_norm": 0.1880456507205963, "learning_rate": 8.2510348182e-06, "loss": 0.0096, "step": 41120 }, { "epoch": 0.6945992501773229, "grad_norm": 0.23672430217266083, "learning_rate": 8.249914984996209e-06, "loss": 0.0104, "step": 41130 }, { "epoch": 0.6947681291586449, "grad_norm": 0.28491273522377014, "learning_rate": 8.248794869447763e-06, "loss": 0.0064, "step": 41140 }, { "epoch": 0.694937008139967, "grad_norm": 0.5389191508293152, "learning_rate": 8.247674471651983e-06, "loss": 0.0164, "step": 41150 }, { "epoch": 0.6951058871212888, "grad_norm": 0.3892063498497009, "learning_rate": 8.246553791706202e-06, "loss": 0.0129, "step": 41160 }, { "epoch": 0.6952747661026109, "grad_norm": 0.2765096426010132, "learning_rate": 8.245432829707783e-06, "loss": 0.0139, "step": 41170 }, { "epoch": 0.6954436450839329, "grad_norm": 0.30072274804115295, "learning_rate": 8.24431158575411e-06, "loss": 0.0144, "step": 41180 }, { "epoch": 0.6956125240652549, "grad_norm": 0.17690441012382507, "learning_rate": 8.2431900599426e-06, "loss": 0.0121, "step": 41190 }, { "epoch": 0.6957814030465769, "grad_norm": 0.2539726793766022, "learning_rate": 8.242068252370681e-06, "loss": 0.0097, "step": 41200 }, { "epoch": 0.6959502820278988, "grad_norm": 0.36967727541923523, "learning_rate": 8.240946163135818e-06, "loss": 0.0158, "step": 41210 }, { "epoch": 0.6961191610092208, "grad_norm": 0.3249482810497284, "learning_rate": 8.239823792335495e-06, "loss": 0.0101, "step": 41220 }, { "epoch": 0.6962880399905428, "grad_norm": 0.2512367069721222, "learning_rate": 8.238701140067217e-06, "loss": 0.0135, "step": 41230 }, { "epoch": 0.6964569189718648, "grad_norm": 0.2705637514591217, "learning_rate": 8.237578206428523e-06, "loss": 0.0085, "step": 41240 }, { "epoch": 0.6966257979531868, "grad_norm": 0.2655506432056427, "learning_rate": 8.236454991516967e-06, "loss": 0.01, "step": 41250 }, { "epoch": 0.6967946769345087, "grad_norm": 0.3973148465156555, "learning_rate": 8.23533149543013e-06, "loss": 0.0178, "step": 41260 }, { "epoch": 0.6969635559158307, "grad_norm": 0.21638266742229462, "learning_rate": 8.234207718265622e-06, "loss": 0.0128, "step": 41270 }, { "epoch": 0.6971324348971527, "grad_norm": 0.20284807682037354, "learning_rate": 8.233083660121073e-06, "loss": 0.0089, "step": 41280 }, { "epoch": 0.6973013138784747, "grad_norm": 0.38769328594207764, "learning_rate": 8.231959321094137e-06, "loss": 0.0124, "step": 41290 }, { "epoch": 0.6974701928597967, "grad_norm": 0.2635006606578827, "learning_rate": 8.230834701282493e-06, "loss": 0.0103, "step": 41300 }, { "epoch": 0.6976390718411186, "grad_norm": 0.8119834661483765, "learning_rate": 8.229709800783849e-06, "loss": 0.0131, "step": 41310 }, { "epoch": 0.6978079508224406, "grad_norm": 0.4303053915500641, "learning_rate": 8.228584619695932e-06, "loss": 0.0163, "step": 41320 }, { "epoch": 0.6979768298037626, "grad_norm": 0.23575688898563385, "learning_rate": 8.227459158116492e-06, "loss": 0.0142, "step": 41330 }, { "epoch": 0.6981457087850846, "grad_norm": 0.2220524698495865, "learning_rate": 8.226333416143312e-06, "loss": 0.0157, "step": 41340 }, { "epoch": 0.6983145877664066, "grad_norm": 0.24744877219200134, "learning_rate": 8.225207393874188e-06, "loss": 0.0137, "step": 41350 }, { "epoch": 0.6984834667477285, "grad_norm": 0.5948857665061951, "learning_rate": 8.22408109140695e-06, "loss": 0.0149, "step": 41360 }, { "epoch": 0.6986523457290506, "grad_norm": 0.3244536221027374, "learning_rate": 8.222954508839447e-06, "loss": 0.0095, "step": 41370 }, { "epoch": 0.6988212247103726, "grad_norm": 0.24759702384471893, "learning_rate": 8.221827646269554e-06, "loss": 0.0111, "step": 41380 }, { "epoch": 0.6989901036916946, "grad_norm": 0.7828853130340576, "learning_rate": 8.220700503795169e-06, "loss": 0.0139, "step": 41390 }, { "epoch": 0.6991589826730166, "grad_norm": 0.5097396373748779, "learning_rate": 8.219573081514215e-06, "loss": 0.0097, "step": 41400 }, { "epoch": 0.6993278616543385, "grad_norm": 0.47684869170188904, "learning_rate": 8.218445379524642e-06, "loss": 0.0144, "step": 41410 }, { "epoch": 0.6994967406356605, "grad_norm": 0.6441669464111328, "learning_rate": 8.21731739792442e-06, "loss": 0.0145, "step": 41420 }, { "epoch": 0.6996656196169825, "grad_norm": 0.27110517024993896, "learning_rate": 8.216189136811548e-06, "loss": 0.0091, "step": 41430 }, { "epoch": 0.6998344985983045, "grad_norm": 0.26825693249702454, "learning_rate": 8.215060596284043e-06, "loss": 0.0148, "step": 41440 }, { "epoch": 0.7000033775796264, "grad_norm": 0.40835100412368774, "learning_rate": 8.213931776439953e-06, "loss": 0.0158, "step": 41450 }, { "epoch": 0.7001722565609484, "grad_norm": 0.6533754467964172, "learning_rate": 8.212802677377345e-06, "loss": 0.0157, "step": 41460 }, { "epoch": 0.7003411355422704, "grad_norm": 0.4708280861377716, "learning_rate": 8.211673299194311e-06, "loss": 0.0144, "step": 41470 }, { "epoch": 0.7005100145235924, "grad_norm": 0.12061113864183426, "learning_rate": 8.210543641988973e-06, "loss": 0.0108, "step": 41480 }, { "epoch": 0.7006788935049144, "grad_norm": 0.2897886037826538, "learning_rate": 8.20941370585947e-06, "loss": 0.0078, "step": 41490 }, { "epoch": 0.7008477724862363, "grad_norm": 0.373481810092926, "learning_rate": 8.208283490903968e-06, "loss": 0.0101, "step": 41500 }, { "epoch": 0.7010166514675583, "grad_norm": 0.25497928261756897, "learning_rate": 8.207152997220658e-06, "loss": 0.0173, "step": 41510 }, { "epoch": 0.7011855304488803, "grad_norm": 0.4952937960624695, "learning_rate": 8.206022224907755e-06, "loss": 0.0125, "step": 41520 }, { "epoch": 0.7013544094302023, "grad_norm": 0.20987926423549652, "learning_rate": 8.204891174063496e-06, "loss": 0.0166, "step": 41530 }, { "epoch": 0.7015232884115243, "grad_norm": 0.24783635139465332, "learning_rate": 8.203759844786145e-06, "loss": 0.013, "step": 41540 }, { "epoch": 0.7016921673928462, "grad_norm": 0.35071736574172974, "learning_rate": 8.202628237173989e-06, "loss": 0.0119, "step": 41550 }, { "epoch": 0.7018610463741682, "grad_norm": 0.18448083102703094, "learning_rate": 8.20149635132534e-06, "loss": 0.012, "step": 41560 }, { "epoch": 0.7020299253554902, "grad_norm": 0.36172017455101013, "learning_rate": 8.200364187338531e-06, "loss": 0.0181, "step": 41570 }, { "epoch": 0.7021988043368123, "grad_norm": 0.20051077008247375, "learning_rate": 8.199231745311926e-06, "loss": 0.0128, "step": 41580 }, { "epoch": 0.7023676833181343, "grad_norm": 0.20635364949703217, "learning_rate": 8.198099025343906e-06, "loss": 0.0175, "step": 41590 }, { "epoch": 0.7025365622994562, "grad_norm": 0.31007111072540283, "learning_rate": 8.19696602753288e-06, "loss": 0.0155, "step": 41600 }, { "epoch": 0.7027054412807782, "grad_norm": 0.414027601480484, "learning_rate": 8.195832751977278e-06, "loss": 0.0108, "step": 41610 }, { "epoch": 0.7028743202621002, "grad_norm": 0.49032846093177795, "learning_rate": 8.194699198775556e-06, "loss": 0.0118, "step": 41620 }, { "epoch": 0.7030431992434222, "grad_norm": 0.3073863685131073, "learning_rate": 8.193565368026197e-06, "loss": 0.0101, "step": 41630 }, { "epoch": 0.7032120782247442, "grad_norm": 0.4730361998081207, "learning_rate": 8.192431259827705e-06, "loss": 0.0197, "step": 41640 }, { "epoch": 0.7033809572060661, "grad_norm": 0.3031933605670929, "learning_rate": 8.191296874278607e-06, "loss": 0.0108, "step": 41650 }, { "epoch": 0.7035498361873881, "grad_norm": 0.5371814966201782, "learning_rate": 8.190162211477457e-06, "loss": 0.0215, "step": 41660 }, { "epoch": 0.7037187151687101, "grad_norm": 0.2891017496585846, "learning_rate": 8.189027271522831e-06, "loss": 0.0081, "step": 41670 }, { "epoch": 0.7038875941500321, "grad_norm": 0.3123476207256317, "learning_rate": 8.18789205451333e-06, "loss": 0.0105, "step": 41680 }, { "epoch": 0.7040564731313541, "grad_norm": 0.3840523362159729, "learning_rate": 8.186756560547579e-06, "loss": 0.0126, "step": 41690 }, { "epoch": 0.704225352112676, "grad_norm": 0.22710064053535461, "learning_rate": 8.185620789724228e-06, "loss": 0.0159, "step": 41700 }, { "epoch": 0.704394231093998, "grad_norm": 0.23566067218780518, "learning_rate": 8.184484742141944e-06, "loss": 0.0112, "step": 41710 }, { "epoch": 0.70456311007532, "grad_norm": 0.24733005464076996, "learning_rate": 8.183348417899434e-06, "loss": 0.0144, "step": 41720 }, { "epoch": 0.704731989056642, "grad_norm": 0.5306110978126526, "learning_rate": 8.18221181709541e-06, "loss": 0.0115, "step": 41730 }, { "epoch": 0.704900868037964, "grad_norm": 0.28903526067733765, "learning_rate": 8.181074939828625e-06, "loss": 0.0103, "step": 41740 }, { "epoch": 0.7050697470192859, "grad_norm": 0.2446138709783554, "learning_rate": 8.17993778619784e-06, "loss": 0.0139, "step": 41750 }, { "epoch": 0.7052386260006079, "grad_norm": 0.25841882824897766, "learning_rate": 8.178800356301855e-06, "loss": 0.0183, "step": 41760 }, { "epoch": 0.70540750498193, "grad_norm": 0.23241619765758514, "learning_rate": 8.177662650239484e-06, "loss": 0.0103, "step": 41770 }, { "epoch": 0.705576383963252, "grad_norm": 0.4556638300418854, "learning_rate": 8.176524668109568e-06, "loss": 0.0122, "step": 41780 }, { "epoch": 0.705745262944574, "grad_norm": 0.18292222917079926, "learning_rate": 8.175386410010972e-06, "loss": 0.0115, "step": 41790 }, { "epoch": 0.7059141419258959, "grad_norm": 0.16010068356990814, "learning_rate": 8.174247876042586e-06, "loss": 0.0105, "step": 41800 }, { "epoch": 0.7060830209072179, "grad_norm": 0.40067198872566223, "learning_rate": 8.173109066303323e-06, "loss": 0.0107, "step": 41810 }, { "epoch": 0.7062518998885399, "grad_norm": 0.4344941973686218, "learning_rate": 8.171969980892118e-06, "loss": 0.0094, "step": 41820 }, { "epoch": 0.7064207788698619, "grad_norm": 0.8550710678100586, "learning_rate": 8.170830619907934e-06, "loss": 0.0182, "step": 41830 }, { "epoch": 0.7065896578511839, "grad_norm": 0.2452988177537918, "learning_rate": 8.169690983449757e-06, "loss": 0.0095, "step": 41840 }, { "epoch": 0.7067585368325058, "grad_norm": 0.45134931802749634, "learning_rate": 8.168551071616593e-06, "loss": 0.0134, "step": 41850 }, { "epoch": 0.7069274158138278, "grad_norm": 0.45750582218170166, "learning_rate": 8.167410884507477e-06, "loss": 0.012, "step": 41860 }, { "epoch": 0.7070962947951498, "grad_norm": 0.18825504183769226, "learning_rate": 8.166270422221463e-06, "loss": 0.0095, "step": 41870 }, { "epoch": 0.7072651737764718, "grad_norm": 0.2817719280719757, "learning_rate": 8.165129684857636e-06, "loss": 0.0118, "step": 41880 }, { "epoch": 0.7074340527577938, "grad_norm": 0.26539766788482666, "learning_rate": 8.163988672515096e-06, "loss": 0.019, "step": 41890 }, { "epoch": 0.7076029317391157, "grad_norm": 0.45657655596733093, "learning_rate": 8.162847385292973e-06, "loss": 0.0117, "step": 41900 }, { "epoch": 0.7077718107204377, "grad_norm": 0.618705153465271, "learning_rate": 8.16170582329042e-06, "loss": 0.0154, "step": 41910 }, { "epoch": 0.7079406897017597, "grad_norm": 0.16507969796657562, "learning_rate": 8.160563986606611e-06, "loss": 0.0111, "step": 41920 }, { "epoch": 0.7081095686830817, "grad_norm": 0.4809911549091339, "learning_rate": 8.159421875340749e-06, "loss": 0.0111, "step": 41930 }, { "epoch": 0.7082784476644037, "grad_norm": 0.3536389172077179, "learning_rate": 8.158279489592056e-06, "loss": 0.0086, "step": 41940 }, { "epoch": 0.7084473266457256, "grad_norm": 0.42795613408088684, "learning_rate": 8.15713682945978e-06, "loss": 0.0109, "step": 41950 }, { "epoch": 0.7086162056270476, "grad_norm": 0.2073279619216919, "learning_rate": 8.15599389504319e-06, "loss": 0.008, "step": 41960 }, { "epoch": 0.7087850846083696, "grad_norm": 0.3315151333808899, "learning_rate": 8.154850686441586e-06, "loss": 0.0119, "step": 41970 }, { "epoch": 0.7089539635896916, "grad_norm": 0.2767898440361023, "learning_rate": 8.153707203754286e-06, "loss": 0.0107, "step": 41980 }, { "epoch": 0.7091228425710137, "grad_norm": 0.24608786404132843, "learning_rate": 8.15256344708063e-06, "loss": 0.0099, "step": 41990 }, { "epoch": 0.7092917215523356, "grad_norm": 0.15359628200531006, "learning_rate": 8.151419416519988e-06, "loss": 0.0137, "step": 42000 }, { "epoch": 0.7094606005336576, "grad_norm": 0.2605164051055908, "learning_rate": 8.150275112171749e-06, "loss": 0.0105, "step": 42010 }, { "epoch": 0.7096294795149796, "grad_norm": 0.6359145045280457, "learning_rate": 8.149130534135326e-06, "loss": 0.0174, "step": 42020 }, { "epoch": 0.7097983584963016, "grad_norm": 0.26171934604644775, "learning_rate": 8.14798568251016e-06, "loss": 0.0131, "step": 42030 }, { "epoch": 0.7099672374776236, "grad_norm": 0.2300816774368286, "learning_rate": 8.146840557395712e-06, "loss": 0.0084, "step": 42040 }, { "epoch": 0.7101361164589455, "grad_norm": 0.3527112603187561, "learning_rate": 8.145695158891467e-06, "loss": 0.0107, "step": 42050 }, { "epoch": 0.7103049954402675, "grad_norm": 0.5191010236740112, "learning_rate": 8.144549487096934e-06, "loss": 0.0089, "step": 42060 }, { "epoch": 0.7104738744215895, "grad_norm": 0.7001582384109497, "learning_rate": 8.143403542111648e-06, "loss": 0.0155, "step": 42070 }, { "epoch": 0.7106427534029115, "grad_norm": 0.1801554560661316, "learning_rate": 8.142257324035163e-06, "loss": 0.0136, "step": 42080 }, { "epoch": 0.7108116323842335, "grad_norm": 0.3444766104221344, "learning_rate": 8.141110832967064e-06, "loss": 0.0114, "step": 42090 }, { "epoch": 0.7109805113655554, "grad_norm": 0.6953239440917969, "learning_rate": 8.13996406900695e-06, "loss": 0.0124, "step": 42100 }, { "epoch": 0.7111493903468774, "grad_norm": 0.5063139200210571, "learning_rate": 8.138817032254453e-06, "loss": 0.0126, "step": 42110 }, { "epoch": 0.7113182693281994, "grad_norm": 0.28260305523872375, "learning_rate": 8.137669722809225e-06, "loss": 0.0096, "step": 42120 }, { "epoch": 0.7114871483095214, "grad_norm": 0.3011462688446045, "learning_rate": 8.136522140770939e-06, "loss": 0.0129, "step": 42130 }, { "epoch": 0.7116560272908434, "grad_norm": 0.3981657326221466, "learning_rate": 8.135374286239293e-06, "loss": 0.0126, "step": 42140 }, { "epoch": 0.7118249062721653, "grad_norm": 0.434320867061615, "learning_rate": 8.134226159314014e-06, "loss": 0.0169, "step": 42150 }, { "epoch": 0.7119937852534873, "grad_norm": 0.49254968762397766, "learning_rate": 8.133077760094846e-06, "loss": 0.0096, "step": 42160 }, { "epoch": 0.7121626642348093, "grad_norm": 0.3154815435409546, "learning_rate": 8.131929088681561e-06, "loss": 0.0135, "step": 42170 }, { "epoch": 0.7123315432161313, "grad_norm": 0.15152670443058014, "learning_rate": 8.130780145173948e-06, "loss": 0.0079, "step": 42180 }, { "epoch": 0.7125004221974534, "grad_norm": 0.2883227467536926, "learning_rate": 8.12963092967183e-06, "loss": 0.0087, "step": 42190 }, { "epoch": 0.7126693011787752, "grad_norm": 0.4074741005897522, "learning_rate": 8.128481442275043e-06, "loss": 0.0096, "step": 42200 }, { "epoch": 0.7128381801600973, "grad_norm": 0.5727638602256775, "learning_rate": 8.127331683083456e-06, "loss": 0.014, "step": 42210 }, { "epoch": 0.7130070591414193, "grad_norm": 0.24874190986156464, "learning_rate": 8.126181652196956e-06, "loss": 0.0124, "step": 42220 }, { "epoch": 0.7131759381227413, "grad_norm": 0.6826158165931702, "learning_rate": 8.125031349715453e-06, "loss": 0.0114, "step": 42230 }, { "epoch": 0.7133448171040633, "grad_norm": 0.32412034273147583, "learning_rate": 8.123880775738884e-06, "loss": 0.012, "step": 42240 }, { "epoch": 0.7135136960853852, "grad_norm": 0.37394586205482483, "learning_rate": 8.12272993036721e-06, "loss": 0.0198, "step": 42250 }, { "epoch": 0.7136825750667072, "grad_norm": 0.2949330806732178, "learning_rate": 8.121578813700409e-06, "loss": 0.0093, "step": 42260 }, { "epoch": 0.7138514540480292, "grad_norm": 0.371358186006546, "learning_rate": 8.120427425838492e-06, "loss": 0.0141, "step": 42270 }, { "epoch": 0.7140203330293512, "grad_norm": 0.36977964639663696, "learning_rate": 8.119275766881484e-06, "loss": 0.017, "step": 42280 }, { "epoch": 0.7141892120106732, "grad_norm": 0.29963964223861694, "learning_rate": 8.118123836929442e-06, "loss": 0.0111, "step": 42290 }, { "epoch": 0.7143580909919951, "grad_norm": 0.38372546434402466, "learning_rate": 8.116971636082442e-06, "loss": 0.0129, "step": 42300 }, { "epoch": 0.7145269699733171, "grad_norm": 0.5465008020401001, "learning_rate": 8.115819164440584e-06, "loss": 0.0143, "step": 42310 }, { "epoch": 0.7146958489546391, "grad_norm": 0.40953922271728516, "learning_rate": 8.114666422103991e-06, "loss": 0.0098, "step": 42320 }, { "epoch": 0.7148647279359611, "grad_norm": 0.470480352640152, "learning_rate": 8.113513409172811e-06, "loss": 0.013, "step": 42330 }, { "epoch": 0.7150336069172831, "grad_norm": 0.40505313873291016, "learning_rate": 8.112360125747216e-06, "loss": 0.013, "step": 42340 }, { "epoch": 0.715202485898605, "grad_norm": 0.3591001629829407, "learning_rate": 8.111206571927401e-06, "loss": 0.0181, "step": 42350 }, { "epoch": 0.715371364879927, "grad_norm": 0.403339147567749, "learning_rate": 8.110052747813582e-06, "loss": 0.014, "step": 42360 }, { "epoch": 0.715540243861249, "grad_norm": 0.42683514952659607, "learning_rate": 8.108898653506e-06, "loss": 0.0201, "step": 42370 }, { "epoch": 0.715709122842571, "grad_norm": 0.3167420029640198, "learning_rate": 8.10774428910492e-06, "loss": 0.0151, "step": 42380 }, { "epoch": 0.715878001823893, "grad_norm": 0.37647223472595215, "learning_rate": 8.106589654710633e-06, "loss": 0.0143, "step": 42390 }, { "epoch": 0.716046880805215, "grad_norm": 0.32277387380599976, "learning_rate": 8.105434750423449e-06, "loss": 0.0075, "step": 42400 }, { "epoch": 0.716215759786537, "grad_norm": 0.3334757685661316, "learning_rate": 8.104279576343703e-06, "loss": 0.0114, "step": 42410 }, { "epoch": 0.716384638767859, "grad_norm": 0.4539603590965271, "learning_rate": 8.103124132571755e-06, "loss": 0.0159, "step": 42420 }, { "epoch": 0.716553517749181, "grad_norm": 0.3101227283477783, "learning_rate": 8.101968419207985e-06, "loss": 0.0085, "step": 42430 }, { "epoch": 0.716722396730503, "grad_norm": 0.30713993310928345, "learning_rate": 8.1008124363528e-06, "loss": 0.0106, "step": 42440 }, { "epoch": 0.7168912757118249, "grad_norm": 0.8219064474105835, "learning_rate": 8.099656184106629e-06, "loss": 0.0138, "step": 42450 }, { "epoch": 0.7170601546931469, "grad_norm": 0.1715286523103714, "learning_rate": 8.098499662569925e-06, "loss": 0.0113, "step": 42460 }, { "epoch": 0.7172290336744689, "grad_norm": 0.10377176105976105, "learning_rate": 8.09734287184316e-06, "loss": 0.0076, "step": 42470 }, { "epoch": 0.7173979126557909, "grad_norm": 0.5281046628952026, "learning_rate": 8.096185812026839e-06, "loss": 0.0155, "step": 42480 }, { "epoch": 0.7175667916371129, "grad_norm": 0.3900276720523834, "learning_rate": 8.09502848322148e-06, "loss": 0.0139, "step": 42490 }, { "epoch": 0.7177356706184348, "grad_norm": 0.1799519807100296, "learning_rate": 8.09387088552763e-06, "loss": 0.012, "step": 42500 }, { "epoch": 0.7179045495997568, "grad_norm": 0.25410735607147217, "learning_rate": 8.09271301904586e-06, "loss": 0.0156, "step": 42510 }, { "epoch": 0.7180734285810788, "grad_norm": 0.47142839431762695, "learning_rate": 8.09155488387676e-06, "loss": 0.0141, "step": 42520 }, { "epoch": 0.7182423075624008, "grad_norm": 0.2991814911365509, "learning_rate": 8.090396480120947e-06, "loss": 0.0109, "step": 42530 }, { "epoch": 0.7184111865437227, "grad_norm": 0.3724268078804016, "learning_rate": 8.089237807879061e-06, "loss": 0.0126, "step": 42540 }, { "epoch": 0.7185800655250447, "grad_norm": 0.33117586374282837, "learning_rate": 8.088078867251762e-06, "loss": 0.0128, "step": 42550 }, { "epoch": 0.7187489445063667, "grad_norm": 0.18921425938606262, "learning_rate": 8.086919658339742e-06, "loss": 0.0107, "step": 42560 }, { "epoch": 0.7189178234876887, "grad_norm": 0.28900399804115295, "learning_rate": 8.085760181243704e-06, "loss": 0.0116, "step": 42570 }, { "epoch": 0.7190867024690107, "grad_norm": 0.2748830318450928, "learning_rate": 8.084600436064383e-06, "loss": 0.0104, "step": 42580 }, { "epoch": 0.7192555814503326, "grad_norm": 0.3455299735069275, "learning_rate": 8.083440422902533e-06, "loss": 0.0143, "step": 42590 }, { "epoch": 0.7194244604316546, "grad_norm": 0.20121806859970093, "learning_rate": 8.082280141858938e-06, "loss": 0.0113, "step": 42600 }, { "epoch": 0.7195933394129767, "grad_norm": 0.6171212792396545, "learning_rate": 8.081119593034394e-06, "loss": 0.0144, "step": 42610 }, { "epoch": 0.7197622183942987, "grad_norm": 0.28583672642707825, "learning_rate": 8.07995877652973e-06, "loss": 0.0173, "step": 42620 }, { "epoch": 0.7199310973756207, "grad_norm": 0.3035142421722412, "learning_rate": 8.078797692445795e-06, "loss": 0.0113, "step": 42630 }, { "epoch": 0.7200999763569426, "grad_norm": 0.1746544986963272, "learning_rate": 8.077636340883462e-06, "loss": 0.009, "step": 42640 }, { "epoch": 0.7202688553382646, "grad_norm": 0.6695650815963745, "learning_rate": 8.076474721943623e-06, "loss": 0.0165, "step": 42650 }, { "epoch": 0.7204377343195866, "grad_norm": 0.3158681094646454, "learning_rate": 8.0753128357272e-06, "loss": 0.0071, "step": 42660 }, { "epoch": 0.7206066133009086, "grad_norm": 0.5050218105316162, "learning_rate": 8.074150682335134e-06, "loss": 0.0113, "step": 42670 }, { "epoch": 0.7207754922822306, "grad_norm": 0.3102779686450958, "learning_rate": 8.072988261868388e-06, "loss": 0.0122, "step": 42680 }, { "epoch": 0.7209443712635525, "grad_norm": 0.34874942898750305, "learning_rate": 8.071825574427951e-06, "loss": 0.012, "step": 42690 }, { "epoch": 0.7211132502448745, "grad_norm": 0.3732661306858063, "learning_rate": 8.070662620114838e-06, "loss": 0.0127, "step": 42700 }, { "epoch": 0.7212821292261965, "grad_norm": 0.2517901062965393, "learning_rate": 8.06949939903008e-06, "loss": 0.0089, "step": 42710 }, { "epoch": 0.7214510082075185, "grad_norm": 0.2966994345188141, "learning_rate": 8.068335911274736e-06, "loss": 0.015, "step": 42720 }, { "epoch": 0.7216198871888405, "grad_norm": 0.4560511112213135, "learning_rate": 8.067172156949884e-06, "loss": 0.0192, "step": 42730 }, { "epoch": 0.7217887661701624, "grad_norm": 0.22092261910438538, "learning_rate": 8.066008136156634e-06, "loss": 0.0072, "step": 42740 }, { "epoch": 0.7219576451514844, "grad_norm": 0.2374516725540161, "learning_rate": 8.064843848996107e-06, "loss": 0.0109, "step": 42750 }, { "epoch": 0.7221265241328064, "grad_norm": 0.348343163728714, "learning_rate": 8.063679295569458e-06, "loss": 0.0073, "step": 42760 }, { "epoch": 0.7222954031141284, "grad_norm": 0.342538446187973, "learning_rate": 8.062514475977858e-06, "loss": 0.0106, "step": 42770 }, { "epoch": 0.7224642820954504, "grad_norm": 0.3703286945819855, "learning_rate": 8.061349390322507e-06, "loss": 0.0096, "step": 42780 }, { "epoch": 0.7226331610767723, "grad_norm": 0.298345685005188, "learning_rate": 8.06018403870462e-06, "loss": 0.0122, "step": 42790 }, { "epoch": 0.7228020400580943, "grad_norm": 0.36521396040916443, "learning_rate": 8.059018421225444e-06, "loss": 0.01, "step": 42800 }, { "epoch": 0.7229709190394163, "grad_norm": 0.3494165539741516, "learning_rate": 8.057852537986242e-06, "loss": 0.0172, "step": 42810 }, { "epoch": 0.7231397980207384, "grad_norm": 0.2150857150554657, "learning_rate": 8.056686389088305e-06, "loss": 0.0082, "step": 42820 }, { "epoch": 0.7233086770020604, "grad_norm": 0.361082524061203, "learning_rate": 8.055519974632945e-06, "loss": 0.0151, "step": 42830 }, { "epoch": 0.7234775559833823, "grad_norm": 0.23023605346679688, "learning_rate": 8.054353294721496e-06, "loss": 0.0102, "step": 42840 }, { "epoch": 0.7236464349647043, "grad_norm": 0.49685996770858765, "learning_rate": 8.053186349455317e-06, "loss": 0.0123, "step": 42850 }, { "epoch": 0.7238153139460263, "grad_norm": 0.5466755032539368, "learning_rate": 8.05201913893579e-06, "loss": 0.0157, "step": 42860 }, { "epoch": 0.7239841929273483, "grad_norm": 0.4882690906524658, "learning_rate": 8.050851663264318e-06, "loss": 0.0116, "step": 42870 }, { "epoch": 0.7241530719086703, "grad_norm": 0.31107181310653687, "learning_rate": 8.04968392254233e-06, "loss": 0.0127, "step": 42880 }, { "epoch": 0.7243219508899922, "grad_norm": 0.41390082240104675, "learning_rate": 8.048515916871275e-06, "loss": 0.0199, "step": 42890 }, { "epoch": 0.7244908298713142, "grad_norm": 0.16933877766132355, "learning_rate": 8.047347646352628e-06, "loss": 0.0114, "step": 42900 }, { "epoch": 0.7246597088526362, "grad_norm": 0.5730894207954407, "learning_rate": 8.046179111087883e-06, "loss": 0.0179, "step": 42910 }, { "epoch": 0.7248285878339582, "grad_norm": 0.29088759422302246, "learning_rate": 8.045010311178562e-06, "loss": 0.015, "step": 42920 }, { "epoch": 0.7249974668152802, "grad_norm": 0.3179890513420105, "learning_rate": 8.04384124672621e-06, "loss": 0.01, "step": 42930 }, { "epoch": 0.7251663457966021, "grad_norm": 0.24521668255329132, "learning_rate": 8.042671917832385e-06, "loss": 0.0094, "step": 42940 }, { "epoch": 0.7253352247779241, "grad_norm": 0.424618661403656, "learning_rate": 8.04150232459868e-06, "loss": 0.0108, "step": 42950 }, { "epoch": 0.7255041037592461, "grad_norm": 0.41922417283058167, "learning_rate": 8.040332467126706e-06, "loss": 0.0136, "step": 42960 }, { "epoch": 0.7256729827405681, "grad_norm": 0.21681764721870422, "learning_rate": 8.039162345518097e-06, "loss": 0.0113, "step": 42970 }, { "epoch": 0.7258418617218901, "grad_norm": 0.1659587174654007, "learning_rate": 8.03799195987451e-06, "loss": 0.0202, "step": 42980 }, { "epoch": 0.726010740703212, "grad_norm": 0.26121899485588074, "learning_rate": 8.036821310297628e-06, "loss": 0.0104, "step": 42990 }, { "epoch": 0.726179619684534, "grad_norm": 0.3670707941055298, "learning_rate": 8.035650396889151e-06, "loss": 0.0138, "step": 43000 }, { "epoch": 0.726348498665856, "grad_norm": 0.1538810282945633, "learning_rate": 8.034479219750806e-06, "loss": 0.0104, "step": 43010 }, { "epoch": 0.726517377647178, "grad_norm": 0.26926088333129883, "learning_rate": 8.03330777898434e-06, "loss": 0.0118, "step": 43020 }, { "epoch": 0.7266862566285001, "grad_norm": 0.10570037364959717, "learning_rate": 8.03213607469153e-06, "loss": 0.0109, "step": 43030 }, { "epoch": 0.726855135609822, "grad_norm": 0.12280025333166122, "learning_rate": 8.030964106974166e-06, "loss": 0.0101, "step": 43040 }, { "epoch": 0.727024014591144, "grad_norm": 0.29948264360427856, "learning_rate": 8.029791875934068e-06, "loss": 0.0109, "step": 43050 }, { "epoch": 0.727192893572466, "grad_norm": 0.3703051507472992, "learning_rate": 8.028619381673075e-06, "loss": 0.0079, "step": 43060 }, { "epoch": 0.727361772553788, "grad_norm": 0.29547327756881714, "learning_rate": 8.027446624293052e-06, "loss": 0.0159, "step": 43070 }, { "epoch": 0.72753065153511, "grad_norm": 0.23734727501869202, "learning_rate": 8.026273603895884e-06, "loss": 0.0135, "step": 43080 }, { "epoch": 0.7276995305164319, "grad_norm": 0.5447749495506287, "learning_rate": 8.025100320583482e-06, "loss": 0.0165, "step": 43090 }, { "epoch": 0.7278684094977539, "grad_norm": 0.3474448025226593, "learning_rate": 8.023926774457776e-06, "loss": 0.0093, "step": 43100 }, { "epoch": 0.7280372884790759, "grad_norm": 0.27443739771842957, "learning_rate": 8.022752965620722e-06, "loss": 0.0113, "step": 43110 }, { "epoch": 0.7282061674603979, "grad_norm": 0.22042107582092285, "learning_rate": 8.021578894174296e-06, "loss": 0.0092, "step": 43120 }, { "epoch": 0.7283750464417199, "grad_norm": 0.3346382975578308, "learning_rate": 8.020404560220501e-06, "loss": 0.0198, "step": 43130 }, { "epoch": 0.7285439254230418, "grad_norm": 0.2969478666782379, "learning_rate": 8.01922996386136e-06, "loss": 0.0107, "step": 43140 }, { "epoch": 0.7287128044043638, "grad_norm": 0.18979446589946747, "learning_rate": 8.018055105198916e-06, "loss": 0.0071, "step": 43150 }, { "epoch": 0.7288816833856858, "grad_norm": 0.3726671040058136, "learning_rate": 8.01687998433524e-06, "loss": 0.0094, "step": 43160 }, { "epoch": 0.7290505623670078, "grad_norm": 0.437589168548584, "learning_rate": 8.015704601372425e-06, "loss": 0.0106, "step": 43170 }, { "epoch": 0.7292194413483298, "grad_norm": 0.5146123170852661, "learning_rate": 8.014528956412581e-06, "loss": 0.0185, "step": 43180 }, { "epoch": 0.7293883203296517, "grad_norm": 0.230777770280838, "learning_rate": 8.01335304955785e-06, "loss": 0.0207, "step": 43190 }, { "epoch": 0.7295571993109737, "grad_norm": 0.3367876410484314, "learning_rate": 8.01217688091039e-06, "loss": 0.0089, "step": 43200 }, { "epoch": 0.7297260782922957, "grad_norm": 0.5161893367767334, "learning_rate": 8.011000450572383e-06, "loss": 0.0129, "step": 43210 }, { "epoch": 0.7298949572736178, "grad_norm": 0.24781116843223572, "learning_rate": 8.009823758646035e-06, "loss": 0.0142, "step": 43220 }, { "epoch": 0.7300638362549398, "grad_norm": 0.47829288244247437, "learning_rate": 8.008646805233575e-06, "loss": 0.0143, "step": 43230 }, { "epoch": 0.7302327152362617, "grad_norm": 0.27041780948638916, "learning_rate": 8.007469590437251e-06, "loss": 0.0114, "step": 43240 }, { "epoch": 0.7304015942175837, "grad_norm": 0.2752780020236969, "learning_rate": 8.00629211435934e-06, "loss": 0.0121, "step": 43250 }, { "epoch": 0.7305704731989057, "grad_norm": 0.2673806846141815, "learning_rate": 8.005114377102135e-06, "loss": 0.0111, "step": 43260 }, { "epoch": 0.7307393521802277, "grad_norm": 0.2502369284629822, "learning_rate": 8.003936378767958e-06, "loss": 0.0147, "step": 43270 }, { "epoch": 0.7309082311615497, "grad_norm": 0.3041146993637085, "learning_rate": 8.002758119459148e-06, "loss": 0.0151, "step": 43280 }, { "epoch": 0.7310771101428716, "grad_norm": 0.21079587936401367, "learning_rate": 8.00157959927807e-06, "loss": 0.0116, "step": 43290 }, { "epoch": 0.7312459891241936, "grad_norm": 0.34441158175468445, "learning_rate": 8.000400818327115e-06, "loss": 0.0124, "step": 43300 }, { "epoch": 0.7314148681055156, "grad_norm": 0.3169476389884949, "learning_rate": 7.999221776708686e-06, "loss": 0.0136, "step": 43310 }, { "epoch": 0.7315837470868376, "grad_norm": 0.2486472874879837, "learning_rate": 7.998042474525218e-06, "loss": 0.01, "step": 43320 }, { "epoch": 0.7317526260681596, "grad_norm": 0.3518286347389221, "learning_rate": 7.996862911879168e-06, "loss": 0.0094, "step": 43330 }, { "epoch": 0.7319215050494815, "grad_norm": 0.32449308037757874, "learning_rate": 7.995683088873011e-06, "loss": 0.0112, "step": 43340 }, { "epoch": 0.7320903840308035, "grad_norm": 0.28675493597984314, "learning_rate": 7.994503005609246e-06, "loss": 0.0116, "step": 43350 }, { "epoch": 0.7322592630121255, "grad_norm": 0.18495017290115356, "learning_rate": 7.9933226621904e-06, "loss": 0.0103, "step": 43360 }, { "epoch": 0.7324281419934475, "grad_norm": 0.3026364743709564, "learning_rate": 7.992142058719015e-06, "loss": 0.0105, "step": 43370 }, { "epoch": 0.7325970209747695, "grad_norm": 0.1911313682794571, "learning_rate": 7.99096119529766e-06, "loss": 0.0116, "step": 43380 }, { "epoch": 0.7327658999560914, "grad_norm": 0.3712156116962433, "learning_rate": 7.989780072028925e-06, "loss": 0.0138, "step": 43390 }, { "epoch": 0.7329347789374134, "grad_norm": 0.2966275215148926, "learning_rate": 7.988598689015423e-06, "loss": 0.0074, "step": 43400 }, { "epoch": 0.7331036579187354, "grad_norm": 0.4474658668041229, "learning_rate": 7.987417046359791e-06, "loss": 0.0124, "step": 43410 }, { "epoch": 0.7332725369000574, "grad_norm": 0.15415610373020172, "learning_rate": 7.986235144164686e-06, "loss": 0.0141, "step": 43420 }, { "epoch": 0.7334414158813795, "grad_norm": 0.4593863785266876, "learning_rate": 7.985052982532789e-06, "loss": 0.0163, "step": 43430 }, { "epoch": 0.7336102948627014, "grad_norm": 0.1953551471233368, "learning_rate": 7.983870561566804e-06, "loss": 0.0136, "step": 43440 }, { "epoch": 0.7337791738440234, "grad_norm": 0.3735561668872833, "learning_rate": 7.982687881369454e-06, "loss": 0.0145, "step": 43450 }, { "epoch": 0.7339480528253454, "grad_norm": 1.2083765268325806, "learning_rate": 7.981504942043492e-06, "loss": 0.0127, "step": 43460 }, { "epoch": 0.7341169318066674, "grad_norm": 0.3263375461101532, "learning_rate": 7.980321743691685e-06, "loss": 0.0084, "step": 43470 }, { "epoch": 0.7342858107879894, "grad_norm": 0.2373078316450119, "learning_rate": 7.979138286416827e-06, "loss": 0.0104, "step": 43480 }, { "epoch": 0.7344546897693113, "grad_norm": 0.6126260161399841, "learning_rate": 7.977954570321737e-06, "loss": 0.0161, "step": 43490 }, { "epoch": 0.7346235687506333, "grad_norm": 0.32356569170951843, "learning_rate": 7.97677059550925e-06, "loss": 0.0115, "step": 43500 }, { "epoch": 0.7347924477319553, "grad_norm": 0.7103553414344788, "learning_rate": 7.975586362082228e-06, "loss": 0.0114, "step": 43510 }, { "epoch": 0.7349613267132773, "grad_norm": 0.5195574760437012, "learning_rate": 7.974401870143554e-06, "loss": 0.0149, "step": 43520 }, { "epoch": 0.7351302056945993, "grad_norm": 0.4091726243495941, "learning_rate": 7.973217119796134e-06, "loss": 0.0111, "step": 43530 }, { "epoch": 0.7352990846759212, "grad_norm": 0.14204443991184235, "learning_rate": 7.972032111142896e-06, "loss": 0.0141, "step": 43540 }, { "epoch": 0.7354679636572432, "grad_norm": 0.2457074522972107, "learning_rate": 7.970846844286791e-06, "loss": 0.0093, "step": 43550 }, { "epoch": 0.7356368426385652, "grad_norm": 0.5148177742958069, "learning_rate": 7.969661319330792e-06, "loss": 0.0183, "step": 43560 }, { "epoch": 0.7358057216198872, "grad_norm": 0.29816463589668274, "learning_rate": 7.968475536377895e-06, "loss": 0.0176, "step": 43570 }, { "epoch": 0.7359746006012092, "grad_norm": 0.22095052897930145, "learning_rate": 7.967289495531117e-06, "loss": 0.0134, "step": 43580 }, { "epoch": 0.7361434795825311, "grad_norm": 0.16396556794643402, "learning_rate": 7.966103196893496e-06, "loss": 0.0088, "step": 43590 }, { "epoch": 0.7363123585638531, "grad_norm": 0.25224927067756653, "learning_rate": 7.9649166405681e-06, "loss": 0.0112, "step": 43600 }, { "epoch": 0.7364812375451751, "grad_norm": 0.2984204590320587, "learning_rate": 7.96372982665801e-06, "loss": 0.0175, "step": 43610 }, { "epoch": 0.7366501165264971, "grad_norm": 0.273943692445755, "learning_rate": 7.962542755266337e-06, "loss": 0.0059, "step": 43620 }, { "epoch": 0.7368189955078192, "grad_norm": 0.47221294045448303, "learning_rate": 7.961355426496207e-06, "loss": 0.0092, "step": 43630 }, { "epoch": 0.736987874489141, "grad_norm": 0.49977847933769226, "learning_rate": 7.960167840450775e-06, "loss": 0.0127, "step": 43640 }, { "epoch": 0.737156753470463, "grad_norm": 0.4929150640964508, "learning_rate": 7.958979997233212e-06, "loss": 0.014, "step": 43650 }, { "epoch": 0.7373256324517851, "grad_norm": 0.27828502655029297, "learning_rate": 7.95779189694672e-06, "loss": 0.0119, "step": 43660 }, { "epoch": 0.7374945114331071, "grad_norm": 1.2517930269241333, "learning_rate": 7.956603539694516e-06, "loss": 0.0174, "step": 43670 }, { "epoch": 0.737663390414429, "grad_norm": 0.38280147314071655, "learning_rate": 7.95541492557984e-06, "loss": 0.0127, "step": 43680 }, { "epoch": 0.737832269395751, "grad_norm": 0.31023159623146057, "learning_rate": 7.954226054705957e-06, "loss": 0.011, "step": 43690 }, { "epoch": 0.738001148377073, "grad_norm": 0.20649248361587524, "learning_rate": 7.953036927176154e-06, "loss": 0.0077, "step": 43700 }, { "epoch": 0.738170027358395, "grad_norm": 0.31085944175720215, "learning_rate": 7.951847543093739e-06, "loss": 0.0132, "step": 43710 }, { "epoch": 0.738338906339717, "grad_norm": 0.16252648830413818, "learning_rate": 7.950657902562041e-06, "loss": 0.0078, "step": 43720 }, { "epoch": 0.7385077853210389, "grad_norm": 0.19616857171058655, "learning_rate": 7.949468005684416e-06, "loss": 0.0092, "step": 43730 }, { "epoch": 0.7386766643023609, "grad_norm": 0.2727191746234894, "learning_rate": 7.948277852564239e-06, "loss": 0.015, "step": 43740 }, { "epoch": 0.7388455432836829, "grad_norm": 0.25804969668388367, "learning_rate": 7.947087443304905e-06, "loss": 0.0096, "step": 43750 }, { "epoch": 0.7390144222650049, "grad_norm": 0.2011924535036087, "learning_rate": 7.945896778009834e-06, "loss": 0.008, "step": 43760 }, { "epoch": 0.7391833012463269, "grad_norm": 0.32398301362991333, "learning_rate": 7.944705856782473e-06, "loss": 0.0138, "step": 43770 }, { "epoch": 0.7393521802276488, "grad_norm": 0.31945133209228516, "learning_rate": 7.94351467972628e-06, "loss": 0.0082, "step": 43780 }, { "epoch": 0.7395210592089708, "grad_norm": 0.40559476613998413, "learning_rate": 7.942323246944746e-06, "loss": 0.0121, "step": 43790 }, { "epoch": 0.7396899381902928, "grad_norm": 0.38272324204444885, "learning_rate": 7.941131558541376e-06, "loss": 0.0102, "step": 43800 }, { "epoch": 0.7398588171716148, "grad_norm": 0.3522919714450836, "learning_rate": 7.939939614619705e-06, "loss": 0.0087, "step": 43810 }, { "epoch": 0.7400276961529368, "grad_norm": 0.22814752161502838, "learning_rate": 7.938747415283284e-06, "loss": 0.0128, "step": 43820 }, { "epoch": 0.7401965751342587, "grad_norm": 0.6264970302581787, "learning_rate": 7.937554960635689e-06, "loss": 0.0144, "step": 43830 }, { "epoch": 0.7403654541155807, "grad_norm": 0.5133099555969238, "learning_rate": 7.936362250780516e-06, "loss": 0.0139, "step": 43840 }, { "epoch": 0.7405343330969028, "grad_norm": 0.20585854351520538, "learning_rate": 7.935169285821387e-06, "loss": 0.0139, "step": 43850 }, { "epoch": 0.7407032120782248, "grad_norm": 0.12526148557662964, "learning_rate": 7.933976065861941e-06, "loss": 0.0117, "step": 43860 }, { "epoch": 0.7408720910595468, "grad_norm": 0.22768470644950867, "learning_rate": 7.932782591005845e-06, "loss": 0.0125, "step": 43870 }, { "epoch": 0.7410409700408687, "grad_norm": 0.5063386559486389, "learning_rate": 7.931588861356784e-06, "loss": 0.008, "step": 43880 }, { "epoch": 0.7412098490221907, "grad_norm": 0.21931074559688568, "learning_rate": 7.930394877018466e-06, "loss": 0.009, "step": 43890 }, { "epoch": 0.7413787280035127, "grad_norm": 0.15263892710208893, "learning_rate": 7.929200638094622e-06, "loss": 0.0086, "step": 43900 }, { "epoch": 0.7415476069848347, "grad_norm": 0.30510079860687256, "learning_rate": 7.928006144689003e-06, "loss": 0.0095, "step": 43910 }, { "epoch": 0.7417164859661567, "grad_norm": 0.5758764743804932, "learning_rate": 7.926811396905385e-06, "loss": 0.0169, "step": 43920 }, { "epoch": 0.7418853649474786, "grad_norm": 0.3665143847465515, "learning_rate": 7.925616394847566e-06, "loss": 0.0104, "step": 43930 }, { "epoch": 0.7420542439288006, "grad_norm": 0.1632893979549408, "learning_rate": 7.924421138619363e-06, "loss": 0.0127, "step": 43940 }, { "epoch": 0.7422231229101226, "grad_norm": 0.24056853353977203, "learning_rate": 7.923225628324618e-06, "loss": 0.0136, "step": 43950 }, { "epoch": 0.7423920018914446, "grad_norm": 0.38718801736831665, "learning_rate": 7.92202986406719e-06, "loss": 0.0085, "step": 43960 }, { "epoch": 0.7425608808727666, "grad_norm": 0.471382737159729, "learning_rate": 7.920833845950972e-06, "loss": 0.0126, "step": 43970 }, { "epoch": 0.7427297598540885, "grad_norm": 0.4825335443019867, "learning_rate": 7.919637574079864e-06, "loss": 0.0145, "step": 43980 }, { "epoch": 0.7428986388354105, "grad_norm": 0.3070482611656189, "learning_rate": 7.9184410485578e-06, "loss": 0.0153, "step": 43990 }, { "epoch": 0.7430675178167325, "grad_norm": 0.3482120633125305, "learning_rate": 7.917244269488726e-06, "loss": 0.0128, "step": 44000 }, { "epoch": 0.7432363967980545, "grad_norm": 0.3125222623348236, "learning_rate": 7.91604723697662e-06, "loss": 0.0104, "step": 44010 }, { "epoch": 0.7434052757793765, "grad_norm": 0.27285903692245483, "learning_rate": 7.914849951125473e-06, "loss": 0.0141, "step": 44020 }, { "epoch": 0.7435741547606984, "grad_norm": 0.4807106852531433, "learning_rate": 7.913652412039308e-06, "loss": 0.0119, "step": 44030 }, { "epoch": 0.7437430337420204, "grad_norm": 0.23284122347831726, "learning_rate": 7.91245461982216e-06, "loss": 0.0106, "step": 44040 }, { "epoch": 0.7439119127233424, "grad_norm": 0.6445547938346863, "learning_rate": 7.911256574578091e-06, "loss": 0.0123, "step": 44050 }, { "epoch": 0.7440807917046645, "grad_norm": 0.2686990797519684, "learning_rate": 7.910058276411183e-06, "loss": 0.0155, "step": 44060 }, { "epoch": 0.7442496706859865, "grad_norm": 0.642832338809967, "learning_rate": 7.908859725425546e-06, "loss": 0.011, "step": 44070 }, { "epoch": 0.7444185496673084, "grad_norm": 0.3777551054954529, "learning_rate": 7.907660921725301e-06, "loss": 0.011, "step": 44080 }, { "epoch": 0.7445874286486304, "grad_norm": 0.1326722651720047, "learning_rate": 7.9064618654146e-06, "loss": 0.0108, "step": 44090 }, { "epoch": 0.7447563076299524, "grad_norm": 0.25076407194137573, "learning_rate": 7.905262556597616e-06, "loss": 0.0128, "step": 44100 }, { "epoch": 0.7449251866112744, "grad_norm": 0.16167961061000824, "learning_rate": 7.904062995378539e-06, "loss": 0.0103, "step": 44110 }, { "epoch": 0.7450940655925964, "grad_norm": 0.2836713492870331, "learning_rate": 7.902863181861584e-06, "loss": 0.0147, "step": 44120 }, { "epoch": 0.7452629445739183, "grad_norm": 0.34468740224838257, "learning_rate": 7.901663116150992e-06, "loss": 0.0149, "step": 44130 }, { "epoch": 0.7454318235552403, "grad_norm": 0.2654409408569336, "learning_rate": 7.900462798351015e-06, "loss": 0.0103, "step": 44140 }, { "epoch": 0.7456007025365623, "grad_norm": 0.3899000585079193, "learning_rate": 7.89926222856594e-06, "loss": 0.0183, "step": 44150 }, { "epoch": 0.7457695815178843, "grad_norm": 0.4018895924091339, "learning_rate": 7.898061406900065e-06, "loss": 0.0134, "step": 44160 }, { "epoch": 0.7459384604992063, "grad_norm": 0.3702820837497711, "learning_rate": 7.896860333457716e-06, "loss": 0.0089, "step": 44170 }, { "epoch": 0.7461073394805282, "grad_norm": 0.143981471657753, "learning_rate": 7.895659008343241e-06, "loss": 0.0096, "step": 44180 }, { "epoch": 0.7462762184618502, "grad_norm": 0.3184301257133484, "learning_rate": 7.894457431661007e-06, "loss": 0.0102, "step": 44190 }, { "epoch": 0.7464450974431722, "grad_norm": 0.4069705903530121, "learning_rate": 7.893255603515405e-06, "loss": 0.0141, "step": 44200 }, { "epoch": 0.7466139764244942, "grad_norm": 0.3890167474746704, "learning_rate": 7.892053524010843e-06, "loss": 0.012, "step": 44210 }, { "epoch": 0.7467828554058162, "grad_norm": 0.39360517263412476, "learning_rate": 7.890851193251758e-06, "loss": 0.0105, "step": 44220 }, { "epoch": 0.7469517343871381, "grad_norm": 0.17453855276107788, "learning_rate": 7.889648611342607e-06, "loss": 0.0119, "step": 44230 }, { "epoch": 0.7471206133684601, "grad_norm": 0.3687051236629486, "learning_rate": 7.888445778387863e-06, "loss": 0.0165, "step": 44240 }, { "epoch": 0.7472894923497821, "grad_norm": 0.33249276876449585, "learning_rate": 7.887242694492028e-06, "loss": 0.0095, "step": 44250 }, { "epoch": 0.7474583713311042, "grad_norm": 0.13841991126537323, "learning_rate": 7.886039359759623e-06, "loss": 0.0119, "step": 44260 }, { "epoch": 0.7476272503124262, "grad_norm": 0.3676372766494751, "learning_rate": 7.884835774295189e-06, "loss": 0.0161, "step": 44270 }, { "epoch": 0.7477961292937481, "grad_norm": 0.5735569596290588, "learning_rate": 7.883631938203294e-06, "loss": 0.0112, "step": 44280 }, { "epoch": 0.7479650082750701, "grad_norm": 0.36304420232772827, "learning_rate": 7.88242785158852e-06, "loss": 0.0095, "step": 44290 }, { "epoch": 0.7481338872563921, "grad_norm": 0.34083110094070435, "learning_rate": 7.881223514555481e-06, "loss": 0.0158, "step": 44300 }, { "epoch": 0.7483027662377141, "grad_norm": 0.14944572746753693, "learning_rate": 7.880018927208798e-06, "loss": 0.009, "step": 44310 }, { "epoch": 0.7484716452190361, "grad_norm": 0.4572885036468506, "learning_rate": 7.878814089653131e-06, "loss": 0.0162, "step": 44320 }, { "epoch": 0.748640524200358, "grad_norm": 0.3584893047809601, "learning_rate": 7.87760900199315e-06, "loss": 0.0214, "step": 44330 }, { "epoch": 0.74880940318168, "grad_norm": 0.24983611702919006, "learning_rate": 7.876403664333549e-06, "loss": 0.0092, "step": 44340 }, { "epoch": 0.748978282163002, "grad_norm": 0.416754812002182, "learning_rate": 7.875198076779047e-06, "loss": 0.0125, "step": 44350 }, { "epoch": 0.749147161144324, "grad_norm": 0.6228697896003723, "learning_rate": 7.873992239434382e-06, "loss": 0.0073, "step": 44360 }, { "epoch": 0.749316040125646, "grad_norm": 0.2949915826320648, "learning_rate": 7.87278615240431e-06, "loss": 0.0116, "step": 44370 }, { "epoch": 0.7494849191069679, "grad_norm": 0.37177127599716187, "learning_rate": 7.87157981579362e-06, "loss": 0.0147, "step": 44380 }, { "epoch": 0.7496537980882899, "grad_norm": 0.2167794555425644, "learning_rate": 7.870373229707112e-06, "loss": 0.0086, "step": 44390 }, { "epoch": 0.7498226770696119, "grad_norm": 0.2870257794857025, "learning_rate": 7.86916639424961e-06, "loss": 0.0106, "step": 44400 }, { "epoch": 0.7499915560509339, "grad_norm": 0.4581202268600464, "learning_rate": 7.867959309525964e-06, "loss": 0.0125, "step": 44410 }, { "epoch": 0.7501604350322559, "grad_norm": 0.16175749897956848, "learning_rate": 7.866751975641038e-06, "loss": 0.0088, "step": 44420 }, { "epoch": 0.7503293140135778, "grad_norm": 0.29800498485565186, "learning_rate": 7.865544392699726e-06, "loss": 0.0112, "step": 44430 }, { "epoch": 0.7504981929948998, "grad_norm": 0.4556095600128174, "learning_rate": 7.86433656080694e-06, "loss": 0.0122, "step": 44440 }, { "epoch": 0.7506670719762218, "grad_norm": 0.4810056984424591, "learning_rate": 7.86312848006761e-06, "loss": 0.0095, "step": 44450 }, { "epoch": 0.7508359509575439, "grad_norm": 0.5033400654792786, "learning_rate": 7.861920150586696e-06, "loss": 0.0148, "step": 44460 }, { "epoch": 0.7510048299388659, "grad_norm": 0.11359251290559769, "learning_rate": 7.86071157246917e-06, "loss": 0.0081, "step": 44470 }, { "epoch": 0.7511737089201878, "grad_norm": 0.33446869254112244, "learning_rate": 7.859502745820034e-06, "loss": 0.0105, "step": 44480 }, { "epoch": 0.7513425879015098, "grad_norm": 0.2578246295452118, "learning_rate": 7.858293670744306e-06, "loss": 0.0115, "step": 44490 }, { "epoch": 0.7515114668828318, "grad_norm": 0.20568087697029114, "learning_rate": 7.857084347347027e-06, "loss": 0.0125, "step": 44500 }, { "epoch": 0.7516803458641538, "grad_norm": 0.24131736159324646, "learning_rate": 7.85587477573326e-06, "loss": 0.0087, "step": 44510 }, { "epoch": 0.7518492248454758, "grad_norm": 0.4827099144458771, "learning_rate": 7.854664956008094e-06, "loss": 0.0119, "step": 44520 }, { "epoch": 0.7520181038267977, "grad_norm": 0.48573824763298035, "learning_rate": 7.853454888276628e-06, "loss": 0.0153, "step": 44530 }, { "epoch": 0.7521869828081197, "grad_norm": 0.36690324544906616, "learning_rate": 7.852244572643994e-06, "loss": 0.0128, "step": 44540 }, { "epoch": 0.7523558617894417, "grad_norm": 0.1606527417898178, "learning_rate": 7.851034009215342e-06, "loss": 0.0176, "step": 44550 }, { "epoch": 0.7525247407707637, "grad_norm": 0.1080680787563324, "learning_rate": 7.849823198095841e-06, "loss": 0.0101, "step": 44560 }, { "epoch": 0.7526936197520857, "grad_norm": 0.4194629490375519, "learning_rate": 7.848612139390682e-06, "loss": 0.0116, "step": 44570 }, { "epoch": 0.7528624987334076, "grad_norm": 0.35330721735954285, "learning_rate": 7.847400833205082e-06, "loss": 0.0117, "step": 44580 }, { "epoch": 0.7530313777147296, "grad_norm": 0.30620622634887695, "learning_rate": 7.846189279644275e-06, "loss": 0.0111, "step": 44590 }, { "epoch": 0.7532002566960516, "grad_norm": 0.35818615555763245, "learning_rate": 7.844977478813518e-06, "loss": 0.0093, "step": 44600 }, { "epoch": 0.7533691356773736, "grad_norm": 0.4231314957141876, "learning_rate": 7.843765430818086e-06, "loss": 0.01, "step": 44610 }, { "epoch": 0.7535380146586956, "grad_norm": 0.35517004132270813, "learning_rate": 7.842553135763284e-06, "loss": 0.0131, "step": 44620 }, { "epoch": 0.7537068936400175, "grad_norm": 0.4614948034286499, "learning_rate": 7.841340593754431e-06, "loss": 0.0075, "step": 44630 }, { "epoch": 0.7538757726213395, "grad_norm": 0.2700091302394867, "learning_rate": 7.840127804896868e-06, "loss": 0.0125, "step": 44640 }, { "epoch": 0.7540446516026615, "grad_norm": 0.5781803131103516, "learning_rate": 7.838914769295962e-06, "loss": 0.011, "step": 44650 }, { "epoch": 0.7542135305839835, "grad_norm": 0.25987938046455383, "learning_rate": 7.837701487057096e-06, "loss": 0.0117, "step": 44660 }, { "epoch": 0.7543824095653056, "grad_norm": 0.1160740926861763, "learning_rate": 7.836487958285681e-06, "loss": 0.0131, "step": 44670 }, { "epoch": 0.7545512885466275, "grad_norm": 0.17421527206897736, "learning_rate": 7.83527418308714e-06, "loss": 0.0079, "step": 44680 }, { "epoch": 0.7547201675279495, "grad_norm": 0.4889138638973236, "learning_rate": 7.834060161566928e-06, "loss": 0.0113, "step": 44690 }, { "epoch": 0.7548890465092715, "grad_norm": 0.19542564451694489, "learning_rate": 7.832845893830512e-06, "loss": 0.0111, "step": 44700 }, { "epoch": 0.7550579254905935, "grad_norm": 0.37631863355636597, "learning_rate": 7.831631379983386e-06, "loss": 0.0147, "step": 44710 }, { "epoch": 0.7552268044719155, "grad_norm": 0.34088653326034546, "learning_rate": 7.830416620131065e-06, "loss": 0.0114, "step": 44720 }, { "epoch": 0.7553956834532374, "grad_norm": 0.3028384745121002, "learning_rate": 7.829201614379082e-06, "loss": 0.0146, "step": 44730 }, { "epoch": 0.7555645624345594, "grad_norm": 0.44008713960647583, "learning_rate": 7.827986362832997e-06, "loss": 0.0122, "step": 44740 }, { "epoch": 0.7557334414158814, "grad_norm": 0.3001992702484131, "learning_rate": 7.826770865598387e-06, "loss": 0.0095, "step": 44750 }, { "epoch": 0.7559023203972034, "grad_norm": 0.4776478409767151, "learning_rate": 7.82555512278085e-06, "loss": 0.0135, "step": 44760 }, { "epoch": 0.7560711993785253, "grad_norm": 0.4465424418449402, "learning_rate": 7.824339134486007e-06, "loss": 0.0102, "step": 44770 }, { "epoch": 0.7562400783598473, "grad_norm": 0.10521680116653442, "learning_rate": 7.8231229008195e-06, "loss": 0.0143, "step": 44780 }, { "epoch": 0.7564089573411693, "grad_norm": 0.18874463438987732, "learning_rate": 7.821906421886996e-06, "loss": 0.0141, "step": 44790 }, { "epoch": 0.7565778363224913, "grad_norm": 0.24056878685951233, "learning_rate": 7.820689697794174e-06, "loss": 0.0114, "step": 44800 }, { "epoch": 0.7567467153038133, "grad_norm": 0.32136037945747375, "learning_rate": 7.819472728646744e-06, "loss": 0.0155, "step": 44810 }, { "epoch": 0.7569155942851352, "grad_norm": 0.1230238676071167, "learning_rate": 7.818255514550433e-06, "loss": 0.011, "step": 44820 }, { "epoch": 0.7570844732664572, "grad_norm": 0.19437429308891296, "learning_rate": 7.817038055610986e-06, "loss": 0.0093, "step": 44830 }, { "epoch": 0.7572533522477792, "grad_norm": 0.17221128940582275, "learning_rate": 7.815820351934179e-06, "loss": 0.0102, "step": 44840 }, { "epoch": 0.7574222312291012, "grad_norm": 0.30181795358657837, "learning_rate": 7.814602403625797e-06, "loss": 0.0121, "step": 44850 }, { "epoch": 0.7575911102104232, "grad_norm": 0.22090788185596466, "learning_rate": 7.813384210791655e-06, "loss": 0.0124, "step": 44860 }, { "epoch": 0.7577599891917451, "grad_norm": 0.1515830010175705, "learning_rate": 7.812165773537588e-06, "loss": 0.0106, "step": 44870 }, { "epoch": 0.7579288681730671, "grad_norm": 0.38921964168548584, "learning_rate": 7.81094709196945e-06, "loss": 0.0132, "step": 44880 }, { "epoch": 0.7580977471543892, "grad_norm": 0.23978674411773682, "learning_rate": 7.809728166193115e-06, "loss": 0.0076, "step": 44890 }, { "epoch": 0.7582666261357112, "grad_norm": 0.6238144636154175, "learning_rate": 7.808508996314481e-06, "loss": 0.0134, "step": 44900 }, { "epoch": 0.7584355051170332, "grad_norm": 0.2609471380710602, "learning_rate": 7.80728958243947e-06, "loss": 0.0136, "step": 44910 }, { "epoch": 0.7586043840983551, "grad_norm": 0.20090186595916748, "learning_rate": 7.806069924674017e-06, "loss": 0.0107, "step": 44920 }, { "epoch": 0.7587732630796771, "grad_norm": 0.16123461723327637, "learning_rate": 7.804850023124086e-06, "loss": 0.0084, "step": 44930 }, { "epoch": 0.7589421420609991, "grad_norm": 0.30256637930870056, "learning_rate": 7.803629877895655e-06, "loss": 0.0157, "step": 44940 }, { "epoch": 0.7591110210423211, "grad_norm": 0.12828712165355682, "learning_rate": 7.802409489094733e-06, "loss": 0.0153, "step": 44950 }, { "epoch": 0.7592799000236431, "grad_norm": 0.3148976266384125, "learning_rate": 7.801188856827341e-06, "loss": 0.0106, "step": 44960 }, { "epoch": 0.759448779004965, "grad_norm": 0.6426419019699097, "learning_rate": 7.799967981199525e-06, "loss": 0.0092, "step": 44970 }, { "epoch": 0.759617657986287, "grad_norm": 0.27717360854148865, "learning_rate": 7.798746862317353e-06, "loss": 0.0127, "step": 44980 }, { "epoch": 0.759786536967609, "grad_norm": 0.31614306569099426, "learning_rate": 7.797525500286911e-06, "loss": 0.0167, "step": 44990 }, { "epoch": 0.759955415948931, "grad_norm": 0.22301337122917175, "learning_rate": 7.796303895214307e-06, "loss": 0.0131, "step": 45000 }, { "epoch": 0.760124294930253, "grad_norm": 0.3163854777812958, "learning_rate": 7.795082047205673e-06, "loss": 0.0136, "step": 45010 }, { "epoch": 0.7602931739115749, "grad_norm": 0.16285340487957, "learning_rate": 7.79385995636716e-06, "loss": 0.0094, "step": 45020 }, { "epoch": 0.7604620528928969, "grad_norm": 0.36957234144210815, "learning_rate": 7.792637622804942e-06, "loss": 0.0124, "step": 45030 }, { "epoch": 0.7606309318742189, "grad_norm": 0.23059916496276855, "learning_rate": 7.791415046625209e-06, "loss": 0.0109, "step": 45040 }, { "epoch": 0.7607998108555409, "grad_norm": 0.3112664222717285, "learning_rate": 7.790192227934174e-06, "loss": 0.0126, "step": 45050 }, { "epoch": 0.7609686898368629, "grad_norm": 0.29478272795677185, "learning_rate": 7.788969166838079e-06, "loss": 0.0087, "step": 45060 }, { "epoch": 0.7611375688181848, "grad_norm": 0.1502278447151184, "learning_rate": 7.787745863443175e-06, "loss": 0.0126, "step": 45070 }, { "epoch": 0.7613064477995068, "grad_norm": 0.23668865859508514, "learning_rate": 7.786522317855742e-06, "loss": 0.0075, "step": 45080 }, { "epoch": 0.7614753267808289, "grad_norm": 0.15790040791034698, "learning_rate": 7.785298530182077e-06, "loss": 0.0091, "step": 45090 }, { "epoch": 0.7616442057621509, "grad_norm": 0.3438984155654907, "learning_rate": 7.784074500528502e-06, "loss": 0.0095, "step": 45100 }, { "epoch": 0.7618130847434729, "grad_norm": 0.5613481402397156, "learning_rate": 7.782850229001356e-06, "loss": 0.0119, "step": 45110 }, { "epoch": 0.7619819637247948, "grad_norm": 0.3839154541492462, "learning_rate": 7.781625715707003e-06, "loss": 0.01, "step": 45120 }, { "epoch": 0.7621508427061168, "grad_norm": 0.2678905427455902, "learning_rate": 7.780400960751819e-06, "loss": 0.0072, "step": 45130 }, { "epoch": 0.7623197216874388, "grad_norm": 0.31743884086608887, "learning_rate": 7.779175964242215e-06, "loss": 0.0073, "step": 45140 }, { "epoch": 0.7624886006687608, "grad_norm": 0.27993786334991455, "learning_rate": 7.777950726284615e-06, "loss": 0.0113, "step": 45150 }, { "epoch": 0.7626574796500828, "grad_norm": 0.3648715317249298, "learning_rate": 7.776725246985461e-06, "loss": 0.014, "step": 45160 }, { "epoch": 0.7628263586314047, "grad_norm": 0.3500330448150635, "learning_rate": 7.775499526451222e-06, "loss": 0.0102, "step": 45170 }, { "epoch": 0.7629952376127267, "grad_norm": 0.30754154920578003, "learning_rate": 7.774273564788387e-06, "loss": 0.0111, "step": 45180 }, { "epoch": 0.7631641165940487, "grad_norm": 0.34797313809394836, "learning_rate": 7.773047362103461e-06, "loss": 0.0109, "step": 45190 }, { "epoch": 0.7633329955753707, "grad_norm": 0.2443070113658905, "learning_rate": 7.771820918502976e-06, "loss": 0.0209, "step": 45200 }, { "epoch": 0.7635018745566927, "grad_norm": 0.32744595408439636, "learning_rate": 7.770594234093481e-06, "loss": 0.0105, "step": 45210 }, { "epoch": 0.7636707535380146, "grad_norm": 0.3189740478992462, "learning_rate": 7.769367308981546e-06, "loss": 0.0137, "step": 45220 }, { "epoch": 0.7638396325193366, "grad_norm": 0.36006033420562744, "learning_rate": 7.768140143273768e-06, "loss": 0.0151, "step": 45230 }, { "epoch": 0.7640085115006586, "grad_norm": 0.37499237060546875, "learning_rate": 7.766912737076756e-06, "loss": 0.0122, "step": 45240 }, { "epoch": 0.7641773904819806, "grad_norm": 0.3241405487060547, "learning_rate": 7.765685090497146e-06, "loss": 0.0136, "step": 45250 }, { "epoch": 0.7643462694633026, "grad_norm": 0.26520347595214844, "learning_rate": 7.764457203641592e-06, "loss": 0.0112, "step": 45260 }, { "epoch": 0.7645151484446245, "grad_norm": 0.6757124066352844, "learning_rate": 7.76322907661677e-06, "loss": 0.02, "step": 45270 }, { "epoch": 0.7646840274259465, "grad_norm": 0.2612970173358917, "learning_rate": 7.762000709529377e-06, "loss": 0.0151, "step": 45280 }, { "epoch": 0.7648529064072686, "grad_norm": 0.0775761753320694, "learning_rate": 7.76077210248613e-06, "loss": 0.0109, "step": 45290 }, { "epoch": 0.7650217853885906, "grad_norm": 0.2928250730037689, "learning_rate": 7.759543255593769e-06, "loss": 0.0139, "step": 45300 }, { "epoch": 0.7651906643699126, "grad_norm": 0.17659731209278107, "learning_rate": 7.75831416895905e-06, "loss": 0.0098, "step": 45310 }, { "epoch": 0.7653595433512345, "grad_norm": 0.1969529390335083, "learning_rate": 7.757084842688756e-06, "loss": 0.0131, "step": 45320 }, { "epoch": 0.7655284223325565, "grad_norm": 0.21761910617351532, "learning_rate": 7.755855276889685e-06, "loss": 0.009, "step": 45330 }, { "epoch": 0.7656973013138785, "grad_norm": 0.37643811106681824, "learning_rate": 7.754625471668662e-06, "loss": 0.02, "step": 45340 }, { "epoch": 0.7658661802952005, "grad_norm": 0.18401776254177094, "learning_rate": 7.753395427132529e-06, "loss": 0.011, "step": 45350 }, { "epoch": 0.7660350592765225, "grad_norm": 0.2540510296821594, "learning_rate": 7.752165143388146e-06, "loss": 0.0121, "step": 45360 }, { "epoch": 0.7662039382578444, "grad_norm": 0.7173495292663574, "learning_rate": 7.7509346205424e-06, "loss": 0.0157, "step": 45370 }, { "epoch": 0.7663728172391664, "grad_norm": 0.4604901075363159, "learning_rate": 7.749703858702196e-06, "loss": 0.0125, "step": 45380 }, { "epoch": 0.7665416962204884, "grad_norm": 0.34520086646080017, "learning_rate": 7.748472857974457e-06, "loss": 0.0109, "step": 45390 }, { "epoch": 0.7667105752018104, "grad_norm": 0.27134525775909424, "learning_rate": 7.74724161846613e-06, "loss": 0.0078, "step": 45400 }, { "epoch": 0.7668794541831324, "grad_norm": 0.3154299855232239, "learning_rate": 7.746010140284186e-06, "loss": 0.0089, "step": 45410 }, { "epoch": 0.7670483331644543, "grad_norm": 0.682927131652832, "learning_rate": 7.744778423535609e-06, "loss": 0.0168, "step": 45420 }, { "epoch": 0.7672172121457763, "grad_norm": 0.3222495913505554, "learning_rate": 7.743546468327407e-06, "loss": 0.0118, "step": 45430 }, { "epoch": 0.7673860911270983, "grad_norm": 0.1867654174566269, "learning_rate": 7.742314274766611e-06, "loss": 0.0107, "step": 45440 }, { "epoch": 0.7675549701084203, "grad_norm": 0.18887217342853546, "learning_rate": 7.74108184296027e-06, "loss": 0.0087, "step": 45450 }, { "epoch": 0.7677238490897423, "grad_norm": 0.33004891872406006, "learning_rate": 7.739849173015455e-06, "loss": 0.0147, "step": 45460 }, { "epoch": 0.7678927280710642, "grad_norm": 0.2951955199241638, "learning_rate": 7.738616265039258e-06, "loss": 0.0121, "step": 45470 }, { "epoch": 0.7680616070523862, "grad_norm": 0.6253363490104675, "learning_rate": 7.737383119138792e-06, "loss": 0.0177, "step": 45480 }, { "epoch": 0.7682304860337082, "grad_norm": 0.3337589502334595, "learning_rate": 7.736149735421185e-06, "loss": 0.01, "step": 45490 }, { "epoch": 0.7683993650150303, "grad_norm": 0.31811827421188354, "learning_rate": 7.734916113993597e-06, "loss": 0.0114, "step": 45500 }, { "epoch": 0.7685682439963523, "grad_norm": 0.27191412448883057, "learning_rate": 7.733682254963198e-06, "loss": 0.0103, "step": 45510 }, { "epoch": 0.7687371229776742, "grad_norm": 0.3356296420097351, "learning_rate": 7.73244815843718e-06, "loss": 0.0083, "step": 45520 }, { "epoch": 0.7689060019589962, "grad_norm": 0.533964216709137, "learning_rate": 7.731213824522766e-06, "loss": 0.0144, "step": 45530 }, { "epoch": 0.7690748809403182, "grad_norm": 0.1391294151544571, "learning_rate": 7.729979253327185e-06, "loss": 0.013, "step": 45540 }, { "epoch": 0.7692437599216402, "grad_norm": 0.3601621389389038, "learning_rate": 7.728744444957696e-06, "loss": 0.012, "step": 45550 }, { "epoch": 0.7694126389029622, "grad_norm": 0.2622627317905426, "learning_rate": 7.727509399521575e-06, "loss": 0.011, "step": 45560 }, { "epoch": 0.7695815178842841, "grad_norm": 0.11127369850873947, "learning_rate": 7.726274117126122e-06, "loss": 0.0114, "step": 45570 }, { "epoch": 0.7697503968656061, "grad_norm": 0.45080411434173584, "learning_rate": 7.725038597878655e-06, "loss": 0.0117, "step": 45580 }, { "epoch": 0.7699192758469281, "grad_norm": 0.21175998449325562, "learning_rate": 7.723802841886512e-06, "loss": 0.0068, "step": 45590 }, { "epoch": 0.7700881548282501, "grad_norm": 0.3502691686153412, "learning_rate": 7.72256684925705e-06, "loss": 0.0176, "step": 45600 }, { "epoch": 0.7702570338095721, "grad_norm": 0.3592475950717926, "learning_rate": 7.721330620097654e-06, "loss": 0.0107, "step": 45610 }, { "epoch": 0.770425912790894, "grad_norm": 0.1842406988143921, "learning_rate": 7.72009415451572e-06, "loss": 0.0118, "step": 45620 }, { "epoch": 0.770594791772216, "grad_norm": 0.3422820568084717, "learning_rate": 7.718857452618673e-06, "loss": 0.0151, "step": 45630 }, { "epoch": 0.770763670753538, "grad_norm": 0.29731425642967224, "learning_rate": 7.717620514513952e-06, "loss": 0.0113, "step": 45640 }, { "epoch": 0.77093254973486, "grad_norm": 0.2783453166484833, "learning_rate": 7.71638334030902e-06, "loss": 0.009, "step": 45650 }, { "epoch": 0.771101428716182, "grad_norm": 0.21564947068691254, "learning_rate": 7.715145930111358e-06, "loss": 0.0116, "step": 45660 }, { "epoch": 0.7712703076975039, "grad_norm": 0.39480024576187134, "learning_rate": 7.713908284028473e-06, "loss": 0.0159, "step": 45670 }, { "epoch": 0.7714391866788259, "grad_norm": 0.3449065089225769, "learning_rate": 7.712670402167885e-06, "loss": 0.0093, "step": 45680 }, { "epoch": 0.771608065660148, "grad_norm": 0.3762664496898651, "learning_rate": 7.711432284637141e-06, "loss": 0.0102, "step": 45690 }, { "epoch": 0.77177694464147, "grad_norm": 0.3383360207080841, "learning_rate": 7.710193931543803e-06, "loss": 0.011, "step": 45700 }, { "epoch": 0.771945823622792, "grad_norm": 0.1467258185148239, "learning_rate": 7.708955342995457e-06, "loss": 0.0143, "step": 45710 }, { "epoch": 0.7721147026041139, "grad_norm": 0.38757142424583435, "learning_rate": 7.707716519099708e-06, "loss": 0.0102, "step": 45720 }, { "epoch": 0.7722835815854359, "grad_norm": 0.6198473572731018, "learning_rate": 7.706477459964186e-06, "loss": 0.026, "step": 45730 }, { "epoch": 0.7724524605667579, "grad_norm": 0.21812176704406738, "learning_rate": 7.70523816569653e-06, "loss": 0.0081, "step": 45740 }, { "epoch": 0.7726213395480799, "grad_norm": 0.2187299281358719, "learning_rate": 7.703998636404414e-06, "loss": 0.0058, "step": 45750 }, { "epoch": 0.7727902185294019, "grad_norm": 0.3858238458633423, "learning_rate": 7.702758872195522e-06, "loss": 0.0094, "step": 45760 }, { "epoch": 0.7729590975107238, "grad_norm": 0.4482874572277069, "learning_rate": 7.701518873177561e-06, "loss": 0.0129, "step": 45770 }, { "epoch": 0.7731279764920458, "grad_norm": 0.22774654626846313, "learning_rate": 7.700278639458258e-06, "loss": 0.0168, "step": 45780 }, { "epoch": 0.7732968554733678, "grad_norm": 0.41081947088241577, "learning_rate": 7.699038171145368e-06, "loss": 0.0139, "step": 45790 }, { "epoch": 0.7734657344546898, "grad_norm": 0.38634777069091797, "learning_rate": 7.697797468346652e-06, "loss": 0.0105, "step": 45800 }, { "epoch": 0.7736346134360118, "grad_norm": 0.2541726529598236, "learning_rate": 7.696556531169904e-06, "loss": 0.0121, "step": 45810 }, { "epoch": 0.7738034924173337, "grad_norm": 0.2884017825126648, "learning_rate": 7.695315359722931e-06, "loss": 0.0119, "step": 45820 }, { "epoch": 0.7739723713986557, "grad_norm": 0.5123670101165771, "learning_rate": 7.694073954113566e-06, "loss": 0.0136, "step": 45830 }, { "epoch": 0.7741412503799777, "grad_norm": 0.45700234174728394, "learning_rate": 7.692832314449655e-06, "loss": 0.0108, "step": 45840 }, { "epoch": 0.7743101293612997, "grad_norm": 0.19647717475891113, "learning_rate": 7.691590440839074e-06, "loss": 0.0146, "step": 45850 }, { "epoch": 0.7744790083426216, "grad_norm": 0.21212172508239746, "learning_rate": 7.690348333389708e-06, "loss": 0.012, "step": 45860 }, { "epoch": 0.7746478873239436, "grad_norm": 0.09765265136957169, "learning_rate": 7.68910599220947e-06, "loss": 0.0117, "step": 45870 }, { "epoch": 0.7748167663052656, "grad_norm": 0.16412556171417236, "learning_rate": 7.687863417406297e-06, "loss": 0.0088, "step": 45880 }, { "epoch": 0.7749856452865876, "grad_norm": 0.49506500363349915, "learning_rate": 7.686620609088136e-06, "loss": 0.013, "step": 45890 }, { "epoch": 0.7751545242679097, "grad_norm": 0.14789004623889923, "learning_rate": 7.685377567362958e-06, "loss": 0.0109, "step": 45900 }, { "epoch": 0.7753234032492315, "grad_norm": 0.34375426173210144, "learning_rate": 7.684134292338756e-06, "loss": 0.0102, "step": 45910 }, { "epoch": 0.7754922822305536, "grad_norm": 0.18478593230247498, "learning_rate": 7.682890784123546e-06, "loss": 0.0141, "step": 45920 }, { "epoch": 0.7756611612118756, "grad_norm": 0.5571111440658569, "learning_rate": 7.681647042825357e-06, "loss": 0.0147, "step": 45930 }, { "epoch": 0.7758300401931976, "grad_norm": 0.171650692820549, "learning_rate": 7.680403068552245e-06, "loss": 0.0093, "step": 45940 }, { "epoch": 0.7759989191745196, "grad_norm": 0.5365257859230042, "learning_rate": 7.679158861412283e-06, "loss": 0.0137, "step": 45950 }, { "epoch": 0.7761677981558415, "grad_norm": 0.3632233142852783, "learning_rate": 7.677914421513564e-06, "loss": 0.0145, "step": 45960 }, { "epoch": 0.7763366771371635, "grad_norm": 0.2211979180574417, "learning_rate": 7.6766697489642e-06, "loss": 0.0245, "step": 45970 }, { "epoch": 0.7765055561184855, "grad_norm": 0.37701526284217834, "learning_rate": 7.675424843872329e-06, "loss": 0.0161, "step": 45980 }, { "epoch": 0.7766744350998075, "grad_norm": 0.5018704533576965, "learning_rate": 7.674179706346104e-06, "loss": 0.0157, "step": 45990 }, { "epoch": 0.7768433140811295, "grad_norm": 0.20682810246944427, "learning_rate": 7.672934336493696e-06, "loss": 0.0186, "step": 46000 }, { "epoch": 0.7770121930624514, "grad_norm": 0.28133273124694824, "learning_rate": 7.671688734423304e-06, "loss": 0.0145, "step": 46010 }, { "epoch": 0.7771810720437734, "grad_norm": 0.14397023618221283, "learning_rate": 7.670442900243142e-06, "loss": 0.0104, "step": 46020 }, { "epoch": 0.7773499510250954, "grad_norm": 0.22545310854911804, "learning_rate": 7.669196834061442e-06, "loss": 0.0104, "step": 46030 }, { "epoch": 0.7775188300064174, "grad_norm": 0.4165861904621124, "learning_rate": 7.667950535986466e-06, "loss": 0.0143, "step": 46040 }, { "epoch": 0.7776877089877394, "grad_norm": 0.18671762943267822, "learning_rate": 7.666704006126482e-06, "loss": 0.0099, "step": 46050 }, { "epoch": 0.7778565879690613, "grad_norm": 0.1617926061153412, "learning_rate": 7.665457244589788e-06, "loss": 0.0106, "step": 46060 }, { "epoch": 0.7780254669503833, "grad_norm": 0.2516421377658844, "learning_rate": 7.664210251484701e-06, "loss": 0.0119, "step": 46070 }, { "epoch": 0.7781943459317053, "grad_norm": 0.20529760420322418, "learning_rate": 7.662963026919557e-06, "loss": 0.0125, "step": 46080 }, { "epoch": 0.7783632249130273, "grad_norm": 0.16918635368347168, "learning_rate": 7.661715571002708e-06, "loss": 0.0131, "step": 46090 }, { "epoch": 0.7785321038943493, "grad_norm": 0.2962242364883423, "learning_rate": 7.660467883842533e-06, "loss": 0.0102, "step": 46100 }, { "epoch": 0.7787009828756712, "grad_norm": 0.3715449571609497, "learning_rate": 7.659219965547427e-06, "loss": 0.0108, "step": 46110 }, { "epoch": 0.7788698618569933, "grad_norm": 0.4509740471839905, "learning_rate": 7.657971816225808e-06, "loss": 0.0094, "step": 46120 }, { "epoch": 0.7790387408383153, "grad_norm": 0.418988972902298, "learning_rate": 7.656723435986108e-06, "loss": 0.0149, "step": 46130 }, { "epoch": 0.7792076198196373, "grad_norm": 0.47260281443595886, "learning_rate": 7.655474824936789e-06, "loss": 0.0127, "step": 46140 }, { "epoch": 0.7793764988009593, "grad_norm": 0.22129838168621063, "learning_rate": 7.654225983186323e-06, "loss": 0.0111, "step": 46150 }, { "epoch": 0.7795453777822812, "grad_norm": 0.2621282637119293, "learning_rate": 7.652976910843209e-06, "loss": 0.0124, "step": 46160 }, { "epoch": 0.7797142567636032, "grad_norm": 0.22283180058002472, "learning_rate": 7.651727608015959e-06, "loss": 0.0092, "step": 46170 }, { "epoch": 0.7798831357449252, "grad_norm": 0.2012391984462738, "learning_rate": 7.650478074813113e-06, "loss": 0.0108, "step": 46180 }, { "epoch": 0.7800520147262472, "grad_norm": 0.34220314025878906, "learning_rate": 7.649228311343226e-06, "loss": 0.0114, "step": 46190 }, { "epoch": 0.7802208937075692, "grad_norm": 0.18282945454120636, "learning_rate": 7.647978317714876e-06, "loss": 0.011, "step": 46200 }, { "epoch": 0.7803897726888911, "grad_norm": 0.2953193485736847, "learning_rate": 7.646728094036657e-06, "loss": 0.01, "step": 46210 }, { "epoch": 0.7805586516702131, "grad_norm": 0.1221238449215889, "learning_rate": 7.645477640417188e-06, "loss": 0.0131, "step": 46220 }, { "epoch": 0.7807275306515351, "grad_norm": 0.253726065158844, "learning_rate": 7.644226956965105e-06, "loss": 0.0102, "step": 46230 }, { "epoch": 0.7808964096328571, "grad_norm": 0.21170316636562347, "learning_rate": 7.642976043789061e-06, "loss": 0.0102, "step": 46240 }, { "epoch": 0.7810652886141791, "grad_norm": 0.36834341287612915, "learning_rate": 7.641724900997734e-06, "loss": 0.0128, "step": 46250 }, { "epoch": 0.781234167595501, "grad_norm": 0.24287861585617065, "learning_rate": 7.640473528699822e-06, "loss": 0.0098, "step": 46260 }, { "epoch": 0.781403046576823, "grad_norm": 0.3717556595802307, "learning_rate": 7.639221927004042e-06, "loss": 0.0145, "step": 46270 }, { "epoch": 0.781571925558145, "grad_norm": 0.2784538269042969, "learning_rate": 7.637970096019126e-06, "loss": 0.0142, "step": 46280 }, { "epoch": 0.781740804539467, "grad_norm": 0.514034628868103, "learning_rate": 7.636718035853832e-06, "loss": 0.0097, "step": 46290 }, { "epoch": 0.781909683520789, "grad_norm": 0.20783352851867676, "learning_rate": 7.635465746616936e-06, "loss": 0.0153, "step": 46300 }, { "epoch": 0.7820785625021109, "grad_norm": 0.5069323182106018, "learning_rate": 7.634213228417235e-06, "loss": 0.0103, "step": 46310 }, { "epoch": 0.782247441483433, "grad_norm": 0.12878066301345825, "learning_rate": 7.632960481363542e-06, "loss": 0.0101, "step": 46320 }, { "epoch": 0.782416320464755, "grad_norm": 0.26248136162757874, "learning_rate": 7.631707505564695e-06, "loss": 0.0091, "step": 46330 }, { "epoch": 0.782585199446077, "grad_norm": 0.17834989726543427, "learning_rate": 7.630454301129549e-06, "loss": 0.0136, "step": 46340 }, { "epoch": 0.782754078427399, "grad_norm": 0.27755725383758545, "learning_rate": 7.629200868166978e-06, "loss": 0.0086, "step": 46350 }, { "epoch": 0.7829229574087209, "grad_norm": 0.2580259144306183, "learning_rate": 7.627947206785879e-06, "loss": 0.0141, "step": 46360 }, { "epoch": 0.7830918363900429, "grad_norm": 0.054721757769584656, "learning_rate": 7.626693317095164e-06, "loss": 0.0082, "step": 46370 }, { "epoch": 0.7832607153713649, "grad_norm": 0.27089861035346985, "learning_rate": 7.625439199203773e-06, "loss": 0.0109, "step": 46380 }, { "epoch": 0.7834295943526869, "grad_norm": 0.16209478676319122, "learning_rate": 7.624184853220655e-06, "loss": 0.0085, "step": 46390 }, { "epoch": 0.7835984733340089, "grad_norm": 0.5057515501976013, "learning_rate": 7.62293027925479e-06, "loss": 0.0092, "step": 46400 }, { "epoch": 0.7837673523153308, "grad_norm": 0.2910033166408539, "learning_rate": 7.6216754774151665e-06, "loss": 0.0145, "step": 46410 }, { "epoch": 0.7839362312966528, "grad_norm": 0.19864368438720703, "learning_rate": 7.620420447810804e-06, "loss": 0.0072, "step": 46420 }, { "epoch": 0.7841051102779748, "grad_norm": 0.4253838062286377, "learning_rate": 7.619165190550732e-06, "loss": 0.0111, "step": 46430 }, { "epoch": 0.7842739892592968, "grad_norm": 0.21196413040161133, "learning_rate": 7.617909705744007e-06, "loss": 0.0115, "step": 46440 }, { "epoch": 0.7844428682406188, "grad_norm": 0.13706950843334198, "learning_rate": 7.616653993499703e-06, "loss": 0.0078, "step": 46450 }, { "epoch": 0.7846117472219407, "grad_norm": 0.3752238154411316, "learning_rate": 7.61539805392691e-06, "loss": 0.0121, "step": 46460 }, { "epoch": 0.7847806262032627, "grad_norm": 0.34791675209999084, "learning_rate": 7.614141887134745e-06, "loss": 0.0181, "step": 46470 }, { "epoch": 0.7849495051845847, "grad_norm": 0.24160362780094147, "learning_rate": 7.6128854932323346e-06, "loss": 0.0151, "step": 46480 }, { "epoch": 0.7851183841659067, "grad_norm": 0.5451534390449524, "learning_rate": 7.6116288723288375e-06, "loss": 0.0143, "step": 46490 }, { "epoch": 0.7852872631472287, "grad_norm": 0.2880723178386688, "learning_rate": 7.610372024533423e-06, "loss": 0.0112, "step": 46500 }, { "epoch": 0.7854561421285506, "grad_norm": 0.6511300802230835, "learning_rate": 7.609114949955285e-06, "loss": 0.013, "step": 46510 }, { "epoch": 0.7856250211098726, "grad_norm": 0.40377485752105713, "learning_rate": 7.607857648703633e-06, "loss": 0.0098, "step": 46520 }, { "epoch": 0.7857939000911947, "grad_norm": 0.6261894106864929, "learning_rate": 7.606600120887697e-06, "loss": 0.0124, "step": 46530 }, { "epoch": 0.7859627790725167, "grad_norm": 0.2800842523574829, "learning_rate": 7.60534236661673e-06, "loss": 0.0121, "step": 46540 }, { "epoch": 0.7861316580538387, "grad_norm": 0.23180794715881348, "learning_rate": 7.604084386000004e-06, "loss": 0.0099, "step": 46550 }, { "epoch": 0.7863005370351606, "grad_norm": 0.3289521336555481, "learning_rate": 7.602826179146806e-06, "loss": 0.0151, "step": 46560 }, { "epoch": 0.7864694160164826, "grad_norm": 0.1394244283437729, "learning_rate": 7.601567746166449e-06, "loss": 0.0106, "step": 46570 }, { "epoch": 0.7866382949978046, "grad_norm": 0.46823006868362427, "learning_rate": 7.600309087168258e-06, "loss": 0.0091, "step": 46580 }, { "epoch": 0.7868071739791266, "grad_norm": 0.27994099259376526, "learning_rate": 7.599050202261588e-06, "loss": 0.0116, "step": 46590 }, { "epoch": 0.7869760529604486, "grad_norm": 0.2836266756057739, "learning_rate": 7.597791091555804e-06, "loss": 0.0164, "step": 46600 }, { "epoch": 0.7871449319417705, "grad_norm": 0.43551790714263916, "learning_rate": 7.596531755160296e-06, "loss": 0.0133, "step": 46610 }, { "epoch": 0.7873138109230925, "grad_norm": 0.24488483369350433, "learning_rate": 7.595272193184471e-06, "loss": 0.0077, "step": 46620 }, { "epoch": 0.7874826899044145, "grad_norm": 0.3474479019641876, "learning_rate": 7.5940124057377586e-06, "loss": 0.0144, "step": 46630 }, { "epoch": 0.7876515688857365, "grad_norm": 0.442571222782135, "learning_rate": 7.592752392929603e-06, "loss": 0.0101, "step": 46640 }, { "epoch": 0.7878204478670585, "grad_norm": 0.2319364994764328, "learning_rate": 7.591492154869473e-06, "loss": 0.0069, "step": 46650 }, { "epoch": 0.7879893268483804, "grad_norm": 0.306997150182724, "learning_rate": 7.590231691666854e-06, "loss": 0.0112, "step": 46660 }, { "epoch": 0.7881582058297024, "grad_norm": 0.2804411053657532, "learning_rate": 7.588971003431255e-06, "loss": 0.0105, "step": 46670 }, { "epoch": 0.7883270848110244, "grad_norm": 0.2538372278213501, "learning_rate": 7.587710090272198e-06, "loss": 0.0102, "step": 46680 }, { "epoch": 0.7884959637923464, "grad_norm": 0.24172881245613098, "learning_rate": 7.586448952299229e-06, "loss": 0.0126, "step": 46690 }, { "epoch": 0.7886648427736684, "grad_norm": 0.24234315752983093, "learning_rate": 7.5851875896219115e-06, "loss": 0.0092, "step": 46700 }, { "epoch": 0.7888337217549903, "grad_norm": 0.4304797351360321, "learning_rate": 7.583926002349833e-06, "loss": 0.01, "step": 46710 }, { "epoch": 0.7890026007363123, "grad_norm": 0.30343765020370483, "learning_rate": 7.582664190592595e-06, "loss": 0.0107, "step": 46720 }, { "epoch": 0.7891714797176343, "grad_norm": 0.14091147482395172, "learning_rate": 7.58140215445982e-06, "loss": 0.0056, "step": 46730 }, { "epoch": 0.7893403586989564, "grad_norm": 0.16884204745292664, "learning_rate": 7.5801398940611504e-06, "loss": 0.0125, "step": 46740 }, { "epoch": 0.7895092376802784, "grad_norm": 0.3818974494934082, "learning_rate": 7.578877409506251e-06, "loss": 0.0188, "step": 46750 }, { "epoch": 0.7896781166616003, "grad_norm": 0.3540700376033783, "learning_rate": 7.577614700904799e-06, "loss": 0.0108, "step": 46760 }, { "epoch": 0.7898469956429223, "grad_norm": 0.24378050863742828, "learning_rate": 7.576351768366499e-06, "loss": 0.0116, "step": 46770 }, { "epoch": 0.7900158746242443, "grad_norm": 0.3120270073413849, "learning_rate": 7.575088612001071e-06, "loss": 0.0121, "step": 46780 }, { "epoch": 0.7901847536055663, "grad_norm": 0.294498085975647, "learning_rate": 7.573825231918255e-06, "loss": 0.0147, "step": 46790 }, { "epoch": 0.7903536325868883, "grad_norm": 0.3103255033493042, "learning_rate": 7.572561628227808e-06, "loss": 0.0155, "step": 46800 }, { "epoch": 0.7905225115682102, "grad_norm": 0.19959314167499542, "learning_rate": 7.571297801039512e-06, "loss": 0.0096, "step": 46810 }, { "epoch": 0.7906913905495322, "grad_norm": 0.23253464698791504, "learning_rate": 7.570033750463161e-06, "loss": 0.0089, "step": 46820 }, { "epoch": 0.7908602695308542, "grad_norm": 0.2799196243286133, "learning_rate": 7.568769476608577e-06, "loss": 0.0065, "step": 46830 }, { "epoch": 0.7910291485121762, "grad_norm": 0.2789957523345947, "learning_rate": 7.567504979585596e-06, "loss": 0.0147, "step": 46840 }, { "epoch": 0.7911980274934982, "grad_norm": 0.1538286954164505, "learning_rate": 7.566240259504074e-06, "loss": 0.0192, "step": 46850 }, { "epoch": 0.7913669064748201, "grad_norm": 0.20026816427707672, "learning_rate": 7.564975316473885e-06, "loss": 0.0108, "step": 46860 }, { "epoch": 0.7915357854561421, "grad_norm": 0.2107611894607544, "learning_rate": 7.563710150604926e-06, "loss": 0.0091, "step": 46870 }, { "epoch": 0.7917046644374641, "grad_norm": 0.14509549736976624, "learning_rate": 7.562444762007112e-06, "loss": 0.0125, "step": 46880 }, { "epoch": 0.7918735434187861, "grad_norm": 0.6204745769500732, "learning_rate": 7.5611791507903754e-06, "loss": 0.0118, "step": 46890 }, { "epoch": 0.7920424224001081, "grad_norm": 0.35147857666015625, "learning_rate": 7.559913317064669e-06, "loss": 0.0078, "step": 46900 }, { "epoch": 0.79221130138143, "grad_norm": 0.4269537925720215, "learning_rate": 7.558647260939968e-06, "loss": 0.0132, "step": 46910 }, { "epoch": 0.792380180362752, "grad_norm": 0.27117791771888733, "learning_rate": 7.557380982526262e-06, "loss": 0.0113, "step": 46920 }, { "epoch": 0.792549059344074, "grad_norm": 0.3272508382797241, "learning_rate": 7.556114481933563e-06, "loss": 0.0117, "step": 46930 }, { "epoch": 0.792717938325396, "grad_norm": 0.3466479480266571, "learning_rate": 7.554847759271902e-06, "loss": 0.0095, "step": 46940 }, { "epoch": 0.7928868173067181, "grad_norm": 0.5285521745681763, "learning_rate": 7.553580814651327e-06, "loss": 0.0093, "step": 46950 }, { "epoch": 0.79305569628804, "grad_norm": 0.46369221806526184, "learning_rate": 7.552313648181908e-06, "loss": 0.0127, "step": 46960 }, { "epoch": 0.793224575269362, "grad_norm": 0.39480093121528625, "learning_rate": 7.551046259973733e-06, "loss": 0.0128, "step": 46970 }, { "epoch": 0.793393454250684, "grad_norm": 0.4413769245147705, "learning_rate": 7.5497786501369106e-06, "loss": 0.013, "step": 46980 }, { "epoch": 0.793562333232006, "grad_norm": 0.19158761203289032, "learning_rate": 7.548510818781567e-06, "loss": 0.0096, "step": 46990 }, { "epoch": 0.7937312122133279, "grad_norm": 0.20560529828071594, "learning_rate": 7.5472427660178485e-06, "loss": 0.0121, "step": 47000 }, { "epoch": 0.7939000911946499, "grad_norm": 0.593219518661499, "learning_rate": 7.54597449195592e-06, "loss": 0.0079, "step": 47010 }, { "epoch": 0.7940689701759719, "grad_norm": 0.30183082818984985, "learning_rate": 7.544705996705966e-06, "loss": 0.0094, "step": 47020 }, { "epoch": 0.7942378491572939, "grad_norm": 0.280984491109848, "learning_rate": 7.543437280378189e-06, "loss": 0.0112, "step": 47030 }, { "epoch": 0.7944067281386159, "grad_norm": 0.24146124720573425, "learning_rate": 7.5421683430828165e-06, "loss": 0.0168, "step": 47040 }, { "epoch": 0.7945756071199378, "grad_norm": 0.07218944281339645, "learning_rate": 7.540899184930086e-06, "loss": 0.0101, "step": 47050 }, { "epoch": 0.7947444861012598, "grad_norm": 0.42078539729118347, "learning_rate": 7.539629806030261e-06, "loss": 0.0199, "step": 47060 }, { "epoch": 0.7949133650825818, "grad_norm": 0.32672908902168274, "learning_rate": 7.53836020649362e-06, "loss": 0.0121, "step": 47070 }, { "epoch": 0.7950822440639038, "grad_norm": 0.2601141035556793, "learning_rate": 7.537090386430465e-06, "loss": 0.0107, "step": 47080 }, { "epoch": 0.7952511230452258, "grad_norm": 0.5887835621833801, "learning_rate": 7.535820345951115e-06, "loss": 0.0184, "step": 47090 }, { "epoch": 0.7954200020265477, "grad_norm": 0.14818502962589264, "learning_rate": 7.534550085165907e-06, "loss": 0.012, "step": 47100 }, { "epoch": 0.7955888810078697, "grad_norm": 0.1991468071937561, "learning_rate": 7.5332796041851975e-06, "loss": 0.0118, "step": 47110 }, { "epoch": 0.7957577599891917, "grad_norm": 0.15108245611190796, "learning_rate": 7.532008903119363e-06, "loss": 0.0103, "step": 47120 }, { "epoch": 0.7959266389705137, "grad_norm": 0.24403466284275055, "learning_rate": 7.530737982078801e-06, "loss": 0.0095, "step": 47130 }, { "epoch": 0.7960955179518358, "grad_norm": 0.20590195059776306, "learning_rate": 7.529466841173922e-06, "loss": 0.0111, "step": 47140 }, { "epoch": 0.7962643969331576, "grad_norm": 0.3616888225078583, "learning_rate": 7.528195480515164e-06, "loss": 0.0147, "step": 47150 }, { "epoch": 0.7964332759144797, "grad_norm": 0.730536699295044, "learning_rate": 7.526923900212977e-06, "loss": 0.009, "step": 47160 }, { "epoch": 0.7966021548958017, "grad_norm": 0.09913913905620575, "learning_rate": 7.5256521003778335e-06, "loss": 0.008, "step": 47170 }, { "epoch": 0.7967710338771237, "grad_norm": 0.3023892939090729, "learning_rate": 7.524380081120225e-06, "loss": 0.0116, "step": 47180 }, { "epoch": 0.7969399128584457, "grad_norm": 0.4514957666397095, "learning_rate": 7.523107842550659e-06, "loss": 0.0111, "step": 47190 }, { "epoch": 0.7971087918397676, "grad_norm": 0.35025256872177124, "learning_rate": 7.521835384779668e-06, "loss": 0.0127, "step": 47200 }, { "epoch": 0.7972776708210896, "grad_norm": 0.2665283679962158, "learning_rate": 7.520562707917797e-06, "loss": 0.0091, "step": 47210 }, { "epoch": 0.7974465498024116, "grad_norm": 0.16138114035129547, "learning_rate": 7.519289812075614e-06, "loss": 0.0077, "step": 47220 }, { "epoch": 0.7976154287837336, "grad_norm": 0.2725352942943573, "learning_rate": 7.5180166973637055e-06, "loss": 0.0088, "step": 47230 }, { "epoch": 0.7977843077650556, "grad_norm": 0.24478267133235931, "learning_rate": 7.516743363892678e-06, "loss": 0.0117, "step": 47240 }, { "epoch": 0.7979531867463775, "grad_norm": 0.2695070207118988, "learning_rate": 7.515469811773153e-06, "loss": 0.0081, "step": 47250 }, { "epoch": 0.7981220657276995, "grad_norm": 0.3965451121330261, "learning_rate": 7.5141960411157745e-06, "loss": 0.0142, "step": 47260 }, { "epoch": 0.7982909447090215, "grad_norm": 0.2380465269088745, "learning_rate": 7.512922052031204e-06, "loss": 0.0113, "step": 47270 }, { "epoch": 0.7984598236903435, "grad_norm": 0.5078769326210022, "learning_rate": 7.511647844630125e-06, "loss": 0.0193, "step": 47280 }, { "epoch": 0.7986287026716655, "grad_norm": 0.41764113306999207, "learning_rate": 7.510373419023234e-06, "loss": 0.0135, "step": 47290 }, { "epoch": 0.7987975816529874, "grad_norm": 0.22321222722530365, "learning_rate": 7.509098775321253e-06, "loss": 0.0088, "step": 47300 }, { "epoch": 0.7989664606343094, "grad_norm": 0.21221579611301422, "learning_rate": 7.5078239136349174e-06, "loss": 0.0162, "step": 47310 }, { "epoch": 0.7991353396156314, "grad_norm": 0.4453321397304535, "learning_rate": 7.506548834074984e-06, "loss": 0.0126, "step": 47320 }, { "epoch": 0.7993042185969534, "grad_norm": 0.26200079917907715, "learning_rate": 7.5052735367522335e-06, "loss": 0.0148, "step": 47330 }, { "epoch": 0.7994730975782754, "grad_norm": 0.40955638885498047, "learning_rate": 7.503998021777454e-06, "loss": 0.014, "step": 47340 }, { "epoch": 0.7996419765595973, "grad_norm": 0.3120461106300354, "learning_rate": 7.502722289261465e-06, "loss": 0.0154, "step": 47350 }, { "epoch": 0.7998108555409194, "grad_norm": 0.12015671283006668, "learning_rate": 7.501446339315094e-06, "loss": 0.0077, "step": 47360 }, { "epoch": 0.7999797345222414, "grad_norm": 0.3435935080051422, "learning_rate": 7.500170172049195e-06, "loss": 0.0083, "step": 47370 }, { "epoch": 0.8001486135035634, "grad_norm": 0.3369479775428772, "learning_rate": 7.498893787574637e-06, "loss": 0.0092, "step": 47380 }, { "epoch": 0.8003174924848854, "grad_norm": 0.21507969498634338, "learning_rate": 7.497617186002312e-06, "loss": 0.008, "step": 47390 }, { "epoch": 0.8004863714662073, "grad_norm": 0.3198922276496887, "learning_rate": 7.496340367443124e-06, "loss": 0.0133, "step": 47400 }, { "epoch": 0.8006552504475293, "grad_norm": 0.26518750190734863, "learning_rate": 7.495063332008004e-06, "loss": 0.012, "step": 47410 }, { "epoch": 0.8008241294288513, "grad_norm": 0.5325319766998291, "learning_rate": 7.493786079807893e-06, "loss": 0.0108, "step": 47420 }, { "epoch": 0.8009930084101733, "grad_norm": 0.11008128523826599, "learning_rate": 7.4925086109537595e-06, "loss": 0.0184, "step": 47430 }, { "epoch": 0.8011618873914953, "grad_norm": 0.1000397652387619, "learning_rate": 7.491230925556584e-06, "loss": 0.0092, "step": 47440 }, { "epoch": 0.8013307663728172, "grad_norm": 0.14938311278820038, "learning_rate": 7.489953023727372e-06, "loss": 0.0078, "step": 47450 }, { "epoch": 0.8014996453541392, "grad_norm": 0.25496354699134827, "learning_rate": 7.488674905577143e-06, "loss": 0.0106, "step": 47460 }, { "epoch": 0.8016685243354612, "grad_norm": 0.39085549116134644, "learning_rate": 7.487396571216935e-06, "loss": 0.0131, "step": 47470 }, { "epoch": 0.8018374033167832, "grad_norm": 0.56815505027771, "learning_rate": 7.486118020757807e-06, "loss": 0.0123, "step": 47480 }, { "epoch": 0.8020062822981052, "grad_norm": 0.13784299790859222, "learning_rate": 7.484839254310839e-06, "loss": 0.0115, "step": 47490 }, { "epoch": 0.8021751612794271, "grad_norm": 1.2434343099594116, "learning_rate": 7.483560271987125e-06, "loss": 0.009, "step": 47500 }, { "epoch": 0.8023440402607491, "grad_norm": 0.20796576142311096, "learning_rate": 7.48228107389778e-06, "loss": 0.0116, "step": 47510 }, { "epoch": 0.8025129192420711, "grad_norm": 0.24452486634254456, "learning_rate": 7.481001660153938e-06, "loss": 0.0121, "step": 47520 }, { "epoch": 0.8026817982233931, "grad_norm": 0.382191926240921, "learning_rate": 7.479722030866752e-06, "loss": 0.012, "step": 47530 }, { "epoch": 0.8028506772047151, "grad_norm": 0.3757930397987366, "learning_rate": 7.478442186147391e-06, "loss": 0.0104, "step": 47540 }, { "epoch": 0.803019556186037, "grad_norm": 0.458967000246048, "learning_rate": 7.477162126107047e-06, "loss": 0.0133, "step": 47550 }, { "epoch": 0.803188435167359, "grad_norm": 0.4336281716823578, "learning_rate": 7.475881850856925e-06, "loss": 0.0122, "step": 47560 }, { "epoch": 0.8033573141486811, "grad_norm": 0.29747822880744934, "learning_rate": 7.4746013605082564e-06, "loss": 0.0119, "step": 47570 }, { "epoch": 0.8035261931300031, "grad_norm": 0.248299703001976, "learning_rate": 7.473320655172286e-06, "loss": 0.0119, "step": 47580 }, { "epoch": 0.8036950721113251, "grad_norm": 0.15085220336914062, "learning_rate": 7.472039734960277e-06, "loss": 0.0094, "step": 47590 }, { "epoch": 0.803863951092647, "grad_norm": 0.5963377952575684, "learning_rate": 7.470758599983513e-06, "loss": 0.0162, "step": 47600 }, { "epoch": 0.804032830073969, "grad_norm": 0.29821470379829407, "learning_rate": 7.469477250353296e-06, "loss": 0.0138, "step": 47610 }, { "epoch": 0.804201709055291, "grad_norm": 0.3447869122028351, "learning_rate": 7.468195686180947e-06, "loss": 0.0148, "step": 47620 }, { "epoch": 0.804370588036613, "grad_norm": 0.085639588534832, "learning_rate": 7.466913907577805e-06, "loss": 0.0102, "step": 47630 }, { "epoch": 0.804539467017935, "grad_norm": 0.30021384358406067, "learning_rate": 7.465631914655227e-06, "loss": 0.0116, "step": 47640 }, { "epoch": 0.8047083459992569, "grad_norm": 0.798025369644165, "learning_rate": 7.464349707524591e-06, "loss": 0.0102, "step": 47650 }, { "epoch": 0.8048772249805789, "grad_norm": 0.2935335040092468, "learning_rate": 7.46306728629729e-06, "loss": 0.0108, "step": 47660 }, { "epoch": 0.8050461039619009, "grad_norm": 0.24335773289203644, "learning_rate": 7.461784651084738e-06, "loss": 0.0171, "step": 47670 }, { "epoch": 0.8052149829432229, "grad_norm": 0.2720728814601898, "learning_rate": 7.460501801998369e-06, "loss": 0.0099, "step": 47680 }, { "epoch": 0.8053838619245449, "grad_norm": 0.20966890454292297, "learning_rate": 7.459218739149633e-06, "loss": 0.0119, "step": 47690 }, { "epoch": 0.8055527409058668, "grad_norm": 0.11991595476865768, "learning_rate": 7.457935462649998e-06, "loss": 0.0079, "step": 47700 }, { "epoch": 0.8057216198871888, "grad_norm": 0.271859347820282, "learning_rate": 7.456651972610953e-06, "loss": 0.0146, "step": 47710 }, { "epoch": 0.8058904988685108, "grad_norm": 0.3476220369338989, "learning_rate": 7.455368269144005e-06, "loss": 0.0111, "step": 47720 }, { "epoch": 0.8060593778498328, "grad_norm": 0.37007299065589905, "learning_rate": 7.454084352360678e-06, "loss": 0.0091, "step": 47730 }, { "epoch": 0.8062282568311548, "grad_norm": 0.24790099263191223, "learning_rate": 7.452800222372515e-06, "loss": 0.0171, "step": 47740 }, { "epoch": 0.8063971358124767, "grad_norm": 0.2599068284034729, "learning_rate": 7.451515879291081e-06, "loss": 0.0064, "step": 47750 }, { "epoch": 0.8065660147937987, "grad_norm": 0.2730080187320709, "learning_rate": 7.450231323227952e-06, "loss": 0.0111, "step": 47760 }, { "epoch": 0.8067348937751208, "grad_norm": 0.30448994040489197, "learning_rate": 7.44894655429473e-06, "loss": 0.0102, "step": 47770 }, { "epoch": 0.8069037727564428, "grad_norm": 0.1588059663772583, "learning_rate": 7.447661572603033e-06, "loss": 0.0118, "step": 47780 }, { "epoch": 0.8070726517377648, "grad_norm": 0.36950549483299255, "learning_rate": 7.446376378264497e-06, "loss": 0.0144, "step": 47790 }, { "epoch": 0.8072415307190867, "grad_norm": 0.324939489364624, "learning_rate": 7.445090971390775e-06, "loss": 0.0079, "step": 47800 }, { "epoch": 0.8074104097004087, "grad_norm": 0.2041865438222885, "learning_rate": 7.44380535209354e-06, "loss": 0.0068, "step": 47810 }, { "epoch": 0.8075792886817307, "grad_norm": 0.21270841360092163, "learning_rate": 7.442519520484486e-06, "loss": 0.0094, "step": 47820 }, { "epoch": 0.8077481676630527, "grad_norm": 0.25038382411003113, "learning_rate": 7.441233476675321e-06, "loss": 0.0137, "step": 47830 }, { "epoch": 0.8079170466443747, "grad_norm": 0.2894496023654938, "learning_rate": 7.439947220777772e-06, "loss": 0.0176, "step": 47840 }, { "epoch": 0.8080859256256966, "grad_norm": 0.2149915248155594, "learning_rate": 7.438660752903588e-06, "loss": 0.02, "step": 47850 }, { "epoch": 0.8082548046070186, "grad_norm": 0.1860341727733612, "learning_rate": 7.437374073164532e-06, "loss": 0.0098, "step": 47860 }, { "epoch": 0.8084236835883406, "grad_norm": 0.22972950339317322, "learning_rate": 7.4360871816723915e-06, "loss": 0.0127, "step": 47870 }, { "epoch": 0.8085925625696626, "grad_norm": 0.18896539509296417, "learning_rate": 7.434800078538965e-06, "loss": 0.0119, "step": 47880 }, { "epoch": 0.8087614415509846, "grad_norm": 0.18737810850143433, "learning_rate": 7.4335127638760725e-06, "loss": 0.0092, "step": 47890 }, { "epoch": 0.8089303205323065, "grad_norm": 0.4080350697040558, "learning_rate": 7.432225237795556e-06, "loss": 0.0121, "step": 47900 }, { "epoch": 0.8090991995136285, "grad_norm": 0.22988562285900116, "learning_rate": 7.43093750040927e-06, "loss": 0.013, "step": 47910 }, { "epoch": 0.8092680784949505, "grad_norm": 0.4389536380767822, "learning_rate": 7.429649551829091e-06, "loss": 0.0114, "step": 47920 }, { "epoch": 0.8094369574762725, "grad_norm": 0.13182252645492554, "learning_rate": 7.428361392166912e-06, "loss": 0.0171, "step": 47930 }, { "epoch": 0.8096058364575945, "grad_norm": 0.20098689198493958, "learning_rate": 7.427073021534646e-06, "loss": 0.0101, "step": 47940 }, { "epoch": 0.8097747154389164, "grad_norm": 0.9311424493789673, "learning_rate": 7.425784440044222e-06, "loss": 0.0146, "step": 47950 }, { "epoch": 0.8099435944202384, "grad_norm": 0.3293813169002533, "learning_rate": 7.424495647807592e-06, "loss": 0.0105, "step": 47960 }, { "epoch": 0.8101124734015605, "grad_norm": 0.35843613743782043, "learning_rate": 7.4232066449367205e-06, "loss": 0.0152, "step": 47970 }, { "epoch": 0.8102813523828825, "grad_norm": 0.4254811704158783, "learning_rate": 7.421917431543593e-06, "loss": 0.0201, "step": 47980 }, { "epoch": 0.8104502313642045, "grad_norm": 0.3585146367549896, "learning_rate": 7.420628007740211e-06, "loss": 0.0157, "step": 47990 }, { "epoch": 0.8106191103455264, "grad_norm": 0.21897397935390472, "learning_rate": 7.419338373638604e-06, "loss": 0.0122, "step": 48000 }, { "epoch": 0.8107879893268484, "grad_norm": 0.1698792278766632, "learning_rate": 7.418048529350804e-06, "loss": 0.0143, "step": 48010 }, { "epoch": 0.8109568683081704, "grad_norm": 0.2047257125377655, "learning_rate": 7.416758474988874e-06, "loss": 0.0102, "step": 48020 }, { "epoch": 0.8111257472894924, "grad_norm": 0.16374054551124573, "learning_rate": 7.4154682106648904e-06, "loss": 0.0114, "step": 48030 }, { "epoch": 0.8112946262708144, "grad_norm": 0.2986258268356323, "learning_rate": 7.414177736490946e-06, "loss": 0.0132, "step": 48040 }, { "epoch": 0.8114635052521363, "grad_norm": 0.3660223186016083, "learning_rate": 7.412887052579156e-06, "loss": 0.0134, "step": 48050 }, { "epoch": 0.8116323842334583, "grad_norm": 0.2964905798435211, "learning_rate": 7.411596159041652e-06, "loss": 0.0136, "step": 48060 }, { "epoch": 0.8118012632147803, "grad_norm": 0.40644949674606323, "learning_rate": 7.410305055990582e-06, "loss": 0.0129, "step": 48070 }, { "epoch": 0.8119701421961023, "grad_norm": 0.15922746062278748, "learning_rate": 7.409013743538116e-06, "loss": 0.0102, "step": 48080 }, { "epoch": 0.8121390211774242, "grad_norm": 0.3032822012901306, "learning_rate": 7.407722221796438e-06, "loss": 0.0128, "step": 48090 }, { "epoch": 0.8123079001587462, "grad_norm": 0.32347238063812256, "learning_rate": 7.406430490877754e-06, "loss": 0.0112, "step": 48100 }, { "epoch": 0.8124767791400682, "grad_norm": 0.27112242579460144, "learning_rate": 7.405138550894284e-06, "loss": 0.0082, "step": 48110 }, { "epoch": 0.8126456581213902, "grad_norm": 0.37313368916511536, "learning_rate": 7.403846401958272e-06, "loss": 0.0111, "step": 48120 }, { "epoch": 0.8128145371027122, "grad_norm": 0.19886641204357147, "learning_rate": 7.402554044181975e-06, "loss": 0.0111, "step": 48130 }, { "epoch": 0.8129834160840341, "grad_norm": 0.31106001138687134, "learning_rate": 7.401261477677669e-06, "loss": 0.0086, "step": 48140 }, { "epoch": 0.8131522950653561, "grad_norm": 0.28300997614860535, "learning_rate": 7.39996870255765e-06, "loss": 0.0119, "step": 48150 }, { "epoch": 0.8133211740466781, "grad_norm": 0.3945018947124481, "learning_rate": 7.398675718934229e-06, "loss": 0.0132, "step": 48160 }, { "epoch": 0.8134900530280001, "grad_norm": 0.11781080812215805, "learning_rate": 7.397382526919742e-06, "loss": 0.0087, "step": 48170 }, { "epoch": 0.8136589320093222, "grad_norm": 0.48262935876846313, "learning_rate": 7.396089126626536e-06, "loss": 0.0154, "step": 48180 }, { "epoch": 0.813827810990644, "grad_norm": 0.4768083989620209, "learning_rate": 7.394795518166975e-06, "loss": 0.0105, "step": 48190 }, { "epoch": 0.8139966899719661, "grad_norm": 0.37474027276039124, "learning_rate": 7.393501701653449e-06, "loss": 0.011, "step": 48200 }, { "epoch": 0.8141655689532881, "grad_norm": 0.37849941849708557, "learning_rate": 7.392207677198361e-06, "loss": 0.0158, "step": 48210 }, { "epoch": 0.8143344479346101, "grad_norm": 0.3349858820438385, "learning_rate": 7.3909134449141306e-06, "loss": 0.0166, "step": 48220 }, { "epoch": 0.8145033269159321, "grad_norm": 0.1733662188053131, "learning_rate": 7.389619004913199e-06, "loss": 0.0088, "step": 48230 }, { "epoch": 0.814672205897254, "grad_norm": 0.24052132666110992, "learning_rate": 7.388324357308023e-06, "loss": 0.0103, "step": 48240 }, { "epoch": 0.814841084878576, "grad_norm": 0.2420901507139206, "learning_rate": 7.387029502211081e-06, "loss": 0.0084, "step": 48250 }, { "epoch": 0.815009963859898, "grad_norm": 0.18632721900939941, "learning_rate": 7.385734439734864e-06, "loss": 0.0114, "step": 48260 }, { "epoch": 0.81517884284122, "grad_norm": 0.36094629764556885, "learning_rate": 7.384439169991885e-06, "loss": 0.0141, "step": 48270 }, { "epoch": 0.815347721822542, "grad_norm": 0.39335086941719055, "learning_rate": 7.383143693094673e-06, "loss": 0.0119, "step": 48280 }, { "epoch": 0.8155166008038639, "grad_norm": 0.20835889875888824, "learning_rate": 7.381848009155776e-06, "loss": 0.0106, "step": 48290 }, { "epoch": 0.8156854797851859, "grad_norm": 0.49576839804649353, "learning_rate": 7.380552118287763e-06, "loss": 0.0105, "step": 48300 }, { "epoch": 0.8158543587665079, "grad_norm": 0.7717930674552917, "learning_rate": 7.379256020603214e-06, "loss": 0.0127, "step": 48310 }, { "epoch": 0.8160232377478299, "grad_norm": 0.15583768486976624, "learning_rate": 7.377959716214731e-06, "loss": 0.0132, "step": 48320 }, { "epoch": 0.8161921167291519, "grad_norm": 0.4030170440673828, "learning_rate": 7.376663205234935e-06, "loss": 0.0086, "step": 48330 }, { "epoch": 0.8163609957104738, "grad_norm": 0.4125918447971344, "learning_rate": 7.375366487776464e-06, "loss": 0.0108, "step": 48340 }, { "epoch": 0.8165298746917958, "grad_norm": 0.20969076454639435, "learning_rate": 7.374069563951974e-06, "loss": 0.0094, "step": 48350 }, { "epoch": 0.8166987536731178, "grad_norm": 0.3803554177284241, "learning_rate": 7.372772433874135e-06, "loss": 0.0122, "step": 48360 }, { "epoch": 0.8168676326544398, "grad_norm": 0.1963759958744049, "learning_rate": 7.371475097655643e-06, "loss": 0.0103, "step": 48370 }, { "epoch": 0.8170365116357619, "grad_norm": 0.5966513752937317, "learning_rate": 7.370177555409205e-06, "loss": 0.0132, "step": 48380 }, { "epoch": 0.8172053906170837, "grad_norm": 0.30796292424201965, "learning_rate": 7.36887980724755e-06, "loss": 0.0067, "step": 48390 }, { "epoch": 0.8173742695984058, "grad_norm": 0.23949363827705383, "learning_rate": 7.367581853283419e-06, "loss": 0.0086, "step": 48400 }, { "epoch": 0.8175431485797278, "grad_norm": 0.3030568063259125, "learning_rate": 7.366283693629581e-06, "loss": 0.0133, "step": 48410 }, { "epoch": 0.8177120275610498, "grad_norm": 0.4481469690799713, "learning_rate": 7.364985328398813e-06, "loss": 0.012, "step": 48420 }, { "epoch": 0.8178809065423718, "grad_norm": 0.4121417999267578, "learning_rate": 7.363686757703915e-06, "loss": 0.0093, "step": 48430 }, { "epoch": 0.8180497855236937, "grad_norm": 0.19927896559238434, "learning_rate": 7.362387981657703e-06, "loss": 0.0102, "step": 48440 }, { "epoch": 0.8182186645050157, "grad_norm": 0.31752893328666687, "learning_rate": 7.361089000373013e-06, "loss": 0.0168, "step": 48450 }, { "epoch": 0.8183875434863377, "grad_norm": 0.12442697584629059, "learning_rate": 7.359789813962693e-06, "loss": 0.0103, "step": 48460 }, { "epoch": 0.8185564224676597, "grad_norm": 0.07852587103843689, "learning_rate": 7.35849042253962e-06, "loss": 0.0098, "step": 48470 }, { "epoch": 0.8187253014489817, "grad_norm": 0.28460079431533813, "learning_rate": 7.357190826216676e-06, "loss": 0.0124, "step": 48480 }, { "epoch": 0.8188941804303036, "grad_norm": 0.3125569522380829, "learning_rate": 7.355891025106772e-06, "loss": 0.0071, "step": 48490 }, { "epoch": 0.8190630594116256, "grad_norm": 0.25765126943588257, "learning_rate": 7.354591019322825e-06, "loss": 0.0113, "step": 48500 }, { "epoch": 0.8192319383929476, "grad_norm": 0.22241483628749847, "learning_rate": 7.353290808977782e-06, "loss": 0.0078, "step": 48510 }, { "epoch": 0.8194008173742696, "grad_norm": 0.32988259196281433, "learning_rate": 7.351990394184598e-06, "loss": 0.0116, "step": 48520 }, { "epoch": 0.8195696963555916, "grad_norm": 0.18556442856788635, "learning_rate": 7.3506897750562545e-06, "loss": 0.0084, "step": 48530 }, { "epoch": 0.8197385753369135, "grad_norm": 0.34348806738853455, "learning_rate": 7.349388951705741e-06, "loss": 0.0119, "step": 48540 }, { "epoch": 0.8199074543182355, "grad_norm": 0.214734748005867, "learning_rate": 7.348087924246075e-06, "loss": 0.0094, "step": 48550 }, { "epoch": 0.8200763332995575, "grad_norm": 0.5633267164230347, "learning_rate": 7.346786692790281e-06, "loss": 0.0163, "step": 48560 }, { "epoch": 0.8202452122808795, "grad_norm": 0.21212537586688995, "learning_rate": 7.345485257451411e-06, "loss": 0.0101, "step": 48570 }, { "epoch": 0.8204140912622016, "grad_norm": 0.5838257670402527, "learning_rate": 7.344183618342528e-06, "loss": 0.013, "step": 48580 }, { "epoch": 0.8205829702435234, "grad_norm": 0.2986941635608673, "learning_rate": 7.342881775576718e-06, "loss": 0.0111, "step": 48590 }, { "epoch": 0.8207518492248455, "grad_norm": 0.5554220080375671, "learning_rate": 7.341579729267079e-06, "loss": 0.0117, "step": 48600 }, { "epoch": 0.8209207282061675, "grad_norm": 0.2700957953929901, "learning_rate": 7.340277479526732e-06, "loss": 0.0162, "step": 48610 }, { "epoch": 0.8210896071874895, "grad_norm": 0.2669998109340668, "learning_rate": 7.338975026468811e-06, "loss": 0.0101, "step": 48620 }, { "epoch": 0.8212584861688115, "grad_norm": 0.5584315061569214, "learning_rate": 7.337672370206472e-06, "loss": 0.0174, "step": 48630 }, { "epoch": 0.8214273651501334, "grad_norm": 0.33408522605895996, "learning_rate": 7.336369510852885e-06, "loss": 0.01, "step": 48640 }, { "epoch": 0.8215962441314554, "grad_norm": 0.3170502185821533, "learning_rate": 7.33506644852124e-06, "loss": 0.0124, "step": 48650 }, { "epoch": 0.8217651231127774, "grad_norm": 0.23492203652858734, "learning_rate": 7.333763183324744e-06, "loss": 0.0115, "step": 48660 }, { "epoch": 0.8219340020940994, "grad_norm": 0.21127574145793915, "learning_rate": 7.332459715376621e-06, "loss": 0.0098, "step": 48670 }, { "epoch": 0.8221028810754214, "grad_norm": 0.48657092452049255, "learning_rate": 7.331156044790113e-06, "loss": 0.0121, "step": 48680 }, { "epoch": 0.8222717600567433, "grad_norm": 0.4317375123500824, "learning_rate": 7.3298521716784795e-06, "loss": 0.0109, "step": 48690 }, { "epoch": 0.8224406390380653, "grad_norm": 0.09636252373456955, "learning_rate": 7.3285480961550005e-06, "loss": 0.0144, "step": 48700 }, { "epoch": 0.8226095180193873, "grad_norm": 0.442411333322525, "learning_rate": 7.327243818332966e-06, "loss": 0.01, "step": 48710 }, { "epoch": 0.8227783970007093, "grad_norm": 0.30596986413002014, "learning_rate": 7.325939338325694e-06, "loss": 0.0113, "step": 48720 }, { "epoch": 0.8229472759820313, "grad_norm": 0.18639680743217468, "learning_rate": 7.32463465624651e-06, "loss": 0.0094, "step": 48730 }, { "epoch": 0.8231161549633532, "grad_norm": 0.3578716516494751, "learning_rate": 7.323329772208765e-06, "loss": 0.0118, "step": 48740 }, { "epoch": 0.8232850339446752, "grad_norm": 0.5037715435028076, "learning_rate": 7.32202468632582e-06, "loss": 0.0122, "step": 48750 }, { "epoch": 0.8234539129259972, "grad_norm": 0.22997355461120605, "learning_rate": 7.320719398711062e-06, "loss": 0.0165, "step": 48760 }, { "epoch": 0.8236227919073192, "grad_norm": 0.2667289972305298, "learning_rate": 7.319413909477887e-06, "loss": 0.0126, "step": 48770 }, { "epoch": 0.8237916708886412, "grad_norm": 0.26385802030563354, "learning_rate": 7.318108218739717e-06, "loss": 0.0104, "step": 48780 }, { "epoch": 0.8239605498699631, "grad_norm": 0.211903378367424, "learning_rate": 7.316802326609984e-06, "loss": 0.012, "step": 48790 }, { "epoch": 0.8241294288512852, "grad_norm": 0.42366257309913635, "learning_rate": 7.315496233202144e-06, "loss": 0.0104, "step": 48800 }, { "epoch": 0.8242983078326072, "grad_norm": 0.22645661234855652, "learning_rate": 7.314189938629663e-06, "loss": 0.0074, "step": 48810 }, { "epoch": 0.8244671868139292, "grad_norm": 0.372387558221817, "learning_rate": 7.312883443006031e-06, "loss": 0.0143, "step": 48820 }, { "epoch": 0.8246360657952512, "grad_norm": 0.3257629871368408, "learning_rate": 7.311576746444753e-06, "loss": 0.0104, "step": 48830 }, { "epoch": 0.8248049447765731, "grad_norm": 0.645352303981781, "learning_rate": 7.310269849059353e-06, "loss": 0.0097, "step": 48840 }, { "epoch": 0.8249738237578951, "grad_norm": 0.3609197735786438, "learning_rate": 7.308962750963367e-06, "loss": 0.0103, "step": 48850 }, { "epoch": 0.8251427027392171, "grad_norm": 0.2167227864265442, "learning_rate": 7.307655452270358e-06, "loss": 0.01, "step": 48860 }, { "epoch": 0.8253115817205391, "grad_norm": 0.18334923684597015, "learning_rate": 7.306347953093895e-06, "loss": 0.0087, "step": 48870 }, { "epoch": 0.8254804607018611, "grad_norm": 0.41448765993118286, "learning_rate": 7.305040253547575e-06, "loss": 0.0108, "step": 48880 }, { "epoch": 0.825649339683183, "grad_norm": 0.28184929490089417, "learning_rate": 7.303732353745005e-06, "loss": 0.0101, "step": 48890 }, { "epoch": 0.825818218664505, "grad_norm": 0.12955208122730255, "learning_rate": 7.302424253799815e-06, "loss": 0.0124, "step": 48900 }, { "epoch": 0.825987097645827, "grad_norm": 0.3117975890636444, "learning_rate": 7.301115953825646e-06, "loss": 0.0096, "step": 48910 }, { "epoch": 0.826155976627149, "grad_norm": 0.22284336388111115, "learning_rate": 7.299807453936162e-06, "loss": 0.0117, "step": 48920 }, { "epoch": 0.826324855608471, "grad_norm": 0.23615577816963196, "learning_rate": 7.2984987542450416e-06, "loss": 0.0067, "step": 48930 }, { "epoch": 0.8264937345897929, "grad_norm": 0.188150554895401, "learning_rate": 7.29718985486598e-06, "loss": 0.0122, "step": 48940 }, { "epoch": 0.8266626135711149, "grad_norm": 0.2177470326423645, "learning_rate": 7.295880755912695e-06, "loss": 0.0065, "step": 48950 }, { "epoch": 0.8268314925524369, "grad_norm": 0.3513130247592926, "learning_rate": 7.294571457498916e-06, "loss": 0.0083, "step": 48960 }, { "epoch": 0.8270003715337589, "grad_norm": 0.2787139117717743, "learning_rate": 7.293261959738389e-06, "loss": 0.0116, "step": 48970 }, { "epoch": 0.827169250515081, "grad_norm": 0.1704135537147522, "learning_rate": 7.291952262744885e-06, "loss": 0.0122, "step": 48980 }, { "epoch": 0.8273381294964028, "grad_norm": 0.13351117074489594, "learning_rate": 7.290642366632184e-06, "loss": 0.0111, "step": 48990 }, { "epoch": 0.8275070084777248, "grad_norm": 0.31300047039985657, "learning_rate": 7.289332271514084e-06, "loss": 0.0094, "step": 49000 }, { "epoch": 0.8276758874590469, "grad_norm": 0.2866195738315582, "learning_rate": 7.288021977504409e-06, "loss": 0.0097, "step": 49010 }, { "epoch": 0.8278447664403689, "grad_norm": 0.38986867666244507, "learning_rate": 7.28671148471699e-06, "loss": 0.0065, "step": 49020 }, { "epoch": 0.8280136454216909, "grad_norm": 0.18540726602077484, "learning_rate": 7.2854007932656824e-06, "loss": 0.0105, "step": 49030 }, { "epoch": 0.8281825244030128, "grad_norm": 0.4095011353492737, "learning_rate": 7.284089903264353e-06, "loss": 0.0158, "step": 49040 }, { "epoch": 0.8283514033843348, "grad_norm": 0.37693873047828674, "learning_rate": 7.282778814826888e-06, "loss": 0.0106, "step": 49050 }, { "epoch": 0.8285202823656568, "grad_norm": 0.08987434208393097, "learning_rate": 7.281467528067194e-06, "loss": 0.011, "step": 49060 }, { "epoch": 0.8286891613469788, "grad_norm": 0.38353800773620605, "learning_rate": 7.280156043099193e-06, "loss": 0.0082, "step": 49070 }, { "epoch": 0.8288580403283008, "grad_norm": 0.11786156892776489, "learning_rate": 7.278844360036821e-06, "loss": 0.0125, "step": 49080 }, { "epoch": 0.8290269193096227, "grad_norm": 0.2757147252559662, "learning_rate": 7.277532478994035e-06, "loss": 0.0085, "step": 49090 }, { "epoch": 0.8291957982909447, "grad_norm": 0.3113984167575836, "learning_rate": 7.276220400084809e-06, "loss": 0.0118, "step": 49100 }, { "epoch": 0.8293646772722667, "grad_norm": 0.23617778718471527, "learning_rate": 7.2749081234231314e-06, "loss": 0.0111, "step": 49110 }, { "epoch": 0.8295335562535887, "grad_norm": 0.40260595083236694, "learning_rate": 7.273595649123009e-06, "loss": 0.0192, "step": 49120 }, { "epoch": 0.8297024352349107, "grad_norm": 0.20891237258911133, "learning_rate": 7.27228297729847e-06, "loss": 0.0074, "step": 49130 }, { "epoch": 0.8298713142162326, "grad_norm": 0.14783404767513275, "learning_rate": 7.270970108063553e-06, "loss": 0.0091, "step": 49140 }, { "epoch": 0.8300401931975546, "grad_norm": 0.24090658128261566, "learning_rate": 7.269657041532319e-06, "loss": 0.0097, "step": 49150 }, { "epoch": 0.8302090721788766, "grad_norm": 0.20241080224514008, "learning_rate": 7.2683437778188405e-06, "loss": 0.0083, "step": 49160 }, { "epoch": 0.8303779511601986, "grad_norm": 0.1771409511566162, "learning_rate": 7.267030317037216e-06, "loss": 0.0086, "step": 49170 }, { "epoch": 0.8305468301415205, "grad_norm": 0.5879583954811096, "learning_rate": 7.265716659301549e-06, "loss": 0.0101, "step": 49180 }, { "epoch": 0.8307157091228425, "grad_norm": 0.5112467408180237, "learning_rate": 7.2644028047259715e-06, "loss": 0.012, "step": 49190 }, { "epoch": 0.8308845881041645, "grad_norm": 0.220084547996521, "learning_rate": 7.263088753424629e-06, "loss": 0.0141, "step": 49200 }, { "epoch": 0.8310534670854866, "grad_norm": 0.3222436010837555, "learning_rate": 7.26177450551168e-06, "loss": 0.0124, "step": 49210 }, { "epoch": 0.8312223460668086, "grad_norm": 0.4545641541481018, "learning_rate": 7.2604600611013035e-06, "loss": 0.0082, "step": 49220 }, { "epoch": 0.8313912250481305, "grad_norm": 0.24043269455432892, "learning_rate": 7.259145420307696e-06, "loss": 0.0133, "step": 49230 }, { "epoch": 0.8315601040294525, "grad_norm": 0.29840970039367676, "learning_rate": 7.25783058324507e-06, "loss": 0.0081, "step": 49240 }, { "epoch": 0.8317289830107745, "grad_norm": 0.3654631972312927, "learning_rate": 7.256515550027656e-06, "loss": 0.0141, "step": 49250 }, { "epoch": 0.8318978619920965, "grad_norm": 0.15256842970848083, "learning_rate": 7.2552003207697e-06, "loss": 0.0089, "step": 49260 }, { "epoch": 0.8320667409734185, "grad_norm": 0.5311442017555237, "learning_rate": 7.253884895585467e-06, "loss": 0.0098, "step": 49270 }, { "epoch": 0.8322356199547404, "grad_norm": 0.23457443714141846, "learning_rate": 7.252569274589236e-06, "loss": 0.0131, "step": 49280 }, { "epoch": 0.8324044989360624, "grad_norm": 0.2361890971660614, "learning_rate": 7.251253457895307e-06, "loss": 0.012, "step": 49290 }, { "epoch": 0.8325733779173844, "grad_norm": 0.30715569853782654, "learning_rate": 7.2499374456179936e-06, "loss": 0.0104, "step": 49300 }, { "epoch": 0.8327422568987064, "grad_norm": 0.21395015716552734, "learning_rate": 7.248621237871627e-06, "loss": 0.013, "step": 49310 }, { "epoch": 0.8329111358800284, "grad_norm": 0.2125607281923294, "learning_rate": 7.247304834770559e-06, "loss": 0.0143, "step": 49320 }, { "epoch": 0.8330800148613503, "grad_norm": 0.31636691093444824, "learning_rate": 7.245988236429153e-06, "loss": 0.0176, "step": 49330 }, { "epoch": 0.8332488938426723, "grad_norm": 0.2558319866657257, "learning_rate": 7.244671442961792e-06, "loss": 0.0124, "step": 49340 }, { "epoch": 0.8334177728239943, "grad_norm": 0.269485741853714, "learning_rate": 7.2433544544828775e-06, "loss": 0.0077, "step": 49350 }, { "epoch": 0.8335866518053163, "grad_norm": 0.45351746678352356, "learning_rate": 7.242037271106824e-06, "loss": 0.0111, "step": 49360 }, { "epoch": 0.8337555307866383, "grad_norm": 0.25033092498779297, "learning_rate": 7.240719892948066e-06, "loss": 0.0073, "step": 49370 }, { "epoch": 0.8339244097679602, "grad_norm": 0.3289177715778351, "learning_rate": 7.239402320121055e-06, "loss": 0.011, "step": 49380 }, { "epoch": 0.8340932887492822, "grad_norm": 0.22757065296173096, "learning_rate": 7.238084552740259e-06, "loss": 0.0156, "step": 49390 }, { "epoch": 0.8342621677306042, "grad_norm": 0.24655917286872864, "learning_rate": 7.236766590920159e-06, "loss": 0.0082, "step": 49400 }, { "epoch": 0.8344310467119262, "grad_norm": 0.510832667350769, "learning_rate": 7.23544843477526e-06, "loss": 0.0113, "step": 49410 }, { "epoch": 0.8345999256932483, "grad_norm": 0.13807697594165802, "learning_rate": 7.234130084420077e-06, "loss": 0.0084, "step": 49420 }, { "epoch": 0.8347688046745702, "grad_norm": 0.5109137296676636, "learning_rate": 7.232811539969148e-06, "loss": 0.0109, "step": 49430 }, { "epoch": 0.8349376836558922, "grad_norm": 0.3535758852958679, "learning_rate": 7.231492801537024e-06, "loss": 0.0117, "step": 49440 }, { "epoch": 0.8351065626372142, "grad_norm": 0.4742215573787689, "learning_rate": 7.230173869238273e-06, "loss": 0.0105, "step": 49450 }, { "epoch": 0.8352754416185362, "grad_norm": 0.256629079580307, "learning_rate": 7.22885474318748e-06, "loss": 0.0135, "step": 49460 }, { "epoch": 0.8354443205998582, "grad_norm": 0.17238986492156982, "learning_rate": 7.22753542349925e-06, "loss": 0.0123, "step": 49470 }, { "epoch": 0.8356131995811801, "grad_norm": 0.37828493118286133, "learning_rate": 7.2262159102881994e-06, "loss": 0.0088, "step": 49480 }, { "epoch": 0.8357820785625021, "grad_norm": 0.2372480183839798, "learning_rate": 7.224896203668965e-06, "loss": 0.0126, "step": 49490 }, { "epoch": 0.8359509575438241, "grad_norm": 0.33564499020576477, "learning_rate": 7.223576303756201e-06, "loss": 0.0093, "step": 49500 }, { "epoch": 0.8361198365251461, "grad_norm": 0.19591443240642548, "learning_rate": 7.2222562106645774e-06, "loss": 0.0097, "step": 49510 }, { "epoch": 0.8362887155064681, "grad_norm": 0.35409364104270935, "learning_rate": 7.22093592450878e-06, "loss": 0.0115, "step": 49520 }, { "epoch": 0.83645759448779, "grad_norm": 0.27126240730285645, "learning_rate": 7.219615445403511e-06, "loss": 0.0106, "step": 49530 }, { "epoch": 0.836626473469112, "grad_norm": 0.1879270225763321, "learning_rate": 7.218294773463489e-06, "loss": 0.0115, "step": 49540 }, { "epoch": 0.836795352450434, "grad_norm": 0.3020114302635193, "learning_rate": 7.216973908803455e-06, "loss": 0.007, "step": 49550 }, { "epoch": 0.836964231431756, "grad_norm": 0.19635477662086487, "learning_rate": 7.2156528515381585e-06, "loss": 0.0148, "step": 49560 }, { "epoch": 0.837133110413078, "grad_norm": 0.24270090460777283, "learning_rate": 7.214331601782372e-06, "loss": 0.0094, "step": 49570 }, { "epoch": 0.8373019893943999, "grad_norm": 0.2965719699859619, "learning_rate": 7.2130101596508825e-06, "loss": 0.0119, "step": 49580 }, { "epoch": 0.8374708683757219, "grad_norm": 0.31389865279197693, "learning_rate": 7.211688525258493e-06, "loss": 0.0132, "step": 49590 }, { "epoch": 0.8376397473570439, "grad_norm": 0.2346101999282837, "learning_rate": 7.2103666987200235e-06, "loss": 0.0101, "step": 49600 }, { "epoch": 0.837808626338366, "grad_norm": 0.3203684091567993, "learning_rate": 7.209044680150311e-06, "loss": 0.0107, "step": 49610 }, { "epoch": 0.837977505319688, "grad_norm": 0.5008103251457214, "learning_rate": 7.207722469664212e-06, "loss": 0.0115, "step": 49620 }, { "epoch": 0.8381463843010099, "grad_norm": 0.23891079425811768, "learning_rate": 7.206400067376591e-06, "loss": 0.0121, "step": 49630 }, { "epoch": 0.8383152632823319, "grad_norm": 0.35482126474380493, "learning_rate": 7.205077473402342e-06, "loss": 0.0085, "step": 49640 }, { "epoch": 0.8384841422636539, "grad_norm": 0.22445128858089447, "learning_rate": 7.203754687856364e-06, "loss": 0.0128, "step": 49650 }, { "epoch": 0.8386530212449759, "grad_norm": 0.3131442368030548, "learning_rate": 7.202431710853578e-06, "loss": 0.0093, "step": 49660 }, { "epoch": 0.8388219002262979, "grad_norm": 0.15640543401241302, "learning_rate": 7.201108542508922e-06, "loss": 0.0134, "step": 49670 }, { "epoch": 0.8389907792076198, "grad_norm": 0.19255052506923676, "learning_rate": 7.19978518293735e-06, "loss": 0.0089, "step": 49680 }, { "epoch": 0.8391596581889418, "grad_norm": 0.34956467151641846, "learning_rate": 7.1984616322538316e-06, "loss": 0.0138, "step": 49690 }, { "epoch": 0.8393285371702638, "grad_norm": 0.3877204954624176, "learning_rate": 7.197137890573353e-06, "loss": 0.0117, "step": 49700 }, { "epoch": 0.8394974161515858, "grad_norm": 0.2066309005022049, "learning_rate": 7.19581395801092e-06, "loss": 0.01, "step": 49710 }, { "epoch": 0.8396662951329078, "grad_norm": 0.3197518289089203, "learning_rate": 7.194489834681549e-06, "loss": 0.012, "step": 49720 }, { "epoch": 0.8398351741142297, "grad_norm": 0.3019656538963318, "learning_rate": 7.193165520700278e-06, "loss": 0.0085, "step": 49730 }, { "epoch": 0.8400040530955517, "grad_norm": 0.5292072296142578, "learning_rate": 7.191841016182163e-06, "loss": 0.0146, "step": 49740 }, { "epoch": 0.8401729320768737, "grad_norm": 0.3967720866203308, "learning_rate": 7.1905163212422695e-06, "loss": 0.0115, "step": 49750 }, { "epoch": 0.8403418110581957, "grad_norm": 0.42259615659713745, "learning_rate": 7.189191435995686e-06, "loss": 0.0144, "step": 49760 }, { "epoch": 0.8405106900395177, "grad_norm": 0.549751341342926, "learning_rate": 7.187866360557515e-06, "loss": 0.0135, "step": 49770 }, { "epoch": 0.8406795690208396, "grad_norm": 0.24115151166915894, "learning_rate": 7.186541095042876e-06, "loss": 0.0123, "step": 49780 }, { "epoch": 0.8408484480021616, "grad_norm": 0.36848536133766174, "learning_rate": 7.185215639566904e-06, "loss": 0.0081, "step": 49790 }, { "epoch": 0.8410173269834836, "grad_norm": 0.4316912293434143, "learning_rate": 7.1838899942447526e-06, "loss": 0.0129, "step": 49800 }, { "epoch": 0.8411862059648056, "grad_norm": 0.3462052345275879, "learning_rate": 7.1825641591915884e-06, "loss": 0.0124, "step": 49810 }, { "epoch": 0.8413550849461277, "grad_norm": 0.2713283598423004, "learning_rate": 7.1812381345226e-06, "loss": 0.0142, "step": 49820 }, { "epoch": 0.8415239639274495, "grad_norm": 0.23062407970428467, "learning_rate": 7.179911920352986e-06, "loss": 0.0156, "step": 49830 }, { "epoch": 0.8416928429087716, "grad_norm": 0.4534553289413452, "learning_rate": 7.178585516797966e-06, "loss": 0.0113, "step": 49840 }, { "epoch": 0.8418617218900936, "grad_norm": 0.5315342545509338, "learning_rate": 7.177258923972774e-06, "loss": 0.0124, "step": 49850 }, { "epoch": 0.8420306008714156, "grad_norm": 0.24293367564678192, "learning_rate": 7.175932141992664e-06, "loss": 0.012, "step": 49860 }, { "epoch": 0.8421994798527376, "grad_norm": 0.33605802059173584, "learning_rate": 7.1746051709729e-06, "loss": 0.0091, "step": 49870 }, { "epoch": 0.8423683588340595, "grad_norm": 0.3517448306083679, "learning_rate": 7.173278011028768e-06, "loss": 0.0133, "step": 49880 }, { "epoch": 0.8425372378153815, "grad_norm": 0.3754367232322693, "learning_rate": 7.171950662275566e-06, "loss": 0.0099, "step": 49890 }, { "epoch": 0.8427061167967035, "grad_norm": 0.25337114930152893, "learning_rate": 7.170623124828615e-06, "loss": 0.0138, "step": 49900 }, { "epoch": 0.8428749957780255, "grad_norm": 0.33860260248184204, "learning_rate": 7.169295398803244e-06, "loss": 0.0132, "step": 49910 }, { "epoch": 0.8430438747593475, "grad_norm": 0.11629952490329742, "learning_rate": 7.167967484314805e-06, "loss": 0.0173, "step": 49920 }, { "epoch": 0.8432127537406694, "grad_norm": 0.22529323399066925, "learning_rate": 7.166639381478663e-06, "loss": 0.0127, "step": 49930 }, { "epoch": 0.8433816327219914, "grad_norm": 0.22350895404815674, "learning_rate": 7.165311090410201e-06, "loss": 0.011, "step": 49940 }, { "epoch": 0.8435505117033134, "grad_norm": 0.1290215104818344, "learning_rate": 7.163982611224818e-06, "loss": 0.0077, "step": 49950 }, { "epoch": 0.8437193906846354, "grad_norm": 0.39876091480255127, "learning_rate": 7.162653944037926e-06, "loss": 0.0098, "step": 49960 }, { "epoch": 0.8438882696659574, "grad_norm": 0.05520620197057724, "learning_rate": 7.16132508896496e-06, "loss": 0.007, "step": 49970 }, { "epoch": 0.8440571486472793, "grad_norm": 1.0129237174987793, "learning_rate": 7.159996046121367e-06, "loss": 0.0123, "step": 49980 }, { "epoch": 0.8442260276286013, "grad_norm": 0.3056694269180298, "learning_rate": 7.158666815622609e-06, "loss": 0.0087, "step": 49990 }, { "epoch": 0.8443949066099233, "grad_norm": 0.3491649329662323, "learning_rate": 7.15733739758417e-06, "loss": 0.0108, "step": 50000 }, { "epoch": 0.8445637855912453, "grad_norm": 0.23708440363407135, "learning_rate": 7.156007792121541e-06, "loss": 0.0124, "step": 50010 }, { "epoch": 0.8447326645725673, "grad_norm": 0.26683226227760315, "learning_rate": 7.1546779993502394e-06, "loss": 0.0084, "step": 50020 }, { "epoch": 0.8449015435538892, "grad_norm": 0.19313305616378784, "learning_rate": 7.153348019385793e-06, "loss": 0.0079, "step": 50030 }, { "epoch": 0.8450704225352113, "grad_norm": 0.34487003087997437, "learning_rate": 7.1520178523437466e-06, "loss": 0.0113, "step": 50040 }, { "epoch": 0.8452393015165333, "grad_norm": 0.23905311524868011, "learning_rate": 7.150687498339664e-06, "loss": 0.0096, "step": 50050 }, { "epoch": 0.8454081804978553, "grad_norm": 0.46409913897514343, "learning_rate": 7.149356957489119e-06, "loss": 0.0137, "step": 50060 }, { "epoch": 0.8455770594791773, "grad_norm": 0.15785667300224304, "learning_rate": 7.148026229907712e-06, "loss": 0.0086, "step": 50070 }, { "epoch": 0.8457459384604992, "grad_norm": 0.5575969219207764, "learning_rate": 7.1466953157110475e-06, "loss": 0.0139, "step": 50080 }, { "epoch": 0.8459148174418212, "grad_norm": 0.5088991522789001, "learning_rate": 7.145364215014754e-06, "loss": 0.0106, "step": 50090 }, { "epoch": 0.8460836964231432, "grad_norm": 0.2836278975009918, "learning_rate": 7.144032927934473e-06, "loss": 0.0159, "step": 50100 }, { "epoch": 0.8462525754044652, "grad_norm": 0.2171567976474762, "learning_rate": 7.142701454585868e-06, "loss": 0.0077, "step": 50110 }, { "epoch": 0.8464214543857872, "grad_norm": 0.3346119821071625, "learning_rate": 7.14136979508461e-06, "loss": 0.0102, "step": 50120 }, { "epoch": 0.8465903333671091, "grad_norm": 0.45463064312934875, "learning_rate": 7.140037949546391e-06, "loss": 0.007, "step": 50130 }, { "epoch": 0.8467592123484311, "grad_norm": 0.27735090255737305, "learning_rate": 7.138705918086919e-06, "loss": 0.0086, "step": 50140 }, { "epoch": 0.8469280913297531, "grad_norm": 0.2806031107902527, "learning_rate": 7.137373700821917e-06, "loss": 0.0143, "step": 50150 }, { "epoch": 0.8470969703110751, "grad_norm": 0.559666633605957, "learning_rate": 7.136041297867125e-06, "loss": 0.0133, "step": 50160 }, { "epoch": 0.8472658492923971, "grad_norm": 0.41439804434776306, "learning_rate": 7.1347087093383e-06, "loss": 0.0142, "step": 50170 }, { "epoch": 0.847434728273719, "grad_norm": 0.09381654113531113, "learning_rate": 7.133375935351212e-06, "loss": 0.0146, "step": 50180 }, { "epoch": 0.847603607255041, "grad_norm": 0.26791006326675415, "learning_rate": 7.1320429760216515e-06, "loss": 0.0146, "step": 50190 }, { "epoch": 0.847772486236363, "grad_norm": 0.42284485697746277, "learning_rate": 7.130709831465421e-06, "loss": 0.0108, "step": 50200 }, { "epoch": 0.847941365217685, "grad_norm": 0.5261491537094116, "learning_rate": 7.129376501798342e-06, "loss": 0.0093, "step": 50210 }, { "epoch": 0.848110244199007, "grad_norm": 0.1567818522453308, "learning_rate": 7.128042987136249e-06, "loss": 0.0128, "step": 50220 }, { "epoch": 0.8482791231803289, "grad_norm": 0.23915539681911469, "learning_rate": 7.126709287594998e-06, "loss": 0.01, "step": 50230 }, { "epoch": 0.848448002161651, "grad_norm": 0.3904612362384796, "learning_rate": 7.125375403290453e-06, "loss": 0.0094, "step": 50240 }, { "epoch": 0.848616881142973, "grad_norm": 0.27531859278678894, "learning_rate": 7.124041334338503e-06, "loss": 0.0107, "step": 50250 }, { "epoch": 0.848785760124295, "grad_norm": 0.11921681463718414, "learning_rate": 7.122707080855045e-06, "loss": 0.0101, "step": 50260 }, { "epoch": 0.8489546391056169, "grad_norm": 0.4196297824382782, "learning_rate": 7.121372642955997e-06, "loss": 0.0096, "step": 50270 }, { "epoch": 0.8491235180869389, "grad_norm": 0.3332277536392212, "learning_rate": 7.120038020757291e-06, "loss": 0.0094, "step": 50280 }, { "epoch": 0.8492923970682609, "grad_norm": 0.13963796198368073, "learning_rate": 7.11870321437488e-06, "loss": 0.0059, "step": 50290 }, { "epoch": 0.8494612760495829, "grad_norm": 0.20924659073352814, "learning_rate": 7.117368223924723e-06, "loss": 0.0077, "step": 50300 }, { "epoch": 0.8496301550309049, "grad_norm": 0.4965740144252777, "learning_rate": 7.116033049522805e-06, "loss": 0.017, "step": 50310 }, { "epoch": 0.8497990340122268, "grad_norm": 0.12196065485477448, "learning_rate": 7.11469769128512e-06, "loss": 0.0099, "step": 50320 }, { "epoch": 0.8499679129935488, "grad_norm": 0.3070014715194702, "learning_rate": 7.113362149327679e-06, "loss": 0.013, "step": 50330 }, { "epoch": 0.8501367919748708, "grad_norm": 0.1305653154850006, "learning_rate": 7.1120264237665145e-06, "loss": 0.012, "step": 50340 }, { "epoch": 0.8503056709561928, "grad_norm": 0.4352061152458191, "learning_rate": 7.110690514717671e-06, "loss": 0.0135, "step": 50350 }, { "epoch": 0.8504745499375148, "grad_norm": 0.22891291975975037, "learning_rate": 7.109354422297205e-06, "loss": 0.008, "step": 50360 }, { "epoch": 0.8506434289188367, "grad_norm": 0.29207056760787964, "learning_rate": 7.108018146621199e-06, "loss": 0.0106, "step": 50370 }, { "epoch": 0.8508123079001587, "grad_norm": 0.45374301075935364, "learning_rate": 7.106681687805739e-06, "loss": 0.0102, "step": 50380 }, { "epoch": 0.8509811868814807, "grad_norm": 0.36724525690078735, "learning_rate": 7.105345045966936e-06, "loss": 0.0098, "step": 50390 }, { "epoch": 0.8511500658628027, "grad_norm": 0.20941297709941864, "learning_rate": 7.104008221220915e-06, "loss": 0.009, "step": 50400 }, { "epoch": 0.8513189448441247, "grad_norm": 0.16001488268375397, "learning_rate": 7.102671213683816e-06, "loss": 0.0106, "step": 50410 }, { "epoch": 0.8514878238254466, "grad_norm": 0.2590724229812622, "learning_rate": 7.101334023471794e-06, "loss": 0.0087, "step": 50420 }, { "epoch": 0.8516567028067686, "grad_norm": 0.2971196472644806, "learning_rate": 7.099996650701022e-06, "loss": 0.009, "step": 50430 }, { "epoch": 0.8518255817880906, "grad_norm": 0.372676819562912, "learning_rate": 7.098659095487686e-06, "loss": 0.0135, "step": 50440 }, { "epoch": 0.8519944607694127, "grad_norm": 0.27818238735198975, "learning_rate": 7.097321357947988e-06, "loss": 0.0127, "step": 50450 }, { "epoch": 0.8521633397507347, "grad_norm": 0.27883201837539673, "learning_rate": 7.095983438198152e-06, "loss": 0.0078, "step": 50460 }, { "epoch": 0.8523322187320566, "grad_norm": 0.22752515971660614, "learning_rate": 7.0946453363544095e-06, "loss": 0.0093, "step": 50470 }, { "epoch": 0.8525010977133786, "grad_norm": 0.29509738087654114, "learning_rate": 7.093307052533015e-06, "loss": 0.0151, "step": 50480 }, { "epoch": 0.8526699766947006, "grad_norm": 0.09431392699480057, "learning_rate": 7.0919685868502316e-06, "loss": 0.0104, "step": 50490 }, { "epoch": 0.8528388556760226, "grad_norm": 0.24628645181655884, "learning_rate": 7.090629939422344e-06, "loss": 0.0068, "step": 50500 }, { "epoch": 0.8530077346573446, "grad_norm": 0.3649636507034302, "learning_rate": 7.089291110365649e-06, "loss": 0.0084, "step": 50510 }, { "epoch": 0.8531766136386665, "grad_norm": 0.32526859641075134, "learning_rate": 7.087952099796463e-06, "loss": 0.0116, "step": 50520 }, { "epoch": 0.8533454926199885, "grad_norm": 0.23177547752857208, "learning_rate": 7.086612907831115e-06, "loss": 0.0108, "step": 50530 }, { "epoch": 0.8535143716013105, "grad_norm": 0.6516799926757812, "learning_rate": 7.08527353458595e-06, "loss": 0.0151, "step": 50540 }, { "epoch": 0.8536832505826325, "grad_norm": 0.29506900906562805, "learning_rate": 7.083933980177331e-06, "loss": 0.011, "step": 50550 }, { "epoch": 0.8538521295639545, "grad_norm": 0.22634294629096985, "learning_rate": 7.082594244721632e-06, "loss": 0.0115, "step": 50560 }, { "epoch": 0.8540210085452764, "grad_norm": 0.6820416450500488, "learning_rate": 7.0812543283352495e-06, "loss": 0.0149, "step": 50570 }, { "epoch": 0.8541898875265984, "grad_norm": 0.2205008566379547, "learning_rate": 7.079914231134593e-06, "loss": 0.0154, "step": 50580 }, { "epoch": 0.8543587665079204, "grad_norm": 0.4361005127429962, "learning_rate": 7.078573953236081e-06, "loss": 0.0144, "step": 50590 }, { "epoch": 0.8545276454892424, "grad_norm": 0.17876869440078735, "learning_rate": 7.077233494756159e-06, "loss": 0.0114, "step": 50600 }, { "epoch": 0.8546965244705644, "grad_norm": 0.42185938358306885, "learning_rate": 7.075892855811282e-06, "loss": 0.0127, "step": 50610 }, { "epoch": 0.8548654034518863, "grad_norm": 0.3601848781108856, "learning_rate": 7.0745520365179175e-06, "loss": 0.0115, "step": 50620 }, { "epoch": 0.8550342824332083, "grad_norm": 0.09000282734632492, "learning_rate": 7.0732110369925576e-06, "loss": 0.0124, "step": 50630 }, { "epoch": 0.8552031614145303, "grad_norm": 0.21109887957572937, "learning_rate": 7.071869857351703e-06, "loss": 0.0078, "step": 50640 }, { "epoch": 0.8553720403958524, "grad_norm": 0.2468239963054657, "learning_rate": 7.07052849771187e-06, "loss": 0.0072, "step": 50650 }, { "epoch": 0.8555409193771744, "grad_norm": 0.4021511971950531, "learning_rate": 7.069186958189598e-06, "loss": 0.0121, "step": 50660 }, { "epoch": 0.8557097983584963, "grad_norm": 0.13269954919815063, "learning_rate": 7.067845238901431e-06, "loss": 0.0112, "step": 50670 }, { "epoch": 0.8558786773398183, "grad_norm": 0.18667657673358917, "learning_rate": 7.0665033399639374e-06, "loss": 0.0131, "step": 50680 }, { "epoch": 0.8560475563211403, "grad_norm": 0.38233545422554016, "learning_rate": 7.065161261493696e-06, "loss": 0.0178, "step": 50690 }, { "epoch": 0.8562164353024623, "grad_norm": 0.17426705360412598, "learning_rate": 7.063819003607307e-06, "loss": 0.0118, "step": 50700 }, { "epoch": 0.8563853142837843, "grad_norm": 0.39646196365356445, "learning_rate": 7.062476566421379e-06, "loss": 0.0118, "step": 50710 }, { "epoch": 0.8565541932651062, "grad_norm": 0.1731969714164734, "learning_rate": 7.061133950052541e-06, "loss": 0.0108, "step": 50720 }, { "epoch": 0.8567230722464282, "grad_norm": 0.3210983872413635, "learning_rate": 7.059791154617435e-06, "loss": 0.0085, "step": 50730 }, { "epoch": 0.8568919512277502, "grad_norm": 0.7255094051361084, "learning_rate": 7.0584481802327205e-06, "loss": 0.0228, "step": 50740 }, { "epoch": 0.8570608302090722, "grad_norm": 0.25606679916381836, "learning_rate": 7.057105027015073e-06, "loss": 0.0094, "step": 50750 }, { "epoch": 0.8572297091903942, "grad_norm": 0.3768792450428009, "learning_rate": 7.055761695081182e-06, "loss": 0.012, "step": 50760 }, { "epoch": 0.8573985881717161, "grad_norm": 0.15976956486701965, "learning_rate": 7.054418184547751e-06, "loss": 0.0073, "step": 50770 }, { "epoch": 0.8575674671530381, "grad_norm": 0.5532543659210205, "learning_rate": 7.0530744955315045e-06, "loss": 0.0218, "step": 50780 }, { "epoch": 0.8577363461343601, "grad_norm": 0.6278402209281921, "learning_rate": 7.051730628149175e-06, "loss": 0.0122, "step": 50790 }, { "epoch": 0.8579052251156821, "grad_norm": 0.37004944682121277, "learning_rate": 7.050386582517515e-06, "loss": 0.012, "step": 50800 }, { "epoch": 0.8580741040970041, "grad_norm": 0.2142086625099182, "learning_rate": 7.049042358753294e-06, "loss": 0.0134, "step": 50810 }, { "epoch": 0.858242983078326, "grad_norm": 0.44810599088668823, "learning_rate": 7.047697956973294e-06, "loss": 0.0091, "step": 50820 }, { "epoch": 0.858411862059648, "grad_norm": 0.12737995386123657, "learning_rate": 7.046353377294312e-06, "loss": 0.0094, "step": 50830 }, { "epoch": 0.85858074104097, "grad_norm": 0.4227154552936554, "learning_rate": 7.045008619833166e-06, "loss": 0.013, "step": 50840 }, { "epoch": 0.858749620022292, "grad_norm": 0.22896617650985718, "learning_rate": 7.043663684706679e-06, "loss": 0.0113, "step": 50850 }, { "epoch": 0.858918499003614, "grad_norm": 0.05993523448705673, "learning_rate": 7.042318572031699e-06, "loss": 0.0049, "step": 50860 }, { "epoch": 0.859087377984936, "grad_norm": 0.18441514670848846, "learning_rate": 7.040973281925088e-06, "loss": 0.0121, "step": 50870 }, { "epoch": 0.859256256966258, "grad_norm": 0.22831836342811584, "learning_rate": 7.039627814503718e-06, "loss": 0.0095, "step": 50880 }, { "epoch": 0.85942513594758, "grad_norm": 0.1525714099407196, "learning_rate": 7.03828216988448e-06, "loss": 0.015, "step": 50890 }, { "epoch": 0.859594014928902, "grad_norm": 0.24078714847564697, "learning_rate": 7.036936348184284e-06, "loss": 0.0166, "step": 50900 }, { "epoch": 0.859762893910224, "grad_norm": 0.3926060199737549, "learning_rate": 7.035590349520048e-06, "loss": 0.0135, "step": 50910 }, { "epoch": 0.8599317728915459, "grad_norm": 0.3900412321090698, "learning_rate": 7.034244174008711e-06, "loss": 0.0149, "step": 50920 }, { "epoch": 0.8601006518728679, "grad_norm": 0.2845335602760315, "learning_rate": 7.032897821767224e-06, "loss": 0.0137, "step": 50930 }, { "epoch": 0.8602695308541899, "grad_norm": 0.5991858243942261, "learning_rate": 7.0315512929125565e-06, "loss": 0.0157, "step": 50940 }, { "epoch": 0.8604384098355119, "grad_norm": 0.2093854546546936, "learning_rate": 7.030204587561691e-06, "loss": 0.0087, "step": 50950 }, { "epoch": 0.8606072888168339, "grad_norm": 0.1400182694196701, "learning_rate": 7.0288577058316245e-06, "loss": 0.0085, "step": 50960 }, { "epoch": 0.8607761677981558, "grad_norm": 0.45835134387016296, "learning_rate": 7.027510647839373e-06, "loss": 0.0112, "step": 50970 }, { "epoch": 0.8609450467794778, "grad_norm": 0.44570279121398926, "learning_rate": 7.026163413701964e-06, "loss": 0.0117, "step": 50980 }, { "epoch": 0.8611139257607998, "grad_norm": 0.8051631450653076, "learning_rate": 7.0248160035364435e-06, "loss": 0.011, "step": 50990 }, { "epoch": 0.8612828047421218, "grad_norm": 0.28104186058044434, "learning_rate": 7.0234684174598684e-06, "loss": 0.0104, "step": 51000 }, { "epoch": 0.8614516837234438, "grad_norm": 0.25669199228286743, "learning_rate": 7.022120655589317e-06, "loss": 0.0118, "step": 51010 }, { "epoch": 0.8616205627047657, "grad_norm": 0.18817421793937683, "learning_rate": 7.020772718041877e-06, "loss": 0.012, "step": 51020 }, { "epoch": 0.8617894416860877, "grad_norm": 0.3136294186115265, "learning_rate": 7.019424604934656e-06, "loss": 0.0118, "step": 51030 }, { "epoch": 0.8619583206674097, "grad_norm": 0.2801385819911957, "learning_rate": 7.018076316384772e-06, "loss": 0.0111, "step": 51040 }, { "epoch": 0.8621271996487317, "grad_norm": 0.5604478716850281, "learning_rate": 7.016727852509364e-06, "loss": 0.0072, "step": 51050 }, { "epoch": 0.8622960786300538, "grad_norm": 0.21349410712718964, "learning_rate": 7.015379213425581e-06, "loss": 0.0108, "step": 51060 }, { "epoch": 0.8624649576113756, "grad_norm": 0.2504017949104309, "learning_rate": 7.014030399250592e-06, "loss": 0.0082, "step": 51070 }, { "epoch": 0.8626338365926977, "grad_norm": 0.3527284562587738, "learning_rate": 7.012681410101575e-06, "loss": 0.0097, "step": 51080 }, { "epoch": 0.8628027155740197, "grad_norm": 0.1663636863231659, "learning_rate": 7.011332246095731e-06, "loss": 0.009, "step": 51090 }, { "epoch": 0.8629715945553417, "grad_norm": 0.4082401394844055, "learning_rate": 7.009982907350268e-06, "loss": 0.0091, "step": 51100 }, { "epoch": 0.8631404735366637, "grad_norm": 0.37768447399139404, "learning_rate": 7.0086333939824155e-06, "loss": 0.0125, "step": 51110 }, { "epoch": 0.8633093525179856, "grad_norm": 0.20673660933971405, "learning_rate": 7.007283706109417e-06, "loss": 0.0121, "step": 51120 }, { "epoch": 0.8634782314993076, "grad_norm": 0.1284267008304596, "learning_rate": 7.005933843848528e-06, "loss": 0.0158, "step": 51130 }, { "epoch": 0.8636471104806296, "grad_norm": 0.12101856619119644, "learning_rate": 7.004583807317022e-06, "loss": 0.0063, "step": 51140 }, { "epoch": 0.8638159894619516, "grad_norm": 0.160056471824646, "learning_rate": 7.003233596632188e-06, "loss": 0.008, "step": 51150 }, { "epoch": 0.8639848684432736, "grad_norm": 0.163528710603714, "learning_rate": 7.0018832119113245e-06, "loss": 0.0104, "step": 51160 }, { "epoch": 0.8641537474245955, "grad_norm": 0.2756273150444031, "learning_rate": 7.000532653271755e-06, "loss": 0.0079, "step": 51170 }, { "epoch": 0.8643226264059175, "grad_norm": 0.16724848747253418, "learning_rate": 6.999181920830811e-06, "loss": 0.0116, "step": 51180 }, { "epoch": 0.8644915053872395, "grad_norm": 0.2072349190711975, "learning_rate": 6.99783101470584e-06, "loss": 0.0114, "step": 51190 }, { "epoch": 0.8646603843685615, "grad_norm": 0.4107675850391388, "learning_rate": 6.996479935014205e-06, "loss": 0.0099, "step": 51200 }, { "epoch": 0.8648292633498835, "grad_norm": 0.5169203281402588, "learning_rate": 6.995128681873288e-06, "loss": 0.0115, "step": 51210 }, { "epoch": 0.8649981423312054, "grad_norm": 0.131135955452919, "learning_rate": 6.993777255400478e-06, "loss": 0.0113, "step": 51220 }, { "epoch": 0.8651670213125274, "grad_norm": 0.35836270451545715, "learning_rate": 6.9924256557131845e-06, "loss": 0.0103, "step": 51230 }, { "epoch": 0.8653359002938494, "grad_norm": 0.3315335214138031, "learning_rate": 6.991073882928834e-06, "loss": 0.014, "step": 51240 }, { "epoch": 0.8655047792751714, "grad_norm": 0.31949102878570557, "learning_rate": 6.989721937164863e-06, "loss": 0.0106, "step": 51250 }, { "epoch": 0.8656736582564935, "grad_norm": 0.2842368185520172, "learning_rate": 6.988369818538725e-06, "loss": 0.0081, "step": 51260 }, { "epoch": 0.8658425372378153, "grad_norm": 0.5632551312446594, "learning_rate": 6.987017527167891e-06, "loss": 0.0079, "step": 51270 }, { "epoch": 0.8660114162191374, "grad_norm": 0.15623217821121216, "learning_rate": 6.985665063169842e-06, "loss": 0.0064, "step": 51280 }, { "epoch": 0.8661802952004594, "grad_norm": 0.3123961389064789, "learning_rate": 6.984312426662077e-06, "loss": 0.0104, "step": 51290 }, { "epoch": 0.8663491741817814, "grad_norm": 0.2788950800895691, "learning_rate": 6.982959617762111e-06, "loss": 0.015, "step": 51300 }, { "epoch": 0.8665180531631034, "grad_norm": 0.1754963994026184, "learning_rate": 6.981606636587474e-06, "loss": 0.012, "step": 51310 }, { "epoch": 0.8666869321444253, "grad_norm": 0.10720974951982498, "learning_rate": 6.980253483255708e-06, "loss": 0.0073, "step": 51320 }, { "epoch": 0.8668558111257473, "grad_norm": 0.24831461906433105, "learning_rate": 6.978900157884371e-06, "loss": 0.0096, "step": 51330 }, { "epoch": 0.8670246901070693, "grad_norm": 0.19782905280590057, "learning_rate": 6.977546660591037e-06, "loss": 0.0137, "step": 51340 }, { "epoch": 0.8671935690883913, "grad_norm": 0.18293911218643188, "learning_rate": 6.976192991493295e-06, "loss": 0.0068, "step": 51350 }, { "epoch": 0.8673624480697133, "grad_norm": 0.27053606510162354, "learning_rate": 6.974839150708748e-06, "loss": 0.0102, "step": 51360 }, { "epoch": 0.8675313270510352, "grad_norm": 0.1967473030090332, "learning_rate": 6.973485138355014e-06, "loss": 0.0117, "step": 51370 }, { "epoch": 0.8677002060323572, "grad_norm": 0.33096247911453247, "learning_rate": 6.9721309545497294e-06, "loss": 0.0084, "step": 51380 }, { "epoch": 0.8678690850136792, "grad_norm": 0.20332497358322144, "learning_rate": 6.970776599410538e-06, "loss": 0.0115, "step": 51390 }, { "epoch": 0.8680379639950012, "grad_norm": 0.22802948951721191, "learning_rate": 6.969422073055107e-06, "loss": 0.0131, "step": 51400 }, { "epoch": 0.8682068429763231, "grad_norm": 0.2755713164806366, "learning_rate": 6.968067375601109e-06, "loss": 0.01, "step": 51410 }, { "epoch": 0.8683757219576451, "grad_norm": 0.7810249924659729, "learning_rate": 6.966712507166242e-06, "loss": 0.0103, "step": 51420 }, { "epoch": 0.8685446009389671, "grad_norm": 0.40942755341529846, "learning_rate": 6.96535746786821e-06, "loss": 0.0087, "step": 51430 }, { "epoch": 0.8687134799202891, "grad_norm": 0.23172464966773987, "learning_rate": 6.964002257824739e-06, "loss": 0.0122, "step": 51440 }, { "epoch": 0.8688823589016111, "grad_norm": 0.2500697076320648, "learning_rate": 6.9626468771535626e-06, "loss": 0.008, "step": 51450 }, { "epoch": 0.869051237882933, "grad_norm": 0.18569529056549072, "learning_rate": 6.9612913259724365e-06, "loss": 0.0133, "step": 51460 }, { "epoch": 0.869220116864255, "grad_norm": 0.517388105392456, "learning_rate": 6.959935604399124e-06, "loss": 0.0103, "step": 51470 }, { "epoch": 0.869388995845577, "grad_norm": 0.40528377890586853, "learning_rate": 6.95857971255141e-06, "loss": 0.0128, "step": 51480 }, { "epoch": 0.8695578748268991, "grad_norm": 0.39428773522377014, "learning_rate": 6.957223650547089e-06, "loss": 0.0148, "step": 51490 }, { "epoch": 0.8697267538082211, "grad_norm": 0.19747062027454376, "learning_rate": 6.955867418503975e-06, "loss": 0.0127, "step": 51500 }, { "epoch": 0.869895632789543, "grad_norm": 0.3565616309642792, "learning_rate": 6.954511016539891e-06, "loss": 0.0107, "step": 51510 }, { "epoch": 0.870064511770865, "grad_norm": 0.17467696964740753, "learning_rate": 6.95315444477268e-06, "loss": 0.0162, "step": 51520 }, { "epoch": 0.870233390752187, "grad_norm": 0.4140089154243469, "learning_rate": 6.9517977033201945e-06, "loss": 0.0093, "step": 51530 }, { "epoch": 0.870402269733509, "grad_norm": 0.14075379073619843, "learning_rate": 6.950440792300309e-06, "loss": 0.0068, "step": 51540 }, { "epoch": 0.870571148714831, "grad_norm": 0.26750192046165466, "learning_rate": 6.949083711830906e-06, "loss": 0.0106, "step": 51550 }, { "epoch": 0.8707400276961529, "grad_norm": 0.25087857246398926, "learning_rate": 6.947726462029888e-06, "loss": 0.0078, "step": 51560 }, { "epoch": 0.8709089066774749, "grad_norm": 0.2906705439090729, "learning_rate": 6.946369043015165e-06, "loss": 0.0134, "step": 51570 }, { "epoch": 0.8710777856587969, "grad_norm": 0.4031949043273926, "learning_rate": 6.945011454904671e-06, "loss": 0.0139, "step": 51580 }, { "epoch": 0.8712466646401189, "grad_norm": 0.29981687664985657, "learning_rate": 6.943653697816346e-06, "loss": 0.0149, "step": 51590 }, { "epoch": 0.8714155436214409, "grad_norm": 0.20565477013587952, "learning_rate": 6.942295771868152e-06, "loss": 0.0169, "step": 51600 }, { "epoch": 0.8715844226027628, "grad_norm": 0.1616906076669693, "learning_rate": 6.940937677178059e-06, "loss": 0.0097, "step": 51610 }, { "epoch": 0.8717533015840848, "grad_norm": 0.2379731684923172, "learning_rate": 6.9395794138640585e-06, "loss": 0.0127, "step": 51620 }, { "epoch": 0.8719221805654068, "grad_norm": 0.5680673122406006, "learning_rate": 6.938220982044151e-06, "loss": 0.0119, "step": 51630 }, { "epoch": 0.8720910595467288, "grad_norm": 0.21764573454856873, "learning_rate": 6.936862381836354e-06, "loss": 0.0112, "step": 51640 }, { "epoch": 0.8722599385280508, "grad_norm": 0.3417213261127472, "learning_rate": 6.935503613358696e-06, "loss": 0.0093, "step": 51650 }, { "epoch": 0.8724288175093727, "grad_norm": 0.28952357172966003, "learning_rate": 6.934144676729229e-06, "loss": 0.011, "step": 51660 }, { "epoch": 0.8725976964906947, "grad_norm": 0.14187654852867126, "learning_rate": 6.932785572066013e-06, "loss": 0.0083, "step": 51670 }, { "epoch": 0.8727665754720167, "grad_norm": 0.30514875054359436, "learning_rate": 6.931426299487123e-06, "loss": 0.0132, "step": 51680 }, { "epoch": 0.8729354544533388, "grad_norm": 0.07448287308216095, "learning_rate": 6.930066859110647e-06, "loss": 0.0096, "step": 51690 }, { "epoch": 0.8731043334346608, "grad_norm": 0.3402806222438812, "learning_rate": 6.928707251054692e-06, "loss": 0.012, "step": 51700 }, { "epoch": 0.8732732124159827, "grad_norm": 0.24401050806045532, "learning_rate": 6.927347475437376e-06, "loss": 0.0168, "step": 51710 }, { "epoch": 0.8734420913973047, "grad_norm": 0.35589686036109924, "learning_rate": 6.925987532376837e-06, "loss": 0.0084, "step": 51720 }, { "epoch": 0.8736109703786267, "grad_norm": 0.25367504358291626, "learning_rate": 6.924627421991218e-06, "loss": 0.0094, "step": 51730 }, { "epoch": 0.8737798493599487, "grad_norm": 0.15247632563114166, "learning_rate": 6.923267144398686e-06, "loss": 0.012, "step": 51740 }, { "epoch": 0.8739487283412707, "grad_norm": 0.30826279520988464, "learning_rate": 6.921906699717417e-06, "loss": 0.0119, "step": 51750 }, { "epoch": 0.8741176073225926, "grad_norm": 0.5024303197860718, "learning_rate": 6.920546088065603e-06, "loss": 0.011, "step": 51760 }, { "epoch": 0.8742864863039146, "grad_norm": 0.26903265714645386, "learning_rate": 6.9191853095614504e-06, "loss": 0.0085, "step": 51770 }, { "epoch": 0.8744553652852366, "grad_norm": 0.05430202558636665, "learning_rate": 6.917824364323182e-06, "loss": 0.0134, "step": 51780 }, { "epoch": 0.8746242442665586, "grad_norm": 0.5315219759941101, "learning_rate": 6.9164632524690325e-06, "loss": 0.0116, "step": 51790 }, { "epoch": 0.8747931232478806, "grad_norm": 0.28931254148483276, "learning_rate": 6.915101974117251e-06, "loss": 0.0094, "step": 51800 }, { "epoch": 0.8749620022292025, "grad_norm": 0.23203228414058685, "learning_rate": 6.913740529386102e-06, "loss": 0.008, "step": 51810 }, { "epoch": 0.8751308812105245, "grad_norm": 0.76993328332901, "learning_rate": 6.912378918393865e-06, "loss": 0.0131, "step": 51820 }, { "epoch": 0.8752997601918465, "grad_norm": 0.23376961052417755, "learning_rate": 6.9110171412588344e-06, "loss": 0.0082, "step": 51830 }, { "epoch": 0.8754686391731685, "grad_norm": 0.26218125224113464, "learning_rate": 6.9096551980993176e-06, "loss": 0.0088, "step": 51840 }, { "epoch": 0.8756375181544905, "grad_norm": 0.1374657154083252, "learning_rate": 6.908293089033637e-06, "loss": 0.0108, "step": 51850 }, { "epoch": 0.8758063971358124, "grad_norm": 0.2592800259590149, "learning_rate": 6.9069308141801276e-06, "loss": 0.0137, "step": 51860 }, { "epoch": 0.8759752761171344, "grad_norm": 0.2923058569431305, "learning_rate": 6.905568373657143e-06, "loss": 0.0101, "step": 51870 }, { "epoch": 0.8761441550984564, "grad_norm": 0.2049538940191269, "learning_rate": 6.904205767583045e-06, "loss": 0.0075, "step": 51880 }, { "epoch": 0.8763130340797785, "grad_norm": 0.24685373902320862, "learning_rate": 6.902842996076218e-06, "loss": 0.0105, "step": 51890 }, { "epoch": 0.8764819130611005, "grad_norm": 0.3728879988193512, "learning_rate": 6.901480059255053e-06, "loss": 0.0103, "step": 51900 }, { "epoch": 0.8766507920424224, "grad_norm": 0.20558662712574005, "learning_rate": 6.900116957237962e-06, "loss": 0.0063, "step": 51910 }, { "epoch": 0.8768196710237444, "grad_norm": 0.48708751797676086, "learning_rate": 6.898753690143364e-06, "loss": 0.0145, "step": 51920 }, { "epoch": 0.8769885500050664, "grad_norm": 0.31499406695365906, "learning_rate": 6.897390258089699e-06, "loss": 0.0109, "step": 51930 }, { "epoch": 0.8771574289863884, "grad_norm": 0.36391177773475647, "learning_rate": 6.896026661195417e-06, "loss": 0.0095, "step": 51940 }, { "epoch": 0.8773263079677104, "grad_norm": 0.30067119002342224, "learning_rate": 6.894662899578985e-06, "loss": 0.0129, "step": 51950 }, { "epoch": 0.8774951869490323, "grad_norm": 0.2057294398546219, "learning_rate": 6.893298973358882e-06, "loss": 0.0098, "step": 51960 }, { "epoch": 0.8776640659303543, "grad_norm": 0.2905379831790924, "learning_rate": 6.891934882653605e-06, "loss": 0.0102, "step": 51970 }, { "epoch": 0.8778329449116763, "grad_norm": 0.08024156838655472, "learning_rate": 6.89057062758166e-06, "loss": 0.0126, "step": 51980 }, { "epoch": 0.8780018238929983, "grad_norm": 0.35213279724121094, "learning_rate": 6.889206208261573e-06, "loss": 0.0181, "step": 51990 }, { "epoch": 0.8781707028743203, "grad_norm": 0.18634259700775146, "learning_rate": 6.887841624811878e-06, "loss": 0.0092, "step": 52000 }, { "epoch": 0.8783395818556422, "grad_norm": 0.19943885505199432, "learning_rate": 6.886476877351129e-06, "loss": 0.0118, "step": 52010 }, { "epoch": 0.8785084608369642, "grad_norm": 0.2576650083065033, "learning_rate": 6.885111965997892e-06, "loss": 0.013, "step": 52020 }, { "epoch": 0.8786773398182862, "grad_norm": 0.14163382351398468, "learning_rate": 6.883746890870747e-06, "loss": 0.0111, "step": 52030 }, { "epoch": 0.8788462187996082, "grad_norm": 0.3760729730129242, "learning_rate": 6.8823816520882866e-06, "loss": 0.0133, "step": 52040 }, { "epoch": 0.8790150977809302, "grad_norm": 0.3877388834953308, "learning_rate": 6.8810162497691235e-06, "loss": 0.0167, "step": 52050 }, { "epoch": 0.8791839767622521, "grad_norm": 0.2856627106666565, "learning_rate": 6.879650684031875e-06, "loss": 0.0107, "step": 52060 }, { "epoch": 0.8793528557435741, "grad_norm": 0.43233436346054077, "learning_rate": 6.8782849549951825e-06, "loss": 0.0157, "step": 52070 }, { "epoch": 0.8795217347248961, "grad_norm": 0.30727851390838623, "learning_rate": 6.876919062777694e-06, "loss": 0.0128, "step": 52080 }, { "epoch": 0.8796906137062181, "grad_norm": 0.30540597438812256, "learning_rate": 6.8755530074980784e-06, "loss": 0.0135, "step": 52090 }, { "epoch": 0.8798594926875402, "grad_norm": 0.2818593680858612, "learning_rate": 6.8741867892750126e-06, "loss": 0.0168, "step": 52100 }, { "epoch": 0.880028371668862, "grad_norm": 0.31352436542510986, "learning_rate": 6.872820408227191e-06, "loss": 0.0074, "step": 52110 }, { "epoch": 0.8801972506501841, "grad_norm": 0.1451583355665207, "learning_rate": 6.871453864473321e-06, "loss": 0.0126, "step": 52120 }, { "epoch": 0.8803661296315061, "grad_norm": 0.31257322430610657, "learning_rate": 6.870087158132125e-06, "loss": 0.0146, "step": 52130 }, { "epoch": 0.8805350086128281, "grad_norm": 0.45648622512817383, "learning_rate": 6.8687202893223394e-06, "loss": 0.0101, "step": 52140 }, { "epoch": 0.8807038875941501, "grad_norm": 0.28859832882881165, "learning_rate": 6.867353258162714e-06, "loss": 0.0109, "step": 52150 }, { "epoch": 0.880872766575472, "grad_norm": 0.26303982734680176, "learning_rate": 6.865986064772014e-06, "loss": 0.0117, "step": 52160 }, { "epoch": 0.881041645556794, "grad_norm": 0.21165163815021515, "learning_rate": 6.864618709269017e-06, "loss": 0.0092, "step": 52170 }, { "epoch": 0.881210524538116, "grad_norm": 0.08407042175531387, "learning_rate": 6.863251191772516e-06, "loss": 0.0067, "step": 52180 }, { "epoch": 0.881379403519438, "grad_norm": 0.2240258902311325, "learning_rate": 6.861883512401315e-06, "loss": 0.0107, "step": 52190 }, { "epoch": 0.88154828250076, "grad_norm": 0.1828526258468628, "learning_rate": 6.8605156712742375e-06, "loss": 0.0148, "step": 52200 }, { "epoch": 0.8817171614820819, "grad_norm": 0.24475929141044617, "learning_rate": 6.859147668510119e-06, "loss": 0.0153, "step": 52210 }, { "epoch": 0.8818860404634039, "grad_norm": 0.6242151260375977, "learning_rate": 6.857779504227805e-06, "loss": 0.0135, "step": 52220 }, { "epoch": 0.8820549194447259, "grad_norm": 0.3766763508319855, "learning_rate": 6.856411178546161e-06, "loss": 0.0074, "step": 52230 }, { "epoch": 0.8822237984260479, "grad_norm": 0.2213163822889328, "learning_rate": 6.855042691584062e-06, "loss": 0.0094, "step": 52240 }, { "epoch": 0.8823926774073699, "grad_norm": 0.41674673557281494, "learning_rate": 6.853674043460399e-06, "loss": 0.0092, "step": 52250 }, { "epoch": 0.8825615563886918, "grad_norm": 0.2955242991447449, "learning_rate": 6.852305234294078e-06, "loss": 0.013, "step": 52260 }, { "epoch": 0.8827304353700138, "grad_norm": 0.8478742241859436, "learning_rate": 6.8509362642040165e-06, "loss": 0.008, "step": 52270 }, { "epoch": 0.8828993143513358, "grad_norm": 0.23291364312171936, "learning_rate": 6.84956713330915e-06, "loss": 0.0107, "step": 52280 }, { "epoch": 0.8830681933326578, "grad_norm": 0.22118932008743286, "learning_rate": 6.84819784172842e-06, "loss": 0.0105, "step": 52290 }, { "epoch": 0.8832370723139799, "grad_norm": 0.3262340724468231, "learning_rate": 6.846828389580793e-06, "loss": 0.0116, "step": 52300 }, { "epoch": 0.8834059512953018, "grad_norm": 0.5568901300430298, "learning_rate": 6.8454587769852386e-06, "loss": 0.0115, "step": 52310 }, { "epoch": 0.8835748302766238, "grad_norm": 0.40796875953674316, "learning_rate": 6.844089004060749e-06, "loss": 0.0172, "step": 52320 }, { "epoch": 0.8837437092579458, "grad_norm": 0.3730848729610443, "learning_rate": 6.842719070926325e-06, "loss": 0.008, "step": 52330 }, { "epoch": 0.8839125882392678, "grad_norm": 0.18556463718414307, "learning_rate": 6.841348977700984e-06, "loss": 0.0076, "step": 52340 }, { "epoch": 0.8840814672205898, "grad_norm": 0.546630322933197, "learning_rate": 6.839978724503756e-06, "loss": 0.0127, "step": 52350 }, { "epoch": 0.8842503462019117, "grad_norm": 0.19304822385311127, "learning_rate": 6.838608311453685e-06, "loss": 0.0096, "step": 52360 }, { "epoch": 0.8844192251832337, "grad_norm": 0.20080965757369995, "learning_rate": 6.837237738669828e-06, "loss": 0.0138, "step": 52370 }, { "epoch": 0.8845881041645557, "grad_norm": 0.06152147427201271, "learning_rate": 6.83586700627126e-06, "loss": 0.0196, "step": 52380 }, { "epoch": 0.8847569831458777, "grad_norm": 0.14873802661895752, "learning_rate": 6.834496114377063e-06, "loss": 0.0079, "step": 52390 }, { "epoch": 0.8849258621271997, "grad_norm": 0.6178528666496277, "learning_rate": 6.8331250631063406e-06, "loss": 0.0131, "step": 52400 }, { "epoch": 0.8850947411085216, "grad_norm": 0.11143571138381958, "learning_rate": 6.831753852578204e-06, "loss": 0.0122, "step": 52410 }, { "epoch": 0.8852636200898436, "grad_norm": 0.21851186454296112, "learning_rate": 6.8303824829117815e-06, "loss": 0.0099, "step": 52420 }, { "epoch": 0.8854324990711656, "grad_norm": 0.10842838883399963, "learning_rate": 6.829010954226213e-06, "loss": 0.0096, "step": 52430 }, { "epoch": 0.8856013780524876, "grad_norm": 0.0850970670580864, "learning_rate": 6.827639266640655e-06, "loss": 0.0117, "step": 52440 }, { "epoch": 0.8857702570338096, "grad_norm": 0.6304154396057129, "learning_rate": 6.826267420274275e-06, "loss": 0.017, "step": 52450 }, { "epoch": 0.8859391360151315, "grad_norm": 0.3082827627658844, "learning_rate": 6.824895415246258e-06, "loss": 0.0059, "step": 52460 }, { "epoch": 0.8861080149964535, "grad_norm": 0.3892807066440582, "learning_rate": 6.823523251675799e-06, "loss": 0.0114, "step": 52470 }, { "epoch": 0.8862768939777755, "grad_norm": 0.2660559415817261, "learning_rate": 6.822150929682108e-06, "loss": 0.0101, "step": 52480 }, { "epoch": 0.8864457729590975, "grad_norm": 0.4700593650341034, "learning_rate": 6.820778449384407e-06, "loss": 0.0108, "step": 52490 }, { "epoch": 0.8866146519404194, "grad_norm": 0.41033735871315, "learning_rate": 6.819405810901939e-06, "loss": 0.0143, "step": 52500 }, { "epoch": 0.8867835309217414, "grad_norm": 0.06930769234895706, "learning_rate": 6.818033014353951e-06, "loss": 0.0201, "step": 52510 }, { "epoch": 0.8869524099030635, "grad_norm": 0.4168311059474945, "learning_rate": 6.81666005985971e-06, "loss": 0.0185, "step": 52520 }, { "epoch": 0.8871212888843855, "grad_norm": 0.31246218085289, "learning_rate": 6.815286947538494e-06, "loss": 0.0122, "step": 52530 }, { "epoch": 0.8872901678657075, "grad_norm": 0.2663702070713043, "learning_rate": 6.813913677509598e-06, "loss": 0.0095, "step": 52540 }, { "epoch": 0.8874590468470294, "grad_norm": 0.46304693818092346, "learning_rate": 6.812540249892323e-06, "loss": 0.0105, "step": 52550 }, { "epoch": 0.8876279258283514, "grad_norm": 0.6223575472831726, "learning_rate": 6.8111666648059945e-06, "loss": 0.0141, "step": 52560 }, { "epoch": 0.8877968048096734, "grad_norm": 0.2826164960861206, "learning_rate": 6.809792922369944e-06, "loss": 0.0093, "step": 52570 }, { "epoch": 0.8879656837909954, "grad_norm": 0.20155367255210876, "learning_rate": 6.808419022703521e-06, "loss": 0.0082, "step": 52580 }, { "epoch": 0.8881345627723174, "grad_norm": 0.4284723699092865, "learning_rate": 6.807044965926082e-06, "loss": 0.0092, "step": 52590 }, { "epoch": 0.8883034417536393, "grad_norm": 0.3925187289714813, "learning_rate": 6.805670752157006e-06, "loss": 0.0122, "step": 52600 }, { "epoch": 0.8884723207349613, "grad_norm": 0.25670406222343445, "learning_rate": 6.804296381515677e-06, "loss": 0.0105, "step": 52610 }, { "epoch": 0.8886411997162833, "grad_norm": 0.3221798837184906, "learning_rate": 6.802921854121501e-06, "loss": 0.0088, "step": 52620 }, { "epoch": 0.8888100786976053, "grad_norm": 0.28428158164024353, "learning_rate": 6.8015471700938926e-06, "loss": 0.0109, "step": 52630 }, { "epoch": 0.8889789576789273, "grad_norm": 0.4654194712638855, "learning_rate": 6.800172329552281e-06, "loss": 0.0085, "step": 52640 }, { "epoch": 0.8891478366602492, "grad_norm": 0.41169798374176025, "learning_rate": 6.798797332616107e-06, "loss": 0.0119, "step": 52650 }, { "epoch": 0.8893167156415712, "grad_norm": 0.14743554592132568, "learning_rate": 6.797422179404831e-06, "loss": 0.0144, "step": 52660 }, { "epoch": 0.8894855946228932, "grad_norm": 0.12520499527454376, "learning_rate": 6.796046870037916e-06, "loss": 0.0091, "step": 52670 }, { "epoch": 0.8896544736042152, "grad_norm": 0.2524642050266266, "learning_rate": 6.794671404634854e-06, "loss": 0.0105, "step": 52680 }, { "epoch": 0.8898233525855372, "grad_norm": 0.23655077815055847, "learning_rate": 6.793295783315137e-06, "loss": 0.013, "step": 52690 }, { "epoch": 0.8899922315668591, "grad_norm": 0.23767216503620148, "learning_rate": 6.791920006198276e-06, "loss": 0.0144, "step": 52700 }, { "epoch": 0.8901611105481811, "grad_norm": 0.1915300488471985, "learning_rate": 6.790544073403796e-06, "loss": 0.0105, "step": 52710 }, { "epoch": 0.8903299895295032, "grad_norm": 0.12350580096244812, "learning_rate": 6.789167985051234e-06, "loss": 0.0106, "step": 52720 }, { "epoch": 0.8904988685108252, "grad_norm": 0.1567934900522232, "learning_rate": 6.787791741260142e-06, "loss": 0.0057, "step": 52730 }, { "epoch": 0.8906677474921472, "grad_norm": 0.25164756178855896, "learning_rate": 6.786415342150083e-06, "loss": 0.0126, "step": 52740 }, { "epoch": 0.8908366264734691, "grad_norm": 0.2836211025714874, "learning_rate": 6.785038787840639e-06, "loss": 0.0121, "step": 52750 }, { "epoch": 0.8910055054547911, "grad_norm": 0.15734100341796875, "learning_rate": 6.783662078451397e-06, "loss": 0.0133, "step": 52760 }, { "epoch": 0.8911743844361131, "grad_norm": 0.019052281975746155, "learning_rate": 6.782285214101967e-06, "loss": 0.0092, "step": 52770 }, { "epoch": 0.8913432634174351, "grad_norm": 0.18972253799438477, "learning_rate": 6.780908194911962e-06, "loss": 0.0101, "step": 52780 }, { "epoch": 0.8915121423987571, "grad_norm": 0.3455626368522644, "learning_rate": 6.779531021001018e-06, "loss": 0.0133, "step": 52790 }, { "epoch": 0.891681021380079, "grad_norm": 0.5664096474647522, "learning_rate": 6.778153692488778e-06, "loss": 0.0111, "step": 52800 }, { "epoch": 0.891849900361401, "grad_norm": 0.2951091527938843, "learning_rate": 6.776776209494904e-06, "loss": 0.0116, "step": 52810 }, { "epoch": 0.892018779342723, "grad_norm": 1.3645843267440796, "learning_rate": 6.775398572139067e-06, "loss": 0.0101, "step": 52820 }, { "epoch": 0.892187658324045, "grad_norm": 0.38894015550613403, "learning_rate": 6.774020780540952e-06, "loss": 0.0179, "step": 52830 }, { "epoch": 0.892356537305367, "grad_norm": 0.3123697340488434, "learning_rate": 6.772642834820258e-06, "loss": 0.0145, "step": 52840 }, { "epoch": 0.8925254162866889, "grad_norm": 0.10584601759910583, "learning_rate": 6.7712647350966985e-06, "loss": 0.0082, "step": 52850 }, { "epoch": 0.8926942952680109, "grad_norm": 0.2656906247138977, "learning_rate": 6.769886481489998e-06, "loss": 0.01, "step": 52860 }, { "epoch": 0.8928631742493329, "grad_norm": 0.4247768521308899, "learning_rate": 6.768508074119899e-06, "loss": 0.0092, "step": 52870 }, { "epoch": 0.8930320532306549, "grad_norm": 0.27048492431640625, "learning_rate": 6.767129513106151e-06, "loss": 0.0088, "step": 52880 }, { "epoch": 0.8932009322119769, "grad_norm": 0.26872873306274414, "learning_rate": 6.765750798568521e-06, "loss": 0.014, "step": 52890 }, { "epoch": 0.8933698111932988, "grad_norm": 0.16483400762081146, "learning_rate": 6.764371930626789e-06, "loss": 0.0064, "step": 52900 }, { "epoch": 0.8935386901746208, "grad_norm": 0.2308521270751953, "learning_rate": 6.7629929094007456e-06, "loss": 0.0156, "step": 52910 }, { "epoch": 0.8937075691559428, "grad_norm": 0.457254558801651, "learning_rate": 6.7616137350101984e-06, "loss": 0.0167, "step": 52920 }, { "epoch": 0.8938764481372649, "grad_norm": 0.20737342536449432, "learning_rate": 6.7602344075749686e-06, "loss": 0.0139, "step": 52930 }, { "epoch": 0.8940453271185869, "grad_norm": 0.158927783370018, "learning_rate": 6.758854927214884e-06, "loss": 0.0101, "step": 52940 }, { "epoch": 0.8942142060999088, "grad_norm": 0.2520968019962311, "learning_rate": 6.757475294049796e-06, "loss": 0.0119, "step": 52950 }, { "epoch": 0.8943830850812308, "grad_norm": 0.40426963567733765, "learning_rate": 6.756095508199558e-06, "loss": 0.0121, "step": 52960 }, { "epoch": 0.8945519640625528, "grad_norm": 0.3495381772518158, "learning_rate": 6.7547155697840475e-06, "loss": 0.0063, "step": 52970 }, { "epoch": 0.8947208430438748, "grad_norm": 0.30959245562553406, "learning_rate": 6.753335478923147e-06, "loss": 0.0125, "step": 52980 }, { "epoch": 0.8948897220251968, "grad_norm": 0.2224421352148056, "learning_rate": 6.751955235736757e-06, "loss": 0.0125, "step": 52990 }, { "epoch": 0.8950586010065187, "grad_norm": 0.2276410609483719, "learning_rate": 6.750574840344788e-06, "loss": 0.0114, "step": 53000 }, { "epoch": 0.8952274799878407, "grad_norm": 0.2995896637439728, "learning_rate": 6.7491942928671675e-06, "loss": 0.0107, "step": 53010 }, { "epoch": 0.8953963589691627, "grad_norm": 0.3303985893726349, "learning_rate": 6.747813593423833e-06, "loss": 0.0082, "step": 53020 }, { "epoch": 0.8955652379504847, "grad_norm": 0.3901318311691284, "learning_rate": 6.746432742134736e-06, "loss": 0.0072, "step": 53030 }, { "epoch": 0.8957341169318067, "grad_norm": 0.2578577995300293, "learning_rate": 6.745051739119841e-06, "loss": 0.0127, "step": 53040 }, { "epoch": 0.8959029959131286, "grad_norm": 0.18128110468387604, "learning_rate": 6.743670584499128e-06, "loss": 0.0145, "step": 53050 }, { "epoch": 0.8960718748944506, "grad_norm": 0.2646823525428772, "learning_rate": 6.742289278392586e-06, "loss": 0.0099, "step": 53060 }, { "epoch": 0.8962407538757726, "grad_norm": 0.8388268351554871, "learning_rate": 6.740907820920223e-06, "loss": 0.0137, "step": 53070 }, { "epoch": 0.8964096328570946, "grad_norm": 0.20255333185195923, "learning_rate": 6.739526212202052e-06, "loss": 0.0122, "step": 53080 }, { "epoch": 0.8965785118384166, "grad_norm": 0.249258890748024, "learning_rate": 6.7381444523581075e-06, "loss": 0.0106, "step": 53090 }, { "epoch": 0.8967473908197385, "grad_norm": 0.823388934135437, "learning_rate": 6.7367625415084315e-06, "loss": 0.0114, "step": 53100 }, { "epoch": 0.8969162698010605, "grad_norm": 0.4936344623565674, "learning_rate": 6.735380479773083e-06, "loss": 0.0147, "step": 53110 }, { "epoch": 0.8970851487823825, "grad_norm": 0.3575669825077057, "learning_rate": 6.73399826727213e-06, "loss": 0.0064, "step": 53120 }, { "epoch": 0.8972540277637046, "grad_norm": 0.4436119794845581, "learning_rate": 6.7326159041256554e-06, "loss": 0.0113, "step": 53130 }, { "epoch": 0.8974229067450266, "grad_norm": 0.44335126876831055, "learning_rate": 6.7312333904537575e-06, "loss": 0.0113, "step": 53140 }, { "epoch": 0.8975917857263485, "grad_norm": 0.3263305425643921, "learning_rate": 6.729850726376544e-06, "loss": 0.0089, "step": 53150 }, { "epoch": 0.8977606647076705, "grad_norm": 0.3559231758117676, "learning_rate": 6.728467912014138e-06, "loss": 0.0073, "step": 53160 }, { "epoch": 0.8979295436889925, "grad_norm": 0.19423729181289673, "learning_rate": 6.727084947486677e-06, "loss": 0.0065, "step": 53170 }, { "epoch": 0.8980984226703145, "grad_norm": 0.5516058802604675, "learning_rate": 6.725701832914307e-06, "loss": 0.0127, "step": 53180 }, { "epoch": 0.8982673016516365, "grad_norm": 0.21696148812770844, "learning_rate": 6.72431856841719e-06, "loss": 0.0118, "step": 53190 }, { "epoch": 0.8984361806329584, "grad_norm": 0.46198561787605286, "learning_rate": 6.722935154115502e-06, "loss": 0.011, "step": 53200 }, { "epoch": 0.8986050596142804, "grad_norm": 0.45884934067726135, "learning_rate": 6.7215515901294284e-06, "loss": 0.0097, "step": 53210 }, { "epoch": 0.8987739385956024, "grad_norm": 0.5789762139320374, "learning_rate": 6.720167876579171e-06, "loss": 0.0134, "step": 53220 }, { "epoch": 0.8989428175769244, "grad_norm": 0.2797548174858093, "learning_rate": 6.718784013584944e-06, "loss": 0.0117, "step": 53230 }, { "epoch": 0.8991116965582464, "grad_norm": 0.18405398726463318, "learning_rate": 6.7174000012669735e-06, "loss": 0.0101, "step": 53240 }, { "epoch": 0.8992805755395683, "grad_norm": 0.3094055652618408, "learning_rate": 6.7160158397455e-06, "loss": 0.0123, "step": 53250 }, { "epoch": 0.8994494545208903, "grad_norm": 0.2779955565929413, "learning_rate": 6.714631529140775e-06, "loss": 0.0113, "step": 53260 }, { "epoch": 0.8996183335022123, "grad_norm": 0.4414041042327881, "learning_rate": 6.7132470695730634e-06, "loss": 0.0112, "step": 53270 }, { "epoch": 0.8997872124835343, "grad_norm": 0.23391732573509216, "learning_rate": 6.711862461162645e-06, "loss": 0.0089, "step": 53280 }, { "epoch": 0.8999560914648563, "grad_norm": 0.3512779176235199, "learning_rate": 6.71047770402981e-06, "loss": 0.0071, "step": 53290 }, { "epoch": 0.9001249704461782, "grad_norm": 0.2626085579395294, "learning_rate": 6.709092798294865e-06, "loss": 0.01, "step": 53300 }, { "epoch": 0.9002938494275002, "grad_norm": 0.117583267390728, "learning_rate": 6.707707744078124e-06, "loss": 0.0074, "step": 53310 }, { "epoch": 0.9004627284088222, "grad_norm": 0.2601352632045746, "learning_rate": 6.70632254149992e-06, "loss": 0.0102, "step": 53320 }, { "epoch": 0.9006316073901443, "grad_norm": 0.2270331233739853, "learning_rate": 6.704937190680594e-06, "loss": 0.0145, "step": 53330 }, { "epoch": 0.9008004863714663, "grad_norm": 0.3878779709339142, "learning_rate": 6.703551691740502e-06, "loss": 0.0129, "step": 53340 }, { "epoch": 0.9009693653527882, "grad_norm": 0.17708905041217804, "learning_rate": 6.702166044800013e-06, "loss": 0.0104, "step": 53350 }, { "epoch": 0.9011382443341102, "grad_norm": 0.19471189379692078, "learning_rate": 6.700780249979511e-06, "loss": 0.0079, "step": 53360 }, { "epoch": 0.9013071233154322, "grad_norm": 0.05759939178824425, "learning_rate": 6.6993943073993874e-06, "loss": 0.0062, "step": 53370 }, { "epoch": 0.9014760022967542, "grad_norm": 0.1391405612230301, "learning_rate": 6.698008217180051e-06, "loss": 0.0084, "step": 53380 }, { "epoch": 0.9016448812780762, "grad_norm": 0.22650940716266632, "learning_rate": 6.6966219794419206e-06, "loss": 0.014, "step": 53390 }, { "epoch": 0.9018137602593981, "grad_norm": 0.3202018439769745, "learning_rate": 6.695235594305429e-06, "loss": 0.0092, "step": 53400 }, { "epoch": 0.9019826392407201, "grad_norm": 0.11044438183307648, "learning_rate": 6.693849061891025e-06, "loss": 0.0086, "step": 53410 }, { "epoch": 0.9021515182220421, "grad_norm": 0.2542944550514221, "learning_rate": 6.6924623823191646e-06, "loss": 0.0135, "step": 53420 }, { "epoch": 0.9023203972033641, "grad_norm": 0.20106351375579834, "learning_rate": 6.691075555710319e-06, "loss": 0.011, "step": 53430 }, { "epoch": 0.9024892761846861, "grad_norm": 0.1791500598192215, "learning_rate": 6.689688582184974e-06, "loss": 0.0076, "step": 53440 }, { "epoch": 0.902658155166008, "grad_norm": 0.24348124861717224, "learning_rate": 6.688301461863625e-06, "loss": 0.0121, "step": 53450 }, { "epoch": 0.90282703414733, "grad_norm": 0.24292168021202087, "learning_rate": 6.686914194866781e-06, "loss": 0.0118, "step": 53460 }, { "epoch": 0.902995913128652, "grad_norm": 0.2931557893753052, "learning_rate": 6.685526781314965e-06, "loss": 0.0132, "step": 53470 }, { "epoch": 0.903164792109974, "grad_norm": 0.1727854311466217, "learning_rate": 6.684139221328714e-06, "loss": 0.0111, "step": 53480 }, { "epoch": 0.903333671091296, "grad_norm": 0.5823855996131897, "learning_rate": 6.682751515028572e-06, "loss": 0.0133, "step": 53490 }, { "epoch": 0.9035025500726179, "grad_norm": 0.5283251404762268, "learning_rate": 6.681363662535105e-06, "loss": 0.0159, "step": 53500 }, { "epoch": 0.9036714290539399, "grad_norm": 0.39531856775283813, "learning_rate": 6.67997566396888e-06, "loss": 0.012, "step": 53510 }, { "epoch": 0.9038403080352619, "grad_norm": 0.2636355757713318, "learning_rate": 6.678587519450488e-06, "loss": 0.0118, "step": 53520 }, { "epoch": 0.904009187016584, "grad_norm": 0.5674525499343872, "learning_rate": 6.677199229100524e-06, "loss": 0.0109, "step": 53530 }, { "epoch": 0.904178065997906, "grad_norm": 0.25214555859565735, "learning_rate": 6.675810793039601e-06, "loss": 0.0103, "step": 53540 }, { "epoch": 0.9043469449792279, "grad_norm": 0.30326512455940247, "learning_rate": 6.674422211388343e-06, "loss": 0.0117, "step": 53550 }, { "epoch": 0.9045158239605499, "grad_norm": 0.3111715316772461, "learning_rate": 6.673033484267387e-06, "loss": 0.0068, "step": 53560 }, { "epoch": 0.9046847029418719, "grad_norm": 0.244625985622406, "learning_rate": 6.671644611797379e-06, "loss": 0.0084, "step": 53570 }, { "epoch": 0.9048535819231939, "grad_norm": 0.26340538263320923, "learning_rate": 6.670255594098987e-06, "loss": 0.0082, "step": 53580 }, { "epoch": 0.9050224609045158, "grad_norm": 0.22333155572414398, "learning_rate": 6.668866431292878e-06, "loss": 0.0066, "step": 53590 }, { "epoch": 0.9051913398858378, "grad_norm": 0.27406108379364014, "learning_rate": 6.667477123499746e-06, "loss": 0.0097, "step": 53600 }, { "epoch": 0.9053602188671598, "grad_norm": 0.17432241141796112, "learning_rate": 6.666087670840285e-06, "loss": 0.0141, "step": 53610 }, { "epoch": 0.9055290978484818, "grad_norm": 0.24518415331840515, "learning_rate": 6.6646980734352115e-06, "loss": 0.0153, "step": 53620 }, { "epoch": 0.9056979768298038, "grad_norm": 0.28548961877822876, "learning_rate": 6.663308331405246e-06, "loss": 0.0091, "step": 53630 }, { "epoch": 0.9058668558111257, "grad_norm": 0.13904935121536255, "learning_rate": 6.6619184448711294e-06, "loss": 0.0122, "step": 53640 }, { "epoch": 0.9060357347924477, "grad_norm": 0.19702884554862976, "learning_rate": 6.660528413953612e-06, "loss": 0.01, "step": 53650 }, { "epoch": 0.9062046137737697, "grad_norm": 0.2601083815097809, "learning_rate": 6.659138238773454e-06, "loss": 0.0079, "step": 53660 }, { "epoch": 0.9063734927550917, "grad_norm": 0.17253433167934418, "learning_rate": 6.657747919451432e-06, "loss": 0.0124, "step": 53670 }, { "epoch": 0.9065423717364137, "grad_norm": 0.3969866633415222, "learning_rate": 6.656357456108332e-06, "loss": 0.0155, "step": 53680 }, { "epoch": 0.9067112507177356, "grad_norm": 0.3756023645401001, "learning_rate": 6.654966848864955e-06, "loss": 0.0099, "step": 53690 }, { "epoch": 0.9068801296990576, "grad_norm": 0.43963223695755005, "learning_rate": 6.6535760978421135e-06, "loss": 0.0118, "step": 53700 }, { "epoch": 0.9070490086803796, "grad_norm": 0.5651038289070129, "learning_rate": 6.652185203160634e-06, "loss": 0.0082, "step": 53710 }, { "epoch": 0.9072178876617016, "grad_norm": 0.24974828958511353, "learning_rate": 6.6507941649413496e-06, "loss": 0.0093, "step": 53720 }, { "epoch": 0.9073867666430236, "grad_norm": 0.1940997689962387, "learning_rate": 6.649402983305116e-06, "loss": 0.0101, "step": 53730 }, { "epoch": 0.9075556456243455, "grad_norm": 0.3722456395626068, "learning_rate": 6.648011658372791e-06, "loss": 0.0113, "step": 53740 }, { "epoch": 0.9077245246056675, "grad_norm": 0.2919871509075165, "learning_rate": 6.646620190265252e-06, "loss": 0.0123, "step": 53750 }, { "epoch": 0.9078934035869896, "grad_norm": 0.3157739043235779, "learning_rate": 6.645228579103386e-06, "loss": 0.0106, "step": 53760 }, { "epoch": 0.9080622825683116, "grad_norm": 0.3014819025993347, "learning_rate": 6.643836825008093e-06, "loss": 0.0071, "step": 53770 }, { "epoch": 0.9082311615496336, "grad_norm": 0.1614004224538803, "learning_rate": 6.642444928100283e-06, "loss": 0.0085, "step": 53780 }, { "epoch": 0.9084000405309555, "grad_norm": 0.4599494934082031, "learning_rate": 6.641052888500884e-06, "loss": 0.0082, "step": 53790 }, { "epoch": 0.9085689195122775, "grad_norm": 0.2872128188610077, "learning_rate": 6.6396607063308304e-06, "loss": 0.0082, "step": 53800 }, { "epoch": 0.9087377984935995, "grad_norm": 0.3576146364212036, "learning_rate": 6.638268381711072e-06, "loss": 0.0083, "step": 53810 }, { "epoch": 0.9089066774749215, "grad_norm": 0.43911659717559814, "learning_rate": 6.636875914762571e-06, "loss": 0.0133, "step": 53820 }, { "epoch": 0.9090755564562435, "grad_norm": 0.2704036831855774, "learning_rate": 6.635483305606303e-06, "loss": 0.0088, "step": 53830 }, { "epoch": 0.9092444354375654, "grad_norm": 0.20017552375793457, "learning_rate": 6.634090554363252e-06, "loss": 0.0079, "step": 53840 }, { "epoch": 0.9094133144188874, "grad_norm": 0.37793007493019104, "learning_rate": 6.632697661154418e-06, "loss": 0.0061, "step": 53850 }, { "epoch": 0.9095821934002094, "grad_norm": 0.2485756129026413, "learning_rate": 6.6313046261008105e-06, "loss": 0.0099, "step": 53860 }, { "epoch": 0.9097510723815314, "grad_norm": 0.1377669870853424, "learning_rate": 6.629911449323455e-06, "loss": 0.0116, "step": 53870 }, { "epoch": 0.9099199513628534, "grad_norm": 0.19173584878444672, "learning_rate": 6.628518130943387e-06, "loss": 0.0079, "step": 53880 }, { "epoch": 0.9100888303441753, "grad_norm": 0.4024132490158081, "learning_rate": 6.627124671081655e-06, "loss": 0.0123, "step": 53890 }, { "epoch": 0.9102577093254973, "grad_norm": 0.14853233098983765, "learning_rate": 6.625731069859318e-06, "loss": 0.0095, "step": 53900 }, { "epoch": 0.9104265883068193, "grad_norm": 0.19563165307044983, "learning_rate": 6.62433732739745e-06, "loss": 0.0108, "step": 53910 }, { "epoch": 0.9105954672881413, "grad_norm": 0.19793400168418884, "learning_rate": 6.622943443817133e-06, "loss": 0.012, "step": 53920 }, { "epoch": 0.9107643462694633, "grad_norm": 0.36964282393455505, "learning_rate": 6.621549419239469e-06, "loss": 0.0129, "step": 53930 }, { "epoch": 0.9109332252507852, "grad_norm": 0.10548999905586243, "learning_rate": 6.620155253785563e-06, "loss": 0.0103, "step": 53940 }, { "epoch": 0.9111021042321072, "grad_norm": 0.3623182475566864, "learning_rate": 6.6187609475765405e-06, "loss": 0.0081, "step": 53950 }, { "epoch": 0.9112709832134293, "grad_norm": 0.36943569779396057, "learning_rate": 6.617366500733532e-06, "loss": 0.0113, "step": 53960 }, { "epoch": 0.9114398621947513, "grad_norm": 0.7679765224456787, "learning_rate": 6.615971913377686e-06, "loss": 0.0124, "step": 53970 }, { "epoch": 0.9116087411760733, "grad_norm": 0.27403223514556885, "learning_rate": 6.6145771856301585e-06, "loss": 0.0087, "step": 53980 }, { "epoch": 0.9117776201573952, "grad_norm": 0.2813080847263336, "learning_rate": 6.6131823176121225e-06, "loss": 0.0097, "step": 53990 }, { "epoch": 0.9119464991387172, "grad_norm": 0.17988568544387817, "learning_rate": 6.61178730944476e-06, "loss": 0.0096, "step": 54000 }, { "epoch": 0.9121153781200392, "grad_norm": 0.22310368716716766, "learning_rate": 6.610392161249264e-06, "loss": 0.0095, "step": 54010 }, { "epoch": 0.9122842571013612, "grad_norm": 0.4530591666698456, "learning_rate": 6.608996873146845e-06, "loss": 0.0114, "step": 54020 }, { "epoch": 0.9124531360826832, "grad_norm": 0.13600093126296997, "learning_rate": 6.607601445258719e-06, "loss": 0.0087, "step": 54030 }, { "epoch": 0.9126220150640051, "grad_norm": 0.17714829742908478, "learning_rate": 6.606205877706119e-06, "loss": 0.0135, "step": 54040 }, { "epoch": 0.9127908940453271, "grad_norm": 0.3642262816429138, "learning_rate": 6.604810170610287e-06, "loss": 0.0123, "step": 54050 }, { "epoch": 0.9129597730266491, "grad_norm": 0.5824573636054993, "learning_rate": 6.603414324092481e-06, "loss": 0.0138, "step": 54060 }, { "epoch": 0.9131286520079711, "grad_norm": 0.21692481637001038, "learning_rate": 6.602018338273968e-06, "loss": 0.01, "step": 54070 }, { "epoch": 0.9132975309892931, "grad_norm": 0.4959920346736908, "learning_rate": 6.600622213276027e-06, "loss": 0.0114, "step": 54080 }, { "epoch": 0.913466409970615, "grad_norm": 0.3361893594264984, "learning_rate": 6.599225949219949e-06, "loss": 0.0083, "step": 54090 }, { "epoch": 0.913635288951937, "grad_norm": 0.24080882966518402, "learning_rate": 6.597829546227041e-06, "loss": 0.0078, "step": 54100 }, { "epoch": 0.913804167933259, "grad_norm": 0.1347191482782364, "learning_rate": 6.596433004418616e-06, "loss": 0.0079, "step": 54110 }, { "epoch": 0.913973046914581, "grad_norm": 0.31009313464164734, "learning_rate": 6.595036323916003e-06, "loss": 0.0096, "step": 54120 }, { "epoch": 0.914141925895903, "grad_norm": 0.10256271809339523, "learning_rate": 6.593639504840543e-06, "loss": 0.01, "step": 54130 }, { "epoch": 0.9143108048772249, "grad_norm": 0.2479535937309265, "learning_rate": 6.59224254731359e-06, "loss": 0.009, "step": 54140 }, { "epoch": 0.9144796838585469, "grad_norm": 0.30482804775238037, "learning_rate": 6.590845451456504e-06, "loss": 0.0097, "step": 54150 }, { "epoch": 0.914648562839869, "grad_norm": 0.3364507555961609, "learning_rate": 6.589448217390664e-06, "loss": 0.0103, "step": 54160 }, { "epoch": 0.914817441821191, "grad_norm": 0.12196140736341476, "learning_rate": 6.588050845237457e-06, "loss": 0.0095, "step": 54170 }, { "epoch": 0.914986320802513, "grad_norm": 0.23746564984321594, "learning_rate": 6.586653335118284e-06, "loss": 0.0114, "step": 54180 }, { "epoch": 0.9151551997838349, "grad_norm": 0.29595857858657837, "learning_rate": 6.5852556871545566e-06, "loss": 0.0118, "step": 54190 }, { "epoch": 0.9153240787651569, "grad_norm": 0.28002676367759705, "learning_rate": 6.583857901467701e-06, "loss": 0.0068, "step": 54200 }, { "epoch": 0.9154929577464789, "grad_norm": 0.5556203722953796, "learning_rate": 6.582459978179152e-06, "loss": 0.0118, "step": 54210 }, { "epoch": 0.9156618367278009, "grad_norm": 0.3014213442802429, "learning_rate": 6.581061917410357e-06, "loss": 0.0072, "step": 54220 }, { "epoch": 0.9158307157091229, "grad_norm": 0.3002732992172241, "learning_rate": 6.579663719282777e-06, "loss": 0.0142, "step": 54230 }, { "epoch": 0.9159995946904448, "grad_norm": 0.23898988962173462, "learning_rate": 6.5782653839178835e-06, "loss": 0.0098, "step": 54240 }, { "epoch": 0.9161684736717668, "grad_norm": 0.2804737985134125, "learning_rate": 6.576866911437161e-06, "loss": 0.0103, "step": 54250 }, { "epoch": 0.9163373526530888, "grad_norm": 0.2124757617712021, "learning_rate": 6.575468301962105e-06, "loss": 0.0103, "step": 54260 }, { "epoch": 0.9165062316344108, "grad_norm": 0.40061989426612854, "learning_rate": 6.5740695556142235e-06, "loss": 0.0108, "step": 54270 }, { "epoch": 0.9166751106157328, "grad_norm": 0.36455273628234863, "learning_rate": 6.572670672515037e-06, "loss": 0.0108, "step": 54280 }, { "epoch": 0.9168439895970547, "grad_norm": 0.15739507973194122, "learning_rate": 6.5712716527860755e-06, "loss": 0.0077, "step": 54290 }, { "epoch": 0.9170128685783767, "grad_norm": 0.20441177487373352, "learning_rate": 6.569872496548881e-06, "loss": 0.0118, "step": 54300 }, { "epoch": 0.9171817475596987, "grad_norm": 0.25414150953292847, "learning_rate": 6.5684732039250135e-06, "loss": 0.01, "step": 54310 }, { "epoch": 0.9173506265410207, "grad_norm": 0.4985349178314209, "learning_rate": 6.567073775036037e-06, "loss": 0.0106, "step": 54320 }, { "epoch": 0.9175195055223427, "grad_norm": 0.3089331090450287, "learning_rate": 6.5656742100035295e-06, "loss": 0.0082, "step": 54330 }, { "epoch": 0.9176883845036646, "grad_norm": 0.3505074083805084, "learning_rate": 6.564274508949085e-06, "loss": 0.0129, "step": 54340 }, { "epoch": 0.9178572634849866, "grad_norm": 0.7978012561798096, "learning_rate": 6.562874671994302e-06, "loss": 0.0158, "step": 54350 }, { "epoch": 0.9180261424663086, "grad_norm": 0.12511873245239258, "learning_rate": 6.561474699260798e-06, "loss": 0.0108, "step": 54360 }, { "epoch": 0.9181950214476307, "grad_norm": 0.17449937760829926, "learning_rate": 6.560074590870198e-06, "loss": 0.0063, "step": 54370 }, { "epoch": 0.9183639004289527, "grad_norm": 0.11835096776485443, "learning_rate": 6.558674346944141e-06, "loss": 0.011, "step": 54380 }, { "epoch": 0.9185327794102746, "grad_norm": 0.691961944103241, "learning_rate": 6.557273967604276e-06, "loss": 0.0106, "step": 54390 }, { "epoch": 0.9187016583915966, "grad_norm": 0.43936392664909363, "learning_rate": 6.555873452972266e-06, "loss": 0.0106, "step": 54400 }, { "epoch": 0.9188705373729186, "grad_norm": 0.0823487862944603, "learning_rate": 6.55447280316978e-06, "loss": 0.0104, "step": 54410 }, { "epoch": 0.9190394163542406, "grad_norm": 0.45896318554878235, "learning_rate": 6.553072018318507e-06, "loss": 0.0107, "step": 54420 }, { "epoch": 0.9192082953355626, "grad_norm": 0.16197288036346436, "learning_rate": 6.551671098540143e-06, "loss": 0.0128, "step": 54430 }, { "epoch": 0.9193771743168845, "grad_norm": 0.5161347985267639, "learning_rate": 6.550270043956397e-06, "loss": 0.0114, "step": 54440 }, { "epoch": 0.9195460532982065, "grad_norm": 0.3273104131221771, "learning_rate": 6.548868854688988e-06, "loss": 0.0088, "step": 54450 }, { "epoch": 0.9197149322795285, "grad_norm": 0.26551368832588196, "learning_rate": 6.547467530859648e-06, "loss": 0.0092, "step": 54460 }, { "epoch": 0.9198838112608505, "grad_norm": 0.3280814290046692, "learning_rate": 6.546066072590119e-06, "loss": 0.0102, "step": 54470 }, { "epoch": 0.9200526902421725, "grad_norm": 0.12918919324874878, "learning_rate": 6.54466448000216e-06, "loss": 0.0091, "step": 54480 }, { "epoch": 0.9202215692234944, "grad_norm": 0.3368794918060303, "learning_rate": 6.543262753217535e-06, "loss": 0.0101, "step": 54490 }, { "epoch": 0.9203904482048164, "grad_norm": 0.15949943661689758, "learning_rate": 6.541860892358025e-06, "loss": 0.008, "step": 54500 }, { "epoch": 0.9205593271861384, "grad_norm": 0.3372664749622345, "learning_rate": 6.540458897545416e-06, "loss": 0.0075, "step": 54510 }, { "epoch": 0.9207282061674604, "grad_norm": 0.21994304656982422, "learning_rate": 6.539056768901516e-06, "loss": 0.0064, "step": 54520 }, { "epoch": 0.9208970851487824, "grad_norm": 0.4607284963130951, "learning_rate": 6.537654506548134e-06, "loss": 0.0086, "step": 54530 }, { "epoch": 0.9210659641301043, "grad_norm": 0.4196021258831024, "learning_rate": 6.5362521106070945e-06, "loss": 0.0056, "step": 54540 }, { "epoch": 0.9212348431114263, "grad_norm": 0.22597499191761017, "learning_rate": 6.534849581200238e-06, "loss": 0.0098, "step": 54550 }, { "epoch": 0.9214037220927483, "grad_norm": 0.3922370970249176, "learning_rate": 6.5334469184494096e-06, "loss": 0.0114, "step": 54560 }, { "epoch": 0.9215726010740704, "grad_norm": 0.16916987299919128, "learning_rate": 6.532044122476473e-06, "loss": 0.0105, "step": 54570 }, { "epoch": 0.9217414800553924, "grad_norm": 0.4125750958919525, "learning_rate": 6.530641193403294e-06, "loss": 0.013, "step": 54580 }, { "epoch": 0.9219103590367143, "grad_norm": 0.3674401044845581, "learning_rate": 6.529238131351761e-06, "loss": 0.0121, "step": 54590 }, { "epoch": 0.9220792380180363, "grad_norm": 0.35330501198768616, "learning_rate": 6.527834936443764e-06, "loss": 0.0087, "step": 54600 }, { "epoch": 0.9222481169993583, "grad_norm": 0.2922072410583496, "learning_rate": 6.526431608801214e-06, "loss": 0.0133, "step": 54610 }, { "epoch": 0.9224169959806803, "grad_norm": 0.21888259053230286, "learning_rate": 6.525028148546024e-06, "loss": 0.0066, "step": 54620 }, { "epoch": 0.9225858749620023, "grad_norm": 0.27259740233421326, "learning_rate": 6.5236245558001276e-06, "loss": 0.0103, "step": 54630 }, { "epoch": 0.9227547539433242, "grad_norm": 0.31364986300468445, "learning_rate": 6.522220830685462e-06, "loss": 0.0134, "step": 54640 }, { "epoch": 0.9229236329246462, "grad_norm": 0.31807342171669006, "learning_rate": 6.52081697332398e-06, "loss": 0.0064, "step": 54650 }, { "epoch": 0.9230925119059682, "grad_norm": 0.17528735101222992, "learning_rate": 6.519412983837648e-06, "loss": 0.0088, "step": 54660 }, { "epoch": 0.9232613908872902, "grad_norm": 0.41926589608192444, "learning_rate": 6.5180088623484374e-06, "loss": 0.0106, "step": 54670 }, { "epoch": 0.9234302698686122, "grad_norm": 0.43008750677108765, "learning_rate": 6.516604608978336e-06, "loss": 0.0127, "step": 54680 }, { "epoch": 0.9235991488499341, "grad_norm": 0.28107500076293945, "learning_rate": 6.515200223849345e-06, "loss": 0.0087, "step": 54690 }, { "epoch": 0.9237680278312561, "grad_norm": 0.10546425729990005, "learning_rate": 6.513795707083469e-06, "loss": 0.012, "step": 54700 }, { "epoch": 0.9239369068125781, "grad_norm": 0.4181738495826721, "learning_rate": 6.5123910588027315e-06, "loss": 0.0113, "step": 54710 }, { "epoch": 0.9241057857939001, "grad_norm": 0.4072757959365845, "learning_rate": 6.510986279129166e-06, "loss": 0.0111, "step": 54720 }, { "epoch": 0.924274664775222, "grad_norm": 0.36772745847702026, "learning_rate": 6.509581368184814e-06, "loss": 0.0105, "step": 54730 }, { "epoch": 0.924443543756544, "grad_norm": 0.49249911308288574, "learning_rate": 6.508176326091732e-06, "loss": 0.0082, "step": 54740 }, { "epoch": 0.924612422737866, "grad_norm": 0.30287471413612366, "learning_rate": 6.506771152971987e-06, "loss": 0.0118, "step": 54750 }, { "epoch": 0.924781301719188, "grad_norm": 0.2703082263469696, "learning_rate": 6.505365848947656e-06, "loss": 0.0132, "step": 54760 }, { "epoch": 0.92495018070051, "grad_norm": 0.11780010908842087, "learning_rate": 6.503960414140827e-06, "loss": 0.0067, "step": 54770 }, { "epoch": 0.925119059681832, "grad_norm": 0.3346705436706543, "learning_rate": 6.5025548486736046e-06, "loss": 0.0101, "step": 54780 }, { "epoch": 0.925287938663154, "grad_norm": 0.21314051747322083, "learning_rate": 6.501149152668098e-06, "loss": 0.0077, "step": 54790 }, { "epoch": 0.925456817644476, "grad_norm": 0.5059207081794739, "learning_rate": 6.499743326246433e-06, "loss": 0.0122, "step": 54800 }, { "epoch": 0.925625696625798, "grad_norm": 0.17600131034851074, "learning_rate": 6.49833736953074e-06, "loss": 0.0102, "step": 54810 }, { "epoch": 0.92579457560712, "grad_norm": 0.3322259485721588, "learning_rate": 6.496931282643169e-06, "loss": 0.0099, "step": 54820 }, { "epoch": 0.9259634545884419, "grad_norm": 0.22110185027122498, "learning_rate": 6.495525065705876e-06, "loss": 0.0111, "step": 54830 }, { "epoch": 0.9261323335697639, "grad_norm": 0.27078935503959656, "learning_rate": 6.4941187188410314e-06, "loss": 0.0139, "step": 54840 }, { "epoch": 0.9263012125510859, "grad_norm": 0.13937582075595856, "learning_rate": 6.492712242170813e-06, "loss": 0.0071, "step": 54850 }, { "epoch": 0.9264700915324079, "grad_norm": 0.47244712710380554, "learning_rate": 6.4913056358174124e-06, "loss": 0.0102, "step": 54860 }, { "epoch": 0.9266389705137299, "grad_norm": 0.6047300696372986, "learning_rate": 6.489898899903032e-06, "loss": 0.015, "step": 54870 }, { "epoch": 0.9268078494950518, "grad_norm": 0.3968108892440796, "learning_rate": 6.488492034549887e-06, "loss": 0.0112, "step": 54880 }, { "epoch": 0.9269767284763738, "grad_norm": 0.20437461137771606, "learning_rate": 6.4870850398802e-06, "loss": 0.0051, "step": 54890 }, { "epoch": 0.9271456074576958, "grad_norm": 0.17742449045181274, "learning_rate": 6.485677916016208e-06, "loss": 0.0083, "step": 54900 }, { "epoch": 0.9273144864390178, "grad_norm": 0.4514216184616089, "learning_rate": 6.484270663080162e-06, "loss": 0.0129, "step": 54910 }, { "epoch": 0.9274833654203398, "grad_norm": 0.19951798021793365, "learning_rate": 6.482863281194316e-06, "loss": 0.0121, "step": 54920 }, { "epoch": 0.9276522444016617, "grad_norm": 0.3464743196964264, "learning_rate": 6.481455770480942e-06, "loss": 0.0094, "step": 54930 }, { "epoch": 0.9278211233829837, "grad_norm": 0.21536368131637573, "learning_rate": 6.480048131062321e-06, "loss": 0.0098, "step": 54940 }, { "epoch": 0.9279900023643057, "grad_norm": 0.33820679783821106, "learning_rate": 6.478640363060744e-06, "loss": 0.0124, "step": 54950 }, { "epoch": 0.9281588813456277, "grad_norm": 0.299714595079422, "learning_rate": 6.477232466598516e-06, "loss": 0.0217, "step": 54960 }, { "epoch": 0.9283277603269497, "grad_norm": 0.20702427625656128, "learning_rate": 6.475824441797952e-06, "loss": 0.0142, "step": 54970 }, { "epoch": 0.9284966393082716, "grad_norm": 0.4743048846721649, "learning_rate": 6.474416288781375e-06, "loss": 0.0094, "step": 54980 }, { "epoch": 0.9286655182895937, "grad_norm": 0.3126533031463623, "learning_rate": 6.473008007671126e-06, "loss": 0.0092, "step": 54990 }, { "epoch": 0.9288343972709157, "grad_norm": 0.3446708619594574, "learning_rate": 6.47159959858955e-06, "loss": 0.0146, "step": 55000 }, { "epoch": 0.9290032762522377, "grad_norm": 0.16820558905601501, "learning_rate": 6.470191061659006e-06, "loss": 0.0092, "step": 55010 }, { "epoch": 0.9291721552335597, "grad_norm": 0.1957859843969345, "learning_rate": 6.4687823970018645e-06, "loss": 0.0054, "step": 55020 }, { "epoch": 0.9293410342148816, "grad_norm": 0.16107703745365143, "learning_rate": 6.467373604740509e-06, "loss": 0.0085, "step": 55030 }, { "epoch": 0.9295099131962036, "grad_norm": 0.29958686232566833, "learning_rate": 6.46596468499733e-06, "loss": 0.0152, "step": 55040 }, { "epoch": 0.9296787921775256, "grad_norm": 0.4761463403701782, "learning_rate": 6.464555637894732e-06, "loss": 0.0099, "step": 55050 }, { "epoch": 0.9298476711588476, "grad_norm": 0.5681745409965515, "learning_rate": 6.463146463555128e-06, "loss": 0.0134, "step": 55060 }, { "epoch": 0.9300165501401696, "grad_norm": 0.2340470403432846, "learning_rate": 6.461737162100944e-06, "loss": 0.0072, "step": 55070 }, { "epoch": 0.9301854291214915, "grad_norm": 0.10169792920351028, "learning_rate": 6.460327733654617e-06, "loss": 0.0128, "step": 55080 }, { "epoch": 0.9303543081028135, "grad_norm": 0.23338598012924194, "learning_rate": 6.458918178338596e-06, "loss": 0.0119, "step": 55090 }, { "epoch": 0.9305231870841355, "grad_norm": 0.7757489681243896, "learning_rate": 6.457508496275337e-06, "loss": 0.0114, "step": 55100 }, { "epoch": 0.9306920660654575, "grad_norm": 0.31056904792785645, "learning_rate": 6.456098687587313e-06, "loss": 0.0116, "step": 55110 }, { "epoch": 0.9308609450467795, "grad_norm": 0.2186836153268814, "learning_rate": 6.4546887523970036e-06, "loss": 0.0107, "step": 55120 }, { "epoch": 0.9310298240281014, "grad_norm": 0.42580774426460266, "learning_rate": 6.453278690826898e-06, "loss": 0.0095, "step": 55130 }, { "epoch": 0.9311987030094234, "grad_norm": 0.3651424050331116, "learning_rate": 6.451868502999501e-06, "loss": 0.0125, "step": 55140 }, { "epoch": 0.9313675819907454, "grad_norm": 0.21730683743953705, "learning_rate": 6.450458189037326e-06, "loss": 0.0112, "step": 55150 }, { "epoch": 0.9315364609720674, "grad_norm": 0.4306820034980774, "learning_rate": 6.4490477490629e-06, "loss": 0.0093, "step": 55160 }, { "epoch": 0.9317053399533894, "grad_norm": 0.37097322940826416, "learning_rate": 6.4476371831987535e-06, "loss": 0.0107, "step": 55170 }, { "epoch": 0.9318742189347113, "grad_norm": 0.3663095235824585, "learning_rate": 6.446226491567437e-06, "loss": 0.0126, "step": 55180 }, { "epoch": 0.9320430979160333, "grad_norm": 0.24414651095867157, "learning_rate": 6.444815674291507e-06, "loss": 0.0102, "step": 55190 }, { "epoch": 0.9322119768973554, "grad_norm": 0.18276557326316833, "learning_rate": 6.44340473149353e-06, "loss": 0.0084, "step": 55200 }, { "epoch": 0.9323808558786774, "grad_norm": 0.11836813390254974, "learning_rate": 6.441993663296088e-06, "loss": 0.0079, "step": 55210 }, { "epoch": 0.9325497348599994, "grad_norm": 0.27265021204948425, "learning_rate": 6.4405824698217715e-06, "loss": 0.0108, "step": 55220 }, { "epoch": 0.9327186138413213, "grad_norm": 0.2930389940738678, "learning_rate": 6.439171151193178e-06, "loss": 0.0085, "step": 55230 }, { "epoch": 0.9328874928226433, "grad_norm": 0.2563643753528595, "learning_rate": 6.437759707532923e-06, "loss": 0.0094, "step": 55240 }, { "epoch": 0.9330563718039653, "grad_norm": 0.33495602011680603, "learning_rate": 6.436348138963625e-06, "loss": 0.0094, "step": 55250 }, { "epoch": 0.9332252507852873, "grad_norm": 0.21081100404262543, "learning_rate": 6.4349364456079214e-06, "loss": 0.0089, "step": 55260 }, { "epoch": 0.9333941297666093, "grad_norm": 0.23020637035369873, "learning_rate": 6.433524627588456e-06, "loss": 0.0091, "step": 55270 }, { "epoch": 0.9335630087479312, "grad_norm": 0.3157247006893158, "learning_rate": 6.4321126850278834e-06, "loss": 0.0141, "step": 55280 }, { "epoch": 0.9337318877292532, "grad_norm": 0.2562662363052368, "learning_rate": 6.430700618048869e-06, "loss": 0.0127, "step": 55290 }, { "epoch": 0.9339007667105752, "grad_norm": 0.45137542486190796, "learning_rate": 6.429288426774093e-06, "loss": 0.0123, "step": 55300 }, { "epoch": 0.9340696456918972, "grad_norm": 0.44689369201660156, "learning_rate": 6.427876111326239e-06, "loss": 0.0101, "step": 55310 }, { "epoch": 0.9342385246732192, "grad_norm": 0.5044358372688293, "learning_rate": 6.426463671828007e-06, "loss": 0.0153, "step": 55320 }, { "epoch": 0.9344074036545411, "grad_norm": 0.4072774648666382, "learning_rate": 6.4250511084021075e-06, "loss": 0.015, "step": 55330 }, { "epoch": 0.9345762826358631, "grad_norm": 0.3212032616138458, "learning_rate": 6.42363842117126e-06, "loss": 0.0096, "step": 55340 }, { "epoch": 0.9347451616171851, "grad_norm": 0.202091246843338, "learning_rate": 6.422225610258194e-06, "loss": 0.0058, "step": 55350 }, { "epoch": 0.9349140405985071, "grad_norm": 0.3696402609348297, "learning_rate": 6.420812675785653e-06, "loss": 0.011, "step": 55360 }, { "epoch": 0.9350829195798291, "grad_norm": 0.2349979281425476, "learning_rate": 6.419399617876387e-06, "loss": 0.013, "step": 55370 }, { "epoch": 0.935251798561151, "grad_norm": 0.20317985117435455, "learning_rate": 6.41798643665316e-06, "loss": 0.0102, "step": 55380 }, { "epoch": 0.935420677542473, "grad_norm": 0.2946048974990845, "learning_rate": 6.416573132238747e-06, "loss": 0.0091, "step": 55390 }, { "epoch": 0.935589556523795, "grad_norm": 0.2786811888217926, "learning_rate": 6.415159704755931e-06, "loss": 0.0123, "step": 55400 }, { "epoch": 0.9357584355051171, "grad_norm": 0.27554652094841003, "learning_rate": 6.413746154327508e-06, "loss": 0.006, "step": 55410 }, { "epoch": 0.9359273144864391, "grad_norm": 0.3420974314212799, "learning_rate": 6.412332481076285e-06, "loss": 0.0099, "step": 55420 }, { "epoch": 0.936096193467761, "grad_norm": 0.3348231911659241, "learning_rate": 6.4109186851250735e-06, "loss": 0.0106, "step": 55430 }, { "epoch": 0.936265072449083, "grad_norm": 0.16080062091350555, "learning_rate": 6.409504766596703e-06, "loss": 0.0093, "step": 55440 }, { "epoch": 0.936433951430405, "grad_norm": 0.2660214602947235, "learning_rate": 6.408090725614016e-06, "loss": 0.0108, "step": 55450 }, { "epoch": 0.936602830411727, "grad_norm": 0.2629479169845581, "learning_rate": 6.406676562299855e-06, "loss": 0.0069, "step": 55460 }, { "epoch": 0.936771709393049, "grad_norm": 0.09295685589313507, "learning_rate": 6.405262276777082e-06, "loss": 0.0067, "step": 55470 }, { "epoch": 0.9369405883743709, "grad_norm": 0.2588566243648529, "learning_rate": 6.403847869168566e-06, "loss": 0.0119, "step": 55480 }, { "epoch": 0.9371094673556929, "grad_norm": 0.3465660512447357, "learning_rate": 6.402433339597187e-06, "loss": 0.0102, "step": 55490 }, { "epoch": 0.9372783463370149, "grad_norm": 0.2266152799129486, "learning_rate": 6.401018688185834e-06, "loss": 0.0126, "step": 55500 }, { "epoch": 0.9374472253183369, "grad_norm": 0.6839981079101562, "learning_rate": 6.3996039150574115e-06, "loss": 0.0113, "step": 55510 }, { "epoch": 0.9376161042996589, "grad_norm": 0.2410285919904709, "learning_rate": 6.398189020334831e-06, "loss": 0.0123, "step": 55520 }, { "epoch": 0.9377849832809808, "grad_norm": 0.4620104432106018, "learning_rate": 6.396774004141015e-06, "loss": 0.0109, "step": 55530 }, { "epoch": 0.9379538622623028, "grad_norm": 0.45715439319610596, "learning_rate": 6.395358866598895e-06, "loss": 0.0099, "step": 55540 }, { "epoch": 0.9381227412436248, "grad_norm": 0.36013928055763245, "learning_rate": 6.393943607831417e-06, "loss": 0.0127, "step": 55550 }, { "epoch": 0.9382916202249468, "grad_norm": 0.32746651768684387, "learning_rate": 6.392528227961532e-06, "loss": 0.0083, "step": 55560 }, { "epoch": 0.9384604992062688, "grad_norm": 0.5399194359779358, "learning_rate": 6.391112727112209e-06, "loss": 0.0103, "step": 55570 }, { "epoch": 0.9386293781875907, "grad_norm": 0.24238228797912598, "learning_rate": 6.389697105406419e-06, "loss": 0.0062, "step": 55580 }, { "epoch": 0.9387982571689127, "grad_norm": 0.1749555617570877, "learning_rate": 6.388281362967153e-06, "loss": 0.0086, "step": 55590 }, { "epoch": 0.9389671361502347, "grad_norm": 0.2608165740966797, "learning_rate": 6.3868654999174e-06, "loss": 0.013, "step": 55600 }, { "epoch": 0.9391360151315568, "grad_norm": 0.1804370880126953, "learning_rate": 6.385449516380174e-06, "loss": 0.0067, "step": 55610 }, { "epoch": 0.9393048941128788, "grad_norm": 0.2789192497730255, "learning_rate": 6.384033412478486e-06, "loss": 0.0107, "step": 55620 }, { "epoch": 0.9394737730942007, "grad_norm": 0.3739922344684601, "learning_rate": 6.38261718833537e-06, "loss": 0.0107, "step": 55630 }, { "epoch": 0.9396426520755227, "grad_norm": 0.2321838140487671, "learning_rate": 6.381200844073857e-06, "loss": 0.0126, "step": 55640 }, { "epoch": 0.9398115310568447, "grad_norm": 0.15967710316181183, "learning_rate": 6.379784379817001e-06, "loss": 0.014, "step": 55650 }, { "epoch": 0.9399804100381667, "grad_norm": 0.47813835740089417, "learning_rate": 6.3783677956878585e-06, "loss": 0.0098, "step": 55660 }, { "epoch": 0.9401492890194887, "grad_norm": 0.3146287798881531, "learning_rate": 6.3769510918095e-06, "loss": 0.0113, "step": 55670 }, { "epoch": 0.9403181680008106, "grad_norm": 0.14982551336288452, "learning_rate": 6.3755342683050034e-06, "loss": 0.0075, "step": 55680 }, { "epoch": 0.9404870469821326, "grad_norm": 0.19030222296714783, "learning_rate": 6.374117325297462e-06, "loss": 0.0105, "step": 55690 }, { "epoch": 0.9406559259634546, "grad_norm": 0.35190343856811523, "learning_rate": 6.372700262909973e-06, "loss": 0.0138, "step": 55700 }, { "epoch": 0.9408248049447766, "grad_norm": 0.48482441902160645, "learning_rate": 6.37128308126565e-06, "loss": 0.0102, "step": 55710 }, { "epoch": 0.9409936839260986, "grad_norm": 0.40793636441230774, "learning_rate": 6.369865780487613e-06, "loss": 0.0119, "step": 55720 }, { "epoch": 0.9411625629074205, "grad_norm": 0.2355470210313797, "learning_rate": 6.368448360698992e-06, "loss": 0.0127, "step": 55730 }, { "epoch": 0.9413314418887425, "grad_norm": 0.21711352467536926, "learning_rate": 6.367030822022931e-06, "loss": 0.0127, "step": 55740 }, { "epoch": 0.9415003208700645, "grad_norm": 0.16572336852550507, "learning_rate": 6.365613164582585e-06, "loss": 0.0073, "step": 55750 }, { "epoch": 0.9416691998513865, "grad_norm": 0.3262840509414673, "learning_rate": 6.3641953885011095e-06, "loss": 0.0096, "step": 55760 }, { "epoch": 0.9418380788327085, "grad_norm": 0.319265216588974, "learning_rate": 6.362777493901684e-06, "loss": 0.0123, "step": 55770 }, { "epoch": 0.9420069578140304, "grad_norm": 0.44849449396133423, "learning_rate": 6.361359480907488e-06, "loss": 0.0118, "step": 55780 }, { "epoch": 0.9421758367953524, "grad_norm": 0.2376345843076706, "learning_rate": 6.359941349641716e-06, "loss": 0.0087, "step": 55790 }, { "epoch": 0.9423447157766744, "grad_norm": 0.22566260397434235, "learning_rate": 6.358523100227573e-06, "loss": 0.0108, "step": 55800 }, { "epoch": 0.9425135947579965, "grad_norm": 0.3094267249107361, "learning_rate": 6.357104732788272e-06, "loss": 0.0091, "step": 55810 }, { "epoch": 0.9426824737393183, "grad_norm": 0.32871103286743164, "learning_rate": 6.355686247447035e-06, "loss": 0.0147, "step": 55820 }, { "epoch": 0.9428513527206404, "grad_norm": 0.2580513656139374, "learning_rate": 6.3542676443271015e-06, "loss": 0.0126, "step": 55830 }, { "epoch": 0.9430202317019624, "grad_norm": 0.49007415771484375, "learning_rate": 6.3528489235517125e-06, "loss": 0.0112, "step": 55840 }, { "epoch": 0.9431891106832844, "grad_norm": 0.2979622781276703, "learning_rate": 6.351430085244124e-06, "loss": 0.0103, "step": 55850 }, { "epoch": 0.9433579896646064, "grad_norm": 0.162624791264534, "learning_rate": 6.3500111295276e-06, "loss": 0.0096, "step": 55860 }, { "epoch": 0.9435268686459283, "grad_norm": 0.13929609954357147, "learning_rate": 6.3485920565254176e-06, "loss": 0.0105, "step": 55870 }, { "epoch": 0.9436957476272503, "grad_norm": 0.1961963176727295, "learning_rate": 6.347172866360862e-06, "loss": 0.0141, "step": 55880 }, { "epoch": 0.9438646266085723, "grad_norm": 0.2156241089105606, "learning_rate": 6.34575355915723e-06, "loss": 0.0052, "step": 55890 }, { "epoch": 0.9440335055898943, "grad_norm": 0.2447277456521988, "learning_rate": 6.344334135037826e-06, "loss": 0.0133, "step": 55900 }, { "epoch": 0.9442023845712163, "grad_norm": 0.23165710270404816, "learning_rate": 6.342914594125967e-06, "loss": 0.0058, "step": 55910 }, { "epoch": 0.9443712635525382, "grad_norm": 0.257876992225647, "learning_rate": 6.341494936544976e-06, "loss": 0.014, "step": 55920 }, { "epoch": 0.9445401425338602, "grad_norm": 0.1513327807188034, "learning_rate": 6.340075162418194e-06, "loss": 0.0111, "step": 55930 }, { "epoch": 0.9447090215151822, "grad_norm": 0.27724918723106384, "learning_rate": 6.338655271868965e-06, "loss": 0.0117, "step": 55940 }, { "epoch": 0.9448779004965042, "grad_norm": 0.16634225845336914, "learning_rate": 6.337235265020647e-06, "loss": 0.0075, "step": 55950 }, { "epoch": 0.9450467794778262, "grad_norm": 0.2546747028827667, "learning_rate": 6.335815141996606e-06, "loss": 0.007, "step": 55960 }, { "epoch": 0.9452156584591481, "grad_norm": 0.16532868146896362, "learning_rate": 6.334394902920218e-06, "loss": 0.0091, "step": 55970 }, { "epoch": 0.9453845374404701, "grad_norm": 0.13651470839977264, "learning_rate": 6.33297454791487e-06, "loss": 0.0058, "step": 55980 }, { "epoch": 0.9455534164217921, "grad_norm": 0.17126858234405518, "learning_rate": 6.33155407710396e-06, "loss": 0.0091, "step": 55990 }, { "epoch": 0.9457222954031141, "grad_norm": 0.322367399930954, "learning_rate": 6.330133490610897e-06, "loss": 0.009, "step": 56000 }, { "epoch": 0.9458911743844362, "grad_norm": 0.2507932186126709, "learning_rate": 6.328712788559093e-06, "loss": 0.0125, "step": 56010 }, { "epoch": 0.946060053365758, "grad_norm": 0.342790812253952, "learning_rate": 6.327291971071979e-06, "loss": 0.0149, "step": 56020 }, { "epoch": 0.94622893234708, "grad_norm": 0.1578563153743744, "learning_rate": 6.32587103827299e-06, "loss": 0.0129, "step": 56030 }, { "epoch": 0.9463978113284021, "grad_norm": 0.11944938451051712, "learning_rate": 6.324449990285575e-06, "loss": 0.0101, "step": 56040 }, { "epoch": 0.9465666903097241, "grad_norm": 0.3342602849006653, "learning_rate": 6.323028827233189e-06, "loss": 0.0097, "step": 56050 }, { "epoch": 0.9467355692910461, "grad_norm": 0.16832737624645233, "learning_rate": 6.321607549239301e-06, "loss": 0.011, "step": 56060 }, { "epoch": 0.946904448272368, "grad_norm": 0.3034612238407135, "learning_rate": 6.3201861564273885e-06, "loss": 0.013, "step": 56070 }, { "epoch": 0.94707332725369, "grad_norm": 0.1990443915128708, "learning_rate": 6.318764648920937e-06, "loss": 0.0082, "step": 56080 }, { "epoch": 0.947242206235012, "grad_norm": 0.4126876890659332, "learning_rate": 6.317343026843443e-06, "loss": 0.0107, "step": 56090 }, { "epoch": 0.947411085216334, "grad_norm": 0.30427154898643494, "learning_rate": 6.3159212903184165e-06, "loss": 0.0118, "step": 56100 }, { "epoch": 0.947579964197656, "grad_norm": 0.224120631814003, "learning_rate": 6.314499439469372e-06, "loss": 0.0099, "step": 56110 }, { "epoch": 0.9477488431789779, "grad_norm": 0.25012069940567017, "learning_rate": 6.313077474419838e-06, "loss": 0.0129, "step": 56120 }, { "epoch": 0.9479177221602999, "grad_norm": 0.49267756938934326, "learning_rate": 6.311655395293349e-06, "loss": 0.0131, "step": 56130 }, { "epoch": 0.9480866011416219, "grad_norm": 0.3877347707748413, "learning_rate": 6.310233202213455e-06, "loss": 0.0116, "step": 56140 }, { "epoch": 0.9482554801229439, "grad_norm": 0.3555859327316284, "learning_rate": 6.3088108953037095e-06, "loss": 0.0106, "step": 56150 }, { "epoch": 0.9484243591042659, "grad_norm": 0.2920999825000763, "learning_rate": 6.307388474687681e-06, "loss": 0.0115, "step": 56160 }, { "epoch": 0.9485932380855878, "grad_norm": 0.21287649869918823, "learning_rate": 6.305965940488945e-06, "loss": 0.0088, "step": 56170 }, { "epoch": 0.9487621170669098, "grad_norm": 0.10504219681024551, "learning_rate": 6.30454329283109e-06, "loss": 0.0087, "step": 56180 }, { "epoch": 0.9489309960482318, "grad_norm": 0.5056408643722534, "learning_rate": 6.303120531837709e-06, "loss": 0.0094, "step": 56190 }, { "epoch": 0.9490998750295538, "grad_norm": 0.4578671455383301, "learning_rate": 6.3016976576324105e-06, "loss": 0.0068, "step": 56200 }, { "epoch": 0.9492687540108758, "grad_norm": 0.49554821848869324, "learning_rate": 6.3002746703388075e-06, "loss": 0.0105, "step": 56210 }, { "epoch": 0.9494376329921977, "grad_norm": 0.31652384996414185, "learning_rate": 6.298851570080528e-06, "loss": 0.01, "step": 56220 }, { "epoch": 0.9496065119735198, "grad_norm": 0.3581109642982483, "learning_rate": 6.2974283569812055e-06, "loss": 0.0105, "step": 56230 }, { "epoch": 0.9497753909548418, "grad_norm": 0.3506801128387451, "learning_rate": 6.296005031164489e-06, "loss": 0.0089, "step": 56240 }, { "epoch": 0.9499442699361638, "grad_norm": 0.18025584518909454, "learning_rate": 6.2945815927540296e-06, "loss": 0.0093, "step": 56250 }, { "epoch": 0.9501131489174858, "grad_norm": 0.26221343874931335, "learning_rate": 6.293158041873495e-06, "loss": 0.0106, "step": 56260 }, { "epoch": 0.9502820278988077, "grad_norm": 0.2869492471218109, "learning_rate": 6.291734378646557e-06, "loss": 0.0131, "step": 56270 }, { "epoch": 0.9504509068801297, "grad_norm": 0.18515600264072418, "learning_rate": 6.290310603196901e-06, "loss": 0.0092, "step": 56280 }, { "epoch": 0.9506197858614517, "grad_norm": 0.21547414362430573, "learning_rate": 6.288886715648221e-06, "loss": 0.0097, "step": 56290 }, { "epoch": 0.9507886648427737, "grad_norm": 0.2379709929227829, "learning_rate": 6.287462716124224e-06, "loss": 0.0075, "step": 56300 }, { "epoch": 0.9509575438240957, "grad_norm": 0.3056010603904724, "learning_rate": 6.286038604748619e-06, "loss": 0.0099, "step": 56310 }, { "epoch": 0.9511264228054176, "grad_norm": 0.2536092698574066, "learning_rate": 6.284614381645134e-06, "loss": 0.0108, "step": 56320 }, { "epoch": 0.9512953017867396, "grad_norm": 0.23262281715869904, "learning_rate": 6.283190046937496e-06, "loss": 0.0116, "step": 56330 }, { "epoch": 0.9514641807680616, "grad_norm": 0.22586499154567719, "learning_rate": 6.281765600749452e-06, "loss": 0.0101, "step": 56340 }, { "epoch": 0.9516330597493836, "grad_norm": 0.27999740839004517, "learning_rate": 6.280341043204753e-06, "loss": 0.0146, "step": 56350 }, { "epoch": 0.9518019387307056, "grad_norm": 0.10353609919548035, "learning_rate": 6.278916374427162e-06, "loss": 0.0084, "step": 56360 }, { "epoch": 0.9519708177120275, "grad_norm": 0.24483078718185425, "learning_rate": 6.277491594540451e-06, "loss": 0.0106, "step": 56370 }, { "epoch": 0.9521396966933495, "grad_norm": 0.5984326004981995, "learning_rate": 6.2760667036684e-06, "loss": 0.0151, "step": 56380 }, { "epoch": 0.9523085756746715, "grad_norm": 0.46321332454681396, "learning_rate": 6.2746417019348e-06, "loss": 0.0136, "step": 56390 }, { "epoch": 0.9524774546559935, "grad_norm": 0.3285670280456543, "learning_rate": 6.2732165894634535e-06, "loss": 0.0094, "step": 56400 }, { "epoch": 0.9526463336373155, "grad_norm": 0.38553574681282043, "learning_rate": 6.271791366378168e-06, "loss": 0.011, "step": 56410 }, { "epoch": 0.9528152126186374, "grad_norm": 0.39605700969696045, "learning_rate": 6.270366032802766e-06, "loss": 0.0151, "step": 56420 }, { "epoch": 0.9529840915999594, "grad_norm": 0.31461477279663086, "learning_rate": 6.268940588861076e-06, "loss": 0.0113, "step": 56430 }, { "epoch": 0.9531529705812815, "grad_norm": 0.4126989543437958, "learning_rate": 6.2675150346769375e-06, "loss": 0.008, "step": 56440 }, { "epoch": 0.9533218495626035, "grad_norm": 0.1526929885149002, "learning_rate": 6.266089370374199e-06, "loss": 0.0082, "step": 56450 }, { "epoch": 0.9534907285439255, "grad_norm": 0.24183271825313568, "learning_rate": 6.264663596076717e-06, "loss": 0.0098, "step": 56460 }, { "epoch": 0.9536596075252474, "grad_norm": 0.26377633213996887, "learning_rate": 6.263237711908361e-06, "loss": 0.0071, "step": 56470 }, { "epoch": 0.9538284865065694, "grad_norm": 0.3295556306838989, "learning_rate": 6.261811717993007e-06, "loss": 0.0074, "step": 56480 }, { "epoch": 0.9539973654878914, "grad_norm": 0.29446959495544434, "learning_rate": 6.260385614454545e-06, "loss": 0.0076, "step": 56490 }, { "epoch": 0.9541662444692134, "grad_norm": 0.3164435923099518, "learning_rate": 6.258959401416867e-06, "loss": 0.0106, "step": 56500 }, { "epoch": 0.9543351234505354, "grad_norm": 0.4900718331336975, "learning_rate": 6.257533079003884e-06, "loss": 0.0124, "step": 56510 }, { "epoch": 0.9545040024318573, "grad_norm": 0.5154032707214355, "learning_rate": 6.256106647339506e-06, "loss": 0.0133, "step": 56520 }, { "epoch": 0.9546728814131793, "grad_norm": 0.2788739502429962, "learning_rate": 6.254680106547661e-06, "loss": 0.0127, "step": 56530 }, { "epoch": 0.9548417603945013, "grad_norm": 0.20623356103897095, "learning_rate": 6.253253456752283e-06, "loss": 0.0139, "step": 56540 }, { "epoch": 0.9550106393758233, "grad_norm": 0.18094845116138458, "learning_rate": 6.251826698077316e-06, "loss": 0.0091, "step": 56550 }, { "epoch": 0.9551795183571453, "grad_norm": 0.13069425523281097, "learning_rate": 6.250399830646713e-06, "loss": 0.0089, "step": 56560 }, { "epoch": 0.9553483973384672, "grad_norm": 0.21645432710647583, "learning_rate": 6.2489728545844364e-06, "loss": 0.0101, "step": 56570 }, { "epoch": 0.9555172763197892, "grad_norm": 0.3054813742637634, "learning_rate": 6.247545770014457e-06, "loss": 0.0138, "step": 56580 }, { "epoch": 0.9556861553011112, "grad_norm": 0.4338068664073944, "learning_rate": 6.24611857706076e-06, "loss": 0.0113, "step": 56590 }, { "epoch": 0.9558550342824332, "grad_norm": 0.20640714466571808, "learning_rate": 6.244691275847334e-06, "loss": 0.0089, "step": 56600 }, { "epoch": 0.9560239132637552, "grad_norm": 0.38481777906417847, "learning_rate": 6.243263866498181e-06, "loss": 0.0077, "step": 56610 }, { "epoch": 0.9561927922450771, "grad_norm": 0.4236961305141449, "learning_rate": 6.241836349137308e-06, "loss": 0.0099, "step": 56620 }, { "epoch": 0.9563616712263991, "grad_norm": 0.32704630494117737, "learning_rate": 6.2404087238887374e-06, "loss": 0.0091, "step": 56630 }, { "epoch": 0.9565305502077212, "grad_norm": 0.42838454246520996, "learning_rate": 6.238980990876494e-06, "loss": 0.0091, "step": 56640 }, { "epoch": 0.9566994291890432, "grad_norm": 0.25704410672187805, "learning_rate": 6.23755315022462e-06, "loss": 0.0112, "step": 56650 }, { "epoch": 0.9568683081703652, "grad_norm": 0.2387341558933258, "learning_rate": 6.23612520205716e-06, "loss": 0.0106, "step": 56660 }, { "epoch": 0.9570371871516871, "grad_norm": 0.30570533871650696, "learning_rate": 6.234697146498173e-06, "loss": 0.0129, "step": 56670 }, { "epoch": 0.9572060661330091, "grad_norm": 0.5054497122764587, "learning_rate": 6.233268983671722e-06, "loss": 0.0075, "step": 56680 }, { "epoch": 0.9573749451143311, "grad_norm": 0.35667580366134644, "learning_rate": 6.2318407137018845e-06, "loss": 0.0109, "step": 56690 }, { "epoch": 0.9575438240956531, "grad_norm": 0.48360562324523926, "learning_rate": 6.230412336712742e-06, "loss": 0.0128, "step": 56700 }, { "epoch": 0.9577127030769751, "grad_norm": 0.3576279282569885, "learning_rate": 6.2289838528283934e-06, "loss": 0.0118, "step": 56710 }, { "epoch": 0.957881582058297, "grad_norm": 0.3906870186328888, "learning_rate": 6.227555262172937e-06, "loss": 0.0157, "step": 56720 }, { "epoch": 0.958050461039619, "grad_norm": 0.3843008279800415, "learning_rate": 6.22612656487049e-06, "loss": 0.0119, "step": 56730 }, { "epoch": 0.958219340020941, "grad_norm": 0.3184179961681366, "learning_rate": 6.2246977610451694e-06, "loss": 0.0096, "step": 56740 }, { "epoch": 0.958388219002263, "grad_norm": 0.1757996827363968, "learning_rate": 6.223268850821109e-06, "loss": 0.0102, "step": 56750 }, { "epoch": 0.958557097983585, "grad_norm": 0.19182370603084564, "learning_rate": 6.221839834322447e-06, "loss": 0.0092, "step": 56760 }, { "epoch": 0.9587259769649069, "grad_norm": 0.48620790243148804, "learning_rate": 6.220410711673337e-06, "loss": 0.0122, "step": 56770 }, { "epoch": 0.9588948559462289, "grad_norm": 0.5213927626609802, "learning_rate": 6.218981482997932e-06, "loss": 0.0108, "step": 56780 }, { "epoch": 0.9590637349275509, "grad_norm": 0.37848785519599915, "learning_rate": 6.217552148420404e-06, "loss": 0.0073, "step": 56790 }, { "epoch": 0.9592326139088729, "grad_norm": 0.32196468114852905, "learning_rate": 6.216122708064929e-06, "loss": 0.0107, "step": 56800 }, { "epoch": 0.9594014928901949, "grad_norm": 0.46636760234832764, "learning_rate": 6.214693162055693e-06, "loss": 0.0099, "step": 56810 }, { "epoch": 0.9595703718715168, "grad_norm": 0.3101274073123932, "learning_rate": 6.21326351051689e-06, "loss": 0.0088, "step": 56820 }, { "epoch": 0.9597392508528388, "grad_norm": 0.2902311086654663, "learning_rate": 6.211833753572728e-06, "loss": 0.0078, "step": 56830 }, { "epoch": 0.9599081298341609, "grad_norm": 0.28697583079338074, "learning_rate": 6.210403891347419e-06, "loss": 0.0168, "step": 56840 }, { "epoch": 0.9600770088154829, "grad_norm": 0.33082500100135803, "learning_rate": 6.2089739239651844e-06, "loss": 0.008, "step": 56850 }, { "epoch": 0.9602458877968049, "grad_norm": 0.23391783237457275, "learning_rate": 6.20754385155026e-06, "loss": 0.0112, "step": 56860 }, { "epoch": 0.9604147667781268, "grad_norm": 0.5236848592758179, "learning_rate": 6.206113674226882e-06, "loss": 0.0107, "step": 56870 }, { "epoch": 0.9605836457594488, "grad_norm": 0.30626797676086426, "learning_rate": 6.204683392119305e-06, "loss": 0.0091, "step": 56880 }, { "epoch": 0.9607525247407708, "grad_norm": 0.578819215297699, "learning_rate": 6.2032530053517856e-06, "loss": 0.0132, "step": 56890 }, { "epoch": 0.9609214037220928, "grad_norm": 0.12884792685508728, "learning_rate": 6.201822514048596e-06, "loss": 0.0102, "step": 56900 }, { "epoch": 0.9610902827034147, "grad_norm": 0.4780179262161255, "learning_rate": 6.2003919183340085e-06, "loss": 0.0152, "step": 56910 }, { "epoch": 0.9612591616847367, "grad_norm": 0.2929025888442993, "learning_rate": 6.198961218332315e-06, "loss": 0.0117, "step": 56920 }, { "epoch": 0.9614280406660587, "grad_norm": 0.3480640649795532, "learning_rate": 6.197530414167807e-06, "loss": 0.0052, "step": 56930 }, { "epoch": 0.9615969196473807, "grad_norm": 0.3992000222206116, "learning_rate": 6.196099505964791e-06, "loss": 0.0145, "step": 56940 }, { "epoch": 0.9617657986287027, "grad_norm": 0.44348588585853577, "learning_rate": 6.194668493847581e-06, "loss": 0.0105, "step": 56950 }, { "epoch": 0.9619346776100246, "grad_norm": 0.29270613193511963, "learning_rate": 6.193237377940501e-06, "loss": 0.011, "step": 56960 }, { "epoch": 0.9621035565913466, "grad_norm": 0.28019392490386963, "learning_rate": 6.19180615836788e-06, "loss": 0.0062, "step": 56970 }, { "epoch": 0.9622724355726686, "grad_norm": 0.13561663031578064, "learning_rate": 6.190374835254062e-06, "loss": 0.0082, "step": 56980 }, { "epoch": 0.9624413145539906, "grad_norm": 0.15597108006477356, "learning_rate": 6.188943408723393e-06, "loss": 0.0082, "step": 56990 }, { "epoch": 0.9626101935353126, "grad_norm": 0.2672083377838135, "learning_rate": 6.187511878900234e-06, "loss": 0.0075, "step": 57000 }, { "epoch": 0.9627790725166345, "grad_norm": 0.35897910594940186, "learning_rate": 6.186080245908954e-06, "loss": 0.0088, "step": 57010 }, { "epoch": 0.9629479514979565, "grad_norm": 0.1837131381034851, "learning_rate": 6.184648509873929e-06, "loss": 0.0081, "step": 57020 }, { "epoch": 0.9631168304792785, "grad_norm": 0.35037434101104736, "learning_rate": 6.183216670919544e-06, "loss": 0.0118, "step": 57030 }, { "epoch": 0.9632857094606005, "grad_norm": 0.28494101762771606, "learning_rate": 6.181784729170194e-06, "loss": 0.0127, "step": 57040 }, { "epoch": 0.9634545884419226, "grad_norm": 0.22945424914360046, "learning_rate": 6.1803526847502815e-06, "loss": 0.0083, "step": 57050 }, { "epoch": 0.9636234674232445, "grad_norm": 0.15583331882953644, "learning_rate": 6.178920537784221e-06, "loss": 0.0155, "step": 57060 }, { "epoch": 0.9637923464045665, "grad_norm": 0.6591554284095764, "learning_rate": 6.177488288396431e-06, "loss": 0.0148, "step": 57070 }, { "epoch": 0.9639612253858885, "grad_norm": 0.155226469039917, "learning_rate": 6.176055936711347e-06, "loss": 0.0091, "step": 57080 }, { "epoch": 0.9641301043672105, "grad_norm": 0.37175843119621277, "learning_rate": 6.174623482853404e-06, "loss": 0.0102, "step": 57090 }, { "epoch": 0.9642989833485325, "grad_norm": 0.15022368729114532, "learning_rate": 6.1731909269470505e-06, "loss": 0.0075, "step": 57100 }, { "epoch": 0.9644678623298544, "grad_norm": 0.34422194957733154, "learning_rate": 6.171758269116743e-06, "loss": 0.0122, "step": 57110 }, { "epoch": 0.9646367413111764, "grad_norm": 0.11934704333543777, "learning_rate": 6.17032550948695e-06, "loss": 0.0049, "step": 57120 }, { "epoch": 0.9648056202924984, "grad_norm": 0.24619624018669128, "learning_rate": 6.168892648182143e-06, "loss": 0.0148, "step": 57130 }, { "epoch": 0.9649744992738204, "grad_norm": 0.3145579397678375, "learning_rate": 6.167459685326809e-06, "loss": 0.0082, "step": 57140 }, { "epoch": 0.9651433782551424, "grad_norm": 0.2758459150791168, "learning_rate": 6.166026621045436e-06, "loss": 0.0082, "step": 57150 }, { "epoch": 0.9653122572364643, "grad_norm": 0.37184616923332214, "learning_rate": 6.16459345546253e-06, "loss": 0.0084, "step": 57160 }, { "epoch": 0.9654811362177863, "grad_norm": 0.1414608657360077, "learning_rate": 6.163160188702596e-06, "loss": 0.0073, "step": 57170 }, { "epoch": 0.9656500151991083, "grad_norm": 0.3868992328643799, "learning_rate": 6.161726820890156e-06, "loss": 0.0095, "step": 57180 }, { "epoch": 0.9658188941804303, "grad_norm": 0.40127089619636536, "learning_rate": 6.160293352149736e-06, "loss": 0.0109, "step": 57190 }, { "epoch": 0.9659877731617523, "grad_norm": 0.41082650423049927, "learning_rate": 6.158859782605873e-06, "loss": 0.0115, "step": 57200 }, { "epoch": 0.9661566521430742, "grad_norm": 0.27761802077293396, "learning_rate": 6.157426112383112e-06, "loss": 0.0085, "step": 57210 }, { "epoch": 0.9663255311243962, "grad_norm": 0.3455444872379303, "learning_rate": 6.155992341606006e-06, "loss": 0.0145, "step": 57220 }, { "epoch": 0.9664944101057182, "grad_norm": 0.13662153482437134, "learning_rate": 6.1545584703991175e-06, "loss": 0.0084, "step": 57230 }, { "epoch": 0.9666632890870402, "grad_norm": 0.19164492189884186, "learning_rate": 6.153124498887019e-06, "loss": 0.0098, "step": 57240 }, { "epoch": 0.9668321680683623, "grad_norm": 0.28583601117134094, "learning_rate": 6.151690427194289e-06, "loss": 0.0077, "step": 57250 }, { "epoch": 0.9670010470496841, "grad_norm": 0.23643210530281067, "learning_rate": 6.150256255445517e-06, "loss": 0.0086, "step": 57260 }, { "epoch": 0.9671699260310062, "grad_norm": 0.25160306692123413, "learning_rate": 6.1488219837653e-06, "loss": 0.0145, "step": 57270 }, { "epoch": 0.9673388050123282, "grad_norm": 0.1876029223203659, "learning_rate": 6.1473876122782446e-06, "loss": 0.0064, "step": 57280 }, { "epoch": 0.9675076839936502, "grad_norm": 0.2472687065601349, "learning_rate": 6.145953141108966e-06, "loss": 0.0085, "step": 57290 }, { "epoch": 0.9676765629749722, "grad_norm": 0.44508159160614014, "learning_rate": 6.144518570382084e-06, "loss": 0.011, "step": 57300 }, { "epoch": 0.9678454419562941, "grad_norm": 0.27547818422317505, "learning_rate": 6.1430839002222345e-06, "loss": 0.0073, "step": 57310 }, { "epoch": 0.9680143209376161, "grad_norm": 0.3728075623512268, "learning_rate": 6.141649130754056e-06, "loss": 0.0102, "step": 57320 }, { "epoch": 0.9681831999189381, "grad_norm": 0.47020596265792847, "learning_rate": 6.1402142621022e-06, "loss": 0.0161, "step": 57330 }, { "epoch": 0.9683520789002601, "grad_norm": 0.2908536493778229, "learning_rate": 6.138779294391322e-06, "loss": 0.0077, "step": 57340 }, { "epoch": 0.9685209578815821, "grad_norm": 0.05039355158805847, "learning_rate": 6.13734422774609e-06, "loss": 0.0164, "step": 57350 }, { "epoch": 0.968689836862904, "grad_norm": 0.39348769187927246, "learning_rate": 6.135909062291177e-06, "loss": 0.013, "step": 57360 }, { "epoch": 0.968858715844226, "grad_norm": 0.33752739429473877, "learning_rate": 6.134473798151268e-06, "loss": 0.009, "step": 57370 }, { "epoch": 0.969027594825548, "grad_norm": 0.7928234934806824, "learning_rate": 6.133038435451056e-06, "loss": 0.0136, "step": 57380 }, { "epoch": 0.96919647380687, "grad_norm": 0.38997358083724976, "learning_rate": 6.131602974315242e-06, "loss": 0.0143, "step": 57390 }, { "epoch": 0.969365352788192, "grad_norm": 0.19020356237888336, "learning_rate": 6.130167414868533e-06, "loss": 0.0127, "step": 57400 }, { "epoch": 0.9695342317695139, "grad_norm": 0.18255490064620972, "learning_rate": 6.12873175723565e-06, "loss": 0.0086, "step": 57410 }, { "epoch": 0.9697031107508359, "grad_norm": 0.23928113281726837, "learning_rate": 6.127296001541317e-06, "loss": 0.0073, "step": 57420 }, { "epoch": 0.9698719897321579, "grad_norm": 0.3173568844795227, "learning_rate": 6.125860147910268e-06, "loss": 0.0107, "step": 57430 }, { "epoch": 0.9700408687134799, "grad_norm": 0.27679988741874695, "learning_rate": 6.124424196467249e-06, "loss": 0.0111, "step": 57440 }, { "epoch": 0.970209747694802, "grad_norm": 0.31680014729499817, "learning_rate": 6.122988147337012e-06, "loss": 0.011, "step": 57450 }, { "epoch": 0.9703786266761238, "grad_norm": 0.13589827716350555, "learning_rate": 6.121552000644316e-06, "loss": 0.0075, "step": 57460 }, { "epoch": 0.9705475056574459, "grad_norm": 0.29614725708961487, "learning_rate": 6.120115756513931e-06, "loss": 0.0074, "step": 57470 }, { "epoch": 0.9707163846387679, "grad_norm": 0.300493061542511, "learning_rate": 6.118679415070633e-06, "loss": 0.0118, "step": 57480 }, { "epoch": 0.9708852636200899, "grad_norm": 0.47194644808769226, "learning_rate": 6.117242976439208e-06, "loss": 0.0143, "step": 57490 }, { "epoch": 0.9710541426014119, "grad_norm": 0.15467792749404907, "learning_rate": 6.115806440744452e-06, "loss": 0.0119, "step": 57500 }, { "epoch": 0.9712230215827338, "grad_norm": 0.20230484008789062, "learning_rate": 6.114369808111167e-06, "loss": 0.0134, "step": 57510 }, { "epoch": 0.9713919005640558, "grad_norm": 0.34236419200897217, "learning_rate": 6.112933078664163e-06, "loss": 0.0086, "step": 57520 }, { "epoch": 0.9715607795453778, "grad_norm": 0.26631680130958557, "learning_rate": 6.111496252528261e-06, "loss": 0.0073, "step": 57530 }, { "epoch": 0.9717296585266998, "grad_norm": 0.2837064266204834, "learning_rate": 6.110059329828286e-06, "loss": 0.0114, "step": 57540 }, { "epoch": 0.9718985375080218, "grad_norm": 0.4496263563632965, "learning_rate": 6.108622310689078e-06, "loss": 0.0127, "step": 57550 }, { "epoch": 0.9720674164893437, "grad_norm": 0.3090165853500366, "learning_rate": 6.10718519523548e-06, "loss": 0.009, "step": 57560 }, { "epoch": 0.9722362954706657, "grad_norm": 0.1682947874069214, "learning_rate": 6.105747983592346e-06, "loss": 0.016, "step": 57570 }, { "epoch": 0.9724051744519877, "grad_norm": 0.14652305841445923, "learning_rate": 6.104310675884535e-06, "loss": 0.0089, "step": 57580 }, { "epoch": 0.9725740534333097, "grad_norm": 0.360015869140625, "learning_rate": 6.10287327223692e-06, "loss": 0.0133, "step": 57590 }, { "epoch": 0.9727429324146317, "grad_norm": 0.1893434375524521, "learning_rate": 6.101435772774376e-06, "loss": 0.0085, "step": 57600 }, { "epoch": 0.9729118113959536, "grad_norm": 0.3895432949066162, "learning_rate": 6.099998177621793e-06, "loss": 0.0117, "step": 57610 }, { "epoch": 0.9730806903772756, "grad_norm": 0.25461843609809875, "learning_rate": 6.098560486904062e-06, "loss": 0.0075, "step": 57620 }, { "epoch": 0.9732495693585976, "grad_norm": 0.33953237533569336, "learning_rate": 6.09712270074609e-06, "loss": 0.0118, "step": 57630 }, { "epoch": 0.9734184483399196, "grad_norm": 0.27405786514282227, "learning_rate": 6.095684819272784e-06, "loss": 0.0103, "step": 57640 }, { "epoch": 0.9735873273212416, "grad_norm": 0.37739503383636475, "learning_rate": 6.094246842609066e-06, "loss": 0.008, "step": 57650 }, { "epoch": 0.9737562063025635, "grad_norm": 0.3451972007751465, "learning_rate": 6.092808770879864e-06, "loss": 0.0085, "step": 57660 }, { "epoch": 0.9739250852838855, "grad_norm": 0.39221811294555664, "learning_rate": 6.091370604210114e-06, "loss": 0.0072, "step": 57670 }, { "epoch": 0.9740939642652076, "grad_norm": 0.3603988587856293, "learning_rate": 6.089932342724759e-06, "loss": 0.0083, "step": 57680 }, { "epoch": 0.9742628432465296, "grad_norm": 0.35203009843826294, "learning_rate": 6.088493986548754e-06, "loss": 0.0057, "step": 57690 }, { "epoch": 0.9744317222278516, "grad_norm": 0.19503600895404816, "learning_rate": 6.08705553580706e-06, "loss": 0.0092, "step": 57700 }, { "epoch": 0.9746006012091735, "grad_norm": 0.28253039717674255, "learning_rate": 6.085616990624642e-06, "loss": 0.0154, "step": 57710 }, { "epoch": 0.9747694801904955, "grad_norm": 0.15013544261455536, "learning_rate": 6.084178351126481e-06, "loss": 0.0094, "step": 57720 }, { "epoch": 0.9749383591718175, "grad_norm": 0.30137765407562256, "learning_rate": 6.0827396174375605e-06, "loss": 0.0099, "step": 57730 }, { "epoch": 0.9751072381531395, "grad_norm": 0.27562764286994934, "learning_rate": 6.081300789682878e-06, "loss": 0.0099, "step": 57740 }, { "epoch": 0.9752761171344615, "grad_norm": 0.29547181725502014, "learning_rate": 6.07986186798743e-06, "loss": 0.0088, "step": 57750 }, { "epoch": 0.9754449961157834, "grad_norm": 0.4075182378292084, "learning_rate": 6.07842285247623e-06, "loss": 0.0118, "step": 57760 }, { "epoch": 0.9756138750971054, "grad_norm": 0.4812328815460205, "learning_rate": 6.076983743274295e-06, "loss": 0.0123, "step": 57770 }, { "epoch": 0.9757827540784274, "grad_norm": 0.2370709627866745, "learning_rate": 6.07554454050665e-06, "loss": 0.0097, "step": 57780 }, { "epoch": 0.9759516330597494, "grad_norm": 0.19963395595550537, "learning_rate": 6.074105244298333e-06, "loss": 0.0105, "step": 57790 }, { "epoch": 0.9761205120410714, "grad_norm": 0.3466562330722809, "learning_rate": 6.072665854774384e-06, "loss": 0.0151, "step": 57800 }, { "epoch": 0.9762893910223933, "grad_norm": 0.2941662669181824, "learning_rate": 6.071226372059853e-06, "loss": 0.0099, "step": 57810 }, { "epoch": 0.9764582700037153, "grad_norm": 0.6190981268882751, "learning_rate": 6.069786796279801e-06, "loss": 0.0078, "step": 57820 }, { "epoch": 0.9766271489850373, "grad_norm": 0.20328377187252045, "learning_rate": 6.068347127559292e-06, "loss": 0.0114, "step": 57830 }, { "epoch": 0.9767960279663593, "grad_norm": 0.1953510046005249, "learning_rate": 6.066907366023402e-06, "loss": 0.0146, "step": 57840 }, { "epoch": 0.9769649069476813, "grad_norm": 0.1487017273902893, "learning_rate": 6.065467511797216e-06, "loss": 0.0073, "step": 57850 }, { "epoch": 0.9771337859290032, "grad_norm": 0.23377306759357452, "learning_rate": 6.064027565005823e-06, "loss": 0.0084, "step": 57860 }, { "epoch": 0.9773026649103252, "grad_norm": 0.3543882966041565, "learning_rate": 6.062587525774322e-06, "loss": 0.008, "step": 57870 }, { "epoch": 0.9774715438916473, "grad_norm": 0.3363850712776184, "learning_rate": 6.061147394227822e-06, "loss": 0.0093, "step": 57880 }, { "epoch": 0.9776404228729693, "grad_norm": 0.2126438319683075, "learning_rate": 6.059707170491434e-06, "loss": 0.0087, "step": 57890 }, { "epoch": 0.9778093018542913, "grad_norm": 0.20419412851333618, "learning_rate": 6.058266854690286e-06, "loss": 0.0075, "step": 57900 }, { "epoch": 0.9779781808356132, "grad_norm": 0.4551158547401428, "learning_rate": 6.056826446949504e-06, "loss": 0.0112, "step": 57910 }, { "epoch": 0.9781470598169352, "grad_norm": 0.49143701791763306, "learning_rate": 6.055385947394233e-06, "loss": 0.0079, "step": 57920 }, { "epoch": 0.9783159387982572, "grad_norm": 0.48149532079696655, "learning_rate": 6.053945356149615e-06, "loss": 0.0095, "step": 57930 }, { "epoch": 0.9784848177795792, "grad_norm": 0.4930329918861389, "learning_rate": 6.052504673340809e-06, "loss": 0.008, "step": 57940 }, { "epoch": 0.9786536967609012, "grad_norm": 0.28520941734313965, "learning_rate": 6.0510638990929746e-06, "loss": 0.0121, "step": 57950 }, { "epoch": 0.9788225757422231, "grad_norm": 0.3763655126094818, "learning_rate": 6.049623033531283e-06, "loss": 0.0061, "step": 57960 }, { "epoch": 0.9789914547235451, "grad_norm": 0.2744694948196411, "learning_rate": 6.048182076780917e-06, "loss": 0.0089, "step": 57970 }, { "epoch": 0.9791603337048671, "grad_norm": 0.3576395511627197, "learning_rate": 6.0467410289670594e-06, "loss": 0.0131, "step": 57980 }, { "epoch": 0.9793292126861891, "grad_norm": 0.23413939774036407, "learning_rate": 6.045299890214905e-06, "loss": 0.0095, "step": 57990 }, { "epoch": 0.979498091667511, "grad_norm": 1.0059250593185425, "learning_rate": 6.043858660649658e-06, "loss": 0.0098, "step": 58000 }, { "epoch": 0.979666970648833, "grad_norm": 0.5015471577644348, "learning_rate": 6.042417340396529e-06, "loss": 0.0142, "step": 58010 }, { "epoch": 0.979835849630155, "grad_norm": 0.25093045830726624, "learning_rate": 6.040975929580736e-06, "loss": 0.009, "step": 58020 }, { "epoch": 0.980004728611477, "grad_norm": 0.23211102187633514, "learning_rate": 6.039534428327504e-06, "loss": 0.0079, "step": 58030 }, { "epoch": 0.980173607592799, "grad_norm": 0.19916315376758575, "learning_rate": 6.038092836762069e-06, "loss": 0.0099, "step": 58040 }, { "epoch": 0.9803424865741209, "grad_norm": 0.1486818641424179, "learning_rate": 6.0366511550096715e-06, "loss": 0.007, "step": 58050 }, { "epoch": 0.9805113655554429, "grad_norm": 0.26882126927375793, "learning_rate": 6.035209383195563e-06, "loss": 0.0123, "step": 58060 }, { "epoch": 0.980680244536765, "grad_norm": 0.2568555474281311, "learning_rate": 6.033767521444999e-06, "loss": 0.0077, "step": 58070 }, { "epoch": 0.980849123518087, "grad_norm": 0.4664008915424347, "learning_rate": 6.032325569883245e-06, "loss": 0.0148, "step": 58080 }, { "epoch": 0.981018002499409, "grad_norm": 0.242104172706604, "learning_rate": 6.030883528635575e-06, "loss": 0.0068, "step": 58090 }, { "epoch": 0.9811868814807309, "grad_norm": 0.13327370584011078, "learning_rate": 6.029441397827273e-06, "loss": 0.01, "step": 58100 }, { "epoch": 0.9813557604620529, "grad_norm": 0.16949190199375153, "learning_rate": 6.027999177583621e-06, "loss": 0.0111, "step": 58110 }, { "epoch": 0.9815246394433749, "grad_norm": 0.7180682420730591, "learning_rate": 6.026556868029922e-06, "loss": 0.0128, "step": 58120 }, { "epoch": 0.9816935184246969, "grad_norm": 0.16329631209373474, "learning_rate": 6.025114469291478e-06, "loss": 0.0108, "step": 58130 }, { "epoch": 0.9818623974060189, "grad_norm": 0.31726503372192383, "learning_rate": 6.0236719814935995e-06, "loss": 0.0102, "step": 58140 }, { "epoch": 0.9820312763873408, "grad_norm": 0.265509694814682, "learning_rate": 6.022229404761607e-06, "loss": 0.0113, "step": 58150 }, { "epoch": 0.9822001553686628, "grad_norm": 0.44341593980789185, "learning_rate": 6.020786739220829e-06, "loss": 0.0145, "step": 58160 }, { "epoch": 0.9823690343499848, "grad_norm": 0.3429701328277588, "learning_rate": 6.019343984996602e-06, "loss": 0.0118, "step": 58170 }, { "epoch": 0.9825379133313068, "grad_norm": 0.21763400733470917, "learning_rate": 6.017901142214266e-06, "loss": 0.0134, "step": 58180 }, { "epoch": 0.9827067923126288, "grad_norm": 0.2554681599140167, "learning_rate": 6.016458210999174e-06, "loss": 0.0104, "step": 58190 }, { "epoch": 0.9828756712939507, "grad_norm": 0.5087551474571228, "learning_rate": 6.015015191476683e-06, "loss": 0.0145, "step": 58200 }, { "epoch": 0.9830445502752727, "grad_norm": 0.23113173246383667, "learning_rate": 6.013572083772159e-06, "loss": 0.0077, "step": 58210 }, { "epoch": 0.9832134292565947, "grad_norm": 0.3728889524936676, "learning_rate": 6.012128888010975e-06, "loss": 0.0081, "step": 58220 }, { "epoch": 0.9833823082379167, "grad_norm": 0.3415399491786957, "learning_rate": 6.0106856043185155e-06, "loss": 0.0082, "step": 58230 }, { "epoch": 0.9835511872192387, "grad_norm": 0.3326683044433594, "learning_rate": 6.009242232820166e-06, "loss": 0.018, "step": 58240 }, { "epoch": 0.9837200662005606, "grad_norm": 0.09744593501091003, "learning_rate": 6.007798773641324e-06, "loss": 0.0094, "step": 58250 }, { "epoch": 0.9838889451818826, "grad_norm": 0.09679453819990158, "learning_rate": 6.006355226907396e-06, "loss": 0.0136, "step": 58260 }, { "epoch": 0.9840578241632046, "grad_norm": 0.555587112903595, "learning_rate": 6.004911592743789e-06, "loss": 0.0094, "step": 58270 }, { "epoch": 0.9842267031445266, "grad_norm": 0.2450568825006485, "learning_rate": 6.003467871275926e-06, "loss": 0.0104, "step": 58280 }, { "epoch": 0.9843955821258487, "grad_norm": 0.22718805074691772, "learning_rate": 6.002024062629235e-06, "loss": 0.0105, "step": 58290 }, { "epoch": 0.9845644611071706, "grad_norm": 0.4886592626571655, "learning_rate": 6.000580166929148e-06, "loss": 0.0113, "step": 58300 }, { "epoch": 0.9847333400884926, "grad_norm": 0.15705513954162598, "learning_rate": 5.999136184301107e-06, "loss": 0.009, "step": 58310 }, { "epoch": 0.9849022190698146, "grad_norm": 0.11131570488214493, "learning_rate": 5.997692114870564e-06, "loss": 0.0081, "step": 58320 }, { "epoch": 0.9850710980511366, "grad_norm": 0.14734812080860138, "learning_rate": 5.9962479587629725e-06, "loss": 0.0089, "step": 58330 }, { "epoch": 0.9852399770324586, "grad_norm": 0.13053928315639496, "learning_rate": 5.994803716103801e-06, "loss": 0.0087, "step": 58340 }, { "epoch": 0.9854088560137805, "grad_norm": 0.22608833014965057, "learning_rate": 5.993359387018521e-06, "loss": 0.0155, "step": 58350 }, { "epoch": 0.9855777349951025, "grad_norm": 0.26973670721054077, "learning_rate": 5.9919149716326095e-06, "loss": 0.0098, "step": 58360 }, { "epoch": 0.9857466139764245, "grad_norm": 0.25673818588256836, "learning_rate": 5.990470470071558e-06, "loss": 0.0143, "step": 58370 }, { "epoch": 0.9859154929577465, "grad_norm": 0.20347300171852112, "learning_rate": 5.989025882460858e-06, "loss": 0.0077, "step": 58380 }, { "epoch": 0.9860843719390685, "grad_norm": 0.5928995609283447, "learning_rate": 5.9875812089260125e-06, "loss": 0.0084, "step": 58390 }, { "epoch": 0.9862532509203904, "grad_norm": 0.2782539129257202, "learning_rate": 5.9861364495925315e-06, "loss": 0.0087, "step": 58400 }, { "epoch": 0.9864221299017124, "grad_norm": 0.15527214109897614, "learning_rate": 5.9846916045859334e-06, "loss": 0.0097, "step": 58410 }, { "epoch": 0.9865910088830344, "grad_norm": 0.3908846974372864, "learning_rate": 5.983246674031741e-06, "loss": 0.0149, "step": 58420 }, { "epoch": 0.9867598878643564, "grad_norm": 0.22863423824310303, "learning_rate": 5.981801658055487e-06, "loss": 0.0118, "step": 58430 }, { "epoch": 0.9869287668456784, "grad_norm": 0.24884730577468872, "learning_rate": 5.980356556782709e-06, "loss": 0.0139, "step": 58440 }, { "epoch": 0.9870976458270003, "grad_norm": 0.5699552297592163, "learning_rate": 5.978911370338958e-06, "loss": 0.0196, "step": 58450 }, { "epoch": 0.9872665248083223, "grad_norm": 0.10880444198846817, "learning_rate": 5.977466098849784e-06, "loss": 0.0095, "step": 58460 }, { "epoch": 0.9874354037896443, "grad_norm": 0.3632148504257202, "learning_rate": 5.976020742440751e-06, "loss": 0.0157, "step": 58470 }, { "epoch": 0.9876042827709663, "grad_norm": 0.15232746303081512, "learning_rate": 5.974575301237429e-06, "loss": 0.0116, "step": 58480 }, { "epoch": 0.9877731617522884, "grad_norm": 0.22580760717391968, "learning_rate": 5.973129775365392e-06, "loss": 0.0099, "step": 58490 }, { "epoch": 0.9879420407336102, "grad_norm": 0.23681116104125977, "learning_rate": 5.971684164950223e-06, "loss": 0.0087, "step": 58500 }, { "epoch": 0.9881109197149323, "grad_norm": 0.642971396446228, "learning_rate": 5.9702384701175165e-06, "loss": 0.0109, "step": 58510 }, { "epoch": 0.9882797986962543, "grad_norm": 0.2329603135585785, "learning_rate": 5.968792690992868e-06, "loss": 0.011, "step": 58520 }, { "epoch": 0.9884486776775763, "grad_norm": 0.35475969314575195, "learning_rate": 5.967346827701886e-06, "loss": 0.013, "step": 58530 }, { "epoch": 0.9886175566588983, "grad_norm": 0.5404853820800781, "learning_rate": 5.965900880370181e-06, "loss": 0.0086, "step": 58540 }, { "epoch": 0.9887864356402202, "grad_norm": 0.3559904098510742, "learning_rate": 5.964454849123376e-06, "loss": 0.0128, "step": 58550 }, { "epoch": 0.9889553146215422, "grad_norm": 0.26439398527145386, "learning_rate": 5.9630087340870945e-06, "loss": 0.0094, "step": 58560 }, { "epoch": 0.9891241936028642, "grad_norm": 0.2155921459197998, "learning_rate": 5.961562535386975e-06, "loss": 0.0111, "step": 58570 }, { "epoch": 0.9892930725841862, "grad_norm": 0.2406449317932129, "learning_rate": 5.960116253148658e-06, "loss": 0.01, "step": 58580 }, { "epoch": 0.9894619515655082, "grad_norm": 0.1420430839061737, "learning_rate": 5.958669887497794e-06, "loss": 0.0088, "step": 58590 }, { "epoch": 0.9896308305468301, "grad_norm": 0.5160329341888428, "learning_rate": 5.9572234385600396e-06, "loss": 0.013, "step": 58600 }, { "epoch": 0.9897997095281521, "grad_norm": 0.07922673225402832, "learning_rate": 5.9557769064610575e-06, "loss": 0.0073, "step": 58610 }, { "epoch": 0.9899685885094741, "grad_norm": 0.73483806848526, "learning_rate": 5.954330291326521e-06, "loss": 0.0154, "step": 58620 }, { "epoch": 0.9901374674907961, "grad_norm": 0.10591046512126923, "learning_rate": 5.952883593282105e-06, "loss": 0.0063, "step": 58630 }, { "epoch": 0.9903063464721181, "grad_norm": 0.3059546947479248, "learning_rate": 5.9514368124535e-06, "loss": 0.0116, "step": 58640 }, { "epoch": 0.99047522545344, "grad_norm": 0.29181963205337524, "learning_rate": 5.9499899489663945e-06, "loss": 0.0071, "step": 58650 }, { "epoch": 0.990644104434762, "grad_norm": 0.133090078830719, "learning_rate": 5.948543002946492e-06, "loss": 0.0099, "step": 58660 }, { "epoch": 0.990812983416084, "grad_norm": 0.30975767970085144, "learning_rate": 5.947095974519496e-06, "loss": 0.0144, "step": 58670 }, { "epoch": 0.990981862397406, "grad_norm": 0.16732031106948853, "learning_rate": 5.945648863811122e-06, "loss": 0.0106, "step": 58680 }, { "epoch": 0.991150741378728, "grad_norm": 0.38107168674468994, "learning_rate": 5.944201670947093e-06, "loss": 0.0077, "step": 58690 }, { "epoch": 0.99131962036005, "grad_norm": 0.5103957653045654, "learning_rate": 5.942754396053136e-06, "loss": 0.0098, "step": 58700 }, { "epoch": 0.991488499341372, "grad_norm": 0.3663896322250366, "learning_rate": 5.941307039254987e-06, "loss": 0.0185, "step": 58710 }, { "epoch": 0.991657378322694, "grad_norm": 0.18198414146900177, "learning_rate": 5.93985960067839e-06, "loss": 0.0102, "step": 58720 }, { "epoch": 0.991826257304016, "grad_norm": 0.16505278646945953, "learning_rate": 5.938412080449094e-06, "loss": 0.0087, "step": 58730 }, { "epoch": 0.991995136285338, "grad_norm": 0.19497013092041016, "learning_rate": 5.9369644786928545e-06, "loss": 0.0126, "step": 58740 }, { "epoch": 0.9921640152666599, "grad_norm": 0.27448222041130066, "learning_rate": 5.935516795535438e-06, "loss": 0.0123, "step": 58750 }, { "epoch": 0.9923328942479819, "grad_norm": 0.19726994633674622, "learning_rate": 5.934069031102615e-06, "loss": 0.0204, "step": 58760 }, { "epoch": 0.9925017732293039, "grad_norm": 0.2923355996608734, "learning_rate": 5.932621185520162e-06, "loss": 0.0095, "step": 58770 }, { "epoch": 0.9926706522106259, "grad_norm": 0.1576002985239029, "learning_rate": 5.9311732589138675e-06, "loss": 0.0072, "step": 58780 }, { "epoch": 0.9928395311919479, "grad_norm": 0.31320905685424805, "learning_rate": 5.9297252514095205e-06, "loss": 0.0092, "step": 58790 }, { "epoch": 0.9930084101732698, "grad_norm": 0.3970954418182373, "learning_rate": 5.928277163132921e-06, "loss": 0.0075, "step": 58800 }, { "epoch": 0.9931772891545918, "grad_norm": 0.1866220384836197, "learning_rate": 5.9268289942098775e-06, "loss": 0.0121, "step": 58810 }, { "epoch": 0.9933461681359138, "grad_norm": 0.3531299829483032, "learning_rate": 5.925380744766203e-06, "loss": 0.013, "step": 58820 }, { "epoch": 0.9935150471172358, "grad_norm": 0.021519817411899567, "learning_rate": 5.923932414927716e-06, "loss": 0.0085, "step": 58830 }, { "epoch": 0.9936839260985578, "grad_norm": 0.5335684418678284, "learning_rate": 5.922484004820245e-06, "loss": 0.011, "step": 58840 }, { "epoch": 0.9938528050798797, "grad_norm": 0.32090556621551514, "learning_rate": 5.921035514569624e-06, "loss": 0.0117, "step": 58850 }, { "epoch": 0.9940216840612017, "grad_norm": 0.22520379722118378, "learning_rate": 5.919586944301694e-06, "loss": 0.0104, "step": 58860 }, { "epoch": 0.9941905630425237, "grad_norm": 0.2968713343143463, "learning_rate": 5.918138294142303e-06, "loss": 0.0115, "step": 58870 }, { "epoch": 0.9943594420238457, "grad_norm": 0.16574032604694366, "learning_rate": 5.916689564217308e-06, "loss": 0.0071, "step": 58880 }, { "epoch": 0.9945283210051677, "grad_norm": 0.21333047747612, "learning_rate": 5.915240754652569e-06, "loss": 0.0092, "step": 58890 }, { "epoch": 0.9946971999864896, "grad_norm": 0.2742922604084015, "learning_rate": 5.913791865573958e-06, "loss": 0.0145, "step": 58900 }, { "epoch": 0.9948660789678117, "grad_norm": 0.3032339811325073, "learning_rate": 5.912342897107346e-06, "loss": 0.0111, "step": 58910 }, { "epoch": 0.9950349579491337, "grad_norm": 0.6927831768989563, "learning_rate": 5.910893849378621e-06, "loss": 0.0109, "step": 58920 }, { "epoch": 0.9952038369304557, "grad_norm": 0.275565505027771, "learning_rate": 5.909444722513669e-06, "loss": 0.0076, "step": 58930 }, { "epoch": 0.9953727159117777, "grad_norm": 0.5728974938392639, "learning_rate": 5.907995516638389e-06, "loss": 0.0095, "step": 58940 }, { "epoch": 0.9955415948930996, "grad_norm": 0.27752721309661865, "learning_rate": 5.906546231878681e-06, "loss": 0.0177, "step": 58950 }, { "epoch": 0.9957104738744216, "grad_norm": 0.24905157089233398, "learning_rate": 5.9050968683604605e-06, "loss": 0.0104, "step": 58960 }, { "epoch": 0.9958793528557436, "grad_norm": 0.2709248960018158, "learning_rate": 5.903647426209641e-06, "loss": 0.0101, "step": 58970 }, { "epoch": 0.9960482318370656, "grad_norm": 0.14543026685714722, "learning_rate": 5.902197905552147e-06, "loss": 0.0074, "step": 58980 }, { "epoch": 0.9962171108183876, "grad_norm": 0.33499211072921753, "learning_rate": 5.900748306513909e-06, "loss": 0.0084, "step": 58990 }, { "epoch": 0.9963859897997095, "grad_norm": 0.27137789130210876, "learning_rate": 5.899298629220868e-06, "loss": 0.0099, "step": 59000 }, { "epoch": 0.9965548687810315, "grad_norm": 0.24000215530395508, "learning_rate": 5.897848873798963e-06, "loss": 0.0085, "step": 59010 }, { "epoch": 0.9967237477623535, "grad_norm": 0.3767193555831909, "learning_rate": 5.89639904037415e-06, "loss": 0.0112, "step": 59020 }, { "epoch": 0.9968926267436755, "grad_norm": 0.27154046297073364, "learning_rate": 5.894949129072384e-06, "loss": 0.0117, "step": 59030 }, { "epoch": 0.9970615057249975, "grad_norm": 0.24505703151226044, "learning_rate": 5.893499140019629e-06, "loss": 0.0092, "step": 59040 }, { "epoch": 0.9972303847063194, "grad_norm": 0.20760583877563477, "learning_rate": 5.89204907334186e-06, "loss": 0.0119, "step": 59050 }, { "epoch": 0.9973992636876414, "grad_norm": 0.6950781345367432, "learning_rate": 5.890598929165053e-06, "loss": 0.0119, "step": 59060 }, { "epoch": 0.9975681426689634, "grad_norm": 0.33936068415641785, "learning_rate": 5.889148707615193e-06, "loss": 0.0103, "step": 59070 }, { "epoch": 0.9977370216502854, "grad_norm": 0.22854849696159363, "learning_rate": 5.887698408818272e-06, "loss": 0.0084, "step": 59080 }, { "epoch": 0.9979059006316074, "grad_norm": 0.321882039308548, "learning_rate": 5.8862480329002895e-06, "loss": 0.0094, "step": 59090 }, { "epoch": 0.9980747796129293, "grad_norm": 0.2181207537651062, "learning_rate": 5.884797579987248e-06, "loss": 0.0137, "step": 59100 }, { "epoch": 0.9982436585942513, "grad_norm": 0.12722937762737274, "learning_rate": 5.883347050205162e-06, "loss": 0.0131, "step": 59110 }, { "epoch": 0.9984125375755734, "grad_norm": 0.30653196573257446, "learning_rate": 5.8818964436800475e-06, "loss": 0.014, "step": 59120 }, { "epoch": 0.9985814165568954, "grad_norm": 0.3061233162879944, "learning_rate": 5.8804457605379324e-06, "loss": 0.0082, "step": 59130 }, { "epoch": 0.9987502955382173, "grad_norm": 0.4360685348510742, "learning_rate": 5.878995000904847e-06, "loss": 0.0104, "step": 59140 }, { "epoch": 0.9989191745195393, "grad_norm": 0.3460616171360016, "learning_rate": 5.87754416490683e-06, "loss": 0.0092, "step": 59150 }, { "epoch": 0.9990880535008613, "grad_norm": 0.31895625591278076, "learning_rate": 5.876093252669924e-06, "loss": 0.0074, "step": 59160 }, { "epoch": 0.9992569324821833, "grad_norm": 0.33811718225479126, "learning_rate": 5.874642264320185e-06, "loss": 0.0085, "step": 59170 }, { "epoch": 0.9994258114635053, "grad_norm": 0.10699243098497391, "learning_rate": 5.8731911999836686e-06, "loss": 0.0082, "step": 59180 }, { "epoch": 0.9995946904448272, "grad_norm": 0.3112547695636749, "learning_rate": 5.871740059786442e-06, "loss": 0.0094, "step": 59190 }, { "epoch": 0.9997635694261492, "grad_norm": 0.2958757281303406, "learning_rate": 5.870288843854572e-06, "loss": 0.0095, "step": 59200 }, { "epoch": 0.9999324484074712, "grad_norm": 0.26761966943740845, "learning_rate": 5.868837552314143e-06, "loss": 0.0115, "step": 59210 }, { "epoch": 1.000101327388793, "grad_norm": 0.24635225534439087, "learning_rate": 5.867386185291234e-06, "loss": 0.0079, "step": 59220 }, { "epoch": 1.0002702063701152, "grad_norm": 0.26271742582321167, "learning_rate": 5.8659347429119394e-06, "loss": 0.0108, "step": 59230 }, { "epoch": 1.000439085351437, "grad_norm": 0.12244169414043427, "learning_rate": 5.864483225302357e-06, "loss": 0.0091, "step": 59240 }, { "epoch": 1.0006079643327592, "grad_norm": 0.5900202393531799, "learning_rate": 5.86303163258859e-06, "loss": 0.0125, "step": 59250 }, { "epoch": 1.0007768433140811, "grad_norm": 0.19378279149532318, "learning_rate": 5.861579964896748e-06, "loss": 0.0118, "step": 59260 }, { "epoch": 1.000945722295403, "grad_norm": 0.32514339685440063, "learning_rate": 5.860128222352952e-06, "loss": 0.0062, "step": 59270 }, { "epoch": 1.0011146012767251, "grad_norm": 0.34323009848594666, "learning_rate": 5.858676405083322e-06, "loss": 0.0096, "step": 59280 }, { "epoch": 1.001283480258047, "grad_norm": 0.27387315034866333, "learning_rate": 5.85722451321399e-06, "loss": 0.011, "step": 59290 }, { "epoch": 1.0014523592393691, "grad_norm": 0.4616957902908325, "learning_rate": 5.855772546871094e-06, "loss": 0.0071, "step": 59300 }, { "epoch": 1.001621238220691, "grad_norm": 0.18519508838653564, "learning_rate": 5.854320506180776e-06, "loss": 0.0117, "step": 59310 }, { "epoch": 1.001790117202013, "grad_norm": 0.1683448702096939, "learning_rate": 5.852868391269185e-06, "loss": 0.0068, "step": 59320 }, { "epoch": 1.001958996183335, "grad_norm": 0.23771503567695618, "learning_rate": 5.851416202262479e-06, "loss": 0.0088, "step": 59330 }, { "epoch": 1.002127875164657, "grad_norm": 0.16073109209537506, "learning_rate": 5.849963939286819e-06, "loss": 0.0054, "step": 59340 }, { "epoch": 1.002296754145979, "grad_norm": 0.21968217194080353, "learning_rate": 5.848511602468374e-06, "loss": 0.0063, "step": 59350 }, { "epoch": 1.002465633127301, "grad_norm": 0.1516571044921875, "learning_rate": 5.847059191933321e-06, "loss": 0.0093, "step": 59360 }, { "epoch": 1.0026345121086229, "grad_norm": 0.7206382155418396, "learning_rate": 5.845606707807842e-06, "loss": 0.0129, "step": 59370 }, { "epoch": 1.002803391089945, "grad_norm": 0.24769705533981323, "learning_rate": 5.844154150218122e-06, "loss": 0.0118, "step": 59380 }, { "epoch": 1.0029722700712669, "grad_norm": 0.46832749247550964, "learning_rate": 5.8427015192903604e-06, "loss": 0.0078, "step": 59390 }, { "epoch": 1.003141149052589, "grad_norm": 0.13215257227420807, "learning_rate": 5.841248815150754e-06, "loss": 0.0064, "step": 59400 }, { "epoch": 1.003310028033911, "grad_norm": 0.2196473926305771, "learning_rate": 5.839796037925512e-06, "loss": 0.0081, "step": 59410 }, { "epoch": 1.0034789070152328, "grad_norm": 0.45337051153182983, "learning_rate": 5.838343187740848e-06, "loss": 0.0098, "step": 59420 }, { "epoch": 1.003647785996555, "grad_norm": 0.1274431198835373, "learning_rate": 5.836890264722983e-06, "loss": 0.0105, "step": 59430 }, { "epoch": 1.0038166649778768, "grad_norm": 0.5106652975082397, "learning_rate": 5.835437268998142e-06, "loss": 0.0084, "step": 59440 }, { "epoch": 1.003985543959199, "grad_norm": 0.27914416790008545, "learning_rate": 5.8339842006925585e-06, "loss": 0.0087, "step": 59450 }, { "epoch": 1.0041544229405208, "grad_norm": 0.3063511550426483, "learning_rate": 5.832531059932471e-06, "loss": 0.0059, "step": 59460 }, { "epoch": 1.0043233019218427, "grad_norm": 0.149186372756958, "learning_rate": 5.8310778468441245e-06, "loss": 0.0072, "step": 59470 }, { "epoch": 1.0044921809031648, "grad_norm": 0.3683876395225525, "learning_rate": 5.829624561553772e-06, "loss": 0.0103, "step": 59480 }, { "epoch": 1.0046610598844867, "grad_norm": 0.29289308190345764, "learning_rate": 5.828171204187669e-06, "loss": 0.0086, "step": 59490 }, { "epoch": 1.0048299388658088, "grad_norm": 0.2158462405204773, "learning_rate": 5.826717774872083e-06, "loss": 0.0096, "step": 59500 }, { "epoch": 1.0049988178471307, "grad_norm": 0.17181408405303955, "learning_rate": 5.825264273733281e-06, "loss": 0.0073, "step": 59510 }, { "epoch": 1.0051676968284526, "grad_norm": 0.2769370675086975, "learning_rate": 5.823810700897543e-06, "loss": 0.0072, "step": 59520 }, { "epoch": 1.0053365758097748, "grad_norm": 0.3647231459617615, "learning_rate": 5.8223570564911466e-06, "loss": 0.0085, "step": 59530 }, { "epoch": 1.0055054547910967, "grad_norm": 0.19216719269752502, "learning_rate": 5.820903340640387e-06, "loss": 0.0063, "step": 59540 }, { "epoch": 1.0056743337724188, "grad_norm": 0.41295886039733887, "learning_rate": 5.819449553471555e-06, "loss": 0.009, "step": 59550 }, { "epoch": 1.0058432127537407, "grad_norm": 0.18476633727550507, "learning_rate": 5.8179956951109564e-06, "loss": 0.0077, "step": 59560 }, { "epoch": 1.0060120917350626, "grad_norm": 0.362932413816452, "learning_rate": 5.816541765684895e-06, "loss": 0.012, "step": 59570 }, { "epoch": 1.0061809707163847, "grad_norm": 0.35770854353904724, "learning_rate": 5.8150877653196855e-06, "loss": 0.0073, "step": 59580 }, { "epoch": 1.0063498496977066, "grad_norm": 0.14242222905158997, "learning_rate": 5.813633694141647e-06, "loss": 0.0052, "step": 59590 }, { "epoch": 1.0065187286790287, "grad_norm": 0.29809385538101196, "learning_rate": 5.812179552277109e-06, "loss": 0.0072, "step": 59600 }, { "epoch": 1.0066876076603506, "grad_norm": 0.35157108306884766, "learning_rate": 5.8107253398523996e-06, "loss": 0.0109, "step": 59610 }, { "epoch": 1.0068564866416725, "grad_norm": 0.3407653868198395, "learning_rate": 5.8092710569938615e-06, "loss": 0.0105, "step": 59620 }, { "epoch": 1.0070253656229946, "grad_norm": 0.2409847229719162, "learning_rate": 5.807816703827836e-06, "loss": 0.0071, "step": 59630 }, { "epoch": 1.0071942446043165, "grad_norm": 0.26714006066322327, "learning_rate": 5.806362280480674e-06, "loss": 0.0089, "step": 59640 }, { "epoch": 1.0073631235856386, "grad_norm": 0.24199412763118744, "learning_rate": 5.804907787078732e-06, "loss": 0.0066, "step": 59650 }, { "epoch": 1.0075320025669605, "grad_norm": 0.28091832995414734, "learning_rate": 5.803453223748376e-06, "loss": 0.0075, "step": 59660 }, { "epoch": 1.0077008815482824, "grad_norm": 0.3159427046775818, "learning_rate": 5.8019985906159715e-06, "loss": 0.0076, "step": 59670 }, { "epoch": 1.0078697605296045, "grad_norm": 0.33679336309432983, "learning_rate": 5.800543887807896e-06, "loss": 0.0106, "step": 59680 }, { "epoch": 1.0080386395109264, "grad_norm": 0.14134962856769562, "learning_rate": 5.799089115450528e-06, "loss": 0.0075, "step": 59690 }, { "epoch": 1.0082075184922485, "grad_norm": 0.6987384557723999, "learning_rate": 5.797634273670256e-06, "loss": 0.0101, "step": 59700 }, { "epoch": 1.0083763974735704, "grad_norm": 0.3218567669391632, "learning_rate": 5.796179362593471e-06, "loss": 0.0083, "step": 59710 }, { "epoch": 1.0085452764548923, "grad_norm": 0.2698197662830353, "learning_rate": 5.794724382346577e-06, "loss": 0.0083, "step": 59720 }, { "epoch": 1.0087141554362145, "grad_norm": 0.33588096499443054, "learning_rate": 5.793269333055974e-06, "loss": 0.0131, "step": 59730 }, { "epoch": 1.0088830344175364, "grad_norm": 0.1707211136817932, "learning_rate": 5.791814214848077e-06, "loss": 0.0098, "step": 59740 }, { "epoch": 1.0090519133988585, "grad_norm": 0.25985389947891235, "learning_rate": 5.790359027849299e-06, "loss": 0.007, "step": 59750 }, { "epoch": 1.0092207923801804, "grad_norm": 0.24158476293087006, "learning_rate": 5.788903772186067e-06, "loss": 0.0086, "step": 59760 }, { "epoch": 1.0093896713615023, "grad_norm": 0.2202313095331192, "learning_rate": 5.787448447984807e-06, "loss": 0.0081, "step": 59770 }, { "epoch": 1.0095585503428244, "grad_norm": 0.186204195022583, "learning_rate": 5.785993055371957e-06, "loss": 0.0108, "step": 59780 }, { "epoch": 1.0097274293241463, "grad_norm": 0.22703902423381805, "learning_rate": 5.784537594473956e-06, "loss": 0.0059, "step": 59790 }, { "epoch": 1.0098963083054684, "grad_norm": 0.16862712800502777, "learning_rate": 5.783082065417251e-06, "loss": 0.0106, "step": 59800 }, { "epoch": 1.0100651872867903, "grad_norm": 0.15731225907802582, "learning_rate": 5.7816264683282944e-06, "loss": 0.0144, "step": 59810 }, { "epoch": 1.0102340662681122, "grad_norm": 0.08799710869789124, "learning_rate": 5.780170803333546e-06, "loss": 0.0065, "step": 59820 }, { "epoch": 1.0104029452494343, "grad_norm": 0.13805970549583435, "learning_rate": 5.778715070559469e-06, "loss": 0.0102, "step": 59830 }, { "epoch": 1.0105718242307562, "grad_norm": 0.2852746844291687, "learning_rate": 5.777259270132536e-06, "loss": 0.0081, "step": 59840 }, { "epoch": 1.0107407032120783, "grad_norm": 0.4166901409626007, "learning_rate": 5.775803402179221e-06, "loss": 0.0092, "step": 59850 }, { "epoch": 1.0109095821934002, "grad_norm": 0.18567916750907898, "learning_rate": 5.774347466826009e-06, "loss": 0.0091, "step": 59860 }, { "epoch": 1.011078461174722, "grad_norm": 0.3375563621520996, "learning_rate": 5.772891464199386e-06, "loss": 0.0086, "step": 59870 }, { "epoch": 1.0112473401560442, "grad_norm": 0.3969753086566925, "learning_rate": 5.7714353944258464e-06, "loss": 0.0078, "step": 59880 }, { "epoch": 1.0114162191373661, "grad_norm": 0.2634918689727783, "learning_rate": 5.769979257631888e-06, "loss": 0.0075, "step": 59890 }, { "epoch": 1.0115850981186882, "grad_norm": 0.09884114563465118, "learning_rate": 5.768523053944022e-06, "loss": 0.0101, "step": 59900 }, { "epoch": 1.0117539771000101, "grad_norm": 0.27478837966918945, "learning_rate": 5.767066783488754e-06, "loss": 0.0085, "step": 59910 }, { "epoch": 1.011922856081332, "grad_norm": 0.7806825041770935, "learning_rate": 5.765610446392604e-06, "loss": 0.0081, "step": 59920 }, { "epoch": 1.0120917350626542, "grad_norm": 0.23950494825839996, "learning_rate": 5.764154042782094e-06, "loss": 0.0069, "step": 59930 }, { "epoch": 1.012260614043976, "grad_norm": 0.32733988761901855, "learning_rate": 5.762697572783753e-06, "loss": 0.0101, "step": 59940 }, { "epoch": 1.0124294930252982, "grad_norm": 0.2991465926170349, "learning_rate": 5.761241036524117e-06, "loss": 0.0121, "step": 59950 }, { "epoch": 1.01259837200662, "grad_norm": 0.420404314994812, "learning_rate": 5.7597844341297235e-06, "loss": 0.0091, "step": 59960 }, { "epoch": 1.012767250987942, "grad_norm": 0.17017912864685059, "learning_rate": 5.758327765727122e-06, "loss": 0.0095, "step": 59970 }, { "epoch": 1.012936129969264, "grad_norm": 0.1369200348854065, "learning_rate": 5.756871031442861e-06, "loss": 0.0085, "step": 59980 }, { "epoch": 1.013105008950586, "grad_norm": 0.3845866620540619, "learning_rate": 5.755414231403501e-06, "loss": 0.0101, "step": 59990 }, { "epoch": 1.013273887931908, "grad_norm": 0.3509371876716614, "learning_rate": 5.753957365735602e-06, "loss": 0.0088, "step": 60000 }, { "epoch": 1.01344276691323, "grad_norm": 0.22166387736797333, "learning_rate": 5.752500434565734e-06, "loss": 0.0033, "step": 60010 }, { "epoch": 1.0136116458945519, "grad_norm": 0.32785332202911377, "learning_rate": 5.7510434380204735e-06, "loss": 0.0083, "step": 60020 }, { "epoch": 1.013780524875874, "grad_norm": 0.49741077423095703, "learning_rate": 5.749586376226401e-06, "loss": 0.0109, "step": 60030 }, { "epoch": 1.013949403857196, "grad_norm": 0.17176930606365204, "learning_rate": 5.748129249310097e-06, "loss": 0.006, "step": 60040 }, { "epoch": 1.014118282838518, "grad_norm": 0.19714146852493286, "learning_rate": 5.746672057398161e-06, "loss": 0.0088, "step": 60050 }, { "epoch": 1.01428716181984, "grad_norm": 0.20499439537525177, "learning_rate": 5.745214800617184e-06, "loss": 0.0097, "step": 60060 }, { "epoch": 1.0144560408011618, "grad_norm": 0.3188621699810028, "learning_rate": 5.74375747909377e-06, "loss": 0.0096, "step": 60070 }, { "epoch": 1.014624919782484, "grad_norm": 0.12521196901798248, "learning_rate": 5.74230009295453e-06, "loss": 0.0061, "step": 60080 }, { "epoch": 1.0147937987638058, "grad_norm": 0.22162717580795288, "learning_rate": 5.7408426423260785e-06, "loss": 0.0076, "step": 60090 }, { "epoch": 1.014962677745128, "grad_norm": 0.09444832056760788, "learning_rate": 5.739385127335031e-06, "loss": 0.0096, "step": 60100 }, { "epoch": 1.0151315567264498, "grad_norm": 0.2524941861629486, "learning_rate": 5.737927548108018e-06, "loss": 0.0108, "step": 60110 }, { "epoch": 1.0153004357077717, "grad_norm": 0.11665771901607513, "learning_rate": 5.736469904771666e-06, "loss": 0.0124, "step": 60120 }, { "epoch": 1.0154693146890938, "grad_norm": 0.30849307775497437, "learning_rate": 5.735012197452613e-06, "loss": 0.0094, "step": 60130 }, { "epoch": 1.0156381936704157, "grad_norm": 0.27745965123176575, "learning_rate": 5.7335544262775025e-06, "loss": 0.0092, "step": 60140 }, { "epoch": 1.0158070726517379, "grad_norm": 0.3035597801208496, "learning_rate": 5.7320965913729805e-06, "loss": 0.008, "step": 60150 }, { "epoch": 1.0159759516330598, "grad_norm": 0.24121825397014618, "learning_rate": 5.7306386928657e-06, "loss": 0.01, "step": 60160 }, { "epoch": 1.0161448306143817, "grad_norm": 0.17108112573623657, "learning_rate": 5.729180730882322e-06, "loss": 0.0099, "step": 60170 }, { "epoch": 1.0163137095957038, "grad_norm": 0.3886386752128601, "learning_rate": 5.727722705549507e-06, "loss": 0.0096, "step": 60180 }, { "epoch": 1.0164825885770257, "grad_norm": 0.25637656450271606, "learning_rate": 5.726264616993926e-06, "loss": 0.0071, "step": 60190 }, { "epoch": 1.0166514675583478, "grad_norm": 0.19218358397483826, "learning_rate": 5.724806465342256e-06, "loss": 0.0093, "step": 60200 }, { "epoch": 1.0168203465396697, "grad_norm": 0.2395869493484497, "learning_rate": 5.723348250721177e-06, "loss": 0.0112, "step": 60210 }, { "epoch": 1.0169892255209916, "grad_norm": 0.2767927646636963, "learning_rate": 5.721889973257373e-06, "loss": 0.0103, "step": 60220 }, { "epoch": 1.0171581045023137, "grad_norm": 0.25182315707206726, "learning_rate": 5.720431633077539e-06, "loss": 0.0077, "step": 60230 }, { "epoch": 1.0173269834836356, "grad_norm": 0.6399406790733337, "learning_rate": 5.718973230308368e-06, "loss": 0.0115, "step": 60240 }, { "epoch": 1.0174958624649577, "grad_norm": 0.09607614576816559, "learning_rate": 5.717514765076564e-06, "loss": 0.0097, "step": 60250 }, { "epoch": 1.0176647414462796, "grad_norm": 0.14758047461509705, "learning_rate": 5.716056237508837e-06, "loss": 0.007, "step": 60260 }, { "epoch": 1.0178336204276015, "grad_norm": 0.27342548966407776, "learning_rate": 5.714597647731899e-06, "loss": 0.0088, "step": 60270 }, { "epoch": 1.0180024994089236, "grad_norm": 0.17415407299995422, "learning_rate": 5.713138995872467e-06, "loss": 0.0091, "step": 60280 }, { "epoch": 1.0181713783902455, "grad_norm": 0.36816832423210144, "learning_rate": 5.711680282057268e-06, "loss": 0.0077, "step": 60290 }, { "epoch": 1.0183402573715676, "grad_norm": 0.44115546345710754, "learning_rate": 5.710221506413028e-06, "loss": 0.0087, "step": 60300 }, { "epoch": 1.0185091363528895, "grad_norm": 0.15158572793006897, "learning_rate": 5.708762669066486e-06, "loss": 0.0088, "step": 60310 }, { "epoch": 1.0186780153342114, "grad_norm": 0.15629784762859344, "learning_rate": 5.707303770144378e-06, "loss": 0.0064, "step": 60320 }, { "epoch": 1.0188468943155335, "grad_norm": 0.3612844944000244, "learning_rate": 5.705844809773453e-06, "loss": 0.0074, "step": 60330 }, { "epoch": 1.0190157732968554, "grad_norm": 0.24816952645778656, "learning_rate": 5.70438578808046e-06, "loss": 0.0088, "step": 60340 }, { "epoch": 1.0191846522781776, "grad_norm": 0.15036135911941528, "learning_rate": 5.702926705192157e-06, "loss": 0.0141, "step": 60350 }, { "epoch": 1.0193535312594995, "grad_norm": 0.23189052939414978, "learning_rate": 5.701467561235302e-06, "loss": 0.0086, "step": 60360 }, { "epoch": 1.0195224102408214, "grad_norm": 0.2156657576560974, "learning_rate": 5.700008356336665e-06, "loss": 0.0092, "step": 60370 }, { "epoch": 1.0196912892221435, "grad_norm": 0.36916035413742065, "learning_rate": 5.698549090623018e-06, "loss": 0.0076, "step": 60380 }, { "epoch": 1.0198601682034654, "grad_norm": 0.25956064462661743, "learning_rate": 5.697089764221136e-06, "loss": 0.0084, "step": 60390 }, { "epoch": 1.0200290471847875, "grad_norm": 0.2256007194519043, "learning_rate": 5.695630377257806e-06, "loss": 0.0094, "step": 60400 }, { "epoch": 1.0201979261661094, "grad_norm": 0.27957388758659363, "learning_rate": 5.694170929859812e-06, "loss": 0.0096, "step": 60410 }, { "epoch": 1.0203668051474313, "grad_norm": 0.27865418791770935, "learning_rate": 5.692711422153949e-06, "loss": 0.0083, "step": 60420 }, { "epoch": 1.0205356841287534, "grad_norm": 0.18120136857032776, "learning_rate": 5.691251854267014e-06, "loss": 0.0067, "step": 60430 }, { "epoch": 1.0207045631100753, "grad_norm": 0.1737716943025589, "learning_rate": 5.689792226325812e-06, "loss": 0.0074, "step": 60440 }, { "epoch": 1.0208734420913974, "grad_norm": 0.3748737871646881, "learning_rate": 5.688332538457152e-06, "loss": 0.0076, "step": 60450 }, { "epoch": 1.0210423210727193, "grad_norm": 0.342766672372818, "learning_rate": 5.6868727907878485e-06, "loss": 0.0144, "step": 60460 }, { "epoch": 1.0212112000540412, "grad_norm": 0.34538763761520386, "learning_rate": 5.685412983444719e-06, "loss": 0.0081, "step": 60470 }, { "epoch": 1.0213800790353633, "grad_norm": 0.2355673909187317, "learning_rate": 5.6839531165545905e-06, "loss": 0.01, "step": 60480 }, { "epoch": 1.0215489580166852, "grad_norm": 0.3085414171218872, "learning_rate": 5.682493190244289e-06, "loss": 0.0066, "step": 60490 }, { "epoch": 1.0217178369980073, "grad_norm": 0.7232803702354431, "learning_rate": 5.681033204640653e-06, "loss": 0.0107, "step": 60500 }, { "epoch": 1.0218867159793292, "grad_norm": 0.3325980305671692, "learning_rate": 5.67957315987052e-06, "loss": 0.0082, "step": 60510 }, { "epoch": 1.0220555949606511, "grad_norm": 0.35927286744117737, "learning_rate": 5.678113056060736e-06, "loss": 0.0096, "step": 60520 }, { "epoch": 1.0222244739419732, "grad_norm": 0.1219601109623909, "learning_rate": 5.676652893338153e-06, "loss": 0.0051, "step": 60530 }, { "epoch": 1.0223933529232951, "grad_norm": 0.3955192267894745, "learning_rate": 5.6751926718296235e-06, "loss": 0.0096, "step": 60540 }, { "epoch": 1.0225622319046173, "grad_norm": 0.6435220241546631, "learning_rate": 5.6737323916620095e-06, "loss": 0.0097, "step": 60550 }, { "epoch": 1.0227311108859392, "grad_norm": 0.3633460998535156, "learning_rate": 5.672272052962174e-06, "loss": 0.0062, "step": 60560 }, { "epoch": 1.022899989867261, "grad_norm": 0.11168825626373291, "learning_rate": 5.670811655856993e-06, "loss": 0.0075, "step": 60570 }, { "epoch": 1.0230688688485832, "grad_norm": 0.3959817588329315, "learning_rate": 5.6693512004733376e-06, "loss": 0.01, "step": 60580 }, { "epoch": 1.023237747829905, "grad_norm": 0.2425455003976822, "learning_rate": 5.66789068693809e-06, "loss": 0.0087, "step": 60590 }, { "epoch": 1.0234066268112272, "grad_norm": 0.15026643872261047, "learning_rate": 5.666430115378137e-06, "loss": 0.0071, "step": 60600 }, { "epoch": 1.023575505792549, "grad_norm": 0.2479819655418396, "learning_rate": 5.664969485920367e-06, "loss": 0.0109, "step": 60610 }, { "epoch": 1.023744384773871, "grad_norm": 0.2004278600215912, "learning_rate": 5.6635087986916795e-06, "loss": 0.0108, "step": 60620 }, { "epoch": 1.023913263755193, "grad_norm": 0.6535119414329529, "learning_rate": 5.6620480538189715e-06, "loss": 0.0087, "step": 60630 }, { "epoch": 1.024082142736515, "grad_norm": 0.2817024290561676, "learning_rate": 5.660587251429153e-06, "loss": 0.0122, "step": 60640 }, { "epoch": 1.024251021717837, "grad_norm": 0.24564304947853088, "learning_rate": 5.65912639164913e-06, "loss": 0.0095, "step": 60650 }, { "epoch": 1.024419900699159, "grad_norm": 0.24377672374248505, "learning_rate": 5.657665474605824e-06, "loss": 0.0082, "step": 60660 }, { "epoch": 1.024588779680481, "grad_norm": 0.331924706697464, "learning_rate": 5.65620450042615e-06, "loss": 0.007, "step": 60670 }, { "epoch": 1.024757658661803, "grad_norm": 0.3638572096824646, "learning_rate": 5.654743469237041e-06, "loss": 0.0112, "step": 60680 }, { "epoch": 1.024926537643125, "grad_norm": 0.28530874848365784, "learning_rate": 5.6532823811654205e-06, "loss": 0.0083, "step": 60690 }, { "epoch": 1.0250954166244468, "grad_norm": 0.5265266299247742, "learning_rate": 5.651821236338229e-06, "loss": 0.0137, "step": 60700 }, { "epoch": 1.025264295605769, "grad_norm": 0.2613804340362549, "learning_rate": 5.650360034882405e-06, "loss": 0.008, "step": 60710 }, { "epoch": 1.0254331745870908, "grad_norm": 0.28365880250930786, "learning_rate": 5.648898776924896e-06, "loss": 0.0083, "step": 60720 }, { "epoch": 1.025602053568413, "grad_norm": 0.1890982985496521, "learning_rate": 5.647437462592649e-06, "loss": 0.0117, "step": 60730 }, { "epoch": 1.0257709325497348, "grad_norm": 0.2774741053581238, "learning_rate": 5.645976092012624e-06, "loss": 0.0097, "step": 60740 }, { "epoch": 1.0259398115310567, "grad_norm": 0.23049703240394592, "learning_rate": 5.644514665311777e-06, "loss": 0.0085, "step": 60750 }, { "epoch": 1.0261086905123789, "grad_norm": 0.3587992489337921, "learning_rate": 5.643053182617078e-06, "loss": 0.0075, "step": 60760 }, { "epoch": 1.0262775694937007, "grad_norm": 0.4264008700847626, "learning_rate": 5.641591644055491e-06, "loss": 0.0085, "step": 60770 }, { "epoch": 1.0264464484750229, "grad_norm": 0.325988233089447, "learning_rate": 5.640130049753997e-06, "loss": 0.0095, "step": 60780 }, { "epoch": 1.0266153274563448, "grad_norm": 0.20590953528881073, "learning_rate": 5.63866839983957e-06, "loss": 0.0138, "step": 60790 }, { "epoch": 1.0267842064376667, "grad_norm": 0.6973934769630432, "learning_rate": 5.637206694439199e-06, "loss": 0.0094, "step": 60800 }, { "epoch": 1.0269530854189888, "grad_norm": 0.3985130488872528, "learning_rate": 5.6357449336798705e-06, "loss": 0.0078, "step": 60810 }, { "epoch": 1.0271219644003107, "grad_norm": 0.21582292020320892, "learning_rate": 5.634283117688583e-06, "loss": 0.009, "step": 60820 }, { "epoch": 1.0272908433816328, "grad_norm": 0.3414575457572937, "learning_rate": 5.632821246592329e-06, "loss": 0.0111, "step": 60830 }, { "epoch": 1.0274597223629547, "grad_norm": 0.1966564655303955, "learning_rate": 5.631359320518117e-06, "loss": 0.0074, "step": 60840 }, { "epoch": 1.0276286013442766, "grad_norm": 0.23496101796627045, "learning_rate": 5.629897339592954e-06, "loss": 0.0078, "step": 60850 }, { "epoch": 1.0277974803255987, "grad_norm": 0.4333299398422241, "learning_rate": 5.628435303943855e-06, "loss": 0.0118, "step": 60860 }, { "epoch": 1.0279663593069206, "grad_norm": 0.12809793651103973, "learning_rate": 5.626973213697836e-06, "loss": 0.0098, "step": 60870 }, { "epoch": 1.0281352382882427, "grad_norm": 0.2220718115568161, "learning_rate": 5.62551106898192e-06, "loss": 0.0074, "step": 60880 }, { "epoch": 1.0283041172695646, "grad_norm": 0.28233784437179565, "learning_rate": 5.624048869923136e-06, "loss": 0.0068, "step": 60890 }, { "epoch": 1.0284729962508865, "grad_norm": 0.5084770321846008, "learning_rate": 5.622586616648515e-06, "loss": 0.0072, "step": 60900 }, { "epoch": 1.0286418752322086, "grad_norm": 0.4239579737186432, "learning_rate": 5.621124309285093e-06, "loss": 0.0076, "step": 60910 }, { "epoch": 1.0288107542135305, "grad_norm": 0.538729727268219, "learning_rate": 5.619661947959914e-06, "loss": 0.0088, "step": 60920 }, { "epoch": 1.0289796331948526, "grad_norm": 0.42442142963409424, "learning_rate": 5.6181995328000246e-06, "loss": 0.0082, "step": 60930 }, { "epoch": 1.0291485121761745, "grad_norm": 0.20147661864757538, "learning_rate": 5.6167370639324726e-06, "loss": 0.01, "step": 60940 }, { "epoch": 1.0293173911574964, "grad_norm": 0.18001486361026764, "learning_rate": 5.615274541484318e-06, "loss": 0.012, "step": 60950 }, { "epoch": 1.0294862701388185, "grad_norm": 0.7304168343544006, "learning_rate": 5.613811965582618e-06, "loss": 0.0102, "step": 60960 }, { "epoch": 1.0296551491201404, "grad_norm": 0.5284837484359741, "learning_rate": 5.6123493363544366e-06, "loss": 0.013, "step": 60970 }, { "epoch": 1.0298240281014626, "grad_norm": 0.3363972008228302, "learning_rate": 5.610886653926846e-06, "loss": 0.0081, "step": 60980 }, { "epoch": 1.0299929070827845, "grad_norm": 0.08849474042654037, "learning_rate": 5.609423918426921e-06, "loss": 0.0058, "step": 60990 }, { "epoch": 1.0301617860641064, "grad_norm": 0.2324065864086151, "learning_rate": 5.607961129981739e-06, "loss": 0.0071, "step": 61000 }, { "epoch": 1.0301617860641064, "eval_loss": 0.00829971395432949, "eval_runtime": 3.0548, "eval_samples_per_second": 65.472, "eval_steps_per_second": 16.368, "step": 61000 }, { "epoch": 1.0303306650454285, "grad_norm": 0.21158383786678314, "learning_rate": 5.606498288718384e-06, "loss": 0.0111, "step": 61010 }, { "epoch": 1.0304995440267504, "grad_norm": 0.09877040237188339, "learning_rate": 5.605035394763943e-06, "loss": 0.01, "step": 61020 }, { "epoch": 1.0306684230080725, "grad_norm": 0.3284768760204315, "learning_rate": 5.60357244824551e-06, "loss": 0.0084, "step": 61030 }, { "epoch": 1.0308373019893944, "grad_norm": 0.4322194755077362, "learning_rate": 5.602109449290181e-06, "loss": 0.0077, "step": 61040 }, { "epoch": 1.0310061809707163, "grad_norm": 0.3363930583000183, "learning_rate": 5.6006463980250605e-06, "loss": 0.0101, "step": 61050 }, { "epoch": 1.0311750599520384, "grad_norm": 0.1504850834608078, "learning_rate": 5.599183294577251e-06, "loss": 0.0072, "step": 61060 }, { "epoch": 1.0313439389333603, "grad_norm": 0.31370505690574646, "learning_rate": 5.597720139073866e-06, "loss": 0.0124, "step": 61070 }, { "epoch": 1.0315128179146824, "grad_norm": 0.1199440285563469, "learning_rate": 5.596256931642018e-06, "loss": 0.0056, "step": 61080 }, { "epoch": 1.0316816968960043, "grad_norm": 0.10583554953336716, "learning_rate": 5.594793672408831e-06, "loss": 0.0073, "step": 61090 }, { "epoch": 1.0318505758773262, "grad_norm": 0.14929577708244324, "learning_rate": 5.593330361501425e-06, "loss": 0.0122, "step": 61100 }, { "epoch": 1.0320194548586483, "grad_norm": 0.5157337188720703, "learning_rate": 5.591866999046933e-06, "loss": 0.0081, "step": 61110 }, { "epoch": 1.0321883338399702, "grad_norm": 0.30804887413978577, "learning_rate": 5.590403585172486e-06, "loss": 0.008, "step": 61120 }, { "epoch": 1.0323572128212923, "grad_norm": 0.6725590825080872, "learning_rate": 5.588940120005222e-06, "loss": 0.0155, "step": 61130 }, { "epoch": 1.0325260918026142, "grad_norm": 0.3846328854560852, "learning_rate": 5.5874766036722835e-06, "loss": 0.0076, "step": 61140 }, { "epoch": 1.0326949707839361, "grad_norm": 0.33600863814353943, "learning_rate": 5.586013036300817e-06, "loss": 0.0075, "step": 61150 }, { "epoch": 1.0328638497652582, "grad_norm": 0.2699756920337677, "learning_rate": 5.584549418017974e-06, "loss": 0.0053, "step": 61160 }, { "epoch": 1.0330327287465801, "grad_norm": 0.23025019466876984, "learning_rate": 5.583085748950911e-06, "loss": 0.007, "step": 61170 }, { "epoch": 1.0332016077279023, "grad_norm": 0.28936704993247986, "learning_rate": 5.581622029226786e-06, "loss": 0.0091, "step": 61180 }, { "epoch": 1.0333704867092242, "grad_norm": 0.17143572866916656, "learning_rate": 5.580158258972764e-06, "loss": 0.0082, "step": 61190 }, { "epoch": 1.033539365690546, "grad_norm": 0.20250976085662842, "learning_rate": 5.578694438316014e-06, "loss": 0.0152, "step": 61200 }, { "epoch": 1.0337082446718682, "grad_norm": 0.19855548441410065, "learning_rate": 5.577230567383709e-06, "loss": 0.0072, "step": 61210 }, { "epoch": 1.03387712365319, "grad_norm": 0.20672611892223358, "learning_rate": 5.575766646303027e-06, "loss": 0.0079, "step": 61220 }, { "epoch": 1.0340460026345122, "grad_norm": 0.21454109251499176, "learning_rate": 5.574302675201151e-06, "loss": 0.006, "step": 61230 }, { "epoch": 1.034214881615834, "grad_norm": 0.23128221929073334, "learning_rate": 5.572838654205264e-06, "loss": 0.0071, "step": 61240 }, { "epoch": 1.034383760597156, "grad_norm": 0.23342351615428925, "learning_rate": 5.571374583442559e-06, "loss": 0.0057, "step": 61250 }, { "epoch": 1.034552639578478, "grad_norm": 0.29835763573646545, "learning_rate": 5.569910463040231e-06, "loss": 0.008, "step": 61260 }, { "epoch": 1.0347215185598, "grad_norm": 0.45131951570510864, "learning_rate": 5.568446293125477e-06, "loss": 0.0115, "step": 61270 }, { "epoch": 1.034890397541122, "grad_norm": 0.31979548931121826, "learning_rate": 5.566982073825503e-06, "loss": 0.0149, "step": 61280 }, { "epoch": 1.035059276522444, "grad_norm": 0.444026917219162, "learning_rate": 5.5655178052675155e-06, "loss": 0.0083, "step": 61290 }, { "epoch": 1.035228155503766, "grad_norm": 0.28769150376319885, "learning_rate": 5.564053487578727e-06, "loss": 0.0064, "step": 61300 }, { "epoch": 1.035397034485088, "grad_norm": 0.3158927857875824, "learning_rate": 5.562589120886353e-06, "loss": 0.0102, "step": 61310 }, { "epoch": 1.03556591346641, "grad_norm": 0.2648624777793884, "learning_rate": 5.561124705317616e-06, "loss": 0.0104, "step": 61320 }, { "epoch": 1.035734792447732, "grad_norm": 0.22978541254997253, "learning_rate": 5.559660240999738e-06, "loss": 0.0087, "step": 61330 }, { "epoch": 1.035903671429054, "grad_norm": 0.30922430753707886, "learning_rate": 5.558195728059951e-06, "loss": 0.0071, "step": 61340 }, { "epoch": 1.0360725504103758, "grad_norm": 0.2978096306324005, "learning_rate": 5.556731166625485e-06, "loss": 0.0124, "step": 61350 }, { "epoch": 1.036241429391698, "grad_norm": 0.630424439907074, "learning_rate": 5.555266556823582e-06, "loss": 0.007, "step": 61360 }, { "epoch": 1.0364103083730198, "grad_norm": 0.24027250707149506, "learning_rate": 5.55380189878148e-06, "loss": 0.0111, "step": 61370 }, { "epoch": 1.036579187354342, "grad_norm": 0.349234938621521, "learning_rate": 5.552337192626427e-06, "loss": 0.0076, "step": 61380 }, { "epoch": 1.0367480663356639, "grad_norm": 0.28127872943878174, "learning_rate": 5.550872438485671e-06, "loss": 0.0075, "step": 61390 }, { "epoch": 1.0369169453169857, "grad_norm": 0.26573866605758667, "learning_rate": 5.549407636486467e-06, "loss": 0.0075, "step": 61400 }, { "epoch": 1.0370858242983079, "grad_norm": 0.16748914122581482, "learning_rate": 5.547942786756073e-06, "loss": 0.01, "step": 61410 }, { "epoch": 1.0372547032796298, "grad_norm": 0.3430432975292206, "learning_rate": 5.546477889421755e-06, "loss": 0.0058, "step": 61420 }, { "epoch": 1.0374235822609519, "grad_norm": 0.04433358460664749, "learning_rate": 5.545012944610776e-06, "loss": 0.0064, "step": 61430 }, { "epoch": 1.0375924612422738, "grad_norm": 0.27564460039138794, "learning_rate": 5.543547952450408e-06, "loss": 0.0085, "step": 61440 }, { "epoch": 1.0377613402235957, "grad_norm": 0.2095029354095459, "learning_rate": 5.5420829130679244e-06, "loss": 0.0062, "step": 61450 }, { "epoch": 1.0379302192049178, "grad_norm": 0.1585063487291336, "learning_rate": 5.5406178265906064e-06, "loss": 0.0064, "step": 61460 }, { "epoch": 1.0380990981862397, "grad_norm": 0.10203471034765244, "learning_rate": 5.539152693145736e-06, "loss": 0.006, "step": 61470 }, { "epoch": 1.0382679771675618, "grad_norm": 0.2740464508533478, "learning_rate": 5.537687512860602e-06, "loss": 0.0088, "step": 61480 }, { "epoch": 1.0384368561488837, "grad_norm": 0.29872822761535645, "learning_rate": 5.536222285862494e-06, "loss": 0.0128, "step": 61490 }, { "epoch": 1.0386057351302056, "grad_norm": 0.15698154270648956, "learning_rate": 5.534757012278707e-06, "loss": 0.0084, "step": 61500 }, { "epoch": 1.0387746141115277, "grad_norm": 0.0954931452870369, "learning_rate": 5.533291692236542e-06, "loss": 0.005, "step": 61510 }, { "epoch": 1.0389434930928496, "grad_norm": 0.29276198148727417, "learning_rate": 5.531826325863301e-06, "loss": 0.0084, "step": 61520 }, { "epoch": 1.0391123720741717, "grad_norm": 0.4151570200920105, "learning_rate": 5.5303609132862925e-06, "loss": 0.0075, "step": 61530 }, { "epoch": 1.0392812510554936, "grad_norm": 0.3805483877658844, "learning_rate": 5.528895454632826e-06, "loss": 0.0086, "step": 61540 }, { "epoch": 1.0394501300368155, "grad_norm": 0.2736756205558777, "learning_rate": 5.52742995003022e-06, "loss": 0.0071, "step": 61550 }, { "epoch": 1.0396190090181376, "grad_norm": 0.2635841369628906, "learning_rate": 5.525964399605792e-06, "loss": 0.0076, "step": 61560 }, { "epoch": 1.0397878879994595, "grad_norm": 0.4242132902145386, "learning_rate": 5.524498803486866e-06, "loss": 0.0086, "step": 61570 }, { "epoch": 1.0399567669807817, "grad_norm": 0.5319623947143555, "learning_rate": 5.523033161800768e-06, "loss": 0.0064, "step": 61580 }, { "epoch": 1.0401256459621036, "grad_norm": 0.17450405657291412, "learning_rate": 5.521567474674831e-06, "loss": 0.0068, "step": 61590 }, { "epoch": 1.0402945249434254, "grad_norm": 0.21989864110946655, "learning_rate": 5.520101742236391e-06, "loss": 0.0073, "step": 61600 }, { "epoch": 1.0404634039247476, "grad_norm": 0.40538910031318665, "learning_rate": 5.5186359646127856e-06, "loss": 0.0091, "step": 61610 }, { "epoch": 1.0406322829060695, "grad_norm": 0.3918309807777405, "learning_rate": 5.517170141931359e-06, "loss": 0.0142, "step": 61620 }, { "epoch": 1.0408011618873916, "grad_norm": 0.1600053459405899, "learning_rate": 5.5157042743194555e-06, "loss": 0.0084, "step": 61630 }, { "epoch": 1.0409700408687135, "grad_norm": 0.5515720844268799, "learning_rate": 5.514238361904429e-06, "loss": 0.0067, "step": 61640 }, { "epoch": 1.0411389198500354, "grad_norm": 0.2836809456348419, "learning_rate": 5.512772404813635e-06, "loss": 0.0093, "step": 61650 }, { "epoch": 1.0413077988313575, "grad_norm": 0.40730714797973633, "learning_rate": 5.51130640317443e-06, "loss": 0.0099, "step": 61660 }, { "epoch": 1.0414766778126794, "grad_norm": 0.4457308053970337, "learning_rate": 5.509840357114178e-06, "loss": 0.0109, "step": 61670 }, { "epoch": 1.0416455567940015, "grad_norm": 0.2244679033756256, "learning_rate": 5.508374266760244e-06, "loss": 0.0085, "step": 61680 }, { "epoch": 1.0418144357753234, "grad_norm": 0.6315599679946899, "learning_rate": 5.50690813224e-06, "loss": 0.0112, "step": 61690 }, { "epoch": 1.0419833147566453, "grad_norm": 0.19115744531154633, "learning_rate": 5.50544195368082e-06, "loss": 0.0068, "step": 61700 }, { "epoch": 1.0421521937379674, "grad_norm": 0.3768104016780853, "learning_rate": 5.5039757312100814e-06, "loss": 0.0099, "step": 61710 }, { "epoch": 1.0423210727192893, "grad_norm": 0.20785459876060486, "learning_rate": 5.502509464955167e-06, "loss": 0.0073, "step": 61720 }, { "epoch": 1.0424899517006114, "grad_norm": 0.4341406524181366, "learning_rate": 5.501043155043461e-06, "loss": 0.0086, "step": 61730 }, { "epoch": 1.0426588306819333, "grad_norm": 0.16726461052894592, "learning_rate": 5.499576801602352e-06, "loss": 0.0042, "step": 61740 }, { "epoch": 1.0428277096632552, "grad_norm": 0.2617450952529907, "learning_rate": 5.498110404759236e-06, "loss": 0.0075, "step": 61750 }, { "epoch": 1.0429965886445773, "grad_norm": 0.2458205670118332, "learning_rate": 5.496643964641508e-06, "loss": 0.0062, "step": 61760 }, { "epoch": 1.0431654676258992, "grad_norm": 0.3295598328113556, "learning_rate": 5.495177481376572e-06, "loss": 0.0073, "step": 61770 }, { "epoch": 1.0433343466072214, "grad_norm": 0.27933797240257263, "learning_rate": 5.4937109550918265e-06, "loss": 0.0084, "step": 61780 }, { "epoch": 1.0435032255885432, "grad_norm": 0.3013060986995697, "learning_rate": 5.492244385914685e-06, "loss": 0.0069, "step": 61790 }, { "epoch": 1.0436721045698651, "grad_norm": 0.2994311451911926, "learning_rate": 5.490777773972555e-06, "loss": 0.0105, "step": 61800 }, { "epoch": 1.0438409835511873, "grad_norm": 0.3785754442214966, "learning_rate": 5.489311119392856e-06, "loss": 0.0082, "step": 61810 }, { "epoch": 1.0440098625325092, "grad_norm": 0.1617637723684311, "learning_rate": 5.4878444223030055e-06, "loss": 0.0064, "step": 61820 }, { "epoch": 1.0441787415138313, "grad_norm": 0.22464147210121155, "learning_rate": 5.486377682830429e-06, "loss": 0.0057, "step": 61830 }, { "epoch": 1.0443476204951532, "grad_norm": 0.27437102794647217, "learning_rate": 5.484910901102549e-06, "loss": 0.0094, "step": 61840 }, { "epoch": 1.044516499476475, "grad_norm": 0.3238867521286011, "learning_rate": 5.4834440772468e-06, "loss": 0.0065, "step": 61850 }, { "epoch": 1.0446853784577972, "grad_norm": 0.12159249931573868, "learning_rate": 5.481977211390613e-06, "loss": 0.0099, "step": 61860 }, { "epoch": 1.044854257439119, "grad_norm": 0.41058555245399475, "learning_rate": 5.480510303661427e-06, "loss": 0.0136, "step": 61870 }, { "epoch": 1.0450231364204412, "grad_norm": 0.27513858675956726, "learning_rate": 5.479043354186683e-06, "loss": 0.0063, "step": 61880 }, { "epoch": 1.045192015401763, "grad_norm": 0.2634575068950653, "learning_rate": 5.477576363093827e-06, "loss": 0.0083, "step": 61890 }, { "epoch": 1.045360894383085, "grad_norm": 0.3082202672958374, "learning_rate": 5.4761093305103066e-06, "loss": 0.0082, "step": 61900 }, { "epoch": 1.0455297733644071, "grad_norm": 0.14399370551109314, "learning_rate": 5.474642256563573e-06, "loss": 0.0074, "step": 61910 }, { "epoch": 1.045698652345729, "grad_norm": 0.4515916705131531, "learning_rate": 5.473175141381085e-06, "loss": 0.0044, "step": 61920 }, { "epoch": 1.0458675313270511, "grad_norm": 0.19214630126953125, "learning_rate": 5.471707985090299e-06, "loss": 0.008, "step": 61930 }, { "epoch": 1.046036410308373, "grad_norm": 0.3090715706348419, "learning_rate": 5.470240787818679e-06, "loss": 0.0124, "step": 61940 }, { "epoch": 1.046205289289695, "grad_norm": 0.5214405059814453, "learning_rate": 5.468773549693693e-06, "loss": 0.0095, "step": 61950 }, { "epoch": 1.046374168271017, "grad_norm": 0.3717401325702667, "learning_rate": 5.467306270842808e-06, "loss": 0.011, "step": 61960 }, { "epoch": 1.046543047252339, "grad_norm": 0.23584645986557007, "learning_rate": 5.465838951393501e-06, "loss": 0.0117, "step": 61970 }, { "epoch": 1.046711926233661, "grad_norm": 0.4534551501274109, "learning_rate": 5.4643715914732455e-06, "loss": 0.0062, "step": 61980 }, { "epoch": 1.046880805214983, "grad_norm": 0.45850205421447754, "learning_rate": 5.462904191209525e-06, "loss": 0.0129, "step": 61990 }, { "epoch": 1.0470496841963048, "grad_norm": 0.12870445847511292, "learning_rate": 5.461436750729822e-06, "loss": 0.0061, "step": 62000 }, { "epoch": 1.047218563177627, "grad_norm": 0.24192732572555542, "learning_rate": 5.459969270161626e-06, "loss": 0.0111, "step": 62010 }, { "epoch": 1.0473874421589489, "grad_norm": 0.7138918042182922, "learning_rate": 5.458501749632426e-06, "loss": 0.0104, "step": 62020 }, { "epoch": 1.047556321140271, "grad_norm": 0.23887550830841064, "learning_rate": 5.457034189269719e-06, "loss": 0.0095, "step": 62030 }, { "epoch": 1.0477252001215929, "grad_norm": 0.485883891582489, "learning_rate": 5.455566589200999e-06, "loss": 0.0087, "step": 62040 }, { "epoch": 1.0478940791029148, "grad_norm": 0.4967198967933655, "learning_rate": 5.454098949553771e-06, "loss": 0.0074, "step": 62050 }, { "epoch": 1.0480629580842369, "grad_norm": 0.35883721709251404, "learning_rate": 5.45263127045554e-06, "loss": 0.006, "step": 62060 }, { "epoch": 1.0482318370655588, "grad_norm": 0.22167669236660004, "learning_rate": 5.451163552033812e-06, "loss": 0.0078, "step": 62070 }, { "epoch": 1.048400716046881, "grad_norm": 0.2856452167034149, "learning_rate": 5.449695794416101e-06, "loss": 0.0067, "step": 62080 }, { "epoch": 1.0485695950282028, "grad_norm": 0.359365850687027, "learning_rate": 5.448227997729921e-06, "loss": 0.0097, "step": 62090 }, { "epoch": 1.0487384740095247, "grad_norm": 0.27964672446250916, "learning_rate": 5.446760162102791e-06, "loss": 0.0069, "step": 62100 }, { "epoch": 1.0489073529908468, "grad_norm": 0.5815972089767456, "learning_rate": 5.445292287662233e-06, "loss": 0.0088, "step": 62110 }, { "epoch": 1.0490762319721687, "grad_norm": 0.3415871560573578, "learning_rate": 5.443824374535771e-06, "loss": 0.0142, "step": 62120 }, { "epoch": 1.0492451109534908, "grad_norm": 0.21124707162380219, "learning_rate": 5.442356422850936e-06, "loss": 0.0084, "step": 62130 }, { "epoch": 1.0494139899348127, "grad_norm": 0.17084188759326935, "learning_rate": 5.440888432735259e-06, "loss": 0.0081, "step": 62140 }, { "epoch": 1.0495828689161346, "grad_norm": 0.28712165355682373, "learning_rate": 5.439420404316276e-06, "loss": 0.0085, "step": 62150 }, { "epoch": 1.0497517478974567, "grad_norm": 0.13315539062023163, "learning_rate": 5.4379523377215235e-06, "loss": 0.0083, "step": 62160 }, { "epoch": 1.0499206268787786, "grad_norm": 0.4378661811351776, "learning_rate": 5.436484233078545e-06, "loss": 0.0114, "step": 62170 }, { "epoch": 1.0500895058601007, "grad_norm": 0.21690118312835693, "learning_rate": 5.435016090514886e-06, "loss": 0.0086, "step": 62180 }, { "epoch": 1.0502583848414226, "grad_norm": 0.16223280131816864, "learning_rate": 5.433547910158095e-06, "loss": 0.0112, "step": 62190 }, { "epoch": 1.0504272638227445, "grad_norm": 0.08130055665969849, "learning_rate": 5.432079692135726e-06, "loss": 0.0072, "step": 62200 }, { "epoch": 1.0505961428040667, "grad_norm": 0.26456549763679504, "learning_rate": 5.430611436575331e-06, "loss": 0.0074, "step": 62210 }, { "epoch": 1.0507650217853886, "grad_norm": 0.10464117676019669, "learning_rate": 5.429143143604469e-06, "loss": 0.0084, "step": 62220 }, { "epoch": 1.0509339007667107, "grad_norm": 0.49293744564056396, "learning_rate": 5.427674813350702e-06, "loss": 0.0051, "step": 62230 }, { "epoch": 1.0511027797480326, "grad_norm": 0.1488034874200821, "learning_rate": 5.426206445941595e-06, "loss": 0.009, "step": 62240 }, { "epoch": 1.0512716587293545, "grad_norm": 0.22355641424655914, "learning_rate": 5.424738041504717e-06, "loss": 0.0076, "step": 62250 }, { "epoch": 1.0514405377106766, "grad_norm": 0.19010910391807556, "learning_rate": 5.423269600167639e-06, "loss": 0.0088, "step": 62260 }, { "epoch": 1.0516094166919985, "grad_norm": 0.4431350529193878, "learning_rate": 5.4218011220579345e-06, "loss": 0.0079, "step": 62270 }, { "epoch": 1.0517782956733206, "grad_norm": 0.514253556728363, "learning_rate": 5.4203326073031826e-06, "loss": 0.011, "step": 62280 }, { "epoch": 1.0519471746546425, "grad_norm": 0.34184393286705017, "learning_rate": 5.418864056030963e-06, "loss": 0.0144, "step": 62290 }, { "epoch": 1.0521160536359644, "grad_norm": 0.20166154205799103, "learning_rate": 5.417395468368861e-06, "loss": 0.011, "step": 62300 }, { "epoch": 1.0522849326172865, "grad_norm": 0.17087972164154053, "learning_rate": 5.415926844444463e-06, "loss": 0.0087, "step": 62310 }, { "epoch": 1.0524538115986084, "grad_norm": 0.13085901737213135, "learning_rate": 5.414458184385361e-06, "loss": 0.0075, "step": 62320 }, { "epoch": 1.0526226905799305, "grad_norm": 0.279938280582428, "learning_rate": 5.412989488319148e-06, "loss": 0.0087, "step": 62330 }, { "epoch": 1.0527915695612524, "grad_norm": 0.28460171818733215, "learning_rate": 5.41152075637342e-06, "loss": 0.0103, "step": 62340 }, { "epoch": 1.0529604485425743, "grad_norm": 0.2445896714925766, "learning_rate": 5.4100519886757764e-06, "loss": 0.0112, "step": 62350 }, { "epoch": 1.0531293275238964, "grad_norm": 0.25646838545799255, "learning_rate": 5.40858318535382e-06, "loss": 0.0132, "step": 62360 }, { "epoch": 1.0532982065052183, "grad_norm": 0.3564800024032593, "learning_rate": 5.407114346535158e-06, "loss": 0.0081, "step": 62370 }, { "epoch": 1.0534670854865404, "grad_norm": 0.2813034951686859, "learning_rate": 5.4056454723474006e-06, "loss": 0.0093, "step": 62380 }, { "epoch": 1.0536359644678623, "grad_norm": 0.47579121589660645, "learning_rate": 5.404176562918158e-06, "loss": 0.0067, "step": 62390 }, { "epoch": 1.0538048434491842, "grad_norm": 0.5182266235351562, "learning_rate": 5.402707618375046e-06, "loss": 0.0095, "step": 62400 }, { "epoch": 1.0539737224305064, "grad_norm": 0.5272021293640137, "learning_rate": 5.401238638845682e-06, "loss": 0.009, "step": 62410 }, { "epoch": 1.0541426014118283, "grad_norm": 0.2516407072544098, "learning_rate": 5.399769624457688e-06, "loss": 0.0075, "step": 62420 }, { "epoch": 1.0543114803931504, "grad_norm": 0.28233593702316284, "learning_rate": 5.398300575338688e-06, "loss": 0.0086, "step": 62430 }, { "epoch": 1.0544803593744723, "grad_norm": 0.4807168245315552, "learning_rate": 5.396831491616314e-06, "loss": 0.0098, "step": 62440 }, { "epoch": 1.0546492383557942, "grad_norm": 0.2799495458602905, "learning_rate": 5.3953623734181885e-06, "loss": 0.0069, "step": 62450 }, { "epoch": 1.0548181173371163, "grad_norm": 0.5080576539039612, "learning_rate": 5.393893220871951e-06, "loss": 0.0106, "step": 62460 }, { "epoch": 1.0549869963184382, "grad_norm": 0.1813606470823288, "learning_rate": 5.392424034105234e-06, "loss": 0.0073, "step": 62470 }, { "epoch": 1.0551558752997603, "grad_norm": 0.3628600835800171, "learning_rate": 5.390954813245679e-06, "loss": 0.0125, "step": 62480 }, { "epoch": 1.0553247542810822, "grad_norm": 0.26580044627189636, "learning_rate": 5.389485558420927e-06, "loss": 0.0072, "step": 62490 }, { "epoch": 1.055493633262404, "grad_norm": 0.3355139493942261, "learning_rate": 5.388016269758626e-06, "loss": 0.01, "step": 62500 }, { "epoch": 1.0556625122437262, "grad_norm": 0.24913442134857178, "learning_rate": 5.386546947386421e-06, "loss": 0.011, "step": 62510 }, { "epoch": 1.055831391225048, "grad_norm": 0.2614472508430481, "learning_rate": 5.385077591431966e-06, "loss": 0.0089, "step": 62520 }, { "epoch": 1.0560002702063702, "grad_norm": 0.27885252237319946, "learning_rate": 5.383608202022912e-06, "loss": 0.0084, "step": 62530 }, { "epoch": 1.0561691491876921, "grad_norm": 0.326092928647995, "learning_rate": 5.382138779286916e-06, "loss": 0.0082, "step": 62540 }, { "epoch": 1.056338028169014, "grad_norm": 0.21382227540016174, "learning_rate": 5.380669323351641e-06, "loss": 0.01, "step": 62550 }, { "epoch": 1.0565069071503361, "grad_norm": 0.2951546013355255, "learning_rate": 5.3791998343447485e-06, "loss": 0.0104, "step": 62560 }, { "epoch": 1.056675786131658, "grad_norm": 0.2329111248254776, "learning_rate": 5.3777303123939025e-06, "loss": 0.0072, "step": 62570 }, { "epoch": 1.0568446651129801, "grad_norm": 0.3919600546360016, "learning_rate": 5.376260757626773e-06, "loss": 0.0084, "step": 62580 }, { "epoch": 1.057013544094302, "grad_norm": 0.28184959292411804, "learning_rate": 5.374791170171031e-06, "loss": 0.009, "step": 62590 }, { "epoch": 1.057182423075624, "grad_norm": 0.032053083181381226, "learning_rate": 5.37332155015435e-06, "loss": 0.0086, "step": 62600 }, { "epoch": 1.057351302056946, "grad_norm": 0.2358408272266388, "learning_rate": 5.3718518977044075e-06, "loss": 0.0098, "step": 62610 }, { "epoch": 1.057520181038268, "grad_norm": 0.1684558093547821, "learning_rate": 5.370382212948883e-06, "loss": 0.0098, "step": 62620 }, { "epoch": 1.05768906001959, "grad_norm": 0.36431020498275757, "learning_rate": 5.36891249601546e-06, "loss": 0.0076, "step": 62630 }, { "epoch": 1.057857939000912, "grad_norm": 0.2650609612464905, "learning_rate": 5.367442747031824e-06, "loss": 0.0076, "step": 62640 }, { "epoch": 1.0580268179822339, "grad_norm": 0.40988603234291077, "learning_rate": 5.365972966125662e-06, "loss": 0.0078, "step": 62650 }, { "epoch": 1.058195696963556, "grad_norm": 0.075395368039608, "learning_rate": 5.364503153424664e-06, "loss": 0.008, "step": 62660 }, { "epoch": 1.0583645759448779, "grad_norm": 0.399586945772171, "learning_rate": 5.3630333090565265e-06, "loss": 0.0099, "step": 62670 }, { "epoch": 1.0585334549262, "grad_norm": 0.3209996521472931, "learning_rate": 5.361563433148945e-06, "loss": 0.0093, "step": 62680 }, { "epoch": 1.0587023339075219, "grad_norm": 0.6055876016616821, "learning_rate": 5.360093525829619e-06, "loss": 0.0077, "step": 62690 }, { "epoch": 1.0588712128888438, "grad_norm": 0.2595467269420624, "learning_rate": 5.358623587226248e-06, "loss": 0.0089, "step": 62700 }, { "epoch": 1.059040091870166, "grad_norm": 0.2555679678916931, "learning_rate": 5.357153617466541e-06, "loss": 0.0105, "step": 62710 }, { "epoch": 1.0592089708514878, "grad_norm": 0.22391964495182037, "learning_rate": 5.355683616678199e-06, "loss": 0.0065, "step": 62720 }, { "epoch": 1.05937784983281, "grad_norm": 0.1709289401769638, "learning_rate": 5.3542135849889386e-06, "loss": 0.0122, "step": 62730 }, { "epoch": 1.0595467288141318, "grad_norm": 0.09010791033506393, "learning_rate": 5.352743522526469e-06, "loss": 0.0055, "step": 62740 }, { "epoch": 1.0597156077954537, "grad_norm": 0.30163347721099854, "learning_rate": 5.351273429418509e-06, "loss": 0.01, "step": 62750 }, { "epoch": 1.0598844867767758, "grad_norm": 0.507188618183136, "learning_rate": 5.349803305792771e-06, "loss": 0.0145, "step": 62760 }, { "epoch": 1.0600533657580977, "grad_norm": 0.2458905279636383, "learning_rate": 5.34833315177698e-06, "loss": 0.0071, "step": 62770 }, { "epoch": 1.0602222447394198, "grad_norm": 0.9836257100105286, "learning_rate": 5.346862967498858e-06, "loss": 0.0071, "step": 62780 }, { "epoch": 1.0603911237207417, "grad_norm": 0.17550872266292572, "learning_rate": 5.345392753086133e-06, "loss": 0.0075, "step": 62790 }, { "epoch": 1.0605600027020636, "grad_norm": 0.15535642206668854, "learning_rate": 5.34392250866653e-06, "loss": 0.0096, "step": 62800 }, { "epoch": 1.0607288816833857, "grad_norm": 0.11539328843355179, "learning_rate": 5.342452234367783e-06, "loss": 0.0075, "step": 62810 }, { "epoch": 1.0608977606647076, "grad_norm": 0.28812599182128906, "learning_rate": 5.340981930317624e-06, "loss": 0.0079, "step": 62820 }, { "epoch": 1.0610666396460298, "grad_norm": 0.4155631363391876, "learning_rate": 5.339511596643793e-06, "loss": 0.009, "step": 62830 }, { "epoch": 1.0612355186273517, "grad_norm": 0.3745133578777313, "learning_rate": 5.338041233474024e-06, "loss": 0.0083, "step": 62840 }, { "epoch": 1.0614043976086736, "grad_norm": 0.15096555650234222, "learning_rate": 5.336570840936062e-06, "loss": 0.0073, "step": 62850 }, { "epoch": 1.0615732765899957, "grad_norm": 0.09679504483938217, "learning_rate": 5.33510041915765e-06, "loss": 0.0062, "step": 62860 }, { "epoch": 1.0617421555713176, "grad_norm": 0.3440473675727844, "learning_rate": 5.3336299682665345e-06, "loss": 0.0114, "step": 62870 }, { "epoch": 1.0619110345526397, "grad_norm": 0.20903046429157257, "learning_rate": 5.332159488390465e-06, "loss": 0.0098, "step": 62880 }, { "epoch": 1.0620799135339616, "grad_norm": 0.19963306188583374, "learning_rate": 5.330688979657193e-06, "loss": 0.0069, "step": 62890 }, { "epoch": 1.0622487925152835, "grad_norm": 0.3395654261112213, "learning_rate": 5.3292184421944715e-06, "loss": 0.0075, "step": 62900 }, { "epoch": 1.0624176714966056, "grad_norm": 0.3264376223087311, "learning_rate": 5.3277478761300605e-06, "loss": 0.0134, "step": 62910 }, { "epoch": 1.0625865504779275, "grad_norm": 0.4354063868522644, "learning_rate": 5.326277281591716e-06, "loss": 0.0104, "step": 62920 }, { "epoch": 1.0627554294592496, "grad_norm": 0.15898685157299042, "learning_rate": 5.324806658707203e-06, "loss": 0.0079, "step": 62930 }, { "epoch": 1.0629243084405715, "grad_norm": 0.35150375962257385, "learning_rate": 5.32333600760428e-06, "loss": 0.0079, "step": 62940 }, { "epoch": 1.0630931874218934, "grad_norm": 0.12798181176185608, "learning_rate": 5.321865328410718e-06, "loss": 0.0089, "step": 62950 }, { "epoch": 1.0632620664032155, "grad_norm": 0.3135482370853424, "learning_rate": 5.320394621254286e-06, "loss": 0.0099, "step": 62960 }, { "epoch": 1.0634309453845374, "grad_norm": 0.2407235950231552, "learning_rate": 5.3189238862627535e-06, "loss": 0.0094, "step": 62970 }, { "epoch": 1.0635998243658595, "grad_norm": 0.7433346509933472, "learning_rate": 5.317453123563896e-06, "loss": 0.0113, "step": 62980 }, { "epoch": 1.0637687033471814, "grad_norm": 0.327851802110672, "learning_rate": 5.315982333285489e-06, "loss": 0.0097, "step": 62990 }, { "epoch": 1.0639375823285033, "grad_norm": 0.37651023268699646, "learning_rate": 5.31451151555531e-06, "loss": 0.0087, "step": 63000 }, { "epoch": 1.0641064613098254, "grad_norm": 0.4438004791736603, "learning_rate": 5.313040670501142e-06, "loss": 0.0078, "step": 63010 }, { "epoch": 1.0642753402911473, "grad_norm": 0.7526397705078125, "learning_rate": 5.311569798250767e-06, "loss": 0.013, "step": 63020 }, { "epoch": 1.0644442192724695, "grad_norm": 0.5865210890769958, "learning_rate": 5.310098898931973e-06, "loss": 0.0089, "step": 63030 }, { "epoch": 1.0646130982537914, "grad_norm": 0.3372586667537689, "learning_rate": 5.308627972672544e-06, "loss": 0.0081, "step": 63040 }, { "epoch": 1.0647819772351133, "grad_norm": 0.44557467103004456, "learning_rate": 5.307157019600275e-06, "loss": 0.0086, "step": 63050 }, { "epoch": 1.0649508562164354, "grad_norm": 0.5404635071754456, "learning_rate": 5.305686039842957e-06, "loss": 0.0095, "step": 63060 }, { "epoch": 1.0651197351977573, "grad_norm": 0.43286076188087463, "learning_rate": 5.304215033528382e-06, "loss": 0.0112, "step": 63070 }, { "epoch": 1.0652886141790794, "grad_norm": 0.28697827458381653, "learning_rate": 5.302744000784353e-06, "loss": 0.0109, "step": 63080 }, { "epoch": 1.0654574931604013, "grad_norm": 0.21899501979351044, "learning_rate": 5.3012729417386665e-06, "loss": 0.0116, "step": 63090 }, { "epoch": 1.0656263721417232, "grad_norm": 0.09306556731462479, "learning_rate": 5.299801856519126e-06, "loss": 0.0089, "step": 63100 }, { "epoch": 1.0657952511230453, "grad_norm": 0.23870229721069336, "learning_rate": 5.298330745253533e-06, "loss": 0.0084, "step": 63110 }, { "epoch": 1.0659641301043672, "grad_norm": 0.4275359511375427, "learning_rate": 5.296859608069697e-06, "loss": 0.008, "step": 63120 }, { "epoch": 1.0661330090856893, "grad_norm": 0.3902883231639862, "learning_rate": 5.2953884450954255e-06, "loss": 0.0085, "step": 63130 }, { "epoch": 1.0663018880670112, "grad_norm": 0.335983544588089, "learning_rate": 5.29391725645853e-06, "loss": 0.0109, "step": 63140 }, { "epoch": 1.066470767048333, "grad_norm": 0.17449235916137695, "learning_rate": 5.292446042286822e-06, "loss": 0.0099, "step": 63150 }, { "epoch": 1.0666396460296552, "grad_norm": 0.34439873695373535, "learning_rate": 5.29097480270812e-06, "loss": 0.0087, "step": 63160 }, { "epoch": 1.0668085250109771, "grad_norm": 0.4183003008365631, "learning_rate": 5.289503537850239e-06, "loss": 0.0106, "step": 63170 }, { "epoch": 1.0669774039922992, "grad_norm": 0.29966244101524353, "learning_rate": 5.288032247841002e-06, "loss": 0.0069, "step": 63180 }, { "epoch": 1.0671462829736211, "grad_norm": 0.37183433771133423, "learning_rate": 5.286560932808227e-06, "loss": 0.0084, "step": 63190 }, { "epoch": 1.067315161954943, "grad_norm": 0.3066804111003876, "learning_rate": 5.285089592879741e-06, "loss": 0.006, "step": 63200 }, { "epoch": 1.0674840409362651, "grad_norm": 0.21243120729923248, "learning_rate": 5.283618228183369e-06, "loss": 0.007, "step": 63210 }, { "epoch": 1.067652919917587, "grad_norm": 0.1929604560136795, "learning_rate": 5.282146838846942e-06, "loss": 0.008, "step": 63220 }, { "epoch": 1.0678217988989092, "grad_norm": 0.17922984063625336, "learning_rate": 5.280675424998288e-06, "loss": 0.0067, "step": 63230 }, { "epoch": 1.067990677880231, "grad_norm": 0.29600435495376587, "learning_rate": 5.279203986765243e-06, "loss": 0.012, "step": 63240 }, { "epoch": 1.068159556861553, "grad_norm": 0.06641265749931335, "learning_rate": 5.2777325242756365e-06, "loss": 0.0079, "step": 63250 }, { "epoch": 1.068328435842875, "grad_norm": 0.15305453538894653, "learning_rate": 5.27626103765731e-06, "loss": 0.0068, "step": 63260 }, { "epoch": 1.068497314824197, "grad_norm": 0.20527544617652893, "learning_rate": 5.274789527038101e-06, "loss": 0.008, "step": 63270 }, { "epoch": 1.068666193805519, "grad_norm": 0.4125652611255646, "learning_rate": 5.273317992545852e-06, "loss": 0.0096, "step": 63280 }, { "epoch": 1.068835072786841, "grad_norm": 0.31472504138946533, "learning_rate": 5.271846434308405e-06, "loss": 0.0102, "step": 63290 }, { "epoch": 1.0690039517681629, "grad_norm": 0.38967272639274597, "learning_rate": 5.270374852453607e-06, "loss": 0.0125, "step": 63300 }, { "epoch": 1.069172830749485, "grad_norm": 0.10778988152742386, "learning_rate": 5.268903247109304e-06, "loss": 0.0102, "step": 63310 }, { "epoch": 1.0693417097308069, "grad_norm": 0.18508228659629822, "learning_rate": 5.267431618403345e-06, "loss": 0.0095, "step": 63320 }, { "epoch": 1.0695105887121288, "grad_norm": 0.10050608217716217, "learning_rate": 5.265959966463582e-06, "loss": 0.007, "step": 63330 }, { "epoch": 1.069679467693451, "grad_norm": 0.20146334171295166, "learning_rate": 5.264488291417871e-06, "loss": 0.0102, "step": 63340 }, { "epoch": 1.0698483466747728, "grad_norm": 0.17839694023132324, "learning_rate": 5.263016593394062e-06, "loss": 0.0059, "step": 63350 }, { "epoch": 1.070017225656095, "grad_norm": 0.215694397687912, "learning_rate": 5.2615448725200194e-06, "loss": 0.0085, "step": 63360 }, { "epoch": 1.0701861046374168, "grad_norm": 0.37112846970558167, "learning_rate": 5.260073128923598e-06, "loss": 0.0088, "step": 63370 }, { "epoch": 1.0703549836187387, "grad_norm": 0.34836432337760925, "learning_rate": 5.2586013627326585e-06, "loss": 0.0097, "step": 63380 }, { "epoch": 1.0705238626000608, "grad_norm": 0.5541031956672668, "learning_rate": 5.257129574075067e-06, "loss": 0.0106, "step": 63390 }, { "epoch": 1.0706927415813827, "grad_norm": 0.1590442657470703, "learning_rate": 5.25565776307869e-06, "loss": 0.0072, "step": 63400 }, { "epoch": 1.0708616205627048, "grad_norm": 0.45405980944633484, "learning_rate": 5.254185929871392e-06, "loss": 0.0125, "step": 63410 }, { "epoch": 1.0710304995440267, "grad_norm": 0.15605071187019348, "learning_rate": 5.252714074581043e-06, "loss": 0.0086, "step": 63420 }, { "epoch": 1.0711993785253486, "grad_norm": 0.287102073431015, "learning_rate": 5.251242197335514e-06, "loss": 0.0108, "step": 63430 }, { "epoch": 1.0713682575066708, "grad_norm": 0.24413499236106873, "learning_rate": 5.24977029826268e-06, "loss": 0.0094, "step": 63440 }, { "epoch": 1.0715371364879926, "grad_norm": 0.3389727473258972, "learning_rate": 5.248298377490413e-06, "loss": 0.0156, "step": 63450 }, { "epoch": 1.0717060154693148, "grad_norm": 0.1317201405763626, "learning_rate": 5.246826435146592e-06, "loss": 0.0098, "step": 63460 }, { "epoch": 1.0718748944506367, "grad_norm": 0.18668386340141296, "learning_rate": 5.245354471359096e-06, "loss": 0.005, "step": 63470 }, { "epoch": 1.0720437734319586, "grad_norm": 0.08170796185731888, "learning_rate": 5.243882486255803e-06, "loss": 0.0096, "step": 63480 }, { "epoch": 1.0722126524132807, "grad_norm": 0.463998019695282, "learning_rate": 5.242410479964599e-06, "loss": 0.0118, "step": 63490 }, { "epoch": 1.0723815313946026, "grad_norm": 0.30916374921798706, "learning_rate": 5.240938452613366e-06, "loss": 0.0056, "step": 63500 }, { "epoch": 1.0725504103759247, "grad_norm": 0.17352540791034698, "learning_rate": 5.239466404329989e-06, "loss": 0.0065, "step": 63510 }, { "epoch": 1.0727192893572466, "grad_norm": 0.2756289839744568, "learning_rate": 5.237994335242359e-06, "loss": 0.0055, "step": 63520 }, { "epoch": 1.0728881683385685, "grad_norm": 0.28615880012512207, "learning_rate": 5.2365222454783634e-06, "loss": 0.0076, "step": 63530 }, { "epoch": 1.0730570473198906, "grad_norm": 0.3524725139141083, "learning_rate": 5.235050135165896e-06, "loss": 0.0096, "step": 63540 }, { "epoch": 1.0732259263012125, "grad_norm": 0.22925791144371033, "learning_rate": 5.233578004432849e-06, "loss": 0.0091, "step": 63550 }, { "epoch": 1.0733948052825346, "grad_norm": 0.0641915574669838, "learning_rate": 5.232105853407115e-06, "loss": 0.0071, "step": 63560 }, { "epoch": 1.0735636842638565, "grad_norm": 0.29456451535224915, "learning_rate": 5.230633682216595e-06, "loss": 0.0072, "step": 63570 }, { "epoch": 1.0737325632451784, "grad_norm": 0.20580258965492249, "learning_rate": 5.229161490989185e-06, "loss": 0.0082, "step": 63580 }, { "epoch": 1.0739014422265005, "grad_norm": 0.16427326202392578, "learning_rate": 5.227689279852788e-06, "loss": 0.0075, "step": 63590 }, { "epoch": 1.0740703212078224, "grad_norm": 0.41593798995018005, "learning_rate": 5.226217048935302e-06, "loss": 0.0107, "step": 63600 }, { "epoch": 1.0742392001891445, "grad_norm": 0.1426260769367218, "learning_rate": 5.224744798364636e-06, "loss": 0.0055, "step": 63610 }, { "epoch": 1.0744080791704664, "grad_norm": 0.241098552942276, "learning_rate": 5.223272528268689e-06, "loss": 0.0093, "step": 63620 }, { "epoch": 1.0745769581517883, "grad_norm": 0.2107977271080017, "learning_rate": 5.221800238775374e-06, "loss": 0.0063, "step": 63630 }, { "epoch": 1.0747458371331104, "grad_norm": 0.24560540914535522, "learning_rate": 5.220327930012597e-06, "loss": 0.0081, "step": 63640 }, { "epoch": 1.0749147161144323, "grad_norm": 0.3561139702796936, "learning_rate": 5.218855602108272e-06, "loss": 0.0074, "step": 63650 }, { "epoch": 1.0750835950957545, "grad_norm": 0.16312265396118164, "learning_rate": 5.217383255190305e-06, "loss": 0.0091, "step": 63660 }, { "epoch": 1.0752524740770764, "grad_norm": 0.25117605924606323, "learning_rate": 5.215910889386615e-06, "loss": 0.0079, "step": 63670 }, { "epoch": 1.0754213530583983, "grad_norm": 0.10051873326301575, "learning_rate": 5.214438504825115e-06, "loss": 0.0151, "step": 63680 }, { "epoch": 1.0755902320397204, "grad_norm": 0.06156251206994057, "learning_rate": 5.212966101633724e-06, "loss": 0.0103, "step": 63690 }, { "epoch": 1.0757591110210423, "grad_norm": 0.3011402487754822, "learning_rate": 5.2114936799403595e-06, "loss": 0.0073, "step": 63700 }, { "epoch": 1.0759279900023644, "grad_norm": 0.06687614321708679, "learning_rate": 5.2100212398729434e-06, "loss": 0.0064, "step": 63710 }, { "epoch": 1.0760968689836863, "grad_norm": 0.39908966422080994, "learning_rate": 5.208548781559396e-06, "loss": 0.0104, "step": 63720 }, { "epoch": 1.0762657479650082, "grad_norm": 0.8522477746009827, "learning_rate": 5.2070763051276415e-06, "loss": 0.0111, "step": 63730 }, { "epoch": 1.0764346269463303, "grad_norm": 0.14477847516536713, "learning_rate": 5.205603810705604e-06, "loss": 0.0073, "step": 63740 }, { "epoch": 1.0766035059276522, "grad_norm": 0.36084580421447754, "learning_rate": 5.204131298421212e-06, "loss": 0.0087, "step": 63750 }, { "epoch": 1.0767723849089743, "grad_norm": 0.17880100011825562, "learning_rate": 5.202658768402393e-06, "loss": 0.0111, "step": 63760 }, { "epoch": 1.0769412638902962, "grad_norm": 0.25471726059913635, "learning_rate": 5.201186220777078e-06, "loss": 0.0057, "step": 63770 }, { "epoch": 1.077110142871618, "grad_norm": 0.2095588892698288, "learning_rate": 5.199713655673195e-06, "loss": 0.0134, "step": 63780 }, { "epoch": 1.0772790218529402, "grad_norm": 0.28346943855285645, "learning_rate": 5.198241073218682e-06, "loss": 0.0067, "step": 63790 }, { "epoch": 1.0774479008342621, "grad_norm": 0.07041703909635544, "learning_rate": 5.196768473541466e-06, "loss": 0.0068, "step": 63800 }, { "epoch": 1.0776167798155842, "grad_norm": 0.23199892044067383, "learning_rate": 5.19529585676949e-06, "loss": 0.0078, "step": 63810 }, { "epoch": 1.0777856587969061, "grad_norm": 0.2850671112537384, "learning_rate": 5.193823223030688e-06, "loss": 0.0086, "step": 63820 }, { "epoch": 1.077954537778228, "grad_norm": 0.36395972967147827, "learning_rate": 5.192350572453e-06, "loss": 0.0071, "step": 63830 }, { "epoch": 1.0781234167595501, "grad_norm": 0.19914880394935608, "learning_rate": 5.1908779051643646e-06, "loss": 0.0102, "step": 63840 }, { "epoch": 1.078292295740872, "grad_norm": 0.2048265039920807, "learning_rate": 5.1894052212927244e-06, "loss": 0.0072, "step": 63850 }, { "epoch": 1.0784611747221942, "grad_norm": 0.10749486833810806, "learning_rate": 5.187932520966023e-06, "loss": 0.0077, "step": 63860 }, { "epoch": 1.078630053703516, "grad_norm": 0.5604692101478577, "learning_rate": 5.186459804312205e-06, "loss": 0.0126, "step": 63870 }, { "epoch": 1.078798932684838, "grad_norm": 0.17934969067573547, "learning_rate": 5.184987071459215e-06, "loss": 0.0107, "step": 63880 }, { "epoch": 1.07896781166616, "grad_norm": 0.0887872651219368, "learning_rate": 5.1835143225350035e-06, "loss": 0.0066, "step": 63890 }, { "epoch": 1.079136690647482, "grad_norm": 0.32047149538993835, "learning_rate": 5.1820415576675166e-06, "loss": 0.0102, "step": 63900 }, { "epoch": 1.079305569628804, "grad_norm": 0.4188673496246338, "learning_rate": 5.180568776984705e-06, "loss": 0.0065, "step": 63910 }, { "epoch": 1.079474448610126, "grad_norm": 0.291268914937973, "learning_rate": 5.179095980614519e-06, "loss": 0.0079, "step": 63920 }, { "epoch": 1.0796433275914479, "grad_norm": 0.25514328479766846, "learning_rate": 5.1776231686849155e-06, "loss": 0.0069, "step": 63930 }, { "epoch": 1.07981220657277, "grad_norm": 0.22070454061031342, "learning_rate": 5.176150341323846e-06, "loss": 0.0073, "step": 63940 }, { "epoch": 1.079981085554092, "grad_norm": 0.22978757321834564, "learning_rate": 5.174677498659265e-06, "loss": 0.0071, "step": 63950 }, { "epoch": 1.080149964535414, "grad_norm": 0.12140045315027237, "learning_rate": 5.1732046408191314e-06, "loss": 0.0069, "step": 63960 }, { "epoch": 1.080318843516736, "grad_norm": 0.09881734102964401, "learning_rate": 5.171731767931404e-06, "loss": 0.0084, "step": 63970 }, { "epoch": 1.0804877224980578, "grad_norm": 0.32982897758483887, "learning_rate": 5.1702588801240406e-06, "loss": 0.0099, "step": 63980 }, { "epoch": 1.08065660147938, "grad_norm": 0.15388202667236328, "learning_rate": 5.168785977525003e-06, "loss": 0.0103, "step": 63990 }, { "epoch": 1.0808254804607018, "grad_norm": 0.21004046499729156, "learning_rate": 5.167313060262253e-06, "loss": 0.0078, "step": 64000 }, { "epoch": 1.080994359442024, "grad_norm": 0.12763558328151703, "learning_rate": 5.165840128463755e-06, "loss": 0.006, "step": 64010 }, { "epoch": 1.0811632384233458, "grad_norm": 0.3352687358856201, "learning_rate": 5.164367182257472e-06, "loss": 0.0112, "step": 64020 }, { "epoch": 1.0813321174046677, "grad_norm": 0.13046909868717194, "learning_rate": 5.162894221771372e-06, "loss": 0.0078, "step": 64030 }, { "epoch": 1.0815009963859898, "grad_norm": 0.11365848034620285, "learning_rate": 5.16142124713342e-06, "loss": 0.0116, "step": 64040 }, { "epoch": 1.0816698753673117, "grad_norm": 0.49339842796325684, "learning_rate": 5.159948258471586e-06, "loss": 0.0084, "step": 64050 }, { "epoch": 1.0818387543486339, "grad_norm": 0.22281862795352936, "learning_rate": 5.1584752559138405e-06, "loss": 0.0042, "step": 64060 }, { "epoch": 1.0820076333299558, "grad_norm": 0.022595230489969254, "learning_rate": 5.157002239588153e-06, "loss": 0.008, "step": 64070 }, { "epoch": 1.0821765123112776, "grad_norm": 0.3098728060722351, "learning_rate": 5.155529209622495e-06, "loss": 0.0093, "step": 64080 }, { "epoch": 1.0823453912925998, "grad_norm": 0.20684176683425903, "learning_rate": 5.15405616614484e-06, "loss": 0.0089, "step": 64090 }, { "epoch": 1.0825142702739217, "grad_norm": 0.16631728410720825, "learning_rate": 5.152583109283162e-06, "loss": 0.0046, "step": 64100 }, { "epoch": 1.0826831492552438, "grad_norm": 0.1589888036251068, "learning_rate": 5.151110039165438e-06, "loss": 0.008, "step": 64110 }, { "epoch": 1.0828520282365657, "grad_norm": 0.21025080978870392, "learning_rate": 5.149636955919646e-06, "loss": 0.0101, "step": 64120 }, { "epoch": 1.0830209072178876, "grad_norm": 0.2506691515445709, "learning_rate": 5.148163859673759e-06, "loss": 0.0064, "step": 64130 }, { "epoch": 1.0831897861992097, "grad_norm": 0.22664818167686462, "learning_rate": 5.146690750555762e-06, "loss": 0.0076, "step": 64140 }, { "epoch": 1.0833586651805316, "grad_norm": 0.30872586369514465, "learning_rate": 5.14521762869363e-06, "loss": 0.0075, "step": 64150 }, { "epoch": 1.0835275441618537, "grad_norm": 0.2837303578853607, "learning_rate": 5.143744494215346e-06, "loss": 0.0101, "step": 64160 }, { "epoch": 1.0836964231431756, "grad_norm": 0.4579952359199524, "learning_rate": 5.142271347248892e-06, "loss": 0.0111, "step": 64170 }, { "epoch": 1.0838653021244975, "grad_norm": 0.15798749029636383, "learning_rate": 5.140798187922253e-06, "loss": 0.0075, "step": 64180 }, { "epoch": 1.0840341811058196, "grad_norm": 0.2197370082139969, "learning_rate": 5.139325016363414e-06, "loss": 0.0078, "step": 64190 }, { "epoch": 1.0842030600871415, "grad_norm": 0.3894931674003601, "learning_rate": 5.137851832700358e-06, "loss": 0.0085, "step": 64200 }, { "epoch": 1.0843719390684636, "grad_norm": 0.27798792719841003, "learning_rate": 5.136378637061071e-06, "loss": 0.0079, "step": 64210 }, { "epoch": 1.0845408180497855, "grad_norm": 0.36836519837379456, "learning_rate": 5.134905429573544e-06, "loss": 0.009, "step": 64220 }, { "epoch": 1.0847096970311074, "grad_norm": 0.2166890949010849, "learning_rate": 5.133432210365762e-06, "loss": 0.008, "step": 64230 }, { "epoch": 1.0848785760124295, "grad_norm": 0.5114786028862, "learning_rate": 5.131958979565719e-06, "loss": 0.0086, "step": 64240 }, { "epoch": 1.0850474549937514, "grad_norm": 0.3101113438606262, "learning_rate": 5.1304857373014e-06, "loss": 0.0078, "step": 64250 }, { "epoch": 1.0852163339750736, "grad_norm": 0.3166721761226654, "learning_rate": 5.129012483700804e-06, "loss": 0.0078, "step": 64260 }, { "epoch": 1.0853852129563955, "grad_norm": 0.43938350677490234, "learning_rate": 5.127539218891916e-06, "loss": 0.0084, "step": 64270 }, { "epoch": 1.0855540919377173, "grad_norm": 0.30345460772514343, "learning_rate": 5.126065943002734e-06, "loss": 0.0086, "step": 64280 }, { "epoch": 1.0857229709190395, "grad_norm": 0.38071316480636597, "learning_rate": 5.124592656161253e-06, "loss": 0.008, "step": 64290 }, { "epoch": 1.0858918499003614, "grad_norm": 0.2279309332370758, "learning_rate": 5.123119358495466e-06, "loss": 0.0064, "step": 64300 }, { "epoch": 1.0860607288816835, "grad_norm": 0.22669093310832977, "learning_rate": 5.121646050133372e-06, "loss": 0.0056, "step": 64310 }, { "epoch": 1.0862296078630054, "grad_norm": 0.3095068335533142, "learning_rate": 5.120172731202967e-06, "loss": 0.0069, "step": 64320 }, { "epoch": 1.0863984868443273, "grad_norm": 0.1299767941236496, "learning_rate": 5.1186994018322485e-06, "loss": 0.0063, "step": 64330 }, { "epoch": 1.0865673658256494, "grad_norm": 0.2944100499153137, "learning_rate": 5.117226062149217e-06, "loss": 0.0062, "step": 64340 }, { "epoch": 1.0867362448069713, "grad_norm": 0.14576838910579681, "learning_rate": 5.115752712281873e-06, "loss": 0.0071, "step": 64350 }, { "epoch": 1.0869051237882934, "grad_norm": 0.21702565252780914, "learning_rate": 5.114279352358218e-06, "loss": 0.0086, "step": 64360 }, { "epoch": 1.0870740027696153, "grad_norm": 0.36353600025177, "learning_rate": 5.1128059825062516e-06, "loss": 0.0083, "step": 64370 }, { "epoch": 1.0872428817509372, "grad_norm": 0.2604784369468689, "learning_rate": 5.111332602853979e-06, "loss": 0.0095, "step": 64380 }, { "epoch": 1.0874117607322593, "grad_norm": 0.3107488751411438, "learning_rate": 5.109859213529401e-06, "loss": 0.0126, "step": 64390 }, { "epoch": 1.0875806397135812, "grad_norm": 0.24918703734874725, "learning_rate": 5.108385814660524e-06, "loss": 0.0077, "step": 64400 }, { "epoch": 1.0877495186949033, "grad_norm": 0.21379266679286957, "learning_rate": 5.106912406375354e-06, "loss": 0.0073, "step": 64410 }, { "epoch": 1.0879183976762252, "grad_norm": 0.13084150850772858, "learning_rate": 5.105438988801896e-06, "loss": 0.0093, "step": 64420 }, { "epoch": 1.0880872766575471, "grad_norm": 0.18018625676631927, "learning_rate": 5.1039655620681565e-06, "loss": 0.005, "step": 64430 }, { "epoch": 1.0882561556388692, "grad_norm": 0.44093945622444153, "learning_rate": 5.102492126302143e-06, "loss": 0.0091, "step": 64440 }, { "epoch": 1.0884250346201911, "grad_norm": 0.26200243830680847, "learning_rate": 5.1010186816318665e-06, "loss": 0.0088, "step": 64450 }, { "epoch": 1.0885939136015133, "grad_norm": 0.23741719126701355, "learning_rate": 5.099545228185333e-06, "loss": 0.007, "step": 64460 }, { "epoch": 1.0887627925828351, "grad_norm": 0.21760453283786774, "learning_rate": 5.098071766090554e-06, "loss": 0.0074, "step": 64470 }, { "epoch": 1.088931671564157, "grad_norm": 0.12578554451465607, "learning_rate": 5.096598295475541e-06, "loss": 0.0071, "step": 64480 }, { "epoch": 1.0891005505454792, "grad_norm": 0.2811545431613922, "learning_rate": 5.095124816468305e-06, "loss": 0.0077, "step": 64490 }, { "epoch": 1.089269429526801, "grad_norm": 0.21505261957645416, "learning_rate": 5.093651329196858e-06, "loss": 0.0064, "step": 64500 }, { "epoch": 1.0894383085081232, "grad_norm": 0.2161913514137268, "learning_rate": 5.092177833789213e-06, "loss": 0.0094, "step": 64510 }, { "epoch": 1.089607187489445, "grad_norm": 0.308852881193161, "learning_rate": 5.090704330373384e-06, "loss": 0.0115, "step": 64520 }, { "epoch": 1.089776066470767, "grad_norm": 0.5373039841651917, "learning_rate": 5.089230819077383e-06, "loss": 0.0084, "step": 64530 }, { "epoch": 1.089944945452089, "grad_norm": 0.2929607629776001, "learning_rate": 5.087757300029228e-06, "loss": 0.0089, "step": 64540 }, { "epoch": 1.090113824433411, "grad_norm": 0.33967939019203186, "learning_rate": 5.086283773356936e-06, "loss": 0.0156, "step": 64550 }, { "epoch": 1.090282703414733, "grad_norm": 0.5182738304138184, "learning_rate": 5.084810239188519e-06, "loss": 0.014, "step": 64560 }, { "epoch": 1.090451582396055, "grad_norm": 0.1513078808784485, "learning_rate": 5.083336697651997e-06, "loss": 0.0099, "step": 64570 }, { "epoch": 1.090620461377377, "grad_norm": 0.24073046445846558, "learning_rate": 5.081863148875387e-06, "loss": 0.0091, "step": 64580 }, { "epoch": 1.090789340358699, "grad_norm": 0.2346435785293579, "learning_rate": 5.080389592986706e-06, "loss": 0.007, "step": 64590 }, { "epoch": 1.090958219340021, "grad_norm": 0.22649729251861572, "learning_rate": 5.078916030113975e-06, "loss": 0.0086, "step": 64600 }, { "epoch": 1.091127098321343, "grad_norm": 0.1570674329996109, "learning_rate": 5.077442460385214e-06, "loss": 0.0049, "step": 64610 }, { "epoch": 1.091295977302665, "grad_norm": 0.5455947518348694, "learning_rate": 5.07596888392844e-06, "loss": 0.01, "step": 64620 }, { "epoch": 1.0914648562839868, "grad_norm": 0.5003747344017029, "learning_rate": 5.0744953008716754e-06, "loss": 0.01, "step": 64630 }, { "epoch": 1.091633735265309, "grad_norm": 0.12199120223522186, "learning_rate": 5.073021711342942e-06, "loss": 0.0066, "step": 64640 }, { "epoch": 1.0918026142466308, "grad_norm": 0.2691020369529724, "learning_rate": 5.0715481154702615e-06, "loss": 0.006, "step": 64650 }, { "epoch": 1.091971493227953, "grad_norm": 0.16651664674282074, "learning_rate": 5.070074513381655e-06, "loss": 0.0051, "step": 64660 }, { "epoch": 1.0921403722092748, "grad_norm": 0.1533619463443756, "learning_rate": 5.068600905205149e-06, "loss": 0.0099, "step": 64670 }, { "epoch": 1.0923092511905967, "grad_norm": 0.18285511434078217, "learning_rate": 5.067127291068763e-06, "loss": 0.012, "step": 64680 }, { "epoch": 1.0924781301719189, "grad_norm": 0.21354830265045166, "learning_rate": 5.065653671100522e-06, "loss": 0.0062, "step": 64690 }, { "epoch": 1.0926470091532408, "grad_norm": 0.27975162863731384, "learning_rate": 5.0641800454284505e-06, "loss": 0.0092, "step": 64700 }, { "epoch": 1.0928158881345629, "grad_norm": 0.3840157687664032, "learning_rate": 5.062706414180576e-06, "loss": 0.0128, "step": 64710 }, { "epoch": 1.0929847671158848, "grad_norm": 0.3034083843231201, "learning_rate": 5.06123277748492e-06, "loss": 0.0093, "step": 64720 }, { "epoch": 1.0931536460972067, "grad_norm": 0.3298279643058777, "learning_rate": 5.059759135469512e-06, "loss": 0.0097, "step": 64730 }, { "epoch": 1.0933225250785288, "grad_norm": 0.26168331503868103, "learning_rate": 5.058285488262375e-06, "loss": 0.009, "step": 64740 }, { "epoch": 1.0934914040598507, "grad_norm": 0.3693603575229645, "learning_rate": 5.056811835991539e-06, "loss": 0.0074, "step": 64750 }, { "epoch": 1.0936602830411728, "grad_norm": 0.10155957192182541, "learning_rate": 5.0553381787850275e-06, "loss": 0.0077, "step": 64760 }, { "epoch": 1.0938291620224947, "grad_norm": 0.11437680572271347, "learning_rate": 5.053864516770873e-06, "loss": 0.0086, "step": 64770 }, { "epoch": 1.0939980410038166, "grad_norm": 0.38230451941490173, "learning_rate": 5.0523908500771e-06, "loss": 0.0085, "step": 64780 }, { "epoch": 1.0941669199851387, "grad_norm": 0.2659294903278351, "learning_rate": 5.05091717883174e-06, "loss": 0.0094, "step": 64790 }, { "epoch": 1.0943357989664606, "grad_norm": 0.1943846046924591, "learning_rate": 5.049443503162819e-06, "loss": 0.0072, "step": 64800 }, { "epoch": 1.0945046779477827, "grad_norm": 0.11932999640703201, "learning_rate": 5.0479698231983684e-06, "loss": 0.0122, "step": 64810 }, { "epoch": 1.0946735569291046, "grad_norm": 0.22662733495235443, "learning_rate": 5.0464961390664145e-06, "loss": 0.0077, "step": 64820 }, { "epoch": 1.0948424359104265, "grad_norm": 0.26488131284713745, "learning_rate": 5.045022450894993e-06, "loss": 0.0062, "step": 64830 }, { "epoch": 1.0950113148917486, "grad_norm": 0.16562853753566742, "learning_rate": 5.04354875881213e-06, "loss": 0.0085, "step": 64840 }, { "epoch": 1.0951801938730705, "grad_norm": 0.24520458281040192, "learning_rate": 5.04207506294586e-06, "loss": 0.0067, "step": 64850 }, { "epoch": 1.0953490728543924, "grad_norm": 0.53618985414505, "learning_rate": 5.040601363424208e-06, "loss": 0.0103, "step": 64860 }, { "epoch": 1.0955179518357145, "grad_norm": 0.3643192648887634, "learning_rate": 5.039127660375212e-06, "loss": 0.0063, "step": 64870 }, { "epoch": 1.0956868308170364, "grad_norm": 0.40187132358551025, "learning_rate": 5.0376539539268965e-06, "loss": 0.008, "step": 64880 }, { "epoch": 1.0958557097983586, "grad_norm": 0.18655341863632202, "learning_rate": 5.0361802442073e-06, "loss": 0.0126, "step": 64890 }, { "epoch": 1.0960245887796805, "grad_norm": 0.3047480583190918, "learning_rate": 5.034706531344454e-06, "loss": 0.0097, "step": 64900 }, { "epoch": 1.0961934677610023, "grad_norm": 0.38092225790023804, "learning_rate": 5.033232815466389e-06, "loss": 0.0071, "step": 64910 }, { "epoch": 1.0963623467423245, "grad_norm": 0.2123788744211197, "learning_rate": 5.031759096701139e-06, "loss": 0.0095, "step": 64920 }, { "epoch": 1.0965312257236464, "grad_norm": 0.3706580698490143, "learning_rate": 5.030285375176734e-06, "loss": 0.0137, "step": 64930 }, { "epoch": 1.0967001047049685, "grad_norm": 0.15878520905971527, "learning_rate": 5.0288116510212114e-06, "loss": 0.009, "step": 64940 }, { "epoch": 1.0968689836862904, "grad_norm": 0.5923088788986206, "learning_rate": 5.027337924362603e-06, "loss": 0.0104, "step": 64950 }, { "epoch": 1.0970378626676123, "grad_norm": 0.24444551765918732, "learning_rate": 5.025864195328943e-06, "loss": 0.0082, "step": 64960 }, { "epoch": 1.0972067416489344, "grad_norm": 0.16826897859573364, "learning_rate": 5.0243904640482655e-06, "loss": 0.0071, "step": 64970 }, { "epoch": 1.0973756206302563, "grad_norm": 0.23001888394355774, "learning_rate": 5.022916730648604e-06, "loss": 0.0055, "step": 64980 }, { "epoch": 1.0975444996115784, "grad_norm": 0.24522250890731812, "learning_rate": 5.021442995257993e-06, "loss": 0.0099, "step": 64990 }, { "epoch": 1.0977133785929003, "grad_norm": 0.801227867603302, "learning_rate": 5.019969258004466e-06, "loss": 0.0084, "step": 65000 }, { "epoch": 1.0978822575742222, "grad_norm": 0.25491607189178467, "learning_rate": 5.018495519016059e-06, "loss": 0.0095, "step": 65010 }, { "epoch": 1.0980511365555443, "grad_norm": 0.3206952214241028, "learning_rate": 5.017021778420809e-06, "loss": 0.0072, "step": 65020 }, { "epoch": 1.0982200155368662, "grad_norm": 0.6014628410339355, "learning_rate": 5.0155480363467455e-06, "loss": 0.0109, "step": 65030 }, { "epoch": 1.0983888945181883, "grad_norm": 0.3466830849647522, "learning_rate": 5.0140742929219084e-06, "loss": 0.0115, "step": 65040 }, { "epoch": 1.0985577734995102, "grad_norm": 0.45457178354263306, "learning_rate": 5.012600548274331e-06, "loss": 0.0072, "step": 65050 }, { "epoch": 1.0987266524808321, "grad_norm": 0.2625296711921692, "learning_rate": 5.011126802532048e-06, "loss": 0.0074, "step": 65060 }, { "epoch": 1.0988955314621542, "grad_norm": 0.19744707643985748, "learning_rate": 5.009653055823097e-06, "loss": 0.0089, "step": 65070 }, { "epoch": 1.0990644104434761, "grad_norm": 0.3044535517692566, "learning_rate": 5.0081793082755115e-06, "loss": 0.0058, "step": 65080 }, { "epoch": 1.0992332894247983, "grad_norm": 0.2821374833583832, "learning_rate": 5.0067055600173274e-06, "loss": 0.0153, "step": 65090 }, { "epoch": 1.0994021684061202, "grad_norm": 0.2759559750556946, "learning_rate": 5.005231811176582e-06, "loss": 0.0113, "step": 65100 }, { "epoch": 1.099571047387442, "grad_norm": 0.23366910219192505, "learning_rate": 5.003758061881309e-06, "loss": 0.0055, "step": 65110 }, { "epoch": 1.0997399263687642, "grad_norm": 0.22166918218135834, "learning_rate": 5.002284312259545e-06, "loss": 0.0076, "step": 65120 }, { "epoch": 1.099908805350086, "grad_norm": 0.17196524143218994, "learning_rate": 5.000810562439323e-06, "loss": 0.0065, "step": 65130 }, { "epoch": 1.1000776843314082, "grad_norm": 1.1922054290771484, "learning_rate": 4.999336812548684e-06, "loss": 0.0089, "step": 65140 }, { "epoch": 1.10024656331273, "grad_norm": 0.30771398544311523, "learning_rate": 4.9978630627156625e-06, "loss": 0.0084, "step": 65150 }, { "epoch": 1.100415442294052, "grad_norm": 0.42340680956840515, "learning_rate": 4.99638931306829e-06, "loss": 0.0121, "step": 65160 }, { "epoch": 1.100584321275374, "grad_norm": 0.8784505128860474, "learning_rate": 4.9949155637346055e-06, "loss": 0.0096, "step": 65170 }, { "epoch": 1.100753200256696, "grad_norm": 0.7296611666679382, "learning_rate": 4.993441814842645e-06, "loss": 0.0118, "step": 65180 }, { "epoch": 1.100922079238018, "grad_norm": 0.2074946165084839, "learning_rate": 4.991968066520444e-06, "loss": 0.0073, "step": 65190 }, { "epoch": 1.10109095821934, "grad_norm": 0.16048216819763184, "learning_rate": 4.990494318896037e-06, "loss": 0.0076, "step": 65200 }, { "epoch": 1.101259837200662, "grad_norm": 0.30493247509002686, "learning_rate": 4.98902057209746e-06, "loss": 0.0054, "step": 65210 }, { "epoch": 1.101428716181984, "grad_norm": 0.2592158019542694, "learning_rate": 4.987546826252749e-06, "loss": 0.007, "step": 65220 }, { "epoch": 1.101597595163306, "grad_norm": 0.16738925874233246, "learning_rate": 4.98607308148994e-06, "loss": 0.0054, "step": 65230 }, { "epoch": 1.101766474144628, "grad_norm": 0.11649847030639648, "learning_rate": 4.984599337937067e-06, "loss": 0.0055, "step": 65240 }, { "epoch": 1.10193535312595, "grad_norm": 0.01307998038828373, "learning_rate": 4.9831255957221654e-06, "loss": 0.0109, "step": 65250 }, { "epoch": 1.1021042321072718, "grad_norm": 0.33460330963134766, "learning_rate": 4.981651854973271e-06, "loss": 0.0073, "step": 65260 }, { "epoch": 1.102273111088594, "grad_norm": 0.2907170355319977, "learning_rate": 4.98017811581842e-06, "loss": 0.0136, "step": 65270 }, { "epoch": 1.1024419900699158, "grad_norm": 0.135965496301651, "learning_rate": 4.978704378385644e-06, "loss": 0.0125, "step": 65280 }, { "epoch": 1.102610869051238, "grad_norm": 0.21073366701602936, "learning_rate": 4.977230642802979e-06, "loss": 0.0131, "step": 65290 }, { "epoch": 1.1027797480325598, "grad_norm": 0.26622018218040466, "learning_rate": 4.97575690919846e-06, "loss": 0.0103, "step": 65300 }, { "epoch": 1.1029486270138817, "grad_norm": 0.389342337846756, "learning_rate": 4.974283177700123e-06, "loss": 0.0105, "step": 65310 }, { "epoch": 1.1031175059952039, "grad_norm": 0.2740905284881592, "learning_rate": 4.972809448436e-06, "loss": 0.0074, "step": 65320 }, { "epoch": 1.1032863849765258, "grad_norm": 0.39481863379478455, "learning_rate": 4.971335721534124e-06, "loss": 0.0082, "step": 65330 }, { "epoch": 1.1034552639578479, "grad_norm": 0.35922369360923767, "learning_rate": 4.969861997122531e-06, "loss": 0.0086, "step": 65340 }, { "epoch": 1.1036241429391698, "grad_norm": 0.3984062373638153, "learning_rate": 4.968388275329254e-06, "loss": 0.0072, "step": 65350 }, { "epoch": 1.1037930219204917, "grad_norm": 0.37357842922210693, "learning_rate": 4.966914556282327e-06, "loss": 0.0095, "step": 65360 }, { "epoch": 1.1039619009018138, "grad_norm": 0.5445627570152283, "learning_rate": 4.965440840109781e-06, "loss": 0.015, "step": 65370 }, { "epoch": 1.1041307798831357, "grad_norm": 0.3871108293533325, "learning_rate": 4.963967126939651e-06, "loss": 0.0079, "step": 65380 }, { "epoch": 1.1042996588644578, "grad_norm": 0.1970381736755371, "learning_rate": 4.962493416899968e-06, "loss": 0.0075, "step": 65390 }, { "epoch": 1.1044685378457797, "grad_norm": 0.2385249137878418, "learning_rate": 4.961019710118767e-06, "loss": 0.0116, "step": 65400 }, { "epoch": 1.1046374168271016, "grad_norm": 0.3592974841594696, "learning_rate": 4.959546006724076e-06, "loss": 0.0073, "step": 65410 }, { "epoch": 1.1048062958084237, "grad_norm": 0.4237782061100006, "learning_rate": 4.958072306843929e-06, "loss": 0.0072, "step": 65420 }, { "epoch": 1.1049751747897456, "grad_norm": 0.289541631937027, "learning_rate": 4.956598610606356e-06, "loss": 0.008, "step": 65430 }, { "epoch": 1.1051440537710677, "grad_norm": 0.3747820556163788, "learning_rate": 4.955124918139393e-06, "loss": 0.0074, "step": 65440 }, { "epoch": 1.1053129327523896, "grad_norm": 0.09555180370807648, "learning_rate": 4.953651229571064e-06, "loss": 0.0113, "step": 65450 }, { "epoch": 1.1054818117337115, "grad_norm": 0.6429426670074463, "learning_rate": 4.952177545029404e-06, "loss": 0.0069, "step": 65460 }, { "epoch": 1.1056506907150336, "grad_norm": 0.27546432614326477, "learning_rate": 4.95070386464244e-06, "loss": 0.0125, "step": 65470 }, { "epoch": 1.1058195696963555, "grad_norm": 0.08389178663492203, "learning_rate": 4.949230188538206e-06, "loss": 0.0077, "step": 65480 }, { "epoch": 1.1059884486776776, "grad_norm": 0.3384682536125183, "learning_rate": 4.947756516844728e-06, "loss": 0.0072, "step": 65490 }, { "epoch": 1.1061573276589995, "grad_norm": 0.37307554483413696, "learning_rate": 4.946282849690035e-06, "loss": 0.0055, "step": 65500 }, { "epoch": 1.1063262066403214, "grad_norm": 0.16888302564620972, "learning_rate": 4.944809187202156e-06, "loss": 0.0063, "step": 65510 }, { "epoch": 1.1064950856216436, "grad_norm": 0.3835907578468323, "learning_rate": 4.943335529509122e-06, "loss": 0.0093, "step": 65520 }, { "epoch": 1.1066639646029655, "grad_norm": 1.0279337167739868, "learning_rate": 4.941861876738957e-06, "loss": 0.0157, "step": 65530 }, { "epoch": 1.1068328435842876, "grad_norm": 0.22645534574985504, "learning_rate": 4.940388229019689e-06, "loss": 0.0086, "step": 65540 }, { "epoch": 1.1070017225656095, "grad_norm": 0.3834129869937897, "learning_rate": 4.938914586479345e-06, "loss": 0.0093, "step": 65550 }, { "epoch": 1.1071706015469314, "grad_norm": 0.1281002312898636, "learning_rate": 4.937440949245954e-06, "loss": 0.0056, "step": 65560 }, { "epoch": 1.1073394805282535, "grad_norm": 0.38878458738327026, "learning_rate": 4.935967317447539e-06, "loss": 0.0082, "step": 65570 }, { "epoch": 1.1075083595095754, "grad_norm": 0.24966038763523102, "learning_rate": 4.934493691212127e-06, "loss": 0.0057, "step": 65580 }, { "epoch": 1.1076772384908975, "grad_norm": 0.2943115830421448, "learning_rate": 4.933020070667742e-06, "loss": 0.0085, "step": 65590 }, { "epoch": 1.1078461174722194, "grad_norm": 0.23721031844615936, "learning_rate": 4.9315464559424095e-06, "loss": 0.0117, "step": 65600 }, { "epoch": 1.1080149964535413, "grad_norm": 0.06986691057682037, "learning_rate": 4.930072847164153e-06, "loss": 0.0073, "step": 65610 }, { "epoch": 1.1081838754348634, "grad_norm": 0.22105063498020172, "learning_rate": 4.928599244460996e-06, "loss": 0.0085, "step": 65620 }, { "epoch": 1.1083527544161853, "grad_norm": 0.21381188929080963, "learning_rate": 4.927125647960962e-06, "loss": 0.0048, "step": 65630 }, { "epoch": 1.1085216333975074, "grad_norm": 0.3844258785247803, "learning_rate": 4.925652057792074e-06, "loss": 0.0121, "step": 65640 }, { "epoch": 1.1086905123788293, "grad_norm": 0.19224731624126434, "learning_rate": 4.924178474082353e-06, "loss": 0.0044, "step": 65650 }, { "epoch": 1.1088593913601512, "grad_norm": 0.4105004370212555, "learning_rate": 4.922704896959818e-06, "loss": 0.0066, "step": 65660 }, { "epoch": 1.1090282703414733, "grad_norm": 0.1616693139076233, "learning_rate": 4.921231326552492e-06, "loss": 0.0065, "step": 65670 }, { "epoch": 1.1091971493227952, "grad_norm": 0.3330088257789612, "learning_rate": 4.919757762988399e-06, "loss": 0.0072, "step": 65680 }, { "epoch": 1.1093660283041173, "grad_norm": 0.5581793785095215, "learning_rate": 4.918284206395553e-06, "loss": 0.0122, "step": 65690 }, { "epoch": 1.1095349072854392, "grad_norm": 0.19671468436717987, "learning_rate": 4.916810656901974e-06, "loss": 0.0071, "step": 65700 }, { "epoch": 1.1097037862667611, "grad_norm": 0.2872672379016876, "learning_rate": 4.915337114635682e-06, "loss": 0.0125, "step": 65710 }, { "epoch": 1.1098726652480833, "grad_norm": 0.3490653932094574, "learning_rate": 4.913863579724695e-06, "loss": 0.0094, "step": 65720 }, { "epoch": 1.1100415442294052, "grad_norm": 0.06535673141479492, "learning_rate": 4.912390052297028e-06, "loss": 0.0065, "step": 65730 }, { "epoch": 1.1102104232107273, "grad_norm": 0.24840447306632996, "learning_rate": 4.910916532480698e-06, "loss": 0.006, "step": 65740 }, { "epoch": 1.1103793021920492, "grad_norm": 0.40061822533607483, "learning_rate": 4.909443020403723e-06, "loss": 0.0087, "step": 65750 }, { "epoch": 1.110548181173371, "grad_norm": 0.11030689626932144, "learning_rate": 4.9079695161941165e-06, "loss": 0.0086, "step": 65760 }, { "epoch": 1.1107170601546932, "grad_norm": 0.23537763953208923, "learning_rate": 4.906496019979892e-06, "loss": 0.0107, "step": 65770 }, { "epoch": 1.110885939136015, "grad_norm": 0.21458689868450165, "learning_rate": 4.905022531889063e-06, "loss": 0.007, "step": 65780 }, { "epoch": 1.1110548181173372, "grad_norm": 0.2992173135280609, "learning_rate": 4.903549052049644e-06, "loss": 0.0088, "step": 65790 }, { "epoch": 1.111223697098659, "grad_norm": 0.16462342441082, "learning_rate": 4.902075580589648e-06, "loss": 0.0075, "step": 65800 }, { "epoch": 1.111392576079981, "grad_norm": 0.19640593230724335, "learning_rate": 4.9006021176370875e-06, "loss": 0.0105, "step": 65810 }, { "epoch": 1.111561455061303, "grad_norm": 0.2558022737503052, "learning_rate": 4.899128663319969e-06, "loss": 0.0049, "step": 65820 }, { "epoch": 1.111730334042625, "grad_norm": 0.12805849313735962, "learning_rate": 4.897655217766306e-06, "loss": 0.0074, "step": 65830 }, { "epoch": 1.1118992130239471, "grad_norm": 0.2369185984134674, "learning_rate": 4.896181781104106e-06, "loss": 0.0088, "step": 65840 }, { "epoch": 1.112068092005269, "grad_norm": 0.2160257250070572, "learning_rate": 4.89470835346138e-06, "loss": 0.0115, "step": 65850 }, { "epoch": 1.112236970986591, "grad_norm": 0.22266682982444763, "learning_rate": 4.8932349349661324e-06, "loss": 0.0055, "step": 65860 }, { "epoch": 1.112405849967913, "grad_norm": 0.2219821661710739, "learning_rate": 4.891761525746373e-06, "loss": 0.0098, "step": 65870 }, { "epoch": 1.112574728949235, "grad_norm": 0.3065343499183655, "learning_rate": 4.890288125930106e-06, "loss": 0.0069, "step": 65880 }, { "epoch": 1.112743607930557, "grad_norm": 0.14732633531093597, "learning_rate": 4.888814735645341e-06, "loss": 0.0059, "step": 65890 }, { "epoch": 1.112912486911879, "grad_norm": 0.09766492247581482, "learning_rate": 4.8873413550200755e-06, "loss": 0.0054, "step": 65900 }, { "epoch": 1.1130813658932008, "grad_norm": 0.2690957188606262, "learning_rate": 4.885867984182318e-06, "loss": 0.0063, "step": 65910 }, { "epoch": 1.113250244874523, "grad_norm": 0.24883396923542023, "learning_rate": 4.88439462326007e-06, "loss": 0.0094, "step": 65920 }, { "epoch": 1.1134191238558449, "grad_norm": 0.25195181369781494, "learning_rate": 4.882921272381337e-06, "loss": 0.012, "step": 65930 }, { "epoch": 1.113588002837167, "grad_norm": 0.41682755947113037, "learning_rate": 4.8814479316741144e-06, "loss": 0.0109, "step": 65940 }, { "epoch": 1.1137568818184889, "grad_norm": 0.30446097254753113, "learning_rate": 4.879974601266405e-06, "loss": 0.0116, "step": 65950 }, { "epoch": 1.1139257607998108, "grad_norm": 0.220688134431839, "learning_rate": 4.878501281286208e-06, "loss": 0.0084, "step": 65960 }, { "epoch": 1.1140946397811329, "grad_norm": 0.3050573170185089, "learning_rate": 4.877027971861523e-06, "loss": 0.0071, "step": 65970 }, { "epoch": 1.1142635187624548, "grad_norm": 0.36831653118133545, "learning_rate": 4.875554673120345e-06, "loss": 0.0079, "step": 65980 }, { "epoch": 1.114432397743777, "grad_norm": 0.390803724527359, "learning_rate": 4.874081385190673e-06, "loss": 0.0105, "step": 65990 }, { "epoch": 1.1146012767250988, "grad_norm": 0.1786070615053177, "learning_rate": 4.872608108200502e-06, "loss": 0.0068, "step": 66000 }, { "epoch": 1.1147701557064207, "grad_norm": 0.15264828503131866, "learning_rate": 4.8711348422778275e-06, "loss": 0.007, "step": 66010 }, { "epoch": 1.1149390346877428, "grad_norm": 1.2446224689483643, "learning_rate": 4.869661587550639e-06, "loss": 0.0077, "step": 66020 }, { "epoch": 1.1151079136690647, "grad_norm": 0.38719287514686584, "learning_rate": 4.868188344146934e-06, "loss": 0.0074, "step": 66030 }, { "epoch": 1.1152767926503868, "grad_norm": 0.1065649762749672, "learning_rate": 4.8667151121947025e-06, "loss": 0.0088, "step": 66040 }, { "epoch": 1.1154456716317087, "grad_norm": 0.2620736062526703, "learning_rate": 4.865241891821937e-06, "loss": 0.0091, "step": 66050 }, { "epoch": 1.1156145506130306, "grad_norm": 0.20533917844295502, "learning_rate": 4.863768683156625e-06, "loss": 0.012, "step": 66060 }, { "epoch": 1.1157834295943527, "grad_norm": 0.19232794642448425, "learning_rate": 4.862295486326755e-06, "loss": 0.0059, "step": 66070 }, { "epoch": 1.1159523085756746, "grad_norm": 0.17090369760990143, "learning_rate": 4.860822301460315e-06, "loss": 0.0063, "step": 66080 }, { "epoch": 1.1161211875569967, "grad_norm": 0.22430239617824554, "learning_rate": 4.859349128685295e-06, "loss": 0.0072, "step": 66090 }, { "epoch": 1.1162900665383186, "grad_norm": 0.51833575963974, "learning_rate": 4.857875968129676e-06, "loss": 0.0094, "step": 66100 }, { "epoch": 1.1164589455196405, "grad_norm": 0.23637375235557556, "learning_rate": 4.856402819921446e-06, "loss": 0.0072, "step": 66110 }, { "epoch": 1.1166278245009627, "grad_norm": 0.16183899343013763, "learning_rate": 4.8549296841885855e-06, "loss": 0.006, "step": 66120 }, { "epoch": 1.1167967034822845, "grad_norm": 0.0925481840968132, "learning_rate": 4.853456561059079e-06, "loss": 0.0083, "step": 66130 }, { "epoch": 1.1169655824636067, "grad_norm": 0.30956628918647766, "learning_rate": 4.851983450660908e-06, "loss": 0.0079, "step": 66140 }, { "epoch": 1.1171344614449286, "grad_norm": 0.2720670998096466, "learning_rate": 4.850510353122052e-06, "loss": 0.0086, "step": 66150 }, { "epoch": 1.1173033404262505, "grad_norm": 0.25475233793258667, "learning_rate": 4.849037268570489e-06, "loss": 0.0075, "step": 66160 }, { "epoch": 1.1174722194075726, "grad_norm": 0.3151414692401886, "learning_rate": 4.847564197134202e-06, "loss": 0.0095, "step": 66170 }, { "epoch": 1.1176410983888945, "grad_norm": 0.28757190704345703, "learning_rate": 4.846091138941161e-06, "loss": 0.0076, "step": 66180 }, { "epoch": 1.1178099773702166, "grad_norm": 0.3148218095302582, "learning_rate": 4.844618094119344e-06, "loss": 0.0081, "step": 66190 }, { "epoch": 1.1179788563515385, "grad_norm": 0.3707195222377777, "learning_rate": 4.843145062796727e-06, "loss": 0.0121, "step": 66200 }, { "epoch": 1.1181477353328604, "grad_norm": 0.1620979756116867, "learning_rate": 4.841672045101285e-06, "loss": 0.0078, "step": 66210 }, { "epoch": 1.1183166143141825, "grad_norm": 0.29850801825523376, "learning_rate": 4.840199041160985e-06, "loss": 0.0096, "step": 66220 }, { "epoch": 1.1184854932955044, "grad_norm": 0.1868313103914261, "learning_rate": 4.838726051103801e-06, "loss": 0.007, "step": 66230 }, { "epoch": 1.1186543722768265, "grad_norm": 0.1574598103761673, "learning_rate": 4.837253075057702e-06, "loss": 0.0054, "step": 66240 }, { "epoch": 1.1188232512581484, "grad_norm": 0.4170683026313782, "learning_rate": 4.835780113150658e-06, "loss": 0.0112, "step": 66250 }, { "epoch": 1.1189921302394703, "grad_norm": 0.12076272815465927, "learning_rate": 4.8343071655106364e-06, "loss": 0.0083, "step": 66260 }, { "epoch": 1.1191610092207924, "grad_norm": 0.06867657601833344, "learning_rate": 4.8328342322656e-06, "loss": 0.0045, "step": 66270 }, { "epoch": 1.1193298882021143, "grad_norm": 0.6497325897216797, "learning_rate": 4.831361313543517e-06, "loss": 0.0112, "step": 66280 }, { "epoch": 1.1194987671834364, "grad_norm": 0.1054227203130722, "learning_rate": 4.8298884094723495e-06, "loss": 0.0075, "step": 66290 }, { "epoch": 1.1196676461647583, "grad_norm": 0.19700537621974945, "learning_rate": 4.828415520180063e-06, "loss": 0.0098, "step": 66300 }, { "epoch": 1.1198365251460802, "grad_norm": 0.6097468137741089, "learning_rate": 4.8269426457946125e-06, "loss": 0.0094, "step": 66310 }, { "epoch": 1.1200054041274023, "grad_norm": 0.1746620088815689, "learning_rate": 4.825469786443961e-06, "loss": 0.0104, "step": 66320 }, { "epoch": 1.1201742831087242, "grad_norm": 0.13545122742652893, "learning_rate": 4.823996942256067e-06, "loss": 0.0084, "step": 66330 }, { "epoch": 1.1203431620900464, "grad_norm": 0.3595692813396454, "learning_rate": 4.822524113358889e-06, "loss": 0.0088, "step": 66340 }, { "epoch": 1.1205120410713683, "grad_norm": 0.32351091504096985, "learning_rate": 4.82105129988038e-06, "loss": 0.0102, "step": 66350 }, { "epoch": 1.1206809200526902, "grad_norm": 0.30524879693984985, "learning_rate": 4.819578501948495e-06, "loss": 0.0051, "step": 66360 }, { "epoch": 1.1208497990340123, "grad_norm": 0.17988651990890503, "learning_rate": 4.818105719691187e-06, "loss": 0.0077, "step": 66370 }, { "epoch": 1.1210186780153342, "grad_norm": 0.13240450620651245, "learning_rate": 4.8166329532364105e-06, "loss": 0.0079, "step": 66380 }, { "epoch": 1.1211875569966563, "grad_norm": 0.21142126619815826, "learning_rate": 4.815160202712113e-06, "loss": 0.0083, "step": 66390 }, { "epoch": 1.1213564359779782, "grad_norm": 0.2506466209888458, "learning_rate": 4.8136874682462435e-06, "loss": 0.0074, "step": 66400 }, { "epoch": 1.1215253149593, "grad_norm": 0.4706380069255829, "learning_rate": 4.8122147499667495e-06, "loss": 0.0089, "step": 66410 }, { "epoch": 1.1216941939406222, "grad_norm": 0.3651686906814575, "learning_rate": 4.810742048001581e-06, "loss": 0.0089, "step": 66420 }, { "epoch": 1.121863072921944, "grad_norm": 0.4070417582988739, "learning_rate": 4.809269362478677e-06, "loss": 0.0124, "step": 66430 }, { "epoch": 1.1220319519032662, "grad_norm": 0.35082972049713135, "learning_rate": 4.807796693525984e-06, "loss": 0.0071, "step": 66440 }, { "epoch": 1.122200830884588, "grad_norm": 0.37658411264419556, "learning_rate": 4.806324041271443e-06, "loss": 0.0065, "step": 66450 }, { "epoch": 1.12236970986591, "grad_norm": 0.4784534275531769, "learning_rate": 4.804851405842995e-06, "loss": 0.0112, "step": 66460 }, { "epoch": 1.1225385888472321, "grad_norm": 0.5621770620346069, "learning_rate": 4.803378787368579e-06, "loss": 0.0092, "step": 66470 }, { "epoch": 1.122707467828554, "grad_norm": 0.1361473947763443, "learning_rate": 4.801906185976131e-06, "loss": 0.0081, "step": 66480 }, { "epoch": 1.1228763468098761, "grad_norm": 0.32393065094947815, "learning_rate": 4.8004336017935894e-06, "loss": 0.011, "step": 66490 }, { "epoch": 1.123045225791198, "grad_norm": 0.35338032245635986, "learning_rate": 4.798961034948888e-06, "loss": 0.0099, "step": 66500 }, { "epoch": 1.12321410477252, "grad_norm": 0.10926263779401779, "learning_rate": 4.797488485569957e-06, "loss": 0.0069, "step": 66510 }, { "epoch": 1.123382983753842, "grad_norm": 0.6194117069244385, "learning_rate": 4.796015953784732e-06, "loss": 0.0099, "step": 66520 }, { "epoch": 1.123551862735164, "grad_norm": 0.5273902416229248, "learning_rate": 4.79454343972114e-06, "loss": 0.006, "step": 66530 }, { "epoch": 1.123720741716486, "grad_norm": 0.15648990869522095, "learning_rate": 4.793070943507114e-06, "loss": 0.0054, "step": 66540 }, { "epoch": 1.123889620697808, "grad_norm": 0.2822222113609314, "learning_rate": 4.7915984652705735e-06, "loss": 0.0087, "step": 66550 }, { "epoch": 1.1240584996791299, "grad_norm": 0.19895018637180328, "learning_rate": 4.790126005139448e-06, "loss": 0.006, "step": 66560 }, { "epoch": 1.124227378660452, "grad_norm": 0.21445763111114502, "learning_rate": 4.78865356324166e-06, "loss": 0.0069, "step": 66570 }, { "epoch": 1.1243962576417739, "grad_norm": 0.1351335346698761, "learning_rate": 4.787181139705136e-06, "loss": 0.0111, "step": 66580 }, { "epoch": 1.124565136623096, "grad_norm": 0.2564261555671692, "learning_rate": 4.785708734657792e-06, "loss": 0.0098, "step": 66590 }, { "epoch": 1.1247340156044179, "grad_norm": 0.3055364191532135, "learning_rate": 4.784236348227546e-06, "loss": 0.0061, "step": 66600 }, { "epoch": 1.1249028945857398, "grad_norm": 0.24688449501991272, "learning_rate": 4.782763980542318e-06, "loss": 0.0069, "step": 66610 }, { "epoch": 1.125071773567062, "grad_norm": 0.18588003516197205, "learning_rate": 4.781291631730025e-06, "loss": 0.0088, "step": 66620 }, { "epoch": 1.1252406525483838, "grad_norm": 0.2518293261528015, "learning_rate": 4.779819301918577e-06, "loss": 0.0111, "step": 66630 }, { "epoch": 1.125409531529706, "grad_norm": 0.25423189997673035, "learning_rate": 4.778346991235889e-06, "loss": 0.0081, "step": 66640 }, { "epoch": 1.1255784105110278, "grad_norm": 0.3092019259929657, "learning_rate": 4.776874699809872e-06, "loss": 0.0111, "step": 66650 }, { "epoch": 1.1257472894923497, "grad_norm": 0.20538298785686493, "learning_rate": 4.7754024277684356e-06, "loss": 0.0082, "step": 66660 }, { "epoch": 1.1259161684736718, "grad_norm": 0.2610275447368622, "learning_rate": 4.773930175239483e-06, "loss": 0.0085, "step": 66670 }, { "epoch": 1.1260850474549937, "grad_norm": 0.17519573867321014, "learning_rate": 4.772457942350924e-06, "loss": 0.0109, "step": 66680 }, { "epoch": 1.1262539264363158, "grad_norm": 0.457217812538147, "learning_rate": 4.77098572923066e-06, "loss": 0.0115, "step": 66690 }, { "epoch": 1.1264228054176377, "grad_norm": 0.34682103991508484, "learning_rate": 4.769513536006596e-06, "loss": 0.0107, "step": 66700 }, { "epoch": 1.1265916843989596, "grad_norm": 0.2683617174625397, "learning_rate": 4.768041362806633e-06, "loss": 0.0078, "step": 66710 }, { "epoch": 1.1267605633802817, "grad_norm": 0.29283207654953003, "learning_rate": 4.766569209758666e-06, "loss": 0.0095, "step": 66720 }, { "epoch": 1.1269294423616036, "grad_norm": 0.475791335105896, "learning_rate": 4.765097076990594e-06, "loss": 0.0113, "step": 66730 }, { "epoch": 1.1270983213429258, "grad_norm": 0.2856588363647461, "learning_rate": 4.763624964630313e-06, "loss": 0.0104, "step": 66740 }, { "epoch": 1.1272672003242477, "grad_norm": 0.33088091015815735, "learning_rate": 4.762152872805716e-06, "loss": 0.0119, "step": 66750 }, { "epoch": 1.1274360793055695, "grad_norm": 0.16190795600414276, "learning_rate": 4.760680801644694e-06, "loss": 0.0094, "step": 66760 }, { "epoch": 1.1276049582868917, "grad_norm": 0.17243948578834534, "learning_rate": 4.759208751275138e-06, "loss": 0.0085, "step": 66770 }, { "epoch": 1.1277738372682136, "grad_norm": 0.24644550681114197, "learning_rate": 4.757736721824935e-06, "loss": 0.0082, "step": 66780 }, { "epoch": 1.1279427162495357, "grad_norm": 0.24362532794475555, "learning_rate": 4.756264713421974e-06, "loss": 0.0124, "step": 66790 }, { "epoch": 1.1281115952308576, "grad_norm": 0.20677950978279114, "learning_rate": 4.754792726194135e-06, "loss": 0.008, "step": 66800 }, { "epoch": 1.1282804742121795, "grad_norm": 0.2750169336795807, "learning_rate": 4.753320760269303e-06, "loss": 0.007, "step": 66810 }, { "epoch": 1.1284493531935016, "grad_norm": 0.34831956028938293, "learning_rate": 4.751848815775359e-06, "loss": 0.0075, "step": 66820 }, { "epoch": 1.1286182321748235, "grad_norm": 0.11419502645730972, "learning_rate": 4.750376892840185e-06, "loss": 0.0054, "step": 66830 }, { "epoch": 1.1287871111561456, "grad_norm": 0.3812716007232666, "learning_rate": 4.748904991591652e-06, "loss": 0.009, "step": 66840 }, { "epoch": 1.1289559901374675, "grad_norm": 0.3807617425918579, "learning_rate": 4.747433112157638e-06, "loss": 0.0087, "step": 66850 }, { "epoch": 1.1291248691187894, "grad_norm": 0.5121008157730103, "learning_rate": 4.745961254666016e-06, "loss": 0.0094, "step": 66860 }, { "epoch": 1.1292937481001115, "grad_norm": 0.39239758253097534, "learning_rate": 4.74448941924466e-06, "loss": 0.0107, "step": 66870 }, { "epoch": 1.1294626270814334, "grad_norm": 0.2393598109483719, "learning_rate": 4.743017606021436e-06, "loss": 0.0078, "step": 66880 }, { "epoch": 1.1296315060627555, "grad_norm": 0.10245704650878906, "learning_rate": 4.7415458151242116e-06, "loss": 0.0059, "step": 66890 }, { "epoch": 1.1298003850440774, "grad_norm": 0.20991386473178864, "learning_rate": 4.740074046680854e-06, "loss": 0.0089, "step": 66900 }, { "epoch": 1.1299692640253993, "grad_norm": 0.20488472282886505, "learning_rate": 4.738602300819229e-06, "loss": 0.0063, "step": 66910 }, { "epoch": 1.1301381430067214, "grad_norm": 0.2148391306400299, "learning_rate": 4.737130577667192e-06, "loss": 0.0078, "step": 66920 }, { "epoch": 1.1303070219880433, "grad_norm": 0.3337653875350952, "learning_rate": 4.735658877352607e-06, "loss": 0.0063, "step": 66930 }, { "epoch": 1.1304759009693655, "grad_norm": 0.25808537006378174, "learning_rate": 4.734187200003331e-06, "loss": 0.006, "step": 66940 }, { "epoch": 1.1306447799506874, "grad_norm": 0.4894070029258728, "learning_rate": 4.7327155457472215e-06, "loss": 0.009, "step": 66950 }, { "epoch": 1.1308136589320092, "grad_norm": 0.2716466188430786, "learning_rate": 4.731243914712129e-06, "loss": 0.0068, "step": 66960 }, { "epoch": 1.1309825379133314, "grad_norm": 0.197316974401474, "learning_rate": 4.729772307025907e-06, "loss": 0.0076, "step": 66970 }, { "epoch": 1.1311514168946533, "grad_norm": 0.2055530548095703, "learning_rate": 4.728300722816404e-06, "loss": 0.0079, "step": 66980 }, { "epoch": 1.1313202958759754, "grad_norm": 0.2942984402179718, "learning_rate": 4.72682916221147e-06, "loss": 0.0107, "step": 66990 }, { "epoch": 1.1314891748572973, "grad_norm": 0.15588624775409698, "learning_rate": 4.7253576253389475e-06, "loss": 0.0104, "step": 67000 }, { "epoch": 1.1316580538386192, "grad_norm": 0.1127132922410965, "learning_rate": 4.723886112326682e-06, "loss": 0.0073, "step": 67010 }, { "epoch": 1.1318269328199413, "grad_norm": 0.39285391569137573, "learning_rate": 4.7224146233025145e-06, "loss": 0.0122, "step": 67020 }, { "epoch": 1.1319958118012632, "grad_norm": 0.1688190996646881, "learning_rate": 4.720943158394286e-06, "loss": 0.0095, "step": 67030 }, { "epoch": 1.1321646907825853, "grad_norm": 0.10882676392793655, "learning_rate": 4.719471717729829e-06, "loss": 0.0089, "step": 67040 }, { "epoch": 1.1323335697639072, "grad_norm": 0.3096831440925598, "learning_rate": 4.718000301436983e-06, "loss": 0.0081, "step": 67050 }, { "epoch": 1.132502448745229, "grad_norm": 0.07141681015491486, "learning_rate": 4.71652890964358e-06, "loss": 0.008, "step": 67060 }, { "epoch": 1.1326713277265512, "grad_norm": 0.2541039288043976, "learning_rate": 4.7150575424774525e-06, "loss": 0.012, "step": 67070 }, { "epoch": 1.132840206707873, "grad_norm": 0.28348857164382935, "learning_rate": 4.713586200066426e-06, "loss": 0.0078, "step": 67080 }, { "epoch": 1.1330090856891952, "grad_norm": 0.1598510444164276, "learning_rate": 4.712114882538328e-06, "loss": 0.0129, "step": 67090 }, { "epoch": 1.1331779646705171, "grad_norm": 0.20879605412483215, "learning_rate": 4.710643590020984e-06, "loss": 0.0044, "step": 67100 }, { "epoch": 1.133346843651839, "grad_norm": 0.23999811708927155, "learning_rate": 4.709172322642216e-06, "loss": 0.0087, "step": 67110 }, { "epoch": 1.1335157226331611, "grad_norm": 0.3385375142097473, "learning_rate": 4.707701080529843e-06, "loss": 0.0076, "step": 67120 }, { "epoch": 1.133684601614483, "grad_norm": 0.28912240266799927, "learning_rate": 4.706229863811685e-06, "loss": 0.0091, "step": 67130 }, { "epoch": 1.1338534805958052, "grad_norm": 0.4117699861526489, "learning_rate": 4.704758672615556e-06, "loss": 0.0092, "step": 67140 }, { "epoch": 1.134022359577127, "grad_norm": 0.2954718768596649, "learning_rate": 4.70328750706927e-06, "loss": 0.0086, "step": 67150 }, { "epoch": 1.134191238558449, "grad_norm": 0.3704835772514343, "learning_rate": 4.701816367300639e-06, "loss": 0.0094, "step": 67160 }, { "epoch": 1.134360117539771, "grad_norm": 0.5430120825767517, "learning_rate": 4.700345253437472e-06, "loss": 0.0107, "step": 67170 }, { "epoch": 1.134528996521093, "grad_norm": 0.2637659013271332, "learning_rate": 4.6988741656075736e-06, "loss": 0.0112, "step": 67180 }, { "epoch": 1.134697875502415, "grad_norm": 0.19241151213645935, "learning_rate": 4.69740310393875e-06, "loss": 0.0097, "step": 67190 }, { "epoch": 1.134866754483737, "grad_norm": 0.24134349822998047, "learning_rate": 4.695932068558805e-06, "loss": 0.007, "step": 67200 }, { "epoch": 1.1350356334650589, "grad_norm": 0.27203917503356934, "learning_rate": 4.694461059595535e-06, "loss": 0.0129, "step": 67210 }, { "epoch": 1.135204512446381, "grad_norm": 0.3134553134441376, "learning_rate": 4.69299007717674e-06, "loss": 0.0118, "step": 67220 }, { "epoch": 1.1353733914277029, "grad_norm": 0.24952466785907745, "learning_rate": 4.691519121430214e-06, "loss": 0.0054, "step": 67230 }, { "epoch": 1.135542270409025, "grad_norm": 0.32774418592453003, "learning_rate": 4.690048192483753e-06, "loss": 0.0091, "step": 67240 }, { "epoch": 1.135711149390347, "grad_norm": 0.11276369541883469, "learning_rate": 4.688577290465143e-06, "loss": 0.011, "step": 67250 }, { "epoch": 1.1358800283716688, "grad_norm": 0.19845274090766907, "learning_rate": 4.687106415502176e-06, "loss": 0.0051, "step": 67260 }, { "epoch": 1.136048907352991, "grad_norm": 0.3383355140686035, "learning_rate": 4.6856355677226365e-06, "loss": 0.0067, "step": 67270 }, { "epoch": 1.1362177863343128, "grad_norm": 0.10339406132698059, "learning_rate": 4.68416474725431e-06, "loss": 0.0056, "step": 67280 }, { "epoch": 1.136386665315635, "grad_norm": 0.2033054083585739, "learning_rate": 4.682693954224975e-06, "loss": 0.0096, "step": 67290 }, { "epoch": 1.1365555442969568, "grad_norm": 0.25225237011909485, "learning_rate": 4.681223188762412e-06, "loss": 0.0095, "step": 67300 }, { "epoch": 1.1367244232782787, "grad_norm": 0.29456815123558044, "learning_rate": 4.679752450994396e-06, "loss": 0.0074, "step": 67310 }, { "epoch": 1.1368933022596008, "grad_norm": 0.28294461965560913, "learning_rate": 4.678281741048706e-06, "loss": 0.008, "step": 67320 }, { "epoch": 1.1370621812409227, "grad_norm": 0.4314151704311371, "learning_rate": 4.6768110590531075e-06, "loss": 0.0086, "step": 67330 }, { "epoch": 1.1372310602222448, "grad_norm": 0.24246761202812195, "learning_rate": 4.675340405135373e-06, "loss": 0.0063, "step": 67340 }, { "epoch": 1.1373999392035667, "grad_norm": 0.44372472167015076, "learning_rate": 4.673869779423267e-06, "loss": 0.0113, "step": 67350 }, { "epoch": 1.1375688181848886, "grad_norm": 0.4432196617126465, "learning_rate": 4.672399182044557e-06, "loss": 0.015, "step": 67360 }, { "epoch": 1.1377376971662108, "grad_norm": 0.17142172157764435, "learning_rate": 4.670928613127002e-06, "loss": 0.0088, "step": 67370 }, { "epoch": 1.1379065761475327, "grad_norm": 0.15922503173351288, "learning_rate": 4.669458072798364e-06, "loss": 0.0109, "step": 67380 }, { "epoch": 1.1380754551288548, "grad_norm": 0.3477354347705841, "learning_rate": 4.667987561186397e-06, "loss": 0.0138, "step": 67390 }, { "epoch": 1.1382443341101767, "grad_norm": 0.22171719372272491, "learning_rate": 4.666517078418859e-06, "loss": 0.0073, "step": 67400 }, { "epoch": 1.1384132130914986, "grad_norm": 0.22298133373260498, "learning_rate": 4.665046624623499e-06, "loss": 0.0062, "step": 67410 }, { "epoch": 1.1385820920728207, "grad_norm": 0.45470184087753296, "learning_rate": 4.663576199928068e-06, "loss": 0.0098, "step": 67420 }, { "epoch": 1.1387509710541426, "grad_norm": 0.07063696533441544, "learning_rate": 4.662105804460312e-06, "loss": 0.0092, "step": 67430 }, { "epoch": 1.1389198500354647, "grad_norm": 0.20915429294109344, "learning_rate": 4.660635438347977e-06, "loss": 0.006, "step": 67440 }, { "epoch": 1.1390887290167866, "grad_norm": 0.29703548550605774, "learning_rate": 4.659165101718802e-06, "loss": 0.0077, "step": 67450 }, { "epoch": 1.1392576079981085, "grad_norm": 0.19313468039035797, "learning_rate": 4.657694794700528e-06, "loss": 0.0118, "step": 67460 }, { "epoch": 1.1394264869794306, "grad_norm": 0.2539554238319397, "learning_rate": 4.656224517420892e-06, "loss": 0.0069, "step": 67470 }, { "epoch": 1.1395953659607525, "grad_norm": 0.23674023151397705, "learning_rate": 4.654754270007628e-06, "loss": 0.0072, "step": 67480 }, { "epoch": 1.1397642449420746, "grad_norm": 0.28372427821159363, "learning_rate": 4.653284052588466e-06, "loss": 0.0086, "step": 67490 }, { "epoch": 1.1399331239233965, "grad_norm": 0.1787058562040329, "learning_rate": 4.651813865291136e-06, "loss": 0.0077, "step": 67500 }, { "epoch": 1.1401020029047184, "grad_norm": 0.12178103625774384, "learning_rate": 4.6503437082433635e-06, "loss": 0.0064, "step": 67510 }, { "epoch": 1.1402708818860405, "grad_norm": 0.1961190104484558, "learning_rate": 4.648873581572875e-06, "loss": 0.0077, "step": 67520 }, { "epoch": 1.1404397608673624, "grad_norm": 0.1450987011194229, "learning_rate": 4.647403485407388e-06, "loss": 0.0095, "step": 67530 }, { "epoch": 1.1406086398486845, "grad_norm": 0.6275972723960876, "learning_rate": 4.645933419874622e-06, "loss": 0.01, "step": 67540 }, { "epoch": 1.1407775188300064, "grad_norm": 0.248934805393219, "learning_rate": 4.644463385102293e-06, "loss": 0.0093, "step": 67550 }, { "epoch": 1.1409463978113283, "grad_norm": 0.31351417303085327, "learning_rate": 4.642993381218116e-06, "loss": 0.0068, "step": 67560 }, { "epoch": 1.1411152767926505, "grad_norm": 0.12080257385969162, "learning_rate": 4.641523408349796e-06, "loss": 0.0065, "step": 67570 }, { "epoch": 1.1412841557739724, "grad_norm": 0.18323159217834473, "learning_rate": 4.640053466625045e-06, "loss": 0.0111, "step": 67580 }, { "epoch": 1.1414530347552945, "grad_norm": 0.34684351086616516, "learning_rate": 4.638583556171567e-06, "loss": 0.0114, "step": 67590 }, { "epoch": 1.1416219137366164, "grad_norm": 0.2467269003391266, "learning_rate": 4.637113677117063e-06, "loss": 0.0086, "step": 67600 }, { "epoch": 1.1417907927179383, "grad_norm": 0.44591277837753296, "learning_rate": 4.6356438295892366e-06, "loss": 0.0165, "step": 67610 }, { "epoch": 1.1419596716992604, "grad_norm": 0.19008368253707886, "learning_rate": 4.634174013715779e-06, "loss": 0.0087, "step": 67620 }, { "epoch": 1.1421285506805823, "grad_norm": 0.3577948808670044, "learning_rate": 4.632704229624387e-06, "loss": 0.0058, "step": 67630 }, { "epoch": 1.1422974296619044, "grad_norm": 0.08614520728588104, "learning_rate": 4.631234477442751e-06, "loss": 0.011, "step": 67640 }, { "epoch": 1.1424663086432263, "grad_norm": 0.148655503988266, "learning_rate": 4.629764757298561e-06, "loss": 0.0059, "step": 67650 }, { "epoch": 1.1426351876245482, "grad_norm": 0.3073185384273529, "learning_rate": 4.628295069319502e-06, "loss": 0.0094, "step": 67660 }, { "epoch": 1.1428040666058703, "grad_norm": 0.40118592977523804, "learning_rate": 4.6268254136332555e-06, "loss": 0.0077, "step": 67670 }, { "epoch": 1.1429729455871922, "grad_norm": 0.35044366121292114, "learning_rate": 4.625355790367503e-06, "loss": 0.0069, "step": 67680 }, { "epoch": 1.1431418245685143, "grad_norm": 0.22635404765605927, "learning_rate": 4.623886199649923e-06, "loss": 0.0082, "step": 67690 }, { "epoch": 1.1433107035498362, "grad_norm": 1.2219370603561401, "learning_rate": 4.622416641608186e-06, "loss": 0.0099, "step": 67700 }, { "epoch": 1.1434795825311581, "grad_norm": 0.1976272165775299, "learning_rate": 4.620947116369966e-06, "loss": 0.0074, "step": 67710 }, { "epoch": 1.1436484615124802, "grad_norm": 0.21662113070487976, "learning_rate": 4.619477624062932e-06, "loss": 0.0104, "step": 67720 }, { "epoch": 1.1438173404938021, "grad_norm": 0.14232107996940613, "learning_rate": 4.618008164814752e-06, "loss": 0.0069, "step": 67730 }, { "epoch": 1.1439862194751242, "grad_norm": 0.694155216217041, "learning_rate": 4.616538738753084e-06, "loss": 0.0116, "step": 67740 }, { "epoch": 1.1441550984564461, "grad_norm": 0.24687613546848297, "learning_rate": 4.615069346005591e-06, "loss": 0.009, "step": 67750 }, { "epoch": 1.144323977437768, "grad_norm": 0.1488044708967209, "learning_rate": 4.61359998669993e-06, "loss": 0.0065, "step": 67760 }, { "epoch": 1.1444928564190902, "grad_norm": 0.23315821588039398, "learning_rate": 4.612130660963758e-06, "loss": 0.0098, "step": 67770 }, { "epoch": 1.144661735400412, "grad_norm": 0.17453984916210175, "learning_rate": 4.6106613689247205e-06, "loss": 0.008, "step": 67780 }, { "epoch": 1.1448306143817342, "grad_norm": 0.16899724304676056, "learning_rate": 4.6091921107104695e-06, "loss": 0.0054, "step": 67790 }, { "epoch": 1.144999493363056, "grad_norm": 0.2045096457004547, "learning_rate": 4.6077228864486515e-06, "loss": 0.0102, "step": 67800 }, { "epoch": 1.145168372344378, "grad_norm": 0.12974686920642853, "learning_rate": 4.6062536962669086e-06, "loss": 0.0093, "step": 67810 }, { "epoch": 1.1453372513257, "grad_norm": 0.2636060118675232, "learning_rate": 4.604784540292878e-06, "loss": 0.009, "step": 67820 }, { "epoch": 1.145506130307022, "grad_norm": 0.29691147804260254, "learning_rate": 4.603315418654198e-06, "loss": 0.0055, "step": 67830 }, { "epoch": 1.145675009288344, "grad_norm": 0.34332770109176636, "learning_rate": 4.601846331478503e-06, "loss": 0.0082, "step": 67840 }, { "epoch": 1.145843888269666, "grad_norm": 0.3899199962615967, "learning_rate": 4.600377278893425e-06, "loss": 0.0079, "step": 67850 }, { "epoch": 1.1460127672509879, "grad_norm": 0.20456895232200623, "learning_rate": 4.598908261026587e-06, "loss": 0.0074, "step": 67860 }, { "epoch": 1.14618164623231, "grad_norm": 0.20545677840709686, "learning_rate": 4.597439278005617e-06, "loss": 0.0042, "step": 67870 }, { "epoch": 1.146350525213632, "grad_norm": 0.28427281975746155, "learning_rate": 4.5959703299581365e-06, "loss": 0.0093, "step": 67880 }, { "epoch": 1.146519404194954, "grad_norm": 0.7172454595565796, "learning_rate": 4.594501417011765e-06, "loss": 0.0082, "step": 67890 }, { "epoch": 1.146688283176276, "grad_norm": 0.18775899708271027, "learning_rate": 4.593032539294115e-06, "loss": 0.0052, "step": 67900 }, { "epoch": 1.1468571621575978, "grad_norm": 0.3274245858192444, "learning_rate": 4.591563696932801e-06, "loss": 0.0067, "step": 67910 }, { "epoch": 1.14702604113892, "grad_norm": 0.162098228931427, "learning_rate": 4.590094890055432e-06, "loss": 0.0065, "step": 67920 }, { "epoch": 1.1471949201202418, "grad_norm": 0.22105596959590912, "learning_rate": 4.588626118789617e-06, "loss": 0.0046, "step": 67930 }, { "epoch": 1.147363799101564, "grad_norm": 0.4338003396987915, "learning_rate": 4.587157383262954e-06, "loss": 0.0072, "step": 67940 }, { "epoch": 1.1475326780828858, "grad_norm": 0.2321426123380661, "learning_rate": 4.585688683603046e-06, "loss": 0.0086, "step": 67950 }, { "epoch": 1.1477015570642077, "grad_norm": 0.22471898794174194, "learning_rate": 4.58422001993749e-06, "loss": 0.0095, "step": 67960 }, { "epoch": 1.1478704360455299, "grad_norm": 0.14768287539482117, "learning_rate": 4.5827513923938824e-06, "loss": 0.0051, "step": 67970 }, { "epoch": 1.1480393150268517, "grad_norm": 0.21787838637828827, "learning_rate": 4.581282801099809e-06, "loss": 0.0114, "step": 67980 }, { "epoch": 1.1482081940081739, "grad_norm": 0.18961890041828156, "learning_rate": 4.57981424618286e-06, "loss": 0.0064, "step": 67990 }, { "epoch": 1.1483770729894958, "grad_norm": 0.16566728055477142, "learning_rate": 4.57834572777062e-06, "loss": 0.0094, "step": 68000 }, { "epoch": 1.1485459519708177, "grad_norm": 0.4343724250793457, "learning_rate": 4.576877245990671e-06, "loss": 0.0069, "step": 68010 }, { "epoch": 1.1487148309521398, "grad_norm": 0.32081255316734314, "learning_rate": 4.575408800970588e-06, "loss": 0.0101, "step": 68020 }, { "epoch": 1.1488837099334617, "grad_norm": 0.42787861824035645, "learning_rate": 4.573940392837949e-06, "loss": 0.0129, "step": 68030 }, { "epoch": 1.1490525889147838, "grad_norm": 0.27824094891548157, "learning_rate": 4.572472021720324e-06, "loss": 0.0076, "step": 68040 }, { "epoch": 1.1492214678961057, "grad_norm": 0.11693456768989563, "learning_rate": 4.5710036877452826e-06, "loss": 0.0057, "step": 68050 }, { "epoch": 1.1493903468774276, "grad_norm": 0.250881165266037, "learning_rate": 4.569535391040391e-06, "loss": 0.0091, "step": 68060 }, { "epoch": 1.1495592258587497, "grad_norm": 0.07264257222414017, "learning_rate": 4.5680671317332085e-06, "loss": 0.007, "step": 68070 }, { "epoch": 1.1497281048400716, "grad_norm": 0.8154550194740295, "learning_rate": 4.566598909951295e-06, "loss": 0.0091, "step": 68080 }, { "epoch": 1.1498969838213935, "grad_norm": 0.17567996680736542, "learning_rate": 4.565130725822206e-06, "loss": 0.0063, "step": 68090 }, { "epoch": 1.1500658628027156, "grad_norm": 0.2757561206817627, "learning_rate": 4.563662579473496e-06, "loss": 0.0072, "step": 68100 }, { "epoch": 1.1502347417840375, "grad_norm": 0.16488640010356903, "learning_rate": 4.562194471032711e-06, "loss": 0.0084, "step": 68110 }, { "epoch": 1.1504036207653596, "grad_norm": 0.2529110908508301, "learning_rate": 4.560726400627397e-06, "loss": 0.0052, "step": 68120 }, { "epoch": 1.1505724997466815, "grad_norm": 0.25812506675720215, "learning_rate": 4.559258368385096e-06, "loss": 0.0084, "step": 68130 }, { "epoch": 1.1507413787280034, "grad_norm": 0.18886438012123108, "learning_rate": 4.5577903744333495e-06, "loss": 0.0062, "step": 68140 }, { "epoch": 1.1509102577093255, "grad_norm": 0.3175337314605713, "learning_rate": 4.556322418899691e-06, "loss": 0.0071, "step": 68150 }, { "epoch": 1.1510791366906474, "grad_norm": 0.3384389877319336, "learning_rate": 4.554854501911653e-06, "loss": 0.0085, "step": 68160 }, { "epoch": 1.1512480156719695, "grad_norm": 0.4676032066345215, "learning_rate": 4.553386623596766e-06, "loss": 0.0056, "step": 68170 }, { "epoch": 1.1514168946532914, "grad_norm": 0.13975276052951813, "learning_rate": 4.551918784082555e-06, "loss": 0.0071, "step": 68180 }, { "epoch": 1.1515857736346133, "grad_norm": 0.24515250325202942, "learning_rate": 4.550450983496541e-06, "loss": 0.0081, "step": 68190 }, { "epoch": 1.1517546526159355, "grad_norm": 0.35957327485084534, "learning_rate": 4.548983221966244e-06, "loss": 0.0094, "step": 68200 }, { "epoch": 1.1519235315972574, "grad_norm": 0.4543392062187195, "learning_rate": 4.547515499619181e-06, "loss": 0.0076, "step": 68210 }, { "epoch": 1.1520924105785795, "grad_norm": 0.30805540084838867, "learning_rate": 4.546047816582863e-06, "loss": 0.0068, "step": 68220 }, { "epoch": 1.1522612895599014, "grad_norm": 0.3256872892379761, "learning_rate": 4.544580172984796e-06, "loss": 0.008, "step": 68230 }, { "epoch": 1.1524301685412233, "grad_norm": 0.31291887164115906, "learning_rate": 4.543112568952489e-06, "loss": 0.0107, "step": 68240 }, { "epoch": 1.1525990475225454, "grad_norm": 0.35349521040916443, "learning_rate": 4.541645004613442e-06, "loss": 0.0049, "step": 68250 }, { "epoch": 1.1527679265038673, "grad_norm": 0.1925279051065445, "learning_rate": 4.540177480095155e-06, "loss": 0.0087, "step": 68260 }, { "epoch": 1.1529368054851894, "grad_norm": 0.2660684883594513, "learning_rate": 4.53870999552512e-06, "loss": 0.0072, "step": 68270 }, { "epoch": 1.1531056844665113, "grad_norm": 0.1487540304660797, "learning_rate": 4.537242551030831e-06, "loss": 0.0099, "step": 68280 }, { "epoch": 1.1532745634478332, "grad_norm": 0.28399941325187683, "learning_rate": 4.535775146739775e-06, "loss": 0.006, "step": 68290 }, { "epoch": 1.1534434424291553, "grad_norm": 0.15049625933170319, "learning_rate": 4.534307782779438e-06, "loss": 0.0069, "step": 68300 }, { "epoch": 1.1536123214104772, "grad_norm": 0.3202882409095764, "learning_rate": 4.532840459277299e-06, "loss": 0.0113, "step": 68310 }, { "epoch": 1.1537812003917993, "grad_norm": 0.2771105468273163, "learning_rate": 4.531373176360835e-06, "loss": 0.0093, "step": 68320 }, { "epoch": 1.1539500793731212, "grad_norm": 0.18925470113754272, "learning_rate": 4.529905934157521e-06, "loss": 0.0104, "step": 68330 }, { "epoch": 1.1541189583544431, "grad_norm": 0.4918578863143921, "learning_rate": 4.528438732794831e-06, "loss": 0.014, "step": 68340 }, { "epoch": 1.1542878373357652, "grad_norm": 0.3840930759906769, "learning_rate": 4.5269715724002254e-06, "loss": 0.0074, "step": 68350 }, { "epoch": 1.1544567163170871, "grad_norm": 0.3742201626300812, "learning_rate": 4.525504453101171e-06, "loss": 0.0069, "step": 68360 }, { "epoch": 1.1546255952984092, "grad_norm": 0.24056603014469147, "learning_rate": 4.524037375025127e-06, "loss": 0.011, "step": 68370 }, { "epoch": 1.1547944742797311, "grad_norm": 0.17444409430027008, "learning_rate": 4.52257033829955e-06, "loss": 0.0078, "step": 68380 }, { "epoch": 1.154963353261053, "grad_norm": 0.13323140144348145, "learning_rate": 4.5211033430518915e-06, "loss": 0.0058, "step": 68390 }, { "epoch": 1.1551322322423752, "grad_norm": 0.2928089201450348, "learning_rate": 4.519636389409601e-06, "loss": 0.0054, "step": 68400 }, { "epoch": 1.155301111223697, "grad_norm": 0.15081875026226044, "learning_rate": 4.518169477500124e-06, "loss": 0.0078, "step": 68410 }, { "epoch": 1.1554699902050192, "grad_norm": 0.3692879378795624, "learning_rate": 4.516702607450904e-06, "loss": 0.0076, "step": 68420 }, { "epoch": 1.155638869186341, "grad_norm": 0.3587944209575653, "learning_rate": 4.515235779389375e-06, "loss": 0.0056, "step": 68430 }, { "epoch": 1.155807748167663, "grad_norm": 0.45915746688842773, "learning_rate": 4.513768993442974e-06, "loss": 0.0069, "step": 68440 }, { "epoch": 1.155976627148985, "grad_norm": 0.574506402015686, "learning_rate": 4.512302249739131e-06, "loss": 0.0063, "step": 68450 }, { "epoch": 1.156145506130307, "grad_norm": 0.1981852501630783, "learning_rate": 4.510835548405276e-06, "loss": 0.0097, "step": 68460 }, { "epoch": 1.156314385111629, "grad_norm": 0.6063634157180786, "learning_rate": 4.509368889568828e-06, "loss": 0.0141, "step": 68470 }, { "epoch": 1.156483264092951, "grad_norm": 0.1889401376247406, "learning_rate": 4.507902273357208e-06, "loss": 0.0154, "step": 68480 }, { "epoch": 1.1566521430742729, "grad_norm": 0.22151117026805878, "learning_rate": 4.5064356998978335e-06, "loss": 0.0109, "step": 68490 }, { "epoch": 1.156821022055595, "grad_norm": 0.17166301608085632, "learning_rate": 4.504969169318114e-06, "loss": 0.0079, "step": 68500 }, { "epoch": 1.156989901036917, "grad_norm": 0.23545387387275696, "learning_rate": 4.503502681745464e-06, "loss": 0.0086, "step": 68510 }, { "epoch": 1.157158780018239, "grad_norm": 0.26070231199264526, "learning_rate": 4.502036237307283e-06, "loss": 0.0099, "step": 68520 }, { "epoch": 1.157327658999561, "grad_norm": 0.5029522776603699, "learning_rate": 4.500569836130972e-06, "loss": 0.0091, "step": 68530 }, { "epoch": 1.1574965379808828, "grad_norm": 0.23133619129657745, "learning_rate": 4.49910347834393e-06, "loss": 0.0062, "step": 68540 }, { "epoch": 1.157665416962205, "grad_norm": 0.18846115469932556, "learning_rate": 4.497637164073552e-06, "loss": 0.0057, "step": 68550 }, { "epoch": 1.1578342959435268, "grad_norm": 0.3254927694797516, "learning_rate": 4.4961708934472245e-06, "loss": 0.009, "step": 68560 }, { "epoch": 1.158003174924849, "grad_norm": 0.18467561900615692, "learning_rate": 4.494704666592336e-06, "loss": 0.0083, "step": 68570 }, { "epoch": 1.1581720539061708, "grad_norm": 0.45083585381507874, "learning_rate": 4.493238483636268e-06, "loss": 0.0108, "step": 68580 }, { "epoch": 1.1583409328874927, "grad_norm": 0.40228918194770813, "learning_rate": 4.4917723447064005e-06, "loss": 0.0094, "step": 68590 }, { "epoch": 1.1585098118688149, "grad_norm": 0.30488690733909607, "learning_rate": 4.490306249930104e-06, "loss": 0.0082, "step": 68600 }, { "epoch": 1.1586786908501367, "grad_norm": 0.3306867182254791, "learning_rate": 4.488840199434752e-06, "loss": 0.0085, "step": 68610 }, { "epoch": 1.1588475698314589, "grad_norm": 0.16045542061328888, "learning_rate": 4.487374193347709e-06, "loss": 0.0085, "step": 68620 }, { "epoch": 1.1590164488127808, "grad_norm": 0.2498568594455719, "learning_rate": 4.485908231796344e-06, "loss": 0.0079, "step": 68630 }, { "epoch": 1.1591853277941027, "grad_norm": 0.2582831382751465, "learning_rate": 4.484442314908011e-06, "loss": 0.011, "step": 68640 }, { "epoch": 1.1593542067754248, "grad_norm": 0.5118612051010132, "learning_rate": 4.482976442810064e-06, "loss": 0.0079, "step": 68650 }, { "epoch": 1.1595230857567467, "grad_norm": 0.26736772060394287, "learning_rate": 4.481510615629858e-06, "loss": 0.0072, "step": 68660 }, { "epoch": 1.1596919647380688, "grad_norm": 0.1943594217300415, "learning_rate": 4.480044833494741e-06, "loss": 0.0077, "step": 68670 }, { "epoch": 1.1598608437193907, "grad_norm": 0.5054818987846375, "learning_rate": 4.478579096532052e-06, "loss": 0.0096, "step": 68680 }, { "epoch": 1.1600297227007126, "grad_norm": 0.13552400469779968, "learning_rate": 4.477113404869134e-06, "loss": 0.0065, "step": 68690 }, { "epoch": 1.1601986016820347, "grad_norm": 0.4650624990463257, "learning_rate": 4.475647758633323e-06, "loss": 0.0076, "step": 68700 }, { "epoch": 1.1603674806633566, "grad_norm": 0.23654931783676147, "learning_rate": 4.474182157951951e-06, "loss": 0.0081, "step": 68710 }, { "epoch": 1.1605363596446787, "grad_norm": 0.3383621871471405, "learning_rate": 4.472716602952344e-06, "loss": 0.01, "step": 68720 }, { "epoch": 1.1607052386260006, "grad_norm": 0.2853885591030121, "learning_rate": 4.471251093761824e-06, "loss": 0.0101, "step": 68730 }, { "epoch": 1.1608741176073225, "grad_norm": 0.42934972047805786, "learning_rate": 4.469785630507715e-06, "loss": 0.0093, "step": 68740 }, { "epoch": 1.1610429965886446, "grad_norm": 0.23781250417232513, "learning_rate": 4.468320213317333e-06, "loss": 0.0115, "step": 68750 }, { "epoch": 1.1612118755699665, "grad_norm": 0.05118342861533165, "learning_rate": 4.466854842317986e-06, "loss": 0.0137, "step": 68760 }, { "epoch": 1.1613807545512886, "grad_norm": 0.2613302171230316, "learning_rate": 4.4653895176369845e-06, "loss": 0.0082, "step": 68770 }, { "epoch": 1.1615496335326105, "grad_norm": 0.25869616866111755, "learning_rate": 4.463924239401631e-06, "loss": 0.0089, "step": 68780 }, { "epoch": 1.1617185125139324, "grad_norm": 0.5486878752708435, "learning_rate": 4.462459007739227e-06, "loss": 0.0128, "step": 68790 }, { "epoch": 1.1618873914952546, "grad_norm": 0.21708573400974274, "learning_rate": 4.460993822777066e-06, "loss": 0.0088, "step": 68800 }, { "epoch": 1.1620562704765764, "grad_norm": 0.22013048827648163, "learning_rate": 4.459528684642442e-06, "loss": 0.0062, "step": 68810 }, { "epoch": 1.1622251494578986, "grad_norm": 0.2028864026069641, "learning_rate": 4.45806359346264e-06, "loss": 0.0096, "step": 68820 }, { "epoch": 1.1623940284392205, "grad_norm": 0.16022086143493652, "learning_rate": 4.456598549364949e-06, "loss": 0.0063, "step": 68830 }, { "epoch": 1.1625629074205424, "grad_norm": 0.28651702404022217, "learning_rate": 4.455133552476641e-06, "loss": 0.007, "step": 68840 }, { "epoch": 1.1627317864018645, "grad_norm": 0.31895676255226135, "learning_rate": 4.453668602924994e-06, "loss": 0.0071, "step": 68850 }, { "epoch": 1.1629006653831864, "grad_norm": 0.18529324233531952, "learning_rate": 4.452203700837282e-06, "loss": 0.0069, "step": 68860 }, { "epoch": 1.1630695443645085, "grad_norm": 0.24052338302135468, "learning_rate": 4.450738846340771e-06, "loss": 0.0077, "step": 68870 }, { "epoch": 1.1632384233458304, "grad_norm": 0.24749639630317688, "learning_rate": 4.4492740395627215e-06, "loss": 0.01, "step": 68880 }, { "epoch": 1.1634073023271523, "grad_norm": 0.4544038772583008, "learning_rate": 4.447809280630394e-06, "loss": 0.0104, "step": 68890 }, { "epoch": 1.1635761813084744, "grad_norm": 0.26557856798171997, "learning_rate": 4.4463445696710435e-06, "loss": 0.0096, "step": 68900 }, { "epoch": 1.1637450602897963, "grad_norm": 0.38130563497543335, "learning_rate": 4.444879906811921e-06, "loss": 0.0145, "step": 68910 }, { "epoch": 1.1639139392711182, "grad_norm": 0.23191824555397034, "learning_rate": 4.44341529218027e-06, "loss": 0.0071, "step": 68920 }, { "epoch": 1.1640828182524403, "grad_norm": 0.2086542248725891, "learning_rate": 4.441950725903336e-06, "loss": 0.0103, "step": 68930 }, { "epoch": 1.1642516972337622, "grad_norm": 0.5771766304969788, "learning_rate": 4.440486208108354e-06, "loss": 0.0085, "step": 68940 }, { "epoch": 1.1644205762150843, "grad_norm": 0.08592957258224487, "learning_rate": 4.43902173892256e-06, "loss": 0.0054, "step": 68950 }, { "epoch": 1.1645894551964062, "grad_norm": 0.41366636753082275, "learning_rate": 4.437557318473186e-06, "loss": 0.0073, "step": 68960 }, { "epoch": 1.1647583341777281, "grad_norm": 0.178660586476326, "learning_rate": 4.436092946887449e-06, "loss": 0.0085, "step": 68970 }, { "epoch": 1.1649272131590502, "grad_norm": 0.3489326536655426, "learning_rate": 4.4346286242925765e-06, "loss": 0.0083, "step": 68980 }, { "epoch": 1.1650960921403721, "grad_norm": 0.24852894246578217, "learning_rate": 4.433164350815784e-06, "loss": 0.0071, "step": 68990 }, { "epoch": 1.1652649711216942, "grad_norm": 0.4604361057281494, "learning_rate": 4.431700126584286e-06, "loss": 0.007, "step": 69000 }, { "epoch": 1.1654338501030161, "grad_norm": 0.3614623248577118, "learning_rate": 4.430235951725286e-06, "loss": 0.0045, "step": 69010 }, { "epoch": 1.165602729084338, "grad_norm": 0.3053005039691925, "learning_rate": 4.428771826365991e-06, "loss": 0.0073, "step": 69020 }, { "epoch": 1.1657716080656602, "grad_norm": 0.3099530339241028, "learning_rate": 4.4273077506336e-06, "loss": 0.0077, "step": 69030 }, { "epoch": 1.165940487046982, "grad_norm": 0.16216282546520233, "learning_rate": 4.4258437246553086e-06, "loss": 0.0099, "step": 69040 }, { "epoch": 1.1661093660283042, "grad_norm": 0.31746962666511536, "learning_rate": 4.424379748558306e-06, "loss": 0.0062, "step": 69050 }, { "epoch": 1.166278245009626, "grad_norm": 0.1863057017326355, "learning_rate": 4.422915822469782e-06, "loss": 0.0067, "step": 69060 }, { "epoch": 1.166447123990948, "grad_norm": 0.3229554295539856, "learning_rate": 4.421451946516916e-06, "loss": 0.0076, "step": 69070 }, { "epoch": 1.16661600297227, "grad_norm": 0.2142338901758194, "learning_rate": 4.419988120826889e-06, "loss": 0.0071, "step": 69080 }, { "epoch": 1.166784881953592, "grad_norm": 0.15937045216560364, "learning_rate": 4.418524345526871e-06, "loss": 0.008, "step": 69090 }, { "epoch": 1.166953760934914, "grad_norm": 0.5425235033035278, "learning_rate": 4.417060620744033e-06, "loss": 0.0094, "step": 69100 }, { "epoch": 1.167122639916236, "grad_norm": 0.13147644698619843, "learning_rate": 4.415596946605541e-06, "loss": 0.0074, "step": 69110 }, { "epoch": 1.167291518897558, "grad_norm": 0.3385246694087982, "learning_rate": 4.4141333232385555e-06, "loss": 0.0112, "step": 69120 }, { "epoch": 1.16746039787888, "grad_norm": 0.34414300322532654, "learning_rate": 4.412669750770229e-06, "loss": 0.0101, "step": 69130 }, { "epoch": 1.167629276860202, "grad_norm": 0.18946412205696106, "learning_rate": 4.411206229327716e-06, "loss": 0.0084, "step": 69140 }, { "epoch": 1.167798155841524, "grad_norm": 0.21561139822006226, "learning_rate": 4.409742759038163e-06, "loss": 0.0062, "step": 69150 }, { "epoch": 1.167967034822846, "grad_norm": 0.386078804731369, "learning_rate": 4.408279340028714e-06, "loss": 0.0105, "step": 69160 }, { "epoch": 1.1681359138041678, "grad_norm": 0.4148978292942047, "learning_rate": 4.406815972426504e-06, "loss": 0.0076, "step": 69170 }, { "epoch": 1.16830479278549, "grad_norm": 0.25330349802970886, "learning_rate": 4.40535265635867e-06, "loss": 0.0114, "step": 69180 }, { "epoch": 1.1684736717668118, "grad_norm": 0.4131411910057068, "learning_rate": 4.403889391952339e-06, "loss": 0.0105, "step": 69190 }, { "epoch": 1.168642550748134, "grad_norm": 0.12428773939609528, "learning_rate": 4.402426179334639e-06, "loss": 0.0073, "step": 69200 }, { "epoch": 1.1688114297294558, "grad_norm": 0.12680970132350922, "learning_rate": 4.4009630186326855e-06, "loss": 0.0094, "step": 69210 }, { "epoch": 1.1689803087107777, "grad_norm": 0.6543113589286804, "learning_rate": 4.399499909973599e-06, "loss": 0.0119, "step": 69220 }, { "epoch": 1.1691491876920999, "grad_norm": 0.5606405735015869, "learning_rate": 4.398036853484487e-06, "loss": 0.0108, "step": 69230 }, { "epoch": 1.1693180666734218, "grad_norm": 0.04463266208767891, "learning_rate": 4.396573849292462e-06, "loss": 0.0091, "step": 69240 }, { "epoch": 1.1694869456547439, "grad_norm": 0.1514582633972168, "learning_rate": 4.39511089752462e-06, "loss": 0.0055, "step": 69250 }, { "epoch": 1.1696558246360658, "grad_norm": 0.28288477659225464, "learning_rate": 4.3936479983080595e-06, "loss": 0.0081, "step": 69260 }, { "epoch": 1.1698247036173877, "grad_norm": 0.23908619582653046, "learning_rate": 4.392185151769877e-06, "loss": 0.0082, "step": 69270 }, { "epoch": 1.1699935825987098, "grad_norm": 0.39538517594337463, "learning_rate": 4.3907223580371595e-06, "loss": 0.0081, "step": 69280 }, { "epoch": 1.1701624615800317, "grad_norm": 0.1129835769534111, "learning_rate": 4.389259617236989e-06, "loss": 0.008, "step": 69290 }, { "epoch": 1.1703313405613538, "grad_norm": 0.11899622529745102, "learning_rate": 4.387796929496447e-06, "loss": 0.0098, "step": 69300 }, { "epoch": 1.1705002195426757, "grad_norm": 0.29918935894966125, "learning_rate": 4.386334294942608e-06, "loss": 0.0072, "step": 69310 }, { "epoch": 1.1706690985239976, "grad_norm": 0.11773567646741867, "learning_rate": 4.384871713702541e-06, "loss": 0.0062, "step": 69320 }, { "epoch": 1.1708379775053197, "grad_norm": 0.43977004289627075, "learning_rate": 4.383409185903313e-06, "loss": 0.0057, "step": 69330 }, { "epoch": 1.1710068564866416, "grad_norm": 0.396370530128479, "learning_rate": 4.3819467116719835e-06, "loss": 0.0062, "step": 69340 }, { "epoch": 1.1711757354679637, "grad_norm": 0.3525404930114746, "learning_rate": 4.380484291135609e-06, "loss": 0.0079, "step": 69350 }, { "epoch": 1.1713446144492856, "grad_norm": 0.28462764620780945, "learning_rate": 4.379021924421243e-06, "loss": 0.0071, "step": 69360 }, { "epoch": 1.1715134934306075, "grad_norm": 0.2046707570552826, "learning_rate": 4.377559611655929e-06, "loss": 0.0068, "step": 69370 }, { "epoch": 1.1716823724119296, "grad_norm": 0.2844829261302948, "learning_rate": 4.376097352966711e-06, "loss": 0.016, "step": 69380 }, { "epoch": 1.1718512513932515, "grad_norm": 0.19030441343784332, "learning_rate": 4.374635148480626e-06, "loss": 0.0092, "step": 69390 }, { "epoch": 1.1720201303745736, "grad_norm": 0.21065744757652283, "learning_rate": 4.373172998324707e-06, "loss": 0.0079, "step": 69400 }, { "epoch": 1.1721890093558955, "grad_norm": 0.40300101041793823, "learning_rate": 4.371710902625983e-06, "loss": 0.0094, "step": 69410 }, { "epoch": 1.1723578883372174, "grad_norm": 0.11379712074995041, "learning_rate": 4.370248861511474e-06, "loss": 0.0068, "step": 69420 }, { "epoch": 1.1725267673185396, "grad_norm": 0.250619113445282, "learning_rate": 4.368786875108203e-06, "loss": 0.006, "step": 69430 }, { "epoch": 1.1726956462998614, "grad_norm": 0.3278614282608032, "learning_rate": 4.36732494354318e-06, "loss": 0.0099, "step": 69440 }, { "epoch": 1.1728645252811836, "grad_norm": 0.2733420431613922, "learning_rate": 4.365863066943417e-06, "loss": 0.0087, "step": 69450 }, { "epoch": 1.1730334042625055, "grad_norm": 0.26955515146255493, "learning_rate": 4.3644012454359165e-06, "loss": 0.0096, "step": 69460 }, { "epoch": 1.1732022832438274, "grad_norm": 0.9887766242027283, "learning_rate": 4.362939479147678e-06, "loss": 0.0084, "step": 69470 }, { "epoch": 1.1733711622251495, "grad_norm": 0.3153895437717438, "learning_rate": 4.3614777682056955e-06, "loss": 0.0076, "step": 69480 }, { "epoch": 1.1735400412064714, "grad_norm": 0.19209131598472595, "learning_rate": 4.3600161127369625e-06, "loss": 0.01, "step": 69490 }, { "epoch": 1.1737089201877935, "grad_norm": 0.2932160794734955, "learning_rate": 4.35855451286846e-06, "loss": 0.0086, "step": 69500 }, { "epoch": 1.1738777991691154, "grad_norm": 0.29521793127059937, "learning_rate": 4.357092968727169e-06, "loss": 0.0044, "step": 69510 }, { "epoch": 1.1740466781504373, "grad_norm": 0.3206355571746826, "learning_rate": 4.355631480440064e-06, "loss": 0.0109, "step": 69520 }, { "epoch": 1.1742155571317594, "grad_norm": 0.3587499260902405, "learning_rate": 4.354170048134121e-06, "loss": 0.0067, "step": 69530 }, { "epoch": 1.1743844361130813, "grad_norm": 0.1823807656764984, "learning_rate": 4.3527086719363e-06, "loss": 0.0075, "step": 69540 }, { "epoch": 1.1745533150944034, "grad_norm": 0.46675971150398254, "learning_rate": 4.351247351973563e-06, "loss": 0.0095, "step": 69550 }, { "epoch": 1.1747221940757253, "grad_norm": 0.20539680123329163, "learning_rate": 4.349786088372866e-06, "loss": 0.0101, "step": 69560 }, { "epoch": 1.1748910730570472, "grad_norm": 0.22284221649169922, "learning_rate": 4.348324881261162e-06, "loss": 0.008, "step": 69570 }, { "epoch": 1.1750599520383693, "grad_norm": 0.14460794627666473, "learning_rate": 4.3468637307653936e-06, "loss": 0.0077, "step": 69580 }, { "epoch": 1.1752288310196912, "grad_norm": 0.11201423406600952, "learning_rate": 4.345402637012505e-06, "loss": 0.0071, "step": 69590 }, { "epoch": 1.1753977100010133, "grad_norm": 0.23618817329406738, "learning_rate": 4.34394160012943e-06, "loss": 0.0109, "step": 69600 }, { "epoch": 1.1755665889823352, "grad_norm": 0.34201428294181824, "learning_rate": 4.342480620243104e-06, "loss": 0.0086, "step": 69610 }, { "epoch": 1.1757354679636571, "grad_norm": 0.11140964180231094, "learning_rate": 4.341019697480448e-06, "loss": 0.0082, "step": 69620 }, { "epoch": 1.1759043469449793, "grad_norm": 0.13600318133831024, "learning_rate": 4.339558831968387e-06, "loss": 0.0097, "step": 69630 }, { "epoch": 1.1760732259263011, "grad_norm": 0.2799990475177765, "learning_rate": 4.338098023833835e-06, "loss": 0.0066, "step": 69640 }, { "epoch": 1.1762421049076233, "grad_norm": 0.2522858679294586, "learning_rate": 4.3366372732037065e-06, "loss": 0.0096, "step": 69650 }, { "epoch": 1.1764109838889452, "grad_norm": 0.40534883737564087, "learning_rate": 4.3351765802049055e-06, "loss": 0.0077, "step": 69660 }, { "epoch": 1.176579862870267, "grad_norm": 0.212742418050766, "learning_rate": 4.333715944964334e-06, "loss": 0.0072, "step": 69670 }, { "epoch": 1.1767487418515892, "grad_norm": 0.2466823011636734, "learning_rate": 4.332255367608888e-06, "loss": 0.0087, "step": 69680 }, { "epoch": 1.176917620832911, "grad_norm": 0.407388299703598, "learning_rate": 4.330794848265461e-06, "loss": 0.0082, "step": 69690 }, { "epoch": 1.1770864998142332, "grad_norm": 0.06392227858304977, "learning_rate": 4.329334387060937e-06, "loss": 0.0058, "step": 69700 }, { "epoch": 1.177255378795555, "grad_norm": 0.016628801822662354, "learning_rate": 4.327873984122197e-06, "loss": 0.0082, "step": 69710 }, { "epoch": 1.177424257776877, "grad_norm": 0.38394081592559814, "learning_rate": 4.3264136395761194e-06, "loss": 0.0088, "step": 69720 }, { "epoch": 1.177593136758199, "grad_norm": 0.3518714904785156, "learning_rate": 4.324953353549576e-06, "loss": 0.0073, "step": 69730 }, { "epoch": 1.177762015739521, "grad_norm": 0.21557971835136414, "learning_rate": 4.323493126169429e-06, "loss": 0.0075, "step": 69740 }, { "epoch": 1.1779308947208431, "grad_norm": 0.3271346092224121, "learning_rate": 4.322032957562541e-06, "loss": 0.0044, "step": 69750 }, { "epoch": 1.178099773702165, "grad_norm": 0.07772006839513779, "learning_rate": 4.320572847855768e-06, "loss": 0.0084, "step": 69760 }, { "epoch": 1.178268652683487, "grad_norm": 0.20831018686294556, "learning_rate": 4.319112797175964e-06, "loss": 0.0074, "step": 69770 }, { "epoch": 1.178437531664809, "grad_norm": 0.39170244336128235, "learning_rate": 4.31765280564997e-06, "loss": 0.0091, "step": 69780 }, { "epoch": 1.178606410646131, "grad_norm": 0.15584668517112732, "learning_rate": 4.316192873404628e-06, "loss": 0.009, "step": 69790 }, { "epoch": 1.178775289627453, "grad_norm": 0.23796036839485168, "learning_rate": 4.314733000566773e-06, "loss": 0.0058, "step": 69800 }, { "epoch": 1.178944168608775, "grad_norm": 0.23329687118530273, "learning_rate": 4.313273187263237e-06, "loss": 0.0075, "step": 69810 }, { "epoch": 1.1791130475900968, "grad_norm": 0.3091210126876831, "learning_rate": 4.311813433620843e-06, "loss": 0.0101, "step": 69820 }, { "epoch": 1.179281926571419, "grad_norm": 0.25214138627052307, "learning_rate": 4.310353739766412e-06, "loss": 0.0086, "step": 69830 }, { "epoch": 1.1794508055527408, "grad_norm": 0.38157275319099426, "learning_rate": 4.308894105826757e-06, "loss": 0.0099, "step": 69840 }, { "epoch": 1.179619684534063, "grad_norm": 0.22888194024562836, "learning_rate": 4.30743453192869e-06, "loss": 0.0059, "step": 69850 }, { "epoch": 1.1797885635153849, "grad_norm": 0.3006170690059662, "learning_rate": 4.305975018199015e-06, "loss": 0.0066, "step": 69860 }, { "epoch": 1.1799574424967068, "grad_norm": 0.25731489062309265, "learning_rate": 4.304515564764527e-06, "loss": 0.0081, "step": 69870 }, { "epoch": 1.1801263214780289, "grad_norm": 0.23146015405654907, "learning_rate": 4.303056171752024e-06, "loss": 0.0099, "step": 69880 }, { "epoch": 1.1802952004593508, "grad_norm": 0.18720391392707825, "learning_rate": 4.3015968392882924e-06, "loss": 0.0154, "step": 69890 }, { "epoch": 1.1804640794406729, "grad_norm": 0.11638464778661728, "learning_rate": 4.300137567500118e-06, "loss": 0.0053, "step": 69900 }, { "epoch": 1.1806329584219948, "grad_norm": 0.18725889921188354, "learning_rate": 4.298678356514275e-06, "loss": 0.0105, "step": 69910 }, { "epoch": 1.1808018374033167, "grad_norm": 0.32586735486984253, "learning_rate": 4.297219206457538e-06, "loss": 0.0077, "step": 69920 }, { "epoch": 1.1809707163846388, "grad_norm": 0.15266475081443787, "learning_rate": 4.295760117456674e-06, "loss": 0.0065, "step": 69930 }, { "epoch": 1.1811395953659607, "grad_norm": 0.2573769688606262, "learning_rate": 4.2943010896384466e-06, "loss": 0.009, "step": 69940 }, { "epoch": 1.1813084743472828, "grad_norm": 0.1793893724679947, "learning_rate": 4.29284212312961e-06, "loss": 0.0129, "step": 69950 }, { "epoch": 1.1814773533286047, "grad_norm": 0.19360701739788055, "learning_rate": 4.291383218056917e-06, "loss": 0.0068, "step": 69960 }, { "epoch": 1.1816462323099266, "grad_norm": 0.32373669743537903, "learning_rate": 4.2899243745471146e-06, "loss": 0.0083, "step": 69970 }, { "epoch": 1.1818151112912487, "grad_norm": 0.2484724074602127, "learning_rate": 4.288465592726943e-06, "loss": 0.0079, "step": 69980 }, { "epoch": 1.1819839902725706, "grad_norm": 0.1610557734966278, "learning_rate": 4.287006872723136e-06, "loss": 0.0066, "step": 69990 }, { "epoch": 1.1821528692538927, "grad_norm": 0.3037176728248596, "learning_rate": 4.285548214662424e-06, "loss": 0.009, "step": 70000 }, { "epoch": 1.1823217482352146, "grad_norm": 0.14785347878932953, "learning_rate": 4.284089618671533e-06, "loss": 0.0062, "step": 70010 }, { "epoch": 1.1824906272165365, "grad_norm": 0.26593461632728577, "learning_rate": 4.282631084877185e-06, "loss": 0.0068, "step": 70020 }, { "epoch": 1.1826595061978586, "grad_norm": 0.27723953127861023, "learning_rate": 4.281172613406087e-06, "loss": 0.0074, "step": 70030 }, { "epoch": 1.1828283851791805, "grad_norm": 0.1018187403678894, "learning_rate": 4.279714204384951e-06, "loss": 0.0067, "step": 70040 }, { "epoch": 1.1829972641605027, "grad_norm": 0.25362229347229004, "learning_rate": 4.278255857940481e-06, "loss": 0.0102, "step": 70050 }, { "epoch": 1.1831661431418246, "grad_norm": 0.3288753032684326, "learning_rate": 4.276797574199373e-06, "loss": 0.0092, "step": 70060 }, { "epoch": 1.1833350221231465, "grad_norm": 0.07212936878204346, "learning_rate": 4.275339353288319e-06, "loss": 0.0095, "step": 70070 }, { "epoch": 1.1835039011044686, "grad_norm": 0.2550819218158722, "learning_rate": 4.2738811953340075e-06, "loss": 0.0076, "step": 70080 }, { "epoch": 1.1836727800857905, "grad_norm": 0.1815934032201767, "learning_rate": 4.272423100463118e-06, "loss": 0.0103, "step": 70090 }, { "epoch": 1.1838416590671126, "grad_norm": 0.1634124368429184, "learning_rate": 4.270965068802328e-06, "loss": 0.0062, "step": 70100 }, { "epoch": 1.1840105380484345, "grad_norm": 0.25877711176872253, "learning_rate": 4.269507100478304e-06, "loss": 0.0079, "step": 70110 }, { "epoch": 1.1841794170297564, "grad_norm": 0.20767173171043396, "learning_rate": 4.268049195617714e-06, "loss": 0.0089, "step": 70120 }, { "epoch": 1.1843482960110785, "grad_norm": 0.27166298031806946, "learning_rate": 4.266591354347216e-06, "loss": 0.0095, "step": 70130 }, { "epoch": 1.1845171749924004, "grad_norm": 0.23472578823566437, "learning_rate": 4.265133576793467e-06, "loss": 0.0081, "step": 70140 }, { "epoch": 1.1846860539737225, "grad_norm": 0.01121202576905489, "learning_rate": 4.263675863083109e-06, "loss": 0.0063, "step": 70150 }, { "epoch": 1.1848549329550444, "grad_norm": 0.18319427967071533, "learning_rate": 4.262218213342789e-06, "loss": 0.006, "step": 70160 }, { "epoch": 1.1850238119363663, "grad_norm": 0.18813112378120422, "learning_rate": 4.260760627699142e-06, "loss": 0.0097, "step": 70170 }, { "epoch": 1.1851926909176884, "grad_norm": 0.290745347738266, "learning_rate": 4.259303106278801e-06, "loss": 0.0044, "step": 70180 }, { "epoch": 1.1853615698990103, "grad_norm": 0.5257778167724609, "learning_rate": 4.2578456492083905e-06, "loss": 0.0072, "step": 70190 }, { "epoch": 1.1855304488803324, "grad_norm": 0.3761325180530548, "learning_rate": 4.256388256614531e-06, "loss": 0.0062, "step": 70200 }, { "epoch": 1.1856993278616543, "grad_norm": 0.28656333684921265, "learning_rate": 4.254930928623838e-06, "loss": 0.0106, "step": 70210 }, { "epoch": 1.1858682068429762, "grad_norm": 0.25611189007759094, "learning_rate": 4.253473665362922e-06, "loss": 0.01, "step": 70220 }, { "epoch": 1.1860370858242983, "grad_norm": 0.30792781710624695, "learning_rate": 4.2520164669583825e-06, "loss": 0.0084, "step": 70230 }, { "epoch": 1.1862059648056202, "grad_norm": 0.33281078934669495, "learning_rate": 4.25055933353682e-06, "loss": 0.0087, "step": 70240 }, { "epoch": 1.1863748437869424, "grad_norm": 0.26255014538764954, "learning_rate": 4.249102265224826e-06, "loss": 0.0078, "step": 70250 }, { "epoch": 1.1865437227682643, "grad_norm": 0.17831303179264069, "learning_rate": 4.2476452621489896e-06, "loss": 0.011, "step": 70260 }, { "epoch": 1.1867126017495861, "grad_norm": 0.1879650205373764, "learning_rate": 4.246188324435886e-06, "loss": 0.0081, "step": 70270 }, { "epoch": 1.1868814807309083, "grad_norm": 0.33812543749809265, "learning_rate": 4.244731452212095e-06, "loss": 0.0091, "step": 70280 }, { "epoch": 1.1870503597122302, "grad_norm": 0.42309537529945374, "learning_rate": 4.243274645604184e-06, "loss": 0.0072, "step": 70290 }, { "epoch": 1.1872192386935523, "grad_norm": 0.3014518618583679, "learning_rate": 4.241817904738719e-06, "loss": 0.0116, "step": 70300 }, { "epoch": 1.1873881176748742, "grad_norm": 0.1426267772912979, "learning_rate": 4.240361229742256e-06, "loss": 0.0077, "step": 70310 }, { "epoch": 1.187556996656196, "grad_norm": 0.24136321246623993, "learning_rate": 4.238904620741347e-06, "loss": 0.0077, "step": 70320 }, { "epoch": 1.1877258756375182, "grad_norm": 0.04120459035038948, "learning_rate": 4.2374480778625405e-06, "loss": 0.0044, "step": 70330 }, { "epoch": 1.18789475461884, "grad_norm": 0.38512372970581055, "learning_rate": 4.235991601232376e-06, "loss": 0.0093, "step": 70340 }, { "epoch": 1.1880636336001622, "grad_norm": 0.23310497403144836, "learning_rate": 4.23453519097739e-06, "loss": 0.0112, "step": 70350 }, { "epoch": 1.188232512581484, "grad_norm": 0.305466890335083, "learning_rate": 4.23307884722411e-06, "loss": 0.0088, "step": 70360 }, { "epoch": 1.188401391562806, "grad_norm": 0.5021274089813232, "learning_rate": 4.231622570099061e-06, "loss": 0.0141, "step": 70370 }, { "epoch": 1.1885702705441281, "grad_norm": 0.20439894497394562, "learning_rate": 4.230166359728759e-06, "loss": 0.0106, "step": 70380 }, { "epoch": 1.18873914952545, "grad_norm": 0.214186429977417, "learning_rate": 4.228710216239721e-06, "loss": 0.0088, "step": 70390 }, { "epoch": 1.1889080285067721, "grad_norm": 0.20126771926879883, "learning_rate": 4.227254139758447e-06, "loss": 0.0062, "step": 70400 }, { "epoch": 1.189076907488094, "grad_norm": 0.5516355633735657, "learning_rate": 4.225798130411439e-06, "loss": 0.0126, "step": 70410 }, { "epoch": 1.189245786469416, "grad_norm": 0.34578269720077515, "learning_rate": 4.2243421883251926e-06, "loss": 0.0124, "step": 70420 }, { "epoch": 1.189414665450738, "grad_norm": 0.20631663501262665, "learning_rate": 4.2228863136261975e-06, "loss": 0.0058, "step": 70430 }, { "epoch": 1.18958354443206, "grad_norm": 0.2208952009677887, "learning_rate": 4.221430506440934e-06, "loss": 0.0057, "step": 70440 }, { "epoch": 1.189752423413382, "grad_norm": 0.22210335731506348, "learning_rate": 4.21997476689588e-06, "loss": 0.0061, "step": 70450 }, { "epoch": 1.189921302394704, "grad_norm": 0.18087783455848694, "learning_rate": 4.218519095117508e-06, "loss": 0.0079, "step": 70460 }, { "epoch": 1.1900901813760258, "grad_norm": 0.16550375521183014, "learning_rate": 4.217063491232282e-06, "loss": 0.0072, "step": 70470 }, { "epoch": 1.190259060357348, "grad_norm": 0.22142010927200317, "learning_rate": 4.21560795536666e-06, "loss": 0.0078, "step": 70480 }, { "epoch": 1.1904279393386699, "grad_norm": 0.20192578434944153, "learning_rate": 4.214152487647099e-06, "loss": 0.0085, "step": 70490 }, { "epoch": 1.190596818319992, "grad_norm": 0.2848874032497406, "learning_rate": 4.212697088200042e-06, "loss": 0.0087, "step": 70500 }, { "epoch": 1.1907656973013139, "grad_norm": 0.3470187485218048, "learning_rate": 4.211241757151936e-06, "loss": 0.0125, "step": 70510 }, { "epoch": 1.1909345762826358, "grad_norm": 0.3037634491920471, "learning_rate": 4.209786494629211e-06, "loss": 0.0086, "step": 70520 }, { "epoch": 1.1911034552639579, "grad_norm": 0.23977047204971313, "learning_rate": 4.208331300758299e-06, "loss": 0.008, "step": 70530 }, { "epoch": 1.1912723342452798, "grad_norm": 0.28952139616012573, "learning_rate": 4.206876175665624e-06, "loss": 0.0083, "step": 70540 }, { "epoch": 1.191441213226602, "grad_norm": 0.10148795694112778, "learning_rate": 4.205421119477604e-06, "loss": 0.0053, "step": 70550 }, { "epoch": 1.1916100922079238, "grad_norm": 0.5268381834030151, "learning_rate": 4.20396613232065e-06, "loss": 0.0068, "step": 70560 }, { "epoch": 1.1917789711892457, "grad_norm": 0.33596405386924744, "learning_rate": 4.202511214321166e-06, "loss": 0.0075, "step": 70570 }, { "epoch": 1.1919478501705678, "grad_norm": 0.12524275481700897, "learning_rate": 4.201056365605555e-06, "loss": 0.0086, "step": 70580 }, { "epoch": 1.1921167291518897, "grad_norm": 0.17274604737758636, "learning_rate": 4.1996015863002105e-06, "loss": 0.0086, "step": 70590 }, { "epoch": 1.1922856081332118, "grad_norm": 0.6954255700111389, "learning_rate": 4.198146876531517e-06, "loss": 0.0075, "step": 70600 }, { "epoch": 1.1924544871145337, "grad_norm": 0.22098775207996368, "learning_rate": 4.196692236425859e-06, "loss": 0.0091, "step": 70610 }, { "epoch": 1.1926233660958556, "grad_norm": 0.2365575134754181, "learning_rate": 4.1952376661096104e-06, "loss": 0.0091, "step": 70620 }, { "epoch": 1.1927922450771777, "grad_norm": 0.5432769060134888, "learning_rate": 4.193783165709143e-06, "loss": 0.0075, "step": 70630 }, { "epoch": 1.1929611240584996, "grad_norm": 0.18583381175994873, "learning_rate": 4.192328735350817e-06, "loss": 0.0083, "step": 70640 }, { "epoch": 1.1931300030398218, "grad_norm": 0.24336552619934082, "learning_rate": 4.190874375160991e-06, "loss": 0.0094, "step": 70650 }, { "epoch": 1.1932988820211436, "grad_norm": 0.14688773453235626, "learning_rate": 4.189420085266017e-06, "loss": 0.0075, "step": 70660 }, { "epoch": 1.1934677610024655, "grad_norm": 0.341487318277359, "learning_rate": 4.1879658657922415e-06, "loss": 0.0058, "step": 70670 }, { "epoch": 1.1936366399837877, "grad_norm": 0.43334394693374634, "learning_rate": 4.186511716866e-06, "loss": 0.0097, "step": 70680 }, { "epoch": 1.1938055189651096, "grad_norm": 0.1879872828722, "learning_rate": 4.185057638613628e-06, "loss": 0.008, "step": 70690 }, { "epoch": 1.1939743979464317, "grad_norm": 0.11756359040737152, "learning_rate": 4.183603631161451e-06, "loss": 0.0068, "step": 70700 }, { "epoch": 1.1941432769277536, "grad_norm": 0.1979334056377411, "learning_rate": 4.18214969463579e-06, "loss": 0.0076, "step": 70710 }, { "epoch": 1.1943121559090755, "grad_norm": 0.1497284322977066, "learning_rate": 4.1806958291629595e-06, "loss": 0.0073, "step": 70720 }, { "epoch": 1.1944810348903976, "grad_norm": 0.14308342337608337, "learning_rate": 4.179242034869267e-06, "loss": 0.0062, "step": 70730 }, { "epoch": 1.1946499138717195, "grad_norm": 0.19537287950515747, "learning_rate": 4.1777883118810164e-06, "loss": 0.0067, "step": 70740 }, { "epoch": 1.1948187928530416, "grad_norm": 0.19110047817230225, "learning_rate": 4.176334660324502e-06, "loss": 0.0072, "step": 70750 }, { "epoch": 1.1949876718343635, "grad_norm": 0.12222521752119064, "learning_rate": 4.174881080326017e-06, "loss": 0.0085, "step": 70760 }, { "epoch": 1.1951565508156854, "grad_norm": 0.31525617837905884, "learning_rate": 4.173427572011838e-06, "loss": 0.0103, "step": 70770 }, { "epoch": 1.1953254297970075, "grad_norm": 0.1915806084871292, "learning_rate": 4.171974135508247e-06, "loss": 0.0068, "step": 70780 }, { "epoch": 1.1954943087783294, "grad_norm": 0.11286275088787079, "learning_rate": 4.170520770941513e-06, "loss": 0.0066, "step": 70790 }, { "epoch": 1.1956631877596515, "grad_norm": 0.31168702244758606, "learning_rate": 4.169067478437906e-06, "loss": 0.0064, "step": 70800 }, { "epoch": 1.1958320667409734, "grad_norm": 0.3449682593345642, "learning_rate": 4.167614258123677e-06, "loss": 0.0067, "step": 70810 }, { "epoch": 1.1960009457222953, "grad_norm": 0.23658639192581177, "learning_rate": 4.166161110125082e-06, "loss": 0.0073, "step": 70820 }, { "epoch": 1.1961698247036174, "grad_norm": 0.41771480441093445, "learning_rate": 4.164708034568367e-06, "loss": 0.0082, "step": 70830 }, { "epoch": 1.1963387036849393, "grad_norm": 0.20078416168689728, "learning_rate": 4.163255031579772e-06, "loss": 0.0056, "step": 70840 }, { "epoch": 1.1965075826662614, "grad_norm": 0.17070446908473969, "learning_rate": 4.1618021012855285e-06, "loss": 0.0072, "step": 70850 }, { "epoch": 1.1966764616475833, "grad_norm": 0.1639431118965149, "learning_rate": 4.160349243811864e-06, "loss": 0.0055, "step": 70860 }, { "epoch": 1.1968453406289052, "grad_norm": 0.25752896070480347, "learning_rate": 4.158896459285e-06, "loss": 0.0101, "step": 70870 }, { "epoch": 1.1970142196102274, "grad_norm": 0.12478002160787582, "learning_rate": 4.157443747831153e-06, "loss": 0.0078, "step": 70880 }, { "epoch": 1.1971830985915493, "grad_norm": 0.14365607500076294, "learning_rate": 4.155991109576526e-06, "loss": 0.0096, "step": 70890 }, { "epoch": 1.1973519775728714, "grad_norm": 0.17292462289333344, "learning_rate": 4.154538544647323e-06, "loss": 0.009, "step": 70900 }, { "epoch": 1.1975208565541933, "grad_norm": 0.45835959911346436, "learning_rate": 4.15308605316974e-06, "loss": 0.0091, "step": 70910 }, { "epoch": 1.1976897355355152, "grad_norm": 0.5535182952880859, "learning_rate": 4.1516336352699675e-06, "loss": 0.0132, "step": 70920 }, { "epoch": 1.1978586145168373, "grad_norm": 0.3718709647655487, "learning_rate": 4.150181291074184e-06, "loss": 0.0045, "step": 70930 }, { "epoch": 1.1980274934981592, "grad_norm": 0.48499903082847595, "learning_rate": 4.148729020708568e-06, "loss": 0.006, "step": 70940 }, { "epoch": 1.1981963724794813, "grad_norm": 0.28094568848609924, "learning_rate": 4.147276824299288e-06, "loss": 0.0062, "step": 70950 }, { "epoch": 1.1983652514608032, "grad_norm": 0.20450125634670258, "learning_rate": 4.14582470197251e-06, "loss": 0.009, "step": 70960 }, { "epoch": 1.198534130442125, "grad_norm": 0.2023567408323288, "learning_rate": 4.144372653854386e-06, "loss": 0.0072, "step": 70970 }, { "epoch": 1.1987030094234472, "grad_norm": 0.2648862302303314, "learning_rate": 4.1429206800710705e-06, "loss": 0.0091, "step": 70980 }, { "epoch": 1.198871888404769, "grad_norm": 0.3082559108734131, "learning_rate": 4.141468780748707e-06, "loss": 0.0075, "step": 70990 }, { "epoch": 1.1990407673860912, "grad_norm": 0.2816048264503479, "learning_rate": 4.1400169560134325e-06, "loss": 0.0094, "step": 71000 }, { "epoch": 1.1992096463674131, "grad_norm": 0.3423038125038147, "learning_rate": 4.138565205991375e-06, "loss": 0.008, "step": 71010 }, { "epoch": 1.199378525348735, "grad_norm": 0.21808892488479614, "learning_rate": 4.137113530808663e-06, "loss": 0.0062, "step": 71020 }, { "epoch": 1.1995474043300571, "grad_norm": 0.0604805164039135, "learning_rate": 4.1356619305914135e-06, "loss": 0.0061, "step": 71030 }, { "epoch": 1.199716283311379, "grad_norm": 0.19599950313568115, "learning_rate": 4.134210405465738e-06, "loss": 0.0086, "step": 71040 }, { "epoch": 1.1998851622927011, "grad_norm": 0.15933607518672943, "learning_rate": 4.132758955557741e-06, "loss": 0.0078, "step": 71050 }, { "epoch": 1.200054041274023, "grad_norm": 0.24132387340068817, "learning_rate": 4.131307580993519e-06, "loss": 0.0074, "step": 71060 }, { "epoch": 1.200222920255345, "grad_norm": 0.17931866645812988, "learning_rate": 4.129856281899166e-06, "loss": 0.0072, "step": 71070 }, { "epoch": 1.200391799236667, "grad_norm": 0.30089735984802246, "learning_rate": 4.128405058400769e-06, "loss": 0.0054, "step": 71080 }, { "epoch": 1.200560678217989, "grad_norm": 0.19696825742721558, "learning_rate": 4.126953910624403e-06, "loss": 0.0083, "step": 71090 }, { "epoch": 1.200729557199311, "grad_norm": 0.2630006670951843, "learning_rate": 4.1255028386961426e-06, "loss": 0.0069, "step": 71100 }, { "epoch": 1.200898436180633, "grad_norm": 0.21174941956996918, "learning_rate": 4.124051842742052e-06, "loss": 0.0113, "step": 71110 }, { "epoch": 1.2010673151619549, "grad_norm": 0.25234025716781616, "learning_rate": 4.1226009228881935e-06, "loss": 0.007, "step": 71120 }, { "epoch": 1.201236194143277, "grad_norm": 0.40945878624916077, "learning_rate": 4.121150079260614e-06, "loss": 0.0082, "step": 71130 }, { "epoch": 1.2014050731245989, "grad_norm": 0.18443617224693298, "learning_rate": 4.119699311985363e-06, "loss": 0.0062, "step": 71140 }, { "epoch": 1.201573952105921, "grad_norm": 0.2270355373620987, "learning_rate": 4.118248621188478e-06, "loss": 0.0085, "step": 71150 }, { "epoch": 1.201742831087243, "grad_norm": 0.17324891686439514, "learning_rate": 4.1167980069959954e-06, "loss": 0.0042, "step": 71160 }, { "epoch": 1.2019117100685648, "grad_norm": 0.3483163118362427, "learning_rate": 4.115347469533934e-06, "loss": 0.0074, "step": 71170 }, { "epoch": 1.202080589049887, "grad_norm": 0.14150413870811462, "learning_rate": 4.113897008928318e-06, "loss": 0.0086, "step": 71180 }, { "epoch": 1.2022494680312088, "grad_norm": 0.2893237769603729, "learning_rate": 4.112446625305159e-06, "loss": 0.008, "step": 71190 }, { "epoch": 1.202418347012531, "grad_norm": 0.24595589935779572, "learning_rate": 4.110996318790461e-06, "loss": 0.0071, "step": 71200 }, { "epoch": 1.2025872259938528, "grad_norm": 0.3201713562011719, "learning_rate": 4.109546089510226e-06, "loss": 0.0085, "step": 71210 }, { "epoch": 1.2027561049751747, "grad_norm": 0.3007300794124603, "learning_rate": 4.108095937590444e-06, "loss": 0.0091, "step": 71220 }, { "epoch": 1.2029249839564968, "grad_norm": 0.14036019146442413, "learning_rate": 4.1066458631571016e-06, "loss": 0.0073, "step": 71230 }, { "epoch": 1.2030938629378187, "grad_norm": 0.08551019430160522, "learning_rate": 4.1051958663361765e-06, "loss": 0.008, "step": 71240 }, { "epoch": 1.2032627419191408, "grad_norm": 0.24289001524448395, "learning_rate": 4.103745947253644e-06, "loss": 0.0052, "step": 71250 }, { "epoch": 1.2034316209004627, "grad_norm": 0.1439283788204193, "learning_rate": 4.102296106035467e-06, "loss": 0.0081, "step": 71260 }, { "epoch": 1.2036004998817846, "grad_norm": 0.08797132223844528, "learning_rate": 4.100846342807603e-06, "loss": 0.0059, "step": 71270 }, { "epoch": 1.2037693788631068, "grad_norm": 0.17756755650043488, "learning_rate": 4.099396657696006e-06, "loss": 0.0055, "step": 71280 }, { "epoch": 1.2039382578444286, "grad_norm": 0.44893062114715576, "learning_rate": 4.097947050826623e-06, "loss": 0.0072, "step": 71290 }, { "epoch": 1.2041071368257508, "grad_norm": 0.3814970552921295, "learning_rate": 4.0964975223253875e-06, "loss": 0.0112, "step": 71300 }, { "epoch": 1.2042760158070727, "grad_norm": 0.20422162115573883, "learning_rate": 4.095048072318234e-06, "loss": 0.0061, "step": 71310 }, { "epoch": 1.2044448947883946, "grad_norm": 0.3756529688835144, "learning_rate": 4.093598700931087e-06, "loss": 0.0094, "step": 71320 }, { "epoch": 1.2046137737697167, "grad_norm": 0.1708090901374817, "learning_rate": 4.092149408289863e-06, "loss": 0.0055, "step": 71330 }, { "epoch": 1.2047826527510386, "grad_norm": 0.6782200932502747, "learning_rate": 4.090700194520474e-06, "loss": 0.009, "step": 71340 }, { "epoch": 1.2049515317323607, "grad_norm": 0.2722223699092865, "learning_rate": 4.089251059748823e-06, "loss": 0.0114, "step": 71350 }, { "epoch": 1.2051204107136826, "grad_norm": 0.24132853746414185, "learning_rate": 4.087802004100809e-06, "loss": 0.0094, "step": 71360 }, { "epoch": 1.2052892896950045, "grad_norm": 0.24939638376235962, "learning_rate": 4.086353027702323e-06, "loss": 0.0107, "step": 71370 }, { "epoch": 1.2054581686763266, "grad_norm": 0.3684559464454651, "learning_rate": 4.084904130679245e-06, "loss": 0.0065, "step": 71380 }, { "epoch": 1.2056270476576485, "grad_norm": 0.08438919484615326, "learning_rate": 4.083455313157455e-06, "loss": 0.0052, "step": 71390 }, { "epoch": 1.2057959266389706, "grad_norm": 0.35453641414642334, "learning_rate": 4.082006575262821e-06, "loss": 0.0067, "step": 71400 }, { "epoch": 1.2059648056202925, "grad_norm": 0.3074810206890106, "learning_rate": 4.080557917121209e-06, "loss": 0.0075, "step": 71410 }, { "epoch": 1.2061336846016144, "grad_norm": 0.494399756193161, "learning_rate": 4.079109338858469e-06, "loss": 0.0138, "step": 71420 }, { "epoch": 1.2063025635829365, "grad_norm": 0.38401350378990173, "learning_rate": 4.077660840600453e-06, "loss": 0.0055, "step": 71430 }, { "epoch": 1.2064714425642584, "grad_norm": 0.44689273834228516, "learning_rate": 4.076212422473004e-06, "loss": 0.0075, "step": 71440 }, { "epoch": 1.2066403215455805, "grad_norm": 0.17787949740886688, "learning_rate": 4.074764084601957e-06, "loss": 0.0083, "step": 71450 }, { "epoch": 1.2068092005269024, "grad_norm": 0.3723842203617096, "learning_rate": 4.073315827113138e-06, "loss": 0.0079, "step": 71460 }, { "epoch": 1.2069780795082243, "grad_norm": 0.3057025074958801, "learning_rate": 4.071867650132369e-06, "loss": 0.007, "step": 71470 }, { "epoch": 1.2071469584895465, "grad_norm": 0.2094649076461792, "learning_rate": 4.070419553785465e-06, "loss": 0.0069, "step": 71480 }, { "epoch": 1.2073158374708683, "grad_norm": 0.36007624864578247, "learning_rate": 4.068971538198233e-06, "loss": 0.0052, "step": 71490 }, { "epoch": 1.2074847164521905, "grad_norm": 0.46436163783073425, "learning_rate": 4.067523603496471e-06, "loss": 0.0092, "step": 71500 }, { "epoch": 1.2076535954335124, "grad_norm": 0.1478596180677414, "learning_rate": 4.066075749805974e-06, "loss": 0.0113, "step": 71510 }, { "epoch": 1.2078224744148343, "grad_norm": 0.2996593713760376, "learning_rate": 4.064627977252527e-06, "loss": 0.0104, "step": 71520 }, { "epoch": 1.2079913533961564, "grad_norm": 0.33359330892562866, "learning_rate": 4.063180285961912e-06, "loss": 0.0077, "step": 71530 }, { "epoch": 1.2081602323774783, "grad_norm": 0.24245472252368927, "learning_rate": 4.0617326760598945e-06, "loss": 0.0096, "step": 71540 }, { "epoch": 1.2083291113588004, "grad_norm": 0.2089715451002121, "learning_rate": 4.060285147672245e-06, "loss": 0.0057, "step": 71550 }, { "epoch": 1.2084979903401223, "grad_norm": 0.2642921209335327, "learning_rate": 4.058837700924718e-06, "loss": 0.0085, "step": 71560 }, { "epoch": 1.2086668693214442, "grad_norm": 0.1999884843826294, "learning_rate": 4.057390335943067e-06, "loss": 0.0067, "step": 71570 }, { "epoch": 1.2088357483027663, "grad_norm": 0.3332034945487976, "learning_rate": 4.055943052853032e-06, "loss": 0.0075, "step": 71580 }, { "epoch": 1.2090046272840882, "grad_norm": 0.37586501240730286, "learning_rate": 4.054495851780352e-06, "loss": 0.0084, "step": 71590 }, { "epoch": 1.2091735062654103, "grad_norm": 0.4471692740917206, "learning_rate": 4.053048732850755e-06, "loss": 0.0154, "step": 71600 }, { "epoch": 1.2093423852467322, "grad_norm": 0.1067652627825737, "learning_rate": 4.0516016961899655e-06, "loss": 0.0067, "step": 71610 }, { "epoch": 1.209511264228054, "grad_norm": 0.22348983585834503, "learning_rate": 4.050154741923694e-06, "loss": 0.0088, "step": 71620 }, { "epoch": 1.2096801432093762, "grad_norm": 0.15519769489765167, "learning_rate": 4.048707870177653e-06, "loss": 0.0082, "step": 71630 }, { "epoch": 1.2098490221906981, "grad_norm": 0.25835901498794556, "learning_rate": 4.047261081077541e-06, "loss": 0.0093, "step": 71640 }, { "epoch": 1.2100179011720202, "grad_norm": 0.5028764605522156, "learning_rate": 4.045814374749051e-06, "loss": 0.0108, "step": 71650 }, { "epoch": 1.2101867801533421, "grad_norm": 0.1779155284166336, "learning_rate": 4.044367751317872e-06, "loss": 0.0084, "step": 71660 }, { "epoch": 1.210355659134664, "grad_norm": 0.484157919883728, "learning_rate": 4.042921210909679e-06, "loss": 0.0105, "step": 71670 }, { "epoch": 1.2105245381159861, "grad_norm": 0.3162906765937805, "learning_rate": 4.041474753650146e-06, "loss": 0.007, "step": 71680 }, { "epoch": 1.210693417097308, "grad_norm": 0.2832969129085541, "learning_rate": 4.040028379664938e-06, "loss": 0.0132, "step": 71690 }, { "epoch": 1.2108622960786302, "grad_norm": 0.1028054878115654, "learning_rate": 4.038582089079714e-06, "loss": 0.0069, "step": 71700 }, { "epoch": 1.211031175059952, "grad_norm": 0.15049923956394196, "learning_rate": 4.03713588202012e-06, "loss": 0.0117, "step": 71710 }, { "epoch": 1.211200054041274, "grad_norm": 0.24045215547084808, "learning_rate": 4.035689758611801e-06, "loss": 0.008, "step": 71720 }, { "epoch": 1.211368933022596, "grad_norm": 0.3289395868778229, "learning_rate": 4.034243718980394e-06, "loss": 0.0054, "step": 71730 }, { "epoch": 1.211537812003918, "grad_norm": 0.17234347760677338, "learning_rate": 4.032797763251527e-06, "loss": 0.0097, "step": 71740 }, { "epoch": 1.21170669098524, "grad_norm": 0.16487842798233032, "learning_rate": 4.031351891550819e-06, "loss": 0.0072, "step": 71750 }, { "epoch": 1.211875569966562, "grad_norm": 0.2204405516386032, "learning_rate": 4.029906104003885e-06, "loss": 0.006, "step": 71760 }, { "epoch": 1.2120444489478839, "grad_norm": 0.31835222244262695, "learning_rate": 4.028460400736331e-06, "loss": 0.0141, "step": 71770 }, { "epoch": 1.212213327929206, "grad_norm": 0.2978806793689728, "learning_rate": 4.027014781873759e-06, "loss": 0.0064, "step": 71780 }, { "epoch": 1.212382206910528, "grad_norm": 0.4675742983818054, "learning_rate": 4.025569247541757e-06, "loss": 0.008, "step": 71790 }, { "epoch": 1.21255108589185, "grad_norm": 0.20901432633399963, "learning_rate": 4.024123797865911e-06, "loss": 0.0088, "step": 71800 }, { "epoch": 1.212719964873172, "grad_norm": 0.38770243525505066, "learning_rate": 4.022678432971798e-06, "loss": 0.0087, "step": 71810 }, { "epoch": 1.2128888438544938, "grad_norm": 0.28365159034729004, "learning_rate": 4.02123315298499e-06, "loss": 0.0077, "step": 71820 }, { "epoch": 1.213057722835816, "grad_norm": 0.30138784646987915, "learning_rate": 4.019787958031045e-06, "loss": 0.0098, "step": 71830 }, { "epoch": 1.2132266018171378, "grad_norm": 0.25672420859336853, "learning_rate": 4.018342848235521e-06, "loss": 0.0097, "step": 71840 }, { "epoch": 1.21339548079846, "grad_norm": 0.18349997699260712, "learning_rate": 4.016897823723965e-06, "loss": 0.0083, "step": 71850 }, { "epoch": 1.2135643597797818, "grad_norm": 0.17125175893306732, "learning_rate": 4.0154528846219175e-06, "loss": 0.01, "step": 71860 }, { "epoch": 1.2137332387611037, "grad_norm": 0.23438721895217896, "learning_rate": 4.01400803105491e-06, "loss": 0.0074, "step": 71870 }, { "epoch": 1.2139021177424258, "grad_norm": 0.1466967910528183, "learning_rate": 4.012563263148469e-06, "loss": 0.006, "step": 71880 }, { "epoch": 1.2140709967237477, "grad_norm": 0.3093009889125824, "learning_rate": 4.011118581028111e-06, "loss": 0.0084, "step": 71890 }, { "epoch": 1.2142398757050699, "grad_norm": 0.32302725315093994, "learning_rate": 4.0096739848193515e-06, "loss": 0.0114, "step": 71900 }, { "epoch": 1.2144087546863918, "grad_norm": 0.4711049795150757, "learning_rate": 4.008229474647687e-06, "loss": 0.0097, "step": 71910 }, { "epoch": 1.2145776336677137, "grad_norm": 0.2919837534427643, "learning_rate": 4.006785050638614e-06, "loss": 0.0076, "step": 71920 }, { "epoch": 1.2147465126490358, "grad_norm": 0.34103336930274963, "learning_rate": 4.005340712917623e-06, "loss": 0.0106, "step": 71930 }, { "epoch": 1.2149153916303577, "grad_norm": 0.27741536498069763, "learning_rate": 4.003896461610195e-06, "loss": 0.0048, "step": 71940 }, { "epoch": 1.2150842706116798, "grad_norm": 0.1832248568534851, "learning_rate": 4.002452296841801e-06, "loss": 0.0093, "step": 71950 }, { "epoch": 1.2152531495930017, "grad_norm": 0.22346185147762299, "learning_rate": 4.0010082187379066e-06, "loss": 0.0102, "step": 71960 }, { "epoch": 1.2154220285743236, "grad_norm": 0.30087554454803467, "learning_rate": 3.99956422742397e-06, "loss": 0.0079, "step": 71970 }, { "epoch": 1.2155909075556457, "grad_norm": 0.23812417685985565, "learning_rate": 3.998120323025443e-06, "loss": 0.0078, "step": 71980 }, { "epoch": 1.2157597865369676, "grad_norm": 0.1276027411222458, "learning_rate": 3.996676505667766e-06, "loss": 0.0058, "step": 71990 }, { "epoch": 1.2159286655182897, "grad_norm": 0.19284304976463318, "learning_rate": 3.995232775476376e-06, "loss": 0.0062, "step": 72000 }, { "epoch": 1.2160975444996116, "grad_norm": 0.13796797394752502, "learning_rate": 3.9937891325767004e-06, "loss": 0.0069, "step": 72010 }, { "epoch": 1.2162664234809335, "grad_norm": 0.29852139949798584, "learning_rate": 3.992345577094161e-06, "loss": 0.0073, "step": 72020 }, { "epoch": 1.2164353024622556, "grad_norm": 0.3634522259235382, "learning_rate": 3.990902109154166e-06, "loss": 0.0085, "step": 72030 }, { "epoch": 1.2166041814435775, "grad_norm": 0.25531914830207825, "learning_rate": 3.989458728882124e-06, "loss": 0.0082, "step": 72040 }, { "epoch": 1.2167730604248996, "grad_norm": 0.1995982527732849, "learning_rate": 3.988015436403432e-06, "loss": 0.0046, "step": 72050 }, { "epoch": 1.2169419394062215, "grad_norm": 0.3836853504180908, "learning_rate": 3.98657223184348e-06, "loss": 0.0074, "step": 72060 }, { "epoch": 1.2171108183875434, "grad_norm": 0.3047633767127991, "learning_rate": 3.9851291153276485e-06, "loss": 0.0103, "step": 72070 }, { "epoch": 1.2172796973688655, "grad_norm": 0.22928482294082642, "learning_rate": 3.9836860869813124e-06, "loss": 0.0071, "step": 72080 }, { "epoch": 1.2174485763501874, "grad_norm": 0.1541268229484558, "learning_rate": 3.982243146929838e-06, "loss": 0.0062, "step": 72090 }, { "epoch": 1.2176174553315096, "grad_norm": 0.3222639560699463, "learning_rate": 3.980800295298586e-06, "loss": 0.0081, "step": 72100 }, { "epoch": 1.2177863343128315, "grad_norm": 0.2530595362186432, "learning_rate": 3.979357532212908e-06, "loss": 0.0065, "step": 72110 }, { "epoch": 1.2179552132941533, "grad_norm": 0.19201964139938354, "learning_rate": 3.977914857798144e-06, "loss": 0.0126, "step": 72120 }, { "epoch": 1.2181240922754755, "grad_norm": 0.31849405169487, "learning_rate": 3.976472272179634e-06, "loss": 0.0116, "step": 72130 }, { "epoch": 1.2182929712567974, "grad_norm": 0.41695263981819153, "learning_rate": 3.975029775482705e-06, "loss": 0.0085, "step": 72140 }, { "epoch": 1.2184618502381195, "grad_norm": 0.3169828951358795, "learning_rate": 3.973587367832678e-06, "loss": 0.0088, "step": 72150 }, { "epoch": 1.2186307292194414, "grad_norm": 1.0790756940841675, "learning_rate": 3.972145049354864e-06, "loss": 0.0073, "step": 72160 }, { "epoch": 1.2187996082007633, "grad_norm": 0.2273607701063156, "learning_rate": 3.9707028201745705e-06, "loss": 0.0096, "step": 72170 }, { "epoch": 1.2189684871820854, "grad_norm": 0.284244567155838, "learning_rate": 3.969260680417093e-06, "loss": 0.0112, "step": 72180 }, { "epoch": 1.2191373661634073, "grad_norm": 1.064010739326477, "learning_rate": 3.967818630207725e-06, "loss": 0.0109, "step": 72190 }, { "epoch": 1.2193062451447294, "grad_norm": 0.2933267652988434, "learning_rate": 3.966376669671743e-06, "loss": 0.009, "step": 72200 }, { "epoch": 1.2194751241260513, "grad_norm": 0.6287699341773987, "learning_rate": 3.964934798934422e-06, "loss": 0.0075, "step": 72210 }, { "epoch": 1.2196440031073732, "grad_norm": 0.33533352613449097, "learning_rate": 3.96349301812103e-06, "loss": 0.0104, "step": 72220 }, { "epoch": 1.2198128820886953, "grad_norm": 0.336328387260437, "learning_rate": 3.962051327356826e-06, "loss": 0.0065, "step": 72230 }, { "epoch": 1.2199817610700172, "grad_norm": 0.17460978031158447, "learning_rate": 3.960609726767058e-06, "loss": 0.0098, "step": 72240 }, { "epoch": 1.2201506400513393, "grad_norm": 0.33926647901535034, "learning_rate": 3.95916821647697e-06, "loss": 0.0127, "step": 72250 }, { "epoch": 1.2203195190326612, "grad_norm": 0.30397987365722656, "learning_rate": 3.957726796611797e-06, "loss": 0.006, "step": 72260 }, { "epoch": 1.2204883980139831, "grad_norm": 0.13308776915073395, "learning_rate": 3.956285467296768e-06, "loss": 0.0067, "step": 72270 }, { "epoch": 1.2206572769953052, "grad_norm": 0.12479046732187271, "learning_rate": 3.954844228657097e-06, "loss": 0.0079, "step": 72280 }, { "epoch": 1.2208261559766271, "grad_norm": 0.3501831293106079, "learning_rate": 3.953403080818e-06, "loss": 0.0084, "step": 72290 }, { "epoch": 1.2209950349579493, "grad_norm": 0.15423378348350525, "learning_rate": 3.951962023904678e-06, "loss": 0.0091, "step": 72300 }, { "epoch": 1.2211639139392712, "grad_norm": 0.27473029494285583, "learning_rate": 3.95052105804233e-06, "loss": 0.0099, "step": 72310 }, { "epoch": 1.221332792920593, "grad_norm": 0.24098236858844757, "learning_rate": 3.949080183356139e-06, "loss": 0.008, "step": 72320 }, { "epoch": 1.2215016719019152, "grad_norm": 0.17416948080062866, "learning_rate": 3.9476393999712855e-06, "loss": 0.0114, "step": 72330 }, { "epoch": 1.221670550883237, "grad_norm": 0.2730764150619507, "learning_rate": 3.946198708012943e-06, "loss": 0.0073, "step": 72340 }, { "epoch": 1.2218394298645592, "grad_norm": 0.32581064105033875, "learning_rate": 3.944758107606275e-06, "loss": 0.0047, "step": 72350 }, { "epoch": 1.222008308845881, "grad_norm": 0.17169946432113647, "learning_rate": 3.943317598876436e-06, "loss": 0.0052, "step": 72360 }, { "epoch": 1.222177187827203, "grad_norm": 0.3146178126335144, "learning_rate": 3.941877181948575e-06, "loss": 0.0076, "step": 72370 }, { "epoch": 1.222346066808525, "grad_norm": 0.19437797367572784, "learning_rate": 3.940436856947831e-06, "loss": 0.0115, "step": 72380 }, { "epoch": 1.222514945789847, "grad_norm": 0.30425530672073364, "learning_rate": 3.938996623999339e-06, "loss": 0.0081, "step": 72390 }, { "epoch": 1.222683824771169, "grad_norm": 0.36054477095603943, "learning_rate": 3.937556483228218e-06, "loss": 0.0104, "step": 72400 }, { "epoch": 1.222852703752491, "grad_norm": 0.39145344495773315, "learning_rate": 3.936116434759587e-06, "loss": 0.0082, "step": 72410 }, { "epoch": 1.223021582733813, "grad_norm": 0.1025315448641777, "learning_rate": 3.9346764787185535e-06, "loss": 0.0081, "step": 72420 }, { "epoch": 1.223190461715135, "grad_norm": 0.23256997764110565, "learning_rate": 3.933236615230218e-06, "loss": 0.0049, "step": 72430 }, { "epoch": 1.223359340696457, "grad_norm": 0.143926739692688, "learning_rate": 3.931796844419671e-06, "loss": 0.0082, "step": 72440 }, { "epoch": 1.223528219677779, "grad_norm": 0.21347659826278687, "learning_rate": 3.9303571664119955e-06, "loss": 0.0082, "step": 72450 }, { "epoch": 1.223697098659101, "grad_norm": 0.3213053345680237, "learning_rate": 3.928917581332268e-06, "loss": 0.0078, "step": 72460 }, { "epoch": 1.2238659776404228, "grad_norm": 0.16281379759311676, "learning_rate": 3.927478089305558e-06, "loss": 0.0112, "step": 72470 }, { "epoch": 1.224034856621745, "grad_norm": 0.14065751433372498, "learning_rate": 3.926038690456923e-06, "loss": 0.0072, "step": 72480 }, { "epoch": 1.2242037356030668, "grad_norm": 0.19785866141319275, "learning_rate": 3.924599384911414e-06, "loss": 0.0104, "step": 72490 }, { "epoch": 1.2243726145843887, "grad_norm": 0.10442577302455902, "learning_rate": 3.923160172794076e-06, "loss": 0.0069, "step": 72500 }, { "epoch": 1.2245414935657108, "grad_norm": 0.15364578366279602, "learning_rate": 3.921721054229945e-06, "loss": 0.0102, "step": 72510 }, { "epoch": 1.2247103725470327, "grad_norm": 0.4856370985507965, "learning_rate": 3.920282029344045e-06, "loss": 0.0081, "step": 72520 }, { "epoch": 1.2248792515283549, "grad_norm": 0.29332074522972107, "learning_rate": 3.918843098261397e-06, "loss": 0.0097, "step": 72530 }, { "epoch": 1.2250481305096768, "grad_norm": 0.49357640743255615, "learning_rate": 3.917404261107011e-06, "loss": 0.01, "step": 72540 }, { "epoch": 1.2252170094909987, "grad_norm": 0.6630716323852539, "learning_rate": 3.915965518005889e-06, "loss": 0.0073, "step": 72550 }, { "epoch": 1.2253858884723208, "grad_norm": 0.08719513565301895, "learning_rate": 3.91452686908303e-06, "loss": 0.0081, "step": 72560 }, { "epoch": 1.2255547674536427, "grad_norm": 0.24179106950759888, "learning_rate": 3.913088314463415e-06, "loss": 0.0081, "step": 72570 }, { "epoch": 1.2257236464349648, "grad_norm": 0.1494702696800232, "learning_rate": 3.911649854272023e-06, "loss": 0.0057, "step": 72580 }, { "epoch": 1.2258925254162867, "grad_norm": 0.3160296380519867, "learning_rate": 3.910211488633824e-06, "loss": 0.0068, "step": 72590 }, { "epoch": 1.2260614043976086, "grad_norm": 0.29707711935043335, "learning_rate": 3.908773217673783e-06, "loss": 0.0085, "step": 72600 }, { "epoch": 1.2262302833789307, "grad_norm": 0.11931238323450089, "learning_rate": 3.9073350415168496e-06, "loss": 0.0069, "step": 72610 }, { "epoch": 1.2263991623602526, "grad_norm": 0.25237226486206055, "learning_rate": 3.90589696028797e-06, "loss": 0.0077, "step": 72620 }, { "epoch": 1.2265680413415747, "grad_norm": 0.2610713541507721, "learning_rate": 3.904458974112082e-06, "loss": 0.0091, "step": 72630 }, { "epoch": 1.2267369203228966, "grad_norm": 0.1463874876499176, "learning_rate": 3.903021083114115e-06, "loss": 0.0083, "step": 72640 }, { "epoch": 1.2269057993042185, "grad_norm": 0.20262371003627777, "learning_rate": 3.901583287418987e-06, "loss": 0.0055, "step": 72650 }, { "epoch": 1.2270746782855406, "grad_norm": 0.2443646341562271, "learning_rate": 3.90014558715161e-06, "loss": 0.0105, "step": 72660 }, { "epoch": 1.2272435572668625, "grad_norm": 0.2919884920120239, "learning_rate": 3.898707982436891e-06, "loss": 0.0098, "step": 72670 }, { "epoch": 1.2274124362481846, "grad_norm": 0.12071173638105392, "learning_rate": 3.897270473399726e-06, "loss": 0.0059, "step": 72680 }, { "epoch": 1.2275813152295065, "grad_norm": 0.3722818195819855, "learning_rate": 3.895833060164997e-06, "loss": 0.0093, "step": 72690 }, { "epoch": 1.2277501942108284, "grad_norm": 0.2432742416858673, "learning_rate": 3.894395742857587e-06, "loss": 0.0105, "step": 72700 }, { "epoch": 1.2279190731921505, "grad_norm": 0.2152351588010788, "learning_rate": 3.8929585216023645e-06, "loss": 0.0057, "step": 72710 }, { "epoch": 1.2280879521734724, "grad_norm": 0.24838951230049133, "learning_rate": 3.891521396524196e-06, "loss": 0.0072, "step": 72720 }, { "epoch": 1.2282568311547946, "grad_norm": 0.25856608152389526, "learning_rate": 3.890084367747931e-06, "loss": 0.0081, "step": 72730 }, { "epoch": 1.2284257101361165, "grad_norm": 0.1177263930439949, "learning_rate": 3.888647435398415e-06, "loss": 0.0068, "step": 72740 }, { "epoch": 1.2285945891174384, "grad_norm": 0.1122438982129097, "learning_rate": 3.887210599600488e-06, "loss": 0.0056, "step": 72750 }, { "epoch": 1.2287634680987605, "grad_norm": 0.19730965793132782, "learning_rate": 3.885773860478977e-06, "loss": 0.0066, "step": 72760 }, { "epoch": 1.2289323470800824, "grad_norm": 0.17400069534778595, "learning_rate": 3.884337218158703e-06, "loss": 0.0054, "step": 72770 }, { "epoch": 1.2291012260614045, "grad_norm": 0.14206242561340332, "learning_rate": 3.882900672764477e-06, "loss": 0.0098, "step": 72780 }, { "epoch": 1.2292701050427264, "grad_norm": 0.14177606999874115, "learning_rate": 3.881464224421104e-06, "loss": 0.009, "step": 72790 }, { "epoch": 1.2294389840240483, "grad_norm": 0.27194520831108093, "learning_rate": 3.88002787325338e-06, "loss": 0.005, "step": 72800 }, { "epoch": 1.2296078630053704, "grad_norm": 0.24705371260643005, "learning_rate": 3.8785916193860876e-06, "loss": 0.012, "step": 72810 }, { "epoch": 1.2297767419866923, "grad_norm": 0.37471067905426025, "learning_rate": 3.877155462944006e-06, "loss": 0.0112, "step": 72820 }, { "epoch": 1.2299456209680144, "grad_norm": 0.318717360496521, "learning_rate": 3.875719404051908e-06, "loss": 0.0077, "step": 72830 }, { "epoch": 1.2301144999493363, "grad_norm": 0.26002809405326843, "learning_rate": 3.8742834428345555e-06, "loss": 0.0103, "step": 72840 }, { "epoch": 1.2302833789306582, "grad_norm": 0.2932513952255249, "learning_rate": 3.872847579416697e-06, "loss": 0.0073, "step": 72850 }, { "epoch": 1.2304522579119803, "grad_norm": 0.33743295073509216, "learning_rate": 3.871411813923078e-06, "loss": 0.0061, "step": 72860 }, { "epoch": 1.2306211368933022, "grad_norm": 0.3009105622768402, "learning_rate": 3.869976146478436e-06, "loss": 0.0099, "step": 72870 }, { "epoch": 1.2307900158746243, "grad_norm": 0.3617449104785919, "learning_rate": 3.868540577207499e-06, "loss": 0.009, "step": 72880 }, { "epoch": 1.2309588948559462, "grad_norm": 0.29211342334747314, "learning_rate": 3.867105106234982e-06, "loss": 0.0094, "step": 72890 }, { "epoch": 1.2311277738372681, "grad_norm": 0.47703418135643005, "learning_rate": 3.865669733685597e-06, "loss": 0.01, "step": 72900 }, { "epoch": 1.2312966528185902, "grad_norm": 0.32100600004196167, "learning_rate": 3.864234459684048e-06, "loss": 0.0096, "step": 72910 }, { "epoch": 1.2314655317999121, "grad_norm": 0.43554815649986267, "learning_rate": 3.862799284355026e-06, "loss": 0.0089, "step": 72920 }, { "epoch": 1.2316344107812343, "grad_norm": 0.45512014627456665, "learning_rate": 3.861364207823215e-06, "loss": 0.0092, "step": 72930 }, { "epoch": 1.2318032897625562, "grad_norm": 0.19548270106315613, "learning_rate": 3.859929230213289e-06, "loss": 0.0084, "step": 72940 }, { "epoch": 1.231972168743878, "grad_norm": 0.24600264430046082, "learning_rate": 3.858494351649921e-06, "loss": 0.0108, "step": 72950 }, { "epoch": 1.2321410477252002, "grad_norm": 0.14939740300178528, "learning_rate": 3.8570595722577676e-06, "loss": 0.0065, "step": 72960 }, { "epoch": 1.232309926706522, "grad_norm": 0.033837854862213135, "learning_rate": 3.855624892161476e-06, "loss": 0.0068, "step": 72970 }, { "epoch": 1.2324788056878442, "grad_norm": 0.29022452235221863, "learning_rate": 3.854190311485689e-06, "loss": 0.0079, "step": 72980 }, { "epoch": 1.232647684669166, "grad_norm": 0.32564499974250793, "learning_rate": 3.8527558303550405e-06, "loss": 0.006, "step": 72990 }, { "epoch": 1.232816563650488, "grad_norm": 0.20963993668556213, "learning_rate": 3.851321448894154e-06, "loss": 0.0077, "step": 73000 }, { "epoch": 1.23298544263181, "grad_norm": 0.2756209075450897, "learning_rate": 3.849887167227647e-06, "loss": 0.0118, "step": 73010 }, { "epoch": 1.233154321613132, "grad_norm": 0.5194330215454102, "learning_rate": 3.848452985480123e-06, "loss": 0.008, "step": 73020 }, { "epoch": 1.233323200594454, "grad_norm": 0.3682123124599457, "learning_rate": 3.847018903776182e-06, "loss": 0.0099, "step": 73030 }, { "epoch": 1.233492079575776, "grad_norm": 0.26341912150382996, "learning_rate": 3.8455849222404144e-06, "loss": 0.0094, "step": 73040 }, { "epoch": 1.233660958557098, "grad_norm": 0.18728718161582947, "learning_rate": 3.844151040997401e-06, "loss": 0.0056, "step": 73050 }, { "epoch": 1.23382983753842, "grad_norm": 0.2513704001903534, "learning_rate": 3.84271726017171e-06, "loss": 0.0102, "step": 73060 }, { "epoch": 1.233998716519742, "grad_norm": 0.2534792125225067, "learning_rate": 3.841283579887909e-06, "loss": 0.0074, "step": 73070 }, { "epoch": 1.234167595501064, "grad_norm": 0.2849181890487671, "learning_rate": 3.839850000270551e-06, "loss": 0.0125, "step": 73080 }, { "epoch": 1.234336474482386, "grad_norm": 0.26044219732284546, "learning_rate": 3.8384165214441846e-06, "loss": 0.0072, "step": 73090 }, { "epoch": 1.2345053534637078, "grad_norm": 0.24057182669639587, "learning_rate": 3.836983143533341e-06, "loss": 0.0074, "step": 73100 }, { "epoch": 1.23467423244503, "grad_norm": 0.21312712132930756, "learning_rate": 3.835549866662554e-06, "loss": 0.009, "step": 73110 }, { "epoch": 1.2348431114263518, "grad_norm": 0.2866261899471283, "learning_rate": 3.8341166909563405e-06, "loss": 0.009, "step": 73120 }, { "epoch": 1.235011990407674, "grad_norm": 0.48872214555740356, "learning_rate": 3.832683616539213e-06, "loss": 0.009, "step": 73130 }, { "epoch": 1.2351808693889959, "grad_norm": 0.1811029464006424, "learning_rate": 3.831250643535671e-06, "loss": 0.0102, "step": 73140 }, { "epoch": 1.2353497483703177, "grad_norm": 0.2886519134044647, "learning_rate": 3.82981777207021e-06, "loss": 0.0103, "step": 73150 }, { "epoch": 1.2355186273516399, "grad_norm": 0.2227483093738556, "learning_rate": 3.8283850022673135e-06, "loss": 0.0063, "step": 73160 }, { "epoch": 1.2356875063329618, "grad_norm": 0.23390330374240875, "learning_rate": 3.826952334251458e-06, "loss": 0.0071, "step": 73170 }, { "epoch": 1.2358563853142839, "grad_norm": 0.2705450654029846, "learning_rate": 3.825519768147108e-06, "loss": 0.0075, "step": 73180 }, { "epoch": 1.2360252642956058, "grad_norm": 0.43719780445098877, "learning_rate": 3.824087304078723e-06, "loss": 0.0103, "step": 73190 }, { "epoch": 1.2361941432769277, "grad_norm": 0.18860694766044617, "learning_rate": 3.822654942170752e-06, "loss": 0.0097, "step": 73200 }, { "epoch": 1.2363630222582498, "grad_norm": 0.2599909007549286, "learning_rate": 3.8212226825476354e-06, "loss": 0.0127, "step": 73210 }, { "epoch": 1.2365319012395717, "grad_norm": 0.1997309923171997, "learning_rate": 3.819790525333803e-06, "loss": 0.0119, "step": 73220 }, { "epoch": 1.2367007802208938, "grad_norm": 0.1761108934879303, "learning_rate": 3.8183584706536766e-06, "loss": 0.0088, "step": 73230 }, { "epoch": 1.2368696592022157, "grad_norm": 0.21430754661560059, "learning_rate": 3.816926518631672e-06, "loss": 0.0078, "step": 73240 }, { "epoch": 1.2370385381835376, "grad_norm": 0.20792025327682495, "learning_rate": 3.8154946693921925e-06, "loss": 0.007, "step": 73250 }, { "epoch": 1.2372074171648597, "grad_norm": 0.3603726923465729, "learning_rate": 3.814062923059633e-06, "loss": 0.0079, "step": 73260 }, { "epoch": 1.2373762961461816, "grad_norm": 0.16127043962478638, "learning_rate": 3.812631279758381e-06, "loss": 0.0128, "step": 73270 }, { "epoch": 1.2375451751275037, "grad_norm": 0.19716772437095642, "learning_rate": 3.8111997396128136e-06, "loss": 0.0072, "step": 73280 }, { "epoch": 1.2377140541088256, "grad_norm": 0.26160863041877747, "learning_rate": 3.809768302747301e-06, "loss": 0.0078, "step": 73290 }, { "epoch": 1.2378829330901475, "grad_norm": 0.646500289440155, "learning_rate": 3.8083369692862003e-06, "loss": 0.0088, "step": 73300 }, { "epoch": 1.2380518120714696, "grad_norm": 0.19329127669334412, "learning_rate": 3.8069057393538646e-06, "loss": 0.0039, "step": 73310 }, { "epoch": 1.2382206910527915, "grad_norm": 0.294058620929718, "learning_rate": 3.805474613074634e-06, "loss": 0.0075, "step": 73320 }, { "epoch": 1.2383895700341134, "grad_norm": 0.16506992280483246, "learning_rate": 3.8040435905728444e-06, "loss": 0.0093, "step": 73330 }, { "epoch": 1.2385584490154355, "grad_norm": 0.07999836653470993, "learning_rate": 3.802612671972815e-06, "loss": 0.0076, "step": 73340 }, { "epoch": 1.2387273279967574, "grad_norm": 0.2079339474439621, "learning_rate": 3.801181857398862e-06, "loss": 0.0102, "step": 73350 }, { "epoch": 1.2388962069780796, "grad_norm": 0.25963717699050903, "learning_rate": 3.7997511469752924e-06, "loss": 0.0072, "step": 73360 }, { "epoch": 1.2390650859594015, "grad_norm": 0.4154834747314453, "learning_rate": 3.7983205408264036e-06, "loss": 0.0076, "step": 73370 }, { "epoch": 1.2392339649407234, "grad_norm": 0.5064836740493774, "learning_rate": 3.7968900390764794e-06, "loss": 0.0079, "step": 73380 }, { "epoch": 1.2394028439220455, "grad_norm": 0.39703845977783203, "learning_rate": 3.7954596418498014e-06, "loss": 0.0096, "step": 73390 }, { "epoch": 1.2395717229033674, "grad_norm": 0.3104313910007477, "learning_rate": 3.7940293492706377e-06, "loss": 0.0063, "step": 73400 }, { "epoch": 1.2397406018846895, "grad_norm": 0.13164272904396057, "learning_rate": 3.79259916146325e-06, "loss": 0.0035, "step": 73410 }, { "epoch": 1.2399094808660114, "grad_norm": 0.20785826444625854, "learning_rate": 3.7911690785518878e-06, "loss": 0.0076, "step": 73420 }, { "epoch": 1.2400783598473333, "grad_norm": 0.20353709161281586, "learning_rate": 3.7897391006607944e-06, "loss": 0.0069, "step": 73430 }, { "epoch": 1.2402472388286554, "grad_norm": 0.6132499575614929, "learning_rate": 3.7883092279142013e-06, "loss": 0.0095, "step": 73440 }, { "epoch": 1.2404161178099773, "grad_norm": 0.3535434603691101, "learning_rate": 3.7868794604363365e-06, "loss": 0.0087, "step": 73450 }, { "epoch": 1.2405849967912994, "grad_norm": 0.22503377497196198, "learning_rate": 3.7854497983514084e-06, "loss": 0.0068, "step": 73460 }, { "epoch": 1.2407538757726213, "grad_norm": 0.41617855429649353, "learning_rate": 3.7840202417836263e-06, "loss": 0.0101, "step": 73470 }, { "epoch": 1.2409227547539432, "grad_norm": 0.22121065855026245, "learning_rate": 3.782590790857186e-06, "loss": 0.0077, "step": 73480 }, { "epoch": 1.2410916337352653, "grad_norm": 0.15439969301223755, "learning_rate": 3.7811614456962748e-06, "loss": 0.0079, "step": 73490 }, { "epoch": 1.2412605127165872, "grad_norm": 0.2812191843986511, "learning_rate": 3.7797322064250718e-06, "loss": 0.0079, "step": 73500 }, { "epoch": 1.2414293916979093, "grad_norm": 0.15229982137680054, "learning_rate": 3.778303073167743e-06, "loss": 0.0085, "step": 73510 }, { "epoch": 1.2415982706792312, "grad_norm": 0.4467087686061859, "learning_rate": 3.7768740460484497e-06, "loss": 0.0106, "step": 73520 }, { "epoch": 1.2417671496605531, "grad_norm": 0.07167502492666245, "learning_rate": 3.7754451251913426e-06, "loss": 0.0084, "step": 73530 }, { "epoch": 1.2419360286418752, "grad_norm": 0.6423201560974121, "learning_rate": 3.774016310720563e-06, "loss": 0.0076, "step": 73540 }, { "epoch": 1.2421049076231971, "grad_norm": 0.367173969745636, "learning_rate": 3.7725876027602415e-06, "loss": 0.0074, "step": 73550 }, { "epoch": 1.2422737866045193, "grad_norm": 0.2000722885131836, "learning_rate": 3.771159001434501e-06, "loss": 0.0072, "step": 73560 }, { "epoch": 1.2424426655858412, "grad_norm": 0.19783809781074524, "learning_rate": 3.7697305068674558e-06, "loss": 0.0076, "step": 73570 }, { "epoch": 1.242611544567163, "grad_norm": 0.24409860372543335, "learning_rate": 3.7683021191832113e-06, "loss": 0.01, "step": 73580 }, { "epoch": 1.2427804235484852, "grad_norm": 0.21525108814239502, "learning_rate": 3.766873838505859e-06, "loss": 0.0058, "step": 73590 }, { "epoch": 1.242949302529807, "grad_norm": 0.22924844920635223, "learning_rate": 3.765445664959485e-06, "loss": 0.0086, "step": 73600 }, { "epoch": 1.2431181815111292, "grad_norm": 0.0720076635479927, "learning_rate": 3.764017598668166e-06, "loss": 0.0106, "step": 73610 }, { "epoch": 1.243287060492451, "grad_norm": 0.357442170381546, "learning_rate": 3.762589639755973e-06, "loss": 0.0073, "step": 73620 }, { "epoch": 1.243455939473773, "grad_norm": 0.34079834818840027, "learning_rate": 3.7611617883469574e-06, "loss": 0.0069, "step": 73630 }, { "epoch": 1.243624818455095, "grad_norm": 0.19295448064804077, "learning_rate": 3.7597340445651705e-06, "loss": 0.0088, "step": 73640 }, { "epoch": 1.243793697436417, "grad_norm": 0.19786480069160461, "learning_rate": 3.7583064085346498e-06, "loss": 0.0073, "step": 73650 }, { "epoch": 1.243962576417739, "grad_norm": 0.2486698478460312, "learning_rate": 3.7568788803794276e-06, "loss": 0.0089, "step": 73660 }, { "epoch": 1.244131455399061, "grad_norm": 0.3559258282184601, "learning_rate": 3.7554514602235205e-06, "loss": 0.01, "step": 73670 }, { "epoch": 1.244300334380383, "grad_norm": 0.34639421105384827, "learning_rate": 3.754024148190941e-06, "loss": 0.0075, "step": 73680 }, { "epoch": 1.244469213361705, "grad_norm": 0.21717974543571472, "learning_rate": 3.752596944405691e-06, "loss": 0.0077, "step": 73690 }, { "epoch": 1.244638092343027, "grad_norm": 0.2937442660331726, "learning_rate": 3.7511698489917634e-06, "loss": 0.0073, "step": 73700 }, { "epoch": 1.244806971324349, "grad_norm": 0.23412364721298218, "learning_rate": 3.7497428620731376e-06, "loss": 0.0106, "step": 73710 }, { "epoch": 1.244975850305671, "grad_norm": 0.34900030493736267, "learning_rate": 3.7483159837737877e-06, "loss": 0.0078, "step": 73720 }, { "epoch": 1.2451447292869928, "grad_norm": 0.27090054750442505, "learning_rate": 3.746889214217677e-06, "loss": 0.0073, "step": 73730 }, { "epoch": 1.245313608268315, "grad_norm": 0.32926997542381287, "learning_rate": 3.745462553528765e-06, "loss": 0.0071, "step": 73740 }, { "epoch": 1.2454824872496368, "grad_norm": 0.2642349600791931, "learning_rate": 3.744036001830989e-06, "loss": 0.005, "step": 73750 }, { "epoch": 1.245651366230959, "grad_norm": 0.3776201605796814, "learning_rate": 3.7426095592482882e-06, "loss": 0.0115, "step": 73760 }, { "epoch": 1.2458202452122809, "grad_norm": 0.21264077723026276, "learning_rate": 3.741183225904587e-06, "loss": 0.0074, "step": 73770 }, { "epoch": 1.2459891241936027, "grad_norm": 0.2588350176811218, "learning_rate": 3.7397570019238046e-06, "loss": 0.0074, "step": 73780 }, { "epoch": 1.2461580031749249, "grad_norm": 0.348982572555542, "learning_rate": 3.738330887429844e-06, "loss": 0.006, "step": 73790 }, { "epoch": 1.2463268821562468, "grad_norm": 0.2759462893009186, "learning_rate": 3.7369048825466046e-06, "loss": 0.0093, "step": 73800 }, { "epoch": 1.2464957611375689, "grad_norm": 0.19774897396564484, "learning_rate": 3.735478987397975e-06, "loss": 0.0047, "step": 73810 }, { "epoch": 1.2466646401188908, "grad_norm": 0.41877150535583496, "learning_rate": 3.734053202107833e-06, "loss": 0.0132, "step": 73820 }, { "epoch": 1.2468335191002127, "grad_norm": 0.30692341923713684, "learning_rate": 3.7326275268000457e-06, "loss": 0.0077, "step": 73830 }, { "epoch": 1.2470023980815348, "grad_norm": 0.1795741468667984, "learning_rate": 3.731201961598472e-06, "loss": 0.009, "step": 73840 }, { "epoch": 1.2471712770628567, "grad_norm": 0.19173161685466766, "learning_rate": 3.729776506626963e-06, "loss": 0.0054, "step": 73850 }, { "epoch": 1.2473401560441788, "grad_norm": 0.5036739110946655, "learning_rate": 3.7283511620093617e-06, "loss": 0.0091, "step": 73860 }, { "epoch": 1.2475090350255007, "grad_norm": 0.06312243640422821, "learning_rate": 3.7269259278694924e-06, "loss": 0.0067, "step": 73870 }, { "epoch": 1.2476779140068226, "grad_norm": 0.15540878474712372, "learning_rate": 3.7255008043311803e-06, "loss": 0.0092, "step": 73880 }, { "epoch": 1.2478467929881447, "grad_norm": 0.2705223262310028, "learning_rate": 3.7240757915182342e-06, "loss": 0.007, "step": 73890 }, { "epoch": 1.2480156719694666, "grad_norm": 0.19072139263153076, "learning_rate": 3.7226508895544584e-06, "loss": 0.0051, "step": 73900 }, { "epoch": 1.2481845509507887, "grad_norm": 0.22292520105838776, "learning_rate": 3.7212260985636416e-06, "loss": 0.0045, "step": 73910 }, { "epoch": 1.2483534299321106, "grad_norm": 0.016903594136238098, "learning_rate": 3.7198014186695688e-06, "loss": 0.0083, "step": 73920 }, { "epoch": 1.2485223089134325, "grad_norm": 0.12426161020994186, "learning_rate": 3.7183768499960117e-06, "loss": 0.0077, "step": 73930 }, { "epoch": 1.2486911878947546, "grad_norm": 0.5764173269271851, "learning_rate": 3.716952392666733e-06, "loss": 0.0116, "step": 73940 }, { "epoch": 1.2488600668760765, "grad_norm": 0.12414137274026871, "learning_rate": 3.7155280468054887e-06, "loss": 0.0047, "step": 73950 }, { "epoch": 1.2490289458573987, "grad_norm": 0.18357117474079132, "learning_rate": 3.7141038125360173e-06, "loss": 0.0099, "step": 73960 }, { "epoch": 1.2491978248387205, "grad_norm": 0.2108776569366455, "learning_rate": 3.712679689982056e-06, "loss": 0.0083, "step": 73970 }, { "epoch": 1.2493667038200424, "grad_norm": 0.2535441815853119, "learning_rate": 3.7112556792673297e-06, "loss": 0.0084, "step": 73980 }, { "epoch": 1.2495355828013646, "grad_norm": 0.14360012114048004, "learning_rate": 3.7098317805155544e-06, "loss": 0.0049, "step": 73990 }, { "epoch": 1.2497044617826865, "grad_norm": 0.058609865605831146, "learning_rate": 3.7084079938504296e-06, "loss": 0.0052, "step": 74000 }, { "epoch": 1.2498733407640086, "grad_norm": 0.23946665227413177, "learning_rate": 3.706984319395654e-06, "loss": 0.0067, "step": 74010 }, { "epoch": 1.2500422197453305, "grad_norm": 0.2181386947631836, "learning_rate": 3.7055607572749117e-06, "loss": 0.0071, "step": 74020 }, { "epoch": 1.2502110987266524, "grad_norm": 0.24842756986618042, "learning_rate": 3.7041373076118815e-06, "loss": 0.009, "step": 74030 }, { "epoch": 1.2503799777079745, "grad_norm": 0.3663182556629181, "learning_rate": 3.7027139705302238e-06, "loss": 0.0092, "step": 74040 }, { "epoch": 1.2505488566892964, "grad_norm": 0.161089226603508, "learning_rate": 3.7012907461535986e-06, "loss": 0.0093, "step": 74050 }, { "epoch": 1.2507177356706185, "grad_norm": 0.29735180735588074, "learning_rate": 3.6998676346056506e-06, "loss": 0.0055, "step": 74060 }, { "epoch": 1.2508866146519404, "grad_norm": 0.5812447667121887, "learning_rate": 3.698444636010019e-06, "loss": 0.0098, "step": 74070 }, { "epoch": 1.2510554936332623, "grad_norm": 0.2410133332014084, "learning_rate": 3.6970217504903248e-06, "loss": 0.0065, "step": 74080 }, { "epoch": 1.2512243726145844, "grad_norm": 0.3061872720718384, "learning_rate": 3.6955989781701894e-06, "loss": 0.0088, "step": 74090 }, { "epoch": 1.2513932515959063, "grad_norm": 0.3396587073802948, "learning_rate": 3.694176319173218e-06, "loss": 0.0079, "step": 74100 }, { "epoch": 1.2515621305772284, "grad_norm": 0.25755706429481506, "learning_rate": 3.6927537736230113e-06, "loss": 0.0072, "step": 74110 }, { "epoch": 1.2517310095585503, "grad_norm": 0.20748694241046906, "learning_rate": 3.6913313416431506e-06, "loss": 0.0059, "step": 74120 }, { "epoch": 1.2518998885398722, "grad_norm": 0.2887241244316101, "learning_rate": 3.6899090233572165e-06, "loss": 0.0096, "step": 74130 }, { "epoch": 1.2520687675211943, "grad_norm": 0.27828502655029297, "learning_rate": 3.6884868188887766e-06, "loss": 0.0064, "step": 74140 }, { "epoch": 1.2522376465025162, "grad_norm": 0.3903811573982239, "learning_rate": 3.6870647283613893e-06, "loss": 0.0079, "step": 74150 }, { "epoch": 1.2524065254838384, "grad_norm": 0.33260589838027954, "learning_rate": 3.685642751898599e-06, "loss": 0.0104, "step": 74160 }, { "epoch": 1.2525754044651602, "grad_norm": 0.27427971363067627, "learning_rate": 3.6842208896239466e-06, "loss": 0.0084, "step": 74170 }, { "epoch": 1.2527442834464821, "grad_norm": 0.2773388624191284, "learning_rate": 3.6827991416609587e-06, "loss": 0.0091, "step": 74180 }, { "epoch": 1.2529131624278043, "grad_norm": 0.18519484996795654, "learning_rate": 3.6813775081331554e-06, "loss": 0.0048, "step": 74190 }, { "epoch": 1.2530820414091262, "grad_norm": 0.1413937509059906, "learning_rate": 3.6799559891640417e-06, "loss": 0.0052, "step": 74200 }, { "epoch": 1.2532509203904483, "grad_norm": 0.4049247205257416, "learning_rate": 3.678534584877117e-06, "loss": 0.0089, "step": 74210 }, { "epoch": 1.2534197993717702, "grad_norm": 0.20720942318439484, "learning_rate": 3.67711329539587e-06, "loss": 0.0116, "step": 74220 }, { "epoch": 1.253588678353092, "grad_norm": 0.1559746414422989, "learning_rate": 3.67569212084378e-06, "loss": 0.007, "step": 74230 }, { "epoch": 1.2537575573344142, "grad_norm": 0.20387528836727142, "learning_rate": 3.674271061344311e-06, "loss": 0.0108, "step": 74240 }, { "epoch": 1.253926436315736, "grad_norm": 0.1437283754348755, "learning_rate": 3.672850117020924e-06, "loss": 0.0077, "step": 74250 }, { "epoch": 1.2540953152970582, "grad_norm": 0.14470282196998596, "learning_rate": 3.6714292879970668e-06, "loss": 0.0132, "step": 74260 }, { "epoch": 1.25426419427838, "grad_norm": 0.2571609914302826, "learning_rate": 3.670008574396179e-06, "loss": 0.0062, "step": 74270 }, { "epoch": 1.254433073259702, "grad_norm": 0.15763649344444275, "learning_rate": 3.6685879763416854e-06, "loss": 0.0075, "step": 74280 }, { "epoch": 1.254601952241024, "grad_norm": 0.11793103069067001, "learning_rate": 3.6671674939570066e-06, "loss": 0.0051, "step": 74290 }, { "epoch": 1.254770831222346, "grad_norm": 0.25507280230522156, "learning_rate": 3.6657471273655496e-06, "loss": 0.0069, "step": 74300 }, { "epoch": 1.2549397102036681, "grad_norm": 0.40255552530288696, "learning_rate": 3.6643268766907146e-06, "loss": 0.0106, "step": 74310 }, { "epoch": 1.25510858918499, "grad_norm": 0.3837962746620178, "learning_rate": 3.662906742055886e-06, "loss": 0.0075, "step": 74320 }, { "epoch": 1.255277468166312, "grad_norm": 0.12220504134893417, "learning_rate": 3.6614867235844432e-06, "loss": 0.0067, "step": 74330 }, { "epoch": 1.255446347147634, "grad_norm": 0.25787678360939026, "learning_rate": 3.6600668213997544e-06, "loss": 0.0064, "step": 74340 }, { "epoch": 1.255615226128956, "grad_norm": 0.23213444650173187, "learning_rate": 3.6586470356251786e-06, "loss": 0.0073, "step": 74350 }, { "epoch": 1.255784105110278, "grad_norm": 0.0825062245130539, "learning_rate": 3.6572273663840594e-06, "loss": 0.0077, "step": 74360 }, { "epoch": 1.2559529840916, "grad_norm": 0.3195607364177704, "learning_rate": 3.6558078137997364e-06, "loss": 0.0081, "step": 74370 }, { "epoch": 1.2561218630729218, "grad_norm": 0.3880787789821625, "learning_rate": 3.654388377995537e-06, "loss": 0.0073, "step": 74380 }, { "epoch": 1.256290742054244, "grad_norm": 0.15366502106189728, "learning_rate": 3.6529690590947776e-06, "loss": 0.0074, "step": 74390 }, { "epoch": 1.2564596210355659, "grad_norm": 0.2849588096141815, "learning_rate": 3.6515498572207664e-06, "loss": 0.0072, "step": 74400 }, { "epoch": 1.256628500016888, "grad_norm": 0.2598864436149597, "learning_rate": 3.6501307724967995e-06, "loss": 0.0128, "step": 74410 }, { "epoch": 1.2567973789982099, "grad_norm": 0.10078582912683487, "learning_rate": 3.6487118050461623e-06, "loss": 0.0069, "step": 74420 }, { "epoch": 1.2569662579795318, "grad_norm": 0.23824436962604523, "learning_rate": 3.6472929549921325e-06, "loss": 0.0045, "step": 74430 }, { "epoch": 1.2571351369608539, "grad_norm": 0.483810693025589, "learning_rate": 3.645874222457978e-06, "loss": 0.0076, "step": 74440 }, { "epoch": 1.2573040159421758, "grad_norm": 0.3709277808666229, "learning_rate": 3.6444556075669513e-06, "loss": 0.0081, "step": 74450 }, { "epoch": 1.257472894923498, "grad_norm": 0.38755983114242554, "learning_rate": 3.6430371104422997e-06, "loss": 0.0105, "step": 74460 }, { "epoch": 1.2576417739048198, "grad_norm": 0.34249478578567505, "learning_rate": 3.6416187312072587e-06, "loss": 0.0079, "step": 74470 }, { "epoch": 1.2578106528861417, "grad_norm": 0.533613383769989, "learning_rate": 3.6402004699850563e-06, "loss": 0.01, "step": 74480 }, { "epoch": 1.2579795318674638, "grad_norm": 0.25605055689811707, "learning_rate": 3.638782326898903e-06, "loss": 0.0062, "step": 74490 }, { "epoch": 1.2581484108487857, "grad_norm": 0.18442653119564056, "learning_rate": 3.637364302072005e-06, "loss": 0.0064, "step": 74500 }, { "epoch": 1.2583172898301078, "grad_norm": 0.1874728798866272, "learning_rate": 3.6359463956275583e-06, "loss": 0.0076, "step": 74510 }, { "epoch": 1.2584861688114297, "grad_norm": 0.21230395138263702, "learning_rate": 3.634528607688747e-06, "loss": 0.0053, "step": 74520 }, { "epoch": 1.2586550477927516, "grad_norm": 0.4515397250652313, "learning_rate": 3.6331109383787432e-06, "loss": 0.0063, "step": 74530 }, { "epoch": 1.2588239267740737, "grad_norm": 0.29178446531295776, "learning_rate": 3.6316933878207115e-06, "loss": 0.0081, "step": 74540 }, { "epoch": 1.2589928057553956, "grad_norm": 0.18999093770980835, "learning_rate": 3.6302759561378057e-06, "loss": 0.0061, "step": 74550 }, { "epoch": 1.2591616847367177, "grad_norm": 0.3154400587081909, "learning_rate": 3.62885864345317e-06, "loss": 0.0078, "step": 74560 }, { "epoch": 1.2593305637180396, "grad_norm": 0.22971440851688385, "learning_rate": 3.6274414498899336e-06, "loss": 0.0071, "step": 74570 }, { "epoch": 1.2594994426993615, "grad_norm": 0.18429569900035858, "learning_rate": 3.6260243755712218e-06, "loss": 0.011, "step": 74580 }, { "epoch": 1.2596683216806837, "grad_norm": 0.26147329807281494, "learning_rate": 3.6246074206201444e-06, "loss": 0.0106, "step": 74590 }, { "epoch": 1.2598372006620056, "grad_norm": 0.34320926666259766, "learning_rate": 3.623190585159807e-06, "loss": 0.0093, "step": 74600 }, { "epoch": 1.2600060796433277, "grad_norm": 0.281152606010437, "learning_rate": 3.6217738693132952e-06, "loss": 0.0074, "step": 74610 }, { "epoch": 1.2601749586246496, "grad_norm": 0.14293934404850006, "learning_rate": 3.620357273203693e-06, "loss": 0.0065, "step": 74620 }, { "epoch": 1.2603438376059715, "grad_norm": 0.2205793559551239, "learning_rate": 3.6189407969540695e-06, "loss": 0.0086, "step": 74630 }, { "epoch": 1.2605127165872936, "grad_norm": 0.26255157589912415, "learning_rate": 3.617524440687487e-06, "loss": 0.0107, "step": 74640 }, { "epoch": 1.2606815955686155, "grad_norm": 0.1544024795293808, "learning_rate": 3.6161082045269922e-06, "loss": 0.007, "step": 74650 }, { "epoch": 1.2608504745499376, "grad_norm": 0.8830717206001282, "learning_rate": 3.614692088595626e-06, "loss": 0.0148, "step": 74660 }, { "epoch": 1.2610193535312595, "grad_norm": 0.1897009313106537, "learning_rate": 3.613276093016416e-06, "loss": 0.0065, "step": 74670 }, { "epoch": 1.2611882325125814, "grad_norm": 0.18791905045509338, "learning_rate": 3.611860217912383e-06, "loss": 0.0117, "step": 74680 }, { "epoch": 1.2613571114939035, "grad_norm": 0.16812267899513245, "learning_rate": 3.6104444634065307e-06, "loss": 0.005, "step": 74690 }, { "epoch": 1.2615259904752254, "grad_norm": 0.2030843198299408, "learning_rate": 3.609028829621859e-06, "loss": 0.0086, "step": 74700 }, { "epoch": 1.2616948694565475, "grad_norm": 0.26506495475769043, "learning_rate": 3.607613316681354e-06, "loss": 0.0107, "step": 74710 }, { "epoch": 1.2618637484378694, "grad_norm": 0.402353435754776, "learning_rate": 3.606197924707995e-06, "loss": 0.0112, "step": 74720 }, { "epoch": 1.2620326274191913, "grad_norm": 0.33372828364372253, "learning_rate": 3.6047826538247423e-06, "loss": 0.0077, "step": 74730 }, { "epoch": 1.2622015064005134, "grad_norm": 0.297248512506485, "learning_rate": 3.6033675041545534e-06, "loss": 0.01, "step": 74740 }, { "epoch": 1.2623703853818353, "grad_norm": 0.19897779822349548, "learning_rate": 3.6019524758203727e-06, "loss": 0.0061, "step": 74750 }, { "epoch": 1.2625392643631574, "grad_norm": 0.3695213198661804, "learning_rate": 3.600537568945139e-06, "loss": 0.0062, "step": 74760 }, { "epoch": 1.2627081433444793, "grad_norm": 0.2874540090560913, "learning_rate": 3.5991227836517696e-06, "loss": 0.0077, "step": 74770 }, { "epoch": 1.2628770223258012, "grad_norm": 0.15911412239074707, "learning_rate": 3.59770812006318e-06, "loss": 0.01, "step": 74780 }, { "epoch": 1.2630459013071234, "grad_norm": 0.2510358691215515, "learning_rate": 3.596293578302272e-06, "loss": 0.0067, "step": 74790 }, { "epoch": 1.2632147802884452, "grad_norm": 0.4126601815223694, "learning_rate": 3.5948791584919407e-06, "loss": 0.0083, "step": 74800 }, { "epoch": 1.2633836592697674, "grad_norm": 0.15415127575397491, "learning_rate": 3.5934648607550634e-06, "loss": 0.0095, "step": 74810 }, { "epoch": 1.2635525382510893, "grad_norm": 0.28196433186531067, "learning_rate": 3.592050685214512e-06, "loss": 0.0083, "step": 74820 }, { "epoch": 1.2637214172324112, "grad_norm": 0.3101348876953125, "learning_rate": 3.5906366319931474e-06, "loss": 0.0098, "step": 74830 }, { "epoch": 1.2638902962137333, "grad_norm": 0.28407904505729675, "learning_rate": 3.589222701213819e-06, "loss": 0.0053, "step": 74840 }, { "epoch": 1.2640591751950552, "grad_norm": 0.30389469861984253, "learning_rate": 3.5878088929993674e-06, "loss": 0.0109, "step": 74850 }, { "epoch": 1.2642280541763773, "grad_norm": 0.27213624119758606, "learning_rate": 3.586395207472617e-06, "loss": 0.0069, "step": 74860 }, { "epoch": 1.2643969331576992, "grad_norm": 0.4454040825366974, "learning_rate": 3.5849816447563855e-06, "loss": 0.0106, "step": 74870 }, { "epoch": 1.264565812139021, "grad_norm": 0.35279157757759094, "learning_rate": 3.583568204973483e-06, "loss": 0.0054, "step": 74880 }, { "epoch": 1.2647346911203432, "grad_norm": 0.3215842545032501, "learning_rate": 3.582154888246705e-06, "loss": 0.0069, "step": 74890 }, { "epoch": 1.264903570101665, "grad_norm": 0.25571876764297485, "learning_rate": 3.580741694698835e-06, "loss": 0.01, "step": 74900 }, { "epoch": 1.2650724490829872, "grad_norm": 0.6149976253509521, "learning_rate": 3.5793286244526487e-06, "loss": 0.0185, "step": 74910 }, { "epoch": 1.2652413280643091, "grad_norm": 0.4494854807853699, "learning_rate": 3.57791567763091e-06, "loss": 0.0099, "step": 74920 }, { "epoch": 1.265410207045631, "grad_norm": 0.49523329734802246, "learning_rate": 3.576502854356374e-06, "loss": 0.0079, "step": 74930 }, { "epoch": 1.2655790860269531, "grad_norm": 0.4842776358127594, "learning_rate": 3.5750901547517806e-06, "loss": 0.0093, "step": 74940 }, { "epoch": 1.265747965008275, "grad_norm": 0.3712717592716217, "learning_rate": 3.573677578939863e-06, "loss": 0.0059, "step": 74950 }, { "epoch": 1.2659168439895971, "grad_norm": 0.3180020749568939, "learning_rate": 3.572265127043343e-06, "loss": 0.0065, "step": 74960 }, { "epoch": 1.266085722970919, "grad_norm": 0.26546958088874817, "learning_rate": 3.570852799184932e-06, "loss": 0.0078, "step": 74970 }, { "epoch": 1.266254601952241, "grad_norm": 0.1731143444776535, "learning_rate": 3.569440595487324e-06, "loss": 0.0097, "step": 74980 }, { "epoch": 1.266423480933563, "grad_norm": 0.3297431766986847, "learning_rate": 3.568028516073213e-06, "loss": 0.0106, "step": 74990 }, { "epoch": 1.266592359914885, "grad_norm": 0.1808425337076187, "learning_rate": 3.566616561065276e-06, "loss": 0.0086, "step": 75000 }, { "epoch": 1.266761238896207, "grad_norm": 0.21002814173698425, "learning_rate": 3.565204730586182e-06, "loss": 0.0064, "step": 75010 }, { "epoch": 1.266930117877529, "grad_norm": 0.26531511545181274, "learning_rate": 3.563793024758583e-06, "loss": 0.0086, "step": 75020 }, { "epoch": 1.2670989968588509, "grad_norm": 0.16497504711151123, "learning_rate": 3.5623814437051262e-06, "loss": 0.0072, "step": 75030 }, { "epoch": 1.267267875840173, "grad_norm": 0.2664133906364441, "learning_rate": 3.560969987548448e-06, "loss": 0.0076, "step": 75040 }, { "epoch": 1.2674367548214949, "grad_norm": 0.4308799207210541, "learning_rate": 3.559558656411171e-06, "loss": 0.0078, "step": 75050 }, { "epoch": 1.267605633802817, "grad_norm": 0.21942433714866638, "learning_rate": 3.558147450415908e-06, "loss": 0.0075, "step": 75060 }, { "epoch": 1.2677745127841389, "grad_norm": 0.8877831101417542, "learning_rate": 3.5567363696852614e-06, "loss": 0.0141, "step": 75070 }, { "epoch": 1.2679433917654608, "grad_norm": 0.19324654340744019, "learning_rate": 3.5553254143418224e-06, "loss": 0.006, "step": 75080 }, { "epoch": 1.268112270746783, "grad_norm": 0.1688198298215866, "learning_rate": 3.5539145845081735e-06, "loss": 0.0067, "step": 75090 }, { "epoch": 1.2682811497281048, "grad_norm": 0.3305750787258148, "learning_rate": 3.5525038803068783e-06, "loss": 0.0094, "step": 75100 }, { "epoch": 1.268450028709427, "grad_norm": 0.1456054300069809, "learning_rate": 3.551093301860501e-06, "loss": 0.0112, "step": 75110 }, { "epoch": 1.2686189076907488, "grad_norm": 0.15948450565338135, "learning_rate": 3.549682849291587e-06, "loss": 0.0126, "step": 75120 }, { "epoch": 1.2687877866720707, "grad_norm": 0.25525468587875366, "learning_rate": 3.548272522722675e-06, "loss": 0.007, "step": 75130 }, { "epoch": 1.2689566656533928, "grad_norm": 0.16151708364486694, "learning_rate": 3.5468623222762867e-06, "loss": 0.0056, "step": 75140 }, { "epoch": 1.2691255446347147, "grad_norm": 0.21774795651435852, "learning_rate": 3.5454522480749397e-06, "loss": 0.0081, "step": 75150 }, { "epoch": 1.2692944236160368, "grad_norm": 0.37515532970428467, "learning_rate": 3.544042300241137e-06, "loss": 0.0083, "step": 75160 }, { "epoch": 1.2694633025973587, "grad_norm": 0.21986491978168488, "learning_rate": 3.5426324788973726e-06, "loss": 0.0094, "step": 75170 }, { "epoch": 1.2696321815786806, "grad_norm": 0.18041673302650452, "learning_rate": 3.541222784166126e-06, "loss": 0.0068, "step": 75180 }, { "epoch": 1.2698010605600027, "grad_norm": 0.1939111202955246, "learning_rate": 3.53981321616987e-06, "loss": 0.0051, "step": 75190 }, { "epoch": 1.2699699395413246, "grad_norm": 0.11824183166027069, "learning_rate": 3.5384037750310634e-06, "loss": 0.0059, "step": 75200 }, { "epoch": 1.2701388185226468, "grad_norm": 0.17716708779335022, "learning_rate": 3.536994460872157e-06, "loss": 0.0063, "step": 75210 }, { "epoch": 1.2703076975039687, "grad_norm": 0.2129250168800354, "learning_rate": 3.535585273815585e-06, "loss": 0.0065, "step": 75220 }, { "epoch": 1.2704765764852906, "grad_norm": 0.19774799048900604, "learning_rate": 3.5341762139837767e-06, "loss": 0.0065, "step": 75230 }, { "epoch": 1.2706454554666127, "grad_norm": 0.35435009002685547, "learning_rate": 3.5327672814991464e-06, "loss": 0.0076, "step": 75240 }, { "epoch": 1.2708143344479346, "grad_norm": 0.5839194059371948, "learning_rate": 3.5313584764841025e-06, "loss": 0.0108, "step": 75250 }, { "epoch": 1.2709832134292567, "grad_norm": 0.2844749987125397, "learning_rate": 3.529949799061032e-06, "loss": 0.0096, "step": 75260 }, { "epoch": 1.2711520924105786, "grad_norm": 0.3027724623680115, "learning_rate": 3.528541249352322e-06, "loss": 0.007, "step": 75270 }, { "epoch": 1.2713209713919005, "grad_norm": 0.4398250877857208, "learning_rate": 3.5271328274803416e-06, "loss": 0.0078, "step": 75280 }, { "epoch": 1.2714898503732226, "grad_norm": 0.19001257419586182, "learning_rate": 3.5257245335674527e-06, "loss": 0.0053, "step": 75290 }, { "epoch": 1.2716587293545445, "grad_norm": 0.09115243703126907, "learning_rate": 3.524316367736005e-06, "loss": 0.0054, "step": 75300 }, { "epoch": 1.2718276083358666, "grad_norm": 0.20111767947673798, "learning_rate": 3.5229083301083344e-06, "loss": 0.0075, "step": 75310 }, { "epoch": 1.2719964873171885, "grad_norm": 0.3285139203071594, "learning_rate": 3.5215004208067683e-06, "loss": 0.0063, "step": 75320 }, { "epoch": 1.2721653662985104, "grad_norm": 0.1611223965883255, "learning_rate": 3.5200926399536228e-06, "loss": 0.0085, "step": 75330 }, { "epoch": 1.2723342452798325, "grad_norm": 0.15610730648040771, "learning_rate": 3.518684987671204e-06, "loss": 0.0041, "step": 75340 }, { "epoch": 1.2725031242611544, "grad_norm": 0.37431997060775757, "learning_rate": 3.5172774640818015e-06, "loss": 0.0075, "step": 75350 }, { "epoch": 1.2726720032424765, "grad_norm": 0.38093364238739014, "learning_rate": 3.5158700693077e-06, "loss": 0.0097, "step": 75360 }, { "epoch": 1.2728408822237984, "grad_norm": 0.25522318482398987, "learning_rate": 3.5144628034711704e-06, "loss": 0.0064, "step": 75370 }, { "epoch": 1.2730097612051203, "grad_norm": 0.26114270091056824, "learning_rate": 3.513055666694475e-06, "loss": 0.0062, "step": 75380 }, { "epoch": 1.2731786401864424, "grad_norm": 0.11578971147537231, "learning_rate": 3.5116486590998567e-06, "loss": 0.0081, "step": 75390 }, { "epoch": 1.2733475191677643, "grad_norm": 0.31538593769073486, "learning_rate": 3.510241780809556e-06, "loss": 0.0069, "step": 75400 }, { "epoch": 1.2735163981490865, "grad_norm": 0.35447365045547485, "learning_rate": 3.5088350319458e-06, "loss": 0.0076, "step": 75410 }, { "epoch": 1.2736852771304084, "grad_norm": 0.14822711050510406, "learning_rate": 3.507428412630802e-06, "loss": 0.0103, "step": 75420 }, { "epoch": 1.2738541561117303, "grad_norm": 0.2533973455429077, "learning_rate": 3.5060219229867664e-06, "loss": 0.0062, "step": 75430 }, { "epoch": 1.2740230350930524, "grad_norm": 0.30657193064689636, "learning_rate": 3.5046155631358845e-06, "loss": 0.0082, "step": 75440 }, { "epoch": 1.2741919140743743, "grad_norm": 0.15176577866077423, "learning_rate": 3.5032093332003386e-06, "loss": 0.0059, "step": 75450 }, { "epoch": 1.2743607930556964, "grad_norm": 0.24200142920017242, "learning_rate": 3.5018032333022987e-06, "loss": 0.0095, "step": 75460 }, { "epoch": 1.2745296720370183, "grad_norm": 0.24000561237335205, "learning_rate": 3.5003972635639227e-06, "loss": 0.0084, "step": 75470 }, { "epoch": 1.2746985510183402, "grad_norm": 0.176400825381279, "learning_rate": 3.498991424107357e-06, "loss": 0.004, "step": 75480 }, { "epoch": 1.2748674299996623, "grad_norm": 0.2877437472343445, "learning_rate": 3.497585715054739e-06, "loss": 0.0078, "step": 75490 }, { "epoch": 1.2750363089809842, "grad_norm": 0.15094074606895447, "learning_rate": 3.4961801365281937e-06, "loss": 0.008, "step": 75500 }, { "epoch": 1.2752051879623063, "grad_norm": 0.1860654354095459, "learning_rate": 3.494774688649832e-06, "loss": 0.0069, "step": 75510 }, { "epoch": 1.2753740669436282, "grad_norm": 0.2987332344055176, "learning_rate": 3.493369371541756e-06, "loss": 0.0114, "step": 75520 }, { "epoch": 1.27554294592495, "grad_norm": 0.38204216957092285, "learning_rate": 3.4919641853260583e-06, "loss": 0.0064, "step": 75530 }, { "epoch": 1.2757118249062722, "grad_norm": 0.2379399538040161, "learning_rate": 3.4905591301248178e-06, "loss": 0.0068, "step": 75540 }, { "epoch": 1.2758807038875941, "grad_norm": 0.23044875264167786, "learning_rate": 3.4891542060600996e-06, "loss": 0.0093, "step": 75550 }, { "epoch": 1.2760495828689162, "grad_norm": 0.20040163397789001, "learning_rate": 3.4877494132539626e-06, "loss": 0.0074, "step": 75560 }, { "epoch": 1.2762184618502381, "grad_norm": 0.24217504262924194, "learning_rate": 3.4863447518284505e-06, "loss": 0.0088, "step": 75570 }, { "epoch": 1.27638734083156, "grad_norm": 0.3156968653202057, "learning_rate": 3.4849402219055985e-06, "loss": 0.0071, "step": 75580 }, { "epoch": 1.2765562198128821, "grad_norm": 0.11169972270727158, "learning_rate": 3.4835358236074266e-06, "loss": 0.0066, "step": 75590 }, { "epoch": 1.276725098794204, "grad_norm": 0.2128584235906601, "learning_rate": 3.4821315570559466e-06, "loss": 0.0072, "step": 75600 }, { "epoch": 1.2768939777755262, "grad_norm": 0.3470707833766937, "learning_rate": 3.4807274223731575e-06, "loss": 0.0128, "step": 75610 }, { "epoch": 1.277062856756848, "grad_norm": 0.24959179759025574, "learning_rate": 3.4793234196810486e-06, "loss": 0.0076, "step": 75620 }, { "epoch": 1.27723173573817, "grad_norm": 0.43305516242980957, "learning_rate": 3.477919549101594e-06, "loss": 0.0084, "step": 75630 }, { "epoch": 1.277400614719492, "grad_norm": 0.38319873809814453, "learning_rate": 3.4765158107567583e-06, "loss": 0.0092, "step": 75640 }, { "epoch": 1.277569493700814, "grad_norm": 0.22737447917461395, "learning_rate": 3.475112204768496e-06, "loss": 0.0054, "step": 75650 }, { "epoch": 1.277738372682136, "grad_norm": 0.5364413857460022, "learning_rate": 3.4737087312587502e-06, "loss": 0.0072, "step": 75660 }, { "epoch": 1.277907251663458, "grad_norm": 0.2860819399356842, "learning_rate": 3.4723053903494486e-06, "loss": 0.0075, "step": 75670 }, { "epoch": 1.2780761306447799, "grad_norm": 0.5203148722648621, "learning_rate": 3.470902182162511e-06, "loss": 0.0106, "step": 75680 }, { "epoch": 1.278245009626102, "grad_norm": 0.20560018718242645, "learning_rate": 3.469499106819845e-06, "loss": 0.0066, "step": 75690 }, { "epoch": 1.2784138886074239, "grad_norm": 0.28353482484817505, "learning_rate": 3.4680961644433476e-06, "loss": 0.0087, "step": 75700 }, { "epoch": 1.278582767588746, "grad_norm": 0.337650328874588, "learning_rate": 3.466693355154901e-06, "loss": 0.0058, "step": 75710 }, { "epoch": 1.278751646570068, "grad_norm": 0.27772828936576843, "learning_rate": 3.4652906790763772e-06, "loss": 0.0104, "step": 75720 }, { "epoch": 1.2789205255513898, "grad_norm": 0.11472299695014954, "learning_rate": 3.4638881363296395e-06, "loss": 0.005, "step": 75730 }, { "epoch": 1.279089404532712, "grad_norm": 0.44906091690063477, "learning_rate": 3.4624857270365362e-06, "loss": 0.0079, "step": 75740 }, { "epoch": 1.2792582835140338, "grad_norm": 0.12250755727291107, "learning_rate": 3.4610834513189083e-06, "loss": 0.0062, "step": 75750 }, { "epoch": 1.279427162495356, "grad_norm": 0.1814729869365692, "learning_rate": 3.459681309298577e-06, "loss": 0.0102, "step": 75760 }, { "epoch": 1.2795960414766778, "grad_norm": 0.20197820663452148, "learning_rate": 3.4582793010973577e-06, "loss": 0.0073, "step": 75770 }, { "epoch": 1.2797649204579997, "grad_norm": 0.18632732331752777, "learning_rate": 3.4568774268370557e-06, "loss": 0.0085, "step": 75780 }, { "epoch": 1.2799337994393218, "grad_norm": 0.19109079241752625, "learning_rate": 3.4554756866394644e-06, "loss": 0.0048, "step": 75790 }, { "epoch": 1.2801026784206437, "grad_norm": 0.15802207589149475, "learning_rate": 3.4540740806263585e-06, "loss": 0.0067, "step": 75800 }, { "epoch": 1.2802715574019659, "grad_norm": 0.45784303545951843, "learning_rate": 3.4526726089195078e-06, "loss": 0.0061, "step": 75810 }, { "epoch": 1.2804404363832878, "grad_norm": 0.2682691812515259, "learning_rate": 3.4512712716406694e-06, "loss": 0.0089, "step": 75820 }, { "epoch": 1.2806093153646096, "grad_norm": 0.17585457861423492, "learning_rate": 3.449870068911589e-06, "loss": 0.0081, "step": 75830 }, { "epoch": 1.2807781943459318, "grad_norm": 0.24483714997768402, "learning_rate": 3.448469000853999e-06, "loss": 0.0074, "step": 75840 }, { "epoch": 1.2809470733272537, "grad_norm": 0.17757035791873932, "learning_rate": 3.4470680675896182e-06, "loss": 0.0064, "step": 75850 }, { "epoch": 1.2811159523085758, "grad_norm": 0.18366625905036926, "learning_rate": 3.4456672692401606e-06, "loss": 0.0104, "step": 75860 }, { "epoch": 1.2812848312898977, "grad_norm": 0.261635422706604, "learning_rate": 3.444266605927322e-06, "loss": 0.0067, "step": 75870 }, { "epoch": 1.2814537102712196, "grad_norm": 0.09060463309288025, "learning_rate": 3.4428660777727877e-06, "loss": 0.0117, "step": 75880 }, { "epoch": 1.2816225892525417, "grad_norm": 0.11781081557273865, "learning_rate": 3.441465684898232e-06, "loss": 0.007, "step": 75890 }, { "epoch": 1.2817914682338636, "grad_norm": 0.23328343033790588, "learning_rate": 3.4400654274253185e-06, "loss": 0.0052, "step": 75900 }, { "epoch": 1.2819603472151857, "grad_norm": 0.15631213784217834, "learning_rate": 3.4386653054757014e-06, "loss": 0.007, "step": 75910 }, { "epoch": 1.2821292261965076, "grad_norm": 0.4013758897781372, "learning_rate": 3.437265319171015e-06, "loss": 0.0082, "step": 75920 }, { "epoch": 1.2822981051778295, "grad_norm": 0.28723928332328796, "learning_rate": 3.4358654686328874e-06, "loss": 0.007, "step": 75930 }, { "epoch": 1.2824669841591516, "grad_norm": 0.33214032649993896, "learning_rate": 3.4344657539829352e-06, "loss": 0.0081, "step": 75940 }, { "epoch": 1.2826358631404735, "grad_norm": 0.1823393851518631, "learning_rate": 3.4330661753427633e-06, "loss": 0.0112, "step": 75950 }, { "epoch": 1.2828047421217956, "grad_norm": 0.28283625841140747, "learning_rate": 3.431666732833962e-06, "loss": 0.0079, "step": 75960 }, { "epoch": 1.2829736211031175, "grad_norm": 0.13517490029335022, "learning_rate": 3.4302674265781116e-06, "loss": 0.0078, "step": 75970 }, { "epoch": 1.2831425000844394, "grad_norm": 0.38377097249031067, "learning_rate": 3.4288682566967807e-06, "loss": 0.0096, "step": 75980 }, { "epoch": 1.2833113790657615, "grad_norm": 0.27927538752555847, "learning_rate": 3.427469223311528e-06, "loss": 0.0075, "step": 75990 }, { "epoch": 1.2834802580470834, "grad_norm": 0.16138030588626862, "learning_rate": 3.4260703265438937e-06, "loss": 0.0073, "step": 76000 }, { "epoch": 1.2836491370284056, "grad_norm": 0.20143862068653107, "learning_rate": 3.424671566515412e-06, "loss": 0.0058, "step": 76010 }, { "epoch": 1.2838180160097274, "grad_norm": 0.32463333010673523, "learning_rate": 3.4232729433476055e-06, "loss": 0.0084, "step": 76020 }, { "epoch": 1.2839868949910493, "grad_norm": 0.41062068939208984, "learning_rate": 3.4218744571619834e-06, "loss": 0.0065, "step": 76030 }, { "epoch": 1.2841557739723715, "grad_norm": 0.09260699898004532, "learning_rate": 3.42047610808004e-06, "loss": 0.008, "step": 76040 }, { "epoch": 1.2843246529536934, "grad_norm": 0.3950791358947754, "learning_rate": 3.4190778962232617e-06, "loss": 0.008, "step": 76050 }, { "epoch": 1.2844935319350155, "grad_norm": 0.4547955095767975, "learning_rate": 3.417679821713122e-06, "loss": 0.0061, "step": 76060 }, { "epoch": 1.2846624109163374, "grad_norm": 0.2660565674304962, "learning_rate": 3.416281884671083e-06, "loss": 0.0115, "step": 76070 }, { "epoch": 1.2848312898976593, "grad_norm": 0.6099165678024292, "learning_rate": 3.414884085218592e-06, "loss": 0.0105, "step": 76080 }, { "epoch": 1.2850001688789814, "grad_norm": 0.2018381804227829, "learning_rate": 3.4134864234770883e-06, "loss": 0.0076, "step": 76090 }, { "epoch": 1.2851690478603033, "grad_norm": 0.2952708303928375, "learning_rate": 3.4120888995679957e-06, "loss": 0.0078, "step": 76100 }, { "epoch": 1.2853379268416254, "grad_norm": 0.06053813919425011, "learning_rate": 3.410691513612731e-06, "loss": 0.0081, "step": 76110 }, { "epoch": 1.2855068058229473, "grad_norm": 0.17342892289161682, "learning_rate": 3.40929426573269e-06, "loss": 0.0062, "step": 76120 }, { "epoch": 1.2856756848042692, "grad_norm": 0.20802193880081177, "learning_rate": 3.4078971560492667e-06, "loss": 0.0072, "step": 76130 }, { "epoch": 1.2858445637855913, "grad_norm": 0.19221903383731842, "learning_rate": 3.406500184683837e-06, "loss": 0.0044, "step": 76140 }, { "epoch": 1.2860134427669132, "grad_norm": 0.12134389579296112, "learning_rate": 3.4051033517577686e-06, "loss": 0.0055, "step": 76150 }, { "epoch": 1.2861823217482353, "grad_norm": 0.12303811311721802, "learning_rate": 3.403706657392411e-06, "loss": 0.005, "step": 76160 }, { "epoch": 1.2863512007295572, "grad_norm": 0.1943042129278183, "learning_rate": 3.4023101017091077e-06, "loss": 0.0081, "step": 76170 }, { "epoch": 1.2865200797108791, "grad_norm": 0.25757312774658203, "learning_rate": 3.400913684829187e-06, "loss": 0.0076, "step": 76180 }, { "epoch": 1.2866889586922012, "grad_norm": 0.04292551800608635, "learning_rate": 3.399517406873968e-06, "loss": 0.0064, "step": 76190 }, { "epoch": 1.2868578376735231, "grad_norm": 0.23412638902664185, "learning_rate": 3.398121267964755e-06, "loss": 0.0125, "step": 76200 }, { "epoch": 1.2870267166548452, "grad_norm": 0.4448637068271637, "learning_rate": 3.3967252682228406e-06, "loss": 0.0054, "step": 76210 }, { "epoch": 1.2871955956361671, "grad_norm": 0.31751012802124023, "learning_rate": 3.3953294077695065e-06, "loss": 0.0074, "step": 76220 }, { "epoch": 1.287364474617489, "grad_norm": 0.16605499386787415, "learning_rate": 3.393933686726021e-06, "loss": 0.0061, "step": 76230 }, { "epoch": 1.2875333535988112, "grad_norm": 0.16709193587303162, "learning_rate": 3.392538105213642e-06, "loss": 0.007, "step": 76240 }, { "epoch": 1.287702232580133, "grad_norm": 0.45141229033470154, "learning_rate": 3.3911426633536133e-06, "loss": 0.0071, "step": 76250 }, { "epoch": 1.2878711115614552, "grad_norm": 0.2615332007408142, "learning_rate": 3.389747361267167e-06, "loss": 0.0075, "step": 76260 }, { "epoch": 1.288039990542777, "grad_norm": 0.06484338641166687, "learning_rate": 3.388352199075525e-06, "loss": 0.0091, "step": 76270 }, { "epoch": 1.288208869524099, "grad_norm": 0.24299710988998413, "learning_rate": 3.386957176899896e-06, "loss": 0.0076, "step": 76280 }, { "epoch": 1.288377748505421, "grad_norm": 0.1848417967557907, "learning_rate": 3.3855622948614732e-06, "loss": 0.0074, "step": 76290 }, { "epoch": 1.288546627486743, "grad_norm": 0.49629688262939453, "learning_rate": 3.3841675530814423e-06, "loss": 0.0082, "step": 76300 }, { "epoch": 1.288715506468065, "grad_norm": 0.11043436080217361, "learning_rate": 3.382772951680975e-06, "loss": 0.0103, "step": 76310 }, { "epoch": 1.288884385449387, "grad_norm": 0.20306357741355896, "learning_rate": 3.381378490781232e-06, "loss": 0.0108, "step": 76320 }, { "epoch": 1.289053264430709, "grad_norm": 0.09997536987066269, "learning_rate": 3.379984170503359e-06, "loss": 0.0064, "step": 76330 }, { "epoch": 1.289222143412031, "grad_norm": 0.3597079813480377, "learning_rate": 3.3785899909684903e-06, "loss": 0.0067, "step": 76340 }, { "epoch": 1.289391022393353, "grad_norm": 0.11882035434246063, "learning_rate": 3.3771959522977505e-06, "loss": 0.005, "step": 76350 }, { "epoch": 1.289559901374675, "grad_norm": 0.24651101231575012, "learning_rate": 3.3758020546122515e-06, "loss": 0.0058, "step": 76360 }, { "epoch": 1.289728780355997, "grad_norm": 0.21340034902095795, "learning_rate": 3.374408298033089e-06, "loss": 0.0074, "step": 76370 }, { "epoch": 1.2898976593373188, "grad_norm": 0.204790860414505, "learning_rate": 3.3730146826813497e-06, "loss": 0.0057, "step": 76380 }, { "epoch": 1.290066538318641, "grad_norm": 0.3942287862300873, "learning_rate": 3.3716212086781086e-06, "loss": 0.0108, "step": 76390 }, { "epoch": 1.2902354172999628, "grad_norm": 0.21153530478477478, "learning_rate": 3.3702278761444283e-06, "loss": 0.0128, "step": 76400 }, { "epoch": 1.290404296281285, "grad_norm": 0.20528532564640045, "learning_rate": 3.368834685201355e-06, "loss": 0.0099, "step": 76410 }, { "epoch": 1.2905731752626068, "grad_norm": 0.1571851670742035, "learning_rate": 3.367441635969927e-06, "loss": 0.0062, "step": 76420 }, { "epoch": 1.2907420542439287, "grad_norm": 0.14839041233062744, "learning_rate": 3.3660487285711698e-06, "loss": 0.0062, "step": 76430 }, { "epoch": 1.2909109332252509, "grad_norm": 0.2823587954044342, "learning_rate": 3.3646559631260954e-06, "loss": 0.0105, "step": 76440 }, { "epoch": 1.2910798122065728, "grad_norm": 0.3313886821269989, "learning_rate": 3.3632633397557035e-06, "loss": 0.0114, "step": 76450 }, { "epoch": 1.2912486911878949, "grad_norm": 0.1651046723127365, "learning_rate": 3.361870858580981e-06, "loss": 0.0073, "step": 76460 }, { "epoch": 1.2914175701692168, "grad_norm": 0.356820285320282, "learning_rate": 3.360478519722905e-06, "loss": 0.0079, "step": 76470 }, { "epoch": 1.2915864491505387, "grad_norm": 0.270885705947876, "learning_rate": 3.3590863233024397e-06, "loss": 0.014, "step": 76480 }, { "epoch": 1.2917553281318608, "grad_norm": 0.1713687926530838, "learning_rate": 3.357694269440531e-06, "loss": 0.0047, "step": 76490 }, { "epoch": 1.2919242071131827, "grad_norm": 0.30697470903396606, "learning_rate": 3.3563023582581213e-06, "loss": 0.0131, "step": 76500 }, { "epoch": 1.2920930860945048, "grad_norm": 0.2715558409690857, "learning_rate": 3.354910589876134e-06, "loss": 0.0086, "step": 76510 }, { "epoch": 1.2922619650758267, "grad_norm": 0.13072945177555084, "learning_rate": 3.3535189644154865e-06, "loss": 0.0107, "step": 76520 }, { "epoch": 1.2924308440571486, "grad_norm": 0.14055050909519196, "learning_rate": 3.352127481997074e-06, "loss": 0.0066, "step": 76530 }, { "epoch": 1.2925997230384707, "grad_norm": 0.35599201917648315, "learning_rate": 3.3507361427417882e-06, "loss": 0.0073, "step": 76540 }, { "epoch": 1.2927686020197926, "grad_norm": 0.2884027361869812, "learning_rate": 3.3493449467705053e-06, "loss": 0.0078, "step": 76550 }, { "epoch": 1.2929374810011147, "grad_norm": 0.2645648717880249, "learning_rate": 3.3479538942040894e-06, "loss": 0.008, "step": 76560 }, { "epoch": 1.2931063599824366, "grad_norm": 0.20062515139579773, "learning_rate": 3.3465629851633907e-06, "loss": 0.0082, "step": 76570 }, { "epoch": 1.2932752389637585, "grad_norm": 0.20254433155059814, "learning_rate": 3.3451722197692477e-06, "loss": 0.0056, "step": 76580 }, { "epoch": 1.2934441179450806, "grad_norm": 0.5189048051834106, "learning_rate": 3.343781598142487e-06, "loss": 0.0046, "step": 76590 }, { "epoch": 1.2936129969264025, "grad_norm": 0.38620609045028687, "learning_rate": 3.3423911204039245e-06, "loss": 0.0065, "step": 76600 }, { "epoch": 1.2937818759077246, "grad_norm": 0.20549604296684265, "learning_rate": 3.341000786674358e-06, "loss": 0.0061, "step": 76610 }, { "epoch": 1.2939507548890465, "grad_norm": 0.30846869945526123, "learning_rate": 3.3396105970745783e-06, "loss": 0.0065, "step": 76620 }, { "epoch": 1.2941196338703684, "grad_norm": 0.12589919567108154, "learning_rate": 3.338220551725362e-06, "loss": 0.0053, "step": 76630 }, { "epoch": 1.2942885128516906, "grad_norm": 0.2225256860256195, "learning_rate": 3.3368306507474714e-06, "loss": 0.0064, "step": 76640 }, { "epoch": 1.2944573918330124, "grad_norm": 0.11248483508825302, "learning_rate": 3.33544089426166e-06, "loss": 0.0068, "step": 76650 }, { "epoch": 1.2946262708143346, "grad_norm": 0.2322070449590683, "learning_rate": 3.334051282388664e-06, "loss": 0.0068, "step": 76660 }, { "epoch": 1.2947951497956565, "grad_norm": 0.22254173457622528, "learning_rate": 3.3326618152492098e-06, "loss": 0.0076, "step": 76670 }, { "epoch": 1.2949640287769784, "grad_norm": 0.1084233745932579, "learning_rate": 3.3312724929640107e-06, "loss": 0.0045, "step": 76680 }, { "epoch": 1.2951329077583005, "grad_norm": 0.23281224071979523, "learning_rate": 3.329883315653771e-06, "loss": 0.0046, "step": 76690 }, { "epoch": 1.2953017867396224, "grad_norm": 0.34757763147354126, "learning_rate": 3.328494283439175e-06, "loss": 0.0153, "step": 76700 }, { "epoch": 1.2954706657209445, "grad_norm": 0.30541756749153137, "learning_rate": 3.3271053964408997e-06, "loss": 0.0045, "step": 76710 }, { "epoch": 1.2956395447022664, "grad_norm": 0.2066415250301361, "learning_rate": 3.3257166547796077e-06, "loss": 0.0068, "step": 76720 }, { "epoch": 1.2958084236835883, "grad_norm": 0.13935980200767517, "learning_rate": 3.324328058575952e-06, "loss": 0.009, "step": 76730 }, { "epoch": 1.2959773026649104, "grad_norm": 0.1379367560148239, "learning_rate": 3.3229396079505672e-06, "loss": 0.0105, "step": 76740 }, { "epoch": 1.2961461816462323, "grad_norm": 0.2386491745710373, "learning_rate": 3.321551303024079e-06, "loss": 0.0053, "step": 76750 }, { "epoch": 1.2963150606275544, "grad_norm": 0.18386636674404144, "learning_rate": 3.3201631439171023e-06, "loss": 0.0066, "step": 76760 }, { "epoch": 1.2964839396088763, "grad_norm": 0.18002255260944366, "learning_rate": 3.3187751307502362e-06, "loss": 0.0059, "step": 76770 }, { "epoch": 1.2966528185901982, "grad_norm": 0.31730014085769653, "learning_rate": 3.317387263644066e-06, "loss": 0.0081, "step": 76780 }, { "epoch": 1.2968216975715203, "grad_norm": 0.341464102268219, "learning_rate": 3.3159995427191665e-06, "loss": 0.0096, "step": 76790 }, { "epoch": 1.2969905765528422, "grad_norm": 0.07813757658004761, "learning_rate": 3.3146119680961e-06, "loss": 0.0106, "step": 76800 }, { "epoch": 1.2971594555341643, "grad_norm": 0.25039729475975037, "learning_rate": 3.313224539895418e-06, "loss": 0.0071, "step": 76810 }, { "epoch": 1.2973283345154862, "grad_norm": 0.3279843032360077, "learning_rate": 3.3118372582376534e-06, "loss": 0.012, "step": 76820 }, { "epoch": 1.2974972134968081, "grad_norm": 0.22937914729118347, "learning_rate": 3.3104501232433313e-06, "loss": 0.0074, "step": 76830 }, { "epoch": 1.2976660924781303, "grad_norm": 0.1913721263408661, "learning_rate": 3.3090631350329623e-06, "loss": 0.0085, "step": 76840 }, { "epoch": 1.2978349714594521, "grad_norm": 0.09939918667078018, "learning_rate": 3.3076762937270456e-06, "loss": 0.0097, "step": 76850 }, { "epoch": 1.2980038504407743, "grad_norm": 0.2367565780878067, "learning_rate": 3.3062895994460646e-06, "loss": 0.0099, "step": 76860 }, { "epoch": 1.2981727294220962, "grad_norm": 0.32620295882225037, "learning_rate": 3.304903052310493e-06, "loss": 0.0077, "step": 76870 }, { "epoch": 1.298341608403418, "grad_norm": 0.2545156478881836, "learning_rate": 3.303516652440791e-06, "loss": 0.0078, "step": 76880 }, { "epoch": 1.2985104873847402, "grad_norm": 0.44648683071136475, "learning_rate": 3.302130399957407e-06, "loss": 0.0085, "step": 76890 }, { "epoch": 1.298679366366062, "grad_norm": 0.2971574068069458, "learning_rate": 3.300744294980771e-06, "loss": 0.0072, "step": 76900 }, { "epoch": 1.2988482453473842, "grad_norm": 0.251748263835907, "learning_rate": 3.2993583376313064e-06, "loss": 0.0087, "step": 76910 }, { "epoch": 1.299017124328706, "grad_norm": 0.1539025455713272, "learning_rate": 3.297972528029423e-06, "loss": 0.0052, "step": 76920 }, { "epoch": 1.299186003310028, "grad_norm": 0.15377838909626007, "learning_rate": 3.2965868662955174e-06, "loss": 0.0061, "step": 76930 }, { "epoch": 1.29935488229135, "grad_norm": 0.27053841948509216, "learning_rate": 3.2952013525499692e-06, "loss": 0.01, "step": 76940 }, { "epoch": 1.299523761272672, "grad_norm": 0.26223447918891907, "learning_rate": 3.29381598691315e-06, "loss": 0.0085, "step": 76950 }, { "epoch": 1.2996926402539941, "grad_norm": 0.1332259625196457, "learning_rate": 3.292430769505417e-06, "loss": 0.0051, "step": 76960 }, { "epoch": 1.299861519235316, "grad_norm": 0.24239392578601837, "learning_rate": 3.291045700447116e-06, "loss": 0.0063, "step": 76970 }, { "epoch": 1.300030398216638, "grad_norm": 0.2595266103744507, "learning_rate": 3.2896607798585756e-06, "loss": 0.0083, "step": 76980 }, { "epoch": 1.30019927719796, "grad_norm": 0.45457786321640015, "learning_rate": 3.288276007860116e-06, "loss": 0.0066, "step": 76990 }, { "epoch": 1.300368156179282, "grad_norm": 0.1857648491859436, "learning_rate": 3.286891384572042e-06, "loss": 0.0086, "step": 77000 }, { "epoch": 1.300537035160604, "grad_norm": 0.20266593992710114, "learning_rate": 3.2855069101146493e-06, "loss": 0.0074, "step": 77010 }, { "epoch": 1.300705914141926, "grad_norm": 0.317097008228302, "learning_rate": 3.2841225846082126e-06, "loss": 0.0094, "step": 77020 }, { "epoch": 1.3008747931232478, "grad_norm": 0.2016058713197708, "learning_rate": 3.282738408173001e-06, "loss": 0.0116, "step": 77030 }, { "epoch": 1.30104367210457, "grad_norm": 0.12154050916433334, "learning_rate": 3.281354380929268e-06, "loss": 0.0094, "step": 77040 }, { "epoch": 1.3012125510858918, "grad_norm": 0.2572433650493622, "learning_rate": 3.2799705029972583e-06, "loss": 0.007, "step": 77050 }, { "epoch": 1.301381430067214, "grad_norm": 0.12950026988983154, "learning_rate": 3.2785867744971943e-06, "loss": 0.0064, "step": 77060 }, { "epoch": 1.3015503090485359, "grad_norm": 0.24239006638526917, "learning_rate": 3.2772031955492932e-06, "loss": 0.01, "step": 77070 }, { "epoch": 1.3017191880298578, "grad_norm": 0.19370684027671814, "learning_rate": 3.2758197662737568e-06, "loss": 0.0048, "step": 77080 }, { "epoch": 1.3018880670111797, "grad_norm": 0.20251180231571198, "learning_rate": 3.2744364867907747e-06, "loss": 0.0064, "step": 77090 }, { "epoch": 1.3020569459925018, "grad_norm": 0.2704416811466217, "learning_rate": 3.273053357220523e-06, "loss": 0.0109, "step": 77100 }, { "epoch": 1.3022258249738239, "grad_norm": 0.4133005440235138, "learning_rate": 3.2716703776831634e-06, "loss": 0.0084, "step": 77110 }, { "epoch": 1.3023947039551458, "grad_norm": 0.3399215042591095, "learning_rate": 3.2702875482988455e-06, "loss": 0.0108, "step": 77120 }, { "epoch": 1.3025635829364677, "grad_norm": 0.3306865692138672, "learning_rate": 3.268904869187708e-06, "loss": 0.0068, "step": 77130 }, { "epoch": 1.3027324619177896, "grad_norm": 0.37932655215263367, "learning_rate": 3.2675223404698735e-06, "loss": 0.0084, "step": 77140 }, { "epoch": 1.3029013408991117, "grad_norm": 0.25528568029403687, "learning_rate": 3.2661399622654524e-06, "loss": 0.0103, "step": 77150 }, { "epoch": 1.3030702198804338, "grad_norm": 0.23565642535686493, "learning_rate": 3.2647577346945423e-06, "loss": 0.0095, "step": 77160 }, { "epoch": 1.3032390988617557, "grad_norm": 0.08010946959257126, "learning_rate": 3.263375657877229e-06, "loss": 0.0081, "step": 77170 }, { "epoch": 1.3034079778430776, "grad_norm": 0.039444006979465485, "learning_rate": 3.261993731933585e-06, "loss": 0.0054, "step": 77180 }, { "epoch": 1.3035768568243995, "grad_norm": 0.262728214263916, "learning_rate": 3.260611956983665e-06, "loss": 0.0128, "step": 77190 }, { "epoch": 1.3037457358057216, "grad_norm": 0.2592155337333679, "learning_rate": 3.259230333147515e-06, "loss": 0.0049, "step": 77200 }, { "epoch": 1.3039146147870437, "grad_norm": 0.14949461817741394, "learning_rate": 3.257848860545169e-06, "loss": 0.0088, "step": 77210 }, { "epoch": 1.3040834937683656, "grad_norm": 0.22397568821907043, "learning_rate": 3.256467539296646e-06, "loss": 0.0064, "step": 77220 }, { "epoch": 1.3042523727496875, "grad_norm": 0.09794653952121735, "learning_rate": 3.2550863695219497e-06, "loss": 0.0104, "step": 77230 }, { "epoch": 1.3044212517310094, "grad_norm": 0.18000316619873047, "learning_rate": 3.2537053513410742e-06, "loss": 0.008, "step": 77240 }, { "epoch": 1.3045901307123315, "grad_norm": 0.23705582320690155, "learning_rate": 3.2523244848739983e-06, "loss": 0.0116, "step": 77250 }, { "epoch": 1.3047590096936537, "grad_norm": 0.16159893572330475, "learning_rate": 3.2509437702406897e-06, "loss": 0.0118, "step": 77260 }, { "epoch": 1.3049278886749756, "grad_norm": 0.1169959232211113, "learning_rate": 3.2495632075610994e-06, "loss": 0.0063, "step": 77270 }, { "epoch": 1.3050967676562975, "grad_norm": 0.3458990454673767, "learning_rate": 3.2481827969551693e-06, "loss": 0.0108, "step": 77280 }, { "epoch": 1.3052656466376193, "grad_norm": 0.1558685600757599, "learning_rate": 3.246802538542824e-06, "loss": 0.0059, "step": 77290 }, { "epoch": 1.3054345256189415, "grad_norm": 0.17494109272956848, "learning_rate": 3.2454224324439807e-06, "loss": 0.0044, "step": 77300 }, { "epoch": 1.3056034046002636, "grad_norm": 0.321817547082901, "learning_rate": 3.244042478778535e-06, "loss": 0.0115, "step": 77310 }, { "epoch": 1.3057722835815855, "grad_norm": 0.24669042229652405, "learning_rate": 3.242662677666376e-06, "loss": 0.0068, "step": 77320 }, { "epoch": 1.3059411625629074, "grad_norm": 0.21770739555358887, "learning_rate": 3.241283029227378e-06, "loss": 0.0075, "step": 77330 }, { "epoch": 1.3061100415442293, "grad_norm": 0.06303948163986206, "learning_rate": 3.239903533581401e-06, "loss": 0.006, "step": 77340 }, { "epoch": 1.3062789205255514, "grad_norm": 0.41740378737449646, "learning_rate": 3.2385241908482923e-06, "loss": 0.0099, "step": 77350 }, { "epoch": 1.3064477995068735, "grad_norm": 0.19805829226970673, "learning_rate": 3.2371450011478843e-06, "loss": 0.0081, "step": 77360 }, { "epoch": 1.3066166784881954, "grad_norm": 0.2758050858974457, "learning_rate": 3.2357659645999994e-06, "loss": 0.0062, "step": 77370 }, { "epoch": 1.3067855574695173, "grad_norm": 0.26291465759277344, "learning_rate": 3.234387081324446e-06, "loss": 0.006, "step": 77380 }, { "epoch": 1.3069544364508392, "grad_norm": 0.3794949948787689, "learning_rate": 3.233008351441015e-06, "loss": 0.0095, "step": 77390 }, { "epoch": 1.3071233154321613, "grad_norm": 0.28105872869491577, "learning_rate": 3.2316297750694887e-06, "loss": 0.0117, "step": 77400 }, { "epoch": 1.3072921944134834, "grad_norm": 0.4572855234146118, "learning_rate": 3.2302513523296342e-06, "loss": 0.0075, "step": 77410 }, { "epoch": 1.3074610733948053, "grad_norm": 0.21550501883029938, "learning_rate": 3.2288730833412086e-06, "loss": 0.0073, "step": 77420 }, { "epoch": 1.3076299523761272, "grad_norm": 0.32721149921417236, "learning_rate": 3.2274949682239466e-06, "loss": 0.0094, "step": 77430 }, { "epoch": 1.3077988313574491, "grad_norm": 0.28906506299972534, "learning_rate": 3.226117007097579e-06, "loss": 0.0085, "step": 77440 }, { "epoch": 1.3079677103387712, "grad_norm": 0.24290132522583008, "learning_rate": 3.2247392000818184e-06, "loss": 0.0041, "step": 77450 }, { "epoch": 1.3081365893200934, "grad_norm": 0.22008033096790314, "learning_rate": 3.2233615472963674e-06, "loss": 0.0095, "step": 77460 }, { "epoch": 1.3083054683014153, "grad_norm": 0.8616555333137512, "learning_rate": 3.221984048860911e-06, "loss": 0.0104, "step": 77470 }, { "epoch": 1.3084743472827371, "grad_norm": 0.4175812005996704, "learning_rate": 3.220606704895124e-06, "loss": 0.0094, "step": 77480 }, { "epoch": 1.308643226264059, "grad_norm": 0.2738248407840729, "learning_rate": 3.2192295155186654e-06, "loss": 0.0067, "step": 77490 }, { "epoch": 1.3088121052453812, "grad_norm": 0.30116766691207886, "learning_rate": 3.217852480851185e-06, "loss": 0.0065, "step": 77500 }, { "epoch": 1.3089809842267033, "grad_norm": 0.12062279880046844, "learning_rate": 3.216475601012312e-06, "loss": 0.0095, "step": 77510 }, { "epoch": 1.3091498632080252, "grad_norm": 0.6143406629562378, "learning_rate": 3.2150988761216683e-06, "loss": 0.0095, "step": 77520 }, { "epoch": 1.309318742189347, "grad_norm": 0.09921646863222122, "learning_rate": 3.213722306298862e-06, "loss": 0.0051, "step": 77530 }, { "epoch": 1.309487621170669, "grad_norm": 1.1012955904006958, "learning_rate": 3.2123458916634837e-06, "loss": 0.01, "step": 77540 }, { "epoch": 1.309656500151991, "grad_norm": 0.3775148391723633, "learning_rate": 3.2109696323351157e-06, "loss": 0.0066, "step": 77550 }, { "epoch": 1.3098253791333132, "grad_norm": 0.15610173344612122, "learning_rate": 3.2095935284333213e-06, "loss": 0.0078, "step": 77560 }, { "epoch": 1.309994258114635, "grad_norm": 0.27522435784339905, "learning_rate": 3.208217580077653e-06, "loss": 0.0088, "step": 77570 }, { "epoch": 1.310163137095957, "grad_norm": 0.14150865375995636, "learning_rate": 3.206841787387651e-06, "loss": 0.0049, "step": 77580 }, { "epoch": 1.310332016077279, "grad_norm": 0.39948317408561707, "learning_rate": 3.205466150482842e-06, "loss": 0.0103, "step": 77590 }, { "epoch": 1.310500895058601, "grad_norm": 0.3529495298862457, "learning_rate": 3.204090669482735e-06, "loss": 0.0051, "step": 77600 }, { "epoch": 1.310669774039923, "grad_norm": 0.09750164300203323, "learning_rate": 3.2027153445068302e-06, "loss": 0.0045, "step": 77610 }, { "epoch": 1.310838653021245, "grad_norm": 0.18024390935897827, "learning_rate": 3.2013401756746125e-06, "loss": 0.0056, "step": 77620 }, { "epoch": 1.311007532002567, "grad_norm": 0.20823343098163605, "learning_rate": 3.199965163105554e-06, "loss": 0.0081, "step": 77630 }, { "epoch": 1.3111764109838888, "grad_norm": 0.058046456426382065, "learning_rate": 3.1985903069191103e-06, "loss": 0.0069, "step": 77640 }, { "epoch": 1.311345289965211, "grad_norm": 1.026665210723877, "learning_rate": 3.197215607234726e-06, "loss": 0.008, "step": 77650 }, { "epoch": 1.3115141689465328, "grad_norm": 0.09148981422185898, "learning_rate": 3.1958410641718328e-06, "loss": 0.005, "step": 77660 }, { "epoch": 1.311683047927855, "grad_norm": 0.25739338994026184, "learning_rate": 3.194466677849849e-06, "loss": 0.0054, "step": 77670 }, { "epoch": 1.3118519269091768, "grad_norm": 0.27412208914756775, "learning_rate": 3.193092448388174e-06, "loss": 0.0085, "step": 77680 }, { "epoch": 1.3120208058904987, "grad_norm": 0.25598034262657166, "learning_rate": 3.1917183759062e-06, "loss": 0.0065, "step": 77690 }, { "epoch": 1.3121896848718209, "grad_norm": 0.16823787987232208, "learning_rate": 3.1903444605233004e-06, "loss": 0.0068, "step": 77700 }, { "epoch": 1.3123585638531428, "grad_norm": 0.44798731803894043, "learning_rate": 3.1889707023588435e-06, "loss": 0.0068, "step": 77710 }, { "epoch": 1.3125274428344649, "grad_norm": 0.23699355125427246, "learning_rate": 3.1875971015321723e-06, "loss": 0.0056, "step": 77720 }, { "epoch": 1.3126963218157868, "grad_norm": 0.3091232478618622, "learning_rate": 3.1862236581626237e-06, "loss": 0.0078, "step": 77730 }, { "epoch": 1.3128652007971087, "grad_norm": 0.26882895827293396, "learning_rate": 3.184850372369519e-06, "loss": 0.0078, "step": 77740 }, { "epoch": 1.3130340797784308, "grad_norm": 0.12825192511081696, "learning_rate": 3.1834772442721675e-06, "loss": 0.008, "step": 77750 }, { "epoch": 1.3132029587597527, "grad_norm": 0.2650512456893921, "learning_rate": 3.1821042739898606e-06, "loss": 0.006, "step": 77760 }, { "epoch": 1.3133718377410748, "grad_norm": 0.18982666730880737, "learning_rate": 3.18073146164188e-06, "loss": 0.0081, "step": 77770 }, { "epoch": 1.3135407167223967, "grad_norm": 0.26161548495292664, "learning_rate": 3.1793588073474925e-06, "loss": 0.0111, "step": 77780 }, { "epoch": 1.3137095957037186, "grad_norm": 0.17672418057918549, "learning_rate": 3.177986311225952e-06, "loss": 0.0096, "step": 77790 }, { "epoch": 1.3138784746850407, "grad_norm": 0.14424827694892883, "learning_rate": 3.176613973396494e-06, "loss": 0.0106, "step": 77800 }, { "epoch": 1.3140473536663626, "grad_norm": 0.19780315458774567, "learning_rate": 3.1752417939783463e-06, "loss": 0.0076, "step": 77810 }, { "epoch": 1.3142162326476847, "grad_norm": 0.24573275446891785, "learning_rate": 3.17386977309072e-06, "loss": 0.0058, "step": 77820 }, { "epoch": 1.3143851116290066, "grad_norm": 0.10357173532247543, "learning_rate": 3.1724979108528147e-06, "loss": 0.0076, "step": 77830 }, { "epoch": 1.3145539906103285, "grad_norm": 0.1417396366596222, "learning_rate": 3.1711262073838113e-06, "loss": 0.0091, "step": 77840 }, { "epoch": 1.3147228695916506, "grad_norm": 0.2270199954509735, "learning_rate": 3.169754662802882e-06, "loss": 0.0053, "step": 77850 }, { "epoch": 1.3148917485729725, "grad_norm": 0.30454394221305847, "learning_rate": 3.1683832772291824e-06, "loss": 0.0063, "step": 77860 }, { "epoch": 1.3150606275542946, "grad_norm": 0.22159899771213531, "learning_rate": 3.167012050781857e-06, "loss": 0.0084, "step": 77870 }, { "epoch": 1.3152295065356165, "grad_norm": 0.16495642066001892, "learning_rate": 3.1656409835800316e-06, "loss": 0.0064, "step": 77880 }, { "epoch": 1.3153983855169384, "grad_norm": 0.4288928508758545, "learning_rate": 3.164270075742823e-06, "loss": 0.0083, "step": 77890 }, { "epoch": 1.3155672644982606, "grad_norm": 0.2705841064453125, "learning_rate": 3.1628993273893315e-06, "loss": 0.0061, "step": 77900 }, { "epoch": 1.3157361434795825, "grad_norm": 0.22780577838420868, "learning_rate": 3.161528738638647e-06, "loss": 0.0063, "step": 77910 }, { "epoch": 1.3159050224609046, "grad_norm": 0.21674275398254395, "learning_rate": 3.1601583096098397e-06, "loss": 0.0063, "step": 77920 }, { "epoch": 1.3160739014422265, "grad_norm": 0.21835730969905853, "learning_rate": 3.1587880404219685e-06, "loss": 0.0086, "step": 77930 }, { "epoch": 1.3162427804235484, "grad_norm": 0.25517478585243225, "learning_rate": 3.157417931194081e-06, "loss": 0.0054, "step": 77940 }, { "epoch": 1.3164116594048705, "grad_norm": 0.1743002086877823, "learning_rate": 3.156047982045212e-06, "loss": 0.0085, "step": 77950 }, { "epoch": 1.3165805383861924, "grad_norm": 0.30499550700187683, "learning_rate": 3.1546781930943726e-06, "loss": 0.0081, "step": 77960 }, { "epoch": 1.3167494173675145, "grad_norm": 0.13063134253025055, "learning_rate": 3.153308564460571e-06, "loss": 0.0085, "step": 77970 }, { "epoch": 1.3169182963488364, "grad_norm": 0.16271239519119263, "learning_rate": 3.151939096262796e-06, "loss": 0.0086, "step": 77980 }, { "epoch": 1.3170871753301583, "grad_norm": 0.21331001818180084, "learning_rate": 3.1505697886200235e-06, "loss": 0.0084, "step": 77990 }, { "epoch": 1.3172560543114804, "grad_norm": 0.18985627591609955, "learning_rate": 3.149200641651218e-06, "loss": 0.0078, "step": 78000 }, { "epoch": 1.3174249332928023, "grad_norm": 0.2433658242225647, "learning_rate": 3.147831655475323e-06, "loss": 0.005, "step": 78010 }, { "epoch": 1.3175938122741244, "grad_norm": 0.39435842633247375, "learning_rate": 3.1464628302112766e-06, "loss": 0.0058, "step": 78020 }, { "epoch": 1.3177626912554463, "grad_norm": 0.346824049949646, "learning_rate": 3.1450941659779966e-06, "loss": 0.0075, "step": 78030 }, { "epoch": 1.3179315702367682, "grad_norm": 0.08942098915576935, "learning_rate": 3.1437256628943937e-06, "loss": 0.0068, "step": 78040 }, { "epoch": 1.3181004492180903, "grad_norm": 0.19932153820991516, "learning_rate": 3.1423573210793524e-06, "loss": 0.0078, "step": 78050 }, { "epoch": 1.3182693281994122, "grad_norm": 0.09797131270170212, "learning_rate": 3.1409891406517564e-06, "loss": 0.0052, "step": 78060 }, { "epoch": 1.3184382071807343, "grad_norm": 0.3072916269302368, "learning_rate": 3.1396211217304683e-06, "loss": 0.008, "step": 78070 }, { "epoch": 1.3186070861620562, "grad_norm": 0.1184469535946846, "learning_rate": 3.138253264434341e-06, "loss": 0.0064, "step": 78080 }, { "epoch": 1.3187759651433781, "grad_norm": 0.15313486754894257, "learning_rate": 3.1368855688822066e-06, "loss": 0.011, "step": 78090 }, { "epoch": 1.3189448441247003, "grad_norm": 0.11617664247751236, "learning_rate": 3.135518035192887e-06, "loss": 0.008, "step": 78100 }, { "epoch": 1.3191137231060222, "grad_norm": 0.2680816650390625, "learning_rate": 3.134150663485194e-06, "loss": 0.0109, "step": 78110 }, { "epoch": 1.3192826020873443, "grad_norm": 0.33083784580230713, "learning_rate": 3.132783453877919e-06, "loss": 0.0062, "step": 78120 }, { "epoch": 1.3194514810686662, "grad_norm": 0.23338764905929565, "learning_rate": 3.131416406489841e-06, "loss": 0.0047, "step": 78130 }, { "epoch": 1.319620360049988, "grad_norm": 0.11642409861087799, "learning_rate": 3.130049521439727e-06, "loss": 0.0104, "step": 78140 }, { "epoch": 1.3197892390313102, "grad_norm": 0.1426660716533661, "learning_rate": 3.1286827988463285e-06, "loss": 0.0066, "step": 78150 }, { "epoch": 1.319958118012632, "grad_norm": 0.20483219623565674, "learning_rate": 3.127316238828385e-06, "loss": 0.0064, "step": 78160 }, { "epoch": 1.3201269969939542, "grad_norm": 0.2970915138721466, "learning_rate": 3.125949841504615e-06, "loss": 0.0122, "step": 78170 }, { "epoch": 1.320295875975276, "grad_norm": 0.1778431087732315, "learning_rate": 3.124583606993732e-06, "loss": 0.0059, "step": 78180 }, { "epoch": 1.320464754956598, "grad_norm": 0.39894911646842957, "learning_rate": 3.12321753541443e-06, "loss": 0.0098, "step": 78190 }, { "epoch": 1.32063363393792, "grad_norm": 0.23434637486934662, "learning_rate": 3.121851626885391e-06, "loss": 0.0099, "step": 78200 }, { "epoch": 1.320802512919242, "grad_norm": 0.12134696543216705, "learning_rate": 3.1204858815252785e-06, "loss": 0.0098, "step": 78210 }, { "epoch": 1.3209713919005641, "grad_norm": 0.13534823060035706, "learning_rate": 3.1191202994527475e-06, "loss": 0.0072, "step": 78220 }, { "epoch": 1.321140270881886, "grad_norm": 0.12583117187023163, "learning_rate": 3.1177548807864356e-06, "loss": 0.0047, "step": 78230 }, { "epoch": 1.321309149863208, "grad_norm": 0.1728520691394806, "learning_rate": 3.116389625644969e-06, "loss": 0.0067, "step": 78240 }, { "epoch": 1.32147802884453, "grad_norm": 0.30174097418785095, "learning_rate": 3.1150245341469544e-06, "loss": 0.0085, "step": 78250 }, { "epoch": 1.321646907825852, "grad_norm": 0.32523113489151, "learning_rate": 3.1136596064109903e-06, "loss": 0.0069, "step": 78260 }, { "epoch": 1.321815786807174, "grad_norm": 0.43300801515579224, "learning_rate": 3.1122948425556565e-06, "loss": 0.0102, "step": 78270 }, { "epoch": 1.321984665788496, "grad_norm": 0.3831111788749695, "learning_rate": 3.1109302426995225e-06, "loss": 0.0069, "step": 78280 }, { "epoch": 1.3221535447698178, "grad_norm": 0.22319313883781433, "learning_rate": 3.109565806961139e-06, "loss": 0.0061, "step": 78290 }, { "epoch": 1.32232242375114, "grad_norm": 0.13034215569496155, "learning_rate": 3.108201535459047e-06, "loss": 0.0088, "step": 78300 }, { "epoch": 1.3224913027324618, "grad_norm": 0.16441096365451813, "learning_rate": 3.1068374283117686e-06, "loss": 0.0057, "step": 78310 }, { "epoch": 1.322660181713784, "grad_norm": 0.48924916982650757, "learning_rate": 3.1054734856378175e-06, "loss": 0.0091, "step": 78320 }, { "epoch": 1.3228290606951059, "grad_norm": 0.303460031747818, "learning_rate": 3.1041097075556865e-06, "loss": 0.0075, "step": 78330 }, { "epoch": 1.3229979396764278, "grad_norm": 0.2003507763147354, "learning_rate": 3.102746094183858e-06, "loss": 0.0071, "step": 78340 }, { "epoch": 1.3231668186577499, "grad_norm": 0.21310636401176453, "learning_rate": 3.1013826456407996e-06, "loss": 0.0108, "step": 78350 }, { "epoch": 1.3233356976390718, "grad_norm": 0.22794616222381592, "learning_rate": 3.1000193620449657e-06, "loss": 0.007, "step": 78360 }, { "epoch": 1.323504576620394, "grad_norm": 0.33721813559532166, "learning_rate": 3.098656243514794e-06, "loss": 0.0054, "step": 78370 }, { "epoch": 1.3236734556017158, "grad_norm": 0.4585207402706146, "learning_rate": 3.0972932901687073e-06, "loss": 0.0078, "step": 78380 }, { "epoch": 1.3238423345830377, "grad_norm": 0.17822852730751038, "learning_rate": 3.0959305021251184e-06, "loss": 0.0066, "step": 78390 }, { "epoch": 1.3240112135643598, "grad_norm": 0.35963761806488037, "learning_rate": 3.0945678795024227e-06, "loss": 0.0076, "step": 78400 }, { "epoch": 1.3241800925456817, "grad_norm": 0.17994917929172516, "learning_rate": 3.093205422418999e-06, "loss": 0.0056, "step": 78410 }, { "epoch": 1.3243489715270038, "grad_norm": 0.4703652560710907, "learning_rate": 3.091843130993216e-06, "loss": 0.0088, "step": 78420 }, { "epoch": 1.3245178505083257, "grad_norm": 0.2591179609298706, "learning_rate": 3.090481005343426e-06, "loss": 0.007, "step": 78430 }, { "epoch": 1.3246867294896476, "grad_norm": 0.19698704779148102, "learning_rate": 3.0891190455879672e-06, "loss": 0.0057, "step": 78440 }, { "epoch": 1.3248556084709697, "grad_norm": 0.32511475682258606, "learning_rate": 3.087757251845166e-06, "loss": 0.0104, "step": 78450 }, { "epoch": 1.3250244874522916, "grad_norm": 0.13770100474357605, "learning_rate": 3.086395624233326e-06, "loss": 0.0056, "step": 78460 }, { "epoch": 1.3251933664336137, "grad_norm": 0.13968460261821747, "learning_rate": 3.0850341628707458e-06, "loss": 0.0095, "step": 78470 }, { "epoch": 1.3253622454149356, "grad_norm": 0.29466739296913147, "learning_rate": 3.0836728678757056e-06, "loss": 0.0064, "step": 78480 }, { "epoch": 1.3255311243962575, "grad_norm": 0.3268745541572571, "learning_rate": 3.082311739366471e-06, "loss": 0.0095, "step": 78490 }, { "epoch": 1.3257000033775796, "grad_norm": 0.36792024970054626, "learning_rate": 3.0809507774612927e-06, "loss": 0.0129, "step": 78500 }, { "epoch": 1.3258688823589015, "grad_norm": 0.2441137433052063, "learning_rate": 3.0795899822784087e-06, "loss": 0.0057, "step": 78510 }, { "epoch": 1.3260377613402237, "grad_norm": 0.22421890497207642, "learning_rate": 3.078229353936041e-06, "loss": 0.0054, "step": 78520 }, { "epoch": 1.3262066403215456, "grad_norm": 0.252164751291275, "learning_rate": 3.0768688925523993e-06, "loss": 0.0086, "step": 78530 }, { "epoch": 1.3263755193028675, "grad_norm": 0.6438063383102417, "learning_rate": 3.0755085982456744e-06, "loss": 0.0093, "step": 78540 }, { "epoch": 1.3265443982841896, "grad_norm": 0.18405963480472565, "learning_rate": 3.0741484711340473e-06, "loss": 0.0083, "step": 78550 }, { "epoch": 1.3267132772655115, "grad_norm": 0.2798435688018799, "learning_rate": 3.0727885113356812e-06, "loss": 0.0055, "step": 78560 }, { "epoch": 1.3268821562468336, "grad_norm": 0.2657744884490967, "learning_rate": 3.0714287189687297e-06, "loss": 0.0048, "step": 78570 }, { "epoch": 1.3270510352281555, "grad_norm": 0.3335052728652954, "learning_rate": 3.0700690941513232e-06, "loss": 0.0075, "step": 78580 }, { "epoch": 1.3272199142094774, "grad_norm": 0.17950665950775146, "learning_rate": 3.0687096370015844e-06, "loss": 0.0063, "step": 78590 }, { "epoch": 1.3273887931907995, "grad_norm": 0.26264575123786926, "learning_rate": 3.0673503476376198e-06, "loss": 0.0089, "step": 78600 }, { "epoch": 1.3275576721721214, "grad_norm": 0.2541740834712982, "learning_rate": 3.0659912261775225e-06, "loss": 0.0078, "step": 78610 }, { "epoch": 1.3277265511534435, "grad_norm": 0.21936559677124023, "learning_rate": 3.064632272739368e-06, "loss": 0.0056, "step": 78620 }, { "epoch": 1.3278954301347654, "grad_norm": 0.13960029184818268, "learning_rate": 3.0632734874412186e-06, "loss": 0.0073, "step": 78630 }, { "epoch": 1.3280643091160873, "grad_norm": 0.23567645251750946, "learning_rate": 3.0619148704011233e-06, "loss": 0.0071, "step": 78640 }, { "epoch": 1.3282331880974094, "grad_norm": 0.1402289718389511, "learning_rate": 3.0605564217371158e-06, "loss": 0.0087, "step": 78650 }, { "epoch": 1.3284020670787313, "grad_norm": 0.23454083502292633, "learning_rate": 3.0591981415672133e-06, "loss": 0.0068, "step": 78660 }, { "epoch": 1.3285709460600534, "grad_norm": 0.2527196407318115, "learning_rate": 3.05784003000942e-06, "loss": 0.0064, "step": 78670 }, { "epoch": 1.3287398250413753, "grad_norm": 0.3676067292690277, "learning_rate": 3.0564820871817256e-06, "loss": 0.0112, "step": 78680 }, { "epoch": 1.3289087040226972, "grad_norm": 0.23698033392429352, "learning_rate": 3.055124313202108e-06, "loss": 0.008, "step": 78690 }, { "epoch": 1.3290775830040193, "grad_norm": 0.302389919757843, "learning_rate": 3.0537667081885227e-06, "loss": 0.0067, "step": 78700 }, { "epoch": 1.3292464619853412, "grad_norm": 0.07867204397916794, "learning_rate": 3.052409272258916e-06, "loss": 0.0057, "step": 78710 }, { "epoch": 1.3294153409666634, "grad_norm": 0.144886776804924, "learning_rate": 3.0510520055312186e-06, "loss": 0.0128, "step": 78720 }, { "epoch": 1.3295842199479853, "grad_norm": 0.21567465364933014, "learning_rate": 3.04969490812335e-06, "loss": 0.0082, "step": 78730 }, { "epoch": 1.3297530989293072, "grad_norm": 0.5427302718162537, "learning_rate": 3.0483379801532077e-06, "loss": 0.0098, "step": 78740 }, { "epoch": 1.3299219779106293, "grad_norm": 0.20362186431884766, "learning_rate": 3.0469812217386786e-06, "loss": 0.0072, "step": 78750 }, { "epoch": 1.3300908568919512, "grad_norm": 0.16029959917068481, "learning_rate": 3.045624632997635e-06, "loss": 0.0077, "step": 78760 }, { "epoch": 1.3302597358732733, "grad_norm": 0.18167763948440552, "learning_rate": 3.0442682140479365e-06, "loss": 0.0109, "step": 78770 }, { "epoch": 1.3304286148545952, "grad_norm": 0.020015330985188484, "learning_rate": 3.042911965007421e-06, "loss": 0.0061, "step": 78780 }, { "epoch": 1.330597493835917, "grad_norm": 0.3117382526397705, "learning_rate": 3.041555885993919e-06, "loss": 0.0047, "step": 78790 }, { "epoch": 1.3307663728172392, "grad_norm": 0.18618355691432953, "learning_rate": 3.0401999771252427e-06, "loss": 0.0108, "step": 78800 }, { "epoch": 1.330935251798561, "grad_norm": 0.28209641575813293, "learning_rate": 3.0388442385191924e-06, "loss": 0.0067, "step": 78810 }, { "epoch": 1.3311041307798832, "grad_norm": 0.1848260909318924, "learning_rate": 3.037488670293547e-06, "loss": 0.0089, "step": 78820 }, { "epoch": 1.331273009761205, "grad_norm": 0.23188859224319458, "learning_rate": 3.0361332725660764e-06, "loss": 0.0085, "step": 78830 }, { "epoch": 1.331441888742527, "grad_norm": 0.3405068516731262, "learning_rate": 3.0347780454545344e-06, "loss": 0.006, "step": 78840 }, { "epoch": 1.3316107677238491, "grad_norm": 0.370419442653656, "learning_rate": 3.033422989076663e-06, "loss": 0.0085, "step": 78850 }, { "epoch": 1.331779646705171, "grad_norm": 0.1485784649848938, "learning_rate": 3.032068103550182e-06, "loss": 0.0098, "step": 78860 }, { "epoch": 1.3319485256864931, "grad_norm": 0.2694201171398163, "learning_rate": 3.030713388992802e-06, "loss": 0.0137, "step": 78870 }, { "epoch": 1.332117404667815, "grad_norm": 0.17716610431671143, "learning_rate": 3.029358845522217e-06, "loss": 0.0077, "step": 78880 }, { "epoch": 1.332286283649137, "grad_norm": 0.16364862024784088, "learning_rate": 3.0280044732561064e-06, "loss": 0.0089, "step": 78890 }, { "epoch": 1.332455162630459, "grad_norm": 0.2262091487646103, "learning_rate": 3.026650272312136e-06, "loss": 0.0063, "step": 78900 }, { "epoch": 1.332624041611781, "grad_norm": 0.4568445086479187, "learning_rate": 3.0252962428079536e-06, "loss": 0.0076, "step": 78910 }, { "epoch": 1.332792920593103, "grad_norm": 0.3119371235370636, "learning_rate": 3.0239423848611953e-06, "loss": 0.0064, "step": 78920 }, { "epoch": 1.332961799574425, "grad_norm": 0.6476218104362488, "learning_rate": 3.0225886985894802e-06, "loss": 0.0084, "step": 78930 }, { "epoch": 1.3331306785557469, "grad_norm": 0.08139139413833618, "learning_rate": 3.021235184110416e-06, "loss": 0.0097, "step": 78940 }, { "epoch": 1.333299557537069, "grad_norm": 0.257887065410614, "learning_rate": 3.0198818415415875e-06, "loss": 0.0065, "step": 78950 }, { "epoch": 1.3334684365183909, "grad_norm": 0.21057583391666412, "learning_rate": 3.0185286710005725e-06, "loss": 0.007, "step": 78960 }, { "epoch": 1.333637315499713, "grad_norm": 0.15526847541332245, "learning_rate": 3.017175672604932e-06, "loss": 0.0081, "step": 78970 }, { "epoch": 1.3338061944810349, "grad_norm": 0.33971333503723145, "learning_rate": 3.0158228464722116e-06, "loss": 0.0076, "step": 78980 }, { "epoch": 1.3339750734623568, "grad_norm": 0.15014977753162384, "learning_rate": 3.0144701927199386e-06, "loss": 0.0069, "step": 78990 }, { "epoch": 1.334143952443679, "grad_norm": 0.2924710810184479, "learning_rate": 3.01311771146563e-06, "loss": 0.0095, "step": 79000 }, { "epoch": 1.3343128314250008, "grad_norm": 0.6641076803207397, "learning_rate": 3.011765402826786e-06, "loss": 0.0095, "step": 79010 }, { "epoch": 1.334481710406323, "grad_norm": 0.09122662991285324, "learning_rate": 3.0104132669208934e-06, "loss": 0.0046, "step": 79020 }, { "epoch": 1.3346505893876448, "grad_norm": 0.22348128259181976, "learning_rate": 3.00906130386542e-06, "loss": 0.0076, "step": 79030 }, { "epoch": 1.3348194683689667, "grad_norm": 0.2920100688934326, "learning_rate": 3.007709513777821e-06, "loss": 0.0092, "step": 79040 }, { "epoch": 1.3349883473502888, "grad_norm": 0.236643984913826, "learning_rate": 3.0063578967755386e-06, "loss": 0.0059, "step": 79050 }, { "epoch": 1.3351572263316107, "grad_norm": 0.4161838889122009, "learning_rate": 3.0050064529759985e-06, "loss": 0.0115, "step": 79060 }, { "epoch": 1.3353261053129328, "grad_norm": 0.3123241364955902, "learning_rate": 3.0036551824966065e-06, "loss": 0.006, "step": 79070 }, { "epoch": 1.3354949842942547, "grad_norm": 0.20337659120559692, "learning_rate": 3.0023040854547613e-06, "loss": 0.0043, "step": 79080 }, { "epoch": 1.3356638632755766, "grad_norm": 0.964779794216156, "learning_rate": 3.0009531619678423e-06, "loss": 0.0072, "step": 79090 }, { "epoch": 1.3358327422568987, "grad_norm": 0.23988080024719238, "learning_rate": 2.999602412153215e-06, "loss": 0.0102, "step": 79100 }, { "epoch": 1.3360016212382206, "grad_norm": 0.12055898457765579, "learning_rate": 2.9982518361282276e-06, "loss": 0.0049, "step": 79110 }, { "epoch": 1.3361705002195428, "grad_norm": 0.20093703269958496, "learning_rate": 2.996901434010215e-06, "loss": 0.0063, "step": 79120 }, { "epoch": 1.3363393792008647, "grad_norm": 0.3940306007862091, "learning_rate": 2.995551205916498e-06, "loss": 0.0079, "step": 79130 }, { "epoch": 1.3365082581821865, "grad_norm": 0.22178632020950317, "learning_rate": 2.9942011519643823e-06, "loss": 0.0103, "step": 79140 }, { "epoch": 1.3366771371635087, "grad_norm": 0.30639973282814026, "learning_rate": 2.9928512722711535e-06, "loss": 0.0059, "step": 79150 }, { "epoch": 1.3368460161448306, "grad_norm": 0.26573696732521057, "learning_rate": 2.991501566954088e-06, "loss": 0.0079, "step": 79160 }, { "epoch": 1.3370148951261527, "grad_norm": 0.29252058267593384, "learning_rate": 2.9901520361304446e-06, "loss": 0.009, "step": 79170 }, { "epoch": 1.3371837741074746, "grad_norm": 0.19378070533275604, "learning_rate": 2.98880267991747e-06, "loss": 0.0051, "step": 79180 }, { "epoch": 1.3373526530887965, "grad_norm": 0.4282369315624237, "learning_rate": 2.9874534984323865e-06, "loss": 0.0078, "step": 79190 }, { "epoch": 1.3375215320701186, "grad_norm": 0.1630321443080902, "learning_rate": 2.9861044917924133e-06, "loss": 0.0047, "step": 79200 }, { "epoch": 1.3376904110514405, "grad_norm": 0.20862995088100433, "learning_rate": 2.984755660114747e-06, "loss": 0.0067, "step": 79210 }, { "epoch": 1.3378592900327626, "grad_norm": 0.23668649792671204, "learning_rate": 2.983407003516573e-06, "loss": 0.0068, "step": 79220 }, { "epoch": 1.3380281690140845, "grad_norm": 0.4086633324623108, "learning_rate": 2.9820585221150545e-06, "loss": 0.0094, "step": 79230 }, { "epoch": 1.3381970479954064, "grad_norm": 0.369245707988739, "learning_rate": 2.980710216027347e-06, "loss": 0.008, "step": 79240 }, { "epoch": 1.3383659269767285, "grad_norm": 0.3186657130718231, "learning_rate": 2.9793620853705886e-06, "loss": 0.0061, "step": 79250 }, { "epoch": 1.3385348059580504, "grad_norm": 0.16303865611553192, "learning_rate": 2.9780141302619017e-06, "loss": 0.0103, "step": 79260 }, { "epoch": 1.3387036849393725, "grad_norm": 0.2505386769771576, "learning_rate": 2.9766663508183913e-06, "loss": 0.0071, "step": 79270 }, { "epoch": 1.3388725639206944, "grad_norm": 0.6792827248573303, "learning_rate": 2.9753187471571496e-06, "loss": 0.0081, "step": 79280 }, { "epoch": 1.3390414429020163, "grad_norm": 0.27331966161727905, "learning_rate": 2.9739713193952553e-06, "loss": 0.0117, "step": 79290 }, { "epoch": 1.3392103218833384, "grad_norm": 0.38039419054985046, "learning_rate": 2.9726240676497686e-06, "loss": 0.0088, "step": 79300 }, { "epoch": 1.3393792008646603, "grad_norm": 0.18801549077033997, "learning_rate": 2.971276992037734e-06, "loss": 0.0079, "step": 79310 }, { "epoch": 1.3395480798459825, "grad_norm": 0.19558413326740265, "learning_rate": 2.969930092676183e-06, "loss": 0.007, "step": 79320 }, { "epoch": 1.3397169588273043, "grad_norm": 0.38631290197372437, "learning_rate": 2.968583369682132e-06, "loss": 0.0043, "step": 79330 }, { "epoch": 1.3398858378086262, "grad_norm": 0.33018502593040466, "learning_rate": 2.967236823172579e-06, "loss": 0.007, "step": 79340 }, { "epoch": 1.3400547167899484, "grad_norm": 0.27429690957069397, "learning_rate": 2.9658904532645126e-06, "loss": 0.0067, "step": 79350 }, { "epoch": 1.3402235957712703, "grad_norm": 0.22618752717971802, "learning_rate": 2.9645442600748974e-06, "loss": 0.0059, "step": 79360 }, { "epoch": 1.3403924747525924, "grad_norm": 0.14975601434707642, "learning_rate": 2.9631982437206886e-06, "loss": 0.0056, "step": 79370 }, { "epoch": 1.3405613537339143, "grad_norm": 0.1723846048116684, "learning_rate": 2.961852404318826e-06, "loss": 0.0088, "step": 79380 }, { "epoch": 1.3407302327152362, "grad_norm": 0.2989548146724701, "learning_rate": 2.960506741986233e-06, "loss": 0.0097, "step": 79390 }, { "epoch": 1.3408991116965583, "grad_norm": 0.30760887265205383, "learning_rate": 2.9591612568398154e-06, "loss": 0.0074, "step": 79400 }, { "epoch": 1.3410679906778802, "grad_norm": 0.3679409921169281, "learning_rate": 2.9578159489964675e-06, "loss": 0.0089, "step": 79410 }, { "epoch": 1.3412368696592023, "grad_norm": 0.259915828704834, "learning_rate": 2.9564708185730654e-06, "loss": 0.0057, "step": 79420 }, { "epoch": 1.3414057486405242, "grad_norm": 0.24542808532714844, "learning_rate": 2.9551258656864728e-06, "loss": 0.0082, "step": 79430 }, { "epoch": 1.341574627621846, "grad_norm": 0.3854770064353943, "learning_rate": 2.953781090453533e-06, "loss": 0.0055, "step": 79440 }, { "epoch": 1.3417435066031682, "grad_norm": 0.08972276002168655, "learning_rate": 2.952436492991078e-06, "loss": 0.0082, "step": 79450 }, { "epoch": 1.34191238558449, "grad_norm": 0.4935832619667053, "learning_rate": 2.951092073415923e-06, "loss": 0.0084, "step": 79460 }, { "epoch": 1.3420812645658122, "grad_norm": 0.39611881971359253, "learning_rate": 2.9497478318448703e-06, "loss": 0.0088, "step": 79470 }, { "epoch": 1.3422501435471341, "grad_norm": 0.333006888628006, "learning_rate": 2.9484037683946997e-06, "loss": 0.0067, "step": 79480 }, { "epoch": 1.342419022528456, "grad_norm": 0.24609224498271942, "learning_rate": 2.947059883182183e-06, "loss": 0.0066, "step": 79490 }, { "epoch": 1.3425879015097781, "grad_norm": 0.17252331972122192, "learning_rate": 2.9457161763240734e-06, "loss": 0.0101, "step": 79500 }, { "epoch": 1.3427567804911, "grad_norm": 0.38260194659233093, "learning_rate": 2.9443726479371095e-06, "loss": 0.0074, "step": 79510 }, { "epoch": 1.3429256594724222, "grad_norm": 0.18139418959617615, "learning_rate": 2.9430292981380115e-06, "loss": 0.0071, "step": 79520 }, { "epoch": 1.343094538453744, "grad_norm": 0.09417281299829483, "learning_rate": 2.941686127043488e-06, "loss": 0.011, "step": 79530 }, { "epoch": 1.343263417435066, "grad_norm": 0.6665037870407104, "learning_rate": 2.9403431347702295e-06, "loss": 0.0122, "step": 79540 }, { "epoch": 1.343432296416388, "grad_norm": 0.5428504943847656, "learning_rate": 2.9390003214349145e-06, "loss": 0.0106, "step": 79550 }, { "epoch": 1.34360117539771, "grad_norm": 0.7836925983428955, "learning_rate": 2.9376576871541996e-06, "loss": 0.0082, "step": 79560 }, { "epoch": 1.343770054379032, "grad_norm": 0.294154554605484, "learning_rate": 2.9363152320447317e-06, "loss": 0.0088, "step": 79570 }, { "epoch": 1.343938933360354, "grad_norm": 0.21191000938415527, "learning_rate": 2.93497295622314e-06, "loss": 0.0097, "step": 79580 }, { "epoch": 1.3441078123416759, "grad_norm": 0.18189816176891327, "learning_rate": 2.933630859806039e-06, "loss": 0.0079, "step": 79590 }, { "epoch": 1.344276691322998, "grad_norm": 0.1141907349228859, "learning_rate": 2.9322889429100236e-06, "loss": 0.0084, "step": 79600 }, { "epoch": 1.3444455703043199, "grad_norm": 0.2546742260456085, "learning_rate": 2.9309472056516786e-06, "loss": 0.0071, "step": 79610 }, { "epoch": 1.344614449285642, "grad_norm": 0.33053043484687805, "learning_rate": 2.9296056481475705e-06, "loss": 0.0071, "step": 79620 }, { "epoch": 1.344783328266964, "grad_norm": 0.20560389757156372, "learning_rate": 2.9282642705142512e-06, "loss": 0.0099, "step": 79630 }, { "epoch": 1.3449522072482858, "grad_norm": 0.243041530251503, "learning_rate": 2.9269230728682542e-06, "loss": 0.0106, "step": 79640 }, { "epoch": 1.345121086229608, "grad_norm": 1.1708146333694458, "learning_rate": 2.9255820553261016e-06, "loss": 0.0085, "step": 79650 }, { "epoch": 1.3452899652109298, "grad_norm": 0.3664478063583374, "learning_rate": 2.9242412180042963e-06, "loss": 0.0085, "step": 79660 }, { "epoch": 1.345458844192252, "grad_norm": 0.24491968750953674, "learning_rate": 2.9229005610193283e-06, "loss": 0.0095, "step": 79670 }, { "epoch": 1.3456277231735738, "grad_norm": 0.6386386752128601, "learning_rate": 2.92156008448767e-06, "loss": 0.0071, "step": 79680 }, { "epoch": 1.3457966021548957, "grad_norm": 0.18958550691604614, "learning_rate": 2.9202197885257777e-06, "loss": 0.014, "step": 79690 }, { "epoch": 1.3459654811362178, "grad_norm": 0.16555845737457275, "learning_rate": 2.9188796732500958e-06, "loss": 0.0069, "step": 79700 }, { "epoch": 1.3461343601175397, "grad_norm": 0.28942716121673584, "learning_rate": 2.9175397387770464e-06, "loss": 0.0049, "step": 79710 }, { "epoch": 1.3463032390988618, "grad_norm": 0.10461883991956711, "learning_rate": 2.9161999852230415e-06, "loss": 0.0045, "step": 79720 }, { "epoch": 1.3464721180801837, "grad_norm": 0.2541009187698364, "learning_rate": 2.9148604127044767e-06, "loss": 0.0058, "step": 79730 }, { "epoch": 1.3466409970615056, "grad_norm": 0.378120481967926, "learning_rate": 2.9135210213377295e-06, "loss": 0.0063, "step": 79740 }, { "epoch": 1.3468098760428278, "grad_norm": 0.23540440201759338, "learning_rate": 2.9121818112391653e-06, "loss": 0.0095, "step": 79750 }, { "epoch": 1.3469787550241497, "grad_norm": 0.2105170339345932, "learning_rate": 2.9108427825251275e-06, "loss": 0.0039, "step": 79760 }, { "epoch": 1.3471476340054718, "grad_norm": 0.10066394507884979, "learning_rate": 2.9095039353119498e-06, "loss": 0.004, "step": 79770 }, { "epoch": 1.3473165129867937, "grad_norm": 0.3276132047176361, "learning_rate": 2.9081652697159466e-06, "loss": 0.0088, "step": 79780 }, { "epoch": 1.3474853919681156, "grad_norm": 0.2988300025463104, "learning_rate": 2.9068267858534203e-06, "loss": 0.01, "step": 79790 }, { "epoch": 1.3476542709494377, "grad_norm": 0.32280784845352173, "learning_rate": 2.905488483840655e-06, "loss": 0.0104, "step": 79800 }, { "epoch": 1.3478231499307596, "grad_norm": 0.3574509024620056, "learning_rate": 2.904150363793916e-06, "loss": 0.0064, "step": 79810 }, { "epoch": 1.3479920289120817, "grad_norm": 0.20220017433166504, "learning_rate": 2.9028124258294588e-06, "loss": 0.0082, "step": 79820 }, { "epoch": 1.3481609078934036, "grad_norm": 0.156647190451622, "learning_rate": 2.9014746700635176e-06, "loss": 0.0075, "step": 79830 }, { "epoch": 1.3483297868747255, "grad_norm": 0.27398768067359924, "learning_rate": 2.9001370966123153e-06, "loss": 0.007, "step": 79840 }, { "epoch": 1.3484986658560476, "grad_norm": 0.10319831222295761, "learning_rate": 2.8987997055920576e-06, "loss": 0.0059, "step": 79850 }, { "epoch": 1.3486675448373695, "grad_norm": 0.22217339277267456, "learning_rate": 2.897462497118932e-06, "loss": 0.0105, "step": 79860 }, { "epoch": 1.3488364238186916, "grad_norm": 0.26180392503738403, "learning_rate": 2.8961254713091127e-06, "loss": 0.0074, "step": 79870 }, { "epoch": 1.3490053028000135, "grad_norm": 0.16717754304409027, "learning_rate": 2.8947886282787586e-06, "loss": 0.0048, "step": 79880 }, { "epoch": 1.3491741817813354, "grad_norm": 0.30561691522598267, "learning_rate": 2.893451968144009e-06, "loss": 0.0096, "step": 79890 }, { "epoch": 1.3493430607626575, "grad_norm": 0.16209663450717926, "learning_rate": 2.8921154910209897e-06, "loss": 0.0091, "step": 79900 }, { "epoch": 1.3495119397439794, "grad_norm": 0.1145910918712616, "learning_rate": 2.8907791970258114e-06, "loss": 0.0094, "step": 79910 }, { "epoch": 1.3496808187253015, "grad_norm": 0.12048536539077759, "learning_rate": 2.8894430862745704e-06, "loss": 0.0061, "step": 79920 }, { "epoch": 1.3498496977066234, "grad_norm": 0.10524918884038925, "learning_rate": 2.888107158883341e-06, "loss": 0.0061, "step": 79930 }, { "epoch": 1.3500185766879453, "grad_norm": 0.23313377797603607, "learning_rate": 2.8867714149681857e-06, "loss": 0.0118, "step": 79940 }, { "epoch": 1.3501874556692675, "grad_norm": 0.25346502661705017, "learning_rate": 2.885435854645152e-06, "loss": 0.0111, "step": 79950 }, { "epoch": 1.3503563346505894, "grad_norm": 0.2763037085533142, "learning_rate": 2.8841004780302707e-06, "loss": 0.0078, "step": 79960 }, { "epoch": 1.3505252136319115, "grad_norm": 0.2184762954711914, "learning_rate": 2.882765285239554e-06, "loss": 0.0052, "step": 79970 }, { "epoch": 1.3506940926132334, "grad_norm": 0.09938138723373413, "learning_rate": 2.8814302763890013e-06, "loss": 0.0076, "step": 79980 }, { "epoch": 1.3508629715945553, "grad_norm": 0.27607542276382446, "learning_rate": 2.8800954515945954e-06, "loss": 0.0107, "step": 79990 }, { "epoch": 1.3510318505758774, "grad_norm": 0.4299454391002655, "learning_rate": 2.8787608109723043e-06, "loss": 0.0065, "step": 80000 }, { "epoch": 1.3512007295571993, "grad_norm": 0.5847559571266174, "learning_rate": 2.877426354638073e-06, "loss": 0.0062, "step": 80010 }, { "epoch": 1.3513696085385214, "grad_norm": 0.022533750161528587, "learning_rate": 2.8760920827078398e-06, "loss": 0.0062, "step": 80020 }, { "epoch": 1.3515384875198433, "grad_norm": 0.3580186665058136, "learning_rate": 2.8747579952975226e-06, "loss": 0.0102, "step": 80030 }, { "epoch": 1.3517073665011652, "grad_norm": 0.26872655749320984, "learning_rate": 2.873424092523024e-06, "loss": 0.0097, "step": 80040 }, { "epoch": 1.3518762454824873, "grad_norm": 0.3349015712738037, "learning_rate": 2.8720903745002276e-06, "loss": 0.0082, "step": 80050 }, { "epoch": 1.3520451244638092, "grad_norm": 0.3766493797302246, "learning_rate": 2.8707568413450056e-06, "loss": 0.0082, "step": 80060 }, { "epoch": 1.3522140034451313, "grad_norm": 0.2176567167043686, "learning_rate": 2.869423493173211e-06, "loss": 0.0088, "step": 80070 }, { "epoch": 1.3523828824264532, "grad_norm": 0.1708797812461853, "learning_rate": 2.8680903301006834e-06, "loss": 0.0081, "step": 80080 }, { "epoch": 1.352551761407775, "grad_norm": 0.1634896695613861, "learning_rate": 2.8667573522432447e-06, "loss": 0.0072, "step": 80090 }, { "epoch": 1.3527206403890972, "grad_norm": 0.24769769608974457, "learning_rate": 2.8654245597166987e-06, "loss": 0.008, "step": 80100 }, { "epoch": 1.3528895193704191, "grad_norm": 0.15682794153690338, "learning_rate": 2.8640919526368365e-06, "loss": 0.0091, "step": 80110 }, { "epoch": 1.3530583983517412, "grad_norm": 0.29665589332580566, "learning_rate": 2.8627595311194344e-06, "loss": 0.0094, "step": 80120 }, { "epoch": 1.3532272773330631, "grad_norm": 0.5824762582778931, "learning_rate": 2.8614272952802445e-06, "loss": 0.0118, "step": 80130 }, { "epoch": 1.353396156314385, "grad_norm": 0.12033642828464508, "learning_rate": 2.8600952452350118e-06, "loss": 0.008, "step": 80140 }, { "epoch": 1.3535650352957072, "grad_norm": 0.24642729759216309, "learning_rate": 2.8587633810994596e-06, "loss": 0.0046, "step": 80150 }, { "epoch": 1.353733914277029, "grad_norm": 0.33196383714675903, "learning_rate": 2.8574317029893006e-06, "loss": 0.0055, "step": 80160 }, { "epoch": 1.3539027932583512, "grad_norm": 0.32445040345191956, "learning_rate": 2.856100211020223e-06, "loss": 0.007, "step": 80170 }, { "epoch": 1.354071672239673, "grad_norm": 0.5033735632896423, "learning_rate": 2.8547689053079053e-06, "loss": 0.0086, "step": 80180 }, { "epoch": 1.354240551220995, "grad_norm": 0.24921192228794098, "learning_rate": 2.853437785968009e-06, "loss": 0.0084, "step": 80190 }, { "epoch": 1.354409430202317, "grad_norm": 0.2481057494878769, "learning_rate": 2.8521068531161777e-06, "loss": 0.0072, "step": 80200 }, { "epoch": 1.354578309183639, "grad_norm": 0.2660357654094696, "learning_rate": 2.85077610686804e-06, "loss": 0.0065, "step": 80210 }, { "epoch": 1.354747188164961, "grad_norm": 0.11295757442712784, "learning_rate": 2.849445547339208e-06, "loss": 0.0064, "step": 80220 }, { "epoch": 1.354916067146283, "grad_norm": 0.16676141321659088, "learning_rate": 2.8481151746452766e-06, "loss": 0.008, "step": 80230 }, { "epoch": 1.3550849461276049, "grad_norm": 0.22581127285957336, "learning_rate": 2.846784988901826e-06, "loss": 0.0086, "step": 80240 }, { "epoch": 1.355253825108927, "grad_norm": 0.4618884325027466, "learning_rate": 2.8454549902244223e-06, "loss": 0.0081, "step": 80250 }, { "epoch": 1.355422704090249, "grad_norm": 0.21548061072826385, "learning_rate": 2.844125178728607e-06, "loss": 0.0066, "step": 80260 }, { "epoch": 1.355591583071571, "grad_norm": 0.1377248764038086, "learning_rate": 2.842795554529913e-06, "loss": 0.0069, "step": 80270 }, { "epoch": 1.355760462052893, "grad_norm": 0.2439013570547104, "learning_rate": 2.841466117743856e-06, "loss": 0.01, "step": 80280 }, { "epoch": 1.3559293410342148, "grad_norm": 0.7173863053321838, "learning_rate": 2.840136868485935e-06, "loss": 0.0091, "step": 80290 }, { "epoch": 1.356098220015537, "grad_norm": 0.31926241517066956, "learning_rate": 2.8388078068716286e-06, "loss": 0.0075, "step": 80300 }, { "epoch": 1.3562670989968588, "grad_norm": 0.4193738102912903, "learning_rate": 2.8374789330164037e-06, "loss": 0.0096, "step": 80310 }, { "epoch": 1.356435977978181, "grad_norm": 0.33678334951400757, "learning_rate": 2.8361502470357085e-06, "loss": 0.0067, "step": 80320 }, { "epoch": 1.3566048569595028, "grad_norm": 0.7406383752822876, "learning_rate": 2.8348217490449812e-06, "loss": 0.0126, "step": 80330 }, { "epoch": 1.3567737359408247, "grad_norm": 0.36509788036346436, "learning_rate": 2.8334934391596336e-06, "loss": 0.0062, "step": 80340 }, { "epoch": 1.3569426149221469, "grad_norm": 0.20483124256134033, "learning_rate": 2.832165317495067e-06, "loss": 0.0107, "step": 80350 }, { "epoch": 1.3571114939034687, "grad_norm": 0.08764871209859848, "learning_rate": 2.8308373841666657e-06, "loss": 0.0056, "step": 80360 }, { "epoch": 1.3572803728847909, "grad_norm": 0.215460866689682, "learning_rate": 2.8295096392897993e-06, "loss": 0.007, "step": 80370 }, { "epoch": 1.3574492518661128, "grad_norm": 0.12419673800468445, "learning_rate": 2.8281820829798146e-06, "loss": 0.0053, "step": 80380 }, { "epoch": 1.3576181308474347, "grad_norm": 0.3084765076637268, "learning_rate": 2.8268547153520486e-06, "loss": 0.0067, "step": 80390 }, { "epoch": 1.3577870098287568, "grad_norm": 0.22742244601249695, "learning_rate": 2.8255275365218204e-06, "loss": 0.0059, "step": 80400 }, { "epoch": 1.3579558888100787, "grad_norm": 0.628301203250885, "learning_rate": 2.824200546604433e-06, "loss": 0.0112, "step": 80410 }, { "epoch": 1.3581247677914008, "grad_norm": 0.15873201191425323, "learning_rate": 2.822873745715169e-06, "loss": 0.0074, "step": 80420 }, { "epoch": 1.3582936467727227, "grad_norm": 0.16380830109119415, "learning_rate": 2.821547133969297e-06, "loss": 0.0057, "step": 80430 }, { "epoch": 1.3584625257540446, "grad_norm": 0.2352565973997116, "learning_rate": 2.820220711482074e-06, "loss": 0.0066, "step": 80440 }, { "epoch": 1.3586314047353667, "grad_norm": 0.15311312675476074, "learning_rate": 2.8188944783687366e-06, "loss": 0.0088, "step": 80450 }, { "epoch": 1.3588002837166886, "grad_norm": 0.15975527465343475, "learning_rate": 2.8175684347445e-06, "loss": 0.0051, "step": 80460 }, { "epoch": 1.3589691626980107, "grad_norm": 0.1936812549829483, "learning_rate": 2.81624258072457e-06, "loss": 0.0058, "step": 80470 }, { "epoch": 1.3591380416793326, "grad_norm": 0.3782055377960205, "learning_rate": 2.8149169164241334e-06, "loss": 0.0059, "step": 80480 }, { "epoch": 1.3593069206606545, "grad_norm": 0.2675686776638031, "learning_rate": 2.8135914419583634e-06, "loss": 0.007, "step": 80490 }, { "epoch": 1.3594757996419766, "grad_norm": 0.17176055908203125, "learning_rate": 2.8122661574424092e-06, "loss": 0.008, "step": 80500 }, { "epoch": 1.3596446786232985, "grad_norm": 0.43690812587738037, "learning_rate": 2.8109410629914103e-06, "loss": 0.0103, "step": 80510 }, { "epoch": 1.3598135576046206, "grad_norm": 0.3031787574291229, "learning_rate": 2.8096161587204885e-06, "loss": 0.0071, "step": 80520 }, { "epoch": 1.3599824365859425, "grad_norm": 0.3218194246292114, "learning_rate": 2.8082914447447484e-06, "loss": 0.0097, "step": 80530 }, { "epoch": 1.3601513155672644, "grad_norm": 0.2694239318370819, "learning_rate": 2.806966921179276e-06, "loss": 0.007, "step": 80540 }, { "epoch": 1.3603201945485865, "grad_norm": 0.5140647888183594, "learning_rate": 2.8056425881391415e-06, "loss": 0.0087, "step": 80550 }, { "epoch": 1.3604890735299084, "grad_norm": 0.18681493401527405, "learning_rate": 2.8043184457394033e-06, "loss": 0.0097, "step": 80560 }, { "epoch": 1.3606579525112306, "grad_norm": 0.3390238881111145, "learning_rate": 2.8029944940951013e-06, "loss": 0.0083, "step": 80570 }, { "epoch": 1.3608268314925525, "grad_norm": 0.44344717264175415, "learning_rate": 2.801670733321252e-06, "loss": 0.0092, "step": 80580 }, { "epoch": 1.3609957104738744, "grad_norm": 0.3993111252784729, "learning_rate": 2.8003471635328623e-06, "loss": 0.0062, "step": 80590 }, { "epoch": 1.3611645894551965, "grad_norm": 0.4897790253162384, "learning_rate": 2.799023784844921e-06, "loss": 0.0075, "step": 80600 }, { "epoch": 1.3613334684365184, "grad_norm": 0.07087881118059158, "learning_rate": 2.7977005973724025e-06, "loss": 0.0097, "step": 80610 }, { "epoch": 1.3615023474178405, "grad_norm": 0.2696091830730438, "learning_rate": 2.796377601230257e-06, "loss": 0.0094, "step": 80620 }, { "epoch": 1.3616712263991624, "grad_norm": 0.24983015656471252, "learning_rate": 2.7950547965334263e-06, "loss": 0.0079, "step": 80630 }, { "epoch": 1.3618401053804843, "grad_norm": 0.366435706615448, "learning_rate": 2.7937321833968315e-06, "loss": 0.007, "step": 80640 }, { "epoch": 1.3620089843618064, "grad_norm": 0.22313478589057922, "learning_rate": 2.792409761935381e-06, "loss": 0.0077, "step": 80650 }, { "epoch": 1.3621778633431283, "grad_norm": 0.28268635272979736, "learning_rate": 2.791087532263956e-06, "loss": 0.0082, "step": 80660 }, { "epoch": 1.3623467423244504, "grad_norm": 0.3585094213485718, "learning_rate": 2.7897654944974365e-06, "loss": 0.007, "step": 80670 }, { "epoch": 1.3625156213057723, "grad_norm": 0.22064922749996185, "learning_rate": 2.788443648750675e-06, "loss": 0.0044, "step": 80680 }, { "epoch": 1.3626845002870942, "grad_norm": 0.09901641309261322, "learning_rate": 2.7871219951385096e-06, "loss": 0.0071, "step": 80690 }, { "epoch": 1.3628533792684163, "grad_norm": 0.19533489644527435, "learning_rate": 2.7858005337757655e-06, "loss": 0.0072, "step": 80700 }, { "epoch": 1.3630222582497382, "grad_norm": 0.08750300109386444, "learning_rate": 2.784479264777243e-06, "loss": 0.0082, "step": 80710 }, { "epoch": 1.3631911372310603, "grad_norm": 0.21496929228305817, "learning_rate": 2.783158188257733e-06, "loss": 0.0096, "step": 80720 }, { "epoch": 1.3633600162123822, "grad_norm": 0.2012525051832199, "learning_rate": 2.7818373043320073e-06, "loss": 0.0085, "step": 80730 }, { "epoch": 1.3635288951937041, "grad_norm": 0.1894376128911972, "learning_rate": 2.7805166131148237e-06, "loss": 0.0048, "step": 80740 }, { "epoch": 1.3636977741750262, "grad_norm": 0.2184475213289261, "learning_rate": 2.779196114720916e-06, "loss": 0.0097, "step": 80750 }, { "epoch": 1.3638666531563481, "grad_norm": 0.43169471621513367, "learning_rate": 2.777875809265008e-06, "loss": 0.0094, "step": 80760 }, { "epoch": 1.3640355321376703, "grad_norm": 0.37339770793914795, "learning_rate": 2.7765556968618052e-06, "loss": 0.0094, "step": 80770 }, { "epoch": 1.3642044111189922, "grad_norm": 0.2585262060165405, "learning_rate": 2.775235777625995e-06, "loss": 0.0068, "step": 80780 }, { "epoch": 1.364373290100314, "grad_norm": 0.5593042969703674, "learning_rate": 2.7739160516722487e-06, "loss": 0.0099, "step": 80790 }, { "epoch": 1.3645421690816362, "grad_norm": 0.1311023235321045, "learning_rate": 2.7725965191152203e-06, "loss": 0.0051, "step": 80800 }, { "epoch": 1.364711048062958, "grad_norm": 0.5659757852554321, "learning_rate": 2.7712771800695493e-06, "loss": 0.007, "step": 80810 }, { "epoch": 1.3648799270442802, "grad_norm": 0.448679655790329, "learning_rate": 2.769958034649858e-06, "loss": 0.0057, "step": 80820 }, { "epoch": 1.365048806025602, "grad_norm": 0.28306472301483154, "learning_rate": 2.7686390829707457e-06, "loss": 0.0077, "step": 80830 }, { "epoch": 1.365217685006924, "grad_norm": 0.07558977603912354, "learning_rate": 2.767320325146802e-06, "loss": 0.0058, "step": 80840 }, { "epoch": 1.365386563988246, "grad_norm": 0.6769158244132996, "learning_rate": 2.7660017612925984e-06, "loss": 0.0072, "step": 80850 }, { "epoch": 1.365555442969568, "grad_norm": 0.1825881004333496, "learning_rate": 2.7646833915226883e-06, "loss": 0.0053, "step": 80860 }, { "epoch": 1.36572432195089, "grad_norm": 0.17207777500152588, "learning_rate": 2.7633652159516066e-06, "loss": 0.0091, "step": 80870 }, { "epoch": 1.365893200932212, "grad_norm": 0.31308019161224365, "learning_rate": 2.762047234693874e-06, "loss": 0.0111, "step": 80880 }, { "epoch": 1.366062079913534, "grad_norm": 0.30702054500579834, "learning_rate": 2.7607294478639947e-06, "loss": 0.0078, "step": 80890 }, { "epoch": 1.366230958894856, "grad_norm": 0.2869018018245697, "learning_rate": 2.7594118555764526e-06, "loss": 0.009, "step": 80900 }, { "epoch": 1.366399837876178, "grad_norm": 0.1946825087070465, "learning_rate": 2.758094457945719e-06, "loss": 0.0097, "step": 80910 }, { "epoch": 1.3665687168575, "grad_norm": 0.12846355140209198, "learning_rate": 2.756777255086246e-06, "loss": 0.0064, "step": 80920 }, { "epoch": 1.366737595838822, "grad_norm": 0.3524220287799835, "learning_rate": 2.755460247112467e-06, "loss": 0.0096, "step": 80930 }, { "epoch": 1.3669064748201438, "grad_norm": 0.2656666338443756, "learning_rate": 2.7541434341388044e-06, "loss": 0.0074, "step": 80940 }, { "epoch": 1.367075353801466, "grad_norm": 0.10681386291980743, "learning_rate": 2.7528268162796556e-06, "loss": 0.0078, "step": 80950 }, { "epoch": 1.3672442327827878, "grad_norm": 0.039235133677721024, "learning_rate": 2.751510393649406e-06, "loss": 0.0055, "step": 80960 }, { "epoch": 1.36741311176411, "grad_norm": 0.27172961831092834, "learning_rate": 2.750194166362423e-06, "loss": 0.0109, "step": 80970 }, { "epoch": 1.3675819907454319, "grad_norm": 0.31933632493019104, "learning_rate": 2.7488781345330617e-06, "loss": 0.0087, "step": 80980 }, { "epoch": 1.3677508697267537, "grad_norm": 0.5565377473831177, "learning_rate": 2.747562298275649e-06, "loss": 0.0079, "step": 80990 }, { "epoch": 1.3679197487080759, "grad_norm": 0.1780015081167221, "learning_rate": 2.746246657704504e-06, "loss": 0.0073, "step": 81000 }, { "epoch": 1.3680886276893978, "grad_norm": 0.33710458874702454, "learning_rate": 2.744931212933928e-06, "loss": 0.0091, "step": 81010 }, { "epoch": 1.3682575066707199, "grad_norm": 0.1726846694946289, "learning_rate": 2.7436159640782012e-06, "loss": 0.0074, "step": 81020 }, { "epoch": 1.3684263856520418, "grad_norm": 0.09117510169744492, "learning_rate": 2.7423009112515912e-06, "loss": 0.0082, "step": 81030 }, { "epoch": 1.3685952646333637, "grad_norm": 0.20213554799556732, "learning_rate": 2.7409860545683457e-06, "loss": 0.0091, "step": 81040 }, { "epoch": 1.3687641436146858, "grad_norm": 0.3986390233039856, "learning_rate": 2.7396713941426967e-06, "loss": 0.0094, "step": 81050 }, { "epoch": 1.3689330225960077, "grad_norm": 0.24483118951320648, "learning_rate": 2.7383569300888603e-06, "loss": 0.008, "step": 81060 }, { "epoch": 1.3691019015773298, "grad_norm": 0.5611473321914673, "learning_rate": 2.7370426625210288e-06, "loss": 0.0061, "step": 81070 }, { "epoch": 1.3692707805586517, "grad_norm": 0.3409503698348999, "learning_rate": 2.735728591553387e-06, "loss": 0.0074, "step": 81080 }, { "epoch": 1.3694396595399736, "grad_norm": 0.19742384552955627, "learning_rate": 2.734414717300096e-06, "loss": 0.0106, "step": 81090 }, { "epoch": 1.3696085385212957, "grad_norm": 0.2165084183216095, "learning_rate": 2.7331010398753054e-06, "loss": 0.0077, "step": 81100 }, { "epoch": 1.3697774175026176, "grad_norm": 0.36088958382606506, "learning_rate": 2.73178755939314e-06, "loss": 0.0086, "step": 81110 }, { "epoch": 1.3699462964839397, "grad_norm": 0.20618577301502228, "learning_rate": 2.7304742759677126e-06, "loss": 0.0051, "step": 81120 }, { "epoch": 1.3701151754652616, "grad_norm": 0.10439061373472214, "learning_rate": 2.7291611897131194e-06, "loss": 0.0114, "step": 81130 }, { "epoch": 1.3702840544465835, "grad_norm": 0.30608609318733215, "learning_rate": 2.7278483007434377e-06, "loss": 0.0062, "step": 81140 }, { "epoch": 1.3704529334279056, "grad_norm": 0.32505202293395996, "learning_rate": 2.7265356091727273e-06, "loss": 0.0082, "step": 81150 }, { "epoch": 1.3706218124092275, "grad_norm": 0.14552012085914612, "learning_rate": 2.7252231151150333e-06, "loss": 0.008, "step": 81160 }, { "epoch": 1.3707906913905497, "grad_norm": 0.28349611163139343, "learning_rate": 2.7239108186843805e-06, "loss": 0.0061, "step": 81170 }, { "epoch": 1.3709595703718715, "grad_norm": 0.20909248292446136, "learning_rate": 2.7225987199947785e-06, "loss": 0.0067, "step": 81180 }, { "epoch": 1.3711284493531934, "grad_norm": 0.4535333812236786, "learning_rate": 2.7212868191602216e-06, "loss": 0.0135, "step": 81190 }, { "epoch": 1.3712973283345156, "grad_norm": 0.11936462670564651, "learning_rate": 2.7199751162946795e-06, "loss": 0.0085, "step": 81200 }, { "epoch": 1.3714662073158375, "grad_norm": 0.22081099450588226, "learning_rate": 2.7186636115121125e-06, "loss": 0.0082, "step": 81210 }, { "epoch": 1.3716350862971596, "grad_norm": 0.21885444223880768, "learning_rate": 2.7173523049264616e-06, "loss": 0.0109, "step": 81220 }, { "epoch": 1.3718039652784815, "grad_norm": 0.08970603346824646, "learning_rate": 2.71604119665165e-06, "loss": 0.0094, "step": 81230 }, { "epoch": 1.3719728442598034, "grad_norm": 0.14779432117938995, "learning_rate": 2.714730286801581e-06, "loss": 0.0038, "step": 81240 }, { "epoch": 1.3721417232411255, "grad_norm": 0.32948732376098633, "learning_rate": 2.7134195754901455e-06, "loss": 0.0064, "step": 81250 }, { "epoch": 1.3723106022224474, "grad_norm": 0.2770748734474182, "learning_rate": 2.712109062831214e-06, "loss": 0.0062, "step": 81260 }, { "epoch": 1.3724794812037695, "grad_norm": 0.3686327338218689, "learning_rate": 2.7107987489386413e-06, "loss": 0.008, "step": 81270 }, { "epoch": 1.3726483601850914, "grad_norm": 0.23861393332481384, "learning_rate": 2.7094886339262625e-06, "loss": 0.0071, "step": 81280 }, { "epoch": 1.3728172391664133, "grad_norm": 0.4466188848018646, "learning_rate": 2.7081787179079e-06, "loss": 0.0089, "step": 81290 }, { "epoch": 1.3729861181477354, "grad_norm": 0.32807132601737976, "learning_rate": 2.7068690009973543e-06, "loss": 0.0116, "step": 81300 }, { "epoch": 1.3731549971290573, "grad_norm": 0.1738046556711197, "learning_rate": 2.7055594833084124e-06, "loss": 0.0081, "step": 81310 }, { "epoch": 1.3733238761103794, "grad_norm": 0.3138364553451538, "learning_rate": 2.704250164954839e-06, "loss": 0.0073, "step": 81320 }, { "epoch": 1.3734927550917013, "grad_norm": 0.2400394082069397, "learning_rate": 2.7029410460503853e-06, "loss": 0.0103, "step": 81330 }, { "epoch": 1.3736616340730232, "grad_norm": 0.22545240819454193, "learning_rate": 2.701632126708785e-06, "loss": 0.0039, "step": 81340 }, { "epoch": 1.3738305130543453, "grad_norm": 0.4268087148666382, "learning_rate": 2.7003234070437557e-06, "loss": 0.0052, "step": 81350 }, { "epoch": 1.3739993920356672, "grad_norm": 0.30668339133262634, "learning_rate": 2.6990148871689914e-06, "loss": 0.0092, "step": 81360 }, { "epoch": 1.3741682710169894, "grad_norm": 0.2578008472919464, "learning_rate": 2.6977065671981763e-06, "loss": 0.0091, "step": 81370 }, { "epoch": 1.3743371499983112, "grad_norm": 0.1549130231142044, "learning_rate": 2.696398447244972e-06, "loss": 0.0049, "step": 81380 }, { "epoch": 1.3745060289796331, "grad_norm": 0.24084654450416565, "learning_rate": 2.6950905274230268e-06, "loss": 0.0113, "step": 81390 }, { "epoch": 1.3746749079609553, "grad_norm": 0.2691340148448944, "learning_rate": 2.693782807845968e-06, "loss": 0.0066, "step": 81400 }, { "epoch": 1.3748437869422772, "grad_norm": 0.18176999688148499, "learning_rate": 2.6924752886274085e-06, "loss": 0.0068, "step": 81410 }, { "epoch": 1.3750126659235993, "grad_norm": 0.10544741153717041, "learning_rate": 2.6911679698809413e-06, "loss": 0.0078, "step": 81420 }, { "epoch": 1.3751815449049212, "grad_norm": 0.1447933316230774, "learning_rate": 2.689860851720145e-06, "loss": 0.0118, "step": 81430 }, { "epoch": 1.375350423886243, "grad_norm": 0.3222818076610565, "learning_rate": 2.6885539342585754e-06, "loss": 0.0085, "step": 81440 }, { "epoch": 1.3755193028675652, "grad_norm": 0.3180679976940155, "learning_rate": 2.6872472176097764e-06, "loss": 0.0082, "step": 81450 }, { "epoch": 1.375688181848887, "grad_norm": 0.5267148017883301, "learning_rate": 2.685940701887272e-06, "loss": 0.0101, "step": 81460 }, { "epoch": 1.3758570608302092, "grad_norm": 0.46726202964782715, "learning_rate": 2.684634387204571e-06, "loss": 0.0066, "step": 81470 }, { "epoch": 1.376025939811531, "grad_norm": 0.23730362951755524, "learning_rate": 2.6833282736751587e-06, "loss": 0.009, "step": 81480 }, { "epoch": 1.376194818792853, "grad_norm": 0.15751418471336365, "learning_rate": 2.6820223614125097e-06, "loss": 0.0053, "step": 81490 }, { "epoch": 1.3763636977741749, "grad_norm": 0.11508701741695404, "learning_rate": 2.6807166505300775e-06, "loss": 0.0065, "step": 81500 }, { "epoch": 1.376532576755497, "grad_norm": 0.2172517031431198, "learning_rate": 2.6794111411412997e-06, "loss": 0.0068, "step": 81510 }, { "epoch": 1.3767014557368191, "grad_norm": 0.23474900424480438, "learning_rate": 2.6781058333595957e-06, "loss": 0.0044, "step": 81520 }, { "epoch": 1.376870334718141, "grad_norm": 0.35926946997642517, "learning_rate": 2.6768007272983675e-06, "loss": 0.0103, "step": 81530 }, { "epoch": 1.377039213699463, "grad_norm": 0.22592340409755707, "learning_rate": 2.6754958230709994e-06, "loss": 0.0066, "step": 81540 }, { "epoch": 1.3772080926807848, "grad_norm": 0.29170796275138855, "learning_rate": 2.6741911207908604e-06, "loss": 0.0061, "step": 81550 }, { "epoch": 1.377376971662107, "grad_norm": 0.37557244300842285, "learning_rate": 2.6728866205712954e-06, "loss": 0.0092, "step": 81560 }, { "epoch": 1.377545850643429, "grad_norm": 0.3018769323825836, "learning_rate": 2.6715823225256386e-06, "loss": 0.008, "step": 81570 }, { "epoch": 1.377714729624751, "grad_norm": 0.484544962644577, "learning_rate": 2.6702782267672044e-06, "loss": 0.0115, "step": 81580 }, { "epoch": 1.3778836086060728, "grad_norm": 0.315108060836792, "learning_rate": 2.6689743334092886e-06, "loss": 0.0075, "step": 81590 }, { "epoch": 1.3780524875873947, "grad_norm": 0.23842541873455048, "learning_rate": 2.6676706425651737e-06, "loss": 0.0061, "step": 81600 }, { "epoch": 1.3782213665687169, "grad_norm": 0.22332468628883362, "learning_rate": 2.6663671543481162e-06, "loss": 0.0084, "step": 81610 }, { "epoch": 1.378390245550039, "grad_norm": 0.307372123003006, "learning_rate": 2.6650638688713613e-06, "loss": 0.0082, "step": 81620 }, { "epoch": 1.3785591245313609, "grad_norm": 0.27107834815979004, "learning_rate": 2.6637607862481363e-06, "loss": 0.0097, "step": 81630 }, { "epoch": 1.3787280035126828, "grad_norm": 0.399905264377594, "learning_rate": 2.66245790659165e-06, "loss": 0.0046, "step": 81640 }, { "epoch": 1.3788968824940047, "grad_norm": 0.13718627393245697, "learning_rate": 2.661155230015092e-06, "loss": 0.0087, "step": 81650 }, { "epoch": 1.3790657614753268, "grad_norm": 0.2961863577365875, "learning_rate": 2.6598527566316374e-06, "loss": 0.0081, "step": 81660 }, { "epoch": 1.379234640456649, "grad_norm": 0.1538916826248169, "learning_rate": 2.658550486554441e-06, "loss": 0.0067, "step": 81670 }, { "epoch": 1.3794035194379708, "grad_norm": 0.34538891911506653, "learning_rate": 2.657248419896642e-06, "loss": 0.0106, "step": 81680 }, { "epoch": 1.3795723984192927, "grad_norm": 0.12537364661693573, "learning_rate": 2.6559465567713583e-06, "loss": 0.0056, "step": 81690 }, { "epoch": 1.3797412774006146, "grad_norm": 0.17303290963172913, "learning_rate": 2.6546448972916945e-06, "loss": 0.0041, "step": 81700 }, { "epoch": 1.3799101563819367, "grad_norm": 0.34099069237709045, "learning_rate": 2.653343441570734e-06, "loss": 0.0068, "step": 81710 }, { "epoch": 1.3800790353632588, "grad_norm": 0.08724551647901535, "learning_rate": 2.652042189721548e-06, "loss": 0.0068, "step": 81720 }, { "epoch": 1.3802479143445807, "grad_norm": 0.27652692794799805, "learning_rate": 2.6507411418571803e-06, "loss": 0.011, "step": 81730 }, { "epoch": 1.3804167933259026, "grad_norm": 0.2666003108024597, "learning_rate": 2.6494402980906663e-06, "loss": 0.0063, "step": 81740 }, { "epoch": 1.3805856723072245, "grad_norm": 0.3755659759044647, "learning_rate": 2.6481396585350192e-06, "loss": 0.0056, "step": 81750 }, { "epoch": 1.3807545512885466, "grad_norm": 0.22701941430568695, "learning_rate": 2.646839223303236e-06, "loss": 0.0092, "step": 81760 }, { "epoch": 1.3809234302698687, "grad_norm": 0.13043935596942902, "learning_rate": 2.645538992508294e-06, "loss": 0.0034, "step": 81770 }, { "epoch": 1.3810923092511906, "grad_norm": 0.3961047828197479, "learning_rate": 2.6442389662631563e-06, "loss": 0.0082, "step": 81780 }, { "epoch": 1.3812611882325125, "grad_norm": 0.3648819625377655, "learning_rate": 2.642939144680764e-06, "loss": 0.0056, "step": 81790 }, { "epoch": 1.3814300672138344, "grad_norm": 0.15979477763175964, "learning_rate": 2.6416395278740446e-06, "loss": 0.0065, "step": 81800 }, { "epoch": 1.3815989461951566, "grad_norm": 0.7957839965820312, "learning_rate": 2.6403401159559032e-06, "loss": 0.0076, "step": 81810 }, { "epoch": 1.3817678251764787, "grad_norm": 0.20423194766044617, "learning_rate": 2.63904090903923e-06, "loss": 0.0097, "step": 81820 }, { "epoch": 1.3819367041578006, "grad_norm": 0.2838907241821289, "learning_rate": 2.6377419072368976e-06, "loss": 0.0057, "step": 81830 }, { "epoch": 1.3821055831391225, "grad_norm": 0.1225535050034523, "learning_rate": 2.6364431106617616e-06, "loss": 0.0095, "step": 81840 }, { "epoch": 1.3822744621204444, "grad_norm": 0.23411224782466888, "learning_rate": 2.6351445194266545e-06, "loss": 0.0056, "step": 81850 }, { "epoch": 1.3824433411017665, "grad_norm": 0.18399879336357117, "learning_rate": 2.6338461336443965e-06, "loss": 0.0063, "step": 81860 }, { "epoch": 1.3826122200830886, "grad_norm": 0.3943118155002594, "learning_rate": 2.6325479534277893e-06, "loss": 0.0101, "step": 81870 }, { "epoch": 1.3827810990644105, "grad_norm": 0.350994348526001, "learning_rate": 2.631249978889614e-06, "loss": 0.0098, "step": 81880 }, { "epoch": 1.3829499780457324, "grad_norm": 0.2962242066860199, "learning_rate": 2.629952210142637e-06, "loss": 0.0065, "step": 81890 }, { "epoch": 1.3831188570270543, "grad_norm": 0.40022793412208557, "learning_rate": 2.628654647299604e-06, "loss": 0.0095, "step": 81900 }, { "epoch": 1.3832877360083764, "grad_norm": 0.19619962573051453, "learning_rate": 2.627357290473245e-06, "loss": 0.0076, "step": 81910 }, { "epoch": 1.3834566149896985, "grad_norm": 0.15769892930984497, "learning_rate": 2.626060139776272e-06, "loss": 0.0064, "step": 81920 }, { "epoch": 1.3836254939710204, "grad_norm": 0.09712288528680801, "learning_rate": 2.6247631953213747e-06, "loss": 0.0111, "step": 81930 }, { "epoch": 1.3837943729523423, "grad_norm": 1.0053421258926392, "learning_rate": 2.623466457221232e-06, "loss": 0.0112, "step": 81940 }, { "epoch": 1.3839632519336642, "grad_norm": 0.15988652408123016, "learning_rate": 2.622169925588499e-06, "loss": 0.0062, "step": 81950 }, { "epoch": 1.3841321309149863, "grad_norm": 0.4745902121067047, "learning_rate": 2.620873600535819e-06, "loss": 0.0103, "step": 81960 }, { "epoch": 1.3843010098963084, "grad_norm": 0.2433343380689621, "learning_rate": 2.6195774821758086e-06, "loss": 0.0134, "step": 81970 }, { "epoch": 1.3844698888776303, "grad_norm": 0.22221051156520844, "learning_rate": 2.6182815706210736e-06, "loss": 0.0075, "step": 81980 }, { "epoch": 1.3846387678589522, "grad_norm": 0.27765515446662903, "learning_rate": 2.6169858659841997e-06, "loss": 0.0089, "step": 81990 }, { "epoch": 1.3848076468402741, "grad_norm": 0.3135737180709839, "learning_rate": 2.6156903683777547e-06, "loss": 0.0045, "step": 82000 }, { "epoch": 1.3849765258215962, "grad_norm": 0.4153648316860199, "learning_rate": 2.6143950779142875e-06, "loss": 0.0101, "step": 82010 }, { "epoch": 1.3851454048029184, "grad_norm": 0.24776718020439148, "learning_rate": 2.613099994706331e-06, "loss": 0.0062, "step": 82020 }, { "epoch": 1.3853142837842403, "grad_norm": 0.13808627426624298, "learning_rate": 2.6118051188663972e-06, "loss": 0.0077, "step": 82030 }, { "epoch": 1.3854831627655622, "grad_norm": 0.44806942343711853, "learning_rate": 2.6105104505069834e-06, "loss": 0.0071, "step": 82040 }, { "epoch": 1.385652041746884, "grad_norm": 0.2077297568321228, "learning_rate": 2.6092159897405683e-06, "loss": 0.0093, "step": 82050 }, { "epoch": 1.3858209207282062, "grad_norm": 0.20162436366081238, "learning_rate": 2.6079217366796077e-06, "loss": 0.0158, "step": 82060 }, { "epoch": 1.385989799709528, "grad_norm": 0.24410247802734375, "learning_rate": 2.606627691436545e-06, "loss": 0.0068, "step": 82070 }, { "epoch": 1.3861586786908502, "grad_norm": 0.16704806685447693, "learning_rate": 2.6053338541238045e-06, "loss": 0.0074, "step": 82080 }, { "epoch": 1.386327557672172, "grad_norm": 0.07521862536668777, "learning_rate": 2.604040224853792e-06, "loss": 0.0081, "step": 82090 }, { "epoch": 1.386496436653494, "grad_norm": 0.18556928634643555, "learning_rate": 2.6027468037388924e-06, "loss": 0.0065, "step": 82100 }, { "epoch": 1.386665315634816, "grad_norm": 0.48468244075775146, "learning_rate": 2.6014535908914763e-06, "loss": 0.0072, "step": 82110 }, { "epoch": 1.386834194616138, "grad_norm": 0.32812562584877014, "learning_rate": 2.6001605864238943e-06, "loss": 0.0078, "step": 82120 }, { "epoch": 1.3870030735974601, "grad_norm": 0.3885965049266815, "learning_rate": 2.5988677904484803e-06, "loss": 0.006, "step": 82130 }, { "epoch": 1.387171952578782, "grad_norm": 0.23656344413757324, "learning_rate": 2.597575203077549e-06, "loss": 0.0063, "step": 82140 }, { "epoch": 1.387340831560104, "grad_norm": 0.26947131752967834, "learning_rate": 2.596282824423398e-06, "loss": 0.0083, "step": 82150 }, { "epoch": 1.387509710541426, "grad_norm": 0.17703098058700562, "learning_rate": 2.5949906545983046e-06, "loss": 0.0086, "step": 82160 }, { "epoch": 1.387678589522748, "grad_norm": 0.23775514960289001, "learning_rate": 2.5936986937145312e-06, "loss": 0.0104, "step": 82170 }, { "epoch": 1.38784746850407, "grad_norm": 0.27403825521469116, "learning_rate": 2.5924069418843185e-06, "loss": 0.0076, "step": 82180 }, { "epoch": 1.388016347485392, "grad_norm": 0.29439789056777954, "learning_rate": 2.59111539921989e-06, "loss": 0.0064, "step": 82190 }, { "epoch": 1.3881852264667138, "grad_norm": 0.2034139633178711, "learning_rate": 2.5898240658334534e-06, "loss": 0.0053, "step": 82200 }, { "epoch": 1.388354105448036, "grad_norm": 0.3413747549057007, "learning_rate": 2.588532941837198e-06, "loss": 0.0044, "step": 82210 }, { "epoch": 1.3885229844293578, "grad_norm": 0.16253314912319183, "learning_rate": 2.58724202734329e-06, "loss": 0.0086, "step": 82220 }, { "epoch": 1.38869186341068, "grad_norm": 0.23342323303222656, "learning_rate": 2.585951322463882e-06, "loss": 0.0056, "step": 82230 }, { "epoch": 1.3888607423920019, "grad_norm": 0.2460262030363083, "learning_rate": 2.5846608273111058e-06, "loss": 0.0073, "step": 82240 }, { "epoch": 1.3890296213733238, "grad_norm": 0.36670583486557007, "learning_rate": 2.583370541997082e-06, "loss": 0.0081, "step": 82250 }, { "epoch": 1.3891985003546459, "grad_norm": 0.0911441370844841, "learning_rate": 2.582080466633903e-06, "loss": 0.0066, "step": 82260 }, { "epoch": 1.3893673793359678, "grad_norm": 0.1388777792453766, "learning_rate": 2.5807906013336476e-06, "loss": 0.0052, "step": 82270 }, { "epoch": 1.3895362583172899, "grad_norm": 0.14588049054145813, "learning_rate": 2.579500946208377e-06, "loss": 0.0075, "step": 82280 }, { "epoch": 1.3897051372986118, "grad_norm": 0.37802809476852417, "learning_rate": 2.5782115013701346e-06, "loss": 0.0068, "step": 82290 }, { "epoch": 1.3898740162799337, "grad_norm": 0.3763407766819, "learning_rate": 2.5769222669309412e-06, "loss": 0.0056, "step": 82300 }, { "epoch": 1.3900428952612558, "grad_norm": 0.19702982902526855, "learning_rate": 2.5756332430028036e-06, "loss": 0.0061, "step": 82310 }, { "epoch": 1.3902117742425777, "grad_norm": 0.08519946038722992, "learning_rate": 2.5743444296977093e-06, "loss": 0.0061, "step": 82320 }, { "epoch": 1.3903806532238998, "grad_norm": 0.18504340946674347, "learning_rate": 2.573055827127629e-06, "loss": 0.0053, "step": 82330 }, { "epoch": 1.3905495322052217, "grad_norm": 0.14275266230106354, "learning_rate": 2.5717674354045097e-06, "loss": 0.0079, "step": 82340 }, { "epoch": 1.3907184111865436, "grad_norm": 0.26911866664886475, "learning_rate": 2.570479254640286e-06, "loss": 0.0098, "step": 82350 }, { "epoch": 1.3908872901678657, "grad_norm": 0.3207036256790161, "learning_rate": 2.569191284946868e-06, "loss": 0.0072, "step": 82360 }, { "epoch": 1.3910561691491876, "grad_norm": 0.16299007833003998, "learning_rate": 2.56790352643616e-06, "loss": 0.005, "step": 82370 }, { "epoch": 1.3912250481305097, "grad_norm": 0.3831333518028259, "learning_rate": 2.566615979220032e-06, "loss": 0.0122, "step": 82380 }, { "epoch": 1.3913939271118316, "grad_norm": 0.10516313463449478, "learning_rate": 2.5653286434103443e-06, "loss": 0.0064, "step": 82390 }, { "epoch": 1.3915628060931535, "grad_norm": 0.2731698155403137, "learning_rate": 2.564041519118939e-06, "loss": 0.0064, "step": 82400 }, { "epoch": 1.3917316850744756, "grad_norm": 0.29186099767684937, "learning_rate": 2.5627546064576393e-06, "loss": 0.0062, "step": 82410 }, { "epoch": 1.3919005640557975, "grad_norm": 0.1146743968129158, "learning_rate": 2.5614679055382453e-06, "loss": 0.008, "step": 82420 }, { "epoch": 1.3920694430371197, "grad_norm": 0.08431839942932129, "learning_rate": 2.5601814164725436e-06, "loss": 0.0089, "step": 82430 }, { "epoch": 1.3922383220184416, "grad_norm": 0.32889121770858765, "learning_rate": 2.5588951393723028e-06, "loss": 0.0064, "step": 82440 }, { "epoch": 1.3924072009997635, "grad_norm": 0.2961125671863556, "learning_rate": 2.557609074349272e-06, "loss": 0.0094, "step": 82450 }, { "epoch": 1.3925760799810856, "grad_norm": 0.08020351082086563, "learning_rate": 2.556323221515179e-06, "loss": 0.0068, "step": 82460 }, { "epoch": 1.3927449589624075, "grad_norm": 0.17797069251537323, "learning_rate": 2.555037580981736e-06, "loss": 0.0097, "step": 82470 }, { "epoch": 1.3929138379437296, "grad_norm": 0.18300007283687592, "learning_rate": 2.5537521528606347e-06, "loss": 0.006, "step": 82480 }, { "epoch": 1.3930827169250515, "grad_norm": 0.15590257942676544, "learning_rate": 2.5524669372635547e-06, "loss": 0.0071, "step": 82490 }, { "epoch": 1.3932515959063734, "grad_norm": 0.5660548806190491, "learning_rate": 2.551181934302151e-06, "loss": 0.0089, "step": 82500 }, { "epoch": 1.3934204748876955, "grad_norm": 0.16240012645721436, "learning_rate": 2.549897144088059e-06, "loss": 0.0058, "step": 82510 }, { "epoch": 1.3935893538690174, "grad_norm": 0.3108969032764435, "learning_rate": 2.5486125667328987e-06, "loss": 0.0068, "step": 82520 }, { "epoch": 1.3937582328503395, "grad_norm": 0.23278281092643738, "learning_rate": 2.5473282023482726e-06, "loss": 0.0063, "step": 82530 }, { "epoch": 1.3939271118316614, "grad_norm": 0.2610183656215668, "learning_rate": 2.546044051045764e-06, "loss": 0.0104, "step": 82540 }, { "epoch": 1.3940959908129833, "grad_norm": 0.29768791794776917, "learning_rate": 2.544760112936933e-06, "loss": 0.0075, "step": 82550 }, { "epoch": 1.3942648697943054, "grad_norm": 0.066364586353302, "learning_rate": 2.543476388133328e-06, "loss": 0.0067, "step": 82560 }, { "epoch": 1.3944337487756273, "grad_norm": 0.2505658268928528, "learning_rate": 2.5421928767464744e-06, "loss": 0.0138, "step": 82570 }, { "epoch": 1.3946026277569494, "grad_norm": 0.3158978819847107, "learning_rate": 2.5409095788878823e-06, "loss": 0.0072, "step": 82580 }, { "epoch": 1.3947715067382713, "grad_norm": 0.19746050238609314, "learning_rate": 2.5396264946690374e-06, "loss": 0.0099, "step": 82590 }, { "epoch": 1.3949403857195932, "grad_norm": 0.3915213346481323, "learning_rate": 2.5383436242014153e-06, "loss": 0.007, "step": 82600 }, { "epoch": 1.3951092647009153, "grad_norm": 0.2109527289867401, "learning_rate": 2.5370609675964677e-06, "loss": 0.0059, "step": 82610 }, { "epoch": 1.3952781436822372, "grad_norm": 0.31337445974349976, "learning_rate": 2.535778524965629e-06, "loss": 0.0066, "step": 82620 }, { "epoch": 1.3954470226635594, "grad_norm": 0.17798732221126556, "learning_rate": 2.5344962964203123e-06, "loss": 0.0091, "step": 82630 }, { "epoch": 1.3956159016448813, "grad_norm": 0.5474833250045776, "learning_rate": 2.533214282071916e-06, "loss": 0.0071, "step": 82640 }, { "epoch": 1.3957847806262031, "grad_norm": 0.11690893769264221, "learning_rate": 2.5319324820318194e-06, "loss": 0.0039, "step": 82650 }, { "epoch": 1.3959536596075253, "grad_norm": 0.5122937560081482, "learning_rate": 2.530650896411382e-06, "loss": 0.0081, "step": 82660 }, { "epoch": 1.3961225385888472, "grad_norm": 0.4612899720668793, "learning_rate": 2.5293695253219417e-06, "loss": 0.0103, "step": 82670 }, { "epoch": 1.3962914175701693, "grad_norm": 0.3874357044696808, "learning_rate": 2.528088368874824e-06, "loss": 0.0078, "step": 82680 }, { "epoch": 1.3964602965514912, "grad_norm": 0.36767494678497314, "learning_rate": 2.5268074271813303e-06, "loss": 0.0087, "step": 82690 }, { "epoch": 1.396629175532813, "grad_norm": 0.1853298395872116, "learning_rate": 2.52552670035275e-06, "loss": 0.0061, "step": 82700 }, { "epoch": 1.3967980545141352, "grad_norm": 0.3122617304325104, "learning_rate": 2.524246188500342e-06, "loss": 0.0056, "step": 82710 }, { "epoch": 1.396966933495457, "grad_norm": 0.27085259556770325, "learning_rate": 2.5229658917353617e-06, "loss": 0.0053, "step": 82720 }, { "epoch": 1.3971358124767792, "grad_norm": 0.20155014097690582, "learning_rate": 2.5216858101690347e-06, "loss": 0.0086, "step": 82730 }, { "epoch": 1.397304691458101, "grad_norm": 0.5412729978561401, "learning_rate": 2.5204059439125727e-06, "loss": 0.0053, "step": 82740 }, { "epoch": 1.397473570439423, "grad_norm": 0.28285086154937744, "learning_rate": 2.5191262930771653e-06, "loss": 0.0078, "step": 82750 }, { "epoch": 1.3976424494207451, "grad_norm": 0.1600205898284912, "learning_rate": 2.517846857773987e-06, "loss": 0.0047, "step": 82760 }, { "epoch": 1.397811328402067, "grad_norm": 0.2865801453590393, "learning_rate": 2.51656763811419e-06, "loss": 0.0098, "step": 82770 }, { "epoch": 1.3979802073833891, "grad_norm": 0.22366830706596375, "learning_rate": 2.5152886342089143e-06, "loss": 0.0066, "step": 82780 }, { "epoch": 1.398149086364711, "grad_norm": 0.17909985780715942, "learning_rate": 2.514009846169271e-06, "loss": 0.0071, "step": 82790 }, { "epoch": 1.398317965346033, "grad_norm": 0.25217556953430176, "learning_rate": 2.5127312741063613e-06, "loss": 0.0078, "step": 82800 }, { "epoch": 1.398486844327355, "grad_norm": 0.21962983906269073, "learning_rate": 2.511452918131263e-06, "loss": 0.0084, "step": 82810 }, { "epoch": 1.398655723308677, "grad_norm": 0.4597854018211365, "learning_rate": 2.510174778355038e-06, "loss": 0.0085, "step": 82820 }, { "epoch": 1.398824602289999, "grad_norm": 0.4005381166934967, "learning_rate": 2.5088968548887273e-06, "loss": 0.0083, "step": 82830 }, { "epoch": 1.398993481271321, "grad_norm": 0.16868160665035248, "learning_rate": 2.507619147843353e-06, "loss": 0.0054, "step": 82840 }, { "epoch": 1.3991623602526428, "grad_norm": 0.18219229578971863, "learning_rate": 2.5063416573299205e-06, "loss": 0.0075, "step": 82850 }, { "epoch": 1.399331239233965, "grad_norm": 0.11724040657281876, "learning_rate": 2.5050643834594163e-06, "loss": 0.0066, "step": 82860 }, { "epoch": 1.3995001182152869, "grad_norm": 0.16602471470832825, "learning_rate": 2.5037873263428023e-06, "loss": 0.0102, "step": 82870 }, { "epoch": 1.399668997196609, "grad_norm": 0.29444029927253723, "learning_rate": 2.5025104860910297e-06, "loss": 0.006, "step": 82880 }, { "epoch": 1.3998378761779309, "grad_norm": 0.40762996673583984, "learning_rate": 2.5012338628150258e-06, "loss": 0.0072, "step": 82890 }, { "epoch": 1.4000067551592528, "grad_norm": 0.24955341219902039, "learning_rate": 2.499957456625703e-06, "loss": 0.0061, "step": 82900 }, { "epoch": 1.4001756341405749, "grad_norm": 0.22140957415103912, "learning_rate": 2.4986812676339485e-06, "loss": 0.006, "step": 82910 }, { "epoch": 1.4003445131218968, "grad_norm": 0.5137169361114502, "learning_rate": 2.4974052959506364e-06, "loss": 0.0084, "step": 82920 }, { "epoch": 1.400513392103219, "grad_norm": 0.3484666049480438, "learning_rate": 2.4961295416866193e-06, "loss": 0.0098, "step": 82930 }, { "epoch": 1.4006822710845408, "grad_norm": 0.20155397057533264, "learning_rate": 2.4948540049527325e-06, "loss": 0.008, "step": 82940 }, { "epoch": 1.4008511500658627, "grad_norm": 0.16981098055839539, "learning_rate": 2.4935786858597916e-06, "loss": 0.0146, "step": 82950 }, { "epoch": 1.4010200290471848, "grad_norm": 0.17259225249290466, "learning_rate": 2.4923035845185922e-06, "loss": 0.008, "step": 82960 }, { "epoch": 1.4011889080285067, "grad_norm": 0.33194634318351746, "learning_rate": 2.4910287010399132e-06, "loss": 0.005, "step": 82970 }, { "epoch": 1.4013577870098288, "grad_norm": 0.4550047218799591, "learning_rate": 2.4897540355345118e-06, "loss": 0.0112, "step": 82980 }, { "epoch": 1.4015266659911507, "grad_norm": 0.2401055544614792, "learning_rate": 2.4884795881131315e-06, "loss": 0.0061, "step": 82990 }, { "epoch": 1.4016955449724726, "grad_norm": 0.25898486375808716, "learning_rate": 2.487205358886488e-06, "loss": 0.0063, "step": 83000 }, { "epoch": 1.4018644239537947, "grad_norm": 0.362682968378067, "learning_rate": 2.4859313479652865e-06, "loss": 0.0096, "step": 83010 }, { "epoch": 1.4020333029351166, "grad_norm": 0.05593141168355942, "learning_rate": 2.4846575554602082e-06, "loss": 0.0079, "step": 83020 }, { "epoch": 1.4022021819164388, "grad_norm": 0.13092295825481415, "learning_rate": 2.48338398148192e-06, "loss": 0.0058, "step": 83030 }, { "epoch": 1.4023710608977606, "grad_norm": 0.08838517963886261, "learning_rate": 2.4821106261410633e-06, "loss": 0.0055, "step": 83040 }, { "epoch": 1.4025399398790825, "grad_norm": 0.05220460146665573, "learning_rate": 2.480837489548265e-06, "loss": 0.007, "step": 83050 }, { "epoch": 1.4027088188604047, "grad_norm": 0.3093039095401764, "learning_rate": 2.479564571814133e-06, "loss": 0.0087, "step": 83060 }, { "epoch": 1.4028776978417266, "grad_norm": 0.20085160434246063, "learning_rate": 2.4782918730492555e-06, "loss": 0.0079, "step": 83070 }, { "epoch": 1.4030465768230487, "grad_norm": 0.16859129071235657, "learning_rate": 2.4770193933642e-06, "loss": 0.008, "step": 83080 }, { "epoch": 1.4032154558043706, "grad_norm": 0.4469100832939148, "learning_rate": 2.475747132869518e-06, "loss": 0.0106, "step": 83090 }, { "epoch": 1.4033843347856925, "grad_norm": 0.344674289226532, "learning_rate": 2.4744750916757397e-06, "loss": 0.0074, "step": 83100 }, { "epoch": 1.4035532137670146, "grad_norm": 0.2567276954650879, "learning_rate": 2.473203269893379e-06, "loss": 0.0072, "step": 83110 }, { "epoch": 1.4037220927483365, "grad_norm": 0.15838047862052917, "learning_rate": 2.471931667632924e-06, "loss": 0.0083, "step": 83120 }, { "epoch": 1.4038909717296586, "grad_norm": 0.17316100001335144, "learning_rate": 2.4706602850048515e-06, "loss": 0.0068, "step": 83130 }, { "epoch": 1.4040598507109805, "grad_norm": 0.21628321707248688, "learning_rate": 2.469389122119616e-06, "loss": 0.0054, "step": 83140 }, { "epoch": 1.4042287296923024, "grad_norm": 0.3366624116897583, "learning_rate": 2.468118179087654e-06, "loss": 0.0055, "step": 83150 }, { "epoch": 1.4043976086736245, "grad_norm": 0.23446649312973022, "learning_rate": 2.466847456019379e-06, "loss": 0.0046, "step": 83160 }, { "epoch": 1.4045664876549464, "grad_norm": 0.15828397870063782, "learning_rate": 2.4655769530251894e-06, "loss": 0.0042, "step": 83170 }, { "epoch": 1.4047353666362685, "grad_norm": 0.28422799706459045, "learning_rate": 2.4643066702154637e-06, "loss": 0.0059, "step": 83180 }, { "epoch": 1.4049042456175904, "grad_norm": 0.2590557038784027, "learning_rate": 2.4630366077005618e-06, "loss": 0.0058, "step": 83190 }, { "epoch": 1.4050731245989123, "grad_norm": 0.2913511097431183, "learning_rate": 2.4617667655908223e-06, "loss": 0.0068, "step": 83200 }, { "epoch": 1.4052420035802344, "grad_norm": 0.13895103335380554, "learning_rate": 2.4604971439965668e-06, "loss": 0.006, "step": 83210 }, { "epoch": 1.4054108825615563, "grad_norm": 0.10887869447469711, "learning_rate": 2.459227743028097e-06, "loss": 0.0047, "step": 83220 }, { "epoch": 1.4055797615428784, "grad_norm": 0.13688361644744873, "learning_rate": 2.4579585627956967e-06, "loss": 0.006, "step": 83230 }, { "epoch": 1.4057486405242003, "grad_norm": 0.3831477165222168, "learning_rate": 2.4566896034096253e-06, "loss": 0.0064, "step": 83240 }, { "epoch": 1.4059175195055222, "grad_norm": 0.20957957208156586, "learning_rate": 2.45542086498013e-06, "loss": 0.0054, "step": 83250 }, { "epoch": 1.4060863984868444, "grad_norm": 0.2947638928890228, "learning_rate": 2.454152347617434e-06, "loss": 0.0105, "step": 83260 }, { "epoch": 1.4062552774681663, "grad_norm": 0.1560084968805313, "learning_rate": 2.4528840514317455e-06, "loss": 0.0066, "step": 83270 }, { "epoch": 1.4064241564494884, "grad_norm": 0.11629830300807953, "learning_rate": 2.4516159765332474e-06, "loss": 0.0044, "step": 83280 }, { "epoch": 1.4065930354308103, "grad_norm": 0.29040127992630005, "learning_rate": 2.450348123032109e-06, "loss": 0.0063, "step": 83290 }, { "epoch": 1.4067619144121322, "grad_norm": 0.3020731508731842, "learning_rate": 2.449080491038478e-06, "loss": 0.0063, "step": 83300 }, { "epoch": 1.4069307933934543, "grad_norm": 0.35332775115966797, "learning_rate": 2.447813080662483e-06, "loss": 0.0078, "step": 83310 }, { "epoch": 1.4070996723747762, "grad_norm": 0.1729866862297058, "learning_rate": 2.446545892014234e-06, "loss": 0.0071, "step": 83320 }, { "epoch": 1.4072685513560983, "grad_norm": 0.1476905792951584, "learning_rate": 2.4452789252038218e-06, "loss": 0.0073, "step": 83330 }, { "epoch": 1.4074374303374202, "grad_norm": 0.17938823997974396, "learning_rate": 2.4440121803413153e-06, "loss": 0.006, "step": 83340 }, { "epoch": 1.407606309318742, "grad_norm": 0.4515429139137268, "learning_rate": 2.4427456575367697e-06, "loss": 0.0091, "step": 83350 }, { "epoch": 1.4077751883000642, "grad_norm": 0.324974000453949, "learning_rate": 2.441479356900214e-06, "loss": 0.0056, "step": 83360 }, { "epoch": 1.407944067281386, "grad_norm": 0.23433233797550201, "learning_rate": 2.4402132785416615e-06, "loss": 0.0063, "step": 83370 }, { "epoch": 1.4081129462627082, "grad_norm": 0.27897942066192627, "learning_rate": 2.4389474225711072e-06, "loss": 0.0047, "step": 83380 }, { "epoch": 1.4082818252440301, "grad_norm": 0.3301739990711212, "learning_rate": 2.4376817890985255e-06, "loss": 0.0063, "step": 83390 }, { "epoch": 1.408450704225352, "grad_norm": 0.21431997418403625, "learning_rate": 2.436416378233873e-06, "loss": 0.0106, "step": 83400 }, { "epoch": 1.4086195832066741, "grad_norm": 0.25603342056274414, "learning_rate": 2.4351511900870815e-06, "loss": 0.0061, "step": 83410 }, { "epoch": 1.408788462187996, "grad_norm": 0.1207921952009201, "learning_rate": 2.4338862247680705e-06, "loss": 0.0043, "step": 83420 }, { "epoch": 1.4089573411693181, "grad_norm": 0.1437235325574875, "learning_rate": 2.432621482386736e-06, "loss": 0.01, "step": 83430 }, { "epoch": 1.40912622015064, "grad_norm": 0.30709829926490784, "learning_rate": 2.431356963052956e-06, "loss": 0.0067, "step": 83440 }, { "epoch": 1.409295099131962, "grad_norm": 0.3702941834926605, "learning_rate": 2.430092666876589e-06, "loss": 0.0085, "step": 83450 }, { "epoch": 1.409463978113284, "grad_norm": 0.16103219985961914, "learning_rate": 2.428828593967474e-06, "loss": 0.0072, "step": 83460 }, { "epoch": 1.409632857094606, "grad_norm": 0.2831626236438751, "learning_rate": 2.4275647444354307e-06, "loss": 0.0076, "step": 83470 }, { "epoch": 1.409801736075928, "grad_norm": 0.26668626070022583, "learning_rate": 2.4263011183902603e-06, "loss": 0.0071, "step": 83480 }, { "epoch": 1.40997061505725, "grad_norm": 0.24571749567985535, "learning_rate": 2.4250377159417403e-06, "loss": 0.0078, "step": 83490 }, { "epoch": 1.4101394940385719, "grad_norm": 0.2535814046859741, "learning_rate": 2.423774537199634e-06, "loss": 0.0101, "step": 83500 }, { "epoch": 1.410308373019894, "grad_norm": 0.19020313024520874, "learning_rate": 2.4225115822736835e-06, "loss": 0.0053, "step": 83510 }, { "epoch": 1.4104772520012159, "grad_norm": 0.12064613401889801, "learning_rate": 2.4212488512736125e-06, "loss": 0.0068, "step": 83520 }, { "epoch": 1.410646130982538, "grad_norm": 0.3842843174934387, "learning_rate": 2.419986344309121e-06, "loss": 0.0059, "step": 83530 }, { "epoch": 1.41081500996386, "grad_norm": 0.23791496455669403, "learning_rate": 2.418724061489893e-06, "loss": 0.0057, "step": 83540 }, { "epoch": 1.4109838889451818, "grad_norm": 0.26797252893447876, "learning_rate": 2.417462002925594e-06, "loss": 0.0092, "step": 83550 }, { "epoch": 1.411152767926504, "grad_norm": 0.18075278401374817, "learning_rate": 2.416200168725868e-06, "loss": 0.0087, "step": 83560 }, { "epoch": 1.4113216469078258, "grad_norm": 0.3412936329841614, "learning_rate": 2.4149385590003407e-06, "loss": 0.0081, "step": 83570 }, { "epoch": 1.411490525889148, "grad_norm": 0.3874068856239319, "learning_rate": 2.4136771738586157e-06, "loss": 0.0077, "step": 83580 }, { "epoch": 1.4116594048704698, "grad_norm": 0.6774266362190247, "learning_rate": 2.4124160134102815e-06, "loss": 0.0129, "step": 83590 }, { "epoch": 1.4118282838517917, "grad_norm": 0.2994675040245056, "learning_rate": 2.4111550777649045e-06, "loss": 0.0061, "step": 83600 }, { "epoch": 1.4119971628331138, "grad_norm": 0.11088579893112183, "learning_rate": 2.4098943670320297e-06, "loss": 0.0065, "step": 83610 }, { "epoch": 1.4121660418144357, "grad_norm": 0.13148866593837738, "learning_rate": 2.4086338813211844e-06, "loss": 0.0056, "step": 83620 }, { "epoch": 1.4123349207957578, "grad_norm": 0.20246046781539917, "learning_rate": 2.407373620741879e-06, "loss": 0.0069, "step": 83630 }, { "epoch": 1.4125037997770797, "grad_norm": 0.13691578805446625, "learning_rate": 2.4061135854036016e-06, "loss": 0.011, "step": 83640 }, { "epoch": 1.4126726787584016, "grad_norm": 0.29529622197151184, "learning_rate": 2.404853775415818e-06, "loss": 0.0078, "step": 83650 }, { "epoch": 1.4128415577397238, "grad_norm": 0.0651887059211731, "learning_rate": 2.4035941908879794e-06, "loss": 0.0054, "step": 83660 }, { "epoch": 1.4130104367210456, "grad_norm": 0.15999209880828857, "learning_rate": 2.402334831929516e-06, "loss": 0.0072, "step": 83670 }, { "epoch": 1.4131793157023678, "grad_norm": 0.27516505122184753, "learning_rate": 2.4010756986498364e-06, "loss": 0.0059, "step": 83680 }, { "epoch": 1.4133481946836897, "grad_norm": 0.503521740436554, "learning_rate": 2.399816791158332e-06, "loss": 0.0072, "step": 83690 }, { "epoch": 1.4135170736650116, "grad_norm": 0.2756020724773407, "learning_rate": 2.3985581095643736e-06, "loss": 0.0062, "step": 83700 }, { "epoch": 1.4136859526463337, "grad_norm": 0.16988185048103333, "learning_rate": 2.3972996539773115e-06, "loss": 0.0092, "step": 83710 }, { "epoch": 1.4138548316276556, "grad_norm": 0.5626576542854309, "learning_rate": 2.39604142450648e-06, "loss": 0.0082, "step": 83720 }, { "epoch": 1.4140237106089777, "grad_norm": 0.24524444341659546, "learning_rate": 2.3947834212611875e-06, "loss": 0.0074, "step": 83730 }, { "epoch": 1.4141925895902996, "grad_norm": 0.0892842561006546, "learning_rate": 2.393525644350727e-06, "loss": 0.0074, "step": 83740 }, { "epoch": 1.4143614685716215, "grad_norm": 0.2327607423067093, "learning_rate": 2.3922680938843716e-06, "loss": 0.0077, "step": 83750 }, { "epoch": 1.4145303475529436, "grad_norm": 0.09104054421186447, "learning_rate": 2.391010769971377e-06, "loss": 0.0078, "step": 83760 }, { "epoch": 1.4146992265342655, "grad_norm": 0.3040735423564911, "learning_rate": 2.3897536727209708e-06, "loss": 0.0046, "step": 83770 }, { "epoch": 1.4148681055155876, "grad_norm": 0.546439528465271, "learning_rate": 2.3884968022423697e-06, "loss": 0.0113, "step": 83780 }, { "epoch": 1.4150369844969095, "grad_norm": 0.3515140414237976, "learning_rate": 2.387240158644768e-06, "loss": 0.01, "step": 83790 }, { "epoch": 1.4152058634782314, "grad_norm": 0.16090501844882965, "learning_rate": 2.3859837420373383e-06, "loss": 0.0037, "step": 83800 }, { "epoch": 1.4153747424595535, "grad_norm": 0.21741217374801636, "learning_rate": 2.384727552529236e-06, "loss": 0.0117, "step": 83810 }, { "epoch": 1.4155436214408754, "grad_norm": 0.07109465450048447, "learning_rate": 2.3834715902295957e-06, "loss": 0.0063, "step": 83820 }, { "epoch": 1.4157125004221975, "grad_norm": 0.2350197434425354, "learning_rate": 2.382215855247532e-06, "loss": 0.0074, "step": 83830 }, { "epoch": 1.4158813794035194, "grad_norm": 0.05693034082651138, "learning_rate": 2.38096034769214e-06, "loss": 0.0044, "step": 83840 }, { "epoch": 1.4160502583848413, "grad_norm": 0.20710204541683197, "learning_rate": 2.3797050676724977e-06, "loss": 0.0066, "step": 83850 }, { "epoch": 1.4162191373661634, "grad_norm": 0.11281237006187439, "learning_rate": 2.3784500152976565e-06, "loss": 0.0086, "step": 83860 }, { "epoch": 1.4163880163474853, "grad_norm": 0.20696328580379486, "learning_rate": 2.377195190676654e-06, "loss": 0.0104, "step": 83870 }, { "epoch": 1.4165568953288075, "grad_norm": 0.2178099900484085, "learning_rate": 2.3759405939185066e-06, "loss": 0.0052, "step": 83880 }, { "epoch": 1.4167257743101294, "grad_norm": 0.1714048683643341, "learning_rate": 2.3746862251322123e-06, "loss": 0.006, "step": 83890 }, { "epoch": 1.4168946532914513, "grad_norm": 0.1985642910003662, "learning_rate": 2.373432084426744e-06, "loss": 0.0081, "step": 83900 }, { "epoch": 1.4170635322727734, "grad_norm": 0.20956426858901978, "learning_rate": 2.3721781719110598e-06, "loss": 0.0091, "step": 83910 }, { "epoch": 1.4172324112540953, "grad_norm": 0.2243034988641739, "learning_rate": 2.370924487694097e-06, "loss": 0.0065, "step": 83920 }, { "epoch": 1.4174012902354174, "grad_norm": 0.15497854351997375, "learning_rate": 2.369671031884772e-06, "loss": 0.0098, "step": 83930 }, { "epoch": 1.4175701692167393, "grad_norm": 0.16861796379089355, "learning_rate": 2.368417804591983e-06, "loss": 0.005, "step": 83940 }, { "epoch": 1.4177390481980612, "grad_norm": 0.18123695254325867, "learning_rate": 2.3671648059246073e-06, "loss": 0.0041, "step": 83950 }, { "epoch": 1.4179079271793833, "grad_norm": 0.27215176820755005, "learning_rate": 2.365912035991501e-06, "loss": 0.0078, "step": 83960 }, { "epoch": 1.4180768061607052, "grad_norm": 0.396404892206192, "learning_rate": 2.364659494901505e-06, "loss": 0.007, "step": 83970 }, { "epoch": 1.4182456851420273, "grad_norm": 0.2300552874803543, "learning_rate": 2.3634071827634318e-06, "loss": 0.0052, "step": 83980 }, { "epoch": 1.4184145641233492, "grad_norm": 0.2073599398136139, "learning_rate": 2.3621550996860826e-06, "loss": 0.0073, "step": 83990 }, { "epoch": 1.418583443104671, "grad_norm": 0.47020700573921204, "learning_rate": 2.3609032457782343e-06, "loss": 0.0108, "step": 84000 }, { "epoch": 1.4187523220859932, "grad_norm": 0.34486493468284607, "learning_rate": 2.3596516211486477e-06, "loss": 0.0036, "step": 84010 }, { "epoch": 1.4189212010673151, "grad_norm": 0.11721929162740707, "learning_rate": 2.3584002259060563e-06, "loss": 0.0046, "step": 84020 }, { "epoch": 1.4190900800486372, "grad_norm": 0.5112520456314087, "learning_rate": 2.3571490601591803e-06, "loss": 0.006, "step": 84030 }, { "epoch": 1.4192589590299591, "grad_norm": 0.6285560727119446, "learning_rate": 2.355898124016717e-06, "loss": 0.0084, "step": 84040 }, { "epoch": 1.419427838011281, "grad_norm": 0.35983869433403015, "learning_rate": 2.354647417587347e-06, "loss": 0.01, "step": 84050 }, { "epoch": 1.4195967169926031, "grad_norm": 0.2027018964290619, "learning_rate": 2.353396940979727e-06, "loss": 0.0066, "step": 84060 }, { "epoch": 1.419765595973925, "grad_norm": 0.430722177028656, "learning_rate": 2.352146694302495e-06, "loss": 0.0105, "step": 84070 }, { "epoch": 1.4199344749552472, "grad_norm": 0.27368226647377014, "learning_rate": 2.3508966776642707e-06, "loss": 0.0073, "step": 84080 }, { "epoch": 1.420103353936569, "grad_norm": 0.1770060956478119, "learning_rate": 2.349646891173653e-06, "loss": 0.0056, "step": 84090 }, { "epoch": 1.420272232917891, "grad_norm": 0.25824370980262756, "learning_rate": 2.3483973349392173e-06, "loss": 0.0054, "step": 84100 }, { "epoch": 1.420441111899213, "grad_norm": 0.2349841445684433, "learning_rate": 2.3471480090695235e-06, "loss": 0.0072, "step": 84110 }, { "epoch": 1.420609990880535, "grad_norm": 0.2400086373090744, "learning_rate": 2.345898913673111e-06, "loss": 0.0054, "step": 84120 }, { "epoch": 1.420778869861857, "grad_norm": 0.219358429312706, "learning_rate": 2.344650048858498e-06, "loss": 0.0068, "step": 84130 }, { "epoch": 1.420947748843179, "grad_norm": 0.2681359648704529, "learning_rate": 2.3434014147341816e-06, "loss": 0.005, "step": 84140 }, { "epoch": 1.4211166278245009, "grad_norm": 0.4664376676082611, "learning_rate": 2.3421530114086397e-06, "loss": 0.0113, "step": 84150 }, { "epoch": 1.421285506805823, "grad_norm": 0.07285850495100021, "learning_rate": 2.340904838990332e-06, "loss": 0.0087, "step": 84160 }, { "epoch": 1.421454385787145, "grad_norm": 0.23041369020938873, "learning_rate": 2.339656897587696e-06, "loss": 0.0095, "step": 84170 }, { "epoch": 1.421623264768467, "grad_norm": 0.2972767651081085, "learning_rate": 2.3384091873091496e-06, "loss": 0.0059, "step": 84180 }, { "epoch": 1.421792143749789, "grad_norm": 0.2210075557231903, "learning_rate": 2.3371617082630917e-06, "loss": 0.0064, "step": 84190 }, { "epoch": 1.4219610227311108, "grad_norm": 0.35202756524086, "learning_rate": 2.3359144605578994e-06, "loss": 0.0055, "step": 84200 }, { "epoch": 1.422129901712433, "grad_norm": 0.3320033848285675, "learning_rate": 2.334667444301932e-06, "loss": 0.0077, "step": 84210 }, { "epoch": 1.4222987806937548, "grad_norm": 0.4293763339519501, "learning_rate": 2.3334206596035246e-06, "loss": 0.0076, "step": 84220 }, { "epoch": 1.422467659675077, "grad_norm": 0.1991073489189148, "learning_rate": 2.332174106570996e-06, "loss": 0.0118, "step": 84230 }, { "epoch": 1.4226365386563988, "grad_norm": 0.21523432433605194, "learning_rate": 2.3309277853126444e-06, "loss": 0.0069, "step": 84240 }, { "epoch": 1.4228054176377207, "grad_norm": 0.1483774185180664, "learning_rate": 2.329681695936748e-06, "loss": 0.0055, "step": 84250 }, { "epoch": 1.4229742966190428, "grad_norm": 0.1306125968694687, "learning_rate": 2.3284358385515605e-06, "loss": 0.0056, "step": 84260 }, { "epoch": 1.4231431756003647, "grad_norm": 0.232992485165596, "learning_rate": 2.327190213265321e-06, "loss": 0.0131, "step": 84270 }, { "epoch": 1.4233120545816869, "grad_norm": 0.12195228040218353, "learning_rate": 2.3259448201862473e-06, "loss": 0.0078, "step": 84280 }, { "epoch": 1.4234809335630088, "grad_norm": 0.22009241580963135, "learning_rate": 2.3246996594225323e-06, "loss": 0.0047, "step": 84290 }, { "epoch": 1.4236498125443307, "grad_norm": 0.3617177903652191, "learning_rate": 2.3234547310823596e-06, "loss": 0.0075, "step": 84300 }, { "epoch": 1.4238186915256528, "grad_norm": 0.2270575761795044, "learning_rate": 2.3222100352738793e-06, "loss": 0.0049, "step": 84310 }, { "epoch": 1.4239875705069747, "grad_norm": 0.22242505848407745, "learning_rate": 2.32096557210523e-06, "loss": 0.0048, "step": 84320 }, { "epoch": 1.4241564494882968, "grad_norm": 0.3180725574493408, "learning_rate": 2.3197213416845265e-06, "loss": 0.0071, "step": 84330 }, { "epoch": 1.4243253284696187, "grad_norm": 0.28491708636283875, "learning_rate": 2.3184773441198673e-06, "loss": 0.0066, "step": 84340 }, { "epoch": 1.4244942074509406, "grad_norm": 0.2526169717311859, "learning_rate": 2.3172335795193235e-06, "loss": 0.0095, "step": 84350 }, { "epoch": 1.4246630864322627, "grad_norm": 0.22085964679718018, "learning_rate": 2.315990047990953e-06, "loss": 0.0039, "step": 84360 }, { "epoch": 1.4248319654135846, "grad_norm": 0.20607832074165344, "learning_rate": 2.31474674964279e-06, "loss": 0.0064, "step": 84370 }, { "epoch": 1.4250008443949067, "grad_norm": 0.25682947039604187, "learning_rate": 2.3135036845828516e-06, "loss": 0.0068, "step": 84380 }, { "epoch": 1.4251697233762286, "grad_norm": 0.3383942246437073, "learning_rate": 2.3122608529191283e-06, "loss": 0.0086, "step": 84390 }, { "epoch": 1.4253386023575505, "grad_norm": 0.47355514764785767, "learning_rate": 2.3110182547595945e-06, "loss": 0.0102, "step": 84400 }, { "epoch": 1.4255074813388726, "grad_norm": 0.17102240025997162, "learning_rate": 2.309775890212207e-06, "loss": 0.0122, "step": 84410 }, { "epoch": 1.4256763603201945, "grad_norm": 0.35532912611961365, "learning_rate": 2.3085337593849004e-06, "loss": 0.0145, "step": 84420 }, { "epoch": 1.4258452393015166, "grad_norm": 0.12229350209236145, "learning_rate": 2.307291862385583e-06, "loss": 0.0047, "step": 84430 }, { "epoch": 1.4260141182828385, "grad_norm": 0.2812713086605072, "learning_rate": 2.3060501993221517e-06, "loss": 0.0086, "step": 84440 }, { "epoch": 1.4261829972641604, "grad_norm": 0.23465657234191895, "learning_rate": 2.3048087703024774e-06, "loss": 0.0058, "step": 84450 }, { "epoch": 1.4263518762454825, "grad_norm": 0.15353301167488098, "learning_rate": 2.3035675754344145e-06, "loss": 0.0055, "step": 84460 }, { "epoch": 1.4265207552268044, "grad_norm": 0.24746140837669373, "learning_rate": 2.3023266148257916e-06, "loss": 0.0068, "step": 84470 }, { "epoch": 1.4266896342081266, "grad_norm": 0.32435914874076843, "learning_rate": 2.301085888584422e-06, "loss": 0.0069, "step": 84480 }, { "epoch": 1.4268585131894485, "grad_norm": 0.13365374505519867, "learning_rate": 2.2998453968180974e-06, "loss": 0.0047, "step": 84490 }, { "epoch": 1.4270273921707703, "grad_norm": 0.16382712125778198, "learning_rate": 2.2986051396345903e-06, "loss": 0.0067, "step": 84500 }, { "epoch": 1.4271962711520925, "grad_norm": 0.3593394160270691, "learning_rate": 2.2973651171416474e-06, "loss": 0.0079, "step": 84510 }, { "epoch": 1.4273651501334144, "grad_norm": 0.19733601808547974, "learning_rate": 2.296125329446998e-06, "loss": 0.0067, "step": 84520 }, { "epoch": 1.4275340291147365, "grad_norm": 0.17982816696166992, "learning_rate": 2.294885776658357e-06, "loss": 0.0081, "step": 84530 }, { "epoch": 1.4277029080960584, "grad_norm": 0.380235880613327, "learning_rate": 2.2936464588834127e-06, "loss": 0.0067, "step": 84540 }, { "epoch": 1.4278717870773803, "grad_norm": 0.22314724326133728, "learning_rate": 2.292407376229831e-06, "loss": 0.0076, "step": 84550 }, { "epoch": 1.4280406660587024, "grad_norm": 0.25626516342163086, "learning_rate": 2.2911685288052615e-06, "loss": 0.0096, "step": 84560 }, { "epoch": 1.4282095450400243, "grad_norm": 0.2848013937473297, "learning_rate": 2.2899299167173327e-06, "loss": 0.0074, "step": 84570 }, { "epoch": 1.4283784240213464, "grad_norm": 0.13539567589759827, "learning_rate": 2.2886915400736538e-06, "loss": 0.0054, "step": 84580 }, { "epoch": 1.4285473030026683, "grad_norm": 0.20218604803085327, "learning_rate": 2.287453398981808e-06, "loss": 0.0073, "step": 84590 }, { "epoch": 1.4287161819839902, "grad_norm": 0.2045627236366272, "learning_rate": 2.2862154935493652e-06, "loss": 0.0061, "step": 84600 }, { "epoch": 1.4288850609653123, "grad_norm": 0.08080592006444931, "learning_rate": 2.28497782388387e-06, "loss": 0.0076, "step": 84610 }, { "epoch": 1.4290539399466342, "grad_norm": 0.4376172423362732, "learning_rate": 2.28374039009285e-06, "loss": 0.0067, "step": 84620 }, { "epoch": 1.4292228189279563, "grad_norm": 0.11766140908002853, "learning_rate": 2.2825031922838076e-06, "loss": 0.0087, "step": 84630 }, { "epoch": 1.4293916979092782, "grad_norm": 0.46114516258239746, "learning_rate": 2.281266230564227e-06, "loss": 0.0068, "step": 84640 }, { "epoch": 1.4295605768906001, "grad_norm": 0.8552824258804321, "learning_rate": 2.2800295050415766e-06, "loss": 0.0103, "step": 84650 }, { "epoch": 1.4297294558719222, "grad_norm": 0.11050309985876083, "learning_rate": 2.278793015823299e-06, "loss": 0.0058, "step": 84660 }, { "epoch": 1.4298983348532441, "grad_norm": 0.30284348130226135, "learning_rate": 2.277556763016815e-06, "loss": 0.0059, "step": 84670 }, { "epoch": 1.4300672138345663, "grad_norm": 0.5488636493682861, "learning_rate": 2.2763207467295285e-06, "loss": 0.0068, "step": 84680 }, { "epoch": 1.4302360928158881, "grad_norm": 0.3899575173854828, "learning_rate": 2.275084967068821e-06, "loss": 0.0076, "step": 84690 }, { "epoch": 1.43040497179721, "grad_norm": 0.14657171070575714, "learning_rate": 2.2738494241420566e-06, "loss": 0.0085, "step": 84700 }, { "epoch": 1.4305738507785322, "grad_norm": 0.27528661489486694, "learning_rate": 2.272614118056572e-06, "loss": 0.0098, "step": 84710 }, { "epoch": 1.430742729759854, "grad_norm": 0.2646579444408417, "learning_rate": 2.2713790489196904e-06, "loss": 0.0073, "step": 84720 }, { "epoch": 1.4309116087411762, "grad_norm": 0.1610947847366333, "learning_rate": 2.27014421683871e-06, "loss": 0.0094, "step": 84730 }, { "epoch": 1.431080487722498, "grad_norm": 0.17950524389743805, "learning_rate": 2.2689096219209112e-06, "loss": 0.011, "step": 84740 }, { "epoch": 1.43124936670382, "grad_norm": 0.3100448250770569, "learning_rate": 2.2676752642735526e-06, "loss": 0.0078, "step": 84750 }, { "epoch": 1.431418245685142, "grad_norm": 0.4136620759963989, "learning_rate": 2.2664411440038716e-06, "loss": 0.0065, "step": 84760 }, { "epoch": 1.431587124666464, "grad_norm": 0.2676279544830322, "learning_rate": 2.265207261219086e-06, "loss": 0.0075, "step": 84770 }, { "epoch": 1.431756003647786, "grad_norm": 0.27482521533966064, "learning_rate": 2.2639736160263913e-06, "loss": 0.0051, "step": 84780 }, { "epoch": 1.431924882629108, "grad_norm": 0.01582573540508747, "learning_rate": 2.2627402085329676e-06, "loss": 0.0096, "step": 84790 }, { "epoch": 1.43209376161043, "grad_norm": 0.41188836097717285, "learning_rate": 2.2615070388459646e-06, "loss": 0.008, "step": 84800 }, { "epoch": 1.432262640591752, "grad_norm": 0.08204884082078934, "learning_rate": 2.2602741070725204e-06, "loss": 0.0073, "step": 84810 }, { "epoch": 1.432431519573074, "grad_norm": 0.21315321326255798, "learning_rate": 2.259041413319748e-06, "loss": 0.0093, "step": 84820 }, { "epoch": 1.432600398554396, "grad_norm": 0.15717388689517975, "learning_rate": 2.257808957694743e-06, "loss": 0.0079, "step": 84830 }, { "epoch": 1.432769277535718, "grad_norm": 0.15801560878753662, "learning_rate": 2.2565767403045744e-06, "loss": 0.0095, "step": 84840 }, { "epoch": 1.4329381565170398, "grad_norm": 0.3904339075088501, "learning_rate": 2.255344761256297e-06, "loss": 0.0057, "step": 84850 }, { "epoch": 1.433107035498362, "grad_norm": 0.31449902057647705, "learning_rate": 2.2541130206569405e-06, "loss": 0.0047, "step": 84860 }, { "epoch": 1.4332759144796838, "grad_norm": 0.3841516375541687, "learning_rate": 2.2528815186135167e-06, "loss": 0.0127, "step": 84870 }, { "epoch": 1.433444793461006, "grad_norm": 0.18288129568099976, "learning_rate": 2.251650255233015e-06, "loss": 0.0055, "step": 84880 }, { "epoch": 1.4336136724423278, "grad_norm": 0.1616164892911911, "learning_rate": 2.250419230622405e-06, "loss": 0.0112, "step": 84890 }, { "epoch": 1.4337825514236497, "grad_norm": 0.38921666145324707, "learning_rate": 2.249188444888634e-06, "loss": 0.0072, "step": 84900 }, { "epoch": 1.4339514304049719, "grad_norm": 0.12986469268798828, "learning_rate": 2.2479578981386325e-06, "loss": 0.0051, "step": 84910 }, { "epoch": 1.4341203093862938, "grad_norm": 0.2823808193206787, "learning_rate": 2.2467275904793033e-06, "loss": 0.0059, "step": 84920 }, { "epoch": 1.4342891883676159, "grad_norm": 0.28270360827445984, "learning_rate": 2.2454975220175353e-06, "loss": 0.0051, "step": 84930 }, { "epoch": 1.4344580673489378, "grad_norm": 0.011885455809533596, "learning_rate": 2.244267692860192e-06, "loss": 0.0055, "step": 84940 }, { "epoch": 1.4346269463302597, "grad_norm": 0.20180194079875946, "learning_rate": 2.2430381031141214e-06, "loss": 0.0114, "step": 84950 }, { "epoch": 1.4347958253115818, "grad_norm": 0.18628908693790436, "learning_rate": 2.2418087528861424e-06, "loss": 0.0083, "step": 84960 }, { "epoch": 1.4349647042929037, "grad_norm": 0.2438545525074005, "learning_rate": 2.2405796422830613e-06, "loss": 0.0106, "step": 84970 }, { "epoch": 1.4351335832742258, "grad_norm": 0.18251140415668488, "learning_rate": 2.2393507714116584e-06, "loss": 0.0071, "step": 84980 }, { "epoch": 1.4353024622555477, "grad_norm": 0.2544744312763214, "learning_rate": 2.2381221403786962e-06, "loss": 0.0063, "step": 84990 }, { "epoch": 1.4354713412368696, "grad_norm": 0.26389366388320923, "learning_rate": 2.236893749290915e-06, "loss": 0.0053, "step": 85000 }, { "epoch": 1.4356402202181917, "grad_norm": 0.14641453325748444, "learning_rate": 2.235665598255034e-06, "loss": 0.0069, "step": 85010 }, { "epoch": 1.4358090991995136, "grad_norm": 0.2633204162120819, "learning_rate": 2.2344376873777523e-06, "loss": 0.0056, "step": 85020 }, { "epoch": 1.4359779781808357, "grad_norm": 0.34223151206970215, "learning_rate": 2.2332100167657495e-06, "loss": 0.0107, "step": 85030 }, { "epoch": 1.4361468571621576, "grad_norm": 0.14023815095424652, "learning_rate": 2.2319825865256785e-06, "loss": 0.0061, "step": 85040 }, { "epoch": 1.4363157361434795, "grad_norm": 0.31108540296554565, "learning_rate": 2.230755396764178e-06, "loss": 0.0101, "step": 85050 }, { "epoch": 1.4364846151248016, "grad_norm": 0.15371277928352356, "learning_rate": 2.2295284475878633e-06, "loss": 0.0068, "step": 85060 }, { "epoch": 1.4366534941061235, "grad_norm": 0.053034454584121704, "learning_rate": 2.2283017391033295e-06, "loss": 0.0065, "step": 85070 }, { "epoch": 1.4368223730874456, "grad_norm": 0.27821868658065796, "learning_rate": 2.2270752714171472e-06, "loss": 0.008, "step": 85080 }, { "epoch": 1.4369912520687675, "grad_norm": 0.17690226435661316, "learning_rate": 2.225849044635871e-06, "loss": 0.0095, "step": 85090 }, { "epoch": 1.4371601310500894, "grad_norm": 0.1511441022157669, "learning_rate": 2.224623058866032e-06, "loss": 0.0044, "step": 85100 }, { "epoch": 1.4373290100314116, "grad_norm": 0.20171627402305603, "learning_rate": 2.2233973142141413e-06, "loss": 0.0055, "step": 85110 }, { "epoch": 1.4374978890127335, "grad_norm": 0.23392079770565033, "learning_rate": 2.2221718107866875e-06, "loss": 0.0102, "step": 85120 }, { "epoch": 1.4376667679940556, "grad_norm": 0.20977267622947693, "learning_rate": 2.2209465486901405e-06, "loss": 0.0044, "step": 85130 }, { "epoch": 1.4378356469753775, "grad_norm": 0.14118283987045288, "learning_rate": 2.2197215280309475e-06, "loss": 0.0075, "step": 85140 }, { "epoch": 1.4380045259566994, "grad_norm": 0.24956223368644714, "learning_rate": 2.2184967489155372e-06, "loss": 0.0119, "step": 85150 }, { "epoch": 1.4381734049380215, "grad_norm": 0.18293386697769165, "learning_rate": 2.217272211450312e-06, "loss": 0.0075, "step": 85160 }, { "epoch": 1.4383422839193434, "grad_norm": 0.16707603633403778, "learning_rate": 2.216047915741658e-06, "loss": 0.0059, "step": 85170 }, { "epoch": 1.4385111629006655, "grad_norm": 0.22236992418766022, "learning_rate": 2.2148238618959394e-06, "loss": 0.0107, "step": 85180 }, { "epoch": 1.4386800418819874, "grad_norm": 0.12699837982654572, "learning_rate": 2.2136000500194994e-06, "loss": 0.0081, "step": 85190 }, { "epoch": 1.4388489208633093, "grad_norm": 0.25731176137924194, "learning_rate": 2.2123764802186607e-06, "loss": 0.0094, "step": 85200 }, { "epoch": 1.4390177998446314, "grad_norm": 0.3392102122306824, "learning_rate": 2.211153152599721e-06, "loss": 0.0051, "step": 85210 }, { "epoch": 1.4391866788259533, "grad_norm": 0.14411301910877228, "learning_rate": 2.2099300672689615e-06, "loss": 0.0079, "step": 85220 }, { "epoch": 1.4393555578072754, "grad_norm": 0.2121451050043106, "learning_rate": 2.2087072243326414e-06, "loss": 0.0084, "step": 85230 }, { "epoch": 1.4395244367885973, "grad_norm": 0.11138641089200974, "learning_rate": 2.2074846238969976e-06, "loss": 0.0074, "step": 85240 }, { "epoch": 1.4396933157699192, "grad_norm": 0.2476179301738739, "learning_rate": 2.2062622660682472e-06, "loss": 0.0072, "step": 85250 }, { "epoch": 1.4398621947512413, "grad_norm": 0.16035272181034088, "learning_rate": 2.2050401509525853e-06, "loss": 0.0071, "step": 85260 }, { "epoch": 1.4400310737325632, "grad_norm": 0.25947919487953186, "learning_rate": 2.2038182786561863e-06, "loss": 0.0066, "step": 85270 }, { "epoch": 1.4401999527138853, "grad_norm": 0.306869775056839, "learning_rate": 2.2025966492852056e-06, "loss": 0.0083, "step": 85280 }, { "epoch": 1.4403688316952072, "grad_norm": 0.11927466839551926, "learning_rate": 2.201375262945771e-06, "loss": 0.008, "step": 85290 }, { "epoch": 1.4405377106765291, "grad_norm": 0.35581034421920776, "learning_rate": 2.2001541197439967e-06, "loss": 0.0078, "step": 85300 }, { "epoch": 1.4407065896578513, "grad_norm": 0.2098139226436615, "learning_rate": 2.1989332197859713e-06, "loss": 0.0062, "step": 85310 }, { "epoch": 1.4408754686391732, "grad_norm": 0.296707421541214, "learning_rate": 2.1977125631777665e-06, "loss": 0.0079, "step": 85320 }, { "epoch": 1.4410443476204953, "grad_norm": 0.22420084476470947, "learning_rate": 2.196492150025425e-06, "loss": 0.0077, "step": 85330 }, { "epoch": 1.4412132266018172, "grad_norm": 0.22970961034297943, "learning_rate": 2.195271980434976e-06, "loss": 0.0057, "step": 85340 }, { "epoch": 1.441382105583139, "grad_norm": 0.19813260436058044, "learning_rate": 2.194052054512425e-06, "loss": 0.0069, "step": 85350 }, { "epoch": 1.4415509845644612, "grad_norm": 0.45946162939071655, "learning_rate": 2.192832372363756e-06, "loss": 0.0073, "step": 85360 }, { "epoch": 1.441719863545783, "grad_norm": 0.22238129377365112, "learning_rate": 2.1916129340949312e-06, "loss": 0.0044, "step": 85370 }, { "epoch": 1.4418887425271052, "grad_norm": 0.1388147920370102, "learning_rate": 2.1903937398118934e-06, "loss": 0.0061, "step": 85380 }, { "epoch": 1.442057621508427, "grad_norm": 0.23120395839214325, "learning_rate": 2.1891747896205627e-06, "loss": 0.005, "step": 85390 }, { "epoch": 1.442226500489749, "grad_norm": 0.48205891251564026, "learning_rate": 2.187956083626841e-06, "loss": 0.0068, "step": 85400 }, { "epoch": 1.442395379471071, "grad_norm": 0.4806283712387085, "learning_rate": 2.186737621936602e-06, "loss": 0.0075, "step": 85410 }, { "epoch": 1.442564258452393, "grad_norm": 0.44181838631629944, "learning_rate": 2.1855194046557053e-06, "loss": 0.0058, "step": 85420 }, { "epoch": 1.4427331374337151, "grad_norm": 0.2834097743034363, "learning_rate": 2.1843014318899858e-06, "loss": 0.0058, "step": 85430 }, { "epoch": 1.442902016415037, "grad_norm": 0.16881097853183746, "learning_rate": 2.18308370374526e-06, "loss": 0.0053, "step": 85440 }, { "epoch": 1.443070895396359, "grad_norm": 0.4035075902938843, "learning_rate": 2.181866220327318e-06, "loss": 0.0063, "step": 85450 }, { "epoch": 1.443239774377681, "grad_norm": 0.15986868739128113, "learning_rate": 2.1806489817419336e-06, "loss": 0.0056, "step": 85460 }, { "epoch": 1.443408653359003, "grad_norm": 0.16003510355949402, "learning_rate": 2.1794319880948565e-06, "loss": 0.0074, "step": 85470 }, { "epoch": 1.443577532340325, "grad_norm": 0.2736814022064209, "learning_rate": 2.1782152394918175e-06, "loss": 0.0045, "step": 85480 }, { "epoch": 1.443746411321647, "grad_norm": 0.2727096676826477, "learning_rate": 2.176998736038524e-06, "loss": 0.0058, "step": 85490 }, { "epoch": 1.4439152903029688, "grad_norm": 0.33490777015686035, "learning_rate": 2.1757824778406627e-06, "loss": 0.008, "step": 85500 }, { "epoch": 1.444084169284291, "grad_norm": 0.26508602499961853, "learning_rate": 2.1745664650038994e-06, "loss": 0.0084, "step": 85510 }, { "epoch": 1.4442530482656128, "grad_norm": 0.4261009991168976, "learning_rate": 2.1733506976338797e-06, "loss": 0.0086, "step": 85520 }, { "epoch": 1.444421927246935, "grad_norm": 0.2002224624156952, "learning_rate": 2.1721351758362237e-06, "loss": 0.0081, "step": 85530 }, { "epoch": 1.4445908062282569, "grad_norm": 0.4350050389766693, "learning_rate": 2.170919899716534e-06, "loss": 0.0051, "step": 85540 }, { "epoch": 1.4447596852095788, "grad_norm": 0.33140721917152405, "learning_rate": 2.1697048693803914e-06, "loss": 0.0065, "step": 85550 }, { "epoch": 1.4449285641909009, "grad_norm": 0.14647401869297028, "learning_rate": 2.1684900849333563e-06, "loss": 0.0058, "step": 85560 }, { "epoch": 1.4450974431722228, "grad_norm": 0.16552525758743286, "learning_rate": 2.167275546480962e-06, "loss": 0.0138, "step": 85570 }, { "epoch": 1.445266322153545, "grad_norm": 0.13242901861667633, "learning_rate": 2.1660612541287276e-06, "loss": 0.0068, "step": 85580 }, { "epoch": 1.4454352011348668, "grad_norm": 0.28038644790649414, "learning_rate": 2.1648472079821464e-06, "loss": 0.0092, "step": 85590 }, { "epoch": 1.4456040801161887, "grad_norm": 0.33573341369628906, "learning_rate": 2.163633408146694e-06, "loss": 0.0042, "step": 85600 }, { "epoch": 1.4457729590975108, "grad_norm": 0.16882047057151794, "learning_rate": 2.1624198547278195e-06, "loss": 0.0101, "step": 85610 }, { "epoch": 1.4459418380788327, "grad_norm": 0.2077537626028061, "learning_rate": 2.1612065478309553e-06, "loss": 0.0065, "step": 85620 }, { "epoch": 1.4461107170601548, "grad_norm": 0.44220346212387085, "learning_rate": 2.1599934875615103e-06, "loss": 0.0057, "step": 85630 }, { "epoch": 1.4462795960414767, "grad_norm": 0.16814284026622772, "learning_rate": 2.1587806740248722e-06, "loss": 0.0091, "step": 85640 }, { "epoch": 1.4464484750227986, "grad_norm": 0.4074956774711609, "learning_rate": 2.1575681073264087e-06, "loss": 0.0074, "step": 85650 }, { "epoch": 1.4466173540041207, "grad_norm": 0.1592702567577362, "learning_rate": 2.1563557875714614e-06, "loss": 0.0091, "step": 85660 }, { "epoch": 1.4467862329854426, "grad_norm": 0.24338984489440918, "learning_rate": 2.1551437148653546e-06, "loss": 0.0081, "step": 85670 }, { "epoch": 1.4469551119667647, "grad_norm": 0.6828794479370117, "learning_rate": 2.1539318893133914e-06, "loss": 0.0065, "step": 85680 }, { "epoch": 1.4471239909480866, "grad_norm": 0.3726428151130676, "learning_rate": 2.1527203110208538e-06, "loss": 0.0105, "step": 85690 }, { "epoch": 1.4472928699294085, "grad_norm": 0.1917886734008789, "learning_rate": 2.1515089800929966e-06, "loss": 0.0086, "step": 85700 }, { "epoch": 1.4474617489107307, "grad_norm": 0.5915666818618774, "learning_rate": 2.150297896635059e-06, "loss": 0.0087, "step": 85710 }, { "epoch": 1.4476306278920525, "grad_norm": 0.24095603823661804, "learning_rate": 2.149087060752258e-06, "loss": 0.0057, "step": 85720 }, { "epoch": 1.4477995068733747, "grad_norm": 0.35286378860473633, "learning_rate": 2.147876472549787e-06, "loss": 0.0063, "step": 85730 }, { "epoch": 1.4479683858546966, "grad_norm": 0.29086416959762573, "learning_rate": 2.146666132132819e-06, "loss": 0.0082, "step": 85740 }, { "epoch": 1.4481372648360185, "grad_norm": 0.18463316559791565, "learning_rate": 2.145456039606506e-06, "loss": 0.0063, "step": 85750 }, { "epoch": 1.4483061438173406, "grad_norm": 0.33874765038490295, "learning_rate": 2.1442461950759773e-06, "loss": 0.0065, "step": 85760 }, { "epoch": 1.4484750227986625, "grad_norm": 0.5080716609954834, "learning_rate": 2.143036598646344e-06, "loss": 0.0085, "step": 85770 }, { "epoch": 1.4486439017799846, "grad_norm": 0.4406660497188568, "learning_rate": 2.141827250422688e-06, "loss": 0.0035, "step": 85780 }, { "epoch": 1.4488127807613065, "grad_norm": 0.2276977151632309, "learning_rate": 2.140618150510077e-06, "loss": 0.0061, "step": 85790 }, { "epoch": 1.4489816597426284, "grad_norm": 0.22084641456604004, "learning_rate": 2.1394092990135544e-06, "loss": 0.0075, "step": 85800 }, { "epoch": 1.4491505387239505, "grad_norm": 0.15179035067558289, "learning_rate": 2.1382006960381445e-06, "loss": 0.0053, "step": 85810 }, { "epoch": 1.4493194177052724, "grad_norm": 0.33124056458473206, "learning_rate": 2.1369923416888433e-06, "loss": 0.0067, "step": 85820 }, { "epoch": 1.4494882966865945, "grad_norm": 0.34690794348716736, "learning_rate": 2.1357842360706327e-06, "loss": 0.0068, "step": 85830 }, { "epoch": 1.4496571756679164, "grad_norm": 0.23772196471691132, "learning_rate": 2.1345763792884688e-06, "loss": 0.0083, "step": 85840 }, { "epoch": 1.4498260546492383, "grad_norm": 0.10000689327716827, "learning_rate": 2.1333687714472877e-06, "loss": 0.0047, "step": 85850 }, { "epoch": 1.4499949336305604, "grad_norm": 0.27815452218055725, "learning_rate": 2.132161412652004e-06, "loss": 0.0064, "step": 85860 }, { "epoch": 1.4501638126118823, "grad_norm": 0.7441808581352234, "learning_rate": 2.1309543030075085e-06, "loss": 0.0128, "step": 85870 }, { "epoch": 1.4503326915932044, "grad_norm": 0.38335758447647095, "learning_rate": 2.1297474426186738e-06, "loss": 0.0084, "step": 85880 }, { "epoch": 1.4505015705745263, "grad_norm": 0.01838550716638565, "learning_rate": 2.1285408315903494e-06, "loss": 0.0086, "step": 85890 }, { "epoch": 1.4506704495558482, "grad_norm": 0.08900311589241028, "learning_rate": 2.1273344700273603e-06, "loss": 0.0058, "step": 85900 }, { "epoch": 1.4508393285371701, "grad_norm": 0.18195775151252747, "learning_rate": 2.126128358034513e-06, "loss": 0.0073, "step": 85910 }, { "epoch": 1.4510082075184922, "grad_norm": 0.09398947656154633, "learning_rate": 2.1249224957165913e-06, "loss": 0.0061, "step": 85920 }, { "epoch": 1.4511770864998144, "grad_norm": 0.22377361357212067, "learning_rate": 2.1237168831783606e-06, "loss": 0.0074, "step": 85930 }, { "epoch": 1.4513459654811363, "grad_norm": 0.316695898771286, "learning_rate": 2.1225115205245568e-06, "loss": 0.0082, "step": 85940 }, { "epoch": 1.4515148444624582, "grad_norm": 0.3088488280773163, "learning_rate": 2.1213064078599015e-06, "loss": 0.0114, "step": 85950 }, { "epoch": 1.45168372344378, "grad_norm": 0.15711936354637146, "learning_rate": 2.1201015452890917e-06, "loss": 0.0099, "step": 85960 }, { "epoch": 1.4518526024251022, "grad_norm": 0.17671263217926025, "learning_rate": 2.1188969329168022e-06, "loss": 0.0075, "step": 85970 }, { "epoch": 1.4520214814064243, "grad_norm": 0.36819541454315186, "learning_rate": 2.117692570847688e-06, "loss": 0.011, "step": 85980 }, { "epoch": 1.4521903603877462, "grad_norm": 0.4064221978187561, "learning_rate": 2.1164884591863804e-06, "loss": 0.0096, "step": 85990 }, { "epoch": 1.452359239369068, "grad_norm": 0.09374011307954788, "learning_rate": 2.11528459803749e-06, "loss": 0.0076, "step": 86000 }, { "epoch": 1.45252811835039, "grad_norm": 0.4655105173587799, "learning_rate": 2.1140809875056062e-06, "loss": 0.0152, "step": 86010 }, { "epoch": 1.452696997331712, "grad_norm": 0.037615444511175156, "learning_rate": 2.1128776276952933e-06, "loss": 0.0052, "step": 86020 }, { "epoch": 1.4528658763130342, "grad_norm": 0.1544814109802246, "learning_rate": 2.1116745187110974e-06, "loss": 0.0071, "step": 86030 }, { "epoch": 1.453034755294356, "grad_norm": 0.03179072588682175, "learning_rate": 2.110471660657542e-06, "loss": 0.0037, "step": 86040 }, { "epoch": 1.453203634275678, "grad_norm": 0.28291240334510803, "learning_rate": 2.10926905363913e-06, "loss": 0.0164, "step": 86050 }, { "epoch": 1.453372513257, "grad_norm": 0.39316001534461975, "learning_rate": 2.1080666977603383e-06, "loss": 0.0064, "step": 86060 }, { "epoch": 1.453541392238322, "grad_norm": 0.28352832794189453, "learning_rate": 2.106864593125624e-06, "loss": 0.0091, "step": 86070 }, { "epoch": 1.4537102712196441, "grad_norm": 0.19693590700626373, "learning_rate": 2.105662739839426e-06, "loss": 0.0058, "step": 86080 }, { "epoch": 1.453879150200966, "grad_norm": 0.2887444794178009, "learning_rate": 2.104461138006156e-06, "loss": 0.009, "step": 86090 }, { "epoch": 1.454048029182288, "grad_norm": 0.19137927889823914, "learning_rate": 2.1032597877302074e-06, "loss": 0.0071, "step": 86100 }, { "epoch": 1.4542169081636098, "grad_norm": 0.14992445707321167, "learning_rate": 2.1020586891159505e-06, "loss": 0.0062, "step": 86110 }, { "epoch": 1.454385787144932, "grad_norm": 0.1999923586845398, "learning_rate": 2.100857842267734e-06, "loss": 0.0093, "step": 86120 }, { "epoch": 1.454554666126254, "grad_norm": 0.16361674666404724, "learning_rate": 2.0996572472898835e-06, "loss": 0.0059, "step": 86130 }, { "epoch": 1.454723545107576, "grad_norm": 0.1750631034374237, "learning_rate": 2.0984569042867065e-06, "loss": 0.0071, "step": 86140 }, { "epoch": 1.4548924240888979, "grad_norm": 0.2673828899860382, "learning_rate": 2.097256813362482e-06, "loss": 0.0053, "step": 86150 }, { "epoch": 1.4550613030702197, "grad_norm": 0.15513113141059875, "learning_rate": 2.0960569746214724e-06, "loss": 0.0081, "step": 86160 }, { "epoch": 1.4552301820515419, "grad_norm": 0.15881435573101044, "learning_rate": 2.0948573881679176e-06, "loss": 0.0119, "step": 86170 }, { "epoch": 1.455399061032864, "grad_norm": 0.17550528049468994, "learning_rate": 2.093658054106036e-06, "loss": 0.0078, "step": 86180 }, { "epoch": 1.4555679400141859, "grad_norm": 0.14839479327201843, "learning_rate": 2.092458972540019e-06, "loss": 0.0054, "step": 86190 }, { "epoch": 1.4557368189955078, "grad_norm": 0.4235363304615021, "learning_rate": 2.091260143574042e-06, "loss": 0.0057, "step": 86200 }, { "epoch": 1.4559056979768297, "grad_norm": 0.18389825522899628, "learning_rate": 2.090061567312256e-06, "loss": 0.0066, "step": 86210 }, { "epoch": 1.4560745769581518, "grad_norm": 0.215192511677742, "learning_rate": 2.088863243858791e-06, "loss": 0.0071, "step": 86220 }, { "epoch": 1.456243455939474, "grad_norm": 0.5847659111022949, "learning_rate": 2.087665173317754e-06, "loss": 0.0065, "step": 86230 }, { "epoch": 1.4564123349207958, "grad_norm": 0.3581962287425995, "learning_rate": 2.0864673557932304e-06, "loss": 0.0101, "step": 86240 }, { "epoch": 1.4565812139021177, "grad_norm": 0.3040156662464142, "learning_rate": 2.085269791389283e-06, "loss": 0.0076, "step": 86250 }, { "epoch": 1.4567500928834396, "grad_norm": 0.25318726897239685, "learning_rate": 2.0840724802099564e-06, "loss": 0.0118, "step": 86260 }, { "epoch": 1.4569189718647617, "grad_norm": 0.220906600356102, "learning_rate": 2.0828754223592656e-06, "loss": 0.0074, "step": 86270 }, { "epoch": 1.4570878508460838, "grad_norm": 0.3892528712749481, "learning_rate": 2.08167861794121e-06, "loss": 0.01, "step": 86280 }, { "epoch": 1.4572567298274057, "grad_norm": 0.20608040690422058, "learning_rate": 2.0804820670597655e-06, "loss": 0.0052, "step": 86290 }, { "epoch": 1.4574256088087276, "grad_norm": 0.4299549162387848, "learning_rate": 2.0792857698188866e-06, "loss": 0.0079, "step": 86300 }, { "epoch": 1.4575944877900495, "grad_norm": 0.16444772481918335, "learning_rate": 2.0780897263225013e-06, "loss": 0.0078, "step": 86310 }, { "epoch": 1.4577633667713716, "grad_norm": 0.2216777354478836, "learning_rate": 2.076893936674521e-06, "loss": 0.0108, "step": 86320 }, { "epoch": 1.4579322457526938, "grad_norm": 0.24181094765663147, "learning_rate": 2.0756984009788304e-06, "loss": 0.0043, "step": 86330 }, { "epoch": 1.4581011247340157, "grad_norm": 0.11128148436546326, "learning_rate": 2.0745031193393017e-06, "loss": 0.0076, "step": 86340 }, { "epoch": 1.4582700037153375, "grad_norm": 0.5807087421417236, "learning_rate": 2.073308091859771e-06, "loss": 0.0066, "step": 86350 }, { "epoch": 1.4584388826966594, "grad_norm": 0.1828954517841339, "learning_rate": 2.0721133186440617e-06, "loss": 0.0044, "step": 86360 }, { "epoch": 1.4586077616779816, "grad_norm": 0.10094529390335083, "learning_rate": 2.070918799795973e-06, "loss": 0.0068, "step": 86370 }, { "epoch": 1.4587766406593037, "grad_norm": 0.20527850091457367, "learning_rate": 2.0697245354192834e-06, "loss": 0.0101, "step": 86380 }, { "epoch": 1.4589455196406256, "grad_norm": 0.14232635498046875, "learning_rate": 2.068530525617744e-06, "loss": 0.0073, "step": 86390 }, { "epoch": 1.4591143986219475, "grad_norm": 0.1541873663663864, "learning_rate": 2.0673367704950898e-06, "loss": 0.0098, "step": 86400 }, { "epoch": 1.4592832776032694, "grad_norm": 0.12881577014923096, "learning_rate": 2.0661432701550304e-06, "loss": 0.0057, "step": 86410 }, { "epoch": 1.4594521565845915, "grad_norm": 0.2957373559474945, "learning_rate": 2.0649500247012566e-06, "loss": 0.0104, "step": 86420 }, { "epoch": 1.4596210355659136, "grad_norm": 0.16873662173748016, "learning_rate": 2.0637570342374308e-06, "loss": 0.0076, "step": 86430 }, { "epoch": 1.4597899145472355, "grad_norm": 0.49538740515708923, "learning_rate": 2.062564298867199e-06, "loss": 0.0115, "step": 86440 }, { "epoch": 1.4599587935285574, "grad_norm": 0.28501594066619873, "learning_rate": 2.0613718186941815e-06, "loss": 0.0083, "step": 86450 }, { "epoch": 1.4601276725098793, "grad_norm": 0.14975698292255402, "learning_rate": 2.060179593821983e-06, "loss": 0.0094, "step": 86460 }, { "epoch": 1.4602965514912014, "grad_norm": 0.16094987094402313, "learning_rate": 2.0589876243541763e-06, "loss": 0.0072, "step": 86470 }, { "epoch": 1.4604654304725233, "grad_norm": 0.22481565177440643, "learning_rate": 2.0577959103943173e-06, "loss": 0.0077, "step": 86480 }, { "epoch": 1.4606343094538454, "grad_norm": 0.3661649227142334, "learning_rate": 2.056604452045941e-06, "loss": 0.0088, "step": 86490 }, { "epoch": 1.4608031884351673, "grad_norm": 0.0764295756816864, "learning_rate": 2.0554132494125588e-06, "loss": 0.0082, "step": 86500 }, { "epoch": 1.4609720674164892, "grad_norm": 0.20328937470912933, "learning_rate": 2.0542223025976567e-06, "loss": 0.0053, "step": 86510 }, { "epoch": 1.4611409463978113, "grad_norm": 0.36858123540878296, "learning_rate": 2.0530316117047027e-06, "loss": 0.0096, "step": 86520 }, { "epoch": 1.4613098253791332, "grad_norm": 0.18612708151340485, "learning_rate": 2.051841176837141e-06, "loss": 0.0057, "step": 86530 }, { "epoch": 1.4614787043604553, "grad_norm": 0.19125555455684662, "learning_rate": 2.0506509980983935e-06, "loss": 0.0059, "step": 86540 }, { "epoch": 1.4616475833417772, "grad_norm": 0.26482319831848145, "learning_rate": 2.0494610755918625e-06, "loss": 0.0063, "step": 86550 }, { "epoch": 1.4618164623230991, "grad_norm": 0.2407994270324707, "learning_rate": 2.048271409420921e-06, "loss": 0.0073, "step": 86560 }, { "epoch": 1.4619853413044213, "grad_norm": 0.4383265972137451, "learning_rate": 2.0470819996889252e-06, "loss": 0.0083, "step": 86570 }, { "epoch": 1.4621542202857432, "grad_norm": 0.1709323525428772, "learning_rate": 2.045892846499211e-06, "loss": 0.0072, "step": 86580 }, { "epoch": 1.4623230992670653, "grad_norm": 0.2806861698627472, "learning_rate": 2.0447039499550897e-06, "loss": 0.0074, "step": 86590 }, { "epoch": 1.4624919782483872, "grad_norm": 0.18078120052814484, "learning_rate": 2.043515310159846e-06, "loss": 0.0076, "step": 86600 }, { "epoch": 1.462660857229709, "grad_norm": 0.13237348198890686, "learning_rate": 2.042326927216747e-06, "loss": 0.0073, "step": 86610 }, { "epoch": 1.4628297362110312, "grad_norm": 0.2384171187877655, "learning_rate": 2.041138801229038e-06, "loss": 0.0088, "step": 86620 }, { "epoch": 1.462998615192353, "grad_norm": 0.36708399653434753, "learning_rate": 2.039950932299941e-06, "loss": 0.0077, "step": 86630 }, { "epoch": 1.4631674941736752, "grad_norm": 0.36027035117149353, "learning_rate": 2.0387633205326522e-06, "loss": 0.0061, "step": 86640 }, { "epoch": 1.463336373154997, "grad_norm": 0.1298767626285553, "learning_rate": 2.0375759660303497e-06, "loss": 0.0066, "step": 86650 }, { "epoch": 1.463505252136319, "grad_norm": 0.2647435665130615, "learning_rate": 2.036388868896188e-06, "loss": 0.0113, "step": 86660 }, { "epoch": 1.463674131117641, "grad_norm": 0.427153080701828, "learning_rate": 2.0352020292333017e-06, "loss": 0.0105, "step": 86670 }, { "epoch": 1.463843010098963, "grad_norm": 0.2024463266134262, "learning_rate": 2.034015447144795e-06, "loss": 0.004, "step": 86680 }, { "epoch": 1.4640118890802851, "grad_norm": 0.20248742401599884, "learning_rate": 2.0328291227337595e-06, "loss": 0.0064, "step": 86690 }, { "epoch": 1.464180768061607, "grad_norm": 0.13956010341644287, "learning_rate": 2.031643056103259e-06, "loss": 0.0066, "step": 86700 }, { "epoch": 1.464349647042929, "grad_norm": 0.40430212020874023, "learning_rate": 2.030457247356338e-06, "loss": 0.0049, "step": 86710 }, { "epoch": 1.464518526024251, "grad_norm": 0.17757590115070343, "learning_rate": 2.029271696596014e-06, "loss": 0.0062, "step": 86720 }, { "epoch": 1.464687405005573, "grad_norm": 0.3046104609966278, "learning_rate": 2.0280864039252853e-06, "loss": 0.0075, "step": 86730 }, { "epoch": 1.464856283986895, "grad_norm": 0.16074851155281067, "learning_rate": 2.026901369447127e-06, "loss": 0.0094, "step": 86740 }, { "epoch": 1.465025162968217, "grad_norm": 0.15396477282047272, "learning_rate": 2.0257165932644944e-06, "loss": 0.0067, "step": 86750 }, { "epoch": 1.4651940419495388, "grad_norm": 0.2464066743850708, "learning_rate": 2.0245320754803145e-06, "loss": 0.0061, "step": 86760 }, { "epoch": 1.465362920930861, "grad_norm": 0.2847066819667816, "learning_rate": 2.0233478161974967e-06, "loss": 0.0045, "step": 86770 }, { "epoch": 1.4655317999121829, "grad_norm": 0.12883678078651428, "learning_rate": 2.022163815518927e-06, "loss": 0.005, "step": 86780 }, { "epoch": 1.465700678893505, "grad_norm": 0.2081635445356369, "learning_rate": 2.02098007354747e-06, "loss": 0.0088, "step": 86790 }, { "epoch": 1.4658695578748269, "grad_norm": 0.5118513107299805, "learning_rate": 2.0197965903859606e-06, "loss": 0.01, "step": 86800 }, { "epoch": 1.4660384368561488, "grad_norm": 0.20528168976306915, "learning_rate": 2.0186133661372228e-06, "loss": 0.006, "step": 86810 }, { "epoch": 1.4662073158374709, "grad_norm": 0.24691735208034515, "learning_rate": 2.0174304009040502e-06, "loss": 0.0061, "step": 86820 }, { "epoch": 1.4663761948187928, "grad_norm": 0.13448567688465118, "learning_rate": 2.0162476947892173e-06, "loss": 0.0086, "step": 86830 }, { "epoch": 1.466545073800115, "grad_norm": 0.19359703361988068, "learning_rate": 2.0150652478954725e-06, "loss": 0.0071, "step": 86840 }, { "epoch": 1.4667139527814368, "grad_norm": 0.550370991230011, "learning_rate": 2.013883060325544e-06, "loss": 0.0061, "step": 86850 }, { "epoch": 1.4668828317627587, "grad_norm": 0.20725291967391968, "learning_rate": 2.0127011321821384e-06, "loss": 0.0059, "step": 86860 }, { "epoch": 1.4670517107440808, "grad_norm": 0.20032654702663422, "learning_rate": 2.0115194635679404e-06, "loss": 0.006, "step": 86870 }, { "epoch": 1.4672205897254027, "grad_norm": 0.16212889552116394, "learning_rate": 2.010338054585606e-06, "loss": 0.0065, "step": 86880 }, { "epoch": 1.4673894687067248, "grad_norm": 0.8283079266548157, "learning_rate": 2.0091569053377764e-06, "loss": 0.0075, "step": 86890 }, { "epoch": 1.4675583476880467, "grad_norm": 0.4111917316913605, "learning_rate": 2.0079760159270657e-06, "loss": 0.0101, "step": 86900 }, { "epoch": 1.4677272266693686, "grad_norm": 0.46494558453559875, "learning_rate": 2.0067953864560675e-06, "loss": 0.0073, "step": 86910 }, { "epoch": 1.4678961056506907, "grad_norm": 0.4098507761955261, "learning_rate": 2.0056150170273516e-06, "loss": 0.0085, "step": 86920 }, { "epoch": 1.4680649846320126, "grad_norm": 0.11537700146436691, "learning_rate": 2.0044349077434653e-06, "loss": 0.0075, "step": 86930 }, { "epoch": 1.4682338636133347, "grad_norm": 0.8462586402893066, "learning_rate": 2.0032550587069343e-06, "loss": 0.0072, "step": 86940 }, { "epoch": 1.4684027425946566, "grad_norm": 0.2229105681180954, "learning_rate": 2.0020754700202623e-06, "loss": 0.0084, "step": 86950 }, { "epoch": 1.4685716215759785, "grad_norm": 0.20910656452178955, "learning_rate": 2.000896141785926e-06, "loss": 0.0101, "step": 86960 }, { "epoch": 1.4687405005573007, "grad_norm": 0.22367975115776062, "learning_rate": 1.999717074106383e-06, "loss": 0.0085, "step": 86970 }, { "epoch": 1.4689093795386226, "grad_norm": 0.25158628821372986, "learning_rate": 1.9985382670840697e-06, "loss": 0.0095, "step": 86980 }, { "epoch": 1.4690782585199447, "grad_norm": 0.29177212715148926, "learning_rate": 1.9973597208213968e-06, "loss": 0.0056, "step": 86990 }, { "epoch": 1.4692471375012666, "grad_norm": 0.29595187306404114, "learning_rate": 1.996181435420756e-06, "loss": 0.0085, "step": 87000 }, { "epoch": 1.4694160164825885, "grad_norm": 0.270368754863739, "learning_rate": 1.995003410984509e-06, "loss": 0.0073, "step": 87010 }, { "epoch": 1.4695848954639106, "grad_norm": 0.3377944827079773, "learning_rate": 1.9938256476150025e-06, "loss": 0.0072, "step": 87020 }, { "epoch": 1.4697537744452325, "grad_norm": 0.16606996953487396, "learning_rate": 1.992648145414558e-06, "loss": 0.0096, "step": 87030 }, { "epoch": 1.4699226534265546, "grad_norm": 0.43453630805015564, "learning_rate": 1.991470904485473e-06, "loss": 0.0064, "step": 87040 }, { "epoch": 1.4700915324078765, "grad_norm": 0.09952428191900253, "learning_rate": 1.990293924930024e-06, "loss": 0.0075, "step": 87050 }, { "epoch": 1.4702604113891984, "grad_norm": 0.41392722725868225, "learning_rate": 1.989117206850464e-06, "loss": 0.0102, "step": 87060 }, { "epoch": 1.4704292903705205, "grad_norm": 0.18343304097652435, "learning_rate": 1.9879407503490228e-06, "loss": 0.0119, "step": 87070 }, { "epoch": 1.4705981693518424, "grad_norm": 0.34064340591430664, "learning_rate": 1.9867645555279108e-06, "loss": 0.0044, "step": 87080 }, { "epoch": 1.4707670483331645, "grad_norm": 0.48917093873023987, "learning_rate": 1.985588622489308e-06, "loss": 0.0089, "step": 87090 }, { "epoch": 1.4709359273144864, "grad_norm": 0.16796568036079407, "learning_rate": 1.9844129513353807e-06, "loss": 0.0097, "step": 87100 }, { "epoch": 1.4711048062958083, "grad_norm": 0.1587691754102707, "learning_rate": 1.983237542168266e-06, "loss": 0.0081, "step": 87110 }, { "epoch": 1.4712736852771304, "grad_norm": 0.24275298416614532, "learning_rate": 1.982062395090083e-06, "loss": 0.0086, "step": 87120 }, { "epoch": 1.4714425642584523, "grad_norm": 0.301078200340271, "learning_rate": 1.980887510202923e-06, "loss": 0.0117, "step": 87130 }, { "epoch": 1.4716114432397744, "grad_norm": 0.09248541295528412, "learning_rate": 1.979712887608857e-06, "loss": 0.0072, "step": 87140 }, { "epoch": 1.4717803222210963, "grad_norm": 0.20221517980098724, "learning_rate": 1.9785385274099354e-06, "loss": 0.0145, "step": 87150 }, { "epoch": 1.4719492012024182, "grad_norm": 0.26907631754875183, "learning_rate": 1.977364429708183e-06, "loss": 0.0036, "step": 87160 }, { "epoch": 1.4721180801837404, "grad_norm": 0.4407276213169098, "learning_rate": 1.976190594605602e-06, "loss": 0.0116, "step": 87170 }, { "epoch": 1.4722869591650622, "grad_norm": 0.3046872913837433, "learning_rate": 1.9750170222041725e-06, "loss": 0.0052, "step": 87180 }, { "epoch": 1.4724558381463844, "grad_norm": 0.22581836581230164, "learning_rate": 1.973843712605852e-06, "loss": 0.0109, "step": 87190 }, { "epoch": 1.4726247171277063, "grad_norm": 0.2556800842285156, "learning_rate": 1.9726706659125768e-06, "loss": 0.0055, "step": 87200 }, { "epoch": 1.4727935961090282, "grad_norm": 0.19990003108978271, "learning_rate": 1.9714978822262533e-06, "loss": 0.0057, "step": 87210 }, { "epoch": 1.4729624750903503, "grad_norm": 0.09360221028327942, "learning_rate": 1.9703253616487734e-06, "loss": 0.0063, "step": 87220 }, { "epoch": 1.4731313540716722, "grad_norm": 0.23730474710464478, "learning_rate": 1.969153104282002e-06, "loss": 0.0105, "step": 87230 }, { "epoch": 1.4733002330529943, "grad_norm": 0.15439178049564362, "learning_rate": 1.967981110227784e-06, "loss": 0.0081, "step": 87240 }, { "epoch": 1.4734691120343162, "grad_norm": 0.22379504144191742, "learning_rate": 1.9668093795879355e-06, "loss": 0.0081, "step": 87250 }, { "epoch": 1.473637991015638, "grad_norm": 0.386837363243103, "learning_rate": 1.965637912464257e-06, "loss": 0.0094, "step": 87260 }, { "epoch": 1.4738068699969602, "grad_norm": 0.06871764361858368, "learning_rate": 1.9644667089585194e-06, "loss": 0.0049, "step": 87270 }, { "epoch": 1.473975748978282, "grad_norm": 0.3502276539802551, "learning_rate": 1.9632957691724776e-06, "loss": 0.0112, "step": 87280 }, { "epoch": 1.4741446279596042, "grad_norm": 0.4738439619541168, "learning_rate": 1.9621250932078573e-06, "loss": 0.0109, "step": 87290 }, { "epoch": 1.474313506940926, "grad_norm": 0.19938713312149048, "learning_rate": 1.960954681166365e-06, "loss": 0.0065, "step": 87300 }, { "epoch": 1.474482385922248, "grad_norm": 0.2188456654548645, "learning_rate": 1.9597845331496834e-06, "loss": 0.0059, "step": 87310 }, { "epoch": 1.4746512649035701, "grad_norm": 0.5428177714347839, "learning_rate": 1.9586146492594736e-06, "loss": 0.0098, "step": 87320 }, { "epoch": 1.474820143884892, "grad_norm": 0.24404583871364594, "learning_rate": 1.957445029597369e-06, "loss": 0.0061, "step": 87330 }, { "epoch": 1.4749890228662141, "grad_norm": 0.35927894711494446, "learning_rate": 1.9562756742649852e-06, "loss": 0.007, "step": 87340 }, { "epoch": 1.475157901847536, "grad_norm": 0.1827738732099533, "learning_rate": 1.9551065833639116e-06, "loss": 0.011, "step": 87350 }, { "epoch": 1.475326780828858, "grad_norm": 0.3412972390651703, "learning_rate": 1.953937756995719e-06, "loss": 0.005, "step": 87360 }, { "epoch": 1.47549565981018, "grad_norm": 0.17859044671058655, "learning_rate": 1.9527691952619487e-06, "loss": 0.0059, "step": 87370 }, { "epoch": 1.475664538791502, "grad_norm": 0.11227138340473175, "learning_rate": 1.9516008982641237e-06, "loss": 0.0058, "step": 87380 }, { "epoch": 1.475833417772824, "grad_norm": 0.2920042872428894, "learning_rate": 1.950432866103743e-06, "loss": 0.0081, "step": 87390 }, { "epoch": 1.476002296754146, "grad_norm": 0.16088692843914032, "learning_rate": 1.949265098882282e-06, "loss": 0.0093, "step": 87400 }, { "epoch": 1.4761711757354679, "grad_norm": 0.2230256199836731, "learning_rate": 1.948097596701194e-06, "loss": 0.0092, "step": 87410 }, { "epoch": 1.47634005471679, "grad_norm": 0.1835094839334488, "learning_rate": 1.9469303596619085e-06, "loss": 0.0084, "step": 87420 }, { "epoch": 1.4765089336981119, "grad_norm": 0.2013857662677765, "learning_rate": 1.9457633878658323e-06, "loss": 0.0052, "step": 87430 }, { "epoch": 1.476677812679434, "grad_norm": 0.17607730627059937, "learning_rate": 1.9445966814143487e-06, "loss": 0.0052, "step": 87440 }, { "epoch": 1.4768466916607559, "grad_norm": 0.13387832045555115, "learning_rate": 1.94343024040882e-06, "loss": 0.0067, "step": 87450 }, { "epoch": 1.4770155706420778, "grad_norm": 0.10880474746227264, "learning_rate": 1.9422640649505807e-06, "loss": 0.0092, "step": 87460 }, { "epoch": 1.4771844496234, "grad_norm": 0.2027139514684677, "learning_rate": 1.9410981551409473e-06, "loss": 0.0038, "step": 87470 }, { "epoch": 1.4773533286047218, "grad_norm": 0.09702357649803162, "learning_rate": 1.9399325110812107e-06, "loss": 0.0052, "step": 87480 }, { "epoch": 1.477522207586044, "grad_norm": 0.0389745868742466, "learning_rate": 1.93876713287264e-06, "loss": 0.0064, "step": 87490 }, { "epoch": 1.4776910865673658, "grad_norm": 0.2238345742225647, "learning_rate": 1.937602020616479e-06, "loss": 0.0064, "step": 87500 }, { "epoch": 1.4778599655486877, "grad_norm": 0.3479391634464264, "learning_rate": 1.93643717441395e-06, "loss": 0.006, "step": 87510 }, { "epoch": 1.4780288445300098, "grad_norm": 1.013633370399475, "learning_rate": 1.9352725943662526e-06, "loss": 0.0095, "step": 87520 }, { "epoch": 1.4781977235113317, "grad_norm": 0.13898257911205292, "learning_rate": 1.934108280574562e-06, "loss": 0.0086, "step": 87530 }, { "epoch": 1.4783666024926538, "grad_norm": 0.21604666113853455, "learning_rate": 1.9329442331400315e-06, "loss": 0.0053, "step": 87540 }, { "epoch": 1.4785354814739757, "grad_norm": 0.06308116018772125, "learning_rate": 1.9317804521637913e-06, "loss": 0.0097, "step": 87550 }, { "epoch": 1.4787043604552976, "grad_norm": 0.26699548959732056, "learning_rate": 1.9306169377469464e-06, "loss": 0.0063, "step": 87560 }, { "epoch": 1.4788732394366197, "grad_norm": 0.2704387903213501, "learning_rate": 1.929453689990583e-06, "loss": 0.0087, "step": 87570 }, { "epoch": 1.4790421184179416, "grad_norm": 0.2568197250366211, "learning_rate": 1.928290708995757e-06, "loss": 0.0092, "step": 87580 }, { "epoch": 1.4792109973992638, "grad_norm": 0.17344413697719574, "learning_rate": 1.9271279948635076e-06, "loss": 0.0082, "step": 87590 }, { "epoch": 1.4793798763805857, "grad_norm": 0.24647995829582214, "learning_rate": 1.9259655476948473e-06, "loss": 0.0065, "step": 87600 }, { "epoch": 1.4795487553619076, "grad_norm": 0.22275058925151825, "learning_rate": 1.9248033675907706e-06, "loss": 0.0061, "step": 87610 }, { "epoch": 1.4797176343432297, "grad_norm": 0.42059993743896484, "learning_rate": 1.92364145465224e-06, "loss": 0.0063, "step": 87620 }, { "epoch": 1.4798865133245516, "grad_norm": 0.20431801676750183, "learning_rate": 1.9224798089802016e-06, "loss": 0.0067, "step": 87630 }, { "epoch": 1.4800553923058737, "grad_norm": 0.26756197214126587, "learning_rate": 1.921318430675576e-06, "loss": 0.0158, "step": 87640 }, { "epoch": 1.4802242712871956, "grad_norm": 0.46046173572540283, "learning_rate": 1.9201573198392616e-06, "loss": 0.0039, "step": 87650 }, { "epoch": 1.4803931502685175, "grad_norm": 0.17454586923122406, "learning_rate": 1.918996476572132e-06, "loss": 0.0061, "step": 87660 }, { "epoch": 1.4805620292498396, "grad_norm": 0.11722637712955475, "learning_rate": 1.917835900975039e-06, "loss": 0.0062, "step": 87670 }, { "epoch": 1.4807309082311615, "grad_norm": 0.2964455187320709, "learning_rate": 1.9166755931488106e-06, "loss": 0.0068, "step": 87680 }, { "epoch": 1.4808997872124836, "grad_norm": 0.36382216215133667, "learning_rate": 1.915515553194253e-06, "loss": 0.0119, "step": 87690 }, { "epoch": 1.4810686661938055, "grad_norm": 0.25444090366363525, "learning_rate": 1.9143557812121443e-06, "loss": 0.0091, "step": 87700 }, { "epoch": 1.4812375451751274, "grad_norm": 0.4529168903827667, "learning_rate": 1.9131962773032447e-06, "loss": 0.0063, "step": 87710 }, { "epoch": 1.4814064241564495, "grad_norm": 0.06974531710147858, "learning_rate": 1.912037041568288e-06, "loss": 0.0087, "step": 87720 }, { "epoch": 1.4815753031377714, "grad_norm": 0.3715318739414215, "learning_rate": 1.910878074107989e-06, "loss": 0.0092, "step": 87730 }, { "epoch": 1.4817441821190935, "grad_norm": 0.15400859713554382, "learning_rate": 1.9097193750230314e-06, "loss": 0.0077, "step": 87740 }, { "epoch": 1.4819130611004154, "grad_norm": 0.26867595314979553, "learning_rate": 1.908560944414082e-06, "loss": 0.0059, "step": 87750 }, { "epoch": 1.4820819400817373, "grad_norm": 0.3710693418979645, "learning_rate": 1.9074027823817826e-06, "loss": 0.0074, "step": 87760 }, { "epoch": 1.4822508190630594, "grad_norm": 0.2173626720905304, "learning_rate": 1.9062448890267516e-06, "loss": 0.0089, "step": 87770 }, { "epoch": 1.4824196980443813, "grad_norm": 0.3025785982608795, "learning_rate": 1.9050872644495839e-06, "loss": 0.0104, "step": 87780 }, { "epoch": 1.4825885770257035, "grad_norm": 0.22543185949325562, "learning_rate": 1.9039299087508506e-06, "loss": 0.0067, "step": 87790 }, { "epoch": 1.4827574560070254, "grad_norm": 0.04709796607494354, "learning_rate": 1.9027728220311004e-06, "loss": 0.0108, "step": 87800 }, { "epoch": 1.4829263349883473, "grad_norm": 0.3018815219402313, "learning_rate": 1.9016160043908593e-06, "loss": 0.0077, "step": 87810 }, { "epoch": 1.4830952139696694, "grad_norm": 0.05038442835211754, "learning_rate": 1.9004594559306267e-06, "loss": 0.0105, "step": 87820 }, { "epoch": 1.4832640929509913, "grad_norm": 0.10266929119825363, "learning_rate": 1.8993031767508812e-06, "loss": 0.0062, "step": 87830 }, { "epoch": 1.4834329719323134, "grad_norm": 0.35087546706199646, "learning_rate": 1.8981471669520778e-06, "loss": 0.0076, "step": 87840 }, { "epoch": 1.4836018509136353, "grad_norm": 0.14313426613807678, "learning_rate": 1.8969914266346494e-06, "loss": 0.0066, "step": 87850 }, { "epoch": 1.4837707298949572, "grad_norm": 0.2540971040725708, "learning_rate": 1.8958359558990008e-06, "loss": 0.0045, "step": 87860 }, { "epoch": 1.4839396088762793, "grad_norm": 0.2523602247238159, "learning_rate": 1.8946807548455177e-06, "loss": 0.0096, "step": 87870 }, { "epoch": 1.4841084878576012, "grad_norm": 0.27401843667030334, "learning_rate": 1.8935258235745619e-06, "loss": 0.0057, "step": 87880 }, { "epoch": 1.4842773668389233, "grad_norm": 0.04169837012887001, "learning_rate": 1.89237116218647e-06, "loss": 0.0091, "step": 87890 }, { "epoch": 1.4844462458202452, "grad_norm": 0.3726212680339813, "learning_rate": 1.8912167707815571e-06, "loss": 0.0069, "step": 87900 }, { "epoch": 1.484615124801567, "grad_norm": 0.21047581732273102, "learning_rate": 1.8900626494601132e-06, "loss": 0.0102, "step": 87910 }, { "epoch": 1.4847840037828892, "grad_norm": 0.4719408452510834, "learning_rate": 1.888908798322406e-06, "loss": 0.0082, "step": 87920 }, { "epoch": 1.4849528827642111, "grad_norm": 0.1554892361164093, "learning_rate": 1.8877552174686792e-06, "loss": 0.0076, "step": 87930 }, { "epoch": 1.4851217617455332, "grad_norm": 0.1306590586900711, "learning_rate": 1.8866019069991543e-06, "loss": 0.0091, "step": 87940 }, { "epoch": 1.4852906407268551, "grad_norm": 0.21668247878551483, "learning_rate": 1.8854488670140253e-06, "loss": 0.0076, "step": 87950 }, { "epoch": 1.485459519708177, "grad_norm": 0.408243864774704, "learning_rate": 1.8842960976134666e-06, "loss": 0.0067, "step": 87960 }, { "epoch": 1.4856283986894991, "grad_norm": 0.31879377365112305, "learning_rate": 1.8831435988976281e-06, "loss": 0.0054, "step": 87970 }, { "epoch": 1.485797277670821, "grad_norm": 0.21656621992588043, "learning_rate": 1.8819913709666388e-06, "loss": 0.0052, "step": 87980 }, { "epoch": 1.4859661566521432, "grad_norm": 0.2913704812526703, "learning_rate": 1.8808394139205965e-06, "loss": 0.0063, "step": 87990 }, { "epoch": 1.486135035633465, "grad_norm": 0.27065351605415344, "learning_rate": 1.8796877278595833e-06, "loss": 0.007, "step": 88000 }, { "epoch": 1.486303914614787, "grad_norm": 0.28466182947158813, "learning_rate": 1.8785363128836537e-06, "loss": 0.0076, "step": 88010 }, { "epoch": 1.486472793596109, "grad_norm": 0.16388511657714844, "learning_rate": 1.8773851690928406e-06, "loss": 0.0042, "step": 88020 }, { "epoch": 1.486641672577431, "grad_norm": 0.13069675862789154, "learning_rate": 1.8762342965871533e-06, "loss": 0.0065, "step": 88030 }, { "epoch": 1.486810551558753, "grad_norm": 0.1733010709285736, "learning_rate": 1.8750836954665753e-06, "loss": 0.0079, "step": 88040 }, { "epoch": 1.486979430540075, "grad_norm": 0.2471299022436142, "learning_rate": 1.8739333658310683e-06, "loss": 0.0064, "step": 88050 }, { "epoch": 1.4871483095213969, "grad_norm": 0.3643784821033478, "learning_rate": 1.8727833077805728e-06, "loss": 0.0048, "step": 88060 }, { "epoch": 1.487317188502719, "grad_norm": 0.14548259973526, "learning_rate": 1.8716335214149982e-06, "loss": 0.0079, "step": 88070 }, { "epoch": 1.4874860674840409, "grad_norm": 0.18284961581230164, "learning_rate": 1.8704840068342384e-06, "loss": 0.0058, "step": 88080 }, { "epoch": 1.487654946465363, "grad_norm": 0.1493273377418518, "learning_rate": 1.8693347641381593e-06, "loss": 0.005, "step": 88090 }, { "epoch": 1.487823825446685, "grad_norm": 0.15319882333278656, "learning_rate": 1.8681857934266062e-06, "loss": 0.0058, "step": 88100 }, { "epoch": 1.4879927044280068, "grad_norm": 0.3102925419807434, "learning_rate": 1.8670370947993954e-06, "loss": 0.0058, "step": 88110 }, { "epoch": 1.488161583409329, "grad_norm": 0.3431660830974579, "learning_rate": 1.8658886683563253e-06, "loss": 0.0098, "step": 88120 }, { "epoch": 1.4883304623906508, "grad_norm": 0.06928624212741852, "learning_rate": 1.8647405141971676e-06, "loss": 0.007, "step": 88130 }, { "epoch": 1.488499341371973, "grad_norm": 0.18130570650100708, "learning_rate": 1.8635926324216714e-06, "loss": 0.0064, "step": 88140 }, { "epoch": 1.4886682203532948, "grad_norm": 0.22806915640830994, "learning_rate": 1.862445023129562e-06, "loss": 0.0088, "step": 88150 }, { "epoch": 1.4888370993346167, "grad_norm": 0.29101476073265076, "learning_rate": 1.8612976864205412e-06, "loss": 0.0069, "step": 88160 }, { "epoch": 1.4890059783159388, "grad_norm": 0.18309643864631653, "learning_rate": 1.860150622394286e-06, "loss": 0.0057, "step": 88170 }, { "epoch": 1.4891748572972607, "grad_norm": 0.39195001125335693, "learning_rate": 1.8590038311504527e-06, "loss": 0.0074, "step": 88180 }, { "epoch": 1.4893437362785829, "grad_norm": 0.1368594616651535, "learning_rate": 1.8578573127886683e-06, "loss": 0.0064, "step": 88190 }, { "epoch": 1.4895126152599047, "grad_norm": 0.11136768758296967, "learning_rate": 1.8567110674085409e-06, "loss": 0.0047, "step": 88200 }, { "epoch": 1.4896814942412266, "grad_norm": 0.14666160941123962, "learning_rate": 1.8555650951096543e-06, "loss": 0.0074, "step": 88210 }, { "epoch": 1.4898503732225488, "grad_norm": 0.16055810451507568, "learning_rate": 1.8544193959915685e-06, "loss": 0.0049, "step": 88220 }, { "epoch": 1.4900192522038707, "grad_norm": 0.3634088635444641, "learning_rate": 1.853273970153816e-06, "loss": 0.0085, "step": 88230 }, { "epoch": 1.4901881311851928, "grad_norm": 0.5173183083534241, "learning_rate": 1.85212881769591e-06, "loss": 0.0083, "step": 88240 }, { "epoch": 1.4903570101665147, "grad_norm": 0.13324463367462158, "learning_rate": 1.8509839387173394e-06, "loss": 0.0036, "step": 88250 }, { "epoch": 1.4905258891478366, "grad_norm": 0.2769422233104706, "learning_rate": 1.849839333317568e-06, "loss": 0.0117, "step": 88260 }, { "epoch": 1.4906947681291587, "grad_norm": 0.21500970423221588, "learning_rate": 1.8486950015960359e-06, "loss": 0.0081, "step": 88270 }, { "epoch": 1.4908636471104806, "grad_norm": 0.2671213150024414, "learning_rate": 1.84755094365216e-06, "loss": 0.0068, "step": 88280 }, { "epoch": 1.4910325260918027, "grad_norm": 0.3659992814064026, "learning_rate": 1.846407159585334e-06, "loss": 0.0116, "step": 88290 }, { "epoch": 1.4912014050731246, "grad_norm": 0.26213446259498596, "learning_rate": 1.8452636494949278e-06, "loss": 0.0108, "step": 88300 }, { "epoch": 1.4913702840544465, "grad_norm": 0.2274366021156311, "learning_rate": 1.8441204134802837e-06, "loss": 0.0044, "step": 88310 }, { "epoch": 1.4915391630357686, "grad_norm": 0.28643083572387695, "learning_rate": 1.842977451640725e-06, "loss": 0.0081, "step": 88320 }, { "epoch": 1.4917080420170905, "grad_norm": 0.16780468821525574, "learning_rate": 1.8418347640755496e-06, "loss": 0.0071, "step": 88330 }, { "epoch": 1.4918769209984126, "grad_norm": 0.25860872864723206, "learning_rate": 1.8406923508840313e-06, "loss": 0.0087, "step": 88340 }, { "epoch": 1.4920457999797345, "grad_norm": 0.366657555103302, "learning_rate": 1.8395502121654219e-06, "loss": 0.0067, "step": 88350 }, { "epoch": 1.4922146789610564, "grad_norm": 0.25161322951316833, "learning_rate": 1.8384083480189435e-06, "loss": 0.006, "step": 88360 }, { "epoch": 1.4923835579423785, "grad_norm": 0.3273131251335144, "learning_rate": 1.837266758543801e-06, "loss": 0.0064, "step": 88370 }, { "epoch": 1.4925524369237004, "grad_norm": 0.22828130424022675, "learning_rate": 1.8361254438391712e-06, "loss": 0.0067, "step": 88380 }, { "epoch": 1.4927213159050225, "grad_norm": 0.22408895194530487, "learning_rate": 1.8349844040042131e-06, "loss": 0.0116, "step": 88390 }, { "epoch": 1.4928901948863444, "grad_norm": 0.3370923399925232, "learning_rate": 1.8338436391380532e-06, "loss": 0.0082, "step": 88400 }, { "epoch": 1.4930590738676663, "grad_norm": 0.5352900624275208, "learning_rate": 1.8327031493397995e-06, "loss": 0.0072, "step": 88410 }, { "epoch": 1.4932279528489885, "grad_norm": 0.2389545440673828, "learning_rate": 1.831562934708535e-06, "loss": 0.0069, "step": 88420 }, { "epoch": 1.4933968318303104, "grad_norm": 0.05108720809221268, "learning_rate": 1.8304229953433206e-06, "loss": 0.0047, "step": 88430 }, { "epoch": 1.4935657108116325, "grad_norm": 0.27742424607276917, "learning_rate": 1.8292833313431885e-06, "loss": 0.0111, "step": 88440 }, { "epoch": 1.4937345897929544, "grad_norm": 0.04015423357486725, "learning_rate": 1.8281439428071507e-06, "loss": 0.0052, "step": 88450 }, { "epoch": 1.4939034687742763, "grad_norm": 0.37881192564964294, "learning_rate": 1.8270048298341948e-06, "loss": 0.0086, "step": 88460 }, { "epoch": 1.4940723477555984, "grad_norm": 0.1822461485862732, "learning_rate": 1.8258659925232858e-06, "loss": 0.0054, "step": 88470 }, { "epoch": 1.4942412267369203, "grad_norm": 0.10812842845916748, "learning_rate": 1.8247274309733603e-06, "loss": 0.0057, "step": 88480 }, { "epoch": 1.4944101057182424, "grad_norm": 0.3843798041343689, "learning_rate": 1.8235891452833333e-06, "loss": 0.0105, "step": 88490 }, { "epoch": 1.4945789846995643, "grad_norm": 0.1384495496749878, "learning_rate": 1.8224511355520997e-06, "loss": 0.0043, "step": 88500 }, { "epoch": 1.4947478636808862, "grad_norm": 0.18348540365695953, "learning_rate": 1.8213134018785267e-06, "loss": 0.0074, "step": 88510 }, { "epoch": 1.4949167426622083, "grad_norm": 0.31505024433135986, "learning_rate": 1.8201759443614547e-06, "loss": 0.0092, "step": 88520 }, { "epoch": 1.4950856216435302, "grad_norm": 0.23237060010433197, "learning_rate": 1.8190387630997046e-06, "loss": 0.0057, "step": 88530 }, { "epoch": 1.4952545006248523, "grad_norm": 0.2817801237106323, "learning_rate": 1.8179018581920727e-06, "loss": 0.0049, "step": 88540 }, { "epoch": 1.4954233796061742, "grad_norm": 0.2298104166984558, "learning_rate": 1.8167652297373317e-06, "loss": 0.0076, "step": 88550 }, { "epoch": 1.4955922585874961, "grad_norm": 0.15074612200260162, "learning_rate": 1.8156288778342257e-06, "loss": 0.006, "step": 88560 }, { "epoch": 1.4957611375688182, "grad_norm": 0.1623050719499588, "learning_rate": 1.8144928025814795e-06, "loss": 0.0077, "step": 88570 }, { "epoch": 1.4959300165501401, "grad_norm": 0.2273925393819809, "learning_rate": 1.8133570040777932e-06, "loss": 0.0081, "step": 88580 }, { "epoch": 1.4960988955314622, "grad_norm": 0.11194604635238647, "learning_rate": 1.8122214824218438e-06, "loss": 0.0083, "step": 88590 }, { "epoch": 1.4962677745127841, "grad_norm": 0.193846195936203, "learning_rate": 1.811086237712279e-06, "loss": 0.0104, "step": 88600 }, { "epoch": 1.496436653494106, "grad_norm": 0.24978336691856384, "learning_rate": 1.8099512700477257e-06, "loss": 0.0062, "step": 88610 }, { "epoch": 1.4966055324754282, "grad_norm": 0.19333668053150177, "learning_rate": 1.8088165795267914e-06, "loss": 0.0072, "step": 88620 }, { "epoch": 1.49677441145675, "grad_norm": 0.25668442249298096, "learning_rate": 1.8076821662480548e-06, "loss": 0.0053, "step": 88630 }, { "epoch": 1.4969432904380722, "grad_norm": 0.6678081750869751, "learning_rate": 1.8065480303100674e-06, "loss": 0.0054, "step": 88640 }, { "epoch": 1.497112169419394, "grad_norm": 0.2525957226753235, "learning_rate": 1.8054141718113616e-06, "loss": 0.0149, "step": 88650 }, { "epoch": 1.497281048400716, "grad_norm": 0.2711910605430603, "learning_rate": 1.8042805908504451e-06, "loss": 0.0073, "step": 88660 }, { "epoch": 1.497449927382038, "grad_norm": 0.24841688573360443, "learning_rate": 1.8031472875258017e-06, "loss": 0.005, "step": 88670 }, { "epoch": 1.49761880636336, "grad_norm": 0.2307833433151245, "learning_rate": 1.8020142619358872e-06, "loss": 0.0071, "step": 88680 }, { "epoch": 1.497787685344682, "grad_norm": 0.3859976530075073, "learning_rate": 1.8008815141791363e-06, "loss": 0.0046, "step": 88690 }, { "epoch": 1.497956564326004, "grad_norm": 0.10798486322164536, "learning_rate": 1.7997490443539612e-06, "loss": 0.0076, "step": 88700 }, { "epoch": 1.4981254433073259, "grad_norm": 0.2126142680644989, "learning_rate": 1.7986168525587488e-06, "loss": 0.0091, "step": 88710 }, { "epoch": 1.498294322288648, "grad_norm": 0.15279050171375275, "learning_rate": 1.797484938891858e-06, "loss": 0.009, "step": 88720 }, { "epoch": 1.49846320126997, "grad_norm": 0.41479527950286865, "learning_rate": 1.7963533034516267e-06, "loss": 0.0069, "step": 88730 }, { "epoch": 1.498632080251292, "grad_norm": 0.22528377175331116, "learning_rate": 1.7952219463363724e-06, "loss": 0.0099, "step": 88740 }, { "epoch": 1.498800959232614, "grad_norm": 0.1634819507598877, "learning_rate": 1.7940908676443836e-06, "loss": 0.0058, "step": 88750 }, { "epoch": 1.4989698382139358, "grad_norm": 0.22164030373096466, "learning_rate": 1.7929600674739229e-06, "loss": 0.0052, "step": 88760 }, { "epoch": 1.499138717195258, "grad_norm": 0.1410595178604126, "learning_rate": 1.7918295459232332e-06, "loss": 0.0039, "step": 88770 }, { "epoch": 1.4993075961765798, "grad_norm": 0.3489946126937866, "learning_rate": 1.7906993030905313e-06, "loss": 0.0079, "step": 88780 }, { "epoch": 1.499476475157902, "grad_norm": 0.2268928736448288, "learning_rate": 1.78956933907401e-06, "loss": 0.01, "step": 88790 }, { "epoch": 1.4996453541392238, "grad_norm": 0.39085468649864197, "learning_rate": 1.788439653971839e-06, "loss": 0.0116, "step": 88800 }, { "epoch": 1.4998142331205457, "grad_norm": 0.44152581691741943, "learning_rate": 1.7873102478821603e-06, "loss": 0.0054, "step": 88810 }, { "epoch": 1.4999831121018679, "grad_norm": 0.2643633782863617, "learning_rate": 1.786181120903095e-06, "loss": 0.0104, "step": 88820 }, { "epoch": 1.5001519910831898, "grad_norm": 0.134572371840477, "learning_rate": 1.7850522731327386e-06, "loss": 0.0049, "step": 88830 }, { "epoch": 1.5003208700645119, "grad_norm": 0.23937295377254486, "learning_rate": 1.7839237046691626e-06, "loss": 0.0083, "step": 88840 }, { "epoch": 1.5004897490458338, "grad_norm": 0.28585126996040344, "learning_rate": 1.7827954156104148e-06, "loss": 0.0134, "step": 88850 }, { "epoch": 1.5006586280271557, "grad_norm": 0.4034074544906616, "learning_rate": 1.781667406054518e-06, "loss": 0.0071, "step": 88860 }, { "epoch": 1.5008275070084778, "grad_norm": 0.09146541357040405, "learning_rate": 1.780539676099471e-06, "loss": 0.0044, "step": 88870 }, { "epoch": 1.5009963859897997, "grad_norm": 0.1615159809589386, "learning_rate": 1.7794122258432495e-06, "loss": 0.0081, "step": 88880 }, { "epoch": 1.5011652649711218, "grad_norm": 0.16472579538822174, "learning_rate": 1.7782850553838005e-06, "loss": 0.0061, "step": 88890 }, { "epoch": 1.5013341439524437, "grad_norm": 0.10639221221208572, "learning_rate": 1.7771581648190512e-06, "loss": 0.0059, "step": 88900 }, { "epoch": 1.5015030229337656, "grad_norm": 0.3725208640098572, "learning_rate": 1.7760315542469037e-06, "loss": 0.0068, "step": 88910 }, { "epoch": 1.5016719019150877, "grad_norm": 0.3240465521812439, "learning_rate": 1.7749052237652364e-06, "loss": 0.008, "step": 88920 }, { "epoch": 1.5018407808964096, "grad_norm": 0.35837918519973755, "learning_rate": 1.773779173471899e-06, "loss": 0.0089, "step": 88930 }, { "epoch": 1.5020096598777317, "grad_norm": 0.7293274998664856, "learning_rate": 1.7726534034647213e-06, "loss": 0.0168, "step": 88940 }, { "epoch": 1.5021785388590536, "grad_norm": 0.3005536198616028, "learning_rate": 1.7715279138415082e-06, "loss": 0.0069, "step": 88950 }, { "epoch": 1.5023474178403755, "grad_norm": 0.26688921451568604, "learning_rate": 1.7704027047000387e-06, "loss": 0.0051, "step": 88960 }, { "epoch": 1.5025162968216974, "grad_norm": 0.4211863875389099, "learning_rate": 1.7692777761380687e-06, "loss": 0.006, "step": 88970 }, { "epoch": 1.5026851758030195, "grad_norm": 0.23388855159282684, "learning_rate": 1.7681531282533288e-06, "loss": 0.006, "step": 88980 }, { "epoch": 1.5028540547843416, "grad_norm": 0.42264172434806824, "learning_rate": 1.7670287611435255e-06, "loss": 0.0121, "step": 88990 }, { "epoch": 1.5030229337656635, "grad_norm": 0.2387513667345047, "learning_rate": 1.7659046749063436e-06, "loss": 0.0072, "step": 89000 }, { "epoch": 1.5031918127469854, "grad_norm": 0.22922074794769287, "learning_rate": 1.7647808696394374e-06, "loss": 0.0125, "step": 89010 }, { "epoch": 1.5033606917283073, "grad_norm": 0.25591838359832764, "learning_rate": 1.763657345440441e-06, "loss": 0.0101, "step": 89020 }, { "epoch": 1.5035295707096294, "grad_norm": 0.178465336561203, "learning_rate": 1.7625341024069647e-06, "loss": 0.0048, "step": 89030 }, { "epoch": 1.5036984496909516, "grad_norm": 0.17332495748996735, "learning_rate": 1.7614111406365941e-06, "loss": 0.0056, "step": 89040 }, { "epoch": 1.5038673286722735, "grad_norm": 0.2787199020385742, "learning_rate": 1.7602884602268866e-06, "loss": 0.0052, "step": 89050 }, { "epoch": 1.5040362076535954, "grad_norm": 0.24954847991466522, "learning_rate": 1.7591660612753785e-06, "loss": 0.0059, "step": 89060 }, { "epoch": 1.5042050866349173, "grad_norm": 0.47474566102027893, "learning_rate": 1.758043943879582e-06, "loss": 0.0085, "step": 89070 }, { "epoch": 1.5043739656162394, "grad_norm": 0.15348511934280396, "learning_rate": 1.7569221081369837e-06, "loss": 0.0104, "step": 89080 }, { "epoch": 1.5045428445975615, "grad_norm": 0.2857603430747986, "learning_rate": 1.7558005541450456e-06, "loss": 0.006, "step": 89090 }, { "epoch": 1.5047117235788834, "grad_norm": 0.2024184614419937, "learning_rate": 1.7546792820012065e-06, "loss": 0.0067, "step": 89100 }, { "epoch": 1.5048806025602053, "grad_norm": 0.3182826042175293, "learning_rate": 1.7535582918028786e-06, "loss": 0.0085, "step": 89110 }, { "epoch": 1.5050494815415272, "grad_norm": 0.2948496341705322, "learning_rate": 1.7524375836474527e-06, "loss": 0.0049, "step": 89120 }, { "epoch": 1.5052183605228493, "grad_norm": 0.22993867099285126, "learning_rate": 1.7513171576322908e-06, "loss": 0.0063, "step": 89130 }, { "epoch": 1.5053872395041714, "grad_norm": 0.33370473980903625, "learning_rate": 1.7501970138547336e-06, "loss": 0.0069, "step": 89140 }, { "epoch": 1.5055561184854933, "grad_norm": 0.11914637684822083, "learning_rate": 1.7490771524120964e-06, "loss": 0.0082, "step": 89150 }, { "epoch": 1.5057249974668152, "grad_norm": 0.20572149753570557, "learning_rate": 1.7479575734016719e-06, "loss": 0.0082, "step": 89160 }, { "epoch": 1.505893876448137, "grad_norm": 0.3264540731906891, "learning_rate": 1.7468382769207232e-06, "loss": 0.0061, "step": 89170 }, { "epoch": 1.5060627554294592, "grad_norm": 0.628608763217926, "learning_rate": 1.7457192630664931e-06, "loss": 0.0105, "step": 89180 }, { "epoch": 1.5062316344107813, "grad_norm": 0.32254546880722046, "learning_rate": 1.7446005319361998e-06, "loss": 0.0119, "step": 89190 }, { "epoch": 1.5064005133921032, "grad_norm": 0.22812379896640778, "learning_rate": 1.743482083627035e-06, "loss": 0.0062, "step": 89200 }, { "epoch": 1.5065693923734251, "grad_norm": 0.243082195520401, "learning_rate": 1.7423639182361674e-06, "loss": 0.0097, "step": 89210 }, { "epoch": 1.506738271354747, "grad_norm": 0.2268172651529312, "learning_rate": 1.7412460358607396e-06, "loss": 0.0134, "step": 89220 }, { "epoch": 1.5069071503360691, "grad_norm": 0.2283640205860138, "learning_rate": 1.7401284365978715e-06, "loss": 0.0089, "step": 89230 }, { "epoch": 1.5070760293173913, "grad_norm": 0.20169052481651306, "learning_rate": 1.739011120544657e-06, "loss": 0.006, "step": 89240 }, { "epoch": 1.5072449082987132, "grad_norm": 0.2912038564682007, "learning_rate": 1.737894087798167e-06, "loss": 0.0088, "step": 89250 }, { "epoch": 1.507413787280035, "grad_norm": 0.16802236437797546, "learning_rate": 1.7367773384554437e-06, "loss": 0.0057, "step": 89260 }, { "epoch": 1.507582666261357, "grad_norm": 0.2389748990535736, "learning_rate": 1.7356608726135093e-06, "loss": 0.0053, "step": 89270 }, { "epoch": 1.507751545242679, "grad_norm": 0.37445753812789917, "learning_rate": 1.734544690369359e-06, "loss": 0.0092, "step": 89280 }, { "epoch": 1.5079204242240012, "grad_norm": 0.2598535418510437, "learning_rate": 1.7334287918199666e-06, "loss": 0.0057, "step": 89290 }, { "epoch": 1.508089303205323, "grad_norm": 0.20913076400756836, "learning_rate": 1.7323131770622742e-06, "loss": 0.0084, "step": 89300 }, { "epoch": 1.508258182186645, "grad_norm": 0.2708488702774048, "learning_rate": 1.7311978461932066e-06, "loss": 0.0081, "step": 89310 }, { "epoch": 1.5084270611679669, "grad_norm": 0.2540355622768402, "learning_rate": 1.7300827993096597e-06, "loss": 0.0046, "step": 89320 }, { "epoch": 1.508595940149289, "grad_norm": 0.33465275168418884, "learning_rate": 1.728968036508507e-06, "loss": 0.0061, "step": 89330 }, { "epoch": 1.5087648191306111, "grad_norm": 0.28092068433761597, "learning_rate": 1.7278535578865962e-06, "loss": 0.0091, "step": 89340 }, { "epoch": 1.508933698111933, "grad_norm": 0.2978143095970154, "learning_rate": 1.7267393635407504e-06, "loss": 0.0063, "step": 89350 }, { "epoch": 1.509102577093255, "grad_norm": 0.3117835223674774, "learning_rate": 1.7256254535677674e-06, "loss": 0.0059, "step": 89360 }, { "epoch": 1.5092714560745768, "grad_norm": 0.2570177912712097, "learning_rate": 1.7245118280644235e-06, "loss": 0.007, "step": 89370 }, { "epoch": 1.509440335055899, "grad_norm": 0.2723340094089508, "learning_rate": 1.7233984871274645e-06, "loss": 0.0075, "step": 89380 }, { "epoch": 1.509609214037221, "grad_norm": 0.6705772280693054, "learning_rate": 1.7222854308536151e-06, "loss": 0.0057, "step": 89390 }, { "epoch": 1.509778093018543, "grad_norm": 0.13466477394104004, "learning_rate": 1.7211726593395767e-06, "loss": 0.0056, "step": 89400 }, { "epoch": 1.5099469719998648, "grad_norm": 0.40338432788848877, "learning_rate": 1.7200601726820248e-06, "loss": 0.0061, "step": 89410 }, { "epoch": 1.5101158509811867, "grad_norm": 0.11012104898691177, "learning_rate": 1.7189479709776064e-06, "loss": 0.0048, "step": 89420 }, { "epoch": 1.5102847299625088, "grad_norm": 0.3115156292915344, "learning_rate": 1.7178360543229483e-06, "loss": 0.0059, "step": 89430 }, { "epoch": 1.510453608943831, "grad_norm": 0.19092996418476105, "learning_rate": 1.7167244228146512e-06, "loss": 0.0083, "step": 89440 }, { "epoch": 1.5106224879251529, "grad_norm": 0.15435543656349182, "learning_rate": 1.7156130765492913e-06, "loss": 0.0096, "step": 89450 }, { "epoch": 1.5107913669064748, "grad_norm": 0.09230855852365494, "learning_rate": 1.7145020156234189e-06, "loss": 0.006, "step": 89460 }, { "epoch": 1.5109602458877966, "grad_norm": 0.18787212669849396, "learning_rate": 1.7133912401335605e-06, "loss": 0.0057, "step": 89470 }, { "epoch": 1.5111291248691188, "grad_norm": 0.05841245502233505, "learning_rate": 1.7122807501762184e-06, "loss": 0.0041, "step": 89480 }, { "epoch": 1.5112980038504409, "grad_norm": 0.2951892614364624, "learning_rate": 1.71117054584787e-06, "loss": 0.0081, "step": 89490 }, { "epoch": 1.5114668828317628, "grad_norm": 0.3894273638725281, "learning_rate": 1.7100606272449638e-06, "loss": 0.0085, "step": 89500 }, { "epoch": 1.5116357618130847, "grad_norm": 0.26780787110328674, "learning_rate": 1.7089509944639292e-06, "loss": 0.0045, "step": 89510 }, { "epoch": 1.5118046407944066, "grad_norm": 0.14051537215709686, "learning_rate": 1.7078416476011678e-06, "loss": 0.0045, "step": 89520 }, { "epoch": 1.5119735197757287, "grad_norm": 0.29301217198371887, "learning_rate": 1.7067325867530582e-06, "loss": 0.0079, "step": 89530 }, { "epoch": 1.5121423987570508, "grad_norm": 0.2846381962299347, "learning_rate": 1.7056238120159507e-06, "loss": 0.0092, "step": 89540 }, { "epoch": 1.5123112777383727, "grad_norm": 0.9999265670776367, "learning_rate": 1.7045153234861733e-06, "loss": 0.0055, "step": 89550 }, { "epoch": 1.5124801567196946, "grad_norm": 0.26221510767936707, "learning_rate": 1.70340712126003e-06, "loss": 0.0061, "step": 89560 }, { "epoch": 1.5126490357010165, "grad_norm": 0.08048330247402191, "learning_rate": 1.7022992054337977e-06, "loss": 0.0044, "step": 89570 }, { "epoch": 1.5128179146823386, "grad_norm": 0.24217528104782104, "learning_rate": 1.70119157610373e-06, "loss": 0.0054, "step": 89580 }, { "epoch": 1.5129867936636607, "grad_norm": 0.33677414059638977, "learning_rate": 1.700084233366055e-06, "loss": 0.0077, "step": 89590 }, { "epoch": 1.5131556726449826, "grad_norm": 0.3278217315673828, "learning_rate": 1.6989771773169755e-06, "loss": 0.0067, "step": 89600 }, { "epoch": 1.5133245516263045, "grad_norm": 0.26009660959243774, "learning_rate": 1.697870408052672e-06, "loss": 0.01, "step": 89610 }, { "epoch": 1.5134934306076264, "grad_norm": 0.03133450075984001, "learning_rate": 1.6967639256692942e-06, "loss": 0.0076, "step": 89620 }, { "epoch": 1.5136623095889485, "grad_norm": 0.0995061993598938, "learning_rate": 1.6956577302629723e-06, "loss": 0.0055, "step": 89630 }, { "epoch": 1.5138311885702707, "grad_norm": 0.4886474013328552, "learning_rate": 1.6945518219298102e-06, "loss": 0.0091, "step": 89640 }, { "epoch": 1.5140000675515926, "grad_norm": 0.3646506369113922, "learning_rate": 1.693446200765888e-06, "loss": 0.0086, "step": 89650 }, { "epoch": 1.5141689465329145, "grad_norm": 0.37797942757606506, "learning_rate": 1.6923408668672553e-06, "loss": 0.0083, "step": 89660 }, { "epoch": 1.5143378255142363, "grad_norm": 0.23662203550338745, "learning_rate": 1.6912358203299434e-06, "loss": 0.005, "step": 89670 }, { "epoch": 1.5145067044955585, "grad_norm": 0.1467934250831604, "learning_rate": 1.690131061249956e-06, "loss": 0.0122, "step": 89680 }, { "epoch": 1.5146755834768806, "grad_norm": 0.11784204095602036, "learning_rate": 1.689026589723271e-06, "loss": 0.0062, "step": 89690 }, { "epoch": 1.5148444624582025, "grad_norm": 0.1170232966542244, "learning_rate": 1.6879224058458431e-06, "loss": 0.0045, "step": 89700 }, { "epoch": 1.5150133414395244, "grad_norm": 0.21961107850074768, "learning_rate": 1.686818509713601e-06, "loss": 0.0073, "step": 89710 }, { "epoch": 1.5151822204208463, "grad_norm": 0.1532040238380432, "learning_rate": 1.6857149014224477e-06, "loss": 0.0052, "step": 89720 }, { "epoch": 1.5153510994021684, "grad_norm": 0.17300133407115936, "learning_rate": 1.684611581068263e-06, "loss": 0.0092, "step": 89730 }, { "epoch": 1.5155199783834905, "grad_norm": 0.131069153547287, "learning_rate": 1.6835085487469015e-06, "loss": 0.0072, "step": 89740 }, { "epoch": 1.5156888573648124, "grad_norm": 0.16624464094638824, "learning_rate": 1.6824058045541892e-06, "loss": 0.005, "step": 89750 }, { "epoch": 1.5158577363461343, "grad_norm": 0.20212538540363312, "learning_rate": 1.681303348585931e-06, "loss": 0.0071, "step": 89760 }, { "epoch": 1.5160266153274562, "grad_norm": 0.2852572500705719, "learning_rate": 1.6802011809379059e-06, "loss": 0.0104, "step": 89770 }, { "epoch": 1.5161954943087783, "grad_norm": 0.9248525500297546, "learning_rate": 1.6790993017058692e-06, "loss": 0.0092, "step": 89780 }, { "epoch": 1.5163643732901004, "grad_norm": 0.25801223516464233, "learning_rate": 1.6779977109855461e-06, "loss": 0.0092, "step": 89790 }, { "epoch": 1.5165332522714223, "grad_norm": 0.6052269339561462, "learning_rate": 1.676896408872642e-06, "loss": 0.0075, "step": 89800 }, { "epoch": 1.5167021312527442, "grad_norm": 0.17547070980072021, "learning_rate": 1.6757953954628347e-06, "loss": 0.0086, "step": 89810 }, { "epoch": 1.5168710102340661, "grad_norm": 0.15936514735221863, "learning_rate": 1.6746946708517776e-06, "loss": 0.0049, "step": 89820 }, { "epoch": 1.5170398892153882, "grad_norm": 0.07928994297981262, "learning_rate": 1.6735942351350997e-06, "loss": 0.0054, "step": 89830 }, { "epoch": 1.5172087681967104, "grad_norm": 0.13298840820789337, "learning_rate": 1.6724940884084035e-06, "loss": 0.0095, "step": 89840 }, { "epoch": 1.5173776471780323, "grad_norm": 0.451370507478714, "learning_rate": 1.6713942307672677e-06, "loss": 0.0079, "step": 89850 }, { "epoch": 1.5175465261593541, "grad_norm": 0.19861403107643127, "learning_rate": 1.6702946623072458e-06, "loss": 0.0052, "step": 89860 }, { "epoch": 1.517715405140676, "grad_norm": 0.2867325246334076, "learning_rate": 1.6691953831238632e-06, "loss": 0.0075, "step": 89870 }, { "epoch": 1.5178842841219982, "grad_norm": 0.049924515187740326, "learning_rate": 1.6680963933126243e-06, "loss": 0.0076, "step": 89880 }, { "epoch": 1.5180531631033203, "grad_norm": 0.281514972448349, "learning_rate": 1.6669976929690057e-06, "loss": 0.0076, "step": 89890 }, { "epoch": 1.5182220420846422, "grad_norm": 0.07135220617055893, "learning_rate": 1.6658992821884629e-06, "loss": 0.0055, "step": 89900 }, { "epoch": 1.518390921065964, "grad_norm": 0.31084415316581726, "learning_rate": 1.6648011610664184e-06, "loss": 0.0098, "step": 89910 }, { "epoch": 1.518559800047286, "grad_norm": 0.4038971960544586, "learning_rate": 1.6637033296982768e-06, "loss": 0.0056, "step": 89920 }, { "epoch": 1.518728679028608, "grad_norm": 0.15398003160953522, "learning_rate": 1.6626057881794145e-06, "loss": 0.0061, "step": 89930 }, { "epoch": 1.5188975580099302, "grad_norm": 0.15996411442756653, "learning_rate": 1.6615085366051836e-06, "loss": 0.0078, "step": 89940 }, { "epoch": 1.519066436991252, "grad_norm": 0.15016373991966248, "learning_rate": 1.6604115750709104e-06, "loss": 0.0086, "step": 89950 }, { "epoch": 1.519235315972574, "grad_norm": 0.18953894078731537, "learning_rate": 1.6593149036718965e-06, "loss": 0.0066, "step": 89960 }, { "epoch": 1.519404194953896, "grad_norm": 0.12717771530151367, "learning_rate": 1.658218522503418e-06, "loss": 0.0068, "step": 89970 }, { "epoch": 1.519573073935218, "grad_norm": 0.2963031530380249, "learning_rate": 1.6571224316607271e-06, "loss": 0.0129, "step": 89980 }, { "epoch": 1.5197419529165401, "grad_norm": 0.2067631036043167, "learning_rate": 1.656026631239046e-06, "loss": 0.0075, "step": 89990 }, { "epoch": 1.519910831897862, "grad_norm": 0.48922809958457947, "learning_rate": 1.6549311213335778e-06, "loss": 0.0096, "step": 90000 }, { "epoch": 1.520079710879184, "grad_norm": 0.08950477093458176, "learning_rate": 1.6538359020394967e-06, "loss": 0.0046, "step": 90010 }, { "epoch": 1.5202485898605058, "grad_norm": 0.11313431710004807, "learning_rate": 1.6527409734519551e-06, "loss": 0.0067, "step": 90020 }, { "epoch": 1.520417468841828, "grad_norm": 0.24629570543766022, "learning_rate": 1.651646335666074e-06, "loss": 0.0081, "step": 90030 }, { "epoch": 1.52058634782315, "grad_norm": 0.15760380029678345, "learning_rate": 1.650551988776954e-06, "loss": 0.0088, "step": 90040 }, { "epoch": 1.520755226804472, "grad_norm": 0.46779677271842957, "learning_rate": 1.64945793287967e-06, "loss": 0.0059, "step": 90050 }, { "epoch": 1.5209241057857938, "grad_norm": 0.1851746290922165, "learning_rate": 1.6483641680692708e-06, "loss": 0.0055, "step": 90060 }, { "epoch": 1.5210929847671157, "grad_norm": 0.3050585091114044, "learning_rate": 1.6472706944407802e-06, "loss": 0.0085, "step": 90070 }, { "epoch": 1.5212618637484379, "grad_norm": 0.07871969789266586, "learning_rate": 1.646177512089196e-06, "loss": 0.0077, "step": 90080 }, { "epoch": 1.52143074272976, "grad_norm": 0.3383280634880066, "learning_rate": 1.6450846211094912e-06, "loss": 0.0094, "step": 90090 }, { "epoch": 1.5215996217110819, "grad_norm": 0.2634241282939911, "learning_rate": 1.643992021596616e-06, "loss": 0.0065, "step": 90100 }, { "epoch": 1.5217685006924038, "grad_norm": 0.16920092701911926, "learning_rate": 1.6428997136454883e-06, "loss": 0.0051, "step": 90110 }, { "epoch": 1.5219373796737257, "grad_norm": 0.16470706462860107, "learning_rate": 1.6418076973510072e-06, "loss": 0.0088, "step": 90120 }, { "epoch": 1.5221062586550478, "grad_norm": 0.12380111217498779, "learning_rate": 1.6407159728080452e-06, "loss": 0.0054, "step": 90130 }, { "epoch": 1.52227513763637, "grad_norm": 0.11784376204013824, "learning_rate": 1.6396245401114475e-06, "loss": 0.0044, "step": 90140 }, { "epoch": 1.5224440166176918, "grad_norm": 0.15939418971538544, "learning_rate": 1.6385333993560372e-06, "loss": 0.0078, "step": 90150 }, { "epoch": 1.5226128955990137, "grad_norm": 0.12441401183605194, "learning_rate": 1.6374425506366064e-06, "loss": 0.009, "step": 90160 }, { "epoch": 1.5227817745803356, "grad_norm": 0.16341820359230042, "learning_rate": 1.6363519940479277e-06, "loss": 0.0044, "step": 90170 }, { "epoch": 1.5229506535616577, "grad_norm": 0.19458167254924774, "learning_rate": 1.6352617296847457e-06, "loss": 0.0056, "step": 90180 }, { "epoch": 1.5231195325429798, "grad_norm": 0.6504482626914978, "learning_rate": 1.6341717576417792e-06, "loss": 0.0062, "step": 90190 }, { "epoch": 1.5232884115243017, "grad_norm": 0.44069337844848633, "learning_rate": 1.6330820780137235e-06, "loss": 0.0099, "step": 90200 }, { "epoch": 1.5234572905056236, "grad_norm": 0.3977881968021393, "learning_rate": 1.631992690895246e-06, "loss": 0.0107, "step": 90210 }, { "epoch": 1.5236261694869455, "grad_norm": 0.4458715319633484, "learning_rate": 1.630903596380991e-06, "loss": 0.0071, "step": 90220 }, { "epoch": 1.5237950484682676, "grad_norm": 0.2337072342634201, "learning_rate": 1.6298147945655778e-06, "loss": 0.008, "step": 90230 }, { "epoch": 1.5239639274495898, "grad_norm": 0.3319973647594452, "learning_rate": 1.6287262855435954e-06, "loss": 0.0061, "step": 90240 }, { "epoch": 1.5241328064309116, "grad_norm": 0.18661418557167053, "learning_rate": 1.6276380694096122e-06, "loss": 0.0053, "step": 90250 }, { "epoch": 1.5243016854122335, "grad_norm": 0.14476324617862701, "learning_rate": 1.6265501462581702e-06, "loss": 0.0055, "step": 90260 }, { "epoch": 1.5244705643935554, "grad_norm": 0.1369650959968567, "learning_rate": 1.6254625161837862e-06, "loss": 0.0078, "step": 90270 }, { "epoch": 1.5246394433748776, "grad_norm": 0.4307112395763397, "learning_rate": 1.6243751792809493e-06, "loss": 0.0044, "step": 90280 }, { "epoch": 1.5248083223561997, "grad_norm": 0.14694111049175262, "learning_rate": 1.6232881356441243e-06, "loss": 0.0065, "step": 90290 }, { "epoch": 1.5249772013375216, "grad_norm": 0.1783107966184616, "learning_rate": 1.6222013853677526e-06, "loss": 0.005, "step": 90300 }, { "epoch": 1.5251460803188435, "grad_norm": 0.23105044662952423, "learning_rate": 1.6211149285462475e-06, "loss": 0.0118, "step": 90310 }, { "epoch": 1.5253149593001654, "grad_norm": 0.33945146203041077, "learning_rate": 1.6200287652739976e-06, "loss": 0.0063, "step": 90320 }, { "epoch": 1.5254838382814875, "grad_norm": 0.33325687050819397, "learning_rate": 1.6189428956453668e-06, "loss": 0.0048, "step": 90330 }, { "epoch": 1.5256527172628096, "grad_norm": 0.15964867174625397, "learning_rate": 1.617857319754692e-06, "loss": 0.0067, "step": 90340 }, { "epoch": 1.5258215962441315, "grad_norm": 0.18893928825855255, "learning_rate": 1.6167720376962876e-06, "loss": 0.006, "step": 90350 }, { "epoch": 1.5259904752254534, "grad_norm": 0.3119153380393982, "learning_rate": 1.6156870495644367e-06, "loss": 0.0096, "step": 90360 }, { "epoch": 1.5261593542067753, "grad_norm": 0.19826938211917877, "learning_rate": 1.6146023554534018e-06, "loss": 0.0078, "step": 90370 }, { "epoch": 1.5263282331880974, "grad_norm": 0.2855994701385498, "learning_rate": 1.6135179554574193e-06, "loss": 0.0078, "step": 90380 }, { "epoch": 1.5264971121694195, "grad_norm": 0.10663870722055435, "learning_rate": 1.6124338496706998e-06, "loss": 0.0083, "step": 90390 }, { "epoch": 1.5266659911507414, "grad_norm": 0.41429972648620605, "learning_rate": 1.6113500381874253e-06, "loss": 0.0097, "step": 90400 }, { "epoch": 1.5268348701320633, "grad_norm": 0.31082531809806824, "learning_rate": 1.610266521101756e-06, "loss": 0.0075, "step": 90410 }, { "epoch": 1.5270037491133852, "grad_norm": 0.1656813770532608, "learning_rate": 1.6091832985078237e-06, "loss": 0.0051, "step": 90420 }, { "epoch": 1.5271726280947073, "grad_norm": 0.13209845125675201, "learning_rate": 1.6081003704997405e-06, "loss": 0.0092, "step": 90430 }, { "epoch": 1.5273415070760294, "grad_norm": 0.2801678776741028, "learning_rate": 1.6070177371715844e-06, "loss": 0.0059, "step": 90440 }, { "epoch": 1.5275103860573513, "grad_norm": 0.16393934190273285, "learning_rate": 1.6059353986174131e-06, "loss": 0.0064, "step": 90450 }, { "epoch": 1.5276792650386732, "grad_norm": 0.2700050175189972, "learning_rate": 1.6048533549312577e-06, "loss": 0.0083, "step": 90460 }, { "epoch": 1.5278481440199951, "grad_norm": 0.36010298132896423, "learning_rate": 1.603771606207125e-06, "loss": 0.0068, "step": 90470 }, { "epoch": 1.5280170230013173, "grad_norm": 0.12933507561683655, "learning_rate": 1.6026901525389916e-06, "loss": 0.0045, "step": 90480 }, { "epoch": 1.5281859019826394, "grad_norm": 0.2734428346157074, "learning_rate": 1.601608994020813e-06, "loss": 0.0049, "step": 90490 }, { "epoch": 1.5283547809639613, "grad_norm": 0.31873011589050293, "learning_rate": 1.6005281307465176e-06, "loss": 0.0084, "step": 90500 }, { "epoch": 1.5285236599452832, "grad_norm": 0.3061809241771698, "learning_rate": 1.5994475628100098e-06, "loss": 0.0055, "step": 90510 }, { "epoch": 1.528692538926605, "grad_norm": 0.17223259806632996, "learning_rate": 1.598367290305164e-06, "loss": 0.0091, "step": 90520 }, { "epoch": 1.5288614179079272, "grad_norm": 0.2489268034696579, "learning_rate": 1.5972873133258322e-06, "loss": 0.0081, "step": 90530 }, { "epoch": 1.5290302968892493, "grad_norm": 0.28788766264915466, "learning_rate": 1.5962076319658392e-06, "loss": 0.0058, "step": 90540 }, { "epoch": 1.5291991758705712, "grad_norm": 0.24561253190040588, "learning_rate": 1.5951282463189898e-06, "loss": 0.009, "step": 90550 }, { "epoch": 1.529368054851893, "grad_norm": 0.36693045496940613, "learning_rate": 1.5940491564790538e-06, "loss": 0.0051, "step": 90560 }, { "epoch": 1.529536933833215, "grad_norm": 0.3080708682537079, "learning_rate": 1.5929703625397807e-06, "loss": 0.0053, "step": 90570 }, { "epoch": 1.529705812814537, "grad_norm": 0.21099726855754852, "learning_rate": 1.5918918645948944e-06, "loss": 0.0091, "step": 90580 }, { "epoch": 1.5298746917958592, "grad_norm": 0.26734107732772827, "learning_rate": 1.5908136627380915e-06, "loss": 0.0056, "step": 90590 }, { "epoch": 1.5300435707771811, "grad_norm": 0.10035435110330582, "learning_rate": 1.5897357570630455e-06, "loss": 0.0042, "step": 90600 }, { "epoch": 1.530212449758503, "grad_norm": 0.30933836102485657, "learning_rate": 1.5886581476633994e-06, "loss": 0.0065, "step": 90610 }, { "epoch": 1.530381328739825, "grad_norm": 0.2711483836174011, "learning_rate": 1.5875808346327736e-06, "loss": 0.0078, "step": 90620 }, { "epoch": 1.530550207721147, "grad_norm": 0.16499248147010803, "learning_rate": 1.586503818064764e-06, "loss": 0.0044, "step": 90630 }, { "epoch": 1.5307190867024691, "grad_norm": 0.41695085167884827, "learning_rate": 1.585427098052939e-06, "loss": 0.0069, "step": 90640 }, { "epoch": 1.530887965683791, "grad_norm": 0.28281790018081665, "learning_rate": 1.5843506746908382e-06, "loss": 0.0113, "step": 90650 }, { "epoch": 1.531056844665113, "grad_norm": 0.19170309603214264, "learning_rate": 1.5832745480719825e-06, "loss": 0.0072, "step": 90660 }, { "epoch": 1.5312257236464348, "grad_norm": 0.09412922710180283, "learning_rate": 1.5821987182898617e-06, "loss": 0.007, "step": 90670 }, { "epoch": 1.531394602627757, "grad_norm": 0.23261050879955292, "learning_rate": 1.5811231854379433e-06, "loss": 0.007, "step": 90680 }, { "epoch": 1.531563481609079, "grad_norm": 0.513963520526886, "learning_rate": 1.5800479496096633e-06, "loss": 0.0096, "step": 90690 }, { "epoch": 1.531732360590401, "grad_norm": 0.16076813638210297, "learning_rate": 1.5789730108984374e-06, "loss": 0.0112, "step": 90700 }, { "epoch": 1.5319012395717229, "grad_norm": 0.14245793223381042, "learning_rate": 1.577898369397653e-06, "loss": 0.0061, "step": 90710 }, { "epoch": 1.5320701185530448, "grad_norm": 0.2980967164039612, "learning_rate": 1.576824025200675e-06, "loss": 0.0057, "step": 90720 }, { "epoch": 1.5322389975343669, "grad_norm": 0.3060637414455414, "learning_rate": 1.5757499784008357e-06, "loss": 0.0092, "step": 90730 }, { "epoch": 1.532407876515689, "grad_norm": 0.6583830714225769, "learning_rate": 1.574676229091448e-06, "loss": 0.0104, "step": 90740 }, { "epoch": 1.532576755497011, "grad_norm": 0.1727248728275299, "learning_rate": 1.573602777365796e-06, "loss": 0.0084, "step": 90750 }, { "epoch": 1.5327456344783328, "grad_norm": 0.20679517090320587, "learning_rate": 1.57252962331714e-06, "loss": 0.0041, "step": 90760 }, { "epoch": 1.5329145134596547, "grad_norm": 0.3122953772544861, "learning_rate": 1.5714567670387087e-06, "loss": 0.0044, "step": 90770 }, { "epoch": 1.5330833924409768, "grad_norm": 0.2475225031375885, "learning_rate": 1.5703842086237137e-06, "loss": 0.0057, "step": 90780 }, { "epoch": 1.533252271422299, "grad_norm": 0.2638086676597595, "learning_rate": 1.5693119481653352e-06, "loss": 0.0054, "step": 90790 }, { "epoch": 1.5334211504036208, "grad_norm": 0.3574577271938324, "learning_rate": 1.5682399857567292e-06, "loss": 0.007, "step": 90800 }, { "epoch": 1.5335900293849427, "grad_norm": 0.25921016931533813, "learning_rate": 1.5671683214910233e-06, "loss": 0.0068, "step": 90810 }, { "epoch": 1.5337589083662646, "grad_norm": 0.37586459517478943, "learning_rate": 1.5660969554613215e-06, "loss": 0.0097, "step": 90820 }, { "epoch": 1.5339277873475867, "grad_norm": 0.3818265199661255, "learning_rate": 1.5650258877607021e-06, "loss": 0.0057, "step": 90830 }, { "epoch": 1.5340966663289088, "grad_norm": 0.2923118472099304, "learning_rate": 1.5639551184822178e-06, "loss": 0.0097, "step": 90840 }, { "epoch": 1.5342655453102307, "grad_norm": 0.2607077956199646, "learning_rate": 1.562884647718892e-06, "loss": 0.0064, "step": 90850 }, { "epoch": 1.5344344242915526, "grad_norm": 0.2406982183456421, "learning_rate": 1.5618144755637255e-06, "loss": 0.006, "step": 90860 }, { "epoch": 1.5346033032728745, "grad_norm": 0.29846322536468506, "learning_rate": 1.5607446021096928e-06, "loss": 0.0098, "step": 90870 }, { "epoch": 1.5347721822541966, "grad_norm": 0.2676604986190796, "learning_rate": 1.559675027449743e-06, "loss": 0.0065, "step": 90880 }, { "epoch": 1.5349410612355188, "grad_norm": 0.39481931924819946, "learning_rate": 1.5586057516767933e-06, "loss": 0.006, "step": 90890 }, { "epoch": 1.5351099402168407, "grad_norm": 0.33554792404174805, "learning_rate": 1.557536774883745e-06, "loss": 0.0079, "step": 90900 }, { "epoch": 1.5352788191981626, "grad_norm": 0.4048278331756592, "learning_rate": 1.5564680971634666e-06, "loss": 0.0071, "step": 90910 }, { "epoch": 1.5354476981794845, "grad_norm": 0.219808891415596, "learning_rate": 1.5553997186088032e-06, "loss": 0.007, "step": 90920 }, { "epoch": 1.5356165771608066, "grad_norm": 0.29835593700408936, "learning_rate": 1.5543316393125702e-06, "loss": 0.006, "step": 90930 }, { "epoch": 1.5357854561421287, "grad_norm": 0.3231121003627777, "learning_rate": 1.5532638593675614e-06, "loss": 0.0052, "step": 90940 }, { "epoch": 1.5359543351234506, "grad_norm": 0.14814047515392303, "learning_rate": 1.5521963788665428e-06, "loss": 0.008, "step": 90950 }, { "epoch": 1.5361232141047725, "grad_norm": 0.23711735010147095, "learning_rate": 1.5511291979022559e-06, "loss": 0.0049, "step": 90960 }, { "epoch": 1.5362920930860944, "grad_norm": 0.17196540534496307, "learning_rate": 1.5500623165674112e-06, "loss": 0.0074, "step": 90970 }, { "epoch": 1.5364609720674165, "grad_norm": 0.3295391798019409, "learning_rate": 1.5489957349546992e-06, "loss": 0.0061, "step": 90980 }, { "epoch": 1.5366298510487386, "grad_norm": 0.22794316709041595, "learning_rate": 1.5479294531567812e-06, "loss": 0.0082, "step": 90990 }, { "epoch": 1.5367987300300605, "grad_norm": 0.08771447837352753, "learning_rate": 1.546863471266294e-06, "loss": 0.0083, "step": 91000 }, { "epoch": 1.5369676090113824, "grad_norm": 0.15633563697338104, "learning_rate": 1.5457977893758463e-06, "loss": 0.0063, "step": 91010 }, { "epoch": 1.5371364879927043, "grad_norm": 0.2735581398010254, "learning_rate": 1.5447324075780228e-06, "loss": 0.0077, "step": 91020 }, { "epoch": 1.5373053669740264, "grad_norm": 0.12078716605901718, "learning_rate": 1.5436673259653811e-06, "loss": 0.0085, "step": 91030 }, { "epoch": 1.5374742459553485, "grad_norm": 0.3235858380794525, "learning_rate": 1.5426025446304532e-06, "loss": 0.0133, "step": 91040 }, { "epoch": 1.5376431249366704, "grad_norm": 0.1786438226699829, "learning_rate": 1.5415380636657451e-06, "loss": 0.0093, "step": 91050 }, { "epoch": 1.5378120039179923, "grad_norm": 0.16401609778404236, "learning_rate": 1.5404738831637346e-06, "loss": 0.0047, "step": 91060 }, { "epoch": 1.5379808828993142, "grad_norm": 0.2261107861995697, "learning_rate": 1.5394100032168758e-06, "loss": 0.0069, "step": 91070 }, { "epoch": 1.5381497618806363, "grad_norm": 0.34044039249420166, "learning_rate": 1.538346423917596e-06, "loss": 0.0083, "step": 91080 }, { "epoch": 1.5383186408619585, "grad_norm": 0.2949581742286682, "learning_rate": 1.5372831453582982e-06, "loss": 0.0074, "step": 91090 }, { "epoch": 1.5384875198432804, "grad_norm": 0.25315865874290466, "learning_rate": 1.5362201676313542e-06, "loss": 0.009, "step": 91100 }, { "epoch": 1.5386563988246023, "grad_norm": 0.13954895734786987, "learning_rate": 1.5351574908291145e-06, "loss": 0.0095, "step": 91110 }, { "epoch": 1.5388252778059242, "grad_norm": 0.20973271131515503, "learning_rate": 1.5340951150439015e-06, "loss": 0.0074, "step": 91120 }, { "epoch": 1.5389941567872463, "grad_norm": 0.521665096282959, "learning_rate": 1.5330330403680127e-06, "loss": 0.0067, "step": 91130 }, { "epoch": 1.5391630357685684, "grad_norm": 0.360476016998291, "learning_rate": 1.5319712668937177e-06, "loss": 0.0075, "step": 91140 }, { "epoch": 1.5393319147498903, "grad_norm": 0.39501914381980896, "learning_rate": 1.5309097947132612e-06, "loss": 0.0091, "step": 91150 }, { "epoch": 1.5395007937312122, "grad_norm": 0.3146434426307678, "learning_rate": 1.529848623918861e-06, "loss": 0.0065, "step": 91160 }, { "epoch": 1.539669672712534, "grad_norm": 0.2837121784687042, "learning_rate": 1.5287877546027109e-06, "loss": 0.0088, "step": 91170 }, { "epoch": 1.5398385516938562, "grad_norm": 0.1510642021894455, "learning_rate": 1.5277271868569732e-06, "loss": 0.0063, "step": 91180 }, { "epoch": 1.5400074306751783, "grad_norm": 0.1694706380367279, "learning_rate": 1.5266669207737894e-06, "loss": 0.0061, "step": 91190 }, { "epoch": 1.5401763096565002, "grad_norm": 0.1554570198059082, "learning_rate": 1.5256069564452724e-06, "loss": 0.0055, "step": 91200 }, { "epoch": 1.540345188637822, "grad_norm": 0.34243419766426086, "learning_rate": 1.5245472939635109e-06, "loss": 0.0074, "step": 91210 }, { "epoch": 1.540514067619144, "grad_norm": 0.22962433099746704, "learning_rate": 1.5234879334205627e-06, "loss": 0.0098, "step": 91220 }, { "epoch": 1.5406829466004661, "grad_norm": 0.2484315186738968, "learning_rate": 1.5224288749084642e-06, "loss": 0.0149, "step": 91230 }, { "epoch": 1.5408518255817882, "grad_norm": 0.10039517283439636, "learning_rate": 1.5213701185192238e-06, "loss": 0.0065, "step": 91240 }, { "epoch": 1.5410207045631101, "grad_norm": 0.18806777894496918, "learning_rate": 1.5203116643448236e-06, "loss": 0.0084, "step": 91250 }, { "epoch": 1.541189583544432, "grad_norm": 0.25889459252357483, "learning_rate": 1.5192535124772196e-06, "loss": 0.0072, "step": 91260 }, { "epoch": 1.541358462525754, "grad_norm": 0.2648742198944092, "learning_rate": 1.5181956630083405e-06, "loss": 0.0049, "step": 91270 }, { "epoch": 1.541527341507076, "grad_norm": 0.20867595076560974, "learning_rate": 1.5171381160300913e-06, "loss": 0.0086, "step": 91280 }, { "epoch": 1.5416962204883982, "grad_norm": 0.2689388692378998, "learning_rate": 1.5160808716343495e-06, "loss": 0.0068, "step": 91290 }, { "epoch": 1.54186509946972, "grad_norm": 0.08583426475524902, "learning_rate": 1.5150239299129632e-06, "loss": 0.0068, "step": 91300 }, { "epoch": 1.542033978451042, "grad_norm": 0.18540742993354797, "learning_rate": 1.5139672909577585e-06, "loss": 0.0078, "step": 91310 }, { "epoch": 1.5422028574323638, "grad_norm": 0.24574023485183716, "learning_rate": 1.512910954860533e-06, "loss": 0.0052, "step": 91320 }, { "epoch": 1.542371736413686, "grad_norm": 0.4511604607105255, "learning_rate": 1.5118549217130613e-06, "loss": 0.0063, "step": 91330 }, { "epoch": 1.542540615395008, "grad_norm": 0.4019410014152527, "learning_rate": 1.5107991916070847e-06, "loss": 0.0076, "step": 91340 }, { "epoch": 1.54270949437633, "grad_norm": 0.19157733023166656, "learning_rate": 1.5097437646343244e-06, "loss": 0.0067, "step": 91350 }, { "epoch": 1.5428783733576519, "grad_norm": 0.18283168971538544, "learning_rate": 1.5086886408864737e-06, "loss": 0.0036, "step": 91360 }, { "epoch": 1.5430472523389738, "grad_norm": 0.20495349168777466, "learning_rate": 1.5076338204551987e-06, "loss": 0.0025, "step": 91370 }, { "epoch": 1.543216131320296, "grad_norm": 0.26955246925354004, "learning_rate": 1.5065793034321396e-06, "loss": 0.0081, "step": 91380 }, { "epoch": 1.543385010301618, "grad_norm": 0.4024305045604706, "learning_rate": 1.5055250899089107e-06, "loss": 0.0088, "step": 91390 }, { "epoch": 1.54355388928294, "grad_norm": 0.3338182270526886, "learning_rate": 1.5044711799770994e-06, "loss": 0.0064, "step": 91400 }, { "epoch": 1.5437227682642618, "grad_norm": 0.11984498798847198, "learning_rate": 1.5034175737282681e-06, "loss": 0.0071, "step": 91410 }, { "epoch": 1.5438916472455837, "grad_norm": 0.1342463493347168, "learning_rate": 1.5023642712539482e-06, "loss": 0.0075, "step": 91420 }, { "epoch": 1.5440605262269058, "grad_norm": 0.45444872975349426, "learning_rate": 1.5013112726456502e-06, "loss": 0.0102, "step": 91430 }, { "epoch": 1.544229405208228, "grad_norm": 0.2530445158481598, "learning_rate": 1.5002585779948553e-06, "loss": 0.0087, "step": 91440 }, { "epoch": 1.5443982841895498, "grad_norm": 0.24034713208675385, "learning_rate": 1.4992061873930215e-06, "loss": 0.0058, "step": 91450 }, { "epoch": 1.5445671631708717, "grad_norm": 0.18228483200073242, "learning_rate": 1.4981541009315743e-06, "loss": 0.0084, "step": 91460 }, { "epoch": 1.5447360421521936, "grad_norm": 0.19927701354026794, "learning_rate": 1.4971023187019173e-06, "loss": 0.0046, "step": 91470 }, { "epoch": 1.5449049211335157, "grad_norm": 0.3799806237220764, "learning_rate": 1.4960508407954282e-06, "loss": 0.0044, "step": 91480 }, { "epoch": 1.5450738001148379, "grad_norm": 0.20757903158664703, "learning_rate": 1.4949996673034555e-06, "loss": 0.006, "step": 91490 }, { "epoch": 1.5452426790961598, "grad_norm": 0.2036544382572174, "learning_rate": 1.493948798317324e-06, "loss": 0.005, "step": 91500 }, { "epoch": 1.5454115580774817, "grad_norm": 0.2971948981285095, "learning_rate": 1.4928982339283288e-06, "loss": 0.0086, "step": 91510 }, { "epoch": 1.5455804370588035, "grad_norm": 0.2638988196849823, "learning_rate": 1.491847974227742e-06, "loss": 0.0096, "step": 91520 }, { "epoch": 1.5457493160401257, "grad_norm": 0.41960808634757996, "learning_rate": 1.4907980193068067e-06, "loss": 0.0069, "step": 91530 }, { "epoch": 1.5459181950214478, "grad_norm": 0.30051594972610474, "learning_rate": 1.4897483692567422e-06, "loss": 0.0071, "step": 91540 }, { "epoch": 1.5460870740027697, "grad_norm": 0.3967204689979553, "learning_rate": 1.488699024168736e-06, "loss": 0.0089, "step": 91550 }, { "epoch": 1.5462559529840916, "grad_norm": 0.18513385951519012, "learning_rate": 1.4876499841339549e-06, "loss": 0.0061, "step": 91560 }, { "epoch": 1.5464248319654135, "grad_norm": 0.24113669991493225, "learning_rate": 1.4866012492435367e-06, "loss": 0.0089, "step": 91570 }, { "epoch": 1.5465937109467356, "grad_norm": 0.583152711391449, "learning_rate": 1.4855528195885944e-06, "loss": 0.0085, "step": 91580 }, { "epoch": 1.5467625899280577, "grad_norm": 0.22899197041988373, "learning_rate": 1.4845046952602093e-06, "loss": 0.0066, "step": 91590 }, { "epoch": 1.5469314689093796, "grad_norm": 0.20689257979393005, "learning_rate": 1.4834568763494427e-06, "loss": 0.0074, "step": 91600 }, { "epoch": 1.5471003478907015, "grad_norm": 0.19141149520874023, "learning_rate": 1.4824093629473257e-06, "loss": 0.004, "step": 91610 }, { "epoch": 1.5472692268720234, "grad_norm": 0.04769069701433182, "learning_rate": 1.4813621551448638e-06, "loss": 0.0068, "step": 91620 }, { "epoch": 1.5474381058533455, "grad_norm": 0.21179497241973877, "learning_rate": 1.4803152530330356e-06, "loss": 0.0096, "step": 91630 }, { "epoch": 1.5476069848346676, "grad_norm": 0.153322234749794, "learning_rate": 1.4792686567027936e-06, "loss": 0.0071, "step": 91640 }, { "epoch": 1.5477758638159895, "grad_norm": 0.33166810870170593, "learning_rate": 1.478222366245064e-06, "loss": 0.0063, "step": 91650 }, { "epoch": 1.5479447427973114, "grad_norm": 0.45535168051719666, "learning_rate": 1.4771763817507473e-06, "loss": 0.0049, "step": 91660 }, { "epoch": 1.5481136217786333, "grad_norm": 0.18553678691387177, "learning_rate": 1.476130703310712e-06, "loss": 0.0045, "step": 91670 }, { "epoch": 1.5482825007599554, "grad_norm": 0.2688693404197693, "learning_rate": 1.475085331015807e-06, "loss": 0.0039, "step": 91680 }, { "epoch": 1.5484513797412776, "grad_norm": 0.1898116022348404, "learning_rate": 1.474040264956851e-06, "loss": 0.0089, "step": 91690 }, { "epoch": 1.5486202587225995, "grad_norm": 0.32482391595840454, "learning_rate": 1.472995505224638e-06, "loss": 0.0097, "step": 91700 }, { "epoch": 1.5487891377039213, "grad_norm": 0.17350994050502777, "learning_rate": 1.4719510519099318e-06, "loss": 0.01, "step": 91710 }, { "epoch": 1.5489580166852432, "grad_norm": 0.34825265407562256, "learning_rate": 1.470906905103473e-06, "loss": 0.0045, "step": 91720 }, { "epoch": 1.5491268956665654, "grad_norm": 0.17789161205291748, "learning_rate": 1.4698630648959749e-06, "loss": 0.0044, "step": 91730 }, { "epoch": 1.5492957746478875, "grad_norm": 0.22813421487808228, "learning_rate": 1.4688195313781234e-06, "loss": 0.0102, "step": 91740 }, { "epoch": 1.5494646536292094, "grad_norm": 0.3261614143848419, "learning_rate": 1.4677763046405786e-06, "loss": 0.0085, "step": 91750 }, { "epoch": 1.5496335326105313, "grad_norm": 0.42442429065704346, "learning_rate": 1.4667333847739728e-06, "loss": 0.0062, "step": 91760 }, { "epoch": 1.5498024115918532, "grad_norm": 0.21399733424186707, "learning_rate": 1.4656907718689127e-06, "loss": 0.006, "step": 91770 }, { "epoch": 1.5499712905731753, "grad_norm": 0.2563317120075226, "learning_rate": 1.4646484660159799e-06, "loss": 0.0071, "step": 91780 }, { "epoch": 1.5501401695544974, "grad_norm": 0.16963879764080048, "learning_rate": 1.4636064673057237e-06, "loss": 0.0069, "step": 91790 }, { "epoch": 1.5503090485358193, "grad_norm": 0.32810264825820923, "learning_rate": 1.462564775828672e-06, "loss": 0.0088, "step": 91800 }, { "epoch": 1.5504779275171412, "grad_norm": 0.3207018971443176, "learning_rate": 1.4615233916753247e-06, "loss": 0.0078, "step": 91810 }, { "epoch": 1.550646806498463, "grad_norm": 0.1382068693637848, "learning_rate": 1.4604823149361563e-06, "loss": 0.0078, "step": 91820 }, { "epoch": 1.5508156854797852, "grad_norm": 0.10574039816856384, "learning_rate": 1.4594415457016092e-06, "loss": 0.0084, "step": 91830 }, { "epoch": 1.5509845644611073, "grad_norm": 0.1408025026321411, "learning_rate": 1.4584010840621049e-06, "loss": 0.0066, "step": 91840 }, { "epoch": 1.5511534434424292, "grad_norm": 0.14606548845767975, "learning_rate": 1.4573609301080367e-06, "loss": 0.0093, "step": 91850 }, { "epoch": 1.5513223224237511, "grad_norm": 0.5777989029884338, "learning_rate": 1.4563210839297697e-06, "loss": 0.0119, "step": 91860 }, { "epoch": 1.551491201405073, "grad_norm": 0.23676921427249908, "learning_rate": 1.4552815456176439e-06, "loss": 0.0046, "step": 91870 }, { "epoch": 1.5516600803863951, "grad_norm": 0.1615525186061859, "learning_rate": 1.454242315261971e-06, "loss": 0.0067, "step": 91880 }, { "epoch": 1.5518289593677173, "grad_norm": 0.2186032235622406, "learning_rate": 1.4532033929530376e-06, "loss": 0.0088, "step": 91890 }, { "epoch": 1.5519978383490391, "grad_norm": 0.13456939160823822, "learning_rate": 1.452164778781104e-06, "loss": 0.0048, "step": 91900 }, { "epoch": 1.552166717330361, "grad_norm": 0.22747042775154114, "learning_rate": 1.4511264728363994e-06, "loss": 0.0067, "step": 91910 }, { "epoch": 1.552335596311683, "grad_norm": 0.2547588050365448, "learning_rate": 1.450088475209131e-06, "loss": 0.0076, "step": 91920 }, { "epoch": 1.552504475293005, "grad_norm": 0.20289844274520874, "learning_rate": 1.4490507859894765e-06, "loss": 0.008, "step": 91930 }, { "epoch": 1.5526733542743272, "grad_norm": 0.27847275137901306, "learning_rate": 1.4480134052675893e-06, "loss": 0.0061, "step": 91940 }, { "epoch": 1.552842233255649, "grad_norm": 0.5148569941520691, "learning_rate": 1.4469763331335952e-06, "loss": 0.0081, "step": 91950 }, { "epoch": 1.553011112236971, "grad_norm": 0.27939873933792114, "learning_rate": 1.4459395696775892e-06, "loss": 0.0095, "step": 91960 }, { "epoch": 1.5531799912182929, "grad_norm": 0.21568915247917175, "learning_rate": 1.4449031149896447e-06, "loss": 0.0072, "step": 91970 }, { "epoch": 1.553348870199615, "grad_norm": 0.18930888175964355, "learning_rate": 1.443866969159806e-06, "loss": 0.0076, "step": 91980 }, { "epoch": 1.553517749180937, "grad_norm": 0.4940907657146454, "learning_rate": 1.442831132278092e-06, "loss": 0.0065, "step": 91990 }, { "epoch": 1.553686628162259, "grad_norm": 0.26581141352653503, "learning_rate": 1.4417956044344922e-06, "loss": 0.0091, "step": 92000 }, { "epoch": 1.553855507143581, "grad_norm": 0.31545644998550415, "learning_rate": 1.4407603857189711e-06, "loss": 0.0058, "step": 92010 }, { "epoch": 1.5540243861249028, "grad_norm": 0.33755970001220703, "learning_rate": 1.4397254762214663e-06, "loss": 0.0066, "step": 92020 }, { "epoch": 1.554193265106225, "grad_norm": 0.23415276408195496, "learning_rate": 1.4386908760318902e-06, "loss": 0.0069, "step": 92030 }, { "epoch": 1.554362144087547, "grad_norm": 0.17431148886680603, "learning_rate": 1.4376565852401226e-06, "loss": 0.0088, "step": 92040 }, { "epoch": 1.554531023068869, "grad_norm": 0.32386115193367004, "learning_rate": 1.4366226039360215e-06, "loss": 0.0093, "step": 92050 }, { "epoch": 1.5546999020501908, "grad_norm": 0.23419158160686493, "learning_rate": 1.4355889322094168e-06, "loss": 0.0079, "step": 92060 }, { "epoch": 1.5548687810315127, "grad_norm": 0.4210623800754547, "learning_rate": 1.4345555701501135e-06, "loss": 0.0065, "step": 92070 }, { "epoch": 1.5550376600128348, "grad_norm": 0.34963324666023254, "learning_rate": 1.433522517847884e-06, "loss": 0.0075, "step": 92080 }, { "epoch": 1.555206538994157, "grad_norm": 0.20422935485839844, "learning_rate": 1.4324897753924788e-06, "loss": 0.0087, "step": 92090 }, { "epoch": 1.5553754179754788, "grad_norm": 0.5509907007217407, "learning_rate": 1.43145734287362e-06, "loss": 0.0092, "step": 92100 }, { "epoch": 1.5555442969568007, "grad_norm": 0.13205111026763916, "learning_rate": 1.4304252203810032e-06, "loss": 0.008, "step": 92110 }, { "epoch": 1.5557131759381226, "grad_norm": 0.22556594014167786, "learning_rate": 1.4293934080042964e-06, "loss": 0.0068, "step": 92120 }, { "epoch": 1.5558820549194448, "grad_norm": 0.16418778896331787, "learning_rate": 1.4283619058331411e-06, "loss": 0.0059, "step": 92130 }, { "epoch": 1.5560509339007669, "grad_norm": 0.20101957023143768, "learning_rate": 1.4273307139571519e-06, "loss": 0.0067, "step": 92140 }, { "epoch": 1.5562198128820888, "grad_norm": 0.13270290195941925, "learning_rate": 1.4262998324659166e-06, "loss": 0.0086, "step": 92150 }, { "epoch": 1.5563886918634107, "grad_norm": 0.33432987332344055, "learning_rate": 1.4252692614489939e-06, "loss": 0.0073, "step": 92160 }, { "epoch": 1.5565575708447326, "grad_norm": 0.2739185690879822, "learning_rate": 1.4242390009959183e-06, "loss": 0.005, "step": 92170 }, { "epoch": 1.5567264498260547, "grad_norm": 0.2990514039993286, "learning_rate": 1.4232090511961966e-06, "loss": 0.0075, "step": 92180 }, { "epoch": 1.5568953288073768, "grad_norm": 0.30031436681747437, "learning_rate": 1.422179412139309e-06, "loss": 0.0067, "step": 92190 }, { "epoch": 1.5570642077886987, "grad_norm": 0.5756700038909912, "learning_rate": 1.4211500839147057e-06, "loss": 0.0075, "step": 92200 }, { "epoch": 1.5572330867700206, "grad_norm": 0.1760416477918625, "learning_rate": 1.4201210666118142e-06, "loss": 0.0063, "step": 92210 }, { "epoch": 1.5574019657513425, "grad_norm": 0.09368746727705002, "learning_rate": 1.4190923603200318e-06, "loss": 0.0117, "step": 92220 }, { "epoch": 1.5575708447326646, "grad_norm": 0.13201257586479187, "learning_rate": 1.4180639651287309e-06, "loss": 0.0099, "step": 92230 }, { "epoch": 1.5577397237139867, "grad_norm": 0.21359746158123016, "learning_rate": 1.417035881127255e-06, "loss": 0.0056, "step": 92240 }, { "epoch": 1.5579086026953086, "grad_norm": 0.36918017268180847, "learning_rate": 1.4160081084049227e-06, "loss": 0.0054, "step": 92250 }, { "epoch": 1.5580774816766305, "grad_norm": 0.15260423719882965, "learning_rate": 1.4149806470510236e-06, "loss": 0.0058, "step": 92260 }, { "epoch": 1.5582463606579524, "grad_norm": 0.14852456748485565, "learning_rate": 1.413953497154823e-06, "loss": 0.0056, "step": 92270 }, { "epoch": 1.5584152396392745, "grad_norm": 0.26261037588119507, "learning_rate": 1.4129266588055535e-06, "loss": 0.0064, "step": 92280 }, { "epoch": 1.5585841186205964, "grad_norm": 0.18466758728027344, "learning_rate": 1.411900132092427e-06, "loss": 0.0073, "step": 92290 }, { "epoch": 1.5587529976019185, "grad_norm": 0.24164973199367523, "learning_rate": 1.4108739171046237e-06, "loss": 0.0088, "step": 92300 }, { "epoch": 1.5589218765832404, "grad_norm": 0.2998329997062683, "learning_rate": 1.409848013931302e-06, "loss": 0.0072, "step": 92310 }, { "epoch": 1.5590907555645623, "grad_norm": 0.19436417520046234, "learning_rate": 1.408822422661586e-06, "loss": 0.0065, "step": 92320 }, { "epoch": 1.5592596345458845, "grad_norm": 0.30765479803085327, "learning_rate": 1.4077971433845784e-06, "loss": 0.0081, "step": 92330 }, { "epoch": 1.5594285135272064, "grad_norm": 0.23705057799816132, "learning_rate": 1.4067721761893526e-06, "loss": 0.0038, "step": 92340 }, { "epoch": 1.5595973925085285, "grad_norm": 0.11947569251060486, "learning_rate": 1.405747521164955e-06, "loss": 0.0064, "step": 92350 }, { "epoch": 1.5597662714898504, "grad_norm": 0.08210153132677078, "learning_rate": 1.4047231784004061e-06, "loss": 0.0055, "step": 92360 }, { "epoch": 1.5599351504711723, "grad_norm": 0.16126655042171478, "learning_rate": 1.4036991479846974e-06, "loss": 0.0047, "step": 92370 }, { "epoch": 1.5601040294524944, "grad_norm": 0.2172977328300476, "learning_rate": 1.402675430006794e-06, "loss": 0.0086, "step": 92380 }, { "epoch": 1.5602729084338163, "grad_norm": 0.42786210775375366, "learning_rate": 1.4016520245556353e-06, "loss": 0.0089, "step": 92390 }, { "epoch": 1.5604417874151384, "grad_norm": 0.2729649245738983, "learning_rate": 1.4006289317201322e-06, "loss": 0.007, "step": 92400 }, { "epoch": 1.5606106663964603, "grad_norm": 0.20087294280529022, "learning_rate": 1.3996061515891668e-06, "loss": 0.0042, "step": 92410 }, { "epoch": 1.5607795453777822, "grad_norm": 0.045434921979904175, "learning_rate": 1.398583684251596e-06, "loss": 0.0088, "step": 92420 }, { "epoch": 1.5609484243591043, "grad_norm": 0.08750958740711212, "learning_rate": 1.3975615297962502e-06, "loss": 0.0089, "step": 92430 }, { "epoch": 1.5611173033404262, "grad_norm": 0.24756549298763275, "learning_rate": 1.396539688311933e-06, "loss": 0.0088, "step": 92440 }, { "epoch": 1.5612861823217483, "grad_norm": 0.2870174050331116, "learning_rate": 1.3955181598874162e-06, "loss": 0.0062, "step": 92450 }, { "epoch": 1.5614550613030702, "grad_norm": 0.2742849588394165, "learning_rate": 1.3944969446114492e-06, "loss": 0.0082, "step": 92460 }, { "epoch": 1.561623940284392, "grad_norm": 0.1790764331817627, "learning_rate": 1.3934760425727516e-06, "loss": 0.0053, "step": 92470 }, { "epoch": 1.5617928192657142, "grad_norm": 0.2856588363647461, "learning_rate": 1.392455453860021e-06, "loss": 0.0082, "step": 92480 }, { "epoch": 1.5619616982470361, "grad_norm": 0.17537035048007965, "learning_rate": 1.391435178561919e-06, "loss": 0.0088, "step": 92490 }, { "epoch": 1.5621305772283582, "grad_norm": 0.17657659947872162, "learning_rate": 1.3904152167670864e-06, "loss": 0.0051, "step": 92500 }, { "epoch": 1.5622994562096801, "grad_norm": 0.2318347692489624, "learning_rate": 1.3893955685641352e-06, "loss": 0.0063, "step": 92510 }, { "epoch": 1.562468335191002, "grad_norm": 0.27743643522262573, "learning_rate": 1.3883762340416512e-06, "loss": 0.0076, "step": 92520 }, { "epoch": 1.5626372141723242, "grad_norm": 0.2088923454284668, "learning_rate": 1.3873572132881885e-06, "loss": 0.0083, "step": 92530 }, { "epoch": 1.562806093153646, "grad_norm": 0.21024677157402039, "learning_rate": 1.386338506392279e-06, "loss": 0.0057, "step": 92540 }, { "epoch": 1.5629749721349682, "grad_norm": 0.19977067410945892, "learning_rate": 1.385320113442425e-06, "loss": 0.0069, "step": 92550 }, { "epoch": 1.56314385111629, "grad_norm": 0.5939353108406067, "learning_rate": 1.3843020345271035e-06, "loss": 0.0102, "step": 92560 }, { "epoch": 1.563312730097612, "grad_norm": 0.4176747500896454, "learning_rate": 1.3832842697347604e-06, "loss": 0.0069, "step": 92570 }, { "epoch": 1.563481609078934, "grad_norm": 0.21642661094665527, "learning_rate": 1.3822668191538158e-06, "loss": 0.0057, "step": 92580 }, { "epoch": 1.563650488060256, "grad_norm": 0.23161539435386658, "learning_rate": 1.3812496828726667e-06, "loss": 0.0064, "step": 92590 }, { "epoch": 1.563819367041578, "grad_norm": 0.6061761379241943, "learning_rate": 1.3802328609796795e-06, "loss": 0.009, "step": 92600 }, { "epoch": 1.5639882460229, "grad_norm": 0.37045392394065857, "learning_rate": 1.3792163535631897e-06, "loss": 0.0077, "step": 92610 }, { "epoch": 1.5641571250042219, "grad_norm": 0.14525927603244781, "learning_rate": 1.3782001607115114e-06, "loss": 0.007, "step": 92620 }, { "epoch": 1.564326003985544, "grad_norm": 0.21058620512485504, "learning_rate": 1.3771842825129279e-06, "loss": 0.0047, "step": 92630 }, { "epoch": 1.564494882966866, "grad_norm": 0.15763860940933228, "learning_rate": 1.3761687190556983e-06, "loss": 0.0081, "step": 92640 }, { "epoch": 1.564663761948188, "grad_norm": 0.1619877815246582, "learning_rate": 1.3751534704280495e-06, "loss": 0.0073, "step": 92650 }, { "epoch": 1.56483264092951, "grad_norm": 0.16004563868045807, "learning_rate": 1.3741385367181848e-06, "loss": 0.007, "step": 92660 }, { "epoch": 1.5650015199108318, "grad_norm": 0.2979746460914612, "learning_rate": 1.373123918014279e-06, "loss": 0.0068, "step": 92670 }, { "epoch": 1.565170398892154, "grad_norm": 0.4001114070415497, "learning_rate": 1.3721096144044816e-06, "loss": 0.0063, "step": 92680 }, { "epoch": 1.5653392778734758, "grad_norm": 0.20691315829753876, "learning_rate": 1.3710956259769098e-06, "loss": 0.0035, "step": 92690 }, { "epoch": 1.565508156854798, "grad_norm": 0.06712105125188828, "learning_rate": 1.3700819528196562e-06, "loss": 0.0055, "step": 92700 }, { "epoch": 1.5656770358361198, "grad_norm": 0.10882502049207687, "learning_rate": 1.3690685950207894e-06, "loss": 0.0072, "step": 92710 }, { "epoch": 1.5658459148174417, "grad_norm": 0.13428619503974915, "learning_rate": 1.3680555526683475e-06, "loss": 0.0087, "step": 92720 }, { "epoch": 1.5660147937987638, "grad_norm": 0.2453986406326294, "learning_rate": 1.3670428258503383e-06, "loss": 0.0068, "step": 92730 }, { "epoch": 1.5661836727800857, "grad_norm": 0.24798329174518585, "learning_rate": 1.366030414654746e-06, "loss": 0.0055, "step": 92740 }, { "epoch": 1.5663525517614079, "grad_norm": 0.15961088240146637, "learning_rate": 1.3650183191695266e-06, "loss": 0.0056, "step": 92750 }, { "epoch": 1.5665214307427298, "grad_norm": 0.2043093591928482, "learning_rate": 1.3640065394826102e-06, "loss": 0.0085, "step": 92760 }, { "epoch": 1.5666903097240517, "grad_norm": 0.10676877200603485, "learning_rate": 1.362995075681895e-06, "loss": 0.0065, "step": 92770 }, { "epoch": 1.5668591887053738, "grad_norm": 0.23764680325984955, "learning_rate": 1.361983927855255e-06, "loss": 0.0064, "step": 92780 }, { "epoch": 1.5670280676866957, "grad_norm": 0.40470173954963684, "learning_rate": 1.360973096090537e-06, "loss": 0.0094, "step": 92790 }, { "epoch": 1.5671969466680178, "grad_norm": 0.2449384182691574, "learning_rate": 1.3599625804755612e-06, "loss": 0.0071, "step": 92800 }, { "epoch": 1.5673658256493397, "grad_norm": 0.21231989562511444, "learning_rate": 1.3589523810981148e-06, "loss": 0.0057, "step": 92810 }, { "epoch": 1.5675347046306616, "grad_norm": 0.1999911665916443, "learning_rate": 1.3579424980459627e-06, "loss": 0.0036, "step": 92820 }, { "epoch": 1.5677035836119837, "grad_norm": 0.25964319705963135, "learning_rate": 1.356932931406843e-06, "loss": 0.0088, "step": 92830 }, { "epoch": 1.5678724625933056, "grad_norm": 0.2622658908367157, "learning_rate": 1.355923681268464e-06, "loss": 0.0061, "step": 92840 }, { "epoch": 1.5680413415746277, "grad_norm": 0.16037465631961823, "learning_rate": 1.354914747718507e-06, "loss": 0.0081, "step": 92850 }, { "epoch": 1.5682102205559496, "grad_norm": 0.17758099734783173, "learning_rate": 1.3539061308446238e-06, "loss": 0.0063, "step": 92860 }, { "epoch": 1.5683790995372715, "grad_norm": 0.25079676508903503, "learning_rate": 1.3528978307344421e-06, "loss": 0.0103, "step": 92870 }, { "epoch": 1.5685479785185936, "grad_norm": 0.3789307475090027, "learning_rate": 1.3518898474755593e-06, "loss": 0.0074, "step": 92880 }, { "epoch": 1.5687168574999155, "grad_norm": 0.5259920954704285, "learning_rate": 1.3508821811555488e-06, "loss": 0.0045, "step": 92890 }, { "epoch": 1.5688857364812376, "grad_norm": 0.4133734405040741, "learning_rate": 1.349874831861952e-06, "loss": 0.0078, "step": 92900 }, { "epoch": 1.5690546154625595, "grad_norm": 0.2856430411338806, "learning_rate": 1.3488677996822851e-06, "loss": 0.0062, "step": 92910 }, { "epoch": 1.5692234944438814, "grad_norm": 0.16734158992767334, "learning_rate": 1.347861084704037e-06, "loss": 0.0125, "step": 92920 }, { "epoch": 1.5693923734252035, "grad_norm": 0.28637608885765076, "learning_rate": 1.3468546870146692e-06, "loss": 0.0061, "step": 92930 }, { "epoch": 1.5695612524065254, "grad_norm": 0.0835343599319458, "learning_rate": 1.3458486067016141e-06, "loss": 0.0082, "step": 92940 }, { "epoch": 1.5697301313878476, "grad_norm": 0.3356364667415619, "learning_rate": 1.344842843852278e-06, "loss": 0.0093, "step": 92950 }, { "epoch": 1.5698990103691695, "grad_norm": 0.13361042737960815, "learning_rate": 1.3438373985540399e-06, "loss": 0.0046, "step": 92960 }, { "epoch": 1.5700678893504914, "grad_norm": 0.1978093534708023, "learning_rate": 1.3428322708942504e-06, "loss": 0.0084, "step": 92970 }, { "epoch": 1.5702367683318135, "grad_norm": 0.34699001908302307, "learning_rate": 1.3418274609602305e-06, "loss": 0.0104, "step": 92980 }, { "epoch": 1.5704056473131354, "grad_norm": 0.44136831164360046, "learning_rate": 1.3408229688392771e-06, "loss": 0.0081, "step": 92990 }, { "epoch": 1.5705745262944575, "grad_norm": 0.1991048902273178, "learning_rate": 1.3398187946186575e-06, "loss": 0.0054, "step": 93000 }, { "epoch": 1.5707434052757794, "grad_norm": 0.18838383257389069, "learning_rate": 1.3388149383856136e-06, "loss": 0.0052, "step": 93010 }, { "epoch": 1.5709122842571013, "grad_norm": 0.15025141835212708, "learning_rate": 1.337811400227355e-06, "loss": 0.0097, "step": 93020 }, { "epoch": 1.5710811632384234, "grad_norm": 0.2026173174381256, "learning_rate": 1.336808180231069e-06, "loss": 0.0063, "step": 93030 }, { "epoch": 1.5712500422197453, "grad_norm": 0.31511035561561584, "learning_rate": 1.3358052784839115e-06, "loss": 0.0068, "step": 93040 }, { "epoch": 1.5714189212010674, "grad_norm": 0.21218377351760864, "learning_rate": 1.334802695073013e-06, "loss": 0.006, "step": 93050 }, { "epoch": 1.5715878001823893, "grad_norm": 0.2571500837802887, "learning_rate": 1.333800430085475e-06, "loss": 0.0081, "step": 93060 }, { "epoch": 1.5717566791637112, "grad_norm": 0.4067869782447815, "learning_rate": 1.3327984836083718e-06, "loss": 0.0073, "step": 93070 }, { "epoch": 1.5719255581450333, "grad_norm": 0.26039209961891174, "learning_rate": 1.3317968557287508e-06, "loss": 0.0048, "step": 93080 }, { "epoch": 1.5720944371263552, "grad_norm": 0.10956083238124847, "learning_rate": 1.330795546533632e-06, "loss": 0.0053, "step": 93090 }, { "epoch": 1.5722633161076773, "grad_norm": 0.5938934087753296, "learning_rate": 1.3297945561100039e-06, "loss": 0.0059, "step": 93100 }, { "epoch": 1.5724321950889992, "grad_norm": 0.601500391960144, "learning_rate": 1.3287938845448312e-06, "loss": 0.0113, "step": 93110 }, { "epoch": 1.5726010740703211, "grad_norm": 0.6862872242927551, "learning_rate": 1.3277935319250495e-06, "loss": 0.0061, "step": 93120 }, { "epoch": 1.5727699530516432, "grad_norm": 0.35648828744888306, "learning_rate": 1.3267934983375696e-06, "loss": 0.0086, "step": 93130 }, { "epoch": 1.5729388320329651, "grad_norm": 0.28468069434165955, "learning_rate": 1.325793783869268e-06, "loss": 0.0065, "step": 93140 }, { "epoch": 1.5731077110142873, "grad_norm": 0.2569955289363861, "learning_rate": 1.3247943886069992e-06, "loss": 0.0098, "step": 93150 }, { "epoch": 1.5732765899956092, "grad_norm": 0.47800594568252563, "learning_rate": 1.3237953126375885e-06, "loss": 0.0062, "step": 93160 }, { "epoch": 1.573445468976931, "grad_norm": 0.1301216036081314, "learning_rate": 1.3227965560478329e-06, "loss": 0.0069, "step": 93170 }, { "epoch": 1.5736143479582532, "grad_norm": 0.25945553183555603, "learning_rate": 1.3217981189245017e-06, "loss": 0.0067, "step": 93180 }, { "epoch": 1.573783226939575, "grad_norm": 0.24792547523975372, "learning_rate": 1.3208000013543376e-06, "loss": 0.0085, "step": 93190 }, { "epoch": 1.5739521059208972, "grad_norm": 0.22206033766269684, "learning_rate": 1.3198022034240532e-06, "loss": 0.0075, "step": 93200 }, { "epoch": 1.574120984902219, "grad_norm": 0.06824957579374313, "learning_rate": 1.3188047252203373e-06, "loss": 0.0086, "step": 93210 }, { "epoch": 1.574289863883541, "grad_norm": 0.20073477923870087, "learning_rate": 1.3178075668298456e-06, "loss": 0.0071, "step": 93220 }, { "epoch": 1.574458742864863, "grad_norm": 0.177218958735466, "learning_rate": 1.316810728339209e-06, "loss": 0.0063, "step": 93230 }, { "epoch": 1.574627621846185, "grad_norm": 0.4057711362838745, "learning_rate": 1.3158142098350313e-06, "loss": 0.0092, "step": 93240 }, { "epoch": 1.574796500827507, "grad_norm": 0.22103971242904663, "learning_rate": 1.3148180114038893e-06, "loss": 0.0039, "step": 93250 }, { "epoch": 1.574965379808829, "grad_norm": 0.07506751269102097, "learning_rate": 1.3138221331323264e-06, "loss": 0.0058, "step": 93260 }, { "epoch": 1.575134258790151, "grad_norm": 0.22518394887447357, "learning_rate": 1.3128265751068647e-06, "loss": 0.0073, "step": 93270 }, { "epoch": 1.575303137771473, "grad_norm": 0.10161873698234558, "learning_rate": 1.3118313374139945e-06, "loss": 0.0097, "step": 93280 }, { "epoch": 1.575472016752795, "grad_norm": 0.14101415872573853, "learning_rate": 1.3108364201401808e-06, "loss": 0.005, "step": 93290 }, { "epoch": 1.575640895734117, "grad_norm": 0.09854933619499207, "learning_rate": 1.3098418233718584e-06, "loss": 0.0084, "step": 93300 }, { "epoch": 1.575809774715439, "grad_norm": 0.2579280138015747, "learning_rate": 1.308847547195437e-06, "loss": 0.0087, "step": 93310 }, { "epoch": 1.5759786536967608, "grad_norm": 0.3055852949619293, "learning_rate": 1.3078535916972956e-06, "loss": 0.0097, "step": 93320 }, { "epoch": 1.576147532678083, "grad_norm": 0.11529263108968735, "learning_rate": 1.3068599569637868e-06, "loss": 0.0076, "step": 93330 }, { "epoch": 1.5763164116594048, "grad_norm": 0.19782331585884094, "learning_rate": 1.3058666430812373e-06, "loss": 0.0045, "step": 93340 }, { "epoch": 1.576485290640727, "grad_norm": 0.25247886776924133, "learning_rate": 1.30487365013594e-06, "loss": 0.0077, "step": 93350 }, { "epoch": 1.5766541696220489, "grad_norm": 0.26165780425071716, "learning_rate": 1.3038809782141655e-06, "loss": 0.0059, "step": 93360 }, { "epoch": 1.5768230486033707, "grad_norm": 0.12950284779071808, "learning_rate": 1.3028886274021551e-06, "loss": 0.0065, "step": 93370 }, { "epoch": 1.5769919275846926, "grad_norm": 0.3748783469200134, "learning_rate": 1.3018965977861226e-06, "loss": 0.0051, "step": 93380 }, { "epoch": 1.5771608065660148, "grad_norm": 0.14414837956428528, "learning_rate": 1.3009048894522508e-06, "loss": 0.007, "step": 93390 }, { "epoch": 1.5773296855473369, "grad_norm": 0.2602640390396118, "learning_rate": 1.2999135024866976e-06, "loss": 0.0066, "step": 93400 }, { "epoch": 1.5774985645286588, "grad_norm": 0.16439414024353027, "learning_rate": 1.2989224369755927e-06, "loss": 0.006, "step": 93410 }, { "epoch": 1.5776674435099807, "grad_norm": 0.622212827205658, "learning_rate": 1.297931693005038e-06, "loss": 0.0085, "step": 93420 }, { "epoch": 1.5778363224913026, "grad_norm": 0.23621369898319244, "learning_rate": 1.2969412706611056e-06, "loss": 0.0076, "step": 93430 }, { "epoch": 1.5780052014726247, "grad_norm": 0.24327847361564636, "learning_rate": 1.295951170029842e-06, "loss": 0.0083, "step": 93440 }, { "epoch": 1.5781740804539468, "grad_norm": 0.14321348071098328, "learning_rate": 1.2949613911972648e-06, "loss": 0.0055, "step": 93450 }, { "epoch": 1.5783429594352687, "grad_norm": 0.12120956182479858, "learning_rate": 1.2939719342493644e-06, "loss": 0.0069, "step": 93460 }, { "epoch": 1.5785118384165906, "grad_norm": 0.2134964019060135, "learning_rate": 1.2929827992721e-06, "loss": 0.0089, "step": 93470 }, { "epoch": 1.5786807173979125, "grad_norm": 0.1833123117685318, "learning_rate": 1.2919939863514063e-06, "loss": 0.0063, "step": 93480 }, { "epoch": 1.5788495963792346, "grad_norm": 0.17309239506721497, "learning_rate": 1.2910054955731888e-06, "loss": 0.0063, "step": 93490 }, { "epoch": 1.5790184753605567, "grad_norm": 0.1880311369895935, "learning_rate": 1.2900173270233269e-06, "loss": 0.0089, "step": 93500 }, { "epoch": 1.5791873543418786, "grad_norm": 0.22637298703193665, "learning_rate": 1.2890294807876674e-06, "loss": 0.011, "step": 93510 }, { "epoch": 1.5793562333232005, "grad_norm": 0.18384888768196106, "learning_rate": 1.2880419569520336e-06, "loss": 0.0043, "step": 93520 }, { "epoch": 1.5795251123045224, "grad_norm": 0.46320661902427673, "learning_rate": 1.2870547556022184e-06, "loss": 0.0066, "step": 93530 }, { "epoch": 1.5796939912858445, "grad_norm": 0.23047466576099396, "learning_rate": 1.2860678768239881e-06, "loss": 0.0085, "step": 93540 }, { "epoch": 1.5798628702671667, "grad_norm": 0.23232246935367584, "learning_rate": 1.2850813207030805e-06, "loss": 0.0086, "step": 93550 }, { "epoch": 1.5800317492484885, "grad_norm": 0.2477850615978241, "learning_rate": 1.2840950873252045e-06, "loss": 0.0064, "step": 93560 }, { "epoch": 1.5802006282298104, "grad_norm": 0.19390082359313965, "learning_rate": 1.2831091767760424e-06, "loss": 0.0061, "step": 93570 }, { "epoch": 1.5803695072111323, "grad_norm": 0.16838987171649933, "learning_rate": 1.2821235891412481e-06, "loss": 0.0062, "step": 93580 }, { "epoch": 1.5805383861924545, "grad_norm": 0.37105032801628113, "learning_rate": 1.2811383245064446e-06, "loss": 0.0078, "step": 93590 }, { "epoch": 1.5807072651737766, "grad_norm": 0.28576186299324036, "learning_rate": 1.2801533829572316e-06, "loss": 0.0059, "step": 93600 }, { "epoch": 1.5808761441550985, "grad_norm": 0.2303893268108368, "learning_rate": 1.2791687645791774e-06, "loss": 0.0068, "step": 93610 }, { "epoch": 1.5810450231364204, "grad_norm": 0.09795695543289185, "learning_rate": 1.2781844694578245e-06, "loss": 0.0043, "step": 93620 }, { "epoch": 1.5812139021177423, "grad_norm": 0.4694047272205353, "learning_rate": 1.277200497678684e-06, "loss": 0.0067, "step": 93630 }, { "epoch": 1.5813827810990644, "grad_norm": 0.38059043884277344, "learning_rate": 1.276216849327242e-06, "loss": 0.0055, "step": 93640 }, { "epoch": 1.5815516600803865, "grad_norm": 0.2559269070625305, "learning_rate": 1.2752335244889552e-06, "loss": 0.0063, "step": 93650 }, { "epoch": 1.5817205390617084, "grad_norm": 0.13330888748168945, "learning_rate": 1.274250523249253e-06, "loss": 0.0057, "step": 93660 }, { "epoch": 1.5818894180430303, "grad_norm": 0.07932376861572266, "learning_rate": 1.273267845693536e-06, "loss": 0.009, "step": 93670 }, { "epoch": 1.5820582970243522, "grad_norm": 0.24326063692569733, "learning_rate": 1.2722854919071764e-06, "loss": 0.0102, "step": 93680 }, { "epoch": 1.5822271760056743, "grad_norm": 0.30447089672088623, "learning_rate": 1.2713034619755187e-06, "loss": 0.0058, "step": 93690 }, { "epoch": 1.5823960549869964, "grad_norm": 0.29543864727020264, "learning_rate": 1.2703217559838815e-06, "loss": 0.006, "step": 93700 }, { "epoch": 1.5825649339683183, "grad_norm": 0.4362604022026062, "learning_rate": 1.2693403740175492e-06, "loss": 0.0057, "step": 93710 }, { "epoch": 1.5827338129496402, "grad_norm": 0.15509255230426788, "learning_rate": 1.2683593161617842e-06, "loss": 0.0059, "step": 93720 }, { "epoch": 1.5829026919309621, "grad_norm": 0.15237849950790405, "learning_rate": 1.2673785825018176e-06, "loss": 0.0091, "step": 93730 }, { "epoch": 1.5830715709122842, "grad_norm": 0.31851503252983093, "learning_rate": 1.2663981731228531e-06, "loss": 0.0064, "step": 93740 }, { "epoch": 1.5832404498936063, "grad_norm": 0.19888710975646973, "learning_rate": 1.2654180881100687e-06, "loss": 0.0055, "step": 93750 }, { "epoch": 1.5834093288749282, "grad_norm": 0.2908340394496918, "learning_rate": 1.2644383275486077e-06, "loss": 0.0063, "step": 93760 }, { "epoch": 1.5835782078562501, "grad_norm": 0.1553337723016739, "learning_rate": 1.2634588915235912e-06, "loss": 0.0075, "step": 93770 }, { "epoch": 1.583747086837572, "grad_norm": 0.5512916445732117, "learning_rate": 1.2624797801201104e-06, "loss": 0.0076, "step": 93780 }, { "epoch": 1.5839159658188942, "grad_norm": 0.2672019600868225, "learning_rate": 1.261500993423228e-06, "loss": 0.0048, "step": 93790 }, { "epoch": 1.5840848448002163, "grad_norm": 0.2685050964355469, "learning_rate": 1.2605225315179792e-06, "loss": 0.0063, "step": 93800 }, { "epoch": 1.5842537237815382, "grad_norm": 0.15313485264778137, "learning_rate": 1.2595443944893687e-06, "loss": 0.0056, "step": 93810 }, { "epoch": 1.58442260276286, "grad_norm": 0.3193275034427643, "learning_rate": 1.2585665824223764e-06, "loss": 0.0074, "step": 93820 }, { "epoch": 1.584591481744182, "grad_norm": 0.2715371251106262, "learning_rate": 1.2575890954019527e-06, "loss": 0.0059, "step": 93830 }, { "epoch": 1.584760360725504, "grad_norm": 0.40566587448120117, "learning_rate": 1.2566119335130167e-06, "loss": 0.0084, "step": 93840 }, { "epoch": 1.5849292397068262, "grad_norm": 0.3678395748138428, "learning_rate": 1.2556350968404635e-06, "loss": 0.0066, "step": 93850 }, { "epoch": 1.585098118688148, "grad_norm": 0.29872646927833557, "learning_rate": 1.2546585854691574e-06, "loss": 0.0069, "step": 93860 }, { "epoch": 1.58526699766947, "grad_norm": 0.23382195830345154, "learning_rate": 1.2536823994839376e-06, "loss": 0.0055, "step": 93870 }, { "epoch": 1.5854358766507919, "grad_norm": 0.15155015885829926, "learning_rate": 1.2527065389696097e-06, "loss": 0.0053, "step": 93880 }, { "epoch": 1.585604755632114, "grad_norm": 0.47442352771759033, "learning_rate": 1.2517310040109554e-06, "loss": 0.0047, "step": 93890 }, { "epoch": 1.5857736346134361, "grad_norm": 0.14337024092674255, "learning_rate": 1.2507557946927267e-06, "loss": 0.0057, "step": 93900 }, { "epoch": 1.585942513594758, "grad_norm": 0.1426224261522293, "learning_rate": 1.249780911099648e-06, "loss": 0.0053, "step": 93910 }, { "epoch": 1.58611139257608, "grad_norm": 0.2859329879283905, "learning_rate": 1.2488063533164135e-06, "loss": 0.0051, "step": 93920 }, { "epoch": 1.5862802715574018, "grad_norm": 0.1817440688610077, "learning_rate": 1.2478321214276918e-06, "loss": 0.0082, "step": 93930 }, { "epoch": 1.586449150538724, "grad_norm": 0.18217779695987701, "learning_rate": 1.2468582155181208e-06, "loss": 0.0066, "step": 93940 }, { "epoch": 1.586618029520046, "grad_norm": 0.16842471063137054, "learning_rate": 1.2458846356723132e-06, "loss": 0.0038, "step": 93950 }, { "epoch": 1.586786908501368, "grad_norm": 0.375222384929657, "learning_rate": 1.2449113819748477e-06, "loss": 0.0052, "step": 93960 }, { "epoch": 1.5869557874826898, "grad_norm": 0.1474483609199524, "learning_rate": 1.24393845451028e-06, "loss": 0.0113, "step": 93970 }, { "epoch": 1.5871246664640117, "grad_norm": 0.4518353044986725, "learning_rate": 1.2429658533631356e-06, "loss": 0.0068, "step": 93980 }, { "epoch": 1.5872935454453339, "grad_norm": 0.24007178843021393, "learning_rate": 1.2419935786179132e-06, "loss": 0.0056, "step": 93990 }, { "epoch": 1.587462424426656, "grad_norm": 0.23316162824630737, "learning_rate": 1.2410216303590788e-06, "loss": 0.0078, "step": 94000 }, { "epoch": 1.5876313034079779, "grad_norm": 0.3071874976158142, "learning_rate": 1.2400500086710743e-06, "loss": 0.0061, "step": 94010 }, { "epoch": 1.5878001823892998, "grad_norm": 0.11134036630392075, "learning_rate": 1.239078713638312e-06, "loss": 0.0067, "step": 94020 }, { "epoch": 1.5879690613706217, "grad_norm": 0.3221209943294525, "learning_rate": 1.2381077453451751e-06, "loss": 0.007, "step": 94030 }, { "epoch": 1.5881379403519438, "grad_norm": 0.14645172655582428, "learning_rate": 1.2371371038760188e-06, "loss": 0.0058, "step": 94040 }, { "epoch": 1.588306819333266, "grad_norm": 0.15048204362392426, "learning_rate": 1.2361667893151713e-06, "loss": 0.0058, "step": 94050 }, { "epoch": 1.5884756983145878, "grad_norm": 0.2471405267715454, "learning_rate": 1.2351968017469295e-06, "loss": 0.0082, "step": 94060 }, { "epoch": 1.5886445772959097, "grad_norm": 0.35472825169563293, "learning_rate": 1.2342271412555668e-06, "loss": 0.0082, "step": 94070 }, { "epoch": 1.5888134562772316, "grad_norm": 0.32005447149276733, "learning_rate": 1.2332578079253204e-06, "loss": 0.0086, "step": 94080 }, { "epoch": 1.5889823352585537, "grad_norm": 0.16335654258728027, "learning_rate": 1.2322888018404062e-06, "loss": 0.0065, "step": 94090 }, { "epoch": 1.5891512142398758, "grad_norm": 0.1551981121301651, "learning_rate": 1.231320123085008e-06, "loss": 0.0064, "step": 94100 }, { "epoch": 1.5893200932211977, "grad_norm": 0.2515435814857483, "learning_rate": 1.2303517717432845e-06, "loss": 0.0071, "step": 94110 }, { "epoch": 1.5894889722025196, "grad_norm": 0.2154790461063385, "learning_rate": 1.2293837478993608e-06, "loss": 0.0053, "step": 94120 }, { "epoch": 1.5896578511838415, "grad_norm": 0.29901042580604553, "learning_rate": 1.2284160516373373e-06, "loss": 0.0068, "step": 94130 }, { "epoch": 1.5898267301651636, "grad_norm": 0.22835604846477509, "learning_rate": 1.2274486830412856e-06, "loss": 0.0074, "step": 94140 }, { "epoch": 1.5899956091464857, "grad_norm": 0.4466160833835602, "learning_rate": 1.226481642195248e-06, "loss": 0.0125, "step": 94150 }, { "epoch": 1.5901644881278076, "grad_norm": 0.28905877470970154, "learning_rate": 1.225514929183239e-06, "loss": 0.0073, "step": 94160 }, { "epoch": 1.5903333671091295, "grad_norm": 0.16465148329734802, "learning_rate": 1.224548544089244e-06, "loss": 0.0098, "step": 94170 }, { "epoch": 1.5905022460904514, "grad_norm": 0.20343312621116638, "learning_rate": 1.2235824869972208e-06, "loss": 0.0053, "step": 94180 }, { "epoch": 1.5906711250717736, "grad_norm": 0.3887011408805847, "learning_rate": 1.2226167579910964e-06, "loss": 0.0082, "step": 94190 }, { "epoch": 1.5908400040530957, "grad_norm": 0.09969981759786606, "learning_rate": 1.2216513571547744e-06, "loss": 0.0064, "step": 94200 }, { "epoch": 1.5910088830344176, "grad_norm": 1.0792628526687622, "learning_rate": 1.2206862845721223e-06, "loss": 0.0054, "step": 94210 }, { "epoch": 1.5911777620157395, "grad_norm": 0.22901657223701477, "learning_rate": 1.2197215403269847e-06, "loss": 0.0077, "step": 94220 }, { "epoch": 1.5913466409970614, "grad_norm": 0.26748961210250854, "learning_rate": 1.2187571245031772e-06, "loss": 0.007, "step": 94230 }, { "epoch": 1.5915155199783835, "grad_norm": 0.1274121105670929, "learning_rate": 1.2177930371844866e-06, "loss": 0.0065, "step": 94240 }, { "epoch": 1.5916843989597056, "grad_norm": 0.42642849683761597, "learning_rate": 1.2168292784546676e-06, "loss": 0.0084, "step": 94250 }, { "epoch": 1.5918532779410275, "grad_norm": 0.38929659128189087, "learning_rate": 1.2158658483974506e-06, "loss": 0.0132, "step": 94260 }, { "epoch": 1.5920221569223494, "grad_norm": 0.22592447698116302, "learning_rate": 1.2149027470965358e-06, "loss": 0.0095, "step": 94270 }, { "epoch": 1.5921910359036713, "grad_norm": 0.2965625822544098, "learning_rate": 1.2139399746355956e-06, "loss": 0.0074, "step": 94280 }, { "epoch": 1.5923599148849934, "grad_norm": 0.15074887871742249, "learning_rate": 1.2129775310982738e-06, "loss": 0.0062, "step": 94290 }, { "epoch": 1.5925287938663155, "grad_norm": 0.2650754749774933, "learning_rate": 1.2120154165681836e-06, "loss": 0.0119, "step": 94300 }, { "epoch": 1.5926976728476374, "grad_norm": 0.2624334692955017, "learning_rate": 1.2110536311289129e-06, "loss": 0.0052, "step": 94310 }, { "epoch": 1.5928665518289593, "grad_norm": 0.1007172241806984, "learning_rate": 1.2100921748640194e-06, "loss": 0.0056, "step": 94320 }, { "epoch": 1.5930354308102812, "grad_norm": 0.15364578366279602, "learning_rate": 1.2091310478570295e-06, "loss": 0.0044, "step": 94330 }, { "epoch": 1.5932043097916033, "grad_norm": 0.26197800040245056, "learning_rate": 1.208170250191445e-06, "loss": 0.0046, "step": 94340 }, { "epoch": 1.5933731887729254, "grad_norm": 0.3436712324619293, "learning_rate": 1.2072097819507378e-06, "loss": 0.0063, "step": 94350 }, { "epoch": 1.5935420677542473, "grad_norm": 0.1913089007139206, "learning_rate": 1.206249643218353e-06, "loss": 0.0062, "step": 94360 }, { "epoch": 1.5937109467355692, "grad_norm": 0.175154909491539, "learning_rate": 1.2052898340777008e-06, "loss": 0.0037, "step": 94370 }, { "epoch": 1.5938798257168911, "grad_norm": 0.5943410992622375, "learning_rate": 1.2043303546121698e-06, "loss": 0.0059, "step": 94380 }, { "epoch": 1.5940487046982132, "grad_norm": 0.26171791553497314, "learning_rate": 1.2033712049051167e-06, "loss": 0.0045, "step": 94390 }, { "epoch": 1.5942175836795354, "grad_norm": 0.24117986857891083, "learning_rate": 1.2024123850398705e-06, "loss": 0.0052, "step": 94400 }, { "epoch": 1.5943864626608573, "grad_norm": 0.14630240201950073, "learning_rate": 1.20145389509973e-06, "loss": 0.0071, "step": 94410 }, { "epoch": 1.5945553416421792, "grad_norm": 0.21505260467529297, "learning_rate": 1.200495735167968e-06, "loss": 0.0058, "step": 94420 }, { "epoch": 1.594724220623501, "grad_norm": 0.3857842981815338, "learning_rate": 1.1995379053278262e-06, "loss": 0.0097, "step": 94430 }, { "epoch": 1.5948930996048232, "grad_norm": 0.1631254106760025, "learning_rate": 1.19858040566252e-06, "loss": 0.0067, "step": 94440 }, { "epoch": 1.5950619785861453, "grad_norm": 0.30620360374450684, "learning_rate": 1.1976232362552316e-06, "loss": 0.0053, "step": 94450 }, { "epoch": 1.5952308575674672, "grad_norm": 0.20277686417102814, "learning_rate": 1.196666397189119e-06, "loss": 0.0048, "step": 94460 }, { "epoch": 1.595399736548789, "grad_norm": 0.29699254035949707, "learning_rate": 1.1957098885473107e-06, "loss": 0.0114, "step": 94470 }, { "epoch": 1.595568615530111, "grad_norm": 0.0963674932718277, "learning_rate": 1.1947537104129065e-06, "loss": 0.0057, "step": 94480 }, { "epoch": 1.595737494511433, "grad_norm": 0.1525232046842575, "learning_rate": 1.1937978628689745e-06, "loss": 0.0069, "step": 94490 }, { "epoch": 1.5959063734927552, "grad_norm": 0.24970895051956177, "learning_rate": 1.1928423459985572e-06, "loss": 0.0083, "step": 94500 }, { "epoch": 1.596075252474077, "grad_norm": 0.17038126289844513, "learning_rate": 1.1918871598846665e-06, "loss": 0.0092, "step": 94510 }, { "epoch": 1.596244131455399, "grad_norm": 0.275420218706131, "learning_rate": 1.190932304610291e-06, "loss": 0.0053, "step": 94520 }, { "epoch": 1.596413010436721, "grad_norm": 0.3652498722076416, "learning_rate": 1.1899777802583822e-06, "loss": 0.0085, "step": 94530 }, { "epoch": 1.596581889418043, "grad_norm": 0.23795917630195618, "learning_rate": 1.1890235869118678e-06, "loss": 0.0052, "step": 94540 }, { "epoch": 1.5967507683993651, "grad_norm": 0.3216855823993683, "learning_rate": 1.1880697246536453e-06, "loss": 0.0095, "step": 94550 }, { "epoch": 1.596919647380687, "grad_norm": 0.21097423136234283, "learning_rate": 1.1871161935665865e-06, "loss": 0.0068, "step": 94560 }, { "epoch": 1.597088526362009, "grad_norm": 0.27101027965545654, "learning_rate": 1.1861629937335279e-06, "loss": 0.0072, "step": 94570 }, { "epoch": 1.5972574053433308, "grad_norm": 0.30053263902664185, "learning_rate": 1.1852101252372832e-06, "loss": 0.0063, "step": 94580 }, { "epoch": 1.597426284324653, "grad_norm": 0.06730969995260239, "learning_rate": 1.184257588160635e-06, "loss": 0.0044, "step": 94590 }, { "epoch": 1.597595163305975, "grad_norm": 0.10700501501560211, "learning_rate": 1.1833053825863395e-06, "loss": 0.0059, "step": 94600 }, { "epoch": 1.597764042287297, "grad_norm": 0.22746187448501587, "learning_rate": 1.1823535085971182e-06, "loss": 0.0044, "step": 94610 }, { "epoch": 1.5979329212686189, "grad_norm": 0.12587617337703705, "learning_rate": 1.1814019662756694e-06, "loss": 0.0075, "step": 94620 }, { "epoch": 1.5981018002499408, "grad_norm": 0.44205430150032043, "learning_rate": 1.1804507557046596e-06, "loss": 0.0086, "step": 94630 }, { "epoch": 1.5982706792312629, "grad_norm": 0.2197415977716446, "learning_rate": 1.1794998769667298e-06, "loss": 0.0083, "step": 94640 }, { "epoch": 1.598439558212585, "grad_norm": 0.3997720181941986, "learning_rate": 1.1785493301444905e-06, "loss": 0.0056, "step": 94650 }, { "epoch": 1.5986084371939069, "grad_norm": 0.2975583076477051, "learning_rate": 1.1775991153205203e-06, "loss": 0.0062, "step": 94660 }, { "epoch": 1.5987773161752288, "grad_norm": 0.12858469784259796, "learning_rate": 1.1766492325773726e-06, "loss": 0.007, "step": 94670 }, { "epoch": 1.5989461951565507, "grad_norm": 0.14630548655986786, "learning_rate": 1.1756996819975709e-06, "loss": 0.0075, "step": 94680 }, { "epoch": 1.5991150741378728, "grad_norm": 0.25718215107917786, "learning_rate": 1.1747504636636115e-06, "loss": 0.0107, "step": 94690 }, { "epoch": 1.599283953119195, "grad_norm": 0.22575388848781586, "learning_rate": 1.173801577657957e-06, "loss": 0.0057, "step": 94700 }, { "epoch": 1.5994528321005168, "grad_norm": 0.13803258538246155, "learning_rate": 1.1728530240630454e-06, "loss": 0.0058, "step": 94710 }, { "epoch": 1.5996217110818387, "grad_norm": 0.7438579201698303, "learning_rate": 1.1719048029612856e-06, "loss": 0.0082, "step": 94720 }, { "epoch": 1.5997905900631606, "grad_norm": 0.32591328024864197, "learning_rate": 1.1709569144350574e-06, "loss": 0.0089, "step": 94730 }, { "epoch": 1.5999594690444827, "grad_norm": 0.2296489179134369, "learning_rate": 1.1700093585667073e-06, "loss": 0.0059, "step": 94740 }, { "epoch": 1.6001283480258048, "grad_norm": 0.3475607633590698, "learning_rate": 1.1690621354385601e-06, "loss": 0.0058, "step": 94750 }, { "epoch": 1.6002972270071267, "grad_norm": 0.05391499772667885, "learning_rate": 1.1681152451329075e-06, "loss": 0.0056, "step": 94760 }, { "epoch": 1.6004661059884486, "grad_norm": 0.33643683791160583, "learning_rate": 1.1671686877320148e-06, "loss": 0.006, "step": 94770 }, { "epoch": 1.6006349849697705, "grad_norm": 0.30730360746383667, "learning_rate": 1.166222463318113e-06, "loss": 0.0071, "step": 94780 }, { "epoch": 1.6008038639510926, "grad_norm": 0.2806408703327179, "learning_rate": 1.165276571973409e-06, "loss": 0.0051, "step": 94790 }, { "epoch": 1.6009727429324148, "grad_norm": 0.1754123866558075, "learning_rate": 1.16433101378008e-06, "loss": 0.0079, "step": 94800 }, { "epoch": 1.6011416219137367, "grad_norm": 0.22363029420375824, "learning_rate": 1.1633857888202755e-06, "loss": 0.0121, "step": 94810 }, { "epoch": 1.6013105008950586, "grad_norm": 0.37763476371765137, "learning_rate": 1.162440897176111e-06, "loss": 0.0065, "step": 94820 }, { "epoch": 1.6014793798763804, "grad_norm": 0.15492478013038635, "learning_rate": 1.161496338929678e-06, "loss": 0.0039, "step": 94830 }, { "epoch": 1.6016482588577026, "grad_norm": 0.1775829941034317, "learning_rate": 1.1605521141630372e-06, "loss": 0.0061, "step": 94840 }, { "epoch": 1.6018171378390247, "grad_norm": 0.48568567633628845, "learning_rate": 1.1596082229582217e-06, "loss": 0.0068, "step": 94850 }, { "epoch": 1.6019860168203466, "grad_norm": 0.3370717465877533, "learning_rate": 1.1586646653972312e-06, "loss": 0.0061, "step": 94860 }, { "epoch": 1.6021548958016685, "grad_norm": 0.16781699657440186, "learning_rate": 1.1577214415620425e-06, "loss": 0.0069, "step": 94870 }, { "epoch": 1.6023237747829904, "grad_norm": 0.17913228273391724, "learning_rate": 1.1567785515346003e-06, "loss": 0.0062, "step": 94880 }, { "epoch": 1.6024926537643125, "grad_norm": 0.3131340742111206, "learning_rate": 1.1558359953968218e-06, "loss": 0.0097, "step": 94890 }, { "epoch": 1.6026615327456346, "grad_norm": 0.24715536832809448, "learning_rate": 1.1548937732305909e-06, "loss": 0.0071, "step": 94900 }, { "epoch": 1.6028304117269565, "grad_norm": 0.4395432472229004, "learning_rate": 1.1539518851177667e-06, "loss": 0.0067, "step": 94910 }, { "epoch": 1.6029992907082784, "grad_norm": 0.2553063929080963, "learning_rate": 1.1530103311401786e-06, "loss": 0.0044, "step": 94920 }, { "epoch": 1.6031681696896003, "grad_norm": 0.05600898712873459, "learning_rate": 1.1520691113796283e-06, "loss": 0.0057, "step": 94930 }, { "epoch": 1.6033370486709224, "grad_norm": 0.16285531222820282, "learning_rate": 1.1511282259178829e-06, "loss": 0.006, "step": 94940 }, { "epoch": 1.6035059276522445, "grad_norm": 0.27085280418395996, "learning_rate": 1.1501876748366858e-06, "loss": 0.0104, "step": 94950 }, { "epoch": 1.6036748066335664, "grad_norm": 0.2525533139705658, "learning_rate": 1.1492474582177505e-06, "loss": 0.0062, "step": 94960 }, { "epoch": 1.6038436856148883, "grad_norm": 0.3289136588573456, "learning_rate": 1.1483075761427615e-06, "loss": 0.0076, "step": 94970 }, { "epoch": 1.6040125645962102, "grad_norm": 0.1535079926252365, "learning_rate": 1.1473680286933692e-06, "loss": 0.005, "step": 94980 }, { "epoch": 1.6041814435775323, "grad_norm": 0.3750899136066437, "learning_rate": 1.1464288159512038e-06, "loss": 0.0083, "step": 94990 }, { "epoch": 1.6043503225588545, "grad_norm": 0.07939480990171432, "learning_rate": 1.1454899379978602e-06, "loss": 0.0066, "step": 95000 }, { "epoch": 1.6045192015401764, "grad_norm": 0.32850322127342224, "learning_rate": 1.1445513949149066e-06, "loss": 0.0066, "step": 95010 }, { "epoch": 1.6046880805214983, "grad_norm": 0.20746538043022156, "learning_rate": 1.1436131867838796e-06, "loss": 0.0104, "step": 95020 }, { "epoch": 1.6048569595028201, "grad_norm": 0.39296677708625793, "learning_rate": 1.1426753136862894e-06, "loss": 0.008, "step": 95030 }, { "epoch": 1.6050258384841423, "grad_norm": 0.2729795575141907, "learning_rate": 1.1417377757036162e-06, "loss": 0.0075, "step": 95040 }, { "epoch": 1.6051947174654644, "grad_norm": 0.31495195627212524, "learning_rate": 1.1408005729173122e-06, "loss": 0.0087, "step": 95050 }, { "epoch": 1.6053635964467863, "grad_norm": 0.32598307728767395, "learning_rate": 1.1398637054087963e-06, "loss": 0.0064, "step": 95060 }, { "epoch": 1.6055324754281082, "grad_norm": 0.36420971155166626, "learning_rate": 1.1389271732594631e-06, "loss": 0.0097, "step": 95070 }, { "epoch": 1.60570135440943, "grad_norm": 0.11040354520082474, "learning_rate": 1.1379909765506758e-06, "loss": 0.0072, "step": 95080 }, { "epoch": 1.6058702333907522, "grad_norm": 0.323682963848114, "learning_rate": 1.1370551153637687e-06, "loss": 0.0091, "step": 95090 }, { "epoch": 1.6060391123720743, "grad_norm": 0.14302028715610504, "learning_rate": 1.1361195897800481e-06, "loss": 0.0065, "step": 95100 }, { "epoch": 1.6062079913533962, "grad_norm": 0.23914401233196259, "learning_rate": 1.13518439988079e-06, "loss": 0.0039, "step": 95110 }, { "epoch": 1.606376870334718, "grad_norm": 0.28718721866607666, "learning_rate": 1.1342495457472403e-06, "loss": 0.0081, "step": 95120 }, { "epoch": 1.60654574931604, "grad_norm": 0.19017738103866577, "learning_rate": 1.1333150274606176e-06, "loss": 0.0049, "step": 95130 }, { "epoch": 1.6067146282973621, "grad_norm": 0.4798569679260254, "learning_rate": 1.1323808451021118e-06, "loss": 0.0098, "step": 95140 }, { "epoch": 1.6068835072786842, "grad_norm": 0.31097617745399475, "learning_rate": 1.1314469987528798e-06, "loss": 0.0089, "step": 95150 }, { "epoch": 1.6070523862600061, "grad_norm": 0.44561105966567993, "learning_rate": 1.1305134884940534e-06, "loss": 0.0056, "step": 95160 }, { "epoch": 1.607221265241328, "grad_norm": 0.16590279340744019, "learning_rate": 1.129580314406733e-06, "loss": 0.0078, "step": 95170 }, { "epoch": 1.60739014422265, "grad_norm": 0.24359124898910522, "learning_rate": 1.1286474765719928e-06, "loss": 0.0089, "step": 95180 }, { "epoch": 1.607559023203972, "grad_norm": 0.1478123813867569, "learning_rate": 1.1277149750708721e-06, "loss": 0.0057, "step": 95190 }, { "epoch": 1.6077279021852942, "grad_norm": 0.21705737709999084, "learning_rate": 1.1267828099843852e-06, "loss": 0.0067, "step": 95200 }, { "epoch": 1.607896781166616, "grad_norm": 0.16929388046264648, "learning_rate": 1.1258509813935176e-06, "loss": 0.0063, "step": 95210 }, { "epoch": 1.608065660147938, "grad_norm": 0.27452704310417175, "learning_rate": 1.124919489379223e-06, "loss": 0.0052, "step": 95220 }, { "epoch": 1.6082345391292598, "grad_norm": 0.13474945724010468, "learning_rate": 1.1239883340224284e-06, "loss": 0.005, "step": 95230 }, { "epoch": 1.608403418110582, "grad_norm": 0.22039610147476196, "learning_rate": 1.1230575154040297e-06, "loss": 0.0082, "step": 95240 }, { "epoch": 1.608572297091904, "grad_norm": 0.24797336757183075, "learning_rate": 1.1221270336048935e-06, "loss": 0.0051, "step": 95250 }, { "epoch": 1.608741176073226, "grad_norm": 0.1646007001399994, "learning_rate": 1.12119688870586e-06, "loss": 0.0059, "step": 95260 }, { "epoch": 1.6089100550545479, "grad_norm": 0.1939273327589035, "learning_rate": 1.1202670807877352e-06, "loss": 0.0053, "step": 95270 }, { "epoch": 1.6090789340358698, "grad_norm": 0.412212073802948, "learning_rate": 1.1193376099312991e-06, "loss": 0.0067, "step": 95280 }, { "epoch": 1.6092478130171919, "grad_norm": 0.2356317937374115, "learning_rate": 1.1184084762173025e-06, "loss": 0.0059, "step": 95290 }, { "epoch": 1.609416691998514, "grad_norm": 0.2354651689529419, "learning_rate": 1.1174796797264682e-06, "loss": 0.0067, "step": 95300 }, { "epoch": 1.609585570979836, "grad_norm": 0.23165875673294067, "learning_rate": 1.1165512205394834e-06, "loss": 0.0063, "step": 95310 }, { "epoch": 1.6097544499611578, "grad_norm": 0.18902306258678436, "learning_rate": 1.1156230987370132e-06, "loss": 0.0055, "step": 95320 }, { "epoch": 1.6099233289424797, "grad_norm": 0.24187999963760376, "learning_rate": 1.1146953143996897e-06, "loss": 0.0071, "step": 95330 }, { "epoch": 1.6100922079238018, "grad_norm": 0.1501360982656479, "learning_rate": 1.113767867608117e-06, "loss": 0.0062, "step": 95340 }, { "epoch": 1.610261086905124, "grad_norm": 0.16253870725631714, "learning_rate": 1.112840758442869e-06, "loss": 0.0057, "step": 95350 }, { "epoch": 1.6104299658864458, "grad_norm": 0.19845494627952576, "learning_rate": 1.1119139869844914e-06, "loss": 0.0067, "step": 95360 }, { "epoch": 1.6105988448677677, "grad_norm": 0.1855931282043457, "learning_rate": 1.1109875533134988e-06, "loss": 0.0057, "step": 95370 }, { "epoch": 1.6107677238490896, "grad_norm": 0.20716257393360138, "learning_rate": 1.1100614575103801e-06, "loss": 0.0063, "step": 95380 }, { "epoch": 1.6109366028304117, "grad_norm": 0.2250363826751709, "learning_rate": 1.109135699655588e-06, "loss": 0.0057, "step": 95390 }, { "epoch": 1.6111054818117339, "grad_norm": 0.29112106561660767, "learning_rate": 1.1082102798295529e-06, "loss": 0.0059, "step": 95400 }, { "epoch": 1.6112743607930557, "grad_norm": 0.3264842629432678, "learning_rate": 1.107285198112672e-06, "loss": 0.0093, "step": 95410 }, { "epoch": 1.6114432397743776, "grad_norm": 0.2645511031150818, "learning_rate": 1.1063604545853157e-06, "loss": 0.0069, "step": 95420 }, { "epoch": 1.6116121187556995, "grad_norm": 0.11898195743560791, "learning_rate": 1.105436049327821e-06, "loss": 0.0036, "step": 95430 }, { "epoch": 1.6117809977370217, "grad_norm": 0.13619336485862732, "learning_rate": 1.1045119824204998e-06, "loss": 0.005, "step": 95440 }, { "epoch": 1.6119498767183438, "grad_norm": 0.20801959931850433, "learning_rate": 1.103588253943631e-06, "loss": 0.0051, "step": 95450 }, { "epoch": 1.6121187556996657, "grad_norm": 0.22777007520198822, "learning_rate": 1.102664863977468e-06, "loss": 0.0104, "step": 95460 }, { "epoch": 1.6122876346809876, "grad_norm": 0.18361471593379974, "learning_rate": 1.1017418126022311e-06, "loss": 0.0098, "step": 95470 }, { "epoch": 1.6124565136623095, "grad_norm": 0.18752850592136383, "learning_rate": 1.100819099898114e-06, "loss": 0.0049, "step": 95480 }, { "epoch": 1.6126253926436316, "grad_norm": 0.3380207419395447, "learning_rate": 1.099896725945278e-06, "loss": 0.005, "step": 95490 }, { "epoch": 1.6127942716249537, "grad_norm": 0.5022734999656677, "learning_rate": 1.0989746908238603e-06, "loss": 0.0078, "step": 95500 }, { "epoch": 1.6129631506062756, "grad_norm": 0.4472713768482208, "learning_rate": 1.09805299461396e-06, "loss": 0.0077, "step": 95510 }, { "epoch": 1.6131320295875975, "grad_norm": 0.2328581064939499, "learning_rate": 1.0971316373956547e-06, "loss": 0.0103, "step": 95520 }, { "epoch": 1.6133009085689194, "grad_norm": 0.38849759101867676, "learning_rate": 1.0962106192489896e-06, "loss": 0.0056, "step": 95530 }, { "epoch": 1.6134697875502415, "grad_norm": 0.38355451822280884, "learning_rate": 1.0952899402539801e-06, "loss": 0.0069, "step": 95540 }, { "epoch": 1.6136386665315636, "grad_norm": 0.4390168786048889, "learning_rate": 1.0943696004906135e-06, "loss": 0.0094, "step": 95550 }, { "epoch": 1.6138075455128855, "grad_norm": 0.14620156586170197, "learning_rate": 1.093449600038845e-06, "loss": 0.0073, "step": 95560 }, { "epoch": 1.6139764244942074, "grad_norm": 0.2733306884765625, "learning_rate": 1.092529938978602e-06, "loss": 0.0083, "step": 95570 }, { "epoch": 1.6141453034755293, "grad_norm": 0.3290782868862152, "learning_rate": 1.091610617389784e-06, "loss": 0.0085, "step": 95580 }, { "epoch": 1.6143141824568514, "grad_norm": 0.3640982210636139, "learning_rate": 1.090691635352258e-06, "loss": 0.0114, "step": 95590 }, { "epoch": 1.6144830614381736, "grad_norm": 0.2411685287952423, "learning_rate": 1.0897729929458633e-06, "loss": 0.0086, "step": 95600 }, { "epoch": 1.6146519404194954, "grad_norm": 0.3897819221019745, "learning_rate": 1.0888546902504099e-06, "loss": 0.0081, "step": 95610 }, { "epoch": 1.6148208194008173, "grad_norm": 0.33981549739837646, "learning_rate": 1.087936727345677e-06, "loss": 0.0108, "step": 95620 }, { "epoch": 1.6149896983821392, "grad_norm": 0.5095965266227722, "learning_rate": 1.0870191043114165e-06, "loss": 0.0117, "step": 95630 }, { "epoch": 1.6151585773634614, "grad_norm": 0.41942721605300903, "learning_rate": 1.0861018212273466e-06, "loss": 0.0113, "step": 95640 }, { "epoch": 1.6153274563447835, "grad_norm": 0.22638417780399323, "learning_rate": 1.08518487817316e-06, "loss": 0.0056, "step": 95650 }, { "epoch": 1.6154963353261054, "grad_norm": 0.2358722686767578, "learning_rate": 1.0842682752285182e-06, "loss": 0.0056, "step": 95660 }, { "epoch": 1.6156652143074273, "grad_norm": 0.2978079319000244, "learning_rate": 1.0833520124730557e-06, "loss": 0.0034, "step": 95670 }, { "epoch": 1.6158340932887492, "grad_norm": 0.2774273455142975, "learning_rate": 1.082436089986371e-06, "loss": 0.0053, "step": 95680 }, { "epoch": 1.6160029722700713, "grad_norm": 0.25308433175086975, "learning_rate": 1.0815205078480395e-06, "loss": 0.0052, "step": 95690 }, { "epoch": 1.6161718512513934, "grad_norm": 0.195325568318367, "learning_rate": 1.0806052661376049e-06, "loss": 0.0063, "step": 95700 }, { "epoch": 1.6163407302327153, "grad_norm": 0.27311426401138306, "learning_rate": 1.0796903649345802e-06, "loss": 0.0064, "step": 95710 }, { "epoch": 1.6165096092140372, "grad_norm": 0.46070972084999084, "learning_rate": 1.078775804318451e-06, "loss": 0.0051, "step": 95720 }, { "epoch": 1.616678488195359, "grad_norm": 0.11075764894485474, "learning_rate": 1.0778615843686714e-06, "loss": 0.0071, "step": 95730 }, { "epoch": 1.6168473671766812, "grad_norm": 0.405740886926651, "learning_rate": 1.076947705164666e-06, "loss": 0.0065, "step": 95740 }, { "epoch": 1.6170162461580033, "grad_norm": 0.18072955310344696, "learning_rate": 1.0760341667858327e-06, "loss": 0.008, "step": 95750 }, { "epoch": 1.6171851251393252, "grad_norm": 0.22280630469322205, "learning_rate": 1.0751209693115345e-06, "loss": 0.0065, "step": 95760 }, { "epoch": 1.6173540041206471, "grad_norm": 0.31302714347839355, "learning_rate": 1.074208112821109e-06, "loss": 0.0064, "step": 95770 }, { "epoch": 1.617522883101969, "grad_norm": 0.1564646065235138, "learning_rate": 1.073295597393863e-06, "loss": 0.0046, "step": 95780 }, { "epoch": 1.6176917620832911, "grad_norm": 0.21016216278076172, "learning_rate": 1.0723834231090752e-06, "loss": 0.0049, "step": 95790 }, { "epoch": 1.6178606410646132, "grad_norm": 0.3178768455982208, "learning_rate": 1.07147159004599e-06, "loss": 0.0078, "step": 95800 }, { "epoch": 1.6180295200459351, "grad_norm": 0.2661949098110199, "learning_rate": 1.0705600982838266e-06, "loss": 0.0049, "step": 95810 }, { "epoch": 1.618198399027257, "grad_norm": 0.3321068286895752, "learning_rate": 1.0696489479017736e-06, "loss": 0.0044, "step": 95820 }, { "epoch": 1.618367278008579, "grad_norm": 0.17408107221126556, "learning_rate": 1.0687381389789885e-06, "loss": 0.0094, "step": 95830 }, { "epoch": 1.618536156989901, "grad_norm": 0.22450503706932068, "learning_rate": 1.0678276715946011e-06, "loss": 0.0096, "step": 95840 }, { "epoch": 1.6187050359712232, "grad_norm": 0.2936862111091614, "learning_rate": 1.0669175458277103e-06, "loss": 0.0052, "step": 95850 }, { "epoch": 1.618873914952545, "grad_norm": 0.5286139249801636, "learning_rate": 1.066007761757386e-06, "loss": 0.0045, "step": 95860 }, { "epoch": 1.619042793933867, "grad_norm": 0.2639281451702118, "learning_rate": 1.0650983194626686e-06, "loss": 0.0056, "step": 95870 }, { "epoch": 1.6192116729151889, "grad_norm": 0.1519448608160019, "learning_rate": 1.0641892190225655e-06, "loss": 0.0069, "step": 95880 }, { "epoch": 1.619380551896511, "grad_norm": 0.42973682284355164, "learning_rate": 1.0632804605160596e-06, "loss": 0.0072, "step": 95890 }, { "epoch": 1.619549430877833, "grad_norm": 0.20193323493003845, "learning_rate": 1.0623720440221002e-06, "loss": 0.0096, "step": 95900 }, { "epoch": 1.619718309859155, "grad_norm": 0.18036654591560364, "learning_rate": 1.0614639696196111e-06, "loss": 0.006, "step": 95910 }, { "epoch": 1.6198871888404769, "grad_norm": 0.3398933708667755, "learning_rate": 1.0605562373874795e-06, "loss": 0.0056, "step": 95920 }, { "epoch": 1.6200560678217988, "grad_norm": 0.5934280753135681, "learning_rate": 1.0596488474045686e-06, "loss": 0.008, "step": 95930 }, { "epoch": 1.620224946803121, "grad_norm": 0.26310423016548157, "learning_rate": 1.0587417997497108e-06, "loss": 0.0059, "step": 95940 }, { "epoch": 1.620393825784443, "grad_norm": 0.19379626214504242, "learning_rate": 1.0578350945017079e-06, "loss": 0.0079, "step": 95950 }, { "epoch": 1.620562704765765, "grad_norm": 0.2382504940032959, "learning_rate": 1.0569287317393322e-06, "loss": 0.0101, "step": 95960 }, { "epoch": 1.6207315837470868, "grad_norm": 0.30267128348350525, "learning_rate": 1.0560227115413263e-06, "loss": 0.0064, "step": 95970 }, { "epoch": 1.6209004627284087, "grad_norm": 0.33940228819847107, "learning_rate": 1.055117033986403e-06, "loss": 0.0062, "step": 95980 }, { "epoch": 1.6210693417097308, "grad_norm": 0.12618854641914368, "learning_rate": 1.0542116991532458e-06, "loss": 0.0062, "step": 95990 }, { "epoch": 1.621238220691053, "grad_norm": 0.5227153897285461, "learning_rate": 1.0533067071205082e-06, "loss": 0.0078, "step": 96000 }, { "epoch": 1.6214070996723748, "grad_norm": 0.18774208426475525, "learning_rate": 1.0524020579668121e-06, "loss": 0.0061, "step": 96010 }, { "epoch": 1.6215759786536967, "grad_norm": 0.10928221791982651, "learning_rate": 1.051497751770752e-06, "loss": 0.011, "step": 96020 }, { "epoch": 1.6217448576350186, "grad_norm": 0.2069905400276184, "learning_rate": 1.0505937886108912e-06, "loss": 0.0105, "step": 96030 }, { "epoch": 1.6219137366163408, "grad_norm": 0.18459837138652802, "learning_rate": 1.0496901685657668e-06, "loss": 0.0082, "step": 96040 }, { "epoch": 1.6220826155976629, "grad_norm": 0.23832443356513977, "learning_rate": 1.0487868917138794e-06, "loss": 0.0083, "step": 96050 }, { "epoch": 1.6222514945789848, "grad_norm": 0.2575031816959381, "learning_rate": 1.0478839581337048e-06, "loss": 0.0067, "step": 96060 }, { "epoch": 1.6224203735603067, "grad_norm": 0.16864407062530518, "learning_rate": 1.0469813679036871e-06, "loss": 0.0065, "step": 96070 }, { "epoch": 1.6225892525416286, "grad_norm": 0.2034904956817627, "learning_rate": 1.0460791211022426e-06, "loss": 0.0057, "step": 96080 }, { "epoch": 1.6227581315229507, "grad_norm": 0.3154459595680237, "learning_rate": 1.0451772178077552e-06, "loss": 0.0092, "step": 96090 }, { "epoch": 1.6229270105042728, "grad_norm": 0.12819190323352814, "learning_rate": 1.0442756580985801e-06, "loss": 0.0075, "step": 96100 }, { "epoch": 1.6230958894855947, "grad_norm": 0.21486304700374603, "learning_rate": 1.0433744420530428e-06, "loss": 0.0059, "step": 96110 }, { "epoch": 1.6232647684669166, "grad_norm": 0.23490843176841736, "learning_rate": 1.0424735697494403e-06, "loss": 0.0087, "step": 96120 }, { "epoch": 1.6234336474482385, "grad_norm": 0.21416151523590088, "learning_rate": 1.041573041266035e-06, "loss": 0.007, "step": 96130 }, { "epoch": 1.6236025264295606, "grad_norm": 0.2393854856491089, "learning_rate": 1.0406728566810647e-06, "loss": 0.0074, "step": 96140 }, { "epoch": 1.6237714054108827, "grad_norm": 0.0692099779844284, "learning_rate": 1.0397730160727349e-06, "loss": 0.0089, "step": 96150 }, { "epoch": 1.6239402843922046, "grad_norm": 0.05907744914293289, "learning_rate": 1.0388735195192224e-06, "loss": 0.0053, "step": 96160 }, { "epoch": 1.6241091633735265, "grad_norm": 0.20856516063213348, "learning_rate": 1.0379743670986708e-06, "loss": 0.003, "step": 96170 }, { "epoch": 1.6242780423548484, "grad_norm": 0.38822442293167114, "learning_rate": 1.0370755588891985e-06, "loss": 0.0089, "step": 96180 }, { "epoch": 1.6244469213361705, "grad_norm": 0.21895170211791992, "learning_rate": 1.0361770949688903e-06, "loss": 0.0071, "step": 96190 }, { "epoch": 1.6246158003174926, "grad_norm": 0.16827774047851562, "learning_rate": 1.0352789754158033e-06, "loss": 0.0066, "step": 96200 }, { "epoch": 1.6247846792988145, "grad_norm": 0.1128600537776947, "learning_rate": 1.034381200307964e-06, "loss": 0.0065, "step": 96210 }, { "epoch": 1.6249535582801364, "grad_norm": 0.24981164932250977, "learning_rate": 1.0334837697233685e-06, "loss": 0.007, "step": 96220 }, { "epoch": 1.6251224372614583, "grad_norm": 0.20278775691986084, "learning_rate": 1.032586683739984e-06, "loss": 0.0087, "step": 96230 }, { "epoch": 1.6252913162427804, "grad_norm": 0.22625164687633514, "learning_rate": 1.031689942435748e-06, "loss": 0.006, "step": 96240 }, { "epoch": 1.6254601952241026, "grad_norm": 0.49440109729766846, "learning_rate": 1.0307935458885644e-06, "loss": 0.0109, "step": 96250 }, { "epoch": 1.6256290742054245, "grad_norm": 0.4370952248573303, "learning_rate": 1.0298974941763117e-06, "loss": 0.0064, "step": 96260 }, { "epoch": 1.6257979531867464, "grad_norm": 0.39724430441856384, "learning_rate": 1.0290017873768365e-06, "loss": 0.0052, "step": 96270 }, { "epoch": 1.6259668321680683, "grad_norm": 0.4436674118041992, "learning_rate": 1.0281064255679568e-06, "loss": 0.0067, "step": 96280 }, { "epoch": 1.6261357111493904, "grad_norm": 0.10995475202798843, "learning_rate": 1.0272114088274571e-06, "loss": 0.0077, "step": 96290 }, { "epoch": 1.6263045901307125, "grad_norm": 0.5143037438392639, "learning_rate": 1.026316737233095e-06, "loss": 0.0079, "step": 96300 }, { "epoch": 1.6264734691120344, "grad_norm": 0.7084434032440186, "learning_rate": 1.0254224108625982e-06, "loss": 0.0059, "step": 96310 }, { "epoch": 1.6266423480933563, "grad_norm": 0.17783372104167938, "learning_rate": 1.0245284297936625e-06, "loss": 0.0073, "step": 96320 }, { "epoch": 1.6268112270746782, "grad_norm": 0.2513267993927002, "learning_rate": 1.023634794103956e-06, "loss": 0.009, "step": 96330 }, { "epoch": 1.6269801060560003, "grad_norm": 0.2466067522764206, "learning_rate": 1.0227415038711148e-06, "loss": 0.0095, "step": 96340 }, { "epoch": 1.6271489850373224, "grad_norm": 0.28267523646354675, "learning_rate": 1.0218485591727462e-06, "loss": 0.0072, "step": 96350 }, { "epoch": 1.6273178640186443, "grad_norm": 0.2658662796020508, "learning_rate": 1.0209559600864278e-06, "loss": 0.0072, "step": 96360 }, { "epoch": 1.6274867429999662, "grad_norm": 0.148530051112175, "learning_rate": 1.020063706689704e-06, "loss": 0.0048, "step": 96370 }, { "epoch": 1.627655621981288, "grad_norm": 0.14537294209003448, "learning_rate": 1.0191717990600936e-06, "loss": 0.0061, "step": 96380 }, { "epoch": 1.6278245009626102, "grad_norm": 0.2559412121772766, "learning_rate": 1.0182802372750823e-06, "loss": 0.0116, "step": 96390 }, { "epoch": 1.6279933799439323, "grad_norm": 0.31108105182647705, "learning_rate": 1.017389021412129e-06, "loss": 0.0068, "step": 96400 }, { "epoch": 1.6281622589252542, "grad_norm": 0.05188196524977684, "learning_rate": 1.0164981515486572e-06, "loss": 0.0065, "step": 96410 }, { "epoch": 1.6283311379065761, "grad_norm": 0.3046746850013733, "learning_rate": 1.0156076277620646e-06, "loss": 0.006, "step": 96420 }, { "epoch": 1.628500016887898, "grad_norm": 0.33996060490608215, "learning_rate": 1.014717450129718e-06, "loss": 0.0106, "step": 96430 }, { "epoch": 1.6286688958692201, "grad_norm": 0.3147149384021759, "learning_rate": 1.0138276187289531e-06, "loss": 0.0076, "step": 96440 }, { "epoch": 1.6288377748505423, "grad_norm": 0.43696680665016174, "learning_rate": 1.0129381336370791e-06, "loss": 0.0051, "step": 96450 }, { "epoch": 1.6290066538318642, "grad_norm": 0.17993459105491638, "learning_rate": 1.0120489949313693e-06, "loss": 0.0084, "step": 96460 }, { "epoch": 1.629175532813186, "grad_norm": 0.2229500263929367, "learning_rate": 1.011160202689071e-06, "loss": 0.0068, "step": 96470 }, { "epoch": 1.629344411794508, "grad_norm": 0.17535261809825897, "learning_rate": 1.0102717569873998e-06, "loss": 0.0048, "step": 96480 }, { "epoch": 1.62951329077583, "grad_norm": 0.7023826241493225, "learning_rate": 1.0093836579035437e-06, "loss": 0.0086, "step": 96490 }, { "epoch": 1.6296821697571522, "grad_norm": 0.25084683299064636, "learning_rate": 1.0084959055146553e-06, "loss": 0.0052, "step": 96500 }, { "epoch": 1.629851048738474, "grad_norm": 0.20402829349040985, "learning_rate": 1.0076084998978618e-06, "loss": 0.0054, "step": 96510 }, { "epoch": 1.630019927719796, "grad_norm": 0.15130503475666046, "learning_rate": 1.0067214411302595e-06, "loss": 0.0079, "step": 96520 }, { "epoch": 1.6301888067011179, "grad_norm": 0.47299325466156006, "learning_rate": 1.0058347292889143e-06, "loss": 0.0079, "step": 96530 }, { "epoch": 1.63035768568244, "grad_norm": 0.18710747361183167, "learning_rate": 1.0049483644508595e-06, "loss": 0.0086, "step": 96540 }, { "epoch": 1.6305265646637621, "grad_norm": 0.21237142384052277, "learning_rate": 1.004062346693101e-06, "loss": 0.0049, "step": 96550 }, { "epoch": 1.630695443645084, "grad_norm": 0.20256291329860687, "learning_rate": 1.0031766760926132e-06, "loss": 0.0053, "step": 96560 }, { "epoch": 1.630864322626406, "grad_norm": 0.24582727253437042, "learning_rate": 1.002291352726345e-06, "loss": 0.0058, "step": 96570 }, { "epoch": 1.6310332016077278, "grad_norm": 0.704014241695404, "learning_rate": 1.0014063766712062e-06, "loss": 0.0117, "step": 96580 }, { "epoch": 1.63120208058905, "grad_norm": 0.09118902683258057, "learning_rate": 1.000521748004084e-06, "loss": 0.0063, "step": 96590 }, { "epoch": 1.631370959570372, "grad_norm": 0.4723844528198242, "learning_rate": 9.996374668018322e-07, "loss": 0.006, "step": 96600 }, { "epoch": 1.631539838551694, "grad_norm": 0.4614121615886688, "learning_rate": 9.987535331412757e-07, "loss": 0.0068, "step": 96610 }, { "epoch": 1.6317087175330158, "grad_norm": 0.26724866032600403, "learning_rate": 9.978699470992066e-07, "loss": 0.0072, "step": 96620 }, { "epoch": 1.6318775965143377, "grad_norm": 0.17867286503314972, "learning_rate": 9.969867087523893e-07, "loss": 0.0058, "step": 96630 }, { "epoch": 1.6320464754956598, "grad_norm": 0.21592429280281067, "learning_rate": 9.961038181775584e-07, "loss": 0.0048, "step": 96640 }, { "epoch": 1.632215354476982, "grad_norm": 0.1307811141014099, "learning_rate": 9.952212754514184e-07, "loss": 0.0061, "step": 96650 }, { "epoch": 1.6323842334583039, "grad_norm": 0.19031262397766113, "learning_rate": 9.943390806506387e-07, "loss": 0.0057, "step": 96660 }, { "epoch": 1.6325531124396258, "grad_norm": 0.21200773119926453, "learning_rate": 9.934572338518632e-07, "loss": 0.0068, "step": 96670 }, { "epoch": 1.6327219914209476, "grad_norm": 0.20718026161193848, "learning_rate": 9.925757351317073e-07, "loss": 0.005, "step": 96680 }, { "epoch": 1.6328908704022698, "grad_norm": 0.262368381023407, "learning_rate": 9.916945845667531e-07, "loss": 0.0066, "step": 96690 }, { "epoch": 1.6330597493835917, "grad_norm": 0.46791020035743713, "learning_rate": 9.9081378223355e-07, "loss": 0.0094, "step": 96700 }, { "epoch": 1.6332286283649138, "grad_norm": 0.16467061638832092, "learning_rate": 9.899333282086215e-07, "loss": 0.0082, "step": 96710 }, { "epoch": 1.6333975073462357, "grad_norm": 0.08326137810945511, "learning_rate": 9.890532225684596e-07, "loss": 0.0064, "step": 96720 }, { "epoch": 1.6335663863275576, "grad_norm": 0.2377702295780182, "learning_rate": 9.881734653895264e-07, "loss": 0.007, "step": 96730 }, { "epoch": 1.6337352653088797, "grad_norm": 0.25926047563552856, "learning_rate": 9.872940567482508e-07, "loss": 0.0053, "step": 96740 }, { "epoch": 1.6339041442902016, "grad_norm": 0.18148685991764069, "learning_rate": 9.864149967210346e-07, "loss": 0.004, "step": 96750 }, { "epoch": 1.6340730232715237, "grad_norm": 0.20057560503482819, "learning_rate": 9.855362853842492e-07, "loss": 0.0077, "step": 96760 }, { "epoch": 1.6342419022528456, "grad_norm": 0.3277484178543091, "learning_rate": 9.846579228142355e-07, "loss": 0.0058, "step": 96770 }, { "epoch": 1.6344107812341675, "grad_norm": 0.33020034432411194, "learning_rate": 9.837799090873012e-07, "loss": 0.0059, "step": 96780 }, { "epoch": 1.6345796602154896, "grad_norm": 0.3081018924713135, "learning_rate": 9.829022442797255e-07, "loss": 0.0067, "step": 96790 }, { "epoch": 1.6347485391968115, "grad_norm": 0.11921725422143936, "learning_rate": 9.820249284677613e-07, "loss": 0.0044, "step": 96800 }, { "epoch": 1.6349174181781336, "grad_norm": 0.21805745363235474, "learning_rate": 9.811479617276265e-07, "loss": 0.0062, "step": 96810 }, { "epoch": 1.6350862971594555, "grad_norm": 0.1938524842262268, "learning_rate": 9.802713441355083e-07, "loss": 0.0061, "step": 96820 }, { "epoch": 1.6352551761407774, "grad_norm": 0.6462770700454712, "learning_rate": 9.793950757675653e-07, "loss": 0.0193, "step": 96830 }, { "epoch": 1.6354240551220995, "grad_norm": 0.12311344593763351, "learning_rate": 9.785191566999263e-07, "loss": 0.0068, "step": 96840 }, { "epoch": 1.6355929341034214, "grad_norm": 0.13041715323925018, "learning_rate": 9.776435870086903e-07, "loss": 0.0091, "step": 96850 }, { "epoch": 1.6357618130847436, "grad_norm": 0.15062429010868073, "learning_rate": 9.767683667699217e-07, "loss": 0.008, "step": 96860 }, { "epoch": 1.6359306920660655, "grad_norm": 0.41089537739753723, "learning_rate": 9.758934960596595e-07, "loss": 0.0077, "step": 96870 }, { "epoch": 1.6360995710473873, "grad_norm": 0.3166494369506836, "learning_rate": 9.75018974953909e-07, "loss": 0.0053, "step": 96880 }, { "epoch": 1.6362684500287095, "grad_norm": 0.2923777401447296, "learning_rate": 9.741448035286478e-07, "loss": 0.005, "step": 96890 }, { "epoch": 1.6364373290100314, "grad_norm": 0.29312384128570557, "learning_rate": 9.732709818598212e-07, "loss": 0.0098, "step": 96900 }, { "epoch": 1.6366062079913535, "grad_norm": 0.23535634577274323, "learning_rate": 9.723975100233445e-07, "loss": 0.0081, "step": 96910 }, { "epoch": 1.6367750869726754, "grad_norm": 0.3174360394477844, "learning_rate": 9.71524388095103e-07, "loss": 0.0072, "step": 96920 }, { "epoch": 1.6369439659539973, "grad_norm": 0.2300049364566803, "learning_rate": 9.706516161509516e-07, "loss": 0.0079, "step": 96930 }, { "epoch": 1.6371128449353194, "grad_norm": 0.30377838015556335, "learning_rate": 9.697791942667156e-07, "loss": 0.0107, "step": 96940 }, { "epoch": 1.6372817239166413, "grad_norm": 0.15749457478523254, "learning_rate": 9.689071225181861e-07, "loss": 0.0067, "step": 96950 }, { "epoch": 1.6374506028979634, "grad_norm": 0.15095487236976624, "learning_rate": 9.680354009811282e-07, "loss": 0.004, "step": 96960 }, { "epoch": 1.6376194818792853, "grad_norm": 0.08477216958999634, "learning_rate": 9.671640297312752e-07, "loss": 0.0059, "step": 96970 }, { "epoch": 1.6377883608606072, "grad_norm": 0.1565854251384735, "learning_rate": 9.6629300884433e-07, "loss": 0.0072, "step": 96980 }, { "epoch": 1.6379572398419293, "grad_norm": 0.2988349199295044, "learning_rate": 9.654223383959632e-07, "loss": 0.0071, "step": 96990 }, { "epoch": 1.6381261188232512, "grad_norm": 0.35215142369270325, "learning_rate": 9.645520184618172e-07, "loss": 0.0091, "step": 97000 }, { "epoch": 1.6382949978045733, "grad_norm": 0.21346184611320496, "learning_rate": 9.636820491175042e-07, "loss": 0.0077, "step": 97010 }, { "epoch": 1.6384638767858952, "grad_norm": 0.443792462348938, "learning_rate": 9.628124304386033e-07, "loss": 0.0168, "step": 97020 }, { "epoch": 1.6386327557672171, "grad_norm": 0.19931183755397797, "learning_rate": 9.619431625006665e-07, "loss": 0.0054, "step": 97030 }, { "epoch": 1.6388016347485392, "grad_norm": 0.1429000049829483, "learning_rate": 9.610742453792127e-07, "loss": 0.0059, "step": 97040 }, { "epoch": 1.6389705137298611, "grad_norm": 0.4150009751319885, "learning_rate": 9.602056791497316e-07, "loss": 0.0088, "step": 97050 }, { "epoch": 1.6391393927111833, "grad_norm": 0.1823146939277649, "learning_rate": 9.593374638876835e-07, "loss": 0.0089, "step": 97060 }, { "epoch": 1.6393082716925051, "grad_norm": 0.3266793489456177, "learning_rate": 9.58469599668494e-07, "loss": 0.0082, "step": 97070 }, { "epoch": 1.639477150673827, "grad_norm": 0.1693917214870453, "learning_rate": 9.576020865675624e-07, "loss": 0.0068, "step": 97080 }, { "epoch": 1.6396460296551492, "grad_norm": 0.04656997323036194, "learning_rate": 9.567349246602564e-07, "loss": 0.006, "step": 97090 }, { "epoch": 1.639814908636471, "grad_norm": 0.19968318939208984, "learning_rate": 9.558681140219134e-07, "loss": 0.0089, "step": 97100 }, { "epoch": 1.6399837876177932, "grad_norm": 0.3855486214160919, "learning_rate": 9.550016547278379e-07, "loss": 0.006, "step": 97110 }, { "epoch": 1.640152666599115, "grad_norm": 0.18902449309825897, "learning_rate": 9.54135546853307e-07, "loss": 0.0047, "step": 97120 }, { "epoch": 1.640321545580437, "grad_norm": 0.15921850502490997, "learning_rate": 9.53269790473566e-07, "loss": 0.0064, "step": 97130 }, { "epoch": 1.640490424561759, "grad_norm": 0.26923033595085144, "learning_rate": 9.524043856638294e-07, "loss": 0.0093, "step": 97140 }, { "epoch": 1.640659303543081, "grad_norm": 0.1518290638923645, "learning_rate": 9.515393324992816e-07, "loss": 0.0069, "step": 97150 }, { "epoch": 1.640828182524403, "grad_norm": 0.23968274891376495, "learning_rate": 9.506746310550769e-07, "loss": 0.0074, "step": 97160 }, { "epoch": 1.640997061505725, "grad_norm": 0.2045508325099945, "learning_rate": 9.49810281406337e-07, "loss": 0.0065, "step": 97170 }, { "epoch": 1.641165940487047, "grad_norm": 0.18316741287708282, "learning_rate": 9.489462836281571e-07, "loss": 0.0054, "step": 97180 }, { "epoch": 1.641334819468369, "grad_norm": 0.11202168464660645, "learning_rate": 9.480826377955965e-07, "loss": 0.0041, "step": 97190 }, { "epoch": 1.641503698449691, "grad_norm": 0.40260761976242065, "learning_rate": 9.472193439836874e-07, "loss": 0.0135, "step": 97200 }, { "epoch": 1.641672577431013, "grad_norm": 0.31846192479133606, "learning_rate": 9.463564022674304e-07, "loss": 0.0067, "step": 97210 }, { "epoch": 1.641841456412335, "grad_norm": 0.5215148329734802, "learning_rate": 9.454938127217983e-07, "loss": 0.0096, "step": 97220 }, { "epoch": 1.6420103353936568, "grad_norm": 0.2284245789051056, "learning_rate": 9.446315754217267e-07, "loss": 0.0062, "step": 97230 }, { "epoch": 1.642179214374979, "grad_norm": 0.29736706614494324, "learning_rate": 9.437696904421273e-07, "loss": 0.0052, "step": 97240 }, { "epoch": 1.6423480933563008, "grad_norm": 0.21766920387744904, "learning_rate": 9.429081578578775e-07, "loss": 0.0077, "step": 97250 }, { "epoch": 1.642516972337623, "grad_norm": 0.1629682332277298, "learning_rate": 9.420469777438257e-07, "loss": 0.0063, "step": 97260 }, { "epoch": 1.6426858513189448, "grad_norm": 0.1775190681219101, "learning_rate": 9.411861501747888e-07, "loss": 0.0052, "step": 97270 }, { "epoch": 1.6428547303002667, "grad_norm": 0.4536900818347931, "learning_rate": 9.403256752255535e-07, "loss": 0.0057, "step": 97280 }, { "epoch": 1.6430236092815889, "grad_norm": 0.22122754156589508, "learning_rate": 9.394655529708757e-07, "loss": 0.0069, "step": 97290 }, { "epoch": 1.6431924882629108, "grad_norm": 0.323932409286499, "learning_rate": 9.386057834854828e-07, "loss": 0.0068, "step": 97300 }, { "epoch": 1.6433613672442329, "grad_norm": 0.25944504141807556, "learning_rate": 9.377463668440656e-07, "loss": 0.0099, "step": 97310 }, { "epoch": 1.6435302462255548, "grad_norm": 0.47188451886177063, "learning_rate": 9.368873031212899e-07, "loss": 0.0071, "step": 97320 }, { "epoch": 1.6436991252068767, "grad_norm": 0.18662452697753906, "learning_rate": 9.360285923917895e-07, "loss": 0.0058, "step": 97330 }, { "epoch": 1.6438680041881988, "grad_norm": 0.48346981406211853, "learning_rate": 9.351702347301666e-07, "loss": 0.0069, "step": 97340 }, { "epoch": 1.6440368831695207, "grad_norm": 0.28571441769599915, "learning_rate": 9.343122302109947e-07, "loss": 0.0075, "step": 97350 }, { "epoch": 1.6442057621508428, "grad_norm": 0.2470397800207138, "learning_rate": 9.334545789088129e-07, "loss": 0.009, "step": 97360 }, { "epoch": 1.6443746411321647, "grad_norm": 0.22825247049331665, "learning_rate": 9.325972808981321e-07, "loss": 0.0046, "step": 97370 }, { "epoch": 1.6445435201134866, "grad_norm": 0.252840518951416, "learning_rate": 9.317403362534333e-07, "loss": 0.0109, "step": 97380 }, { "epoch": 1.6447123990948087, "grad_norm": 0.21235023438930511, "learning_rate": 9.308837450491648e-07, "loss": 0.0072, "step": 97390 }, { "epoch": 1.6448812780761306, "grad_norm": 0.2690380811691284, "learning_rate": 9.300275073597465e-07, "loss": 0.0031, "step": 97400 }, { "epoch": 1.6450501570574527, "grad_norm": 0.059178441762924194, "learning_rate": 9.291716232595649e-07, "loss": 0.0116, "step": 97410 }, { "epoch": 1.6452190360387746, "grad_norm": 0.25452888011932373, "learning_rate": 9.283160928229773e-07, "loss": 0.0092, "step": 97420 }, { "epoch": 1.6453879150200965, "grad_norm": 0.32501107454299927, "learning_rate": 9.27460916124312e-07, "loss": 0.0083, "step": 97430 }, { "epoch": 1.6455567940014186, "grad_norm": 0.2172098457813263, "learning_rate": 9.266060932378618e-07, "loss": 0.0093, "step": 97440 }, { "epoch": 1.6457256729827405, "grad_norm": 0.23663131892681122, "learning_rate": 9.257516242378928e-07, "loss": 0.0051, "step": 97450 }, { "epoch": 1.6458945519640626, "grad_norm": 0.30458950996398926, "learning_rate": 9.248975091986395e-07, "loss": 0.0077, "step": 97460 }, { "epoch": 1.6460634309453845, "grad_norm": 0.20861642062664032, "learning_rate": 9.24043748194306e-07, "loss": 0.0049, "step": 97470 }, { "epoch": 1.6462323099267064, "grad_norm": 0.17468546330928802, "learning_rate": 9.231903412990628e-07, "loss": 0.0072, "step": 97480 }, { "epoch": 1.6464011889080286, "grad_norm": 0.17763331532478333, "learning_rate": 9.223372885870535e-07, "loss": 0.0075, "step": 97490 }, { "epoch": 1.6465700678893505, "grad_norm": 0.14959828555583954, "learning_rate": 9.214845901323883e-07, "loss": 0.008, "step": 97500 }, { "epoch": 1.6467389468706726, "grad_norm": 0.18037350475788116, "learning_rate": 9.206322460091483e-07, "loss": 0.0068, "step": 97510 }, { "epoch": 1.6469078258519945, "grad_norm": 0.4506903290748596, "learning_rate": 9.197802562913826e-07, "loss": 0.0062, "step": 97520 }, { "epoch": 1.6470767048333164, "grad_norm": 0.26974162459373474, "learning_rate": 9.189286210531101e-07, "loss": 0.0064, "step": 97530 }, { "epoch": 1.6472455838146385, "grad_norm": 0.3444531261920929, "learning_rate": 9.180773403683191e-07, "loss": 0.0111, "step": 97540 }, { "epoch": 1.6474144627959604, "grad_norm": 0.2111002802848816, "learning_rate": 9.172264143109677e-07, "loss": 0.0051, "step": 97550 }, { "epoch": 1.6475833417772825, "grad_norm": 0.5646640658378601, "learning_rate": 9.163758429549796e-07, "loss": 0.009, "step": 97560 }, { "epoch": 1.6477522207586044, "grad_norm": 0.22730211913585663, "learning_rate": 9.155256263742518e-07, "loss": 0.0067, "step": 97570 }, { "epoch": 1.6479210997399263, "grad_norm": 0.442869633436203, "learning_rate": 9.146757646426496e-07, "loss": 0.0073, "step": 97580 }, { "epoch": 1.6480899787212484, "grad_norm": 0.25004804134368896, "learning_rate": 9.138262578340074e-07, "loss": 0.0056, "step": 97590 }, { "epoch": 1.6482588577025703, "grad_norm": 0.13262927532196045, "learning_rate": 9.129771060221259e-07, "loss": 0.0068, "step": 97600 }, { "epoch": 1.6484277366838924, "grad_norm": 0.13225498795509338, "learning_rate": 9.121283092807786e-07, "loss": 0.0051, "step": 97610 }, { "epoch": 1.6485966156652143, "grad_norm": 0.18320585787296295, "learning_rate": 9.112798676837076e-07, "loss": 0.0056, "step": 97620 }, { "epoch": 1.6487654946465362, "grad_norm": 0.22835871577262878, "learning_rate": 9.10431781304622e-07, "loss": 0.0077, "step": 97630 }, { "epoch": 1.6489343736278583, "grad_norm": 0.5094330310821533, "learning_rate": 9.095840502172026e-07, "loss": 0.007, "step": 97640 }, { "epoch": 1.6491032526091802, "grad_norm": 0.40654975175857544, "learning_rate": 9.087366744950982e-07, "loss": 0.0067, "step": 97650 }, { "epoch": 1.6492721315905023, "grad_norm": 0.13769443333148956, "learning_rate": 9.078896542119259e-07, "loss": 0.0075, "step": 97660 }, { "epoch": 1.6494410105718242, "grad_norm": 1.0841474533081055, "learning_rate": 9.070429894412747e-07, "loss": 0.0111, "step": 97670 }, { "epoch": 1.6496098895531461, "grad_norm": 0.3619527220726013, "learning_rate": 9.061966802566979e-07, "loss": 0.007, "step": 97680 }, { "epoch": 1.6497787685344683, "grad_norm": 0.15878793597221375, "learning_rate": 9.053507267317225e-07, "loss": 0.01, "step": 97690 }, { "epoch": 1.6499476475157902, "grad_norm": 0.2408675253391266, "learning_rate": 9.045051289398421e-07, "loss": 0.0061, "step": 97700 }, { "epoch": 1.6501165264971123, "grad_norm": 0.27571040391921997, "learning_rate": 9.036598869545222e-07, "loss": 0.0073, "step": 97710 }, { "epoch": 1.6502854054784342, "grad_norm": 0.6083569526672363, "learning_rate": 9.028150008491921e-07, "loss": 0.0092, "step": 97720 }, { "epoch": 1.650454284459756, "grad_norm": 0.17418527603149414, "learning_rate": 9.019704706972554e-07, "loss": 0.0053, "step": 97730 }, { "epoch": 1.6506231634410782, "grad_norm": 0.1447894126176834, "learning_rate": 9.011262965720824e-07, "loss": 0.0065, "step": 97740 }, { "epoch": 1.6507920424224, "grad_norm": 0.18837015330791473, "learning_rate": 9.002824785470132e-07, "loss": 0.0071, "step": 97750 }, { "epoch": 1.6509609214037222, "grad_norm": 0.2423142045736313, "learning_rate": 8.994390166953559e-07, "loss": 0.007, "step": 97760 }, { "epoch": 1.651129800385044, "grad_norm": 1.2356377840042114, "learning_rate": 8.985959110903897e-07, "loss": 0.0063, "step": 97770 }, { "epoch": 1.651298679366366, "grad_norm": 0.21273097395896912, "learning_rate": 8.977531618053603e-07, "loss": 0.0064, "step": 97780 }, { "epoch": 1.6514675583476879, "grad_norm": 0.32731446623802185, "learning_rate": 8.969107689134837e-07, "loss": 0.0068, "step": 97790 }, { "epoch": 1.65163643732901, "grad_norm": 0.10326522588729858, "learning_rate": 8.960687324879475e-07, "loss": 0.0042, "step": 97800 }, { "epoch": 1.6518053163103321, "grad_norm": 0.2227402627468109, "learning_rate": 8.952270526019019e-07, "loss": 0.0069, "step": 97810 }, { "epoch": 1.651974195291654, "grad_norm": 0.26174962520599365, "learning_rate": 8.943857293284719e-07, "loss": 0.0084, "step": 97820 }, { "epoch": 1.652143074272976, "grad_norm": 0.2322562038898468, "learning_rate": 8.935447627407495e-07, "loss": 0.0046, "step": 97830 }, { "epoch": 1.6523119532542978, "grad_norm": 0.21471433341503143, "learning_rate": 8.92704152911797e-07, "loss": 0.0078, "step": 97840 }, { "epoch": 1.65248083223562, "grad_norm": 0.2895064949989319, "learning_rate": 8.918638999146417e-07, "loss": 0.0049, "step": 97850 }, { "epoch": 1.652649711216942, "grad_norm": 0.4181858003139496, "learning_rate": 8.910240038222845e-07, "loss": 0.006, "step": 97860 }, { "epoch": 1.652818590198264, "grad_norm": 0.09845809638500214, "learning_rate": 8.901844647076935e-07, "loss": 0.0064, "step": 97870 }, { "epoch": 1.6529874691795858, "grad_norm": 0.15003740787506104, "learning_rate": 8.893452826438053e-07, "loss": 0.0064, "step": 97880 }, { "epoch": 1.6531563481609077, "grad_norm": 0.2793683409690857, "learning_rate": 8.885064577035268e-07, "loss": 0.0107, "step": 97890 }, { "epoch": 1.6533252271422298, "grad_norm": 0.2602807879447937, "learning_rate": 8.876679899597324e-07, "loss": 0.0118, "step": 97900 }, { "epoch": 1.653494106123552, "grad_norm": 0.21997641026973724, "learning_rate": 8.868298794852659e-07, "loss": 0.0062, "step": 97910 }, { "epoch": 1.6536629851048739, "grad_norm": 0.18137545883655548, "learning_rate": 8.85992126352942e-07, "loss": 0.0068, "step": 97920 }, { "epoch": 1.6538318640861958, "grad_norm": 0.340628445148468, "learning_rate": 8.851547306355402e-07, "loss": 0.0101, "step": 97930 }, { "epoch": 1.6540007430675177, "grad_norm": 0.1271527260541916, "learning_rate": 8.843176924058128e-07, "loss": 0.0104, "step": 97940 }, { "epoch": 1.6541696220488398, "grad_norm": 0.3407340943813324, "learning_rate": 8.83481011736479e-07, "loss": 0.0093, "step": 97950 }, { "epoch": 1.654338501030162, "grad_norm": 0.32436588406562805, "learning_rate": 8.826446887002288e-07, "loss": 0.0064, "step": 97960 }, { "epoch": 1.6545073800114838, "grad_norm": 0.23129510879516602, "learning_rate": 8.818087233697181e-07, "loss": 0.0095, "step": 97970 }, { "epoch": 1.6546762589928057, "grad_norm": 0.18229418992996216, "learning_rate": 8.809731158175744e-07, "loss": 0.0065, "step": 97980 }, { "epoch": 1.6548451379741276, "grad_norm": 0.2205924391746521, "learning_rate": 8.801378661163934e-07, "loss": 0.007, "step": 97990 }, { "epoch": 1.6550140169554497, "grad_norm": 0.20348188281059265, "learning_rate": 8.793029743387388e-07, "loss": 0.0076, "step": 98000 }, { "epoch": 1.6551828959367718, "grad_norm": 0.26347845792770386, "learning_rate": 8.784684405571454e-07, "loss": 0.0056, "step": 98010 }, { "epoch": 1.6553517749180937, "grad_norm": 0.20435212552547455, "learning_rate": 8.776342648441138e-07, "loss": 0.0068, "step": 98020 }, { "epoch": 1.6555206538994156, "grad_norm": 0.2030053436756134, "learning_rate": 8.768004472721165e-07, "loss": 0.0094, "step": 98030 }, { "epoch": 1.6556895328807375, "grad_norm": 0.5910047292709351, "learning_rate": 8.759669879135935e-07, "loss": 0.0081, "step": 98040 }, { "epoch": 1.6558584118620596, "grad_norm": 0.1960606724023819, "learning_rate": 8.751338868409526e-07, "loss": 0.0051, "step": 98050 }, { "epoch": 1.6560272908433817, "grad_norm": 0.35158517956733704, "learning_rate": 8.74301144126572e-07, "loss": 0.0064, "step": 98060 }, { "epoch": 1.6561961698247036, "grad_norm": 0.08569783717393875, "learning_rate": 8.734687598427983e-07, "loss": 0.0056, "step": 98070 }, { "epoch": 1.6563650488060255, "grad_norm": 0.13778603076934814, "learning_rate": 8.726367340619485e-07, "loss": 0.0079, "step": 98080 }, { "epoch": 1.6565339277873474, "grad_norm": 0.07541143894195557, "learning_rate": 8.718050668563044e-07, "loss": 0.007, "step": 98090 }, { "epoch": 1.6567028067686695, "grad_norm": 0.2464100867509842, "learning_rate": 8.709737582981203e-07, "loss": 0.0067, "step": 98100 }, { "epoch": 1.6568716857499917, "grad_norm": 0.4179292321205139, "learning_rate": 8.701428084596191e-07, "loss": 0.0065, "step": 98110 }, { "epoch": 1.6570405647313136, "grad_norm": 0.2245015799999237, "learning_rate": 8.693122174129903e-07, "loss": 0.0056, "step": 98120 }, { "epoch": 1.6572094437126355, "grad_norm": 0.41404691338539124, "learning_rate": 8.684819852303945e-07, "loss": 0.0061, "step": 98130 }, { "epoch": 1.6573783226939574, "grad_norm": 0.06809736043214798, "learning_rate": 8.676521119839604e-07, "loss": 0.0052, "step": 98140 }, { "epoch": 1.6575472016752795, "grad_norm": 0.26317891478538513, "learning_rate": 8.668225977457845e-07, "loss": 0.0075, "step": 98150 }, { "epoch": 1.6577160806566016, "grad_norm": 0.4558241367340088, "learning_rate": 8.659934425879352e-07, "loss": 0.0061, "step": 98160 }, { "epoch": 1.6578849596379235, "grad_norm": 0.12399186939001083, "learning_rate": 8.651646465824443e-07, "loss": 0.0046, "step": 98170 }, { "epoch": 1.6580538386192454, "grad_norm": 0.18685850501060486, "learning_rate": 8.64336209801317e-07, "loss": 0.0046, "step": 98180 }, { "epoch": 1.6582227176005673, "grad_norm": 0.22777605056762695, "learning_rate": 8.635081323165257e-07, "loss": 0.0085, "step": 98190 }, { "epoch": 1.6583915965818894, "grad_norm": 0.1524573564529419, "learning_rate": 8.626804142000134e-07, "loss": 0.0051, "step": 98200 }, { "epoch": 1.6585604755632115, "grad_norm": 0.11913646012544632, "learning_rate": 8.61853055523687e-07, "loss": 0.0048, "step": 98210 }, { "epoch": 1.6587293545445334, "grad_norm": 0.30925074219703674, "learning_rate": 8.610260563594275e-07, "loss": 0.0077, "step": 98220 }, { "epoch": 1.6588982335258553, "grad_norm": 0.2518329918384552, "learning_rate": 8.601994167790822e-07, "loss": 0.0049, "step": 98230 }, { "epoch": 1.6590671125071772, "grad_norm": 0.17404812574386597, "learning_rate": 8.593731368544672e-07, "loss": 0.0065, "step": 98240 }, { "epoch": 1.6592359914884993, "grad_norm": 0.5639759302139282, "learning_rate": 8.585472166573683e-07, "loss": 0.0075, "step": 98250 }, { "epoch": 1.6594048704698214, "grad_norm": 0.2819994390010834, "learning_rate": 8.577216562595392e-07, "loss": 0.0059, "step": 98260 }, { "epoch": 1.6595737494511433, "grad_norm": 0.22255577147006989, "learning_rate": 8.568964557327025e-07, "loss": 0.0055, "step": 98270 }, { "epoch": 1.6597426284324652, "grad_norm": 0.35961249470710754, "learning_rate": 8.560716151485494e-07, "loss": 0.0045, "step": 98280 }, { "epoch": 1.6599115074137871, "grad_norm": 0.29452377557754517, "learning_rate": 8.552471345787417e-07, "loss": 0.0056, "step": 98290 }, { "epoch": 1.6600803863951092, "grad_norm": 0.22097057104110718, "learning_rate": 8.54423014094905e-07, "loss": 0.0038, "step": 98300 }, { "epoch": 1.6602492653764314, "grad_norm": 0.31754788756370544, "learning_rate": 8.535992537686388e-07, "loss": 0.0059, "step": 98310 }, { "epoch": 1.6604181443577533, "grad_norm": 0.46432173252105713, "learning_rate": 8.527758536715092e-07, "loss": 0.0083, "step": 98320 }, { "epoch": 1.6605870233390752, "grad_norm": 0.35920417308807373, "learning_rate": 8.519528138750527e-07, "loss": 0.0084, "step": 98330 }, { "epoch": 1.660755902320397, "grad_norm": 0.20040516555309296, "learning_rate": 8.5113013445077e-07, "loss": 0.0072, "step": 98340 }, { "epoch": 1.6609247813017192, "grad_norm": 0.22903771698474884, "learning_rate": 8.50307815470135e-07, "loss": 0.0079, "step": 98350 }, { "epoch": 1.6610936602830413, "grad_norm": 0.13725435733795166, "learning_rate": 8.494858570045883e-07, "loss": 0.006, "step": 98360 }, { "epoch": 1.6612625392643632, "grad_norm": 0.3617895245552063, "learning_rate": 8.486642591255406e-07, "loss": 0.0069, "step": 98370 }, { "epoch": 1.661431418245685, "grad_norm": 0.08647888898849487, "learning_rate": 8.478430219043693e-07, "loss": 0.0061, "step": 98380 }, { "epoch": 1.661600297227007, "grad_norm": 0.21567541360855103, "learning_rate": 8.470221454124222e-07, "loss": 0.0088, "step": 98390 }, { "epoch": 1.661769176208329, "grad_norm": 0.1648903340101242, "learning_rate": 8.462016297210146e-07, "loss": 0.0086, "step": 98400 }, { "epoch": 1.6619380551896512, "grad_norm": 0.5188198089599609, "learning_rate": 8.453814749014327e-07, "loss": 0.0104, "step": 98410 }, { "epoch": 1.662106934170973, "grad_norm": 0.05852222070097923, "learning_rate": 8.445616810249263e-07, "loss": 0.0045, "step": 98420 }, { "epoch": 1.662275813152295, "grad_norm": 0.3690684735774994, "learning_rate": 8.437422481627183e-07, "loss": 0.0048, "step": 98430 }, { "epoch": 1.662444692133617, "grad_norm": 0.07310667634010315, "learning_rate": 8.429231763860002e-07, "loss": 0.005, "step": 98440 }, { "epoch": 1.662613571114939, "grad_norm": 0.8632025122642517, "learning_rate": 8.421044657659305e-07, "loss": 0.0051, "step": 98450 }, { "epoch": 1.6627824500962611, "grad_norm": 0.6886683702468872, "learning_rate": 8.412861163736358e-07, "loss": 0.0098, "step": 98460 }, { "epoch": 1.662951329077583, "grad_norm": 0.548505425453186, "learning_rate": 8.404681282802129e-07, "loss": 0.0056, "step": 98470 }, { "epoch": 1.663120208058905, "grad_norm": 0.23085421323776245, "learning_rate": 8.396505015567252e-07, "loss": 0.0069, "step": 98480 }, { "epoch": 1.6632890870402268, "grad_norm": 0.2816769480705261, "learning_rate": 8.388332362742096e-07, "loss": 0.0098, "step": 98490 }, { "epoch": 1.663457966021549, "grad_norm": 0.22303539514541626, "learning_rate": 8.380163325036655e-07, "loss": 0.0083, "step": 98500 }, { "epoch": 1.663626845002871, "grad_norm": 0.24166962504386902, "learning_rate": 8.371997903160633e-07, "loss": 0.0054, "step": 98510 }, { "epoch": 1.663795723984193, "grad_norm": 0.24360346794128418, "learning_rate": 8.363836097823436e-07, "loss": 0.0072, "step": 98520 }, { "epoch": 1.6639646029655148, "grad_norm": 0.32719650864601135, "learning_rate": 8.355677909734144e-07, "loss": 0.0064, "step": 98530 }, { "epoch": 1.6641334819468367, "grad_norm": 0.07767380774021149, "learning_rate": 8.347523339601499e-07, "loss": 0.0099, "step": 98540 }, { "epoch": 1.6643023609281589, "grad_norm": 0.3233058452606201, "learning_rate": 8.339372388133959e-07, "loss": 0.0074, "step": 98550 }, { "epoch": 1.664471239909481, "grad_norm": 0.17617525160312653, "learning_rate": 8.331225056039666e-07, "loss": 0.0047, "step": 98560 }, { "epoch": 1.6646401188908029, "grad_norm": 0.1290726512670517, "learning_rate": 8.323081344026445e-07, "loss": 0.0076, "step": 98570 }, { "epoch": 1.6648089978721248, "grad_norm": 0.10632937401533127, "learning_rate": 8.314941252801784e-07, "loss": 0.0036, "step": 98580 }, { "epoch": 1.6649778768534467, "grad_norm": 0.3072943687438965, "learning_rate": 8.306804783072881e-07, "loss": 0.0116, "step": 98590 }, { "epoch": 1.6651467558347688, "grad_norm": 0.20639002323150635, "learning_rate": 8.298671935546598e-07, "loss": 0.0074, "step": 98600 }, { "epoch": 1.665315634816091, "grad_norm": 0.1115589514374733, "learning_rate": 8.290542710929544e-07, "loss": 0.0067, "step": 98610 }, { "epoch": 1.6654845137974128, "grad_norm": 0.15231028199195862, "learning_rate": 8.28241710992792e-07, "loss": 0.0057, "step": 98620 }, { "epoch": 1.6656533927787347, "grad_norm": 0.2727239727973938, "learning_rate": 8.274295133247673e-07, "loss": 0.0064, "step": 98630 }, { "epoch": 1.6658222717600566, "grad_norm": 0.41438257694244385, "learning_rate": 8.266176781594426e-07, "loss": 0.0074, "step": 98640 }, { "epoch": 1.6659911507413787, "grad_norm": 0.3526778519153595, "learning_rate": 8.258062055673488e-07, "loss": 0.0055, "step": 98650 }, { "epoch": 1.6661600297227008, "grad_norm": 0.26081711053848267, "learning_rate": 8.249950956189818e-07, "loss": 0.0059, "step": 98660 }, { "epoch": 1.6663289087040227, "grad_norm": 0.18268093466758728, "learning_rate": 8.24184348384811e-07, "loss": 0.0072, "step": 98670 }, { "epoch": 1.6664977876853446, "grad_norm": 0.1674421727657318, "learning_rate": 8.233739639352717e-07, "loss": 0.0074, "step": 98680 }, { "epoch": 1.6666666666666665, "grad_norm": 0.17758288979530334, "learning_rate": 8.225639423407677e-07, "loss": 0.0099, "step": 98690 }, { "epoch": 1.6668355456479886, "grad_norm": 0.16331273317337036, "learning_rate": 8.217542836716735e-07, "loss": 0.006, "step": 98700 }, { "epoch": 1.6670044246293108, "grad_norm": 0.26406675577163696, "learning_rate": 8.209449879983278e-07, "loss": 0.0061, "step": 98710 }, { "epoch": 1.6671733036106327, "grad_norm": 0.23951022326946259, "learning_rate": 8.201360553910404e-07, "loss": 0.0078, "step": 98720 }, { "epoch": 1.6673421825919545, "grad_norm": 0.13094088435173035, "learning_rate": 8.193274859200911e-07, "loss": 0.0074, "step": 98730 }, { "epoch": 1.6675110615732764, "grad_norm": 0.20664441585540771, "learning_rate": 8.185192796557268e-07, "loss": 0.0081, "step": 98740 }, { "epoch": 1.6676799405545986, "grad_norm": 0.3068816363811493, "learning_rate": 8.177114366681599e-07, "loss": 0.0077, "step": 98750 }, { "epoch": 1.6678488195359207, "grad_norm": 0.20600591599941254, "learning_rate": 8.169039570275755e-07, "loss": 0.0071, "step": 98760 }, { "epoch": 1.6680176985172426, "grad_norm": 0.24655123054981232, "learning_rate": 8.160968408041253e-07, "loss": 0.0051, "step": 98770 }, { "epoch": 1.6681865774985645, "grad_norm": 0.11421435326337814, "learning_rate": 8.152900880679304e-07, "loss": 0.007, "step": 98780 }, { "epoch": 1.6683554564798864, "grad_norm": 0.33267122507095337, "learning_rate": 8.144836988890775e-07, "loss": 0.0132, "step": 98790 }, { "epoch": 1.6685243354612085, "grad_norm": 0.15922872722148895, "learning_rate": 8.136776733376245e-07, "loss": 0.0056, "step": 98800 }, { "epoch": 1.6686932144425306, "grad_norm": 0.30252957344055176, "learning_rate": 8.128720114835975e-07, "loss": 0.0092, "step": 98810 }, { "epoch": 1.6688620934238525, "grad_norm": 0.2610555589199066, "learning_rate": 8.120667133969907e-07, "loss": 0.0052, "step": 98820 }, { "epoch": 1.6690309724051744, "grad_norm": 0.10531444102525711, "learning_rate": 8.11261779147764e-07, "loss": 0.0091, "step": 98830 }, { "epoch": 1.6691998513864963, "grad_norm": 0.10922461748123169, "learning_rate": 8.10457208805851e-07, "loss": 0.0063, "step": 98840 }, { "epoch": 1.6693687303678184, "grad_norm": 0.11158224195241928, "learning_rate": 8.096530024411497e-07, "loss": 0.0054, "step": 98850 }, { "epoch": 1.6695376093491405, "grad_norm": 0.15139929950237274, "learning_rate": 8.088491601235288e-07, "loss": 0.0059, "step": 98860 }, { "epoch": 1.6697064883304624, "grad_norm": 0.33801668882369995, "learning_rate": 8.080456819228222e-07, "loss": 0.0074, "step": 98870 }, { "epoch": 1.6698753673117843, "grad_norm": 0.1215205043554306, "learning_rate": 8.072425679088342e-07, "loss": 0.0067, "step": 98880 }, { "epoch": 1.6700442462931062, "grad_norm": 0.2959464490413666, "learning_rate": 8.064398181513389e-07, "loss": 0.0046, "step": 98890 }, { "epoch": 1.6702131252744283, "grad_norm": 0.2621283531188965, "learning_rate": 8.056374327200767e-07, "loss": 0.0063, "step": 98900 }, { "epoch": 1.6703820042557505, "grad_norm": 0.19322249293327332, "learning_rate": 8.048354116847562e-07, "loss": 0.0054, "step": 98910 }, { "epoch": 1.6705508832370723, "grad_norm": 0.40867850184440613, "learning_rate": 8.040337551150551e-07, "loss": 0.0067, "step": 98920 }, { "epoch": 1.6707197622183942, "grad_norm": 0.08794877678155899, "learning_rate": 8.032324630806193e-07, "loss": 0.0113, "step": 98930 }, { "epoch": 1.6708886411997161, "grad_norm": 0.18301205337047577, "learning_rate": 8.024315356510648e-07, "loss": 0.0064, "step": 98940 }, { "epoch": 1.6710575201810383, "grad_norm": 0.4014149010181427, "learning_rate": 8.016309728959709e-07, "loss": 0.0077, "step": 98950 }, { "epoch": 1.6712263991623604, "grad_norm": 0.25953245162963867, "learning_rate": 8.008307748848915e-07, "loss": 0.0069, "step": 98960 }, { "epoch": 1.6713952781436823, "grad_norm": 0.4013743996620178, "learning_rate": 8.000309416873448e-07, "loss": 0.0071, "step": 98970 }, { "epoch": 1.6715641571250042, "grad_norm": 0.4370080232620239, "learning_rate": 7.992314733728196e-07, "loss": 0.007, "step": 98980 }, { "epoch": 1.671733036106326, "grad_norm": 0.3914807140827179, "learning_rate": 7.984323700107688e-07, "loss": 0.0059, "step": 98990 }, { "epoch": 1.6719019150876482, "grad_norm": 0.26308387517929077, "learning_rate": 7.97633631670619e-07, "loss": 0.0059, "step": 99000 }, { "epoch": 1.6720707940689703, "grad_norm": 0.29304373264312744, "learning_rate": 7.968352584217615e-07, "loss": 0.0056, "step": 99010 }, { "epoch": 1.6722396730502922, "grad_norm": 0.1682315468788147, "learning_rate": 7.960372503335589e-07, "loss": 0.0063, "step": 99020 }, { "epoch": 1.672408552031614, "grad_norm": 0.19240495562553406, "learning_rate": 7.952396074753377e-07, "loss": 0.0055, "step": 99030 }, { "epoch": 1.672577431012936, "grad_norm": 0.13638801872730255, "learning_rate": 7.944423299163956e-07, "loss": 0.0035, "step": 99040 }, { "epoch": 1.672746309994258, "grad_norm": 0.15206240117549896, "learning_rate": 7.936454177259989e-07, "loss": 0.008, "step": 99050 }, { "epoch": 1.6729151889755802, "grad_norm": 0.22359926998615265, "learning_rate": 7.928488709733823e-07, "loss": 0.0094, "step": 99060 }, { "epoch": 1.6730840679569021, "grad_norm": 0.20469431579113007, "learning_rate": 7.920526897277448e-07, "loss": 0.0047, "step": 99070 }, { "epoch": 1.673252946938224, "grad_norm": 0.08688589185476303, "learning_rate": 7.912568740582593e-07, "loss": 0.0093, "step": 99080 }, { "epoch": 1.673421825919546, "grad_norm": 0.16685454547405243, "learning_rate": 7.904614240340636e-07, "loss": 0.0046, "step": 99090 }, { "epoch": 1.673590704900868, "grad_norm": 0.24969875812530518, "learning_rate": 7.896663397242654e-07, "loss": 0.0058, "step": 99100 }, { "epoch": 1.6737595838821901, "grad_norm": 0.5379891395568848, "learning_rate": 7.88871621197938e-07, "loss": 0.0109, "step": 99110 }, { "epoch": 1.673928462863512, "grad_norm": 0.2085590958595276, "learning_rate": 7.880772685241251e-07, "loss": 0.007, "step": 99120 }, { "epoch": 1.674097341844834, "grad_norm": 0.21839368343353271, "learning_rate": 7.872832817718384e-07, "loss": 0.0053, "step": 99130 }, { "epoch": 1.6742662208261558, "grad_norm": 0.2199089676141739, "learning_rate": 7.864896610100575e-07, "loss": 0.006, "step": 99140 }, { "epoch": 1.674435099807478, "grad_norm": 0.2662990689277649, "learning_rate": 7.85696406307731e-07, "loss": 0.0049, "step": 99150 }, { "epoch": 1.6746039787888, "grad_norm": 0.4455795884132385, "learning_rate": 7.849035177337733e-07, "loss": 0.0075, "step": 99160 }, { "epoch": 1.674772857770122, "grad_norm": 0.34463319182395935, "learning_rate": 7.841109953570692e-07, "loss": 0.0105, "step": 99170 }, { "epoch": 1.6749417367514439, "grad_norm": 0.4741804301738739, "learning_rate": 7.833188392464714e-07, "loss": 0.0054, "step": 99180 }, { "epoch": 1.6751106157327658, "grad_norm": 0.33766821026802063, "learning_rate": 7.825270494708004e-07, "loss": 0.0067, "step": 99190 }, { "epoch": 1.6752794947140879, "grad_norm": 0.15653783082962036, "learning_rate": 7.817356260988445e-07, "loss": 0.0062, "step": 99200 }, { "epoch": 1.67544837369541, "grad_norm": 0.20312537252902985, "learning_rate": 7.809445691993617e-07, "loss": 0.0076, "step": 99210 }, { "epoch": 1.675617252676732, "grad_norm": 0.3069104850292206, "learning_rate": 7.801538788410762e-07, "loss": 0.0104, "step": 99220 }, { "epoch": 1.6757861316580538, "grad_norm": 0.24154219031333923, "learning_rate": 7.793635550926831e-07, "loss": 0.0063, "step": 99230 }, { "epoch": 1.6759550106393757, "grad_norm": 0.42730554938316345, "learning_rate": 7.785735980228403e-07, "loss": 0.0097, "step": 99240 }, { "epoch": 1.6761238896206978, "grad_norm": 0.32460886240005493, "learning_rate": 7.777840077001797e-07, "loss": 0.0064, "step": 99250 }, { "epoch": 1.67629276860202, "grad_norm": 0.0881303995847702, "learning_rate": 7.769947841932984e-07, "loss": 0.0079, "step": 99260 }, { "epoch": 1.6764616475833418, "grad_norm": 0.2043897956609726, "learning_rate": 7.762059275707634e-07, "loss": 0.0091, "step": 99270 }, { "epoch": 1.6766305265646637, "grad_norm": 0.3082512319087982, "learning_rate": 7.754174379011064e-07, "loss": 0.0077, "step": 99280 }, { "epoch": 1.6767994055459856, "grad_norm": 0.32387638092041016, "learning_rate": 7.746293152528305e-07, "loss": 0.0047, "step": 99290 }, { "epoch": 1.6769682845273077, "grad_norm": 0.20587113499641418, "learning_rate": 7.73841559694406e-07, "loss": 0.005, "step": 99300 }, { "epoch": 1.6771371635086298, "grad_norm": 0.16846974194049835, "learning_rate": 7.730541712942713e-07, "loss": 0.0049, "step": 99310 }, { "epoch": 1.6773060424899517, "grad_norm": 0.37731480598449707, "learning_rate": 7.722671501208323e-07, "loss": 0.0068, "step": 99320 }, { "epoch": 1.6774749214712736, "grad_norm": 0.2550012469291687, "learning_rate": 7.714804962424644e-07, "loss": 0.0089, "step": 99330 }, { "epoch": 1.6776438004525955, "grad_norm": 0.5445082187652588, "learning_rate": 7.706942097275088e-07, "loss": 0.0056, "step": 99340 }, { "epoch": 1.6778126794339177, "grad_norm": 0.1770206242799759, "learning_rate": 7.699082906442784e-07, "loss": 0.0062, "step": 99350 }, { "epoch": 1.6779815584152398, "grad_norm": 0.23575328290462494, "learning_rate": 7.691227390610489e-07, "loss": 0.0056, "step": 99360 }, { "epoch": 1.6781504373965617, "grad_norm": 0.08440467715263367, "learning_rate": 7.683375550460693e-07, "loss": 0.008, "step": 99370 }, { "epoch": 1.6783193163778836, "grad_norm": 0.40695133805274963, "learning_rate": 7.67552738667553e-07, "loss": 0.0106, "step": 99380 }, { "epoch": 1.6784881953592055, "grad_norm": 0.2391102910041809, "learning_rate": 7.667682899936851e-07, "loss": 0.0081, "step": 99390 }, { "epoch": 1.6786570743405276, "grad_norm": 0.20262949168682098, "learning_rate": 7.65984209092614e-07, "loss": 0.0051, "step": 99400 }, { "epoch": 1.6788259533218497, "grad_norm": 0.07832688093185425, "learning_rate": 7.652004960324599e-07, "loss": 0.0046, "step": 99410 }, { "epoch": 1.6789948323031716, "grad_norm": 0.2639196515083313, "learning_rate": 7.644171508813102e-07, "loss": 0.0073, "step": 99420 }, { "epoch": 1.6791637112844935, "grad_norm": 0.14090441167354584, "learning_rate": 7.636341737072195e-07, "loss": 0.0045, "step": 99430 }, { "epoch": 1.6793325902658154, "grad_norm": 0.25528863072395325, "learning_rate": 7.628515645782108e-07, "loss": 0.0073, "step": 99440 }, { "epoch": 1.6795014692471375, "grad_norm": 0.23141279816627502, "learning_rate": 7.620693235622756e-07, "loss": 0.0038, "step": 99450 }, { "epoch": 1.6796703482284596, "grad_norm": 0.2530237138271332, "learning_rate": 7.612874507273732e-07, "loss": 0.0053, "step": 99460 }, { "epoch": 1.6798392272097815, "grad_norm": 0.14872433245182037, "learning_rate": 7.605059461414316e-07, "loss": 0.0053, "step": 99470 }, { "epoch": 1.6800081061911034, "grad_norm": 0.29777657985687256, "learning_rate": 7.597248098723442e-07, "loss": 0.0062, "step": 99480 }, { "epoch": 1.6801769851724253, "grad_norm": 0.3262838125228882, "learning_rate": 7.589440419879751e-07, "loss": 0.0082, "step": 99490 }, { "epoch": 1.6803458641537474, "grad_norm": 0.2663889229297638, "learning_rate": 7.581636425561551e-07, "loss": 0.0084, "step": 99500 }, { "epoch": 1.6805147431350695, "grad_norm": 0.23628416657447815, "learning_rate": 7.573836116446853e-07, "loss": 0.0077, "step": 99510 }, { "epoch": 1.6806836221163914, "grad_norm": 0.3343411087989807, "learning_rate": 7.566039493213301e-07, "loss": 0.0075, "step": 99520 }, { "epoch": 1.6808525010977133, "grad_norm": 0.2369805872440338, "learning_rate": 7.558246556538257e-07, "loss": 0.0065, "step": 99530 }, { "epoch": 1.6810213800790352, "grad_norm": 0.17805388569831848, "learning_rate": 7.550457307098752e-07, "loss": 0.0072, "step": 99540 }, { "epoch": 1.6811902590603574, "grad_norm": 0.16641899943351746, "learning_rate": 7.542671745571501e-07, "loss": 0.0068, "step": 99550 }, { "epoch": 1.6813591380416795, "grad_norm": 0.3558274507522583, "learning_rate": 7.534889872632894e-07, "loss": 0.0093, "step": 99560 }, { "epoch": 1.6815280170230014, "grad_norm": 0.19263486564159393, "learning_rate": 7.527111688958999e-07, "loss": 0.0077, "step": 99570 }, { "epoch": 1.6816968960043233, "grad_norm": 0.07161208987236023, "learning_rate": 7.519337195225562e-07, "loss": 0.0047, "step": 99580 }, { "epoch": 1.6818657749856452, "grad_norm": 0.22707439959049225, "learning_rate": 7.511566392108022e-07, "loss": 0.0059, "step": 99590 }, { "epoch": 1.6820346539669673, "grad_norm": 0.2696225047111511, "learning_rate": 7.503799280281488e-07, "loss": 0.0076, "step": 99600 }, { "epoch": 1.6822035329482894, "grad_norm": 1.0382771492004395, "learning_rate": 7.496035860420731e-07, "loss": 0.007, "step": 99610 }, { "epoch": 1.6823724119296113, "grad_norm": 0.09015626460313797, "learning_rate": 7.488276133200229e-07, "loss": 0.0036, "step": 99620 }, { "epoch": 1.6825412909109332, "grad_norm": 0.2881481945514679, "learning_rate": 7.480520099294125e-07, "loss": 0.0049, "step": 99630 }, { "epoch": 1.682710169892255, "grad_norm": 0.2070060819387436, "learning_rate": 7.472767759376259e-07, "loss": 0.0093, "step": 99640 }, { "epoch": 1.6828790488735772, "grad_norm": 0.4438017010688782, "learning_rate": 7.465019114120109e-07, "loss": 0.0078, "step": 99650 }, { "epoch": 1.6830479278548993, "grad_norm": 0.2097405642271042, "learning_rate": 7.457274164198874e-07, "loss": 0.0049, "step": 99660 }, { "epoch": 1.6832168068362212, "grad_norm": 0.4366208612918854, "learning_rate": 7.449532910285412e-07, "loss": 0.0077, "step": 99670 }, { "epoch": 1.683385685817543, "grad_norm": 0.21220043301582336, "learning_rate": 7.441795353052267e-07, "loss": 0.01, "step": 99680 }, { "epoch": 1.683554564798865, "grad_norm": 0.2622162997722626, "learning_rate": 7.434061493171657e-07, "loss": 0.0075, "step": 99690 }, { "epoch": 1.6837234437801871, "grad_norm": 0.20931914448738098, "learning_rate": 7.426331331315484e-07, "loss": 0.0054, "step": 99700 }, { "epoch": 1.6838923227615092, "grad_norm": 0.3223349452018738, "learning_rate": 7.418604868155322e-07, "loss": 0.0051, "step": 99710 }, { "epoch": 1.6840612017428311, "grad_norm": 0.13352984189987183, "learning_rate": 7.410882104362443e-07, "loss": 0.0073, "step": 99720 }, { "epoch": 1.684230080724153, "grad_norm": 0.23356781899929047, "learning_rate": 7.403163040607752e-07, "loss": 0.0088, "step": 99730 }, { "epoch": 1.684398959705475, "grad_norm": 0.20705637335777283, "learning_rate": 7.395447677561879e-07, "loss": 0.0073, "step": 99740 }, { "epoch": 1.684567838686797, "grad_norm": 0.35450267791748047, "learning_rate": 7.387736015895119e-07, "loss": 0.0044, "step": 99750 }, { "epoch": 1.6847367176681192, "grad_norm": 0.14420001208782196, "learning_rate": 7.380028056277444e-07, "loss": 0.0056, "step": 99760 }, { "epoch": 1.684905596649441, "grad_norm": 0.20281857252120972, "learning_rate": 7.372323799378489e-07, "loss": 0.0062, "step": 99770 }, { "epoch": 1.685074475630763, "grad_norm": 0.3904884159564972, "learning_rate": 7.364623245867585e-07, "loss": 0.0072, "step": 99780 }, { "epoch": 1.6852433546120849, "grad_norm": 0.15573608875274658, "learning_rate": 7.356926396413744e-07, "loss": 0.0071, "step": 99790 }, { "epoch": 1.685412233593407, "grad_norm": 0.17755058407783508, "learning_rate": 7.349233251685645e-07, "loss": 0.0066, "step": 99800 }, { "epoch": 1.685581112574729, "grad_norm": 0.1886858493089676, "learning_rate": 7.341543812351648e-07, "loss": 0.0065, "step": 99810 }, { "epoch": 1.685749991556051, "grad_norm": 0.2574860155582428, "learning_rate": 7.333858079079797e-07, "loss": 0.0081, "step": 99820 }, { "epoch": 1.6859188705373729, "grad_norm": 0.4261011779308319, "learning_rate": 7.326176052537809e-07, "loss": 0.0087, "step": 99830 }, { "epoch": 1.6860877495186948, "grad_norm": 0.2839013338088989, "learning_rate": 7.318497733393087e-07, "loss": 0.0093, "step": 99840 }, { "epoch": 1.686256628500017, "grad_norm": 0.21713875234127045, "learning_rate": 7.310823122312688e-07, "loss": 0.0078, "step": 99850 }, { "epoch": 1.686425507481339, "grad_norm": 0.2655833065509796, "learning_rate": 7.303152219963372e-07, "loss": 0.0053, "step": 99860 }, { "epoch": 1.686594386462661, "grad_norm": 0.14479242265224457, "learning_rate": 7.295485027011568e-07, "loss": 0.0064, "step": 99870 }, { "epoch": 1.6867632654439828, "grad_norm": 0.2713658809661865, "learning_rate": 7.287821544123391e-07, "loss": 0.0051, "step": 99880 }, { "epoch": 1.6869321444253047, "grad_norm": 0.23894813656806946, "learning_rate": 7.280161771964611e-07, "loss": 0.0085, "step": 99890 }, { "epoch": 1.6871010234066268, "grad_norm": 0.12747366726398468, "learning_rate": 7.272505711200694e-07, "loss": 0.007, "step": 99900 }, { "epoch": 1.687269902387949, "grad_norm": 0.11410053074359894, "learning_rate": 7.264853362496782e-07, "loss": 0.0056, "step": 99910 }, { "epoch": 1.6874387813692708, "grad_norm": 0.28591957688331604, "learning_rate": 7.257204726517691e-07, "loss": 0.0079, "step": 99920 }, { "epoch": 1.6876076603505927, "grad_norm": 0.19742192327976227, "learning_rate": 7.249559803927919e-07, "loss": 0.0104, "step": 99930 }, { "epoch": 1.6877765393319146, "grad_norm": 0.23251590132713318, "learning_rate": 7.241918595391639e-07, "loss": 0.0046, "step": 99940 }, { "epoch": 1.6879454183132367, "grad_norm": 0.4698283076286316, "learning_rate": 7.234281101572699e-07, "loss": 0.0069, "step": 99950 }, { "epoch": 1.6881142972945589, "grad_norm": 0.37367936968803406, "learning_rate": 7.226647323134633e-07, "loss": 0.0062, "step": 99960 }, { "epoch": 1.6882831762758808, "grad_norm": 0.3067644238471985, "learning_rate": 7.219017260740624e-07, "loss": 0.0105, "step": 99970 }, { "epoch": 1.6884520552572027, "grad_norm": 0.20691262185573578, "learning_rate": 7.211390915053568e-07, "loss": 0.0047, "step": 99980 }, { "epoch": 1.6886209342385246, "grad_norm": 0.16777700185775757, "learning_rate": 7.203768286736018e-07, "loss": 0.0071, "step": 99990 }, { "epoch": 1.6887898132198467, "grad_norm": 0.18492646515369415, "learning_rate": 7.196149376450229e-07, "loss": 0.007, "step": 100000 }, { "epoch": 1.6889586922011688, "grad_norm": 0.15265798568725586, "learning_rate": 7.188534184858081e-07, "loss": 0.0068, "step": 100010 }, { "epoch": 1.6891275711824907, "grad_norm": 0.5263768434524536, "learning_rate": 7.180922712621186e-07, "loss": 0.0099, "step": 100020 }, { "epoch": 1.6892964501638126, "grad_norm": 0.28002220392227173, "learning_rate": 7.173314960400796e-07, "loss": 0.006, "step": 100030 }, { "epoch": 1.6894653291451345, "grad_norm": 0.19583621621131897, "learning_rate": 7.165710928857861e-07, "loss": 0.0038, "step": 100040 }, { "epoch": 1.6896342081264566, "grad_norm": 0.1843734234571457, "learning_rate": 7.158110618653003e-07, "loss": 0.0067, "step": 100050 }, { "epoch": 1.6898030871077787, "grad_norm": 0.14195576310157776, "learning_rate": 7.150514030446515e-07, "loss": 0.0053, "step": 100060 }, { "epoch": 1.6899719660891006, "grad_norm": 0.2298867404460907, "learning_rate": 7.14292116489837e-07, "loss": 0.0064, "step": 100070 }, { "epoch": 1.6901408450704225, "grad_norm": 0.6216193437576294, "learning_rate": 7.135332022668223e-07, "loss": 0.0126, "step": 100080 }, { "epoch": 1.6903097240517444, "grad_norm": 0.0836702287197113, "learning_rate": 7.127746604415403e-07, "loss": 0.006, "step": 100090 }, { "epoch": 1.6904786030330665, "grad_norm": 0.24385108053684235, "learning_rate": 7.120164910798893e-07, "loss": 0.0046, "step": 100100 }, { "epoch": 1.6906474820143886, "grad_norm": 0.18316559493541718, "learning_rate": 7.112586942477385e-07, "loss": 0.0091, "step": 100110 }, { "epoch": 1.6908163609957105, "grad_norm": 0.5093297958374023, "learning_rate": 7.105012700109232e-07, "loss": 0.0087, "step": 100120 }, { "epoch": 1.6909852399770324, "grad_norm": 0.2647272050380707, "learning_rate": 7.097442184352476e-07, "loss": 0.005, "step": 100130 }, { "epoch": 1.6911541189583543, "grad_norm": 0.14383573830127716, "learning_rate": 7.08987539586481e-07, "loss": 0.0041, "step": 100140 }, { "epoch": 1.6913229979396764, "grad_norm": 0.12171764671802521, "learning_rate": 7.08231233530362e-07, "loss": 0.0059, "step": 100150 }, { "epoch": 1.6914918769209986, "grad_norm": 0.2497597485780716, "learning_rate": 7.074753003325968e-07, "loss": 0.0049, "step": 100160 }, { "epoch": 1.6916607559023205, "grad_norm": 0.36243492364883423, "learning_rate": 7.067197400588593e-07, "loss": 0.0056, "step": 100170 }, { "epoch": 1.6918296348836424, "grad_norm": 0.3890259563922882, "learning_rate": 7.05964552774791e-07, "loss": 0.0064, "step": 100180 }, { "epoch": 1.6919985138649642, "grad_norm": 0.3271164894104004, "learning_rate": 7.052097385460005e-07, "loss": 0.0057, "step": 100190 }, { "epoch": 1.6921673928462864, "grad_norm": 0.14175602793693542, "learning_rate": 7.044552974380637e-07, "loss": 0.0066, "step": 100200 }, { "epoch": 1.6923362718276085, "grad_norm": 0.10959785431623459, "learning_rate": 7.037012295165257e-07, "loss": 0.0067, "step": 100210 }, { "epoch": 1.6925051508089304, "grad_norm": 0.6093137860298157, "learning_rate": 7.029475348468967e-07, "loss": 0.0078, "step": 100220 }, { "epoch": 1.6926740297902523, "grad_norm": 0.16558727622032166, "learning_rate": 7.021942134946569e-07, "loss": 0.0045, "step": 100230 }, { "epoch": 1.6928429087715742, "grad_norm": 0.2792398929595947, "learning_rate": 7.014412655252517e-07, "loss": 0.0059, "step": 100240 }, { "epoch": 1.6930117877528963, "grad_norm": 0.12567263841629028, "learning_rate": 7.006886910040977e-07, "loss": 0.0073, "step": 100250 }, { "epoch": 1.6931806667342184, "grad_norm": 0.18136584758758545, "learning_rate": 6.999364899965744e-07, "loss": 0.0056, "step": 100260 }, { "epoch": 1.6933495457155403, "grad_norm": 0.3649853467941284, "learning_rate": 6.991846625680321e-07, "loss": 0.0086, "step": 100270 }, { "epoch": 1.6935184246968622, "grad_norm": 0.29528817534446716, "learning_rate": 6.984332087837875e-07, "loss": 0.0082, "step": 100280 }, { "epoch": 1.693687303678184, "grad_norm": 0.5181960463523865, "learning_rate": 6.976821287091257e-07, "loss": 0.007, "step": 100290 }, { "epoch": 1.6938561826595062, "grad_norm": 0.1985069215297699, "learning_rate": 6.969314224092977e-07, "loss": 0.0066, "step": 100300 }, { "epoch": 1.6940250616408283, "grad_norm": 0.16064156591892242, "learning_rate": 6.961810899495236e-07, "loss": 0.0063, "step": 100310 }, { "epoch": 1.6941939406221502, "grad_norm": 0.17865246534347534, "learning_rate": 6.954311313949907e-07, "loss": 0.0049, "step": 100320 }, { "epoch": 1.6943628196034721, "grad_norm": 0.1487051248550415, "learning_rate": 6.946815468108548e-07, "loss": 0.0049, "step": 100330 }, { "epoch": 1.694531698584794, "grad_norm": 0.20704887807369232, "learning_rate": 6.939323362622346e-07, "loss": 0.0055, "step": 100340 }, { "epoch": 1.6947005775661161, "grad_norm": 0.21348725259304047, "learning_rate": 6.931834998142217e-07, "loss": 0.0085, "step": 100350 }, { "epoch": 1.6948694565474383, "grad_norm": 0.45931994915008545, "learning_rate": 6.924350375318733e-07, "loss": 0.0076, "step": 100360 }, { "epoch": 1.6950383355287602, "grad_norm": 0.16996392607688904, "learning_rate": 6.916869494802147e-07, "loss": 0.0084, "step": 100370 }, { "epoch": 1.695207214510082, "grad_norm": 0.13492366671562195, "learning_rate": 6.909392357242362e-07, "loss": 0.0057, "step": 100380 }, { "epoch": 1.695376093491404, "grad_norm": 0.6731765270233154, "learning_rate": 6.901918963288973e-07, "loss": 0.0102, "step": 100390 }, { "epoch": 1.695544972472726, "grad_norm": 0.16662119328975677, "learning_rate": 6.894449313591261e-07, "loss": 0.0065, "step": 100400 }, { "epoch": 1.6957138514540482, "grad_norm": 0.18573176860809326, "learning_rate": 6.88698340879817e-07, "loss": 0.0111, "step": 100410 }, { "epoch": 1.69588273043537, "grad_norm": 0.21563342213630676, "learning_rate": 6.879521249558319e-07, "loss": 0.0054, "step": 100420 }, { "epoch": 1.696051609416692, "grad_norm": 0.33118936419487, "learning_rate": 6.872062836519999e-07, "loss": 0.0071, "step": 100430 }, { "epoch": 1.6962204883980139, "grad_norm": 0.33740490674972534, "learning_rate": 6.864608170331177e-07, "loss": 0.0059, "step": 100440 }, { "epoch": 1.696389367379336, "grad_norm": 0.1035131961107254, "learning_rate": 6.857157251639512e-07, "loss": 0.008, "step": 100450 }, { "epoch": 1.696558246360658, "grad_norm": 0.43455979228019714, "learning_rate": 6.849710081092297e-07, "loss": 0.0081, "step": 100460 }, { "epoch": 1.69672712534198, "grad_norm": 0.4353788197040558, "learning_rate": 6.842266659336538e-07, "loss": 0.0087, "step": 100470 }, { "epoch": 1.696896004323302, "grad_norm": 0.16102536022663116, "learning_rate": 6.8348269870189e-07, "loss": 0.007, "step": 100480 }, { "epoch": 1.6970648833046238, "grad_norm": 0.23796598613262177, "learning_rate": 6.827391064785732e-07, "loss": 0.0069, "step": 100490 }, { "epoch": 1.697233762285946, "grad_norm": 0.27612802386283875, "learning_rate": 6.819958893283024e-07, "loss": 0.0082, "step": 100500 }, { "epoch": 1.697402641267268, "grad_norm": 0.24755176901817322, "learning_rate": 6.812530473156487e-07, "loss": 0.0079, "step": 100510 }, { "epoch": 1.69757152024859, "grad_norm": 0.32447099685668945, "learning_rate": 6.805105805051471e-07, "loss": 0.0066, "step": 100520 }, { "epoch": 1.6977403992299118, "grad_norm": 0.271967351436615, "learning_rate": 6.797684889613016e-07, "loss": 0.0048, "step": 100530 }, { "epoch": 1.6979092782112337, "grad_norm": 0.46884748339653015, "learning_rate": 6.79026772748585e-07, "loss": 0.0064, "step": 100540 }, { "epoch": 1.6980781571925558, "grad_norm": 0.4082956612110138, "learning_rate": 6.782854319314336e-07, "loss": 0.0038, "step": 100550 }, { "epoch": 1.698247036173878, "grad_norm": 0.40690621733665466, "learning_rate": 6.775444665742543e-07, "loss": 0.0082, "step": 100560 }, { "epoch": 1.6984159151551999, "grad_norm": 0.1221141368150711, "learning_rate": 6.768038767414193e-07, "loss": 0.0045, "step": 100570 }, { "epoch": 1.6985847941365217, "grad_norm": 0.12241674959659576, "learning_rate": 6.760636624972716e-07, "loss": 0.0105, "step": 100580 }, { "epoch": 1.6987536731178436, "grad_norm": 0.1696355789899826, "learning_rate": 6.753238239061166e-07, "loss": 0.0065, "step": 100590 }, { "epoch": 1.6989225520991658, "grad_norm": 0.07325321435928345, "learning_rate": 6.745843610322312e-07, "loss": 0.0065, "step": 100600 }, { "epoch": 1.6990914310804879, "grad_norm": 0.1719922423362732, "learning_rate": 6.738452739398571e-07, "loss": 0.0069, "step": 100610 }, { "epoch": 1.6992603100618098, "grad_norm": 0.37994152307510376, "learning_rate": 6.73106562693206e-07, "loss": 0.0065, "step": 100620 }, { "epoch": 1.6994291890431317, "grad_norm": 0.30234888195991516, "learning_rate": 6.72368227356453e-07, "loss": 0.0076, "step": 100630 }, { "epoch": 1.6995980680244536, "grad_norm": 0.22533848881721497, "learning_rate": 6.716302679937442e-07, "loss": 0.0056, "step": 100640 }, { "epoch": 1.6997669470057757, "grad_norm": 0.29983586072921753, "learning_rate": 6.708926846691905e-07, "loss": 0.0069, "step": 100650 }, { "epoch": 1.6999358259870978, "grad_norm": 0.5841211080551147, "learning_rate": 6.701554774468744e-07, "loss": 0.0056, "step": 100660 }, { "epoch": 1.7001047049684197, "grad_norm": 0.1846613734960556, "learning_rate": 6.694186463908404e-07, "loss": 0.0076, "step": 100670 }, { "epoch": 1.7002735839497416, "grad_norm": 0.1997556984424591, "learning_rate": 6.686821915651021e-07, "loss": 0.0081, "step": 100680 }, { "epoch": 1.7004424629310635, "grad_norm": 0.20666071772575378, "learning_rate": 6.679461130336418e-07, "loss": 0.0069, "step": 100690 }, { "epoch": 1.7006113419123856, "grad_norm": 0.06615176051855087, "learning_rate": 6.672104108604089e-07, "loss": 0.0084, "step": 100700 }, { "epoch": 1.7007802208937077, "grad_norm": 0.20220474898815155, "learning_rate": 6.664750851093177e-07, "loss": 0.0056, "step": 100710 }, { "epoch": 1.7009490998750296, "grad_norm": 0.4044552147388458, "learning_rate": 6.657401358442522e-07, "loss": 0.0053, "step": 100720 }, { "epoch": 1.7011179788563515, "grad_norm": 0.24886012077331543, "learning_rate": 6.650055631290636e-07, "loss": 0.0066, "step": 100730 }, { "epoch": 1.7012868578376734, "grad_norm": 0.31739795207977295, "learning_rate": 6.642713670275702e-07, "loss": 0.0065, "step": 100740 }, { "epoch": 1.7014557368189955, "grad_norm": 0.14597348868846893, "learning_rate": 6.635375476035549e-07, "loss": 0.0065, "step": 100750 }, { "epoch": 1.7016246158003177, "grad_norm": 0.22485387325286865, "learning_rate": 6.628041049207706e-07, "loss": 0.0066, "step": 100760 }, { "epoch": 1.7017934947816395, "grad_norm": 0.09578128159046173, "learning_rate": 6.620710390429391e-07, "loss": 0.0039, "step": 100770 }, { "epoch": 1.7019623737629614, "grad_norm": 0.20494717359542847, "learning_rate": 6.613383500337473e-07, "loss": 0.0075, "step": 100780 }, { "epoch": 1.7021312527442833, "grad_norm": 0.3036366105079651, "learning_rate": 6.606060379568468e-07, "loss": 0.0067, "step": 100790 }, { "epoch": 1.7023001317256055, "grad_norm": 0.25162097811698914, "learning_rate": 6.59874102875861e-07, "loss": 0.0059, "step": 100800 }, { "epoch": 1.7024690107069276, "grad_norm": 0.16853837668895721, "learning_rate": 6.591425448543781e-07, "loss": 0.0048, "step": 100810 }, { "epoch": 1.7026378896882495, "grad_norm": 0.14245784282684326, "learning_rate": 6.584113639559553e-07, "loss": 0.0074, "step": 100820 }, { "epoch": 1.7028067686695714, "grad_norm": 0.09114793688058853, "learning_rate": 6.576805602441133e-07, "loss": 0.0073, "step": 100830 }, { "epoch": 1.7029756476508933, "grad_norm": 0.48026740550994873, "learning_rate": 6.569501337823442e-07, "loss": 0.0055, "step": 100840 }, { "epoch": 1.7031445266322154, "grad_norm": 0.2577909231185913, "learning_rate": 6.562200846341055e-07, "loss": 0.0043, "step": 100850 }, { "epoch": 1.7033134056135375, "grad_norm": 0.3130808174610138, "learning_rate": 6.554904128628226e-07, "loss": 0.0078, "step": 100860 }, { "epoch": 1.7034822845948594, "grad_norm": 0.22153615951538086, "learning_rate": 6.547611185318863e-07, "loss": 0.0078, "step": 100870 }, { "epoch": 1.7036511635761813, "grad_norm": 0.5886167287826538, "learning_rate": 6.540322017046552e-07, "loss": 0.0063, "step": 100880 }, { "epoch": 1.7038200425575032, "grad_norm": 0.1183406189084053, "learning_rate": 6.533036624444583e-07, "loss": 0.0055, "step": 100890 }, { "epoch": 1.7039889215388253, "grad_norm": 0.1101376935839653, "learning_rate": 6.525755008145895e-07, "loss": 0.0045, "step": 100900 }, { "epoch": 1.7041578005201474, "grad_norm": 0.1940838098526001, "learning_rate": 6.518477168783071e-07, "loss": 0.0075, "step": 100910 }, { "epoch": 1.7043266795014693, "grad_norm": 0.25669220089912415, "learning_rate": 6.511203106988401e-07, "loss": 0.0055, "step": 100920 }, { "epoch": 1.7044955584827912, "grad_norm": 0.4540247917175293, "learning_rate": 6.50393282339385e-07, "loss": 0.0041, "step": 100930 }, { "epoch": 1.7046644374641131, "grad_norm": 0.24339085817337036, "learning_rate": 6.496666318631035e-07, "loss": 0.0048, "step": 100940 }, { "epoch": 1.7048333164454352, "grad_norm": 0.3927467465400696, "learning_rate": 6.489403593331245e-07, "loss": 0.006, "step": 100950 }, { "epoch": 1.7050021954267573, "grad_norm": 0.32741865515708923, "learning_rate": 6.482144648125454e-07, "loss": 0.0072, "step": 100960 }, { "epoch": 1.7051710744080792, "grad_norm": 0.1315220296382904, "learning_rate": 6.474889483644298e-07, "loss": 0.0066, "step": 100970 }, { "epoch": 1.7053399533894011, "grad_norm": 0.34107455611228943, "learning_rate": 6.467638100518092e-07, "loss": 0.0072, "step": 100980 }, { "epoch": 1.705508832370723, "grad_norm": 0.24764682352542877, "learning_rate": 6.460390499376817e-07, "loss": 0.0055, "step": 100990 }, { "epoch": 1.7056777113520452, "grad_norm": 0.24316981434822083, "learning_rate": 6.45314668085013e-07, "loss": 0.01, "step": 101000 }, { "epoch": 1.7058465903333673, "grad_norm": 0.20383307337760925, "learning_rate": 6.445906645567351e-07, "loss": 0.0069, "step": 101010 }, { "epoch": 1.7060154693146892, "grad_norm": 0.2235509604215622, "learning_rate": 6.438670394157476e-07, "loss": 0.0063, "step": 101020 }, { "epoch": 1.706184348296011, "grad_norm": 0.18096232414245605, "learning_rate": 6.43143792724919e-07, "loss": 0.0083, "step": 101030 }, { "epoch": 1.706353227277333, "grad_norm": 0.26366177201271057, "learning_rate": 6.424209245470809e-07, "loss": 0.0069, "step": 101040 }, { "epoch": 1.706522106258655, "grad_norm": 0.17774471640586853, "learning_rate": 6.416984349450345e-07, "loss": 0.0074, "step": 101050 }, { "epoch": 1.7066909852399772, "grad_norm": 0.30311405658721924, "learning_rate": 6.409763239815492e-07, "loss": 0.0083, "step": 101060 }, { "epoch": 1.706859864221299, "grad_norm": 0.231830433011055, "learning_rate": 6.402545917193604e-07, "loss": 0.007, "step": 101070 }, { "epoch": 1.707028743202621, "grad_norm": 0.3082984685897827, "learning_rate": 6.395332382211688e-07, "loss": 0.0089, "step": 101080 }, { "epoch": 1.7071976221839429, "grad_norm": 0.2305542230606079, "learning_rate": 6.388122635496441e-07, "loss": 0.0058, "step": 101090 }, { "epoch": 1.707366501165265, "grad_norm": 0.263169527053833, "learning_rate": 6.380916677674243e-07, "loss": 0.0052, "step": 101100 }, { "epoch": 1.707535380146587, "grad_norm": 0.5212147831916809, "learning_rate": 6.373714509371115e-07, "loss": 0.0061, "step": 101110 }, { "epoch": 1.707704259127909, "grad_norm": 0.1638849675655365, "learning_rate": 6.366516131212769e-07, "loss": 0.0056, "step": 101120 }, { "epoch": 1.707873138109231, "grad_norm": 0.28099289536476135, "learning_rate": 6.359321543824587e-07, "loss": 0.0066, "step": 101130 }, { "epoch": 1.7080420170905528, "grad_norm": 0.18040169775485992, "learning_rate": 6.352130747831608e-07, "loss": 0.0058, "step": 101140 }, { "epoch": 1.708210896071875, "grad_norm": 0.20220737159252167, "learning_rate": 6.344943743858572e-07, "loss": 0.0051, "step": 101150 }, { "epoch": 1.7083797750531968, "grad_norm": 0.25872042775154114, "learning_rate": 6.337760532529841e-07, "loss": 0.009, "step": 101160 }, { "epoch": 1.708548654034519, "grad_norm": 0.23527880012989044, "learning_rate": 6.330581114469486e-07, "loss": 0.0089, "step": 101170 }, { "epoch": 1.7087175330158408, "grad_norm": 0.2390579730272293, "learning_rate": 6.323405490301238e-07, "loss": 0.0051, "step": 101180 }, { "epoch": 1.7088864119971627, "grad_norm": 0.16096140444278717, "learning_rate": 6.316233660648502e-07, "loss": 0.0079, "step": 101190 }, { "epoch": 1.7090552909784849, "grad_norm": 0.4630810618400574, "learning_rate": 6.309065626134336e-07, "loss": 0.0073, "step": 101200 }, { "epoch": 1.7092241699598067, "grad_norm": 0.2409304976463318, "learning_rate": 6.301901387381492e-07, "loss": 0.0059, "step": 101210 }, { "epoch": 1.7093930489411289, "grad_norm": 0.1910097897052765, "learning_rate": 6.294740945012379e-07, "loss": 0.0068, "step": 101220 }, { "epoch": 1.7095619279224508, "grad_norm": 0.41454774141311646, "learning_rate": 6.287584299649074e-07, "loss": 0.0096, "step": 101230 }, { "epoch": 1.7097308069037727, "grad_norm": 0.2645096182823181, "learning_rate": 6.280431451913338e-07, "loss": 0.0081, "step": 101240 }, { "epoch": 1.7098996858850948, "grad_norm": 0.3752205967903137, "learning_rate": 6.273282402426584e-07, "loss": 0.0055, "step": 101250 }, { "epoch": 1.7100685648664167, "grad_norm": 0.2538687288761139, "learning_rate": 6.266137151809909e-07, "loss": 0.0096, "step": 101260 }, { "epoch": 1.7102374438477388, "grad_norm": 0.0961962565779686, "learning_rate": 6.258995700684084e-07, "loss": 0.0079, "step": 101270 }, { "epoch": 1.7104063228290607, "grad_norm": 0.26093098521232605, "learning_rate": 6.251858049669524e-07, "loss": 0.0094, "step": 101280 }, { "epoch": 1.7105752018103826, "grad_norm": 0.23242101073265076, "learning_rate": 6.244724199386338e-07, "loss": 0.0073, "step": 101290 }, { "epoch": 1.7107440807917047, "grad_norm": 0.5309218764305115, "learning_rate": 6.23759415045429e-07, "loss": 0.0059, "step": 101300 }, { "epoch": 1.7109129597730266, "grad_norm": 0.35104241967201233, "learning_rate": 6.230467903492843e-07, "loss": 0.0097, "step": 101310 }, { "epoch": 1.7110818387543487, "grad_norm": 0.09781506657600403, "learning_rate": 6.223345459121083e-07, "loss": 0.0036, "step": 101320 }, { "epoch": 1.7112507177356706, "grad_norm": 0.3047493100166321, "learning_rate": 6.216226817957798e-07, "loss": 0.0075, "step": 101330 }, { "epoch": 1.7114195967169925, "grad_norm": 0.1800195425748825, "learning_rate": 6.209111980621441e-07, "loss": 0.0098, "step": 101340 }, { "epoch": 1.7115884756983146, "grad_norm": 0.31252923607826233, "learning_rate": 6.20200094773013e-07, "loss": 0.0083, "step": 101350 }, { "epoch": 1.7117573546796365, "grad_norm": 0.2097775638103485, "learning_rate": 6.194893719901657e-07, "loss": 0.0066, "step": 101360 }, { "epoch": 1.7119262336609586, "grad_norm": 0.3050503730773926, "learning_rate": 6.187790297753477e-07, "loss": 0.0073, "step": 101370 }, { "epoch": 1.7120951126422805, "grad_norm": 0.05146413668990135, "learning_rate": 6.180690681902717e-07, "loss": 0.0049, "step": 101380 }, { "epoch": 1.7122639916236024, "grad_norm": 0.21371367573738098, "learning_rate": 6.173594872966193e-07, "loss": 0.0067, "step": 101390 }, { "epoch": 1.7124328706049246, "grad_norm": 0.2914169728755951, "learning_rate": 6.166502871560342e-07, "loss": 0.0077, "step": 101400 }, { "epoch": 1.7126017495862464, "grad_norm": 0.21687902510166168, "learning_rate": 6.159414678301318e-07, "loss": 0.007, "step": 101410 }, { "epoch": 1.7127706285675686, "grad_norm": 0.23369525372982025, "learning_rate": 6.152330293804914e-07, "loss": 0.0071, "step": 101420 }, { "epoch": 1.7129395075488905, "grad_norm": 0.151617631316185, "learning_rate": 6.145249718686614e-07, "loss": 0.0069, "step": 101430 }, { "epoch": 1.7131083865302124, "grad_norm": 0.2815706729888916, "learning_rate": 6.138172953561566e-07, "loss": 0.0077, "step": 101440 }, { "epoch": 1.7132772655115345, "grad_norm": 0.40634655952453613, "learning_rate": 6.131099999044566e-07, "loss": 0.0071, "step": 101450 }, { "epoch": 1.7134461444928564, "grad_norm": 0.20314842462539673, "learning_rate": 6.124030855750101e-07, "loss": 0.0051, "step": 101460 }, { "epoch": 1.7136150234741785, "grad_norm": 0.5926129817962646, "learning_rate": 6.116965524292323e-07, "loss": 0.0045, "step": 101470 }, { "epoch": 1.7137839024555004, "grad_norm": 0.20367315411567688, "learning_rate": 6.10990400528505e-07, "loss": 0.0072, "step": 101480 }, { "epoch": 1.7139527814368223, "grad_norm": 0.43935972452163696, "learning_rate": 6.102846299341774e-07, "loss": 0.0083, "step": 101490 }, { "epoch": 1.7141216604181444, "grad_norm": 0.2694533169269562, "learning_rate": 6.09579240707564e-07, "loss": 0.005, "step": 101500 }, { "epoch": 1.7142905393994663, "grad_norm": 0.25964969396591187, "learning_rate": 6.088742329099484e-07, "loss": 0.0071, "step": 101510 }, { "epoch": 1.7144594183807884, "grad_norm": 0.26295188069343567, "learning_rate": 6.081696066025811e-07, "loss": 0.0074, "step": 101520 }, { "epoch": 1.7146282973621103, "grad_norm": 0.21141333878040314, "learning_rate": 6.074653618466747e-07, "loss": 0.0049, "step": 101530 }, { "epoch": 1.7147971763434322, "grad_norm": 0.39017796516418457, "learning_rate": 6.067614987034149e-07, "loss": 0.0068, "step": 101540 }, { "epoch": 1.7149660553247543, "grad_norm": 0.1559397429227829, "learning_rate": 6.060580172339509e-07, "loss": 0.0066, "step": 101550 }, { "epoch": 1.7151349343060762, "grad_norm": 0.24126692116260529, "learning_rate": 6.053549174994006e-07, "loss": 0.0076, "step": 101560 }, { "epoch": 1.7153038132873983, "grad_norm": 0.3389049470424652, "learning_rate": 6.046521995608456e-07, "loss": 0.0066, "step": 101570 }, { "epoch": 1.7154726922687202, "grad_norm": 0.4319494068622589, "learning_rate": 6.039498634793373e-07, "loss": 0.0113, "step": 101580 }, { "epoch": 1.7156415712500421, "grad_norm": 0.3627113103866577, "learning_rate": 6.032479093158927e-07, "loss": 0.0101, "step": 101590 }, { "epoch": 1.7158104502313642, "grad_norm": 0.21255016326904297, "learning_rate": 6.02546337131496e-07, "loss": 0.0134, "step": 101600 }, { "epoch": 1.7159793292126861, "grad_norm": 0.0952327698469162, "learning_rate": 6.018451469870984e-07, "loss": 0.0063, "step": 101610 }, { "epoch": 1.7161482081940083, "grad_norm": 0.3177911937236786, "learning_rate": 6.011443389436167e-07, "loss": 0.0043, "step": 101620 }, { "epoch": 1.7163170871753302, "grad_norm": 0.26063719391822815, "learning_rate": 6.004439130619366e-07, "loss": 0.0081, "step": 101630 }, { "epoch": 1.716485966156652, "grad_norm": 0.2587156593799591, "learning_rate": 5.997438694029095e-07, "loss": 0.0058, "step": 101640 }, { "epoch": 1.7166548451379742, "grad_norm": 0.5174891352653503, "learning_rate": 5.990442080273517e-07, "loss": 0.0075, "step": 101650 }, { "epoch": 1.716823724119296, "grad_norm": 0.16502469778060913, "learning_rate": 5.983449289960491e-07, "loss": 0.0074, "step": 101660 }, { "epoch": 1.7169926031006182, "grad_norm": 0.16859771311283112, "learning_rate": 5.976460323697536e-07, "loss": 0.0064, "step": 101670 }, { "epoch": 1.71716148208194, "grad_norm": 0.17102554440498352, "learning_rate": 5.969475182091844e-07, "loss": 0.0047, "step": 101680 }, { "epoch": 1.717330361063262, "grad_norm": 0.14101839065551758, "learning_rate": 5.962493865750246e-07, "loss": 0.0135, "step": 101690 }, { "epoch": 1.717499240044584, "grad_norm": 0.30820432305336, "learning_rate": 5.955516375279274e-07, "loss": 0.0079, "step": 101700 }, { "epoch": 1.717668119025906, "grad_norm": 0.5191676020622253, "learning_rate": 5.94854271128511e-07, "loss": 0.0064, "step": 101710 }, { "epoch": 1.717836998007228, "grad_norm": 0.3287149965763092, "learning_rate": 5.941572874373614e-07, "loss": 0.0057, "step": 101720 }, { "epoch": 1.71800587698855, "grad_norm": 0.21698932349681854, "learning_rate": 5.934606865150311e-07, "loss": 0.0056, "step": 101730 }, { "epoch": 1.718174755969872, "grad_norm": 0.1810421198606491, "learning_rate": 5.927644684220385e-07, "loss": 0.0071, "step": 101740 }, { "epoch": 1.718343634951194, "grad_norm": 0.3178791403770447, "learning_rate": 5.920686332188691e-07, "loss": 0.0071, "step": 101750 }, { "epoch": 1.718512513932516, "grad_norm": 0.05774726718664169, "learning_rate": 5.913731809659773e-07, "loss": 0.0061, "step": 101760 }, { "epoch": 1.718681392913838, "grad_norm": 0.1716662347316742, "learning_rate": 5.906781117237792e-07, "loss": 0.0048, "step": 101770 }, { "epoch": 1.71885027189516, "grad_norm": 0.19330193102359772, "learning_rate": 5.899834255526626e-07, "loss": 0.0061, "step": 101780 }, { "epoch": 1.7190191508764818, "grad_norm": 0.2083648145198822, "learning_rate": 5.892891225129798e-07, "loss": 0.0095, "step": 101790 }, { "epoch": 1.719188029857804, "grad_norm": 0.36778751015663147, "learning_rate": 5.88595202665051e-07, "loss": 0.0074, "step": 101800 }, { "epoch": 1.7193569088391258, "grad_norm": 0.14053063094615936, "learning_rate": 5.879016660691606e-07, "loss": 0.0043, "step": 101810 }, { "epoch": 1.719525787820448, "grad_norm": 0.11689210683107376, "learning_rate": 5.87208512785562e-07, "loss": 0.0086, "step": 101820 }, { "epoch": 1.7196946668017699, "grad_norm": 0.18473762273788452, "learning_rate": 5.86515742874475e-07, "loss": 0.0051, "step": 101830 }, { "epoch": 1.7198635457830918, "grad_norm": 0.2797435522079468, "learning_rate": 5.858233563960852e-07, "loss": 0.0051, "step": 101840 }, { "epoch": 1.7200324247644139, "grad_norm": 0.23213806748390198, "learning_rate": 5.851313534105463e-07, "loss": 0.0114, "step": 101850 }, { "epoch": 1.7202013037457358, "grad_norm": 0.32422399520874023, "learning_rate": 5.844397339779767e-07, "loss": 0.0067, "step": 101860 }, { "epoch": 1.7203701827270579, "grad_norm": 0.3362427055835724, "learning_rate": 5.837484981584641e-07, "loss": 0.0065, "step": 101870 }, { "epoch": 1.7205390617083798, "grad_norm": 0.12109630554914474, "learning_rate": 5.830576460120596e-07, "loss": 0.0038, "step": 101880 }, { "epoch": 1.7207079406897017, "grad_norm": 0.1068146675825119, "learning_rate": 5.823671775987849e-07, "loss": 0.004, "step": 101890 }, { "epoch": 1.7208768196710238, "grad_norm": 0.3327108323574066, "learning_rate": 5.816770929786242e-07, "loss": 0.0069, "step": 101900 }, { "epoch": 1.7210456986523457, "grad_norm": 0.4582163095474243, "learning_rate": 5.809873922115312e-07, "loss": 0.008, "step": 101910 }, { "epoch": 1.7212145776336678, "grad_norm": 0.3655541241168976, "learning_rate": 5.80298075357425e-07, "loss": 0.0084, "step": 101920 }, { "epoch": 1.7213834566149897, "grad_norm": 0.38997673988342285, "learning_rate": 5.796091424761935e-07, "loss": 0.0078, "step": 101930 }, { "epoch": 1.7215523355963116, "grad_norm": 0.2345806509256363, "learning_rate": 5.789205936276865e-07, "loss": 0.0076, "step": 101940 }, { "epoch": 1.7217212145776337, "grad_norm": 0.196815624833107, "learning_rate": 5.782324288717256e-07, "loss": 0.0046, "step": 101950 }, { "epoch": 1.7218900935589556, "grad_norm": 0.29176023602485657, "learning_rate": 5.775446482680964e-07, "loss": 0.0054, "step": 101960 }, { "epoch": 1.7220589725402777, "grad_norm": 0.15801310539245605, "learning_rate": 5.768572518765509e-07, "loss": 0.0038, "step": 101970 }, { "epoch": 1.7222278515215996, "grad_norm": 0.22115042805671692, "learning_rate": 5.761702397568091e-07, "loss": 0.0039, "step": 101980 }, { "epoch": 1.7223967305029215, "grad_norm": 0.26064762473106384, "learning_rate": 5.75483611968557e-07, "loss": 0.0091, "step": 101990 }, { "epoch": 1.7225656094842436, "grad_norm": 0.19776566326618195, "learning_rate": 5.747973685714469e-07, "loss": 0.0039, "step": 102000 }, { "epoch": 1.7227344884655655, "grad_norm": 0.17197617888450623, "learning_rate": 5.741115096250993e-07, "loss": 0.0046, "step": 102010 }, { "epoch": 1.7229033674468877, "grad_norm": 0.2333928495645523, "learning_rate": 5.734260351890974e-07, "loss": 0.0059, "step": 102020 }, { "epoch": 1.7230722464282096, "grad_norm": 0.18262311816215515, "learning_rate": 5.727409453229949e-07, "loss": 0.0077, "step": 102030 }, { "epoch": 1.7232411254095314, "grad_norm": 0.48257899284362793, "learning_rate": 5.7205624008631e-07, "loss": 0.0087, "step": 102040 }, { "epoch": 1.7234100043908536, "grad_norm": 0.2986753582954407, "learning_rate": 5.713719195385309e-07, "loss": 0.0056, "step": 102050 }, { "epoch": 1.7235788833721755, "grad_norm": 0.3233277499675751, "learning_rate": 5.70687983739106e-07, "loss": 0.0036, "step": 102060 }, { "epoch": 1.7237477623534976, "grad_norm": 0.423279345035553, "learning_rate": 5.700044327474558e-07, "loss": 0.0119, "step": 102070 }, { "epoch": 1.7239166413348195, "grad_norm": 0.23689217865467072, "learning_rate": 5.693212666229647e-07, "loss": 0.0115, "step": 102080 }, { "epoch": 1.7240855203161414, "grad_norm": 0.1098979264497757, "learning_rate": 5.68638485424986e-07, "loss": 0.0059, "step": 102090 }, { "epoch": 1.7242543992974635, "grad_norm": 0.08405335247516632, "learning_rate": 5.679560892128366e-07, "loss": 0.005, "step": 102100 }, { "epoch": 1.7244232782787854, "grad_norm": 0.23887164890766144, "learning_rate": 5.672740780458025e-07, "loss": 0.0063, "step": 102110 }, { "epoch": 1.7245921572601075, "grad_norm": 0.16486380994319916, "learning_rate": 5.665924519831345e-07, "loss": 0.0029, "step": 102120 }, { "epoch": 1.7247610362414294, "grad_norm": 0.5136582851409912, "learning_rate": 5.659112110840515e-07, "loss": 0.0104, "step": 102130 }, { "epoch": 1.7249299152227513, "grad_norm": 0.3302861154079437, "learning_rate": 5.652303554077366e-07, "loss": 0.0039, "step": 102140 }, { "epoch": 1.7250987942040734, "grad_norm": 0.18729077279567719, "learning_rate": 5.645498850133413e-07, "loss": 0.0084, "step": 102150 }, { "epoch": 1.7252676731853953, "grad_norm": 0.2485899031162262, "learning_rate": 5.638697999599835e-07, "loss": 0.0063, "step": 102160 }, { "epoch": 1.7254365521667174, "grad_norm": 0.06564961373806, "learning_rate": 5.631901003067486e-07, "loss": 0.0082, "step": 102170 }, { "epoch": 1.7256054311480393, "grad_norm": 0.3081001043319702, "learning_rate": 5.62510786112685e-07, "loss": 0.0046, "step": 102180 }, { "epoch": 1.7257743101293612, "grad_norm": 0.19905097782611847, "learning_rate": 5.618318574368103e-07, "loss": 0.0068, "step": 102190 }, { "epoch": 1.7259431891106831, "grad_norm": 0.25925374031066895, "learning_rate": 5.611533143381093e-07, "loss": 0.0094, "step": 102200 }, { "epoch": 1.7261120680920052, "grad_norm": 0.21437124907970428, "learning_rate": 5.604751568755307e-07, "loss": 0.0065, "step": 102210 }, { "epoch": 1.7262809470733274, "grad_norm": 0.22085440158843994, "learning_rate": 5.597973851079924e-07, "loss": 0.0059, "step": 102220 }, { "epoch": 1.7264498260546493, "grad_norm": 0.24822014570236206, "learning_rate": 5.591199990943775e-07, "loss": 0.0061, "step": 102230 }, { "epoch": 1.7266187050359711, "grad_norm": 0.30374547839164734, "learning_rate": 5.584429988935347e-07, "loss": 0.0046, "step": 102240 }, { "epoch": 1.726787584017293, "grad_norm": 0.1101081594824791, "learning_rate": 5.577663845642817e-07, "loss": 0.0069, "step": 102250 }, { "epoch": 1.7269564629986152, "grad_norm": 0.3149046301841736, "learning_rate": 5.570901561653991e-07, "loss": 0.005, "step": 102260 }, { "epoch": 1.7271253419799373, "grad_norm": 0.23270869255065918, "learning_rate": 5.564143137556372e-07, "loss": 0.0049, "step": 102270 }, { "epoch": 1.7272942209612592, "grad_norm": 0.22884853184223175, "learning_rate": 5.557388573937111e-07, "loss": 0.01, "step": 102280 }, { "epoch": 1.727463099942581, "grad_norm": 0.0454212948679924, "learning_rate": 5.550637871383041e-07, "loss": 0.0055, "step": 102290 }, { "epoch": 1.727631978923903, "grad_norm": 0.18887993693351746, "learning_rate": 5.543891030480625e-07, "loss": 0.0065, "step": 102300 }, { "epoch": 1.727800857905225, "grad_norm": 0.3055291473865509, "learning_rate": 5.537148051816022e-07, "loss": 0.0077, "step": 102310 }, { "epoch": 1.7279697368865472, "grad_norm": 0.2290903925895691, "learning_rate": 5.530408935975046e-07, "loss": 0.0084, "step": 102320 }, { "epoch": 1.728138615867869, "grad_norm": 0.21610333025455475, "learning_rate": 5.523673683543168e-07, "loss": 0.0064, "step": 102330 }, { "epoch": 1.728307494849191, "grad_norm": 0.4326232969760895, "learning_rate": 5.516942295105543e-07, "loss": 0.0081, "step": 102340 }, { "epoch": 1.728476373830513, "grad_norm": 0.08150053024291992, "learning_rate": 5.510214771246974e-07, "loss": 0.0041, "step": 102350 }, { "epoch": 1.728645252811835, "grad_norm": 0.37836721539497375, "learning_rate": 5.503491112551923e-07, "loss": 0.0077, "step": 102360 }, { "epoch": 1.7288141317931571, "grad_norm": 0.36916399002075195, "learning_rate": 5.496771319604533e-07, "loss": 0.0089, "step": 102370 }, { "epoch": 1.728983010774479, "grad_norm": 0.3080129623413086, "learning_rate": 5.490055392988613e-07, "loss": 0.0045, "step": 102380 }, { "epoch": 1.729151889755801, "grad_norm": 0.28206732869148254, "learning_rate": 5.483343333287605e-07, "loss": 0.0084, "step": 102390 }, { "epoch": 1.7293207687371228, "grad_norm": 0.12146713584661484, "learning_rate": 5.476635141084641e-07, "loss": 0.0032, "step": 102400 }, { "epoch": 1.729489647718445, "grad_norm": 0.12950053811073303, "learning_rate": 5.469930816962521e-07, "loss": 0.0073, "step": 102410 }, { "epoch": 1.729658526699767, "grad_norm": 0.12402493506669998, "learning_rate": 5.463230361503701e-07, "loss": 0.0053, "step": 102420 }, { "epoch": 1.729827405681089, "grad_norm": 0.6194238066673279, "learning_rate": 5.45653377529029e-07, "loss": 0.0073, "step": 102430 }, { "epoch": 1.7299962846624108, "grad_norm": 0.23276276886463165, "learning_rate": 5.449841058904076e-07, "loss": 0.0049, "step": 102440 }, { "epoch": 1.7301651636437327, "grad_norm": 0.34044167399406433, "learning_rate": 5.443152212926506e-07, "loss": 0.0057, "step": 102450 }, { "epoch": 1.7303340426250549, "grad_norm": 0.1355738490819931, "learning_rate": 5.436467237938692e-07, "loss": 0.0059, "step": 102460 }, { "epoch": 1.730502921606377, "grad_norm": 0.20499983429908752, "learning_rate": 5.429786134521408e-07, "loss": 0.0065, "step": 102470 }, { "epoch": 1.7306718005876989, "grad_norm": 0.18873056769371033, "learning_rate": 5.423108903255086e-07, "loss": 0.0086, "step": 102480 }, { "epoch": 1.7308406795690208, "grad_norm": 0.25430750846862793, "learning_rate": 5.41643554471984e-07, "loss": 0.0063, "step": 102490 }, { "epoch": 1.7310095585503427, "grad_norm": 0.1789785623550415, "learning_rate": 5.40976605949543e-07, "loss": 0.0061, "step": 102500 }, { "epoch": 1.7311784375316648, "grad_norm": 0.18963663280010223, "learning_rate": 5.403100448161275e-07, "loss": 0.0065, "step": 102510 }, { "epoch": 1.731347316512987, "grad_norm": 0.3065095841884613, "learning_rate": 5.39643871129647e-07, "loss": 0.008, "step": 102520 }, { "epoch": 1.7315161954943088, "grad_norm": 0.12658967077732086, "learning_rate": 5.389780849479781e-07, "loss": 0.0053, "step": 102530 }, { "epoch": 1.7316850744756307, "grad_norm": 0.2872237265110016, "learning_rate": 5.383126863289622e-07, "loss": 0.0086, "step": 102540 }, { "epoch": 1.7318539534569526, "grad_norm": 0.22245414555072784, "learning_rate": 5.376476753304072e-07, "loss": 0.0075, "step": 102550 }, { "epoch": 1.7320228324382747, "grad_norm": 0.15838328003883362, "learning_rate": 5.369830520100871e-07, "loss": 0.0056, "step": 102560 }, { "epoch": 1.7321917114195968, "grad_norm": 0.02414323389530182, "learning_rate": 5.363188164257427e-07, "loss": 0.0081, "step": 102570 }, { "epoch": 1.7323605904009187, "grad_norm": 0.19366703927516937, "learning_rate": 5.356549686350837e-07, "loss": 0.0067, "step": 102580 }, { "epoch": 1.7325294693822406, "grad_norm": 0.71039879322052, "learning_rate": 5.349915086957808e-07, "loss": 0.0056, "step": 102590 }, { "epoch": 1.7326983483635625, "grad_norm": 0.16292545199394226, "learning_rate": 5.343284366654744e-07, "loss": 0.0075, "step": 102600 }, { "epoch": 1.7328672273448846, "grad_norm": 0.3506486713886261, "learning_rate": 5.336657526017713e-07, "loss": 0.0061, "step": 102610 }, { "epoch": 1.7330361063262067, "grad_norm": 0.08238302916288376, "learning_rate": 5.330034565622444e-07, "loss": 0.0054, "step": 102620 }, { "epoch": 1.7332049853075286, "grad_norm": 0.14629113674163818, "learning_rate": 5.323415486044298e-07, "loss": 0.0071, "step": 102630 }, { "epoch": 1.7333738642888505, "grad_norm": 0.09397418051958084, "learning_rate": 5.316800287858348e-07, "loss": 0.0075, "step": 102640 }, { "epoch": 1.7335427432701724, "grad_norm": 0.1450488120317459, "learning_rate": 5.310188971639291e-07, "loss": 0.007, "step": 102650 }, { "epoch": 1.7337116222514946, "grad_norm": 0.26692843437194824, "learning_rate": 5.303581537961522e-07, "loss": 0.006, "step": 102660 }, { "epoch": 1.7338805012328167, "grad_norm": 0.1684781014919281, "learning_rate": 5.29697798739906e-07, "loss": 0.0055, "step": 102670 }, { "epoch": 1.7340493802141386, "grad_norm": 0.0636628046631813, "learning_rate": 5.290378320525608e-07, "loss": 0.0035, "step": 102680 }, { "epoch": 1.7342182591954605, "grad_norm": 0.226774662733078, "learning_rate": 5.283782537914522e-07, "loss": 0.0064, "step": 102690 }, { "epoch": 1.7343871381767824, "grad_norm": 0.20198605954647064, "learning_rate": 5.277190640138857e-07, "loss": 0.0062, "step": 102700 }, { "epoch": 1.7345560171581045, "grad_norm": 0.4936941862106323, "learning_rate": 5.270602627771271e-07, "loss": 0.0085, "step": 102710 }, { "epoch": 1.7347248961394266, "grad_norm": 0.22771458327770233, "learning_rate": 5.264018501384127e-07, "loss": 0.008, "step": 102720 }, { "epoch": 1.7348937751207485, "grad_norm": 0.222897469997406, "learning_rate": 5.257438261549436e-07, "loss": 0.0093, "step": 102730 }, { "epoch": 1.7350626541020704, "grad_norm": 0.3235233426094055, "learning_rate": 5.250861908838884e-07, "loss": 0.0065, "step": 102740 }, { "epoch": 1.7352315330833923, "grad_norm": 0.16953808069229126, "learning_rate": 5.24428944382378e-07, "loss": 0.0047, "step": 102750 }, { "epoch": 1.7354004120647144, "grad_norm": 0.20221184194087982, "learning_rate": 5.237720867075147e-07, "loss": 0.0069, "step": 102760 }, { "epoch": 1.7355692910460365, "grad_norm": 0.26238638162612915, "learning_rate": 5.231156179163638e-07, "loss": 0.007, "step": 102770 }, { "epoch": 1.7357381700273584, "grad_norm": 0.32241082191467285, "learning_rate": 5.224595380659586e-07, "loss": 0.0062, "step": 102780 }, { "epoch": 1.7359070490086803, "grad_norm": 0.3075038492679596, "learning_rate": 5.218038472132975e-07, "loss": 0.0059, "step": 102790 }, { "epoch": 1.7360759279900022, "grad_norm": 0.15745918452739716, "learning_rate": 5.211485454153437e-07, "loss": 0.0069, "step": 102800 }, { "epoch": 1.7362448069713243, "grad_norm": 0.290757954120636, "learning_rate": 5.204936327290289e-07, "loss": 0.0104, "step": 102810 }, { "epoch": 1.7364136859526464, "grad_norm": 0.18368054926395416, "learning_rate": 5.198391092112515e-07, "loss": 0.0063, "step": 102820 }, { "epoch": 1.7365825649339683, "grad_norm": 0.14439897239208221, "learning_rate": 5.191849749188754e-07, "loss": 0.0093, "step": 102830 }, { "epoch": 1.7367514439152902, "grad_norm": 0.326864093542099, "learning_rate": 5.185312299087281e-07, "loss": 0.0066, "step": 102840 }, { "epoch": 1.7369203228966121, "grad_norm": 0.11146481335163116, "learning_rate": 5.178778742376061e-07, "loss": 0.0073, "step": 102850 }, { "epoch": 1.7370892018779343, "grad_norm": 0.3003244996070862, "learning_rate": 5.172249079622715e-07, "loss": 0.0113, "step": 102860 }, { "epoch": 1.7372580808592564, "grad_norm": 0.20251724123954773, "learning_rate": 5.165723311394532e-07, "loss": 0.0044, "step": 102870 }, { "epoch": 1.7374269598405783, "grad_norm": 0.25890931487083435, "learning_rate": 5.159201438258443e-07, "loss": 0.0057, "step": 102880 }, { "epoch": 1.7375958388219002, "grad_norm": 0.19668987393379211, "learning_rate": 5.152683460781049e-07, "loss": 0.0047, "step": 102890 }, { "epoch": 1.737764717803222, "grad_norm": 0.22871164977550507, "learning_rate": 5.146169379528632e-07, "loss": 0.0075, "step": 102900 }, { "epoch": 1.7379335967845442, "grad_norm": 0.24683871865272522, "learning_rate": 5.139659195067115e-07, "loss": 0.006, "step": 102910 }, { "epoch": 1.7381024757658663, "grad_norm": 0.1527993232011795, "learning_rate": 5.133152907962058e-07, "loss": 0.0037, "step": 102920 }, { "epoch": 1.7382713547471882, "grad_norm": 0.30905577540397644, "learning_rate": 5.126650518778753e-07, "loss": 0.0065, "step": 102930 }, { "epoch": 1.73844023372851, "grad_norm": 0.15055283904075623, "learning_rate": 5.120152028082093e-07, "loss": 0.0052, "step": 102940 }, { "epoch": 1.738609112709832, "grad_norm": 0.24558614194393158, "learning_rate": 5.113657436436659e-07, "loss": 0.0062, "step": 102950 }, { "epoch": 1.738777991691154, "grad_norm": 0.24577650427818298, "learning_rate": 5.107166744406672e-07, "loss": 0.0075, "step": 102960 }, { "epoch": 1.7389468706724762, "grad_norm": 0.035897426307201385, "learning_rate": 5.100679952556031e-07, "loss": 0.0051, "step": 102970 }, { "epoch": 1.7391157496537981, "grad_norm": 0.36205020546913147, "learning_rate": 5.094197061448303e-07, "loss": 0.0067, "step": 102980 }, { "epoch": 1.73928462863512, "grad_norm": 0.382749080657959, "learning_rate": 5.087718071646702e-07, "loss": 0.0065, "step": 102990 }, { "epoch": 1.739453507616442, "grad_norm": 0.7850126624107361, "learning_rate": 5.081242983714091e-07, "loss": 0.0095, "step": 103000 }, { "epoch": 1.739622386597764, "grad_norm": 0.15120066702365875, "learning_rate": 5.074771798213024e-07, "loss": 0.0059, "step": 103010 }, { "epoch": 1.7397912655790861, "grad_norm": 0.3418465256690979, "learning_rate": 5.068304515705702e-07, "loss": 0.0078, "step": 103020 }, { "epoch": 1.739960144560408, "grad_norm": 0.3221154808998108, "learning_rate": 5.061841136753992e-07, "loss": 0.0107, "step": 103030 }, { "epoch": 1.74012902354173, "grad_norm": 0.16768741607666016, "learning_rate": 5.05538166191939e-07, "loss": 0.0049, "step": 103040 }, { "epoch": 1.7402979025230518, "grad_norm": 0.16285502910614014, "learning_rate": 5.048926091763107e-07, "loss": 0.0063, "step": 103050 }, { "epoch": 1.740466781504374, "grad_norm": 0.3046931326389313, "learning_rate": 5.042474426845978e-07, "loss": 0.007, "step": 103060 }, { "epoch": 1.740635660485696, "grad_norm": 0.33598756790161133, "learning_rate": 5.036026667728517e-07, "loss": 0.0064, "step": 103070 }, { "epoch": 1.740804539467018, "grad_norm": 0.3336758613586426, "learning_rate": 5.029582814970874e-07, "loss": 0.0056, "step": 103080 }, { "epoch": 1.7409734184483399, "grad_norm": 0.5552276968955994, "learning_rate": 5.02314286913288e-07, "loss": 0.0095, "step": 103090 }, { "epoch": 1.7411422974296618, "grad_norm": 0.039370086044073105, "learning_rate": 5.01670683077402e-07, "loss": 0.0054, "step": 103100 }, { "epoch": 1.7413111764109839, "grad_norm": 0.35075050592422485, "learning_rate": 5.01027470045346e-07, "loss": 0.0073, "step": 103110 }, { "epoch": 1.741480055392306, "grad_norm": 0.1172325387597084, "learning_rate": 5.003846478729979e-07, "loss": 0.0095, "step": 103120 }, { "epoch": 1.7416489343736279, "grad_norm": 0.17665275931358337, "learning_rate": 4.997422166162058e-07, "loss": 0.008, "step": 103130 }, { "epoch": 1.7418178133549498, "grad_norm": 0.14424751698970795, "learning_rate": 4.991001763307829e-07, "loss": 0.0043, "step": 103140 }, { "epoch": 1.7419866923362717, "grad_norm": 0.2813739478588104, "learning_rate": 4.984585270725073e-07, "loss": 0.0074, "step": 103150 }, { "epoch": 1.7421555713175938, "grad_norm": 0.18287691473960876, "learning_rate": 4.97817268897125e-07, "loss": 0.0077, "step": 103160 }, { "epoch": 1.742324450298916, "grad_norm": 0.13131996989250183, "learning_rate": 4.971764018603459e-07, "loss": 0.0058, "step": 103170 }, { "epoch": 1.7424933292802378, "grad_norm": 0.36502817273139954, "learning_rate": 4.965359260178476e-07, "loss": 0.0091, "step": 103180 }, { "epoch": 1.7426622082615597, "grad_norm": 0.15227699279785156, "learning_rate": 4.95895841425274e-07, "loss": 0.0046, "step": 103190 }, { "epoch": 1.7428310872428816, "grad_norm": 0.34885284304618835, "learning_rate": 4.952561481382318e-07, "loss": 0.0051, "step": 103200 }, { "epoch": 1.7429999662242037, "grad_norm": 0.25647786259651184, "learning_rate": 4.946168462122969e-07, "loss": 0.0047, "step": 103210 }, { "epoch": 1.7431688452055258, "grad_norm": 0.2794713079929352, "learning_rate": 4.939779357030111e-07, "loss": 0.0103, "step": 103220 }, { "epoch": 1.7433377241868477, "grad_norm": 0.3023342192173004, "learning_rate": 4.933394166658806e-07, "loss": 0.0052, "step": 103230 }, { "epoch": 1.7435066031681696, "grad_norm": 0.11131628602743149, "learning_rate": 4.927012891563793e-07, "loss": 0.0035, "step": 103240 }, { "epoch": 1.7436754821494915, "grad_norm": 0.5395594239234924, "learning_rate": 4.920635532299445e-07, "loss": 0.0074, "step": 103250 }, { "epoch": 1.7438443611308136, "grad_norm": 0.5255224108695984, "learning_rate": 4.914262089419819e-07, "loss": 0.0109, "step": 103260 }, { "epoch": 1.7440132401121358, "grad_norm": 0.35688990354537964, "learning_rate": 4.907892563478628e-07, "loss": 0.0083, "step": 103270 }, { "epoch": 1.7441821190934577, "grad_norm": 0.1766558289527893, "learning_rate": 4.90152695502924e-07, "loss": 0.007, "step": 103280 }, { "epoch": 1.7443509980747796, "grad_norm": 0.22019675374031067, "learning_rate": 4.89516526462468e-07, "loss": 0.0057, "step": 103290 }, { "epoch": 1.7445198770561015, "grad_norm": 0.20426395535469055, "learning_rate": 4.888807492817637e-07, "loss": 0.0051, "step": 103300 }, { "epoch": 1.7446887560374236, "grad_norm": 0.7570374011993408, "learning_rate": 4.882453640160462e-07, "loss": 0.0087, "step": 103310 }, { "epoch": 1.7448576350187457, "grad_norm": 0.43269070982933044, "learning_rate": 4.876103707205171e-07, "loss": 0.0083, "step": 103320 }, { "epoch": 1.7450265140000676, "grad_norm": 0.1314755231142044, "learning_rate": 4.869757694503408e-07, "loss": 0.0043, "step": 103330 }, { "epoch": 1.7451953929813895, "grad_norm": 0.27559465169906616, "learning_rate": 4.863415602606508e-07, "loss": 0.0066, "step": 103340 }, { "epoch": 1.7453642719627114, "grad_norm": 0.2957233190536499, "learning_rate": 4.85707743206546e-07, "loss": 0.0064, "step": 103350 }, { "epoch": 1.7455331509440335, "grad_norm": 0.2595444619655609, "learning_rate": 4.850743183430922e-07, "loss": 0.0064, "step": 103360 }, { "epoch": 1.7457020299253556, "grad_norm": 0.31138959527015686, "learning_rate": 4.844412857253167e-07, "loss": 0.0059, "step": 103370 }, { "epoch": 1.7458709089066775, "grad_norm": 0.13854724168777466, "learning_rate": 4.83808645408218e-07, "loss": 0.0093, "step": 103380 }, { "epoch": 1.7460397878879994, "grad_norm": 0.22419439256191254, "learning_rate": 4.831763974467579e-07, "loss": 0.0038, "step": 103390 }, { "epoch": 1.7462086668693213, "grad_norm": 0.4680260419845581, "learning_rate": 4.825445418958641e-07, "loss": 0.0112, "step": 103400 }, { "epoch": 1.7463775458506434, "grad_norm": 0.26211389899253845, "learning_rate": 4.81913078810431e-07, "loss": 0.0085, "step": 103410 }, { "epoch": 1.7465464248319655, "grad_norm": 0.4273739159107208, "learning_rate": 4.81282008245319e-07, "loss": 0.0063, "step": 103420 }, { "epoch": 1.7467153038132874, "grad_norm": 0.2635899484157562, "learning_rate": 4.806513302553534e-07, "loss": 0.0064, "step": 103430 }, { "epoch": 1.7468841827946093, "grad_norm": 0.19911925494670868, "learning_rate": 4.800210448953269e-07, "loss": 0.0065, "step": 103440 }, { "epoch": 1.7470530617759312, "grad_norm": 0.09733831882476807, "learning_rate": 4.793911522199956e-07, "loss": 0.005, "step": 103450 }, { "epoch": 1.7472219407572533, "grad_norm": 0.11809467524290085, "learning_rate": 4.787616522840838e-07, "loss": 0.0071, "step": 103460 }, { "epoch": 1.7473908197385755, "grad_norm": 0.37394821643829346, "learning_rate": 4.781325451422808e-07, "loss": 0.008, "step": 103470 }, { "epoch": 1.7475596987198974, "grad_norm": 0.3036583364009857, "learning_rate": 4.77503830849243e-07, "loss": 0.009, "step": 103480 }, { "epoch": 1.7477285777012193, "grad_norm": 0.25817930698394775, "learning_rate": 4.768755094595895e-07, "loss": 0.0053, "step": 103490 }, { "epoch": 1.7478974566825412, "grad_norm": 0.19545462727546692, "learning_rate": 4.762475810279088e-07, "loss": 0.0042, "step": 103500 }, { "epoch": 1.7480663356638633, "grad_norm": 0.12358104437589645, "learning_rate": 4.7562004560875286e-07, "loss": 0.007, "step": 103510 }, { "epoch": 1.7482352146451854, "grad_norm": 0.38418105244636536, "learning_rate": 4.749929032566414e-07, "loss": 0.0069, "step": 103520 }, { "epoch": 1.7484040936265073, "grad_norm": 0.12953504920005798, "learning_rate": 4.74366154026058e-07, "loss": 0.0053, "step": 103530 }, { "epoch": 1.7485729726078292, "grad_norm": 0.15611642599105835, "learning_rate": 4.737397979714542e-07, "loss": 0.0052, "step": 103540 }, { "epoch": 1.748741851589151, "grad_norm": 0.17037349939346313, "learning_rate": 4.731138351472453e-07, "loss": 0.0049, "step": 103550 }, { "epoch": 1.7489107305704732, "grad_norm": 0.5288131237030029, "learning_rate": 4.7248826560781504e-07, "loss": 0.0066, "step": 103560 }, { "epoch": 1.7490796095517953, "grad_norm": 0.44046932458877563, "learning_rate": 4.718630894075094e-07, "loss": 0.008, "step": 103570 }, { "epoch": 1.7492484885331172, "grad_norm": 0.42190998792648315, "learning_rate": 4.7123830660064263e-07, "loss": 0.0064, "step": 103580 }, { "epoch": 1.749417367514439, "grad_norm": 0.2169916033744812, "learning_rate": 4.7061391724149473e-07, "loss": 0.0064, "step": 103590 }, { "epoch": 1.749586246495761, "grad_norm": 0.26838740706443787, "learning_rate": 4.699899213843118e-07, "loss": 0.0065, "step": 103600 }, { "epoch": 1.7497551254770831, "grad_norm": 0.2618568539619446, "learning_rate": 4.693663190833031e-07, "loss": 0.0101, "step": 103610 }, { "epoch": 1.7499240044584052, "grad_norm": 0.2676297724246979, "learning_rate": 4.687431103926471e-07, "loss": 0.0045, "step": 103620 }, { "epoch": 1.7500928834397271, "grad_norm": 0.05623999238014221, "learning_rate": 4.6812029536648654e-07, "loss": 0.004, "step": 103630 }, { "epoch": 1.750261762421049, "grad_norm": 0.37823301553726196, "learning_rate": 4.6749787405892976e-07, "loss": 0.0102, "step": 103640 }, { "epoch": 1.750430641402371, "grad_norm": 0.10192004591226578, "learning_rate": 4.6687584652405137e-07, "loss": 0.0056, "step": 103650 }, { "epoch": 1.750599520383693, "grad_norm": 0.1155049130320549, "learning_rate": 4.66254212815892e-07, "loss": 0.0082, "step": 103660 }, { "epoch": 1.7507683993650152, "grad_norm": 0.2950233519077301, "learning_rate": 4.656329729884568e-07, "loss": 0.0071, "step": 103670 }, { "epoch": 1.750937278346337, "grad_norm": 0.2147594839334488, "learning_rate": 4.650121270957181e-07, "loss": 0.0066, "step": 103680 }, { "epoch": 1.751106157327659, "grad_norm": 0.5363995432853699, "learning_rate": 4.6439167519161445e-07, "loss": 0.0095, "step": 103690 }, { "epoch": 1.7512750363089808, "grad_norm": 0.16567622125148773, "learning_rate": 4.637716173300466e-07, "loss": 0.0057, "step": 103700 }, { "epoch": 1.751443915290303, "grad_norm": 0.2255406230688095, "learning_rate": 4.6315195356488596e-07, "loss": 0.0051, "step": 103710 }, { "epoch": 1.751612794271625, "grad_norm": 0.3509506583213806, "learning_rate": 4.625326839499661e-07, "loss": 0.0067, "step": 103720 }, { "epoch": 1.751781673252947, "grad_norm": 0.3459523618221283, "learning_rate": 4.619138085390895e-07, "loss": 0.0057, "step": 103730 }, { "epoch": 1.7519505522342689, "grad_norm": 0.12276961654424667, "learning_rate": 4.612953273860199e-07, "loss": 0.0097, "step": 103740 }, { "epoch": 1.7521194312155908, "grad_norm": 0.130599707365036, "learning_rate": 4.606772405444909e-07, "loss": 0.008, "step": 103750 }, { "epoch": 1.752288310196913, "grad_norm": 0.11767283082008362, "learning_rate": 4.600595480682002e-07, "loss": 0.0041, "step": 103760 }, { "epoch": 1.752457189178235, "grad_norm": 0.31938251852989197, "learning_rate": 4.594422500108114e-07, "loss": 0.0056, "step": 103770 }, { "epoch": 1.752626068159557, "grad_norm": 0.18529103696346283, "learning_rate": 4.5882534642595343e-07, "loss": 0.0054, "step": 103780 }, { "epoch": 1.7527949471408788, "grad_norm": 0.3233061134815216, "learning_rate": 4.5820883736722223e-07, "loss": 0.0077, "step": 103790 }, { "epoch": 1.7529638261222007, "grad_norm": 0.3300824761390686, "learning_rate": 4.5759272288817825e-07, "loss": 0.0068, "step": 103800 }, { "epoch": 1.7531327051035228, "grad_norm": 0.20942659676074982, "learning_rate": 4.569770030423487e-07, "loss": 0.0061, "step": 103810 }, { "epoch": 1.753301584084845, "grad_norm": 0.09193890541791916, "learning_rate": 4.5636167788322416e-07, "loss": 0.0064, "step": 103820 }, { "epoch": 1.7534704630661668, "grad_norm": 0.17703615128993988, "learning_rate": 4.5574674746426293e-07, "loss": 0.0056, "step": 103830 }, { "epoch": 1.7536393420474887, "grad_norm": 0.24643079936504364, "learning_rate": 4.5513221183888957e-07, "loss": 0.006, "step": 103840 }, { "epoch": 1.7538082210288106, "grad_norm": 0.21961286664009094, "learning_rate": 4.5451807106049405e-07, "loss": 0.0054, "step": 103850 }, { "epoch": 1.7539771000101327, "grad_norm": 0.7930402159690857, "learning_rate": 4.539043251824293e-07, "loss": 0.0096, "step": 103860 }, { "epoch": 1.7541459789914549, "grad_norm": 0.20789916813373566, "learning_rate": 4.5329097425801713e-07, "loss": 0.0062, "step": 103870 }, { "epoch": 1.7543148579727768, "grad_norm": 0.2673566937446594, "learning_rate": 4.5267801834054437e-07, "loss": 0.0069, "step": 103880 }, { "epoch": 1.7544837369540986, "grad_norm": 0.26057398319244385, "learning_rate": 4.5206545748326224e-07, "loss": 0.006, "step": 103890 }, { "epoch": 1.7546526159354205, "grad_norm": 0.2380186766386032, "learning_rate": 4.5145329173938936e-07, "loss": 0.0077, "step": 103900 }, { "epoch": 1.7548214949167427, "grad_norm": 0.23085814714431763, "learning_rate": 4.5084152116210865e-07, "loss": 0.0073, "step": 103910 }, { "epoch": 1.7549903738980648, "grad_norm": 0.22748106718063354, "learning_rate": 4.5023014580456936e-07, "loss": 0.0072, "step": 103920 }, { "epoch": 1.7551592528793867, "grad_norm": 0.2778046131134033, "learning_rate": 4.496191657198873e-07, "loss": 0.0081, "step": 103930 }, { "epoch": 1.7553281318607086, "grad_norm": 0.32121381163597107, "learning_rate": 4.490085809611411e-07, "loss": 0.0094, "step": 103940 }, { "epoch": 1.7554970108420305, "grad_norm": 0.2738795876502991, "learning_rate": 4.4839839158137774e-07, "loss": 0.0055, "step": 103950 }, { "epoch": 1.7556658898233526, "grad_norm": 0.2384389489889145, "learning_rate": 4.477885976336094e-07, "loss": 0.0105, "step": 103960 }, { "epoch": 1.7558347688046747, "grad_norm": 0.3352004587650299, "learning_rate": 4.4717919917081353e-07, "loss": 0.0103, "step": 103970 }, { "epoch": 1.7560036477859966, "grad_norm": 0.2764197587966919, "learning_rate": 4.465701962459318e-07, "loss": 0.0054, "step": 103980 }, { "epoch": 1.7561725267673185, "grad_norm": 0.09669539332389832, "learning_rate": 4.45961588911874e-07, "loss": 0.0075, "step": 103990 }, { "epoch": 1.7563414057486404, "grad_norm": 0.12675045430660248, "learning_rate": 4.4535337722151407e-07, "loss": 0.0068, "step": 104000 }, { "epoch": 1.7565102847299625, "grad_norm": 0.1610749065876007, "learning_rate": 4.447455612276919e-07, "loss": 0.0045, "step": 104010 }, { "epoch": 1.7566791637112846, "grad_norm": 0.31117746233940125, "learning_rate": 4.4413814098321407e-07, "loss": 0.0054, "step": 104020 }, { "epoch": 1.7568480426926065, "grad_norm": 0.39868924021720886, "learning_rate": 4.4353111654085015e-07, "loss": 0.0072, "step": 104030 }, { "epoch": 1.7570169216739284, "grad_norm": 0.38270169496536255, "learning_rate": 4.429244879533384e-07, "loss": 0.0038, "step": 104040 }, { "epoch": 1.7571858006552503, "grad_norm": 0.3251521587371826, "learning_rate": 4.4231825527338124e-07, "loss": 0.0064, "step": 104050 }, { "epoch": 1.7573546796365724, "grad_norm": 0.16050417721271515, "learning_rate": 4.4171241855364534e-07, "loss": 0.0063, "step": 104060 }, { "epoch": 1.7575235586178946, "grad_norm": 0.2278808057308197, "learning_rate": 4.411069778467647e-07, "loss": 0.0081, "step": 104070 }, { "epoch": 1.7576924375992165, "grad_norm": 0.2005085051059723, "learning_rate": 4.40501933205339e-07, "loss": 0.0067, "step": 104080 }, { "epoch": 1.7578613165805383, "grad_norm": 0.12836964428424835, "learning_rate": 4.3989728468193327e-07, "loss": 0.0067, "step": 104090 }, { "epoch": 1.7580301955618602, "grad_norm": 0.30048125982284546, "learning_rate": 4.392930323290773e-07, "loss": 0.006, "step": 104100 }, { "epoch": 1.7581990745431824, "grad_norm": 0.6059107780456543, "learning_rate": 4.3868917619926677e-07, "loss": 0.0099, "step": 104110 }, { "epoch": 1.7583679535245045, "grad_norm": 0.27952659130096436, "learning_rate": 4.380857163449642e-07, "loss": 0.0083, "step": 104120 }, { "epoch": 1.7585368325058264, "grad_norm": 0.36759498715400696, "learning_rate": 4.3748265281859547e-07, "loss": 0.0077, "step": 104130 }, { "epoch": 1.7587057114871483, "grad_norm": 0.16798768937587738, "learning_rate": 4.368799856725547e-07, "loss": 0.0093, "step": 104140 }, { "epoch": 1.7588745904684702, "grad_norm": 0.2798076570034027, "learning_rate": 4.36277714959199e-07, "loss": 0.0041, "step": 104150 }, { "epoch": 1.7590434694497923, "grad_norm": 0.19370287656784058, "learning_rate": 4.356758407308526e-07, "loss": 0.0087, "step": 104160 }, { "epoch": 1.7592123484311144, "grad_norm": 0.212999165058136, "learning_rate": 4.350743630398052e-07, "loss": 0.006, "step": 104170 }, { "epoch": 1.7593812274124363, "grad_norm": 0.25716421008110046, "learning_rate": 4.3447328193831185e-07, "loss": 0.0098, "step": 104180 }, { "epoch": 1.7595501063937582, "grad_norm": 0.10749323666095734, "learning_rate": 4.338725974785918e-07, "loss": 0.0051, "step": 104190 }, { "epoch": 1.75971898537508, "grad_norm": 0.19295908510684967, "learning_rate": 4.332723097128316e-07, "loss": 0.0084, "step": 104200 }, { "epoch": 1.7598878643564022, "grad_norm": 0.3314419388771057, "learning_rate": 4.3267241869318297e-07, "loss": 0.0063, "step": 104210 }, { "epoch": 1.7600567433377243, "grad_norm": 0.2609173357486725, "learning_rate": 4.320729244717642e-07, "loss": 0.006, "step": 104220 }, { "epoch": 1.7602256223190462, "grad_norm": 0.2847079038619995, "learning_rate": 4.314738271006552e-07, "loss": 0.0087, "step": 104230 }, { "epoch": 1.7603945013003681, "grad_norm": 0.21935240924358368, "learning_rate": 4.308751266319061e-07, "loss": 0.005, "step": 104240 }, { "epoch": 1.76056338028169, "grad_norm": 0.27320995926856995, "learning_rate": 4.302768231175297e-07, "loss": 0.0052, "step": 104250 }, { "epoch": 1.7607322592630121, "grad_norm": 0.37093251943588257, "learning_rate": 4.29678916609505e-07, "loss": 0.0077, "step": 104260 }, { "epoch": 1.7609011382443343, "grad_norm": 0.14142344892024994, "learning_rate": 4.290814071597771e-07, "loss": 0.0051, "step": 104270 }, { "epoch": 1.7610700172256561, "grad_norm": 0.29658952355384827, "learning_rate": 4.2848429482025664e-07, "loss": 0.0057, "step": 104280 }, { "epoch": 1.761238896206978, "grad_norm": 0.3566480576992035, "learning_rate": 4.2788757964281834e-07, "loss": 0.0058, "step": 104290 }, { "epoch": 1.7614077751883, "grad_norm": 0.3065752685070038, "learning_rate": 4.2729126167930457e-07, "loss": 0.004, "step": 104300 }, { "epoch": 1.761576654169622, "grad_norm": 0.20291893184185028, "learning_rate": 4.2669534098152e-07, "loss": 0.007, "step": 104310 }, { "epoch": 1.7617455331509442, "grad_norm": 0.17613589763641357, "learning_rate": 4.2609981760123876e-07, "loss": 0.0061, "step": 104320 }, { "epoch": 1.761914412132266, "grad_norm": 0.20148615539073944, "learning_rate": 4.255046915901967e-07, "loss": 0.0056, "step": 104330 }, { "epoch": 1.762083291113588, "grad_norm": 0.11123263090848923, "learning_rate": 4.2490996300009914e-07, "loss": 0.0076, "step": 104340 }, { "epoch": 1.7622521700949099, "grad_norm": 0.22105756402015686, "learning_rate": 4.2431563188261247e-07, "loss": 0.0039, "step": 104350 }, { "epoch": 1.762421049076232, "grad_norm": 0.20228154957294464, "learning_rate": 4.23721698289371e-07, "loss": 0.0047, "step": 104360 }, { "epoch": 1.762589928057554, "grad_norm": 0.2427949756383896, "learning_rate": 4.231281622719752e-07, "loss": 0.0053, "step": 104370 }, { "epoch": 1.762758807038876, "grad_norm": 0.3323657810688019, "learning_rate": 4.2253502388198975e-07, "loss": 0.0072, "step": 104380 }, { "epoch": 1.762927686020198, "grad_norm": 0.31048834323883057, "learning_rate": 4.2194228317094464e-07, "loss": 0.0068, "step": 104390 }, { "epoch": 1.7630965650015198, "grad_norm": 0.12536850571632385, "learning_rate": 4.213499401903365e-07, "loss": 0.0061, "step": 104400 }, { "epoch": 1.763265443982842, "grad_norm": 0.3044198453426361, "learning_rate": 4.2075799499162575e-07, "loss": 0.0091, "step": 104410 }, { "epoch": 1.763434322964164, "grad_norm": 0.2154127061367035, "learning_rate": 4.2016644762624016e-07, "loss": 0.0111, "step": 104420 }, { "epoch": 1.763603201945486, "grad_norm": 0.29119905829429626, "learning_rate": 4.1957529814557086e-07, "loss": 0.0083, "step": 104430 }, { "epoch": 1.7637720809268078, "grad_norm": 0.1738525629043579, "learning_rate": 4.1898454660097563e-07, "loss": 0.0064, "step": 104440 }, { "epoch": 1.7639409599081297, "grad_norm": 0.03489326685667038, "learning_rate": 4.1839419304377794e-07, "loss": 0.0056, "step": 104450 }, { "epoch": 1.7641098388894518, "grad_norm": 0.18097032606601715, "learning_rate": 4.178042375252672e-07, "loss": 0.0084, "step": 104460 }, { "epoch": 1.764278717870774, "grad_norm": 0.1577809602022171, "learning_rate": 4.172146800966953e-07, "loss": 0.0069, "step": 104470 }, { "epoch": 1.7644475968520958, "grad_norm": 0.4120098650455475, "learning_rate": 4.1662552080928174e-07, "loss": 0.0093, "step": 104480 }, { "epoch": 1.7646164758334177, "grad_norm": 0.17060896754264832, "learning_rate": 4.1603675971421284e-07, "loss": 0.009, "step": 104490 }, { "epoch": 1.7647853548147396, "grad_norm": 0.1899578869342804, "learning_rate": 4.154483968626372e-07, "loss": 0.0069, "step": 104500 }, { "epoch": 1.7649542337960618, "grad_norm": 0.22411495447158813, "learning_rate": 4.148604323056715e-07, "loss": 0.0095, "step": 104510 }, { "epoch": 1.7651231127773839, "grad_norm": 0.2783060371875763, "learning_rate": 4.1427286609439566e-07, "loss": 0.0049, "step": 104520 }, { "epoch": 1.7652919917587058, "grad_norm": 0.17399895191192627, "learning_rate": 4.136856982798565e-07, "loss": 0.0058, "step": 104530 }, { "epoch": 1.7654608707400277, "grad_norm": 0.17387709021568298, "learning_rate": 4.1309892891306715e-07, "loss": 0.0076, "step": 104540 }, { "epoch": 1.7656297497213496, "grad_norm": 0.13248197734355927, "learning_rate": 4.125125580450018e-07, "loss": 0.0071, "step": 104550 }, { "epoch": 1.7657986287026717, "grad_norm": 0.24212850630283356, "learning_rate": 4.1192658572660414e-07, "loss": 0.0054, "step": 104560 }, { "epoch": 1.7659675076839938, "grad_norm": 0.4842388927936554, "learning_rate": 4.113410120087824e-07, "loss": 0.0087, "step": 104570 }, { "epoch": 1.7661363866653157, "grad_norm": 0.27251681685447693, "learning_rate": 4.1075583694240975e-07, "loss": 0.0057, "step": 104580 }, { "epoch": 1.7663052656466376, "grad_norm": 0.2827499806880951, "learning_rate": 4.1017106057832546e-07, "loss": 0.0092, "step": 104590 }, { "epoch": 1.7664741446279595, "grad_norm": 0.25831684470176697, "learning_rate": 4.0958668296733226e-07, "loss": 0.0097, "step": 104600 }, { "epoch": 1.7666430236092816, "grad_norm": 0.19681158661842346, "learning_rate": 4.0900270416019906e-07, "loss": 0.0035, "step": 104610 }, { "epoch": 1.7668119025906037, "grad_norm": 0.1911473423242569, "learning_rate": 4.084191242076607e-07, "loss": 0.0051, "step": 104620 }, { "epoch": 1.7669807815719256, "grad_norm": 0.3912264108657837, "learning_rate": 4.0783594316041955e-07, "loss": 0.0082, "step": 104630 }, { "epoch": 1.7671496605532475, "grad_norm": 0.12614192068576813, "learning_rate": 4.072531610691377e-07, "loss": 0.0053, "step": 104640 }, { "epoch": 1.7673185395345694, "grad_norm": 0.22670353949069977, "learning_rate": 4.066707779844481e-07, "loss": 0.0055, "step": 104650 }, { "epoch": 1.7674874185158915, "grad_norm": 0.05511470511555672, "learning_rate": 4.0608879395694523e-07, "loss": 0.0042, "step": 104660 }, { "epoch": 1.7676562974972136, "grad_norm": 0.5690640211105347, "learning_rate": 4.0550720903719255e-07, "loss": 0.0092, "step": 104670 }, { "epoch": 1.7678251764785355, "grad_norm": 0.23328587412834167, "learning_rate": 4.049260232757141e-07, "loss": 0.009, "step": 104680 }, { "epoch": 1.7679940554598574, "grad_norm": 0.1758875846862793, "learning_rate": 4.043452367230033e-07, "loss": 0.0029, "step": 104690 }, { "epoch": 1.7681629344411793, "grad_norm": 0.17934611439704895, "learning_rate": 4.037648494295171e-07, "loss": 0.0048, "step": 104700 }, { "epoch": 1.7683318134225015, "grad_norm": 0.36388492584228516, "learning_rate": 4.0318486144567903e-07, "loss": 0.0075, "step": 104710 }, { "epoch": 1.7685006924038236, "grad_norm": 0.09959915280342102, "learning_rate": 4.02605272821876e-07, "loss": 0.0084, "step": 104720 }, { "epoch": 1.7686695713851455, "grad_norm": 0.1685471534729004, "learning_rate": 4.0202608360845996e-07, "loss": 0.0041, "step": 104730 }, { "epoch": 1.7688384503664674, "grad_norm": 0.306044340133667, "learning_rate": 4.014472938557523e-07, "loss": 0.0082, "step": 104740 }, { "epoch": 1.7690073293477893, "grad_norm": 0.18909311294555664, "learning_rate": 4.008689036140362e-07, "loss": 0.0077, "step": 104750 }, { "epoch": 1.7691762083291114, "grad_norm": 0.14643770456314087, "learning_rate": 4.002909129335597e-07, "loss": 0.008, "step": 104760 }, { "epoch": 1.7693450873104335, "grad_norm": 0.2479671835899353, "learning_rate": 3.9971332186453715e-07, "loss": 0.0058, "step": 104770 }, { "epoch": 1.7695139662917554, "grad_norm": 0.49121320247650146, "learning_rate": 3.991361304571489e-07, "loss": 0.0095, "step": 104780 }, { "epoch": 1.7696828452730773, "grad_norm": 0.2077055126428604, "learning_rate": 3.985593387615411e-07, "loss": 0.0068, "step": 104790 }, { "epoch": 1.7698517242543992, "grad_norm": 0.2743431329727173, "learning_rate": 3.9798294682782125e-07, "loss": 0.0078, "step": 104800 }, { "epoch": 1.7700206032357213, "grad_norm": 0.5193440318107605, "learning_rate": 3.974069547060666e-07, "loss": 0.0078, "step": 104810 }, { "epoch": 1.7701894822170434, "grad_norm": 0.11206080764532089, "learning_rate": 3.968313624463177e-07, "loss": 0.0042, "step": 104820 }, { "epoch": 1.7703583611983653, "grad_norm": 0.3489011824131012, "learning_rate": 3.962561700985812e-07, "loss": 0.006, "step": 104830 }, { "epoch": 1.7705272401796872, "grad_norm": 0.3463194966316223, "learning_rate": 3.956813777128271e-07, "loss": 0.0051, "step": 104840 }, { "epoch": 1.770696119161009, "grad_norm": 0.278432697057724, "learning_rate": 3.951069853389916e-07, "loss": 0.008, "step": 104850 }, { "epoch": 1.7708649981423312, "grad_norm": 0.24082978069782257, "learning_rate": 3.9453299302697814e-07, "loss": 0.0049, "step": 104860 }, { "epoch": 1.7710338771236533, "grad_norm": 0.17896200716495514, "learning_rate": 3.939594008266545e-07, "loss": 0.0072, "step": 104870 }, { "epoch": 1.7712027561049752, "grad_norm": 0.29297927021980286, "learning_rate": 3.933862087878504e-07, "loss": 0.005, "step": 104880 }, { "epoch": 1.7713716350862971, "grad_norm": 0.33810678124427795, "learning_rate": 3.928134169603648e-07, "loss": 0.0054, "step": 104890 }, { "epoch": 1.771540514067619, "grad_norm": 0.2170923799276352, "learning_rate": 3.9224102539395956e-07, "loss": 0.0086, "step": 104900 }, { "epoch": 1.7717093930489412, "grad_norm": 0.06985797733068466, "learning_rate": 3.91669034138365e-07, "loss": 0.0043, "step": 104910 }, { "epoch": 1.7718782720302633, "grad_norm": 0.2505366802215576, "learning_rate": 3.9109744324327124e-07, "loss": 0.0071, "step": 104920 }, { "epoch": 1.7720471510115852, "grad_norm": 0.2507230043411255, "learning_rate": 3.905262527583381e-07, "loss": 0.0035, "step": 104930 }, { "epoch": 1.772216029992907, "grad_norm": 0.14566802978515625, "learning_rate": 3.8995546273318916e-07, "loss": 0.0057, "step": 104940 }, { "epoch": 1.772384908974229, "grad_norm": 0.15885454416275024, "learning_rate": 3.8938507321741424e-07, "loss": 0.0049, "step": 104950 }, { "epoch": 1.772553787955551, "grad_norm": 0.22214719653129578, "learning_rate": 3.8881508426056593e-07, "loss": 0.0089, "step": 104960 }, { "epoch": 1.7727226669368732, "grad_norm": 0.19884246587753296, "learning_rate": 3.8824549591216286e-07, "loss": 0.0037, "step": 104970 }, { "epoch": 1.772891545918195, "grad_norm": 0.12038996070623398, "learning_rate": 3.876763082216911e-07, "loss": 0.0077, "step": 104980 }, { "epoch": 1.773060424899517, "grad_norm": 0.19529862701892853, "learning_rate": 3.8710752123860107e-07, "loss": 0.0066, "step": 104990 }, { "epoch": 1.7732293038808389, "grad_norm": 0.47412869334220886, "learning_rate": 3.865391350123049e-07, "loss": 0.0065, "step": 105000 }, { "epoch": 1.773398182862161, "grad_norm": 0.08913196623325348, "learning_rate": 3.8597114959218476e-07, "loss": 0.0036, "step": 105010 }, { "epoch": 1.7735670618434831, "grad_norm": 0.04552512243390083, "learning_rate": 3.854035650275845e-07, "loss": 0.005, "step": 105020 }, { "epoch": 1.773735940824805, "grad_norm": 0.23892998695373535, "learning_rate": 3.8483638136781463e-07, "loss": 0.0075, "step": 105030 }, { "epoch": 1.773904819806127, "grad_norm": 0.4363512694835663, "learning_rate": 3.842695986621525e-07, "loss": 0.0059, "step": 105040 }, { "epoch": 1.7740736987874488, "grad_norm": 0.21407051384449005, "learning_rate": 3.837032169598365e-07, "loss": 0.0067, "step": 105050 }, { "epoch": 1.774242577768771, "grad_norm": 0.25896215438842773, "learning_rate": 3.8313723631007325e-07, "loss": 0.0061, "step": 105060 }, { "epoch": 1.774411456750093, "grad_norm": 0.22337010502815247, "learning_rate": 3.8257165676203413e-07, "loss": 0.0078, "step": 105070 }, { "epoch": 1.774580335731415, "grad_norm": 0.18756814301013947, "learning_rate": 3.8200647836485473e-07, "loss": 0.0064, "step": 105080 }, { "epoch": 1.7747492147127368, "grad_norm": 0.3864099979400635, "learning_rate": 3.814417011676363e-07, "loss": 0.0052, "step": 105090 }, { "epoch": 1.7749180936940587, "grad_norm": 0.16248054802417755, "learning_rate": 3.8087732521944643e-07, "loss": 0.0043, "step": 105100 }, { "epoch": 1.7750869726753808, "grad_norm": 0.17777976393699646, "learning_rate": 3.8031335056931575e-07, "loss": 0.0105, "step": 105110 }, { "epoch": 1.775255851656703, "grad_norm": 0.23074325919151306, "learning_rate": 3.7974977726624176e-07, "loss": 0.0065, "step": 105120 }, { "epoch": 1.7754247306380249, "grad_norm": 0.31419458985328674, "learning_rate": 3.791866053591853e-07, "loss": 0.0077, "step": 105130 }, { "epoch": 1.7755936096193468, "grad_norm": 0.16412727534770966, "learning_rate": 3.7862383489707335e-07, "loss": 0.0053, "step": 105140 }, { "epoch": 1.7757624886006687, "grad_norm": 0.29027795791625977, "learning_rate": 3.78061465928799e-07, "loss": 0.007, "step": 105150 }, { "epoch": 1.7759313675819908, "grad_norm": 0.25406375527381897, "learning_rate": 3.7749949850321986e-07, "loss": 0.0063, "step": 105160 }, { "epoch": 1.776100246563313, "grad_norm": 0.42199575901031494, "learning_rate": 3.7693793266915635e-07, "loss": 0.0114, "step": 105170 }, { "epoch": 1.7762691255446348, "grad_norm": 0.3244689404964447, "learning_rate": 3.763767684753966e-07, "loss": 0.012, "step": 105180 }, { "epoch": 1.7764380045259567, "grad_norm": 0.21873979270458221, "learning_rate": 3.7581600597069446e-07, "loss": 0.0064, "step": 105190 }, { "epoch": 1.7766068835072786, "grad_norm": 0.2020305097103119, "learning_rate": 3.7525564520376645e-07, "loss": 0.0065, "step": 105200 }, { "epoch": 1.7767757624886007, "grad_norm": 0.14329124987125397, "learning_rate": 3.746956862232953e-07, "loss": 0.0047, "step": 105210 }, { "epoch": 1.7769446414699228, "grad_norm": 0.18656428158283234, "learning_rate": 3.741361290779294e-07, "loss": 0.0086, "step": 105220 }, { "epoch": 1.7771135204512447, "grad_norm": 0.3099016547203064, "learning_rate": 3.7357697381628143e-07, "loss": 0.0083, "step": 105230 }, { "epoch": 1.7772823994325666, "grad_norm": 0.21494373679161072, "learning_rate": 3.730182204869304e-07, "loss": 0.0073, "step": 105240 }, { "epoch": 1.7774512784138885, "grad_norm": 0.21157871186733246, "learning_rate": 3.7245986913841793e-07, "loss": 0.0091, "step": 105250 }, { "epoch": 1.7776201573952106, "grad_norm": 0.23447707295417786, "learning_rate": 3.719019198192525e-07, "loss": 0.0044, "step": 105260 }, { "epoch": 1.7777890363765327, "grad_norm": 0.05665084347128868, "learning_rate": 3.713443725779081e-07, "loss": 0.008, "step": 105270 }, { "epoch": 1.7779579153578546, "grad_norm": 0.16802582144737244, "learning_rate": 3.7078722746282325e-07, "loss": 0.0042, "step": 105280 }, { "epoch": 1.7781267943391765, "grad_norm": 0.05955491214990616, "learning_rate": 3.7023048452239974e-07, "loss": 0.0088, "step": 105290 }, { "epoch": 1.7782956733204984, "grad_norm": 0.39298829436302185, "learning_rate": 3.696741438050072e-07, "loss": 0.0086, "step": 105300 }, { "epoch": 1.7784645523018205, "grad_norm": 0.06835168600082397, "learning_rate": 3.6911820535897925e-07, "loss": 0.0038, "step": 105310 }, { "epoch": 1.7786334312831427, "grad_norm": 0.08203655481338501, "learning_rate": 3.6856266923261384e-07, "loss": 0.0051, "step": 105320 }, { "epoch": 1.7788023102644646, "grad_norm": 0.25044897198677063, "learning_rate": 3.680075354741752e-07, "loss": 0.0068, "step": 105330 }, { "epoch": 1.7789711892457865, "grad_norm": 0.34168317914009094, "learning_rate": 3.67452804131892e-07, "loss": 0.0065, "step": 105340 }, { "epoch": 1.7791400682271084, "grad_norm": 0.24961508810520172, "learning_rate": 3.6689847525395727e-07, "loss": 0.0052, "step": 105350 }, { "epoch": 1.7793089472084305, "grad_norm": 0.26108649373054504, "learning_rate": 3.663445488885309e-07, "loss": 0.0085, "step": 105360 }, { "epoch": 1.7794778261897526, "grad_norm": 0.1565903127193451, "learning_rate": 3.657910250837354e-07, "loss": 0.0032, "step": 105370 }, { "epoch": 1.7796467051710745, "grad_norm": 0.14211449027061462, "learning_rate": 3.652379038876602e-07, "loss": 0.0054, "step": 105380 }, { "epoch": 1.7798155841523964, "grad_norm": 0.0966508686542511, "learning_rate": 3.6468518534835905e-07, "loss": 0.0083, "step": 105390 }, { "epoch": 1.7799844631337183, "grad_norm": 0.2549456059932709, "learning_rate": 3.641328695138513e-07, "loss": 0.0054, "step": 105400 }, { "epoch": 1.7801533421150404, "grad_norm": 0.2987155318260193, "learning_rate": 3.6358095643211963e-07, "loss": 0.0077, "step": 105410 }, { "epoch": 1.7803222210963625, "grad_norm": 0.24692583084106445, "learning_rate": 3.63029446151113e-07, "loss": 0.0062, "step": 105420 }, { "epoch": 1.7804911000776844, "grad_norm": 0.3142625093460083, "learning_rate": 3.6247833871874637e-07, "loss": 0.004, "step": 105430 }, { "epoch": 1.7806599790590063, "grad_norm": 0.10608471184968948, "learning_rate": 3.6192763418289755e-07, "loss": 0.0067, "step": 105440 }, { "epoch": 1.7808288580403282, "grad_norm": 0.05127890035510063, "learning_rate": 3.6137733259141104e-07, "loss": 0.0084, "step": 105450 }, { "epoch": 1.7809977370216503, "grad_norm": 0.40034040808677673, "learning_rate": 3.6082743399209585e-07, "loss": 0.0099, "step": 105460 }, { "epoch": 1.7811666160029724, "grad_norm": 0.2520824372768402, "learning_rate": 3.6027793843272487e-07, "loss": 0.0039, "step": 105470 }, { "epoch": 1.7813354949842943, "grad_norm": 0.2638511061668396, "learning_rate": 3.597288459610382e-07, "loss": 0.0064, "step": 105480 }, { "epoch": 1.7815043739656162, "grad_norm": 0.2127639651298523, "learning_rate": 3.5918015662473937e-07, "loss": 0.0065, "step": 105490 }, { "epoch": 1.7816732529469381, "grad_norm": 0.16691343486309052, "learning_rate": 3.5863187047149584e-07, "loss": 0.0095, "step": 105500 }, { "epoch": 1.7818421319282602, "grad_norm": 0.3455384373664856, "learning_rate": 3.580839875489428e-07, "loss": 0.0098, "step": 105510 }, { "epoch": 1.7820110109095821, "grad_norm": 0.392618328332901, "learning_rate": 3.5753650790467777e-07, "loss": 0.0088, "step": 105520 }, { "epoch": 1.7821798898909043, "grad_norm": 0.2171199917793274, "learning_rate": 3.569894315862665e-07, "loss": 0.0056, "step": 105530 }, { "epoch": 1.7823487688722262, "grad_norm": 0.2820388376712799, "learning_rate": 3.564427586412356e-07, "loss": 0.0067, "step": 105540 }, { "epoch": 1.782517647853548, "grad_norm": 0.30038899183273315, "learning_rate": 3.558964891170791e-07, "loss": 0.0062, "step": 105550 }, { "epoch": 1.7826865268348702, "grad_norm": 0.10789172351360321, "learning_rate": 3.553506230612558e-07, "loss": 0.0049, "step": 105560 }, { "epoch": 1.782855405816192, "grad_norm": 0.06213916465640068, "learning_rate": 3.548051605211894e-07, "loss": 0.0044, "step": 105570 }, { "epoch": 1.7830242847975142, "grad_norm": 0.1928074061870575, "learning_rate": 3.5426010154426816e-07, "loss": 0.0086, "step": 105580 }, { "epoch": 1.783193163778836, "grad_norm": 0.2075069546699524, "learning_rate": 3.537154461778458e-07, "loss": 0.006, "step": 105590 }, { "epoch": 1.783362042760158, "grad_norm": 0.1659349948167801, "learning_rate": 3.5317119446923955e-07, "loss": 0.0126, "step": 105600 }, { "epoch": 1.78353092174148, "grad_norm": 0.10963410884141922, "learning_rate": 3.5262734646573483e-07, "loss": 0.0038, "step": 105610 }, { "epoch": 1.783699800722802, "grad_norm": 0.25305089354515076, "learning_rate": 3.520839022145778e-07, "loss": 0.0079, "step": 105620 }, { "epoch": 1.783868679704124, "grad_norm": 0.5944250822067261, "learning_rate": 3.5154086176298175e-07, "loss": 0.0094, "step": 105630 }, { "epoch": 1.784037558685446, "grad_norm": 0.3687666058540344, "learning_rate": 3.509982251581251e-07, "loss": 0.011, "step": 105640 }, { "epoch": 1.784206437666768, "grad_norm": 0.2439691424369812, "learning_rate": 3.5045599244715234e-07, "loss": 0.0097, "step": 105650 }, { "epoch": 1.78437531664809, "grad_norm": 0.27879294753074646, "learning_rate": 3.499141636771686e-07, "loss": 0.0044, "step": 105660 }, { "epoch": 1.784544195629412, "grad_norm": 0.25458207726478577, "learning_rate": 3.493727388952478e-07, "loss": 0.0056, "step": 105670 }, { "epoch": 1.784713074610734, "grad_norm": 0.12658090889453888, "learning_rate": 3.488317181484274e-07, "loss": 0.0047, "step": 105680 }, { "epoch": 1.784881953592056, "grad_norm": 0.12930770218372345, "learning_rate": 3.4829110148371095e-07, "loss": 0.0089, "step": 105690 }, { "epoch": 1.7850508325733778, "grad_norm": 0.1992090344429016, "learning_rate": 3.477508889480646e-07, "loss": 0.0069, "step": 105700 }, { "epoch": 1.7852197115547, "grad_norm": 0.35335424542427063, "learning_rate": 3.4721108058842156e-07, "loss": 0.0076, "step": 105710 }, { "epoch": 1.7853885905360218, "grad_norm": 0.11863595247268677, "learning_rate": 3.4667167645167864e-07, "loss": 0.0061, "step": 105720 }, { "epoch": 1.785557469517344, "grad_norm": 0.3409285545349121, "learning_rate": 3.461326765846984e-07, "loss": 0.0052, "step": 105730 }, { "epoch": 1.7857263484986658, "grad_norm": 0.18138673901557922, "learning_rate": 3.4559408103430726e-07, "loss": 0.0088, "step": 105740 }, { "epoch": 1.7858952274799877, "grad_norm": 0.2828466296195984, "learning_rate": 3.4505588984729723e-07, "loss": 0.0072, "step": 105750 }, { "epoch": 1.7860641064613099, "grad_norm": 0.10936472564935684, "learning_rate": 3.445181030704253e-07, "loss": 0.0049, "step": 105760 }, { "epoch": 1.7862329854426318, "grad_norm": 0.3352378308773041, "learning_rate": 3.4398072075041365e-07, "loss": 0.0067, "step": 105770 }, { "epoch": 1.7864018644239539, "grad_norm": 0.2537005841732025, "learning_rate": 3.4344374293394714e-07, "loss": 0.0058, "step": 105780 }, { "epoch": 1.7865707434052758, "grad_norm": 0.15179795026779175, "learning_rate": 3.429071696676778e-07, "loss": 0.0091, "step": 105790 }, { "epoch": 1.7867396223865977, "grad_norm": 0.23886552453041077, "learning_rate": 3.4237100099822175e-07, "loss": 0.0061, "step": 105800 }, { "epoch": 1.7869085013679198, "grad_norm": 0.08560884743928909, "learning_rate": 3.418352369721606e-07, "loss": 0.0025, "step": 105810 }, { "epoch": 1.7870773803492417, "grad_norm": 0.1417539119720459, "learning_rate": 3.412998776360393e-07, "loss": 0.0087, "step": 105820 }, { "epoch": 1.7872462593305638, "grad_norm": 0.12553198635578156, "learning_rate": 3.4076492303636964e-07, "loss": 0.0052, "step": 105830 }, { "epoch": 1.7874151383118857, "grad_norm": 0.4974260628223419, "learning_rate": 3.402303732196266e-07, "loss": 0.0093, "step": 105840 }, { "epoch": 1.7875840172932076, "grad_norm": 0.1930665522813797, "learning_rate": 3.396962282322508e-07, "loss": 0.0091, "step": 105850 }, { "epoch": 1.7877528962745297, "grad_norm": 0.3463580906391144, "learning_rate": 3.3916248812064677e-07, "loss": 0.0058, "step": 105860 }, { "epoch": 1.7879217752558516, "grad_norm": 0.602850615978241, "learning_rate": 3.3862915293118525e-07, "loss": 0.0102, "step": 105870 }, { "epoch": 1.7880906542371737, "grad_norm": 0.3057127296924591, "learning_rate": 3.380962227102003e-07, "loss": 0.0066, "step": 105880 }, { "epoch": 1.7882595332184956, "grad_norm": 0.2687399387359619, "learning_rate": 3.375636975039931e-07, "loss": 0.008, "step": 105890 }, { "epoch": 1.7884284121998175, "grad_norm": 0.08903210610151291, "learning_rate": 3.370315773588262e-07, "loss": 0.005, "step": 105900 }, { "epoch": 1.7885972911811396, "grad_norm": 0.1574459820985794, "learning_rate": 3.3649986232093036e-07, "loss": 0.0075, "step": 105910 }, { "epoch": 1.7887661701624615, "grad_norm": 0.09301877021789551, "learning_rate": 3.3596855243649863e-07, "loss": 0.0058, "step": 105920 }, { "epoch": 1.7889350491437837, "grad_norm": 0.12568522989749908, "learning_rate": 3.3543764775169074e-07, "loss": 0.0065, "step": 105930 }, { "epoch": 1.7891039281251055, "grad_norm": 0.2304261326789856, "learning_rate": 3.349071483126298e-07, "loss": 0.0057, "step": 105940 }, { "epoch": 1.7892728071064274, "grad_norm": 0.2208288609981537, "learning_rate": 3.3437705416540453e-07, "loss": 0.0046, "step": 105950 }, { "epoch": 1.7894416860877496, "grad_norm": 0.3411603271961212, "learning_rate": 3.3384736535606863e-07, "loss": 0.0053, "step": 105960 }, { "epoch": 1.7896105650690715, "grad_norm": 0.21557782590389252, "learning_rate": 3.3331808193063975e-07, "loss": 0.0086, "step": 105970 }, { "epoch": 1.7897794440503936, "grad_norm": 0.20650050044059753, "learning_rate": 3.3278920393510163e-07, "loss": 0.0056, "step": 105980 }, { "epoch": 1.7899483230317155, "grad_norm": 0.3108930289745331, "learning_rate": 3.3226073141540025e-07, "loss": 0.007, "step": 105990 }, { "epoch": 1.7901172020130374, "grad_norm": 0.2462565302848816, "learning_rate": 3.3173266441744843e-07, "loss": 0.0058, "step": 106000 }, { "epoch": 1.7902860809943595, "grad_norm": 0.08967620134353638, "learning_rate": 3.3120500298712387e-07, "loss": 0.0055, "step": 106010 }, { "epoch": 1.7904549599756814, "grad_norm": 0.1296670287847519, "learning_rate": 3.3067774717026937e-07, "loss": 0.0074, "step": 106020 }, { "epoch": 1.7906238389570035, "grad_norm": 0.23047983646392822, "learning_rate": 3.301508970126899e-07, "loss": 0.008, "step": 106030 }, { "epoch": 1.7907927179383254, "grad_norm": 0.3277391791343689, "learning_rate": 3.296244525601572e-07, "loss": 0.0048, "step": 106040 }, { "epoch": 1.7909615969196473, "grad_norm": 0.5016119480133057, "learning_rate": 3.2909841385840803e-07, "loss": 0.0077, "step": 106050 }, { "epoch": 1.7911304759009694, "grad_norm": 0.4076392352581024, "learning_rate": 3.285727809531436e-07, "loss": 0.0071, "step": 106060 }, { "epoch": 1.7912993548822913, "grad_norm": 0.10017953813076019, "learning_rate": 3.280475538900291e-07, "loss": 0.0073, "step": 106070 }, { "epoch": 1.7914682338636134, "grad_norm": 0.4892667233943939, "learning_rate": 3.275227327146946e-07, "loss": 0.01, "step": 106080 }, { "epoch": 1.7916371128449353, "grad_norm": 0.505050003528595, "learning_rate": 3.269983174727359e-07, "loss": 0.0129, "step": 106090 }, { "epoch": 1.7918059918262572, "grad_norm": 0.2327345907688141, "learning_rate": 3.264743082097138e-07, "loss": 0.0084, "step": 106100 }, { "epoch": 1.7919748708075793, "grad_norm": 0.308721661567688, "learning_rate": 3.259507049711513e-07, "loss": 0.007, "step": 106110 }, { "epoch": 1.7921437497889012, "grad_norm": 0.15421974658966064, "learning_rate": 3.254275078025382e-07, "loss": 0.0064, "step": 106120 }, { "epoch": 1.7923126287702233, "grad_norm": 0.3994333744049072, "learning_rate": 3.2490471674932856e-07, "loss": 0.0081, "step": 106130 }, { "epoch": 1.7924815077515452, "grad_norm": 0.15830975770950317, "learning_rate": 3.243823318569422e-07, "loss": 0.0046, "step": 106140 }, { "epoch": 1.7926503867328671, "grad_norm": 0.2625332474708557, "learning_rate": 3.2386035317076126e-07, "loss": 0.0077, "step": 106150 }, { "epoch": 1.7928192657141893, "grad_norm": 0.2507941424846649, "learning_rate": 3.233387807361343e-07, "loss": 0.0069, "step": 106160 }, { "epoch": 1.7929881446955112, "grad_norm": 0.15561284124851227, "learning_rate": 3.228176145983747e-07, "loss": 0.0065, "step": 106170 }, { "epoch": 1.7931570236768333, "grad_norm": 0.5552847981452942, "learning_rate": 3.2229685480275996e-07, "loss": 0.0083, "step": 106180 }, { "epoch": 1.7933259026581552, "grad_norm": 0.15899115800857544, "learning_rate": 3.217765013945318e-07, "loss": 0.0069, "step": 106190 }, { "epoch": 1.793494781639477, "grad_norm": 0.23959098756313324, "learning_rate": 3.2125655441889834e-07, "loss": 0.0125, "step": 106200 }, { "epoch": 1.7936636606207992, "grad_norm": 0.17049409449100494, "learning_rate": 3.2073701392103087e-07, "loss": 0.0059, "step": 106210 }, { "epoch": 1.793832539602121, "grad_norm": 0.31072452664375305, "learning_rate": 3.202178799460659e-07, "loss": 0.0075, "step": 106220 }, { "epoch": 1.7940014185834432, "grad_norm": 0.3175774812698364, "learning_rate": 3.196991525391041e-07, "loss": 0.0061, "step": 106230 }, { "epoch": 1.794170297564765, "grad_norm": 0.2889232635498047, "learning_rate": 3.191808317452111e-07, "loss": 0.0063, "step": 106240 }, { "epoch": 1.794339176546087, "grad_norm": 0.19691093266010284, "learning_rate": 3.1866291760941746e-07, "loss": 0.0047, "step": 106250 }, { "epoch": 1.794508055527409, "grad_norm": 0.18419791758060455, "learning_rate": 3.181454101767201e-07, "loss": 0.0057, "step": 106260 }, { "epoch": 1.794676934508731, "grad_norm": 0.250108003616333, "learning_rate": 3.1762830949207566e-07, "loss": 0.0056, "step": 106270 }, { "epoch": 1.7948458134900531, "grad_norm": 0.2931815981864929, "learning_rate": 3.171116156004106e-07, "loss": 0.0065, "step": 106280 }, { "epoch": 1.795014692471375, "grad_norm": 0.22668114304542542, "learning_rate": 3.1659532854661335e-07, "loss": 0.0055, "step": 106290 }, { "epoch": 1.795183571452697, "grad_norm": 0.3225419521331787, "learning_rate": 3.160794483755375e-07, "loss": 0.0089, "step": 106300 }, { "epoch": 1.795352450434019, "grad_norm": 0.2543400228023529, "learning_rate": 3.155639751320022e-07, "loss": 0.0063, "step": 106310 }, { "epoch": 1.795521329415341, "grad_norm": 0.4866737723350525, "learning_rate": 3.1504890886079e-07, "loss": 0.0069, "step": 106320 }, { "epoch": 1.795690208396663, "grad_norm": 0.25579631328582764, "learning_rate": 3.145342496066489e-07, "loss": 0.0071, "step": 106330 }, { "epoch": 1.795859087377985, "grad_norm": 0.5009521245956421, "learning_rate": 3.140199974142916e-07, "loss": 0.0054, "step": 106340 }, { "epoch": 1.7960279663593068, "grad_norm": 0.171170175075531, "learning_rate": 3.1350615232839385e-07, "loss": 0.0068, "step": 106350 }, { "epoch": 1.796196845340629, "grad_norm": 0.15211303532123566, "learning_rate": 3.129927143935979e-07, "loss": 0.0058, "step": 106360 }, { "epoch": 1.7963657243219509, "grad_norm": 0.33474287390708923, "learning_rate": 3.124796836545102e-07, "loss": 0.0056, "step": 106370 }, { "epoch": 1.796534603303273, "grad_norm": 0.052418243139982224, "learning_rate": 3.119670601557012e-07, "loss": 0.0068, "step": 106380 }, { "epoch": 1.7967034822845949, "grad_norm": 0.14484389126300812, "learning_rate": 3.11454843941707e-07, "loss": 0.0075, "step": 106390 }, { "epoch": 1.7968723612659168, "grad_norm": 0.5143569707870483, "learning_rate": 3.109430350570269e-07, "loss": 0.0059, "step": 106400 }, { "epoch": 1.7970412402472389, "grad_norm": 0.22657236456871033, "learning_rate": 3.1043163354612603e-07, "loss": 0.0043, "step": 106410 }, { "epoch": 1.7972101192285608, "grad_norm": 0.14532437920570374, "learning_rate": 3.099206394534338e-07, "loss": 0.0071, "step": 106420 }, { "epoch": 1.797378998209883, "grad_norm": 0.22110891342163086, "learning_rate": 3.0941005282334344e-07, "loss": 0.0078, "step": 106430 }, { "epoch": 1.7975478771912048, "grad_norm": 0.29498445987701416, "learning_rate": 3.088998737002141e-07, "loss": 0.0062, "step": 106440 }, { "epoch": 1.7977167561725267, "grad_norm": 0.17113083600997925, "learning_rate": 3.083901021283692e-07, "loss": 0.0048, "step": 106450 }, { "epoch": 1.7978856351538488, "grad_norm": 0.12282812595367432, "learning_rate": 3.0788073815209594e-07, "loss": 0.0041, "step": 106460 }, { "epoch": 1.7980545141351707, "grad_norm": 0.279584676027298, "learning_rate": 3.0737178181564686e-07, "loss": 0.009, "step": 106470 }, { "epoch": 1.7982233931164928, "grad_norm": 0.2042725384235382, "learning_rate": 3.068632331632387e-07, "loss": 0.008, "step": 106480 }, { "epoch": 1.7983922720978147, "grad_norm": 0.2040627896785736, "learning_rate": 3.063550922390524e-07, "loss": 0.0082, "step": 106490 }, { "epoch": 1.7985611510791366, "grad_norm": 0.28918907046318054, "learning_rate": 3.058473590872346e-07, "loss": 0.0076, "step": 106500 }, { "epoch": 1.7987300300604587, "grad_norm": 0.19869358837604523, "learning_rate": 3.0534003375189634e-07, "loss": 0.0094, "step": 106510 }, { "epoch": 1.7988989090417806, "grad_norm": 0.26164957880973816, "learning_rate": 3.0483311627711174e-07, "loss": 0.0061, "step": 106520 }, { "epoch": 1.7990677880231027, "grad_norm": 0.29627788066864014, "learning_rate": 3.0432660670692115e-07, "loss": 0.0043, "step": 106530 }, { "epoch": 1.7992366670044246, "grad_norm": 0.15419772267341614, "learning_rate": 3.038205050853288e-07, "loss": 0.0058, "step": 106540 }, { "epoch": 1.7994055459857465, "grad_norm": 0.1996888667345047, "learning_rate": 3.033148114563039e-07, "loss": 0.0079, "step": 106550 }, { "epoch": 1.7995744249670687, "grad_norm": 0.16030429303646088, "learning_rate": 3.0280952586377865e-07, "loss": 0.0062, "step": 106560 }, { "epoch": 1.7997433039483905, "grad_norm": 0.1344280242919922, "learning_rate": 3.0230464835165285e-07, "loss": 0.0052, "step": 106570 }, { "epoch": 1.7999121829297127, "grad_norm": 0.18764272332191467, "learning_rate": 3.0180017896378757e-07, "loss": 0.0079, "step": 106580 }, { "epoch": 1.8000810619110346, "grad_norm": 0.42088615894317627, "learning_rate": 3.01296117744011e-07, "loss": 0.0055, "step": 106590 }, { "epoch": 1.8002499408923565, "grad_norm": 0.24666889011859894, "learning_rate": 3.007924647361138e-07, "loss": 0.0085, "step": 106600 }, { "epoch": 1.8004188198736784, "grad_norm": 0.27033913135528564, "learning_rate": 3.002892199838525e-07, "loss": 0.009, "step": 106610 }, { "epoch": 1.8005876988550005, "grad_norm": 0.20178724825382233, "learning_rate": 2.997863835309472e-07, "loss": 0.0071, "step": 106620 }, { "epoch": 1.8007565778363226, "grad_norm": 0.252067506313324, "learning_rate": 2.9928395542108466e-07, "loss": 0.0059, "step": 106630 }, { "epoch": 1.8009254568176445, "grad_norm": 0.17382805049419403, "learning_rate": 2.987819356979127e-07, "loss": 0.0054, "step": 106640 }, { "epoch": 1.8010943357989664, "grad_norm": 0.24593670666217804, "learning_rate": 2.982803244050469e-07, "loss": 0.0052, "step": 106650 }, { "epoch": 1.8012632147802883, "grad_norm": 0.1997550129890442, "learning_rate": 2.9777912158606426e-07, "loss": 0.0054, "step": 106660 }, { "epoch": 1.8014320937616104, "grad_norm": 0.1958376169204712, "learning_rate": 2.972783272845109e-07, "loss": 0.0052, "step": 106670 }, { "epoch": 1.8016009727429325, "grad_norm": 0.22707998752593994, "learning_rate": 2.967779415438926e-07, "loss": 0.0095, "step": 106680 }, { "epoch": 1.8017698517242544, "grad_norm": 0.3554292917251587, "learning_rate": 2.9627796440768186e-07, "loss": 0.0071, "step": 106690 }, { "epoch": 1.8019387307055763, "grad_norm": 0.18333177268505096, "learning_rate": 2.957783959193156e-07, "loss": 0.0052, "step": 106700 }, { "epoch": 1.8021076096868982, "grad_norm": 0.2687195837497711, "learning_rate": 2.952792361221962e-07, "loss": 0.0077, "step": 106710 }, { "epoch": 1.8022764886682203, "grad_norm": 0.2917008101940155, "learning_rate": 2.9478048505968746e-07, "loss": 0.0064, "step": 106720 }, { "epoch": 1.8024453676495424, "grad_norm": 0.20173190534114838, "learning_rate": 2.942821427751208e-07, "loss": 0.0049, "step": 106730 }, { "epoch": 1.8026142466308643, "grad_norm": 0.21429851651191711, "learning_rate": 2.937842093117904e-07, "loss": 0.0056, "step": 106740 }, { "epoch": 1.8027831256121862, "grad_norm": 0.32517021894454956, "learning_rate": 2.9328668471295673e-07, "loss": 0.0077, "step": 106750 }, { "epoch": 1.8029520045935081, "grad_norm": 0.2074558138847351, "learning_rate": 2.9278956902184243e-07, "loss": 0.0058, "step": 106760 }, { "epoch": 1.8031208835748302, "grad_norm": 0.1610659956932068, "learning_rate": 2.922928622816357e-07, "loss": 0.008, "step": 106770 }, { "epoch": 1.8032897625561524, "grad_norm": 0.196663960814476, "learning_rate": 2.9179656453548865e-07, "loss": 0.008, "step": 106780 }, { "epoch": 1.8034586415374743, "grad_norm": 0.2991016209125519, "learning_rate": 2.9130067582652076e-07, "loss": 0.0072, "step": 106790 }, { "epoch": 1.8036275205187962, "grad_norm": 0.1844288408756256, "learning_rate": 2.9080519619781145e-07, "loss": 0.0069, "step": 106800 }, { "epoch": 1.803796399500118, "grad_norm": 0.4295073449611664, "learning_rate": 2.903101256924068e-07, "loss": 0.0113, "step": 106810 }, { "epoch": 1.8039652784814402, "grad_norm": 0.3817768394947052, "learning_rate": 2.898154643533185e-07, "loss": 0.0108, "step": 106820 }, { "epoch": 1.8041341574627623, "grad_norm": 0.27573710680007935, "learning_rate": 2.893212122235206e-07, "loss": 0.0067, "step": 106830 }, { "epoch": 1.8043030364440842, "grad_norm": 0.15678627789020538, "learning_rate": 2.8882736934595426e-07, "loss": 0.0072, "step": 106840 }, { "epoch": 1.804471915425406, "grad_norm": 0.13834889233112335, "learning_rate": 2.8833393576352066e-07, "loss": 0.0064, "step": 106850 }, { "epoch": 1.804640794406728, "grad_norm": 0.3385283946990967, "learning_rate": 2.8784091151908946e-07, "loss": 0.0061, "step": 106860 }, { "epoch": 1.80480967338805, "grad_norm": 0.34836652874946594, "learning_rate": 2.87348296655493e-07, "loss": 0.0079, "step": 106870 }, { "epoch": 1.8049785523693722, "grad_norm": 0.2150023728609085, "learning_rate": 2.8685609121552994e-07, "loss": 0.0055, "step": 106880 }, { "epoch": 1.805147431350694, "grad_norm": 0.057452376931905746, "learning_rate": 2.863642952419599e-07, "loss": 0.0075, "step": 106890 }, { "epoch": 1.805316310332016, "grad_norm": 0.19576002657413483, "learning_rate": 2.858729087775086e-07, "loss": 0.0041, "step": 106900 }, { "epoch": 1.805485189313338, "grad_norm": 0.15259425342082977, "learning_rate": 2.8538193186486876e-07, "loss": 0.0043, "step": 106910 }, { "epoch": 1.80565406829466, "grad_norm": 0.19322103261947632, "learning_rate": 2.8489136454669495e-07, "loss": 0.008, "step": 106920 }, { "epoch": 1.8058229472759821, "grad_norm": 0.18165774643421173, "learning_rate": 2.8440120686560437e-07, "loss": 0.0067, "step": 106930 }, { "epoch": 1.805991826257304, "grad_norm": 0.2730461061000824, "learning_rate": 2.8391145886418224e-07, "loss": 0.0085, "step": 106940 }, { "epoch": 1.806160705238626, "grad_norm": 0.226180762052536, "learning_rate": 2.834221205849763e-07, "loss": 0.0058, "step": 106950 }, { "epoch": 1.8063295842199478, "grad_norm": 0.2976968288421631, "learning_rate": 2.829331920705003e-07, "loss": 0.0067, "step": 106960 }, { "epoch": 1.80649846320127, "grad_norm": 0.2527257204055786, "learning_rate": 2.824446733632286e-07, "loss": 0.0051, "step": 106970 }, { "epoch": 1.806667342182592, "grad_norm": 0.08838313817977905, "learning_rate": 2.819565645056044e-07, "loss": 0.0046, "step": 106980 }, { "epoch": 1.806836221163914, "grad_norm": 0.18876618146896362, "learning_rate": 2.8146886554003274e-07, "loss": 0.0068, "step": 106990 }, { "epoch": 1.8070051001452359, "grad_norm": 0.9717719554901123, "learning_rate": 2.809815765088847e-07, "loss": 0.008, "step": 107000 }, { "epoch": 1.8071739791265578, "grad_norm": 0.14931604266166687, "learning_rate": 2.80494697454492e-07, "loss": 0.0065, "step": 107010 }, { "epoch": 1.8073428581078799, "grad_norm": 0.12055400758981705, "learning_rate": 2.8000822841915686e-07, "loss": 0.0042, "step": 107020 }, { "epoch": 1.807511737089202, "grad_norm": 0.2902287244796753, "learning_rate": 2.795221694451405e-07, "loss": 0.0056, "step": 107030 }, { "epoch": 1.8076806160705239, "grad_norm": 0.10177701711654663, "learning_rate": 2.79036520574672e-07, "loss": 0.0072, "step": 107040 }, { "epoch": 1.8078494950518458, "grad_norm": 0.2967875897884369, "learning_rate": 2.78551281849942e-07, "loss": 0.0093, "step": 107050 }, { "epoch": 1.8080183740331677, "grad_norm": 0.4391219913959503, "learning_rate": 2.780664533131072e-07, "loss": 0.01, "step": 107060 }, { "epoch": 1.8081872530144898, "grad_norm": 0.1831834465265274, "learning_rate": 2.77582035006288e-07, "loss": 0.007, "step": 107070 }, { "epoch": 1.808356131995812, "grad_norm": 0.3308049738407135, "learning_rate": 2.770980269715712e-07, "loss": 0.0085, "step": 107080 }, { "epoch": 1.8085250109771338, "grad_norm": 0.5594005584716797, "learning_rate": 2.7661442925100367e-07, "loss": 0.0085, "step": 107090 }, { "epoch": 1.8086938899584557, "grad_norm": 0.1955827921628952, "learning_rate": 2.761312418866008e-07, "loss": 0.0079, "step": 107100 }, { "epoch": 1.8088627689397776, "grad_norm": 0.13134834170341492, "learning_rate": 2.756484649203406e-07, "loss": 0.0073, "step": 107110 }, { "epoch": 1.8090316479210997, "grad_norm": 0.19184383749961853, "learning_rate": 2.7516609839416565e-07, "loss": 0.0066, "step": 107120 }, { "epoch": 1.8092005269024218, "grad_norm": 0.29594725370407104, "learning_rate": 2.746841423499813e-07, "loss": 0.0068, "step": 107130 }, { "epoch": 1.8093694058837437, "grad_norm": 0.2543310523033142, "learning_rate": 2.742025968296602e-07, "loss": 0.0058, "step": 107140 }, { "epoch": 1.8095382848650656, "grad_norm": 0.3136759400367737, "learning_rate": 2.7372146187503776e-07, "loss": 0.0036, "step": 107150 }, { "epoch": 1.8097071638463875, "grad_norm": 0.09913485497236252, "learning_rate": 2.7324073752791445e-07, "loss": 0.008, "step": 107160 }, { "epoch": 1.8098760428277096, "grad_norm": 0.1496598720550537, "learning_rate": 2.7276042383005297e-07, "loss": 0.0035, "step": 107170 }, { "epoch": 1.8100449218090318, "grad_norm": 0.2731480896472931, "learning_rate": 2.722805208231821e-07, "loss": 0.0065, "step": 107180 }, { "epoch": 1.8102138007903537, "grad_norm": 0.052527982741594315, "learning_rate": 2.718010285489947e-07, "loss": 0.0062, "step": 107190 }, { "epoch": 1.8103826797716756, "grad_norm": 0.17446263134479523, "learning_rate": 2.71321947049149e-07, "loss": 0.0047, "step": 107200 }, { "epoch": 1.8105515587529974, "grad_norm": 0.18655350804328918, "learning_rate": 2.708432763652646e-07, "loss": 0.0087, "step": 107210 }, { "epoch": 1.8107204377343196, "grad_norm": 0.4533626437187195, "learning_rate": 2.7036501653892824e-07, "loss": 0.0061, "step": 107220 }, { "epoch": 1.8108893167156417, "grad_norm": 0.12336044758558273, "learning_rate": 2.6988716761168986e-07, "loss": 0.0036, "step": 107230 }, { "epoch": 1.8110581956969636, "grad_norm": 0.5154713988304138, "learning_rate": 2.6940972962506415e-07, "loss": 0.0052, "step": 107240 }, { "epoch": 1.8112270746782855, "grad_norm": 0.2725221514701843, "learning_rate": 2.6893270262052905e-07, "loss": 0.0061, "step": 107250 }, { "epoch": 1.8113959536596074, "grad_norm": 0.14604789018630981, "learning_rate": 2.6845608663952803e-07, "loss": 0.0048, "step": 107260 }, { "epoch": 1.8115648326409295, "grad_norm": 0.596747875213623, "learning_rate": 2.67979881723468e-07, "loss": 0.0103, "step": 107270 }, { "epoch": 1.8117337116222516, "grad_norm": 0.5228878855705261, "learning_rate": 2.6750408791372084e-07, "loss": 0.0057, "step": 107280 }, { "epoch": 1.8119025906035735, "grad_norm": 0.35370633006095886, "learning_rate": 2.670287052516224e-07, "loss": 0.0065, "step": 107290 }, { "epoch": 1.8120714695848954, "grad_norm": 0.3136005401611328, "learning_rate": 2.6655373377847236e-07, "loss": 0.0085, "step": 107300 }, { "epoch": 1.8122403485662173, "grad_norm": 0.42194390296936035, "learning_rate": 2.660791735355345e-07, "loss": 0.0074, "step": 107310 }, { "epoch": 1.8124092275475394, "grad_norm": 0.11910005658864975, "learning_rate": 2.6560502456403845e-07, "loss": 0.0052, "step": 107320 }, { "epoch": 1.8125781065288615, "grad_norm": 0.34545469284057617, "learning_rate": 2.651312869051781e-07, "loss": 0.0064, "step": 107330 }, { "epoch": 1.8127469855101834, "grad_norm": 0.11114717274904251, "learning_rate": 2.646579606001082e-07, "loss": 0.0055, "step": 107340 }, { "epoch": 1.8129158644915053, "grad_norm": 0.22199773788452148, "learning_rate": 2.641850456899514e-07, "loss": 0.0063, "step": 107350 }, { "epoch": 1.8130847434728272, "grad_norm": 0.17148509621620178, "learning_rate": 2.637125422157932e-07, "loss": 0.0049, "step": 107360 }, { "epoch": 1.8132536224541493, "grad_norm": 0.3076220750808716, "learning_rate": 2.632404502186836e-07, "loss": 0.0047, "step": 107370 }, { "epoch": 1.8134225014354715, "grad_norm": 0.04321264103055, "learning_rate": 2.6276876973963695e-07, "loss": 0.0039, "step": 107380 }, { "epoch": 1.8135913804167934, "grad_norm": 0.6535623073577881, "learning_rate": 2.622975008196316e-07, "loss": 0.0099, "step": 107390 }, { "epoch": 1.8137602593981152, "grad_norm": 0.22837163507938385, "learning_rate": 2.6182664349960974e-07, "loss": 0.0072, "step": 107400 }, { "epoch": 1.8139291383794371, "grad_norm": 0.08610763400793076, "learning_rate": 2.613561978204804e-07, "loss": 0.0054, "step": 107410 }, { "epoch": 1.8140980173607593, "grad_norm": 0.18319258093833923, "learning_rate": 2.6088616382311195e-07, "loss": 0.0063, "step": 107420 }, { "epoch": 1.8142668963420814, "grad_norm": 0.24266186356544495, "learning_rate": 2.604165415483412e-07, "loss": 0.009, "step": 107430 }, { "epoch": 1.8144357753234033, "grad_norm": 0.27655598521232605, "learning_rate": 2.599473310369671e-07, "loss": 0.0061, "step": 107440 }, { "epoch": 1.8146046543047252, "grad_norm": 0.15016931295394897, "learning_rate": 2.5947853232975497e-07, "loss": 0.0089, "step": 107450 }, { "epoch": 1.814773533286047, "grad_norm": 0.23720720410346985, "learning_rate": 2.59010145467431e-07, "loss": 0.0059, "step": 107460 }, { "epoch": 1.8149424122673692, "grad_norm": 0.20164114236831665, "learning_rate": 2.5854217049068875e-07, "loss": 0.0068, "step": 107470 }, { "epoch": 1.8151112912486913, "grad_norm": 0.3445499837398529, "learning_rate": 2.580746074401841e-07, "loss": 0.0065, "step": 107480 }, { "epoch": 1.8152801702300132, "grad_norm": 0.20983904600143433, "learning_rate": 2.576074563565384e-07, "loss": 0.0074, "step": 107490 }, { "epoch": 1.815449049211335, "grad_norm": 0.22667331993579865, "learning_rate": 2.5714071728033587e-07, "loss": 0.0064, "step": 107500 }, { "epoch": 1.815617928192657, "grad_norm": 0.3570428192615509, "learning_rate": 2.5667439025212626e-07, "loss": 0.0067, "step": 107510 }, { "epoch": 1.8157868071739791, "grad_norm": 0.25888732075691223, "learning_rate": 2.5620847531242277e-07, "loss": 0.0065, "step": 107520 }, { "epoch": 1.8159556861553012, "grad_norm": 0.4879309833049774, "learning_rate": 2.5574297250170356e-07, "loss": 0.0099, "step": 107530 }, { "epoch": 1.8161245651366231, "grad_norm": 0.09233301877975464, "learning_rate": 2.552778818604085e-07, "loss": 0.0044, "step": 107540 }, { "epoch": 1.816293444117945, "grad_norm": 0.19060681760311127, "learning_rate": 2.5481320342894523e-07, "loss": 0.0059, "step": 107550 }, { "epoch": 1.816462323099267, "grad_norm": 0.21863721311092377, "learning_rate": 2.543489372476832e-07, "loss": 0.0068, "step": 107560 }, { "epoch": 1.816631202080589, "grad_norm": 0.26266953349113464, "learning_rate": 2.5388508335695785e-07, "loss": 0.0061, "step": 107570 }, { "epoch": 1.8168000810619112, "grad_norm": 0.4615824222564697, "learning_rate": 2.534216417970653e-07, "loss": 0.0058, "step": 107580 }, { "epoch": 1.816968960043233, "grad_norm": 0.176026850938797, "learning_rate": 2.5295861260827003e-07, "loss": 0.0058, "step": 107590 }, { "epoch": 1.817137839024555, "grad_norm": 0.7033838629722595, "learning_rate": 2.5249599583079876e-07, "loss": 0.0064, "step": 107600 }, { "epoch": 1.8173067180058768, "grad_norm": 0.3143448233604431, "learning_rate": 2.5203379150484207e-07, "loss": 0.0092, "step": 107610 }, { "epoch": 1.817475596987199, "grad_norm": 0.1887858808040619, "learning_rate": 2.5157199967055514e-07, "loss": 0.0085, "step": 107620 }, { "epoch": 1.817644475968521, "grad_norm": 0.27804046869277954, "learning_rate": 2.5111062036805747e-07, "loss": 0.0061, "step": 107630 }, { "epoch": 1.817813354949843, "grad_norm": 0.3959372043609619, "learning_rate": 2.506496536374325e-07, "loss": 0.0051, "step": 107640 }, { "epoch": 1.8179822339311649, "grad_norm": 0.33433765172958374, "learning_rate": 2.501890995187284e-07, "loss": 0.0037, "step": 107650 }, { "epoch": 1.8181511129124868, "grad_norm": 0.4721083343029022, "learning_rate": 2.497289580519563e-07, "loss": 0.0096, "step": 107660 }, { "epoch": 1.8183199918938089, "grad_norm": 0.42669758200645447, "learning_rate": 2.492692292770921e-07, "loss": 0.004, "step": 107670 }, { "epoch": 1.818488870875131, "grad_norm": 0.244266077876091, "learning_rate": 2.4880991323407657e-07, "loss": 0.0065, "step": 107680 }, { "epoch": 1.818657749856453, "grad_norm": 0.36975082755088806, "learning_rate": 2.483510099628139e-07, "loss": 0.0081, "step": 107690 }, { "epoch": 1.8188266288377748, "grad_norm": 0.05969589948654175, "learning_rate": 2.4789251950317114e-07, "loss": 0.0031, "step": 107700 }, { "epoch": 1.8189955078190967, "grad_norm": 0.22282390296459198, "learning_rate": 2.4743444189498253e-07, "loss": 0.006, "step": 107710 }, { "epoch": 1.8191643868004188, "grad_norm": 0.3696771264076233, "learning_rate": 2.469767771780435e-07, "loss": 0.007, "step": 107720 }, { "epoch": 1.819333265781741, "grad_norm": 0.1898893117904663, "learning_rate": 2.46519525392116e-07, "loss": 0.007, "step": 107730 }, { "epoch": 1.8195021447630628, "grad_norm": 0.31918904185295105, "learning_rate": 2.460626865769239e-07, "loss": 0.0052, "step": 107740 }, { "epoch": 1.8196710237443847, "grad_norm": 0.2966409921646118, "learning_rate": 2.4560626077215653e-07, "loss": 0.0078, "step": 107750 }, { "epoch": 1.8198399027257066, "grad_norm": 0.371632844209671, "learning_rate": 2.4515024801746724e-07, "loss": 0.0084, "step": 107760 }, { "epoch": 1.8200087817070287, "grad_norm": 0.16013279557228088, "learning_rate": 2.446946483524737e-07, "loss": 0.0081, "step": 107770 }, { "epoch": 1.8201776606883509, "grad_norm": 0.25146234035491943, "learning_rate": 2.4423946181675705e-07, "loss": 0.0054, "step": 107780 }, { "epoch": 1.8203465396696727, "grad_norm": 0.43745818734169006, "learning_rate": 2.4378468844986236e-07, "loss": 0.0079, "step": 107790 }, { "epoch": 1.8205154186509946, "grad_norm": 0.20664887130260468, "learning_rate": 2.4333032829129857e-07, "loss": 0.0083, "step": 107800 }, { "epoch": 1.8206842976323165, "grad_norm": 0.9678419828414917, "learning_rate": 2.4287638138054127e-07, "loss": 0.0099, "step": 107810 }, { "epoch": 1.8208531766136387, "grad_norm": 0.6937575936317444, "learning_rate": 2.424228477570273e-07, "loss": 0.008, "step": 107820 }, { "epoch": 1.8210220555949608, "grad_norm": 0.3108690083026886, "learning_rate": 2.419697274601579e-07, "loss": 0.0088, "step": 107830 }, { "epoch": 1.8211909345762827, "grad_norm": 0.09267138689756393, "learning_rate": 2.415170205292994e-07, "loss": 0.0073, "step": 107840 }, { "epoch": 1.8213598135576046, "grad_norm": 0.2869453728199005, "learning_rate": 2.410647270037825e-07, "loss": 0.0045, "step": 107850 }, { "epoch": 1.8215286925389265, "grad_norm": 0.4639340341091156, "learning_rate": 2.406128469229002e-07, "loss": 0.0068, "step": 107860 }, { "epoch": 1.8216975715202486, "grad_norm": 0.2164517641067505, "learning_rate": 2.401613803259123e-07, "loss": 0.0099, "step": 107870 }, { "epoch": 1.8218664505015707, "grad_norm": 0.10034526884555817, "learning_rate": 2.3971032725204015e-07, "loss": 0.0073, "step": 107880 }, { "epoch": 1.8220353294828926, "grad_norm": 0.19471319019794464, "learning_rate": 2.3925968774047013e-07, "loss": 0.0065, "step": 107890 }, { "epoch": 1.8222042084642145, "grad_norm": 0.19792230427265167, "learning_rate": 2.3880946183035324e-07, "loss": 0.0057, "step": 107900 }, { "epoch": 1.8223730874455364, "grad_norm": 0.15898744761943817, "learning_rate": 2.3835964956080315e-07, "loss": 0.0067, "step": 107910 }, { "epoch": 1.8225419664268585, "grad_norm": 0.3466832637786865, "learning_rate": 2.3791025097089915e-07, "loss": 0.0048, "step": 107920 }, { "epoch": 1.8227108454081806, "grad_norm": 0.26420286297798157, "learning_rate": 2.3746126609968333e-07, "loss": 0.0048, "step": 107930 }, { "epoch": 1.8228797243895025, "grad_norm": 0.29309946298599243, "learning_rate": 2.370126949861634e-07, "loss": 0.0089, "step": 107940 }, { "epoch": 1.8230486033708244, "grad_norm": 0.24940812587738037, "learning_rate": 2.3656453766930875e-07, "loss": 0.0057, "step": 107950 }, { "epoch": 1.8232174823521463, "grad_norm": 0.43004703521728516, "learning_rate": 2.3611679418805488e-07, "loss": 0.0073, "step": 107960 }, { "epoch": 1.8233863613334684, "grad_norm": 0.2781832814216614, "learning_rate": 2.356694645813007e-07, "loss": 0.0081, "step": 107970 }, { "epoch": 1.8235552403147905, "grad_norm": 0.3466466963291168, "learning_rate": 2.3522254888790953e-07, "loss": 0.0071, "step": 107980 }, { "epoch": 1.8237241192961124, "grad_norm": 0.18807652592658997, "learning_rate": 2.3477604714670698e-07, "loss": 0.0085, "step": 107990 }, { "epoch": 1.8238929982774343, "grad_norm": 0.13585786521434784, "learning_rate": 2.343299593964854e-07, "loss": 0.0075, "step": 108000 }, { "epoch": 1.8240618772587562, "grad_norm": 0.3987380266189575, "learning_rate": 2.338842856759993e-07, "loss": 0.0055, "step": 108010 }, { "epoch": 1.8242307562400784, "grad_norm": 0.20437681674957275, "learning_rate": 2.3343902602396774e-07, "loss": 0.0065, "step": 108020 }, { "epoch": 1.8243996352214005, "grad_norm": 0.14056724309921265, "learning_rate": 2.3299418047907362e-07, "loss": 0.0064, "step": 108030 }, { "epoch": 1.8245685142027224, "grad_norm": 0.25559794902801514, "learning_rate": 2.3254974907996387e-07, "loss": 0.0068, "step": 108040 }, { "epoch": 1.8247373931840443, "grad_norm": 0.1985708773136139, "learning_rate": 2.3210573186525033e-07, "loss": 0.0057, "step": 108050 }, { "epoch": 1.8249062721653662, "grad_norm": 0.4884658455848694, "learning_rate": 2.316621288735077e-07, "loss": 0.0079, "step": 108060 }, { "epoch": 1.8250751511466883, "grad_norm": 0.31980034708976746, "learning_rate": 2.3121894014327517e-07, "loss": 0.0051, "step": 108070 }, { "epoch": 1.8252440301280104, "grad_norm": 0.2959805428981781, "learning_rate": 2.3077616571305528e-07, "loss": 0.0067, "step": 108080 }, { "epoch": 1.8254129091093323, "grad_norm": 0.019986361265182495, "learning_rate": 2.3033380562131612e-07, "loss": 0.0034, "step": 108090 }, { "epoch": 1.8255817880906542, "grad_norm": 0.07217063009738922, "learning_rate": 2.298918599064881e-07, "loss": 0.0043, "step": 108100 }, { "epoch": 1.825750667071976, "grad_norm": 0.192657470703125, "learning_rate": 2.2945032860696658e-07, "loss": 0.0039, "step": 108110 }, { "epoch": 1.8259195460532982, "grad_norm": 0.2590045630931854, "learning_rate": 2.2900921176111147e-07, "loss": 0.0057, "step": 108120 }, { "epoch": 1.8260884250346203, "grad_norm": 0.1773860603570938, "learning_rate": 2.2856850940724484e-07, "loss": 0.0068, "step": 108130 }, { "epoch": 1.8262573040159422, "grad_norm": 0.17879177629947662, "learning_rate": 2.2812822158365498e-07, "loss": 0.0057, "step": 108140 }, { "epoch": 1.8264261829972641, "grad_norm": 0.2987409830093384, "learning_rate": 2.2768834832859189e-07, "loss": 0.0091, "step": 108150 }, { "epoch": 1.826595061978586, "grad_norm": 0.242484912276268, "learning_rate": 2.2724888968027103e-07, "loss": 0.0057, "step": 108160 }, { "epoch": 1.8267639409599081, "grad_norm": 0.3859591484069824, "learning_rate": 2.2680984567687191e-07, "loss": 0.0117, "step": 108170 }, { "epoch": 1.8269328199412302, "grad_norm": 0.15201051533222198, "learning_rate": 2.263712163565368e-07, "loss": 0.0081, "step": 108180 }, { "epoch": 1.8271016989225521, "grad_norm": 0.09491164982318878, "learning_rate": 2.2593300175737466e-07, "loss": 0.0082, "step": 108190 }, { "epoch": 1.827270577903874, "grad_norm": 0.3155602514743805, "learning_rate": 2.254952019174539e-07, "loss": 0.0088, "step": 108200 }, { "epoch": 1.827439456885196, "grad_norm": 0.19550810754299164, "learning_rate": 2.250578168748102e-07, "loss": 0.0072, "step": 108210 }, { "epoch": 1.827608335866518, "grad_norm": 0.27895280718803406, "learning_rate": 2.2462084666744376e-07, "loss": 0.0068, "step": 108220 }, { "epoch": 1.8277772148478402, "grad_norm": 0.21154767274856567, "learning_rate": 2.2418429133331643e-07, "loss": 0.0042, "step": 108230 }, { "epoch": 1.827946093829162, "grad_norm": 0.3080839514732361, "learning_rate": 2.2374815091035505e-07, "loss": 0.0054, "step": 108240 }, { "epoch": 1.828114972810484, "grad_norm": 0.26385775208473206, "learning_rate": 2.2331242543645105e-07, "loss": 0.0064, "step": 108250 }, { "epoch": 1.8282838517918059, "grad_norm": 0.1646813303232193, "learning_rate": 2.2287711494945906e-07, "loss": 0.0051, "step": 108260 }, { "epoch": 1.828452730773128, "grad_norm": 0.2100343257188797, "learning_rate": 2.224422194871978e-07, "loss": 0.0093, "step": 108270 }, { "epoch": 1.82862160975445, "grad_norm": 0.04136165603995323, "learning_rate": 2.220077390874492e-07, "loss": 0.0054, "step": 108280 }, { "epoch": 1.828790488735772, "grad_norm": 0.22518153488636017, "learning_rate": 2.2157367378796034e-07, "loss": 0.0074, "step": 108290 }, { "epoch": 1.8289593677170939, "grad_norm": 0.13521313667297363, "learning_rate": 2.211400236264416e-07, "loss": 0.0046, "step": 108300 }, { "epoch": 1.8291282466984158, "grad_norm": 0.28369277715682983, "learning_rate": 2.207067886405684e-07, "loss": 0.0059, "step": 108310 }, { "epoch": 1.829297125679738, "grad_norm": 0.22046740353107452, "learning_rate": 2.2027396886797782e-07, "loss": 0.0038, "step": 108320 }, { "epoch": 1.82946600466106, "grad_norm": 0.2669176161289215, "learning_rate": 2.1984156434627257e-07, "loss": 0.004, "step": 108330 }, { "epoch": 1.829634883642382, "grad_norm": 0.45372334122657776, "learning_rate": 2.194095751130193e-07, "loss": 0.0077, "step": 108340 }, { "epoch": 1.8298037626237038, "grad_norm": 0.1418810933828354, "learning_rate": 2.1897800120574742e-07, "loss": 0.0039, "step": 108350 }, { "epoch": 1.8299726416050257, "grad_norm": 0.19384971261024475, "learning_rate": 2.1854684266195192e-07, "loss": 0.0071, "step": 108360 }, { "epoch": 1.8301415205863478, "grad_norm": 0.20276717841625214, "learning_rate": 2.1811609951909007e-07, "loss": 0.006, "step": 108370 }, { "epoch": 1.83031039956767, "grad_norm": 0.21066118776798248, "learning_rate": 2.1768577181458417e-07, "loss": 0.0078, "step": 108380 }, { "epoch": 1.8304792785489918, "grad_norm": 0.15478600561618805, "learning_rate": 2.1725585958582097e-07, "loss": 0.0065, "step": 108390 }, { "epoch": 1.8306481575303137, "grad_norm": 0.2181486189365387, "learning_rate": 2.168263628701478e-07, "loss": 0.0058, "step": 108400 }, { "epoch": 1.8308170365116356, "grad_norm": 0.4353921711444855, "learning_rate": 2.163972817048804e-07, "loss": 0.0083, "step": 108410 }, { "epoch": 1.8309859154929577, "grad_norm": 0.1376005858182907, "learning_rate": 2.1596861612729503e-07, "loss": 0.0047, "step": 108420 }, { "epoch": 1.8311547944742799, "grad_norm": 0.25841131806373596, "learning_rate": 2.155403661746347e-07, "loss": 0.0107, "step": 108430 }, { "epoch": 1.8313236734556018, "grad_norm": 0.26653823256492615, "learning_rate": 2.151125318841024e-07, "loss": 0.0063, "step": 108440 }, { "epoch": 1.8314925524369237, "grad_norm": 0.16493427753448486, "learning_rate": 2.14685113292869e-07, "loss": 0.0073, "step": 108450 }, { "epoch": 1.8316614314182456, "grad_norm": 0.14809738099575043, "learning_rate": 2.1425811043806698e-07, "loss": 0.009, "step": 108460 }, { "epoch": 1.8318303103995677, "grad_norm": 0.3850421905517578, "learning_rate": 2.138315233567939e-07, "loss": 0.0064, "step": 108470 }, { "epoch": 1.8319991893808898, "grad_norm": 0.4718252122402191, "learning_rate": 2.1340535208610958e-07, "loss": 0.0083, "step": 108480 }, { "epoch": 1.8321680683622117, "grad_norm": 0.23565110564231873, "learning_rate": 2.129795966630399e-07, "loss": 0.006, "step": 108490 }, { "epoch": 1.8323369473435336, "grad_norm": 0.3905116021633148, "learning_rate": 2.1255425712457202e-07, "loss": 0.0095, "step": 108500 }, { "epoch": 1.8325058263248555, "grad_norm": 0.13903644680976868, "learning_rate": 2.1212933350766075e-07, "loss": 0.0057, "step": 108510 }, { "epoch": 1.8326747053061776, "grad_norm": 0.7066616415977478, "learning_rate": 2.117048258492199e-07, "loss": 0.0066, "step": 108520 }, { "epoch": 1.8328435842874997, "grad_norm": 0.272892564535141, "learning_rate": 2.112807341861306e-07, "loss": 0.007, "step": 108530 }, { "epoch": 1.8330124632688216, "grad_norm": 0.3329223692417145, "learning_rate": 2.1085705855523718e-07, "loss": 0.0074, "step": 108540 }, { "epoch": 1.8331813422501435, "grad_norm": 0.26686331629753113, "learning_rate": 2.1043379899334748e-07, "loss": 0.0068, "step": 108550 }, { "epoch": 1.8333502212314654, "grad_norm": 0.23928505182266235, "learning_rate": 2.1001095553723261e-07, "loss": 0.0063, "step": 108560 }, { "epoch": 1.8335191002127875, "grad_norm": 0.2464580535888672, "learning_rate": 2.095885282236293e-07, "loss": 0.0067, "step": 108570 }, { "epoch": 1.8336879791941096, "grad_norm": 0.13999031484127045, "learning_rate": 2.091665170892354e-07, "loss": 0.0117, "step": 108580 }, { "epoch": 1.8338568581754315, "grad_norm": 0.21696166694164276, "learning_rate": 2.0874492217071607e-07, "loss": 0.0049, "step": 108590 }, { "epoch": 1.8340257371567534, "grad_norm": 0.4846738278865814, "learning_rate": 2.0832374350469699e-07, "loss": 0.0083, "step": 108600 }, { "epoch": 1.8341946161380753, "grad_norm": 0.23454730212688446, "learning_rate": 2.079029811277694e-07, "loss": 0.0042, "step": 108610 }, { "epoch": 1.8343634951193974, "grad_norm": 0.16424860060214996, "learning_rate": 2.0748263507648914e-07, "loss": 0.0028, "step": 108620 }, { "epoch": 1.8345323741007196, "grad_norm": 0.2709076404571533, "learning_rate": 2.070627053873736e-07, "loss": 0.0076, "step": 108630 }, { "epoch": 1.8347012530820415, "grad_norm": 0.2016683965921402, "learning_rate": 2.066431920969064e-07, "loss": 0.0076, "step": 108640 }, { "epoch": 1.8348701320633634, "grad_norm": 0.4312201738357544, "learning_rate": 2.062240952415323e-07, "loss": 0.0074, "step": 108650 }, { "epoch": 1.8350390110446853, "grad_norm": 0.2684875428676605, "learning_rate": 2.058054148576627e-07, "loss": 0.0065, "step": 108660 }, { "epoch": 1.8352078900260074, "grad_norm": 0.3587173819541931, "learning_rate": 2.053871509816707e-07, "loss": 0.0064, "step": 108670 }, { "epoch": 1.8353767690073295, "grad_norm": 0.268390029668808, "learning_rate": 2.0496930364989564e-07, "loss": 0.0058, "step": 108680 }, { "epoch": 1.8355456479886514, "grad_norm": 0.21991120278835297, "learning_rate": 2.0455187289863677e-07, "loss": 0.0092, "step": 108690 }, { "epoch": 1.8357145269699733, "grad_norm": 0.23918350040912628, "learning_rate": 2.041348587641606e-07, "loss": 0.0079, "step": 108700 }, { "epoch": 1.8358834059512952, "grad_norm": 0.19635771214962006, "learning_rate": 2.0371826128269544e-07, "loss": 0.004, "step": 108710 }, { "epoch": 1.8360522849326173, "grad_norm": 0.16976584494113922, "learning_rate": 2.0330208049043676e-07, "loss": 0.0036, "step": 108720 }, { "epoch": 1.8362211639139394, "grad_norm": 0.19267402589321136, "learning_rate": 2.028863164235384e-07, "loss": 0.0057, "step": 108730 }, { "epoch": 1.8363900428952613, "grad_norm": 0.1451558768749237, "learning_rate": 2.024709691181226e-07, "loss": 0.0092, "step": 108740 }, { "epoch": 1.8365589218765832, "grad_norm": 0.7094679474830627, "learning_rate": 2.0205603861027323e-07, "loss": 0.0112, "step": 108750 }, { "epoch": 1.836727800857905, "grad_norm": 0.22630548477172852, "learning_rate": 2.016415249360387e-07, "loss": 0.0067, "step": 108760 }, { "epoch": 1.8368966798392272, "grad_norm": 0.42582187056541443, "learning_rate": 2.0122742813143082e-07, "loss": 0.0071, "step": 108770 }, { "epoch": 1.8370655588205493, "grad_norm": 0.12512707710266113, "learning_rate": 2.0081374823242462e-07, "loss": 0.0059, "step": 108780 }, { "epoch": 1.8372344378018712, "grad_norm": 1.4016764163970947, "learning_rate": 2.0040048527496024e-07, "loss": 0.0042, "step": 108790 }, { "epoch": 1.8374033167831931, "grad_norm": 0.1891208291053772, "learning_rate": 1.9998763929494126e-07, "loss": 0.0074, "step": 108800 }, { "epoch": 1.837572195764515, "grad_norm": 0.3914282023906708, "learning_rate": 1.9957521032823446e-07, "loss": 0.0068, "step": 108810 }, { "epoch": 1.8377410747458371, "grad_norm": 0.5749671459197998, "learning_rate": 1.99163198410669e-07, "loss": 0.0083, "step": 108820 }, { "epoch": 1.8379099537271593, "grad_norm": 0.20645850896835327, "learning_rate": 1.9875160357804236e-07, "loss": 0.0068, "step": 108830 }, { "epoch": 1.8380788327084812, "grad_norm": 0.2510092258453369, "learning_rate": 1.9834042586611201e-07, "loss": 0.0101, "step": 108840 }, { "epoch": 1.838247711689803, "grad_norm": 0.20594748854637146, "learning_rate": 1.9792966531059888e-07, "loss": 0.0058, "step": 108850 }, { "epoch": 1.838416590671125, "grad_norm": 0.3398481011390686, "learning_rate": 1.9751932194718936e-07, "loss": 0.0068, "step": 108860 }, { "epoch": 1.838585469652447, "grad_norm": 0.42326807975769043, "learning_rate": 1.9710939581153332e-07, "loss": 0.0059, "step": 108870 }, { "epoch": 1.8387543486337692, "grad_norm": 0.15791739523410797, "learning_rate": 1.9669988693924502e-07, "loss": 0.0079, "step": 108880 }, { "epoch": 1.838923227615091, "grad_norm": 0.3529512882232666, "learning_rate": 1.962907953658999e-07, "loss": 0.0065, "step": 108890 }, { "epoch": 1.839092106596413, "grad_norm": 0.206798255443573, "learning_rate": 1.9588212112703953e-07, "loss": 0.0056, "step": 108900 }, { "epoch": 1.8392609855777349, "grad_norm": 0.05638216435909271, "learning_rate": 1.9547386425816884e-07, "loss": 0.008, "step": 108910 }, { "epoch": 1.839429864559057, "grad_norm": 0.2625057101249695, "learning_rate": 1.9506602479475613e-07, "loss": 0.0078, "step": 108920 }, { "epoch": 1.8395987435403791, "grad_norm": 0.19505701959133148, "learning_rate": 1.9465860277223304e-07, "loss": 0.0045, "step": 108930 }, { "epoch": 1.839767622521701, "grad_norm": 0.29872509837150574, "learning_rate": 1.942515982259946e-07, "loss": 0.0103, "step": 108940 }, { "epoch": 1.839936501503023, "grad_norm": 0.14184443652629852, "learning_rate": 1.9384501119140254e-07, "loss": 0.0071, "step": 108950 }, { "epoch": 1.8401053804843448, "grad_norm": 0.2262566089630127, "learning_rate": 1.9343884170377969e-07, "loss": 0.0057, "step": 108960 }, { "epoch": 1.840274259465667, "grad_norm": 0.44607818126678467, "learning_rate": 1.9303308979841118e-07, "loss": 0.0044, "step": 108970 }, { "epoch": 1.840443138446989, "grad_norm": 0.18370376527309418, "learning_rate": 1.9262775551054935e-07, "loss": 0.0052, "step": 108980 }, { "epoch": 1.840612017428311, "grad_norm": 0.126276433467865, "learning_rate": 1.9222283887540827e-07, "loss": 0.005, "step": 108990 }, { "epoch": 1.8407808964096328, "grad_norm": 0.2710539400577545, "learning_rate": 1.9181833992816646e-07, "loss": 0.0047, "step": 109000 }, { "epoch": 1.8409497753909547, "grad_norm": 0.1733303666114807, "learning_rate": 1.9141425870396525e-07, "loss": 0.0035, "step": 109010 }, { "epoch": 1.8411186543722768, "grad_norm": 0.2638471722602844, "learning_rate": 1.9101059523790988e-07, "loss": 0.007, "step": 109020 }, { "epoch": 1.841287533353599, "grad_norm": 0.21684151887893677, "learning_rate": 1.9060734956507066e-07, "loss": 0.0061, "step": 109030 }, { "epoch": 1.8414564123349209, "grad_norm": 0.11562512814998627, "learning_rate": 1.9020452172048065e-07, "loss": 0.0069, "step": 109040 }, { "epoch": 1.8416252913162428, "grad_norm": 0.31731054186820984, "learning_rate": 1.8980211173913522e-07, "loss": 0.0055, "step": 109050 }, { "epoch": 1.8417941702975646, "grad_norm": 0.09830323606729507, "learning_rate": 1.894001196559947e-07, "loss": 0.0052, "step": 109060 }, { "epoch": 1.8419630492788868, "grad_norm": 0.11216709017753601, "learning_rate": 1.8899854550598506e-07, "loss": 0.0051, "step": 109070 }, { "epoch": 1.8421319282602089, "grad_norm": 0.5738362073898315, "learning_rate": 1.8859738932399284e-07, "loss": 0.0076, "step": 109080 }, { "epoch": 1.8423008072415308, "grad_norm": 0.20230886340141296, "learning_rate": 1.8819665114487017e-07, "loss": 0.0067, "step": 109090 }, { "epoch": 1.8424696862228527, "grad_norm": 0.29491376876831055, "learning_rate": 1.8779633100343142e-07, "loss": 0.0063, "step": 109100 }, { "epoch": 1.8426385652041746, "grad_norm": 0.2777405381202698, "learning_rate": 1.873964289344554e-07, "loss": 0.0061, "step": 109110 }, { "epoch": 1.8428074441854967, "grad_norm": 0.18258027732372284, "learning_rate": 1.8699694497268494e-07, "loss": 0.0056, "step": 109120 }, { "epoch": 1.8429763231668188, "grad_norm": 0.33994388580322266, "learning_rate": 1.8659787915282724e-07, "loss": 0.0083, "step": 109130 }, { "epoch": 1.8431452021481407, "grad_norm": 0.26191335916519165, "learning_rate": 1.8619923150955012e-07, "loss": 0.008, "step": 109140 }, { "epoch": 1.8433140811294626, "grad_norm": 0.11899825930595398, "learning_rate": 1.8580100207748863e-07, "loss": 0.0062, "step": 109150 }, { "epoch": 1.8434829601107845, "grad_norm": 0.28272876143455505, "learning_rate": 1.8540319089123902e-07, "loss": 0.0079, "step": 109160 }, { "epoch": 1.8436518390921066, "grad_norm": 0.23992665112018585, "learning_rate": 1.8500579798536255e-07, "loss": 0.0061, "step": 109170 }, { "epoch": 1.8438207180734287, "grad_norm": 0.2306264042854309, "learning_rate": 1.8460882339438434e-07, "loss": 0.0034, "step": 109180 }, { "epoch": 1.8439895970547506, "grad_norm": 0.18095390498638153, "learning_rate": 1.8421226715279182e-07, "loss": 0.0079, "step": 109190 }, { "epoch": 1.8441584760360725, "grad_norm": 0.24400831758975983, "learning_rate": 1.838161292950369e-07, "loss": 0.0067, "step": 109200 }, { "epoch": 1.8443273550173944, "grad_norm": 0.41342005133628845, "learning_rate": 1.8342040985553587e-07, "loss": 0.0055, "step": 109210 }, { "epoch": 1.8444962339987165, "grad_norm": 0.39098235964775085, "learning_rate": 1.8302510886866686e-07, "loss": 0.0075, "step": 109220 }, { "epoch": 1.8446651129800387, "grad_norm": 0.31043824553489685, "learning_rate": 1.8263022636877238e-07, "loss": 0.0053, "step": 109230 }, { "epoch": 1.8448339919613606, "grad_norm": 0.1659170240163803, "learning_rate": 1.8223576239015995e-07, "loss": 0.0088, "step": 109240 }, { "epoch": 1.8450028709426824, "grad_norm": 0.13769572973251343, "learning_rate": 1.8184171696709997e-07, "loss": 0.0057, "step": 109250 }, { "epoch": 1.8451717499240043, "grad_norm": 0.2021947056055069, "learning_rate": 1.8144809013382446e-07, "loss": 0.0045, "step": 109260 }, { "epoch": 1.8453406289053265, "grad_norm": 0.35638824105262756, "learning_rate": 1.8105488192453114e-07, "loss": 0.0054, "step": 109270 }, { "epoch": 1.8455095078866486, "grad_norm": 0.24297215044498444, "learning_rate": 1.8066209237338205e-07, "loss": 0.0046, "step": 109280 }, { "epoch": 1.8456783868679705, "grad_norm": 0.19418475031852722, "learning_rate": 1.8026972151450105e-07, "loss": 0.0068, "step": 109290 }, { "epoch": 1.8458472658492924, "grad_norm": 0.2648871839046478, "learning_rate": 1.7987776938197644e-07, "loss": 0.0069, "step": 109300 }, { "epoch": 1.8460161448306143, "grad_norm": 0.2504139840602875, "learning_rate": 1.7948623600986037e-07, "loss": 0.006, "step": 109310 }, { "epoch": 1.8461850238119364, "grad_norm": 0.17549917101860046, "learning_rate": 1.790951214321679e-07, "loss": 0.0055, "step": 109320 }, { "epoch": 1.8463539027932585, "grad_norm": 0.08855430036783218, "learning_rate": 1.7870442568287848e-07, "loss": 0.0047, "step": 109330 }, { "epoch": 1.8465227817745804, "grad_norm": 0.07950008660554886, "learning_rate": 1.7831414879593444e-07, "loss": 0.008, "step": 109340 }, { "epoch": 1.8466916607559023, "grad_norm": 0.20947499573230743, "learning_rate": 1.7792429080524197e-07, "loss": 0.008, "step": 109350 }, { "epoch": 1.8468605397372242, "grad_norm": 0.2691565155982971, "learning_rate": 1.7753485174467177e-07, "loss": 0.0067, "step": 109360 }, { "epoch": 1.8470294187185463, "grad_norm": 0.2274179458618164, "learning_rate": 1.7714583164805677e-07, "loss": 0.0049, "step": 109370 }, { "epoch": 1.8471982976998684, "grad_norm": 0.13110429048538208, "learning_rate": 1.767572305491938e-07, "loss": 0.0111, "step": 109380 }, { "epoch": 1.8473671766811903, "grad_norm": 0.1358967274427414, "learning_rate": 1.7636904848184366e-07, "loss": 0.007, "step": 109390 }, { "epoch": 1.8475360556625122, "grad_norm": 0.1786072701215744, "learning_rate": 1.7598128547973103e-07, "loss": 0.0055, "step": 109400 }, { "epoch": 1.8477049346438341, "grad_norm": 0.42705199122428894, "learning_rate": 1.7559394157654396e-07, "loss": 0.0057, "step": 109410 }, { "epoch": 1.8478738136251562, "grad_norm": 0.29214930534362793, "learning_rate": 1.7520701680593334e-07, "loss": 0.006, "step": 109420 }, { "epoch": 1.8480426926064784, "grad_norm": 0.24333247542381287, "learning_rate": 1.7482051120151443e-07, "loss": 0.0063, "step": 109430 }, { "epoch": 1.8482115715878003, "grad_norm": 0.6381175518035889, "learning_rate": 1.74434424796866e-07, "loss": 0.0115, "step": 109440 }, { "epoch": 1.8483804505691221, "grad_norm": 0.09710315614938736, "learning_rate": 1.740487576255312e-07, "loss": 0.0065, "step": 109450 }, { "epoch": 1.848549329550444, "grad_norm": 0.15770223736763, "learning_rate": 1.736635097210143e-07, "loss": 0.0099, "step": 109460 }, { "epoch": 1.8487182085317662, "grad_norm": 0.22093483805656433, "learning_rate": 1.7327868111678526e-07, "loss": 0.0123, "step": 109470 }, { "epoch": 1.8488870875130883, "grad_norm": 0.2928176820278168, "learning_rate": 1.7289427184627672e-07, "loss": 0.0083, "step": 109480 }, { "epoch": 1.8490559664944102, "grad_norm": 0.3027305006980896, "learning_rate": 1.72510281942887e-07, "loss": 0.009, "step": 109490 }, { "epoch": 1.849224845475732, "grad_norm": 0.29762548208236694, "learning_rate": 1.7212671143997385e-07, "loss": 0.0061, "step": 109500 }, { "epoch": 1.849393724457054, "grad_norm": 0.130634143948555, "learning_rate": 1.717435603708617e-07, "loss": 0.0041, "step": 109510 }, { "epoch": 1.849562603438376, "grad_norm": 0.17781491577625275, "learning_rate": 1.7136082876883786e-07, "loss": 0.0033, "step": 109520 }, { "epoch": 1.8497314824196982, "grad_norm": 0.1834767907857895, "learning_rate": 1.7097851666715404e-07, "loss": 0.0073, "step": 109530 }, { "epoch": 1.84990036140102, "grad_norm": 0.18709804117679596, "learning_rate": 1.7059662409902312e-07, "loss": 0.0069, "step": 109540 }, { "epoch": 1.850069240382342, "grad_norm": 0.18078304827213287, "learning_rate": 1.7021515109762355e-07, "loss": 0.0066, "step": 109550 }, { "epoch": 1.850238119363664, "grad_norm": 0.13755352795124054, "learning_rate": 1.6983409769609715e-07, "loss": 0.005, "step": 109560 }, { "epoch": 1.850406998344986, "grad_norm": 0.10450132191181183, "learning_rate": 1.6945346392754913e-07, "loss": 0.0066, "step": 109570 }, { "epoch": 1.8505758773263081, "grad_norm": 0.4098151922225952, "learning_rate": 1.6907324982504746e-07, "loss": 0.0104, "step": 109580 }, { "epoch": 1.85074475630763, "grad_norm": 0.3577517569065094, "learning_rate": 1.686934554216235e-07, "loss": 0.0069, "step": 109590 }, { "epoch": 1.850913635288952, "grad_norm": 0.09303415566682816, "learning_rate": 1.683140807502742e-07, "loss": 0.0078, "step": 109600 }, { "epoch": 1.8510825142702738, "grad_norm": 0.18837425112724304, "learning_rate": 1.6793512584395767e-07, "loss": 0.0057, "step": 109610 }, { "epoch": 1.851251393251596, "grad_norm": 0.1879073977470398, "learning_rate": 1.6755659073559805e-07, "loss": 0.0037, "step": 109620 }, { "epoch": 1.851420272232918, "grad_norm": 0.8457003235816956, "learning_rate": 1.6717847545807965e-07, "loss": 0.0073, "step": 109630 }, { "epoch": 1.85158915121424, "grad_norm": 0.14797094464302063, "learning_rate": 1.6680078004425282e-07, "loss": 0.0054, "step": 109640 }, { "epoch": 1.8517580301955618, "grad_norm": 0.40826836228370667, "learning_rate": 1.664235045269319e-07, "loss": 0.007, "step": 109650 }, { "epoch": 1.8519269091768837, "grad_norm": 0.2459961175918579, "learning_rate": 1.6604664893889232e-07, "loss": 0.0079, "step": 109660 }, { "epoch": 1.8520957881582059, "grad_norm": 0.24642890691757202, "learning_rate": 1.6567021331287457e-07, "loss": 0.0071, "step": 109670 }, { "epoch": 1.852264667139528, "grad_norm": 0.06976307183504105, "learning_rate": 1.6529419768158306e-07, "loss": 0.0075, "step": 109680 }, { "epoch": 1.8524335461208499, "grad_norm": 0.42128080129623413, "learning_rate": 1.6491860207768495e-07, "loss": 0.0052, "step": 109690 }, { "epoch": 1.8526024251021718, "grad_norm": 0.09540688246488571, "learning_rate": 1.645434265338114e-07, "loss": 0.0078, "step": 109700 }, { "epoch": 1.8527713040834937, "grad_norm": 0.23980867862701416, "learning_rate": 1.6416867108255574e-07, "loss": 0.0052, "step": 109710 }, { "epoch": 1.8529401830648158, "grad_norm": 0.4923226535320282, "learning_rate": 1.637943357564764e-07, "loss": 0.0065, "step": 109720 }, { "epoch": 1.853109062046138, "grad_norm": 0.30097731947898865, "learning_rate": 1.634204205880946e-07, "loss": 0.0101, "step": 109730 }, { "epoch": 1.8532779410274598, "grad_norm": 0.16023273766040802, "learning_rate": 1.6304692560989544e-07, "loss": 0.0041, "step": 109740 }, { "epoch": 1.8534468200087817, "grad_norm": 0.31557920575141907, "learning_rate": 1.6267385085432686e-07, "loss": 0.005, "step": 109750 }, { "epoch": 1.8536156989901036, "grad_norm": 0.15023407340049744, "learning_rate": 1.623011963538007e-07, "loss": 0.006, "step": 109760 }, { "epoch": 1.8537845779714257, "grad_norm": 0.204427570104599, "learning_rate": 1.6192896214069277e-07, "loss": 0.0104, "step": 109770 }, { "epoch": 1.8539534569527478, "grad_norm": 0.21429350972175598, "learning_rate": 1.6155714824734103e-07, "loss": 0.0144, "step": 109780 }, { "epoch": 1.8541223359340697, "grad_norm": 0.34515535831451416, "learning_rate": 1.6118575470604858e-07, "loss": 0.0058, "step": 109790 }, { "epoch": 1.8542912149153916, "grad_norm": 0.13344566524028778, "learning_rate": 1.608147815490807e-07, "loss": 0.0058, "step": 109800 }, { "epoch": 1.8544600938967135, "grad_norm": 0.261016309261322, "learning_rate": 1.604442288086666e-07, "loss": 0.0076, "step": 109810 }, { "epoch": 1.8546289728780356, "grad_norm": 0.35755807161331177, "learning_rate": 1.6007409651700002e-07, "loss": 0.0085, "step": 109820 }, { "epoch": 1.8547978518593577, "grad_norm": 0.35365334153175354, "learning_rate": 1.5970438470623572e-07, "loss": 0.0062, "step": 109830 }, { "epoch": 1.8549667308406796, "grad_norm": 0.18996092677116394, "learning_rate": 1.5933509340849361e-07, "loss": 0.0048, "step": 109840 }, { "epoch": 1.8551356098220015, "grad_norm": 0.5990872383117676, "learning_rate": 1.5896622265585749e-07, "loss": 0.007, "step": 109850 }, { "epoch": 1.8553044888033234, "grad_norm": 0.1593872457742691, "learning_rate": 1.5859777248037446e-07, "loss": 0.006, "step": 109860 }, { "epoch": 1.8554733677846456, "grad_norm": 0.26375317573547363, "learning_rate": 1.5822974291405279e-07, "loss": 0.0066, "step": 109870 }, { "epoch": 1.8556422467659677, "grad_norm": 0.16296036541461945, "learning_rate": 1.5786213398886697e-07, "loss": 0.0081, "step": 109880 }, { "epoch": 1.8558111257472896, "grad_norm": 0.24671244621276855, "learning_rate": 1.574949457367536e-07, "loss": 0.0072, "step": 109890 }, { "epoch": 1.8559800047286115, "grad_norm": 0.1534910351037979, "learning_rate": 1.5712817818961334e-07, "loss": 0.0057, "step": 109900 }, { "epoch": 1.8561488837099334, "grad_norm": 0.20009467005729675, "learning_rate": 1.5676183137931068e-07, "loss": 0.0029, "step": 109910 }, { "epoch": 1.8563177626912555, "grad_norm": 0.19189472496509552, "learning_rate": 1.563959053376718e-07, "loss": 0.0059, "step": 109920 }, { "epoch": 1.8564866416725774, "grad_norm": 0.23405508697032928, "learning_rate": 1.560304000964885e-07, "loss": 0.0042, "step": 109930 }, { "epoch": 1.8566555206538995, "grad_norm": 0.3915542662143707, "learning_rate": 1.5566531568751487e-07, "loss": 0.0084, "step": 109940 }, { "epoch": 1.8568243996352214, "grad_norm": 0.34397369623184204, "learning_rate": 1.553006521424677e-07, "loss": 0.0046, "step": 109950 }, { "epoch": 1.8569932786165433, "grad_norm": 0.16990584135055542, "learning_rate": 1.5493640949302834e-07, "loss": 0.0072, "step": 109960 }, { "epoch": 1.8571621575978654, "grad_norm": 0.13406431674957275, "learning_rate": 1.545725877708415e-07, "loss": 0.0045, "step": 109970 }, { "epoch": 1.8573310365791873, "grad_norm": 0.2653999924659729, "learning_rate": 1.5420918700751519e-07, "loss": 0.0062, "step": 109980 }, { "epoch": 1.8574999155605094, "grad_norm": 0.15841802954673767, "learning_rate": 1.538462072346214e-07, "loss": 0.004, "step": 109990 }, { "epoch": 1.8576687945418313, "grad_norm": 0.3147970736026764, "learning_rate": 1.534836484836938e-07, "loss": 0.0065, "step": 110000 }, { "epoch": 1.8578376735231532, "grad_norm": 0.8308634757995605, "learning_rate": 1.531215107862305e-07, "loss": 0.0102, "step": 110010 }, { "epoch": 1.8580065525044753, "grad_norm": 0.34608444571495056, "learning_rate": 1.5275979417369413e-07, "loss": 0.0092, "step": 110020 }, { "epoch": 1.8581754314857972, "grad_norm": 0.35195499658584595, "learning_rate": 1.52398498677509e-07, "loss": 0.0096, "step": 110030 }, { "epoch": 1.8583443104671193, "grad_norm": 0.3146147131919861, "learning_rate": 1.5203762432906387e-07, "loss": 0.0057, "step": 110040 }, { "epoch": 1.8585131894484412, "grad_norm": 0.26339149475097656, "learning_rate": 1.5167717115971092e-07, "loss": 0.0079, "step": 110050 }, { "epoch": 1.8586820684297631, "grad_norm": 0.17094242572784424, "learning_rate": 1.5131713920076508e-07, "loss": 0.0061, "step": 110060 }, { "epoch": 1.8588509474110853, "grad_norm": 0.1904536783695221, "learning_rate": 1.509575284835052e-07, "loss": 0.0052, "step": 110070 }, { "epoch": 1.8590198263924071, "grad_norm": 0.12331220507621765, "learning_rate": 1.5059833903917355e-07, "loss": 0.0087, "step": 110080 }, { "epoch": 1.8591887053737293, "grad_norm": 0.30456972122192383, "learning_rate": 1.5023957089897457e-07, "loss": 0.0072, "step": 110090 }, { "epoch": 1.8593575843550512, "grad_norm": 0.30631259083747864, "learning_rate": 1.4988122409407834e-07, "loss": 0.0067, "step": 110100 }, { "epoch": 1.859526463336373, "grad_norm": 0.16971243917942047, "learning_rate": 1.4952329865561722e-07, "loss": 0.0044, "step": 110110 }, { "epoch": 1.8596953423176952, "grad_norm": 0.26544177532196045, "learning_rate": 1.4916579461468572e-07, "loss": 0.0056, "step": 110120 }, { "epoch": 1.859864221299017, "grad_norm": 0.002594192512333393, "learning_rate": 1.4880871200234404e-07, "loss": 0.0078, "step": 110130 }, { "epoch": 1.8600331002803392, "grad_norm": 0.1181420087814331, "learning_rate": 1.4845205084961344e-07, "loss": 0.0038, "step": 110140 }, { "epoch": 1.860201979261661, "grad_norm": 0.19559705257415771, "learning_rate": 1.480958111874814e-07, "loss": 0.0065, "step": 110150 }, { "epoch": 1.860370858242983, "grad_norm": 0.2648463547229767, "learning_rate": 1.477399930468959e-07, "loss": 0.007, "step": 110160 }, { "epoch": 1.860539737224305, "grad_norm": 0.09645238518714905, "learning_rate": 1.4738459645877002e-07, "loss": 0.0059, "step": 110170 }, { "epoch": 1.860708616205627, "grad_norm": 0.15433406829833984, "learning_rate": 1.4702962145397958e-07, "loss": 0.0068, "step": 110180 }, { "epoch": 1.8608774951869491, "grad_norm": 0.21621161699295044, "learning_rate": 1.466750680633644e-07, "loss": 0.0086, "step": 110190 }, { "epoch": 1.861046374168271, "grad_norm": 0.23733650147914886, "learning_rate": 1.463209363177265e-07, "loss": 0.0045, "step": 110200 }, { "epoch": 1.861215253149593, "grad_norm": 0.15949931740760803, "learning_rate": 1.4596722624783234e-07, "loss": 0.0112, "step": 110210 }, { "epoch": 1.861384132130915, "grad_norm": 0.33712318539619446, "learning_rate": 1.456139378844118e-07, "loss": 0.0071, "step": 110220 }, { "epoch": 1.861553011112237, "grad_norm": 0.045292001217603683, "learning_rate": 1.4526107125815702e-07, "loss": 0.0053, "step": 110230 }, { "epoch": 1.861721890093559, "grad_norm": 0.15829813480377197, "learning_rate": 1.4490862639972457e-07, "loss": 0.006, "step": 110240 }, { "epoch": 1.861890769074881, "grad_norm": 0.1157640665769577, "learning_rate": 1.4455660333973333e-07, "loss": 0.0068, "step": 110250 }, { "epoch": 1.8620596480562028, "grad_norm": 0.40704545378685, "learning_rate": 1.442050021087671e-07, "loss": 0.011, "step": 110260 }, { "epoch": 1.862228527037525, "grad_norm": 0.1455126851797104, "learning_rate": 1.4385382273737148e-07, "loss": 0.0043, "step": 110270 }, { "epoch": 1.8623974060188468, "grad_norm": 0.03263615071773529, "learning_rate": 1.435030652560565e-07, "loss": 0.0045, "step": 110280 }, { "epoch": 1.862566285000169, "grad_norm": 0.23613744974136353, "learning_rate": 1.4315272969529502e-07, "loss": 0.0058, "step": 110290 }, { "epoch": 1.8627351639814909, "grad_norm": 0.1672377735376358, "learning_rate": 1.4280281608552325e-07, "loss": 0.0042, "step": 110300 }, { "epoch": 1.8629040429628128, "grad_norm": 0.3369462490081787, "learning_rate": 1.4245332445714188e-07, "loss": 0.0054, "step": 110310 }, { "epoch": 1.8630729219441349, "grad_norm": 0.2594289779663086, "learning_rate": 1.421042548405116e-07, "loss": 0.009, "step": 110320 }, { "epoch": 1.8632418009254568, "grad_norm": 0.11544188112020493, "learning_rate": 1.4175560726596037e-07, "loss": 0.0038, "step": 110330 }, { "epoch": 1.8634106799067789, "grad_norm": 0.07531262189149857, "learning_rate": 1.4140738176377733e-07, "loss": 0.006, "step": 110340 }, { "epoch": 1.8635795588881008, "grad_norm": 0.23661495745182037, "learning_rate": 1.41059578364216e-07, "loss": 0.0078, "step": 110350 }, { "epoch": 1.8637484378694227, "grad_norm": 0.3203423321247101, "learning_rate": 1.407121970974923e-07, "loss": 0.0064, "step": 110360 }, { "epoch": 1.8639173168507448, "grad_norm": 0.31090205907821655, "learning_rate": 1.403652379937853e-07, "loss": 0.0085, "step": 110370 }, { "epoch": 1.8640861958320667, "grad_norm": 0.16291694343090057, "learning_rate": 1.4001870108323878e-07, "loss": 0.0083, "step": 110380 }, { "epoch": 1.8642550748133888, "grad_norm": 0.24209897220134735, "learning_rate": 1.396725863959586e-07, "loss": 0.0054, "step": 110390 }, { "epoch": 1.8644239537947107, "grad_norm": 0.252703994512558, "learning_rate": 1.3932689396201404e-07, "loss": 0.0076, "step": 110400 }, { "epoch": 1.8645928327760326, "grad_norm": 0.12369833141565323, "learning_rate": 1.3898162381143943e-07, "loss": 0.005, "step": 110410 }, { "epoch": 1.8647617117573547, "grad_norm": 0.1872195303440094, "learning_rate": 1.3863677597422908e-07, "loss": 0.006, "step": 110420 }, { "epoch": 1.8649305907386766, "grad_norm": 0.1285160630941391, "learning_rate": 1.3829235048034405e-07, "loss": 0.0046, "step": 110430 }, { "epoch": 1.8650994697199987, "grad_norm": 0.08185321092605591, "learning_rate": 1.3794834735970707e-07, "loss": 0.004, "step": 110440 }, { "epoch": 1.8652683487013206, "grad_norm": 0.16705629229545593, "learning_rate": 1.3760476664220311e-07, "loss": 0.0094, "step": 110450 }, { "epoch": 1.8654372276826425, "grad_norm": 0.21486178040504456, "learning_rate": 1.372616083576822e-07, "loss": 0.0047, "step": 110460 }, { "epoch": 1.8656061066639646, "grad_norm": 0.1693265438079834, "learning_rate": 1.369188725359577e-07, "loss": 0.0081, "step": 110470 }, { "epoch": 1.8657749856452865, "grad_norm": 0.1902022808790207, "learning_rate": 1.3657655920680525e-07, "loss": 0.0104, "step": 110480 }, { "epoch": 1.8659438646266087, "grad_norm": 0.2871299684047699, "learning_rate": 1.362346683999638e-07, "loss": 0.0051, "step": 110490 }, { "epoch": 1.8661127436079306, "grad_norm": 0.22712324559688568, "learning_rate": 1.3589320014513629e-07, "loss": 0.0076, "step": 110500 }, { "epoch": 1.8662816225892525, "grad_norm": 0.12318158894777298, "learning_rate": 1.35552154471989e-07, "loss": 0.0035, "step": 110510 }, { "epoch": 1.8664505015705746, "grad_norm": 0.1296771913766861, "learning_rate": 1.3521153141015043e-07, "loss": 0.0042, "step": 110520 }, { "epoch": 1.8666193805518965, "grad_norm": 0.1779998391866684, "learning_rate": 1.3487133098921356e-07, "loss": 0.0046, "step": 110530 }, { "epoch": 1.8667882595332186, "grad_norm": 0.27567633986473083, "learning_rate": 1.3453155323873425e-07, "loss": 0.0107, "step": 110540 }, { "epoch": 1.8669571385145405, "grad_norm": 0.12401222437620163, "learning_rate": 1.3419219818823104e-07, "loss": 0.0077, "step": 110550 }, { "epoch": 1.8671260174958624, "grad_norm": 0.66176837682724, "learning_rate": 1.3385326586718705e-07, "loss": 0.0072, "step": 110560 }, { "epoch": 1.8672948964771845, "grad_norm": 0.23498865962028503, "learning_rate": 1.3351475630504706e-07, "loss": 0.0085, "step": 110570 }, { "epoch": 1.8674637754585064, "grad_norm": 0.6425979733467102, "learning_rate": 1.3317666953122033e-07, "loss": 0.0053, "step": 110580 }, { "epoch": 1.8676326544398285, "grad_norm": 0.23408012092113495, "learning_rate": 1.3283900557507945e-07, "loss": 0.0134, "step": 110590 }, { "epoch": 1.8678015334211504, "grad_norm": 0.11043892055749893, "learning_rate": 1.325017644659593e-07, "loss": 0.0068, "step": 110600 }, { "epoch": 1.8679704124024723, "grad_norm": 0.26033419370651245, "learning_rate": 1.3216494623315867e-07, "loss": 0.0075, "step": 110610 }, { "epoch": 1.8681392913837944, "grad_norm": 0.2416492998600006, "learning_rate": 1.3182855090593916e-07, "loss": 0.0066, "step": 110620 }, { "epoch": 1.8683081703651163, "grad_norm": 0.15056952834129333, "learning_rate": 1.3149257851352625e-07, "loss": 0.0077, "step": 110630 }, { "epoch": 1.8684770493464384, "grad_norm": 0.20654012262821198, "learning_rate": 1.311570290851083e-07, "loss": 0.0051, "step": 110640 }, { "epoch": 1.8686459283277603, "grad_norm": 0.15992018580436707, "learning_rate": 1.3082190264983753e-07, "loss": 0.0055, "step": 110650 }, { "epoch": 1.8688148073090822, "grad_norm": 0.14637482166290283, "learning_rate": 1.304871992368284e-07, "loss": 0.0098, "step": 110660 }, { "epoch": 1.8689836862904043, "grad_norm": 0.3609813451766968, "learning_rate": 1.3015291887515935e-07, "loss": 0.0068, "step": 110670 }, { "epoch": 1.8691525652717262, "grad_norm": 0.19070841372013092, "learning_rate": 1.2981906159387215e-07, "loss": 0.0056, "step": 110680 }, { "epoch": 1.8693214442530484, "grad_norm": 0.1475512683391571, "learning_rate": 1.294856274219708e-07, "loss": 0.0063, "step": 110690 }, { "epoch": 1.8694903232343703, "grad_norm": 0.28767919540405273, "learning_rate": 1.2915261638842324e-07, "loss": 0.0063, "step": 110700 }, { "epoch": 1.8696592022156922, "grad_norm": 0.15169045329093933, "learning_rate": 1.2882002852216136e-07, "loss": 0.0058, "step": 110710 }, { "epoch": 1.8698280811970143, "grad_norm": 0.34823715686798096, "learning_rate": 1.2848786385207924e-07, "loss": 0.0063, "step": 110720 }, { "epoch": 1.8699969601783362, "grad_norm": 0.18632164597511292, "learning_rate": 1.2815612240703434e-07, "loss": 0.0047, "step": 110730 }, { "epoch": 1.8701658391596583, "grad_norm": 0.18328510224819183, "learning_rate": 1.2782480421584753e-07, "loss": 0.0072, "step": 110740 }, { "epoch": 1.8703347181409802, "grad_norm": 0.16059528291225433, "learning_rate": 1.2749390930730245e-07, "loss": 0.0063, "step": 110750 }, { "epoch": 1.870503597122302, "grad_norm": 0.21168801188468933, "learning_rate": 1.2716343771014828e-07, "loss": 0.0077, "step": 110760 }, { "epoch": 1.8706724761036242, "grad_norm": 0.08291850984096527, "learning_rate": 1.2683338945309377e-07, "loss": 0.0048, "step": 110770 }, { "epoch": 1.870841355084946, "grad_norm": 0.4877854287624359, "learning_rate": 1.2650376456481372e-07, "loss": 0.0069, "step": 110780 }, { "epoch": 1.8710102340662682, "grad_norm": 0.1889733076095581, "learning_rate": 1.2617456307394416e-07, "loss": 0.0058, "step": 110790 }, { "epoch": 1.87117911304759, "grad_norm": 0.210309237241745, "learning_rate": 1.2584578500908661e-07, "loss": 0.0124, "step": 110800 }, { "epoch": 1.871347992028912, "grad_norm": 0.22895440459251404, "learning_rate": 1.2551743039880325e-07, "loss": 0.0062, "step": 110810 }, { "epoch": 1.8715168710102341, "grad_norm": 0.5040241479873657, "learning_rate": 1.251894992716207e-07, "loss": 0.0066, "step": 110820 }, { "epoch": 1.871685749991556, "grad_norm": 0.16713592410087585, "learning_rate": 1.2486199165603008e-07, "loss": 0.0065, "step": 110830 }, { "epoch": 1.8718546289728781, "grad_norm": 0.1493701934814453, "learning_rate": 1.245349075804836e-07, "loss": 0.0058, "step": 110840 }, { "epoch": 1.8720235079542, "grad_norm": 0.2672522962093353, "learning_rate": 1.242082470733974e-07, "loss": 0.0061, "step": 110850 }, { "epoch": 1.872192386935522, "grad_norm": 0.23868833482265472, "learning_rate": 1.238820101631516e-07, "loss": 0.0078, "step": 110860 }, { "epoch": 1.872361265916844, "grad_norm": 0.14812862873077393, "learning_rate": 1.235561968780874e-07, "loss": 0.0075, "step": 110870 }, { "epoch": 1.872530144898166, "grad_norm": 0.24327053129673004, "learning_rate": 1.232308072465127e-07, "loss": 0.0129, "step": 110880 }, { "epoch": 1.872699023879488, "grad_norm": 0.25789910554885864, "learning_rate": 1.2290584129669548e-07, "loss": 0.0076, "step": 110890 }, { "epoch": 1.87286790286081, "grad_norm": 0.2875739336013794, "learning_rate": 1.2258129905686755e-07, "loss": 0.0078, "step": 110900 }, { "epoch": 1.8730367818421318, "grad_norm": 0.2840389013290405, "learning_rate": 1.2225718055522528e-07, "loss": 0.0073, "step": 110910 }, { "epoch": 1.873205660823454, "grad_norm": 0.539993166923523, "learning_rate": 1.219334858199267e-07, "loss": 0.0056, "step": 110920 }, { "epoch": 1.8733745398047759, "grad_norm": 0.10294920206069946, "learning_rate": 1.216102148790943e-07, "loss": 0.0056, "step": 110930 }, { "epoch": 1.873543418786098, "grad_norm": 0.41708824038505554, "learning_rate": 1.212873677608123e-07, "loss": 0.0059, "step": 110940 }, { "epoch": 1.8737122977674199, "grad_norm": 0.22390268743038177, "learning_rate": 1.209649444931288e-07, "loss": 0.0055, "step": 110950 }, { "epoch": 1.8738811767487418, "grad_norm": 0.07074542343616486, "learning_rate": 1.206429451040553e-07, "loss": 0.0052, "step": 110960 }, { "epoch": 1.874050055730064, "grad_norm": 0.4235709011554718, "learning_rate": 1.2032136962156715e-07, "loss": 0.0062, "step": 110970 }, { "epoch": 1.8742189347113858, "grad_norm": 0.32253333926200867, "learning_rate": 1.200002180736004e-07, "loss": 0.0067, "step": 110980 }, { "epoch": 1.874387813692708, "grad_norm": 0.24294032156467438, "learning_rate": 1.1967949048805715e-07, "loss": 0.0072, "step": 110990 }, { "epoch": 1.8745566926740298, "grad_norm": 0.2926120460033417, "learning_rate": 1.1935918689280178e-07, "loss": 0.0066, "step": 111000 }, { "epoch": 1.8747255716553517, "grad_norm": 0.39664527773857117, "learning_rate": 1.1903930731566094e-07, "loss": 0.0047, "step": 111010 }, { "epoch": 1.8748944506366738, "grad_norm": 0.24446892738342285, "learning_rate": 1.1871985178442403e-07, "loss": 0.0053, "step": 111020 }, { "epoch": 1.8750633296179957, "grad_norm": 0.28404200077056885, "learning_rate": 1.1840082032684608e-07, "loss": 0.0096, "step": 111030 }, { "epoch": 1.8752322085993178, "grad_norm": 0.40618178248405457, "learning_rate": 1.180822129706427e-07, "loss": 0.0062, "step": 111040 }, { "epoch": 1.8754010875806397, "grad_norm": 0.05727631598711014, "learning_rate": 1.1776402974349455e-07, "loss": 0.0083, "step": 111050 }, { "epoch": 1.8755699665619616, "grad_norm": 0.49017828702926636, "learning_rate": 1.1744627067304393e-07, "loss": 0.0076, "step": 111060 }, { "epoch": 1.8757388455432835, "grad_norm": 0.22793962061405182, "learning_rate": 1.1712893578689766e-07, "loss": 0.0065, "step": 111070 }, { "epoch": 1.8759077245246056, "grad_norm": 0.20287197828292847, "learning_rate": 1.1681202511262368e-07, "loss": 0.0046, "step": 111080 }, { "epoch": 1.8760766035059278, "grad_norm": 0.16454963386058807, "learning_rate": 1.1649553867775665e-07, "loss": 0.0051, "step": 111090 }, { "epoch": 1.8762454824872496, "grad_norm": 0.24034810066223145, "learning_rate": 1.16179476509789e-07, "loss": 0.0106, "step": 111100 }, { "epoch": 1.8764143614685715, "grad_norm": 0.3345232903957367, "learning_rate": 1.1586383863618211e-07, "loss": 0.0078, "step": 111110 }, { "epoch": 1.8765832404498934, "grad_norm": 0.33459004759788513, "learning_rate": 1.1554862508435738e-07, "loss": 0.0062, "step": 111120 }, { "epoch": 1.8767521194312156, "grad_norm": 0.38548460602760315, "learning_rate": 1.1523383588169957e-07, "loss": 0.0053, "step": 111130 }, { "epoch": 1.8769209984125377, "grad_norm": 0.2846052050590515, "learning_rate": 1.1491947105555567e-07, "loss": 0.0099, "step": 111140 }, { "epoch": 1.8770898773938596, "grad_norm": 0.28559213876724243, "learning_rate": 1.1460553063323831e-07, "loss": 0.0088, "step": 111150 }, { "epoch": 1.8772587563751815, "grad_norm": 0.28926530480384827, "learning_rate": 1.1429201464202122e-07, "loss": 0.006, "step": 111160 }, { "epoch": 1.8774276353565034, "grad_norm": 0.45712679624557495, "learning_rate": 1.1397892310914261e-07, "loss": 0.0062, "step": 111170 }, { "epoch": 1.8775965143378255, "grad_norm": 0.2209363579750061, "learning_rate": 1.1366625606180237e-07, "loss": 0.0057, "step": 111180 }, { "epoch": 1.8777653933191476, "grad_norm": 0.20792193710803986, "learning_rate": 1.133540135271638e-07, "loss": 0.0057, "step": 111190 }, { "epoch": 1.8779342723004695, "grad_norm": 0.17237836122512817, "learning_rate": 1.1304219553235518e-07, "loss": 0.0052, "step": 111200 }, { "epoch": 1.8781031512817914, "grad_norm": 0.20769275724887848, "learning_rate": 1.1273080210446541e-07, "loss": 0.0085, "step": 111210 }, { "epoch": 1.8782720302631133, "grad_norm": 0.23631440103054047, "learning_rate": 1.1241983327054784e-07, "loss": 0.0101, "step": 111220 }, { "epoch": 1.8784409092444354, "grad_norm": 0.33054491877555847, "learning_rate": 1.1210928905761864e-07, "loss": 0.0107, "step": 111230 }, { "epoch": 1.8786097882257575, "grad_norm": 0.17524033784866333, "learning_rate": 1.1179916949265735e-07, "loss": 0.0058, "step": 111240 }, { "epoch": 1.8787786672070794, "grad_norm": 0.22948578000068665, "learning_rate": 1.1148947460260685e-07, "loss": 0.0128, "step": 111250 }, { "epoch": 1.8789475461884013, "grad_norm": 0.17970089614391327, "learning_rate": 1.1118020441437171e-07, "loss": 0.0067, "step": 111260 }, { "epoch": 1.8791164251697232, "grad_norm": 0.026019111275672913, "learning_rate": 1.1087135895482049e-07, "loss": 0.0069, "step": 111270 }, { "epoch": 1.8792853041510453, "grad_norm": 0.07313061505556107, "learning_rate": 1.1056293825078612e-07, "loss": 0.0082, "step": 111280 }, { "epoch": 1.8794541831323675, "grad_norm": 0.1450357288122177, "learning_rate": 1.1025494232906275e-07, "loss": 0.0046, "step": 111290 }, { "epoch": 1.8796230621136893, "grad_norm": 0.16222374141216278, "learning_rate": 1.0994737121640786e-07, "loss": 0.0068, "step": 111300 }, { "epoch": 1.8797919410950112, "grad_norm": 0.33652055263519287, "learning_rate": 1.0964022493954285e-07, "loss": 0.0045, "step": 111310 }, { "epoch": 1.8799608200763331, "grad_norm": 0.08895258605480194, "learning_rate": 1.0933350352515193e-07, "loss": 0.0061, "step": 111320 }, { "epoch": 1.8801296990576553, "grad_norm": 0.2643957734107971, "learning_rate": 1.0902720699988267e-07, "loss": 0.0074, "step": 111330 }, { "epoch": 1.8802985780389774, "grad_norm": 0.2676883935928345, "learning_rate": 1.0872133539034436e-07, "loss": 0.005, "step": 111340 }, { "epoch": 1.8804674570202993, "grad_norm": 0.13101720809936523, "learning_rate": 1.0841588872311126e-07, "loss": 0.0058, "step": 111350 }, { "epoch": 1.8806363360016212, "grad_norm": 0.33228421211242676, "learning_rate": 1.081108670247194e-07, "loss": 0.0075, "step": 111360 }, { "epoch": 1.880805214982943, "grad_norm": 0.3610175549983978, "learning_rate": 1.0780627032166868e-07, "loss": 0.0078, "step": 111370 }, { "epoch": 1.8809740939642652, "grad_norm": 0.18451634049415588, "learning_rate": 1.0750209864042183e-07, "loss": 0.0047, "step": 111380 }, { "epoch": 1.8811429729455873, "grad_norm": 0.40934237837791443, "learning_rate": 1.0719835200740325e-07, "loss": 0.0085, "step": 111390 }, { "epoch": 1.8813118519269092, "grad_norm": 0.2640468180179596, "learning_rate": 1.0689503044900352e-07, "loss": 0.0075, "step": 111400 }, { "epoch": 1.881480730908231, "grad_norm": 0.36824214458465576, "learning_rate": 1.0659213399157265e-07, "loss": 0.0059, "step": 111410 }, { "epoch": 1.881649609889553, "grad_norm": 0.12040278315544128, "learning_rate": 1.0628966266142792e-07, "loss": 0.0064, "step": 111420 }, { "epoch": 1.881818488870875, "grad_norm": 0.4685399830341339, "learning_rate": 1.0598761648484501e-07, "loss": 0.0068, "step": 111430 }, { "epoch": 1.8819873678521972, "grad_norm": 0.38721561431884766, "learning_rate": 1.0568599548806568e-07, "loss": 0.0086, "step": 111440 }, { "epoch": 1.8821562468335191, "grad_norm": 0.1287662386894226, "learning_rate": 1.0538479969729399e-07, "loss": 0.0083, "step": 111450 }, { "epoch": 1.882325125814841, "grad_norm": 0.22291672229766846, "learning_rate": 1.0508402913869786e-07, "loss": 0.0073, "step": 111460 }, { "epoch": 1.882494004796163, "grad_norm": 0.43426814675331116, "learning_rate": 1.0478368383840643e-07, "loss": 0.0102, "step": 111470 }, { "epoch": 1.882662883777485, "grad_norm": 0.36434829235076904, "learning_rate": 1.0448376382251379e-07, "loss": 0.0058, "step": 111480 }, { "epoch": 1.8828317627588071, "grad_norm": 0.30929332971572876, "learning_rate": 1.0418426911707524e-07, "loss": 0.0055, "step": 111490 }, { "epoch": 1.883000641740129, "grad_norm": 0.20046286284923553, "learning_rate": 1.0388519974811162e-07, "loss": 0.0043, "step": 111500 }, { "epoch": 1.883169520721451, "grad_norm": 0.2538016140460968, "learning_rate": 1.0358655574160437e-07, "loss": 0.0057, "step": 111510 }, { "epoch": 1.8833383997027728, "grad_norm": 0.23530828952789307, "learning_rate": 1.0328833712349884e-07, "loss": 0.0056, "step": 111520 }, { "epoch": 1.883507278684095, "grad_norm": 0.19255882501602173, "learning_rate": 1.0299054391970375e-07, "loss": 0.0051, "step": 111530 }, { "epoch": 1.883676157665417, "grad_norm": 0.3810306489467621, "learning_rate": 1.026931761560912e-07, "loss": 0.0047, "step": 111540 }, { "epoch": 1.883845036646739, "grad_norm": 0.32096540927886963, "learning_rate": 1.0239623385849495e-07, "loss": 0.0103, "step": 111550 }, { "epoch": 1.8840139156280609, "grad_norm": 0.18546487390995026, "learning_rate": 1.0209971705271327e-07, "loss": 0.006, "step": 111560 }, { "epoch": 1.8841827946093828, "grad_norm": 0.3527408838272095, "learning_rate": 1.0180362576450608e-07, "loss": 0.0078, "step": 111570 }, { "epoch": 1.8843516735907049, "grad_norm": 0.30118244886398315, "learning_rate": 1.0150796001959728e-07, "loss": 0.0062, "step": 111580 }, { "epoch": 1.884520552572027, "grad_norm": 0.2412814497947693, "learning_rate": 1.0121271984367409e-07, "loss": 0.0088, "step": 111590 }, { "epoch": 1.884689431553349, "grad_norm": 0.29011306166648865, "learning_rate": 1.0091790526238543e-07, "loss": 0.0057, "step": 111600 }, { "epoch": 1.8848583105346708, "grad_norm": 0.30237698554992676, "learning_rate": 1.0062351630134526e-07, "loss": 0.0081, "step": 111610 }, { "epoch": 1.8850271895159927, "grad_norm": 0.18958385288715363, "learning_rate": 1.0032955298612868e-07, "loss": 0.0056, "step": 111620 }, { "epoch": 1.8851960684973148, "grad_norm": 0.19925421476364136, "learning_rate": 1.0003601534227414e-07, "loss": 0.0078, "step": 111630 }, { "epoch": 1.885364947478637, "grad_norm": 0.20247933268547058, "learning_rate": 9.974290339528403e-08, "loss": 0.0076, "step": 111640 }, { "epoch": 1.8855338264599588, "grad_norm": 0.23621602356433868, "learning_rate": 9.945021717062298e-08, "loss": 0.0059, "step": 111650 }, { "epoch": 1.8857027054412807, "grad_norm": 0.14133179187774658, "learning_rate": 9.915795669371897e-08, "loss": 0.0052, "step": 111660 }, { "epoch": 1.8858715844226026, "grad_norm": 0.16351750493049622, "learning_rate": 9.886612198996282e-08, "loss": 0.008, "step": 111670 }, { "epoch": 1.8860404634039247, "grad_norm": 0.0077062686905264854, "learning_rate": 9.857471308470812e-08, "loss": 0.0068, "step": 111680 }, { "epoch": 1.8862093423852468, "grad_norm": 0.10897047817707062, "learning_rate": 9.828373000327185e-08, "loss": 0.0067, "step": 111690 }, { "epoch": 1.8863782213665687, "grad_norm": 0.14178024232387543, "learning_rate": 9.799317277093434e-08, "loss": 0.0064, "step": 111700 }, { "epoch": 1.8865471003478906, "grad_norm": 0.367006778717041, "learning_rate": 9.770304141293818e-08, "loss": 0.0074, "step": 111710 }, { "epoch": 1.8867159793292125, "grad_norm": 0.09897676855325699, "learning_rate": 9.741333595448932e-08, "loss": 0.0074, "step": 111720 }, { "epoch": 1.8868848583105347, "grad_norm": 0.3017559349536896, "learning_rate": 9.71240564207565e-08, "loss": 0.0086, "step": 111730 }, { "epoch": 1.8870537372918568, "grad_norm": 0.17264775931835175, "learning_rate": 9.683520283687243e-08, "loss": 0.0063, "step": 111740 }, { "epoch": 1.8872226162731787, "grad_norm": 0.4184567928314209, "learning_rate": 9.654677522793088e-08, "loss": 0.0093, "step": 111750 }, { "epoch": 1.8873914952545006, "grad_norm": 0.3460359573364258, "learning_rate": 9.625877361899017e-08, "loss": 0.0062, "step": 111760 }, { "epoch": 1.8875603742358225, "grad_norm": 0.15205708146095276, "learning_rate": 9.597119803507083e-08, "loss": 0.0082, "step": 111770 }, { "epoch": 1.8877292532171446, "grad_norm": 0.17379310727119446, "learning_rate": 9.568404850115787e-08, "loss": 0.007, "step": 111780 }, { "epoch": 1.8878981321984667, "grad_norm": 0.1762971729040146, "learning_rate": 9.53973250421969e-08, "loss": 0.0039, "step": 111790 }, { "epoch": 1.8880670111797886, "grad_norm": 0.3500659465789795, "learning_rate": 9.511102768309855e-08, "loss": 0.0051, "step": 111800 }, { "epoch": 1.8882358901611105, "grad_norm": 0.16951265931129456, "learning_rate": 9.482515644873513e-08, "loss": 0.0083, "step": 111810 }, { "epoch": 1.8884047691424324, "grad_norm": 0.8501622080802917, "learning_rate": 9.453971136394235e-08, "loss": 0.0084, "step": 111820 }, { "epoch": 1.8885736481237545, "grad_norm": 0.23805426061153412, "learning_rate": 9.425469245351925e-08, "loss": 0.0078, "step": 111830 }, { "epoch": 1.8887425271050766, "grad_norm": 0.18320490419864655, "learning_rate": 9.397009974222771e-08, "loss": 0.0048, "step": 111840 }, { "epoch": 1.8889114060863985, "grad_norm": 0.46061810851097107, "learning_rate": 9.368593325479236e-08, "loss": 0.0061, "step": 111850 }, { "epoch": 1.8890802850677204, "grad_norm": 0.2825698256492615, "learning_rate": 9.34021930159007e-08, "loss": 0.0052, "step": 111860 }, { "epoch": 1.8892491640490423, "grad_norm": 0.26900744438171387, "learning_rate": 9.311887905020356e-08, "loss": 0.0052, "step": 111870 }, { "epoch": 1.8894180430303644, "grad_norm": 0.09973331540822983, "learning_rate": 9.283599138231459e-08, "loss": 0.0036, "step": 111880 }, { "epoch": 1.8895869220116865, "grad_norm": 0.3161943852901459, "learning_rate": 9.255353003681022e-08, "loss": 0.0083, "step": 111890 }, { "epoch": 1.8897558009930084, "grad_norm": 0.47450536489486694, "learning_rate": 9.227149503823029e-08, "loss": 0.0073, "step": 111900 }, { "epoch": 1.8899246799743303, "grad_norm": 0.08508652448654175, "learning_rate": 9.198988641107687e-08, "loss": 0.0056, "step": 111910 }, { "epoch": 1.8900935589556522, "grad_norm": 0.4132693409919739, "learning_rate": 9.170870417981593e-08, "loss": 0.0071, "step": 111920 }, { "epoch": 1.8902624379369743, "grad_norm": 0.30147671699523926, "learning_rate": 9.142794836887515e-08, "loss": 0.0067, "step": 111930 }, { "epoch": 1.8904313169182965, "grad_norm": 0.22735409438610077, "learning_rate": 9.114761900264668e-08, "loss": 0.0076, "step": 111940 }, { "epoch": 1.8906001958996184, "grad_norm": 0.21901552379131317, "learning_rate": 9.086771610548439e-08, "loss": 0.0105, "step": 111950 }, { "epoch": 1.8907690748809403, "grad_norm": 0.22172820568084717, "learning_rate": 9.058823970170549e-08, "loss": 0.0059, "step": 111960 }, { "epoch": 1.8909379538622622, "grad_norm": 0.26675331592559814, "learning_rate": 9.030918981559056e-08, "loss": 0.007, "step": 111970 }, { "epoch": 1.8911068328435843, "grad_norm": 0.5352979898452759, "learning_rate": 9.003056647138298e-08, "loss": 0.0091, "step": 111980 }, { "epoch": 1.8912757118249064, "grad_norm": 0.3039394021034241, "learning_rate": 8.975236969328893e-08, "loss": 0.0045, "step": 111990 }, { "epoch": 1.8914445908062283, "grad_norm": 0.284658819437027, "learning_rate": 8.947459950547632e-08, "loss": 0.0065, "step": 112000 }, { "epoch": 1.8916134697875502, "grad_norm": 0.23851601779460907, "learning_rate": 8.91972559320775e-08, "loss": 0.0046, "step": 112010 }, { "epoch": 1.891782348768872, "grad_norm": 0.14899775385856628, "learning_rate": 8.892033899718821e-08, "loss": 0.0056, "step": 112020 }, { "epoch": 1.8919512277501942, "grad_norm": 0.5310793519020081, "learning_rate": 8.864384872486642e-08, "loss": 0.0053, "step": 112030 }, { "epoch": 1.8921201067315163, "grad_norm": 0.28436610102653503, "learning_rate": 8.836778513913181e-08, "loss": 0.008, "step": 112040 }, { "epoch": 1.8922889857128382, "grad_norm": 0.189908966422081, "learning_rate": 8.809214826396906e-08, "loss": 0.0061, "step": 112050 }, { "epoch": 1.89245786469416, "grad_norm": 0.16608572006225586, "learning_rate": 8.781693812332404e-08, "loss": 0.0092, "step": 112060 }, { "epoch": 1.892626743675482, "grad_norm": 0.23017336428165436, "learning_rate": 8.754215474110705e-08, "loss": 0.0055, "step": 112070 }, { "epoch": 1.8927956226568041, "grad_norm": 0.1988859325647354, "learning_rate": 8.726779814119013e-08, "loss": 0.0103, "step": 112080 }, { "epoch": 1.8929645016381262, "grad_norm": 0.2563791275024414, "learning_rate": 8.699386834740863e-08, "loss": 0.0061, "step": 112090 }, { "epoch": 1.8931333806194481, "grad_norm": 0.4502618908882141, "learning_rate": 8.672036538356132e-08, "loss": 0.0083, "step": 112100 }, { "epoch": 1.89330225960077, "grad_norm": 0.10464760661125183, "learning_rate": 8.644728927340972e-08, "loss": 0.008, "step": 112110 }, { "epoch": 1.893471138582092, "grad_norm": 0.06444704532623291, "learning_rate": 8.617464004067711e-08, "loss": 0.0078, "step": 112120 }, { "epoch": 1.893640017563414, "grad_norm": 0.2375556230545044, "learning_rate": 8.590241770905116e-08, "loss": 0.0071, "step": 112130 }, { "epoch": 1.8938088965447362, "grad_norm": 0.27722445130348206, "learning_rate": 8.563062230218189e-08, "loss": 0.0067, "step": 112140 }, { "epoch": 1.893977775526058, "grad_norm": 0.15865513682365417, "learning_rate": 8.53592538436826e-08, "loss": 0.0041, "step": 112150 }, { "epoch": 1.89414665450738, "grad_norm": 0.07251240313053131, "learning_rate": 8.508831235712777e-08, "loss": 0.0031, "step": 112160 }, { "epoch": 1.8943155334887019, "grad_norm": 0.37626445293426514, "learning_rate": 8.481779786605693e-08, "loss": 0.008, "step": 112170 }, { "epoch": 1.894484412470024, "grad_norm": 0.1392863541841507, "learning_rate": 8.454771039397237e-08, "loss": 0.0058, "step": 112180 }, { "epoch": 1.894653291451346, "grad_norm": 0.11474702507257462, "learning_rate": 8.427804996433752e-08, "loss": 0.0038, "step": 112190 }, { "epoch": 1.894822170432668, "grad_norm": 0.5449046492576599, "learning_rate": 8.400881660058036e-08, "loss": 0.0088, "step": 112200 }, { "epoch": 1.8949910494139899, "grad_norm": 0.28000757098197937, "learning_rate": 8.374001032609158e-08, "loss": 0.0061, "step": 112210 }, { "epoch": 1.8951599283953118, "grad_norm": 0.0979316383600235, "learning_rate": 8.347163116422363e-08, "loss": 0.0054, "step": 112220 }, { "epoch": 1.895328807376634, "grad_norm": 0.40678396821022034, "learning_rate": 8.320367913829341e-08, "loss": 0.0051, "step": 112230 }, { "epoch": 1.895497686357956, "grad_norm": 0.22896072268486023, "learning_rate": 8.293615427157953e-08, "loss": 0.0061, "step": 112240 }, { "epoch": 1.895666565339278, "grad_norm": 0.3055920898914337, "learning_rate": 8.266905658732394e-08, "loss": 0.0062, "step": 112250 }, { "epoch": 1.8958354443205998, "grad_norm": 0.4362565875053406, "learning_rate": 8.240238610873142e-08, "loss": 0.0106, "step": 112260 }, { "epoch": 1.8960043233019217, "grad_norm": 0.28370147943496704, "learning_rate": 8.213614285896898e-08, "loss": 0.0082, "step": 112270 }, { "epoch": 1.8961732022832438, "grad_norm": 0.3227382004261017, "learning_rate": 8.187032686116869e-08, "loss": 0.0067, "step": 112280 }, { "epoch": 1.896342081264566, "grad_norm": 0.2818240523338318, "learning_rate": 8.160493813842318e-08, "loss": 0.0058, "step": 112290 }, { "epoch": 1.8965109602458878, "grad_norm": 0.04695993289351463, "learning_rate": 8.13399767137879e-08, "loss": 0.0068, "step": 112300 }, { "epoch": 1.8966798392272097, "grad_norm": 0.20829987525939941, "learning_rate": 8.107544261028333e-08, "loss": 0.0053, "step": 112310 }, { "epoch": 1.8968487182085316, "grad_norm": 0.508070170879364, "learning_rate": 8.081133585089107e-08, "loss": 0.0085, "step": 112320 }, { "epoch": 1.8970175971898537, "grad_norm": 0.27446725964546204, "learning_rate": 8.05476564585561e-08, "loss": 0.0075, "step": 112330 }, { "epoch": 1.8971864761711759, "grad_norm": 0.4204825758934021, "learning_rate": 8.028440445618623e-08, "loss": 0.005, "step": 112340 }, { "epoch": 1.8973553551524978, "grad_norm": 0.1725979596376419, "learning_rate": 8.002157986665204e-08, "loss": 0.0076, "step": 112350 }, { "epoch": 1.8975242341338197, "grad_norm": 0.21379375457763672, "learning_rate": 7.975918271278748e-08, "loss": 0.0071, "step": 112360 }, { "epoch": 1.8976931131151415, "grad_norm": 0.12634454667568207, "learning_rate": 7.949721301738877e-08, "loss": 0.0072, "step": 112370 }, { "epoch": 1.8978619920964637, "grad_norm": 0.1270836591720581, "learning_rate": 7.923567080321493e-08, "loss": 0.0082, "step": 112380 }, { "epoch": 1.8980308710777858, "grad_norm": 0.0640789270401001, "learning_rate": 7.897455609298832e-08, "loss": 0.006, "step": 112390 }, { "epoch": 1.8981997500591077, "grad_norm": 0.15499451756477356, "learning_rate": 7.871386890939414e-08, "loss": 0.0036, "step": 112400 }, { "epoch": 1.8983686290404296, "grad_norm": 0.4829176366329193, "learning_rate": 7.845360927507928e-08, "loss": 0.0064, "step": 112410 }, { "epoch": 1.8985375080217515, "grad_norm": 0.23498274385929108, "learning_rate": 7.819377721265565e-08, "loss": 0.011, "step": 112420 }, { "epoch": 1.8987063870030736, "grad_norm": 0.16762973368167877, "learning_rate": 7.793437274469628e-08, "loss": 0.0097, "step": 112430 }, { "epoch": 1.8988752659843957, "grad_norm": 0.2214212268590927, "learning_rate": 7.767539589373819e-08, "loss": 0.0051, "step": 112440 }, { "epoch": 1.8990441449657176, "grad_norm": 0.14607663452625275, "learning_rate": 7.741684668227945e-08, "loss": 0.0095, "step": 112450 }, { "epoch": 1.8992130239470395, "grad_norm": 0.23475848138332367, "learning_rate": 7.715872513278322e-08, "loss": 0.008, "step": 112460 }, { "epoch": 1.8993819029283614, "grad_norm": 0.236890971660614, "learning_rate": 7.690103126767434e-08, "loss": 0.009, "step": 112470 }, { "epoch": 1.8995507819096835, "grad_norm": 0.3362078368663788, "learning_rate": 7.664376510934047e-08, "loss": 0.0081, "step": 112480 }, { "epoch": 1.8997196608910056, "grad_norm": 0.16605842113494873, "learning_rate": 7.638692668013203e-08, "loss": 0.007, "step": 112490 }, { "epoch": 1.8998885398723275, "grad_norm": 0.11063337326049805, "learning_rate": 7.613051600236288e-08, "loss": 0.005, "step": 112500 }, { "epoch": 1.9000574188536494, "grad_norm": 0.23610511422157288, "learning_rate": 7.58745330983085e-08, "loss": 0.0099, "step": 112510 }, { "epoch": 1.9002262978349713, "grad_norm": 0.16828542947769165, "learning_rate": 7.561897799021e-08, "loss": 0.0033, "step": 112520 }, { "epoch": 1.9003951768162934, "grad_norm": 0.14909936487674713, "learning_rate": 7.536385070026686e-08, "loss": 0.0045, "step": 112530 }, { "epoch": 1.9005640557976156, "grad_norm": 0.2404428869485855, "learning_rate": 7.510915125064578e-08, "loss": 0.0079, "step": 112540 }, { "epoch": 1.9007329347789375, "grad_norm": 0.1867367923259735, "learning_rate": 7.485487966347348e-08, "loss": 0.0079, "step": 112550 }, { "epoch": 1.9009018137602594, "grad_norm": 0.1499646157026291, "learning_rate": 7.460103596084122e-08, "loss": 0.0065, "step": 112560 }, { "epoch": 1.9010706927415812, "grad_norm": 0.31567588448524475, "learning_rate": 7.434762016480135e-08, "loss": 0.0078, "step": 112570 }, { "epoch": 1.9012395717229034, "grad_norm": 0.35489481687545776, "learning_rate": 7.409463229737068e-08, "loss": 0.0075, "step": 112580 }, { "epoch": 1.9014084507042255, "grad_norm": 0.10567577928304672, "learning_rate": 7.384207238052832e-08, "loss": 0.0046, "step": 112590 }, { "epoch": 1.9015773296855474, "grad_norm": 0.207607701420784, "learning_rate": 7.358994043621615e-08, "loss": 0.0048, "step": 112600 }, { "epoch": 1.9017462086668693, "grad_norm": 0.3095024526119232, "learning_rate": 7.333823648633776e-08, "loss": 0.0045, "step": 112610 }, { "epoch": 1.9019150876481912, "grad_norm": 0.3436466455459595, "learning_rate": 7.30869605527612e-08, "loss": 0.0076, "step": 112620 }, { "epoch": 1.9020839666295133, "grad_norm": 0.47561630606651306, "learning_rate": 7.283611265731682e-08, "loss": 0.0064, "step": 112630 }, { "epoch": 1.9022528456108354, "grad_norm": 0.23013867437839508, "learning_rate": 7.258569282179828e-08, "loss": 0.0071, "step": 112640 }, { "epoch": 1.9024217245921573, "grad_norm": 0.28433141112327576, "learning_rate": 7.233570106795984e-08, "loss": 0.0086, "step": 112650 }, { "epoch": 1.9025906035734792, "grad_norm": 0.19918757677078247, "learning_rate": 7.208613741752135e-08, "loss": 0.0045, "step": 112660 }, { "epoch": 1.902759482554801, "grad_norm": 0.2279874086380005, "learning_rate": 7.18370018921638e-08, "loss": 0.0043, "step": 112670 }, { "epoch": 1.9029283615361232, "grad_norm": 0.14361897110939026, "learning_rate": 7.158829451353155e-08, "loss": 0.0049, "step": 112680 }, { "epoch": 1.9030972405174453, "grad_norm": 0.25506675243377686, "learning_rate": 7.13400153032323e-08, "loss": 0.0054, "step": 112690 }, { "epoch": 1.9032661194987672, "grad_norm": 0.12299153208732605, "learning_rate": 7.10921642828355e-08, "loss": 0.0081, "step": 112700 }, { "epoch": 1.9034349984800891, "grad_norm": 0.3173937201499939, "learning_rate": 7.084474147387332e-08, "loss": 0.0067, "step": 112710 }, { "epoch": 1.903603877461411, "grad_norm": 0.16874058544635773, "learning_rate": 7.059774689784194e-08, "loss": 0.0079, "step": 112720 }, { "epoch": 1.9037727564427331, "grad_norm": 0.11557412892580032, "learning_rate": 7.035118057619972e-08, "loss": 0.0074, "step": 112730 }, { "epoch": 1.9039416354240553, "grad_norm": 0.493621826171875, "learning_rate": 7.010504253036732e-08, "loss": 0.0054, "step": 112740 }, { "epoch": 1.9041105144053772, "grad_norm": 0.31054484844207764, "learning_rate": 6.985933278172874e-08, "loss": 0.0072, "step": 112750 }, { "epoch": 1.904279393386699, "grad_norm": 0.17718550562858582, "learning_rate": 6.961405135163025e-08, "loss": 0.0085, "step": 112760 }, { "epoch": 1.904448272368021, "grad_norm": 0.3090715706348419, "learning_rate": 6.9369198261382e-08, "loss": 0.0074, "step": 112770 }, { "epoch": 1.904617151349343, "grad_norm": 0.08179079741239548, "learning_rate": 6.912477353225588e-08, "loss": 0.0052, "step": 112780 }, { "epoch": 1.9047860303306652, "grad_norm": 0.2761189043521881, "learning_rate": 6.888077718548713e-08, "loss": 0.0077, "step": 112790 }, { "epoch": 1.904954909311987, "grad_norm": 0.16669800877571106, "learning_rate": 6.863720924227269e-08, "loss": 0.0041, "step": 112800 }, { "epoch": 1.905123788293309, "grad_norm": 0.21636557579040527, "learning_rate": 6.83940697237745e-08, "loss": 0.0072, "step": 112810 }, { "epoch": 1.9052926672746309, "grad_norm": 0.4599418342113495, "learning_rate": 6.81513586511151e-08, "loss": 0.0068, "step": 112820 }, { "epoch": 1.905461546255953, "grad_norm": 0.21947090327739716, "learning_rate": 6.790907604538044e-08, "loss": 0.0043, "step": 112830 }, { "epoch": 1.905630425237275, "grad_norm": 0.21605359017848969, "learning_rate": 6.766722192762032e-08, "loss": 0.0093, "step": 112840 }, { "epoch": 1.905799304218597, "grad_norm": 0.150651216506958, "learning_rate": 6.742579631884571e-08, "loss": 0.0055, "step": 112850 }, { "epoch": 1.905968183199919, "grad_norm": 0.3013603091239929, "learning_rate": 6.718479924003152e-08, "loss": 0.0072, "step": 112860 }, { "epoch": 1.9061370621812408, "grad_norm": 0.4993917942047119, "learning_rate": 6.69442307121143e-08, "loss": 0.0054, "step": 112870 }, { "epoch": 1.906305941162563, "grad_norm": 0.2650391459465027, "learning_rate": 6.67040907559946e-08, "loss": 0.0038, "step": 112880 }, { "epoch": 1.906474820143885, "grad_norm": 0.18514135479927063, "learning_rate": 6.646437939253514e-08, "loss": 0.0042, "step": 112890 }, { "epoch": 1.906643699125207, "grad_norm": 0.1855831742286682, "learning_rate": 6.622509664256149e-08, "loss": 0.0067, "step": 112900 }, { "epoch": 1.9068125781065288, "grad_norm": 0.26424986124038696, "learning_rate": 6.598624252686092e-08, "loss": 0.0067, "step": 112910 }, { "epoch": 1.9069814570878507, "grad_norm": 0.23165062069892883, "learning_rate": 6.574781706618627e-08, "loss": 0.0043, "step": 112920 }, { "epoch": 1.9071503360691728, "grad_norm": 0.35174959897994995, "learning_rate": 6.550982028125041e-08, "loss": 0.0079, "step": 112930 }, { "epoch": 1.907319215050495, "grad_norm": 0.24559633433818817, "learning_rate": 6.527225219273015e-08, "loss": 0.0068, "step": 112940 }, { "epoch": 1.9074880940318168, "grad_norm": 0.3094550371170044, "learning_rate": 6.503511282126451e-08, "loss": 0.0061, "step": 112950 }, { "epoch": 1.9076569730131387, "grad_norm": 0.391423761844635, "learning_rate": 6.479840218745592e-08, "loss": 0.0055, "step": 112960 }, { "epoch": 1.9078258519944606, "grad_norm": 0.042304035276174545, "learning_rate": 6.456212031186904e-08, "loss": 0.0038, "step": 112970 }, { "epoch": 1.9079947309757828, "grad_norm": 0.10345958918333054, "learning_rate": 6.432626721503132e-08, "loss": 0.0072, "step": 112980 }, { "epoch": 1.9081636099571049, "grad_norm": 0.34660184383392334, "learning_rate": 6.409084291743306e-08, "loss": 0.0067, "step": 112990 }, { "epoch": 1.9083324889384268, "grad_norm": 0.40176308155059814, "learning_rate": 6.385584743952788e-08, "loss": 0.0096, "step": 113000 }, { "epoch": 1.9085013679197487, "grad_norm": 0.19872787594795227, "learning_rate": 6.362128080173169e-08, "loss": 0.0095, "step": 113010 }, { "epoch": 1.9086702469010706, "grad_norm": 0.09286875277757645, "learning_rate": 6.338714302442206e-08, "loss": 0.0048, "step": 113020 }, { "epoch": 1.9088391258823927, "grad_norm": 0.4074181020259857, "learning_rate": 6.315343412794051e-08, "loss": 0.0063, "step": 113030 }, { "epoch": 1.9090080048637148, "grad_norm": 0.10561540722846985, "learning_rate": 6.292015413259189e-08, "loss": 0.0081, "step": 113040 }, { "epoch": 1.9091768838450367, "grad_norm": 0.11102955043315887, "learning_rate": 6.268730305864279e-08, "loss": 0.008, "step": 113050 }, { "epoch": 1.9093457628263586, "grad_norm": 0.24081318080425262, "learning_rate": 6.245488092632202e-08, "loss": 0.007, "step": 113060 }, { "epoch": 1.9095146418076805, "grad_norm": 0.24407795071601868, "learning_rate": 6.222288775582285e-08, "loss": 0.0107, "step": 113070 }, { "epoch": 1.9096835207890026, "grad_norm": 0.09800327569246292, "learning_rate": 6.199132356729976e-08, "loss": 0.0101, "step": 113080 }, { "epoch": 1.9098523997703247, "grad_norm": 0.3605806529521942, "learning_rate": 6.17601883808705e-08, "loss": 0.0042, "step": 113090 }, { "epoch": 1.9100212787516466, "grad_norm": 0.18161925673484802, "learning_rate": 6.152948221661515e-08, "loss": 0.0054, "step": 113100 }, { "epoch": 1.9101901577329685, "grad_norm": 0.27837201952934265, "learning_rate": 6.129920509457765e-08, "loss": 0.0052, "step": 113110 }, { "epoch": 1.9103590367142904, "grad_norm": 0.17362716794013977, "learning_rate": 6.106935703476313e-08, "loss": 0.0073, "step": 113120 }, { "epoch": 1.9105279156956125, "grad_norm": 0.23018193244934082, "learning_rate": 6.083993805714117e-08, "loss": 0.0074, "step": 113130 }, { "epoch": 1.9106967946769347, "grad_norm": 1.0880500078201294, "learning_rate": 6.061094818164248e-08, "loss": 0.0107, "step": 113140 }, { "epoch": 1.9108656736582565, "grad_norm": 0.42438268661499023, "learning_rate": 6.038238742816116e-08, "loss": 0.0079, "step": 113150 }, { "epoch": 1.9110345526395784, "grad_norm": 0.09566010534763336, "learning_rate": 6.015425581655355e-08, "loss": 0.0056, "step": 113160 }, { "epoch": 1.9112034316209003, "grad_norm": 0.1254514455795288, "learning_rate": 5.992655336664043e-08, "loss": 0.0053, "step": 113170 }, { "epoch": 1.9113723106022225, "grad_norm": 0.13887783885002136, "learning_rate": 5.969928009820325e-08, "loss": 0.0046, "step": 113180 }, { "epoch": 1.9115411895835446, "grad_norm": 0.48814088106155396, "learning_rate": 5.947243603098729e-08, "loss": 0.0075, "step": 113190 }, { "epoch": 1.9117100685648665, "grad_norm": 0.266912579536438, "learning_rate": 5.9246021184699574e-08, "loss": 0.0099, "step": 113200 }, { "epoch": 1.9118789475461884, "grad_norm": 0.09253688901662827, "learning_rate": 5.9020035579010484e-08, "loss": 0.0107, "step": 113210 }, { "epoch": 1.9120478265275103, "grad_norm": 0.07729736715555191, "learning_rate": 5.87944792335543e-08, "loss": 0.0046, "step": 113220 }, { "epoch": 1.9122167055088324, "grad_norm": 0.11359404772520065, "learning_rate": 5.8569352167925367e-08, "loss": 0.005, "step": 113230 }, { "epoch": 1.9123855844901545, "grad_norm": 0.1786210536956787, "learning_rate": 5.834465440168302e-08, "loss": 0.0072, "step": 113240 }, { "epoch": 1.9125544634714764, "grad_norm": 0.21074114739894867, "learning_rate": 5.812038595434777e-08, "loss": 0.0081, "step": 113250 }, { "epoch": 1.9127233424527983, "grad_norm": 0.3636844754219055, "learning_rate": 5.789654684540402e-08, "loss": 0.0078, "step": 113260 }, { "epoch": 1.9128922214341202, "grad_norm": 0.177603617310524, "learning_rate": 5.767313709429845e-08, "loss": 0.0078, "step": 113270 }, { "epoch": 1.9130611004154423, "grad_norm": 0.26209694147109985, "learning_rate": 5.7450156720440517e-08, "loss": 0.0054, "step": 113280 }, { "epoch": 1.9132299793967644, "grad_norm": 0.04771556705236435, "learning_rate": 5.722760574320141e-08, "loss": 0.0076, "step": 113290 }, { "epoch": 1.9133988583780863, "grad_norm": 0.1597602665424347, "learning_rate": 5.700548418191676e-08, "loss": 0.0057, "step": 113300 }, { "epoch": 1.9135677373594082, "grad_norm": 0.10900068283081055, "learning_rate": 5.67837920558828e-08, "loss": 0.0112, "step": 113310 }, { "epoch": 1.9137366163407301, "grad_norm": 0.1422109156847, "learning_rate": 5.65625293843608e-08, "loss": 0.0065, "step": 113320 }, { "epoch": 1.9139054953220522, "grad_norm": 0.21132928133010864, "learning_rate": 5.634169618657259e-08, "loss": 0.0073, "step": 113330 }, { "epoch": 1.9140743743033743, "grad_norm": 0.1502896547317505, "learning_rate": 5.6121292481704504e-08, "loss": 0.0084, "step": 113340 }, { "epoch": 1.9142432532846962, "grad_norm": 0.1908341348171234, "learning_rate": 5.590131828890344e-08, "loss": 0.0058, "step": 113350 }, { "epoch": 1.9144121322660181, "grad_norm": 0.3485729694366455, "learning_rate": 5.568177362728133e-08, "loss": 0.0109, "step": 113360 }, { "epoch": 1.91458101124734, "grad_norm": 0.13754597306251526, "learning_rate": 5.546265851591126e-08, "loss": 0.0118, "step": 113370 }, { "epoch": 1.9147498902286622, "grad_norm": 0.1437094807624817, "learning_rate": 5.524397297382911e-08, "loss": 0.0074, "step": 113380 }, { "epoch": 1.9149187692099843, "grad_norm": 0.18688571453094482, "learning_rate": 5.502571702003468e-08, "loss": 0.0058, "step": 113390 }, { "epoch": 1.9150876481913062, "grad_norm": 0.24655814468860626, "learning_rate": 5.480789067348835e-08, "loss": 0.008, "step": 113400 }, { "epoch": 1.915256527172628, "grad_norm": 0.2149462252855301, "learning_rate": 5.4590493953115555e-08, "loss": 0.0049, "step": 113410 }, { "epoch": 1.91542540615395, "grad_norm": 0.0978979766368866, "learning_rate": 5.437352687780229e-08, "loss": 0.0068, "step": 113420 }, { "epoch": 1.915594285135272, "grad_norm": 0.14443986117839813, "learning_rate": 5.415698946639847e-08, "loss": 0.0041, "step": 113430 }, { "epoch": 1.9157631641165942, "grad_norm": 0.2253948450088501, "learning_rate": 5.394088173771572e-08, "loss": 0.006, "step": 113440 }, { "epoch": 1.915932043097916, "grad_norm": 0.3020341694355011, "learning_rate": 5.3725203710530135e-08, "loss": 0.0041, "step": 113450 }, { "epoch": 1.916100922079238, "grad_norm": 0.19493801891803741, "learning_rate": 5.350995540357839e-08, "loss": 0.0068, "step": 113460 }, { "epoch": 1.9162698010605599, "grad_norm": 0.1709752231836319, "learning_rate": 5.329513683556053e-08, "loss": 0.0055, "step": 113470 }, { "epoch": 1.916438680041882, "grad_norm": 0.4795478582382202, "learning_rate": 5.3080748025140517e-08, "loss": 0.0069, "step": 113480 }, { "epoch": 1.9166075590232041, "grad_norm": 0.21489886939525604, "learning_rate": 5.2866788990942884e-08, "loss": 0.0084, "step": 113490 }, { "epoch": 1.916776438004526, "grad_norm": 0.2372363656759262, "learning_rate": 5.265325975155611e-08, "loss": 0.0063, "step": 113500 }, { "epoch": 1.916945316985848, "grad_norm": 0.21445785462856293, "learning_rate": 5.244016032553201e-08, "loss": 0.0083, "step": 113510 }, { "epoch": 1.9171141959671698, "grad_norm": 0.14917883276939392, "learning_rate": 5.2227490731383e-08, "loss": 0.0058, "step": 113520 }, { "epoch": 1.917283074948492, "grad_norm": 0.13413071632385254, "learning_rate": 5.201525098758542e-08, "loss": 0.0048, "step": 113530 }, { "epoch": 1.917451953929814, "grad_norm": 0.2274184226989746, "learning_rate": 5.180344111257895e-08, "loss": 0.0073, "step": 113540 }, { "epoch": 1.917620832911136, "grad_norm": 0.266066312789917, "learning_rate": 5.1592061124764445e-08, "loss": 0.0078, "step": 113550 }, { "epoch": 1.9177897118924578, "grad_norm": 0.34627673029899597, "learning_rate": 5.138111104250609e-08, "loss": 0.0065, "step": 113560 }, { "epoch": 1.9179585908737797, "grad_norm": 0.33664876222610474, "learning_rate": 5.1170590884130896e-08, "loss": 0.0071, "step": 113570 }, { "epoch": 1.9181274698551019, "grad_norm": 0.21948042511940002, "learning_rate": 5.0960500667928123e-08, "loss": 0.0061, "step": 113580 }, { "epoch": 1.918296348836424, "grad_norm": 0.14905649423599243, "learning_rate": 5.0750840412150394e-08, "loss": 0.0084, "step": 113590 }, { "epoch": 1.9184652278177459, "grad_norm": 0.3335984945297241, "learning_rate": 5.0541610135011465e-08, "loss": 0.0071, "step": 113600 }, { "epoch": 1.9186341067990678, "grad_norm": 0.1161188930273056, "learning_rate": 5.033280985469013e-08, "loss": 0.0043, "step": 113610 }, { "epoch": 1.9188029857803897, "grad_norm": 0.21759603917598724, "learning_rate": 5.012443958932522e-08, "loss": 0.0077, "step": 113620 }, { "epoch": 1.9189718647617118, "grad_norm": 0.3323507010936737, "learning_rate": 4.991649935702003e-08, "loss": 0.0051, "step": 113630 }, { "epoch": 1.919140743743034, "grad_norm": 0.30876579880714417, "learning_rate": 4.970898917584011e-08, "loss": 0.0055, "step": 113640 }, { "epoch": 1.9193096227243558, "grad_norm": 0.19018100202083588, "learning_rate": 4.950190906381325e-08, "loss": 0.0057, "step": 113650 }, { "epoch": 1.9194785017056777, "grad_norm": 0.2148338407278061, "learning_rate": 4.929525903892951e-08, "loss": 0.0053, "step": 113660 }, { "epoch": 1.9196473806869996, "grad_norm": 0.31624138355255127, "learning_rate": 4.9089039119142866e-08, "loss": 0.0064, "step": 113670 }, { "epoch": 1.9198162596683217, "grad_norm": 0.2620059847831726, "learning_rate": 4.8883249322368985e-08, "loss": 0.009, "step": 113680 }, { "epoch": 1.9199851386496438, "grad_norm": 0.2516302168369293, "learning_rate": 4.867788966648579e-08, "loss": 0.0055, "step": 113690 }, { "epoch": 1.9201540176309657, "grad_norm": 0.23798313736915588, "learning_rate": 4.847296016933567e-08, "loss": 0.0086, "step": 113700 }, { "epoch": 1.9203228966122876, "grad_norm": 0.39467743039131165, "learning_rate": 4.826846084872161e-08, "loss": 0.0078, "step": 113710 }, { "epoch": 1.9204917755936095, "grad_norm": 0.28651028871536255, "learning_rate": 4.8064391722409395e-08, "loss": 0.0085, "step": 113720 }, { "epoch": 1.9206606545749316, "grad_norm": 0.19984211027622223, "learning_rate": 4.7860752808129294e-08, "loss": 0.0065, "step": 113730 }, { "epoch": 1.9208295335562537, "grad_norm": 0.25535231828689575, "learning_rate": 4.765754412357215e-08, "loss": 0.0042, "step": 113740 }, { "epoch": 1.9209984125375756, "grad_norm": 0.3101745545864105, "learning_rate": 4.745476568639274e-08, "loss": 0.0062, "step": 113750 }, { "epoch": 1.9211672915188975, "grad_norm": 0.43987390398979187, "learning_rate": 4.725241751420695e-08, "loss": 0.0083, "step": 113760 }, { "epoch": 1.9213361705002194, "grad_norm": 0.11976850777864456, "learning_rate": 4.7050499624595734e-08, "loss": 0.0102, "step": 113770 }, { "epoch": 1.9215050494815415, "grad_norm": 0.5413749814033508, "learning_rate": 4.684901203510062e-08, "loss": 0.0068, "step": 113780 }, { "epoch": 1.9216739284628637, "grad_norm": 0.24405375123023987, "learning_rate": 4.664795476322592e-08, "loss": 0.0078, "step": 113790 }, { "epoch": 1.9218428074441856, "grad_norm": 0.13114964962005615, "learning_rate": 4.644732782643935e-08, "loss": 0.0084, "step": 113800 }, { "epoch": 1.9220116864255075, "grad_norm": 0.33953896164894104, "learning_rate": 4.6247131242171394e-08, "loss": 0.0062, "step": 113810 }, { "epoch": 1.9221805654068294, "grad_norm": 0.34112074971199036, "learning_rate": 4.604736502781315e-08, "loss": 0.0048, "step": 113820 }, { "epoch": 1.9223494443881515, "grad_norm": 0.7026805281639099, "learning_rate": 4.584802920072184e-08, "loss": 0.0059, "step": 113830 }, { "epoch": 1.9225183233694736, "grad_norm": 0.04989639297127724, "learning_rate": 4.564912377821362e-08, "loss": 0.006, "step": 113840 }, { "epoch": 1.9226872023507955, "grad_norm": 0.4509774148464203, "learning_rate": 4.545064877756966e-08, "loss": 0.0042, "step": 113850 }, { "epoch": 1.9228560813321174, "grad_norm": 0.1275562047958374, "learning_rate": 4.5252604216032836e-08, "loss": 0.0079, "step": 113860 }, { "epoch": 1.9230249603134393, "grad_norm": 0.19411110877990723, "learning_rate": 4.505499011080883e-08, "loss": 0.0056, "step": 113870 }, { "epoch": 1.9231938392947614, "grad_norm": 0.13301149010658264, "learning_rate": 4.485780647906557e-08, "loss": 0.0065, "step": 113880 }, { "epoch": 1.9233627182760835, "grad_norm": 0.24082373082637787, "learning_rate": 4.4661053337934355e-08, "loss": 0.009, "step": 113890 }, { "epoch": 1.9235315972574054, "grad_norm": 0.2298441380262375, "learning_rate": 4.446473070450819e-08, "loss": 0.0061, "step": 113900 }, { "epoch": 1.9237004762387273, "grad_norm": 0.18109965324401855, "learning_rate": 4.426883859584341e-08, "loss": 0.0048, "step": 113910 }, { "epoch": 1.9238693552200492, "grad_norm": 0.10501250624656677, "learning_rate": 4.407337702895864e-08, "loss": 0.0067, "step": 113920 }, { "epoch": 1.9240382342013713, "grad_norm": 0.0990186333656311, "learning_rate": 4.3878346020834736e-08, "loss": 0.0048, "step": 113930 }, { "epoch": 1.9242071131826934, "grad_norm": 0.454896479845047, "learning_rate": 4.3683745588415925e-08, "loss": 0.0071, "step": 113940 }, { "epoch": 1.9243759921640153, "grad_norm": 0.16148735582828522, "learning_rate": 4.348957574860868e-08, "loss": 0.0043, "step": 113950 }, { "epoch": 1.9245448711453372, "grad_norm": 0.3387305438518524, "learning_rate": 4.329583651828173e-08, "loss": 0.0064, "step": 113960 }, { "epoch": 1.9247137501266591, "grad_norm": 0.22058482468128204, "learning_rate": 4.3102527914266615e-08, "loss": 0.0091, "step": 113970 }, { "epoch": 1.9248826291079812, "grad_norm": 0.24902568757534027, "learning_rate": 4.290964995335711e-08, "loss": 0.0052, "step": 113980 }, { "epoch": 1.9250515080893034, "grad_norm": 0.1697600930929184, "learning_rate": 4.271720265231094e-08, "loss": 0.0061, "step": 113990 }, { "epoch": 1.9252203870706253, "grad_norm": 0.1384151130914688, "learning_rate": 4.252518602784694e-08, "loss": 0.0064, "step": 114000 }, { "epoch": 1.9253892660519472, "grad_norm": 0.21489526331424713, "learning_rate": 4.233360009664733e-08, "loss": 0.0091, "step": 114010 }, { "epoch": 1.925558145033269, "grad_norm": 0.20175205171108246, "learning_rate": 4.214244487535601e-08, "loss": 0.0052, "step": 114020 }, { "epoch": 1.9257270240145912, "grad_norm": 0.2948250472545624, "learning_rate": 4.195172038058082e-08, "loss": 0.0046, "step": 114030 }, { "epoch": 1.9258959029959133, "grad_norm": 0.21327829360961914, "learning_rate": 4.176142662889127e-08, "loss": 0.0064, "step": 114040 }, { "epoch": 1.9260647819772352, "grad_norm": 0.09071559458971024, "learning_rate": 4.1571563636819136e-08, "loss": 0.0055, "step": 114050 }, { "epoch": 1.926233660958557, "grad_norm": 0.08440662920475006, "learning_rate": 4.138213142085956e-08, "loss": 0.0079, "step": 114060 }, { "epoch": 1.926402539939879, "grad_norm": 0.26229730248451233, "learning_rate": 4.119312999746994e-08, "loss": 0.0035, "step": 114070 }, { "epoch": 1.926571418921201, "grad_norm": 0.2733171582221985, "learning_rate": 4.100455938307046e-08, "loss": 0.0073, "step": 114080 }, { "epoch": 1.9267402979025232, "grad_norm": 0.29450589418411255, "learning_rate": 4.081641959404303e-08, "loss": 0.0125, "step": 114090 }, { "epoch": 1.926909176883845, "grad_norm": 0.25834420323371887, "learning_rate": 4.062871064673346e-08, "loss": 0.0045, "step": 114100 }, { "epoch": 1.927078055865167, "grad_norm": 0.24317383766174316, "learning_rate": 4.0441432557449254e-08, "loss": 0.0072, "step": 114110 }, { "epoch": 1.927246934846489, "grad_norm": 0.5760841965675354, "learning_rate": 4.025458534246018e-08, "loss": 0.0051, "step": 114120 }, { "epoch": 1.927415813827811, "grad_norm": 0.288675993680954, "learning_rate": 4.006816901799992e-08, "loss": 0.007, "step": 114130 }, { "epoch": 1.9275846928091331, "grad_norm": 0.14255958795547485, "learning_rate": 3.98821836002633e-08, "loss": 0.0044, "step": 114140 }, { "epoch": 1.927753571790455, "grad_norm": 0.238078311085701, "learning_rate": 3.9696629105408504e-08, "loss": 0.0075, "step": 114150 }, { "epoch": 1.927922450771777, "grad_norm": 0.3945510983467102, "learning_rate": 3.951150554955652e-08, "loss": 0.0078, "step": 114160 }, { "epoch": 1.9280913297530988, "grad_norm": 0.35665619373321533, "learning_rate": 3.9326812948788926e-08, "loss": 0.0083, "step": 114170 }, { "epoch": 1.928260208734421, "grad_norm": 0.20621682703495026, "learning_rate": 3.9142551319152897e-08, "loss": 0.0064, "step": 114180 }, { "epoch": 1.928429087715743, "grad_norm": 0.3431394696235657, "learning_rate": 3.8958720676655627e-08, "loss": 0.0071, "step": 114190 }, { "epoch": 1.928597966697065, "grad_norm": 0.20666630566120148, "learning_rate": 3.8775321037268776e-08, "loss": 0.0079, "step": 114200 }, { "epoch": 1.9287668456783869, "grad_norm": 0.28330227732658386, "learning_rate": 3.8592352416924604e-08, "loss": 0.006, "step": 114210 }, { "epoch": 1.9289357246597088, "grad_norm": 0.10510825365781784, "learning_rate": 3.840981483151984e-08, "loss": 0.0061, "step": 114220 }, { "epoch": 1.9291046036410309, "grad_norm": 0.29112181067466736, "learning_rate": 3.822770829691291e-08, "loss": 0.0037, "step": 114230 }, { "epoch": 1.929273482622353, "grad_norm": 0.19629456102848053, "learning_rate": 3.804603282892394e-08, "loss": 0.0104, "step": 114240 }, { "epoch": 1.9294423616036749, "grad_norm": 0.199052631855011, "learning_rate": 3.786478844333752e-08, "loss": 0.0125, "step": 114250 }, { "epoch": 1.9296112405849968, "grad_norm": 0.4424894452095032, "learning_rate": 3.768397515589883e-08, "loss": 0.0087, "step": 114260 }, { "epoch": 1.9297801195663187, "grad_norm": 0.37695565819740295, "learning_rate": 3.750359298231643e-08, "loss": 0.0069, "step": 114270 }, { "epoch": 1.9299489985476408, "grad_norm": 0.10627782344818115, "learning_rate": 3.732364193826277e-08, "loss": 0.0042, "step": 114280 }, { "epoch": 1.930117877528963, "grad_norm": 0.1464914083480835, "learning_rate": 3.714412203937035e-08, "loss": 0.0056, "step": 114290 }, { "epoch": 1.9302867565102848, "grad_norm": 0.19811560213565826, "learning_rate": 3.6965033301235576e-08, "loss": 0.0082, "step": 114300 }, { "epoch": 1.9304556354916067, "grad_norm": 0.23247280716896057, "learning_rate": 3.6786375739417676e-08, "loss": 0.0089, "step": 114310 }, { "epoch": 1.9306245144729286, "grad_norm": 0.10719402879476547, "learning_rate": 3.660814936943757e-08, "loss": 0.0043, "step": 114320 }, { "epoch": 1.9307933934542507, "grad_norm": 0.07848626375198364, "learning_rate": 3.643035420677954e-08, "loss": 0.0069, "step": 114330 }, { "epoch": 1.9309622724355728, "grad_norm": 0.13930745422840118, "learning_rate": 3.6252990266889e-08, "loss": 0.0042, "step": 114340 }, { "epoch": 1.9311311514168947, "grad_norm": 0.23392319679260254, "learning_rate": 3.60760575651764e-08, "loss": 0.0052, "step": 114350 }, { "epoch": 1.9313000303982166, "grad_norm": 0.19621163606643677, "learning_rate": 3.5899556117012236e-08, "loss": 0.0049, "step": 114360 }, { "epoch": 1.9314689093795385, "grad_norm": 0.1344621479511261, "learning_rate": 3.572348593773034e-08, "loss": 0.0103, "step": 114370 }, { "epoch": 1.9316377883608606, "grad_norm": 0.16804347932338715, "learning_rate": 3.554784704262793e-08, "loss": 0.0059, "step": 114380 }, { "epoch": 1.9318066673421825, "grad_norm": 0.24934545159339905, "learning_rate": 3.5372639446963897e-08, "loss": 0.0088, "step": 114390 }, { "epoch": 1.9319755463235047, "grad_norm": 0.24987584352493286, "learning_rate": 3.519786316595997e-08, "loss": 0.0068, "step": 114400 }, { "epoch": 1.9321444253048266, "grad_norm": 0.1924600750207901, "learning_rate": 3.5023518214799543e-08, "loss": 0.0088, "step": 114410 }, { "epoch": 1.9323133042861484, "grad_norm": 0.2501574456691742, "learning_rate": 3.484960460862996e-08, "loss": 0.0073, "step": 114420 }, { "epoch": 1.9324821832674706, "grad_norm": 0.16876205801963806, "learning_rate": 3.4676122362560235e-08, "loss": 0.0062, "step": 114430 }, { "epoch": 1.9326510622487925, "grad_norm": 0.19436340034008026, "learning_rate": 3.450307149166276e-08, "loss": 0.0041, "step": 114440 }, { "epoch": 1.9328199412301146, "grad_norm": 0.23099559545516968, "learning_rate": 3.4330452010970515e-08, "loss": 0.0048, "step": 114450 }, { "epoch": 1.9329888202114365, "grad_norm": 0.4883482754230499, "learning_rate": 3.415826393548039e-08, "loss": 0.0061, "step": 114460 }, { "epoch": 1.9331576991927584, "grad_norm": 0.24395941197872162, "learning_rate": 3.3986507280152624e-08, "loss": 0.0064, "step": 114470 }, { "epoch": 1.9333265781740805, "grad_norm": 0.11499463021755219, "learning_rate": 3.3815182059908636e-08, "loss": 0.0051, "step": 114480 }, { "epoch": 1.9334954571554024, "grad_norm": 0.16224169731140137, "learning_rate": 3.364428828963206e-08, "loss": 0.0047, "step": 114490 }, { "epoch": 1.9336643361367245, "grad_norm": 0.37899160385131836, "learning_rate": 3.3473825984171035e-08, "loss": 0.0064, "step": 114500 }, { "epoch": 1.9338332151180464, "grad_norm": 0.26380211114883423, "learning_rate": 3.330379515833371e-08, "loss": 0.0105, "step": 114510 }, { "epoch": 1.9340020940993683, "grad_norm": 0.39525318145751953, "learning_rate": 3.313419582689215e-08, "loss": 0.0079, "step": 114520 }, { "epoch": 1.9341709730806904, "grad_norm": 0.15884780883789062, "learning_rate": 3.2965028004581787e-08, "loss": 0.005, "step": 114530 }, { "epoch": 1.9343398520620123, "grad_norm": 0.2820291519165039, "learning_rate": 3.279629170609811e-08, "loss": 0.0047, "step": 114540 }, { "epoch": 1.9345087310433344, "grad_norm": 0.21240797638893127, "learning_rate": 3.262798694610103e-08, "loss": 0.004, "step": 114550 }, { "epoch": 1.9346776100246563, "grad_norm": 0.12633667886257172, "learning_rate": 3.246011373921276e-08, "loss": 0.007, "step": 114560 }, { "epoch": 1.9348464890059782, "grad_norm": 0.3525210916996002, "learning_rate": 3.229267210001774e-08, "loss": 0.0061, "step": 114570 }, { "epoch": 1.9350153679873003, "grad_norm": 0.14186416566371918, "learning_rate": 3.2125662043062114e-08, "loss": 0.0063, "step": 114580 }, { "epoch": 1.9351842469686222, "grad_norm": 0.8942223787307739, "learning_rate": 3.195908358285593e-08, "loss": 0.006, "step": 114590 }, { "epoch": 1.9353531259499444, "grad_norm": 0.4522523581981659, "learning_rate": 3.179293673387096e-08, "loss": 0.0102, "step": 114600 }, { "epoch": 1.9355220049312662, "grad_norm": 0.21811334788799286, "learning_rate": 3.162722151054176e-08, "loss": 0.007, "step": 114610 }, { "epoch": 1.9356908839125881, "grad_norm": 0.1967283934354782, "learning_rate": 3.1461937927265705e-08, "loss": 0.0061, "step": 114620 }, { "epoch": 1.9358597628939103, "grad_norm": 0.3329906165599823, "learning_rate": 3.1297085998400755e-08, "loss": 0.0099, "step": 114630 }, { "epoch": 1.9360286418752322, "grad_norm": 0.41654106974601746, "learning_rate": 3.113266573827045e-08, "loss": 0.0066, "step": 114640 }, { "epoch": 1.9361975208565543, "grad_norm": 0.2977706491947174, "learning_rate": 3.0968677161158365e-08, "loss": 0.0065, "step": 114650 }, { "epoch": 1.9363663998378762, "grad_norm": 0.14821231365203857, "learning_rate": 3.080512028131144e-08, "loss": 0.0061, "step": 114660 }, { "epoch": 1.936535278819198, "grad_norm": 0.16231366991996765, "learning_rate": 3.0641995112939415e-08, "loss": 0.0055, "step": 114670 }, { "epoch": 1.9367041578005202, "grad_norm": 0.22602902352809906, "learning_rate": 3.047930167021429e-08, "loss": 0.0049, "step": 114680 }, { "epoch": 1.936873036781842, "grad_norm": 0.12035078555345535, "learning_rate": 3.0317039967270315e-08, "loss": 0.006, "step": 114690 }, { "epoch": 1.9370419157631642, "grad_norm": 0.1338915377855301, "learning_rate": 3.0155210018203984e-08, "loss": 0.0074, "step": 114700 }, { "epoch": 1.937210794744486, "grad_norm": 0.23417548835277557, "learning_rate": 2.999381183707517e-08, "loss": 0.0042, "step": 114710 }, { "epoch": 1.937379673725808, "grad_norm": 0.28705525398254395, "learning_rate": 2.983284543790488e-08, "loss": 0.0091, "step": 114720 }, { "epoch": 1.9375485527071301, "grad_norm": 0.4732814431190491, "learning_rate": 2.9672310834679143e-08, "loss": 0.0082, "step": 114730 }, { "epoch": 1.937717431688452, "grad_norm": 0.2194153368473053, "learning_rate": 2.951220804134347e-08, "loss": 0.0075, "step": 114740 }, { "epoch": 1.9378863106697741, "grad_norm": 0.1164470836520195, "learning_rate": 2.9352537071807853e-08, "loss": 0.0041, "step": 114750 }, { "epoch": 1.938055189651096, "grad_norm": 0.29065385460853577, "learning_rate": 2.9193297939943966e-08, "loss": 0.0073, "step": 114760 }, { "epoch": 1.938224068632418, "grad_norm": 0.28628218173980713, "learning_rate": 2.9034490659585745e-08, "loss": 0.0046, "step": 114770 }, { "epoch": 1.93839294761374, "grad_norm": 0.11790957301855087, "learning_rate": 2.8876115244530488e-08, "loss": 0.0067, "step": 114780 }, { "epoch": 1.938561826595062, "grad_norm": 0.2744184136390686, "learning_rate": 2.8718171708536636e-08, "loss": 0.0043, "step": 114790 }, { "epoch": 1.938730705576384, "grad_norm": 0.22218917310237885, "learning_rate": 2.8560660065327096e-08, "loss": 0.0065, "step": 114800 }, { "epoch": 1.938899584557706, "grad_norm": 0.19880935549736023, "learning_rate": 2.8403580328585368e-08, "loss": 0.0059, "step": 114810 }, { "epoch": 1.9390684635390278, "grad_norm": 0.5992051959037781, "learning_rate": 2.8246932511958313e-08, "loss": 0.0045, "step": 114820 }, { "epoch": 1.93923734252035, "grad_norm": 0.21801140904426575, "learning_rate": 2.809071662905505e-08, "loss": 0.004, "step": 114830 }, { "epoch": 1.9394062215016719, "grad_norm": 0.3627292513847351, "learning_rate": 2.7934932693446938e-08, "loss": 0.0097, "step": 114840 }, { "epoch": 1.939575100482994, "grad_norm": 0.12398532778024673, "learning_rate": 2.7779580718669263e-08, "loss": 0.0056, "step": 114850 }, { "epoch": 1.9397439794643159, "grad_norm": 0.29050785303115845, "learning_rate": 2.7624660718217343e-08, "loss": 0.0076, "step": 114860 }, { "epoch": 1.9399128584456378, "grad_norm": 0.5455626249313354, "learning_rate": 2.7470172705550413e-08, "loss": 0.0092, "step": 114870 }, { "epoch": 1.9400817374269599, "grad_norm": 0.15602931380271912, "learning_rate": 2.7316116694091065e-08, "loss": 0.0043, "step": 114880 }, { "epoch": 1.9402506164082818, "grad_norm": 0.2919233441352844, "learning_rate": 2.7162492697221932e-08, "loss": 0.0056, "step": 114890 }, { "epoch": 1.940419495389604, "grad_norm": 0.216639444231987, "learning_rate": 2.7009300728290668e-08, "loss": 0.0088, "step": 114900 }, { "epoch": 1.9405883743709258, "grad_norm": 0.1946246176958084, "learning_rate": 2.6856540800604956e-08, "loss": 0.0062, "step": 114910 }, { "epoch": 1.9407572533522477, "grad_norm": 0.25793173909187317, "learning_rate": 2.6704212927437523e-08, "loss": 0.0041, "step": 114920 }, { "epoch": 1.9409261323335698, "grad_norm": 0.08336132764816284, "learning_rate": 2.6552317122021108e-08, "loss": 0.0071, "step": 114930 }, { "epoch": 1.9410950113148917, "grad_norm": 0.43541061878204346, "learning_rate": 2.640085339755294e-08, "loss": 0.0055, "step": 114940 }, { "epoch": 1.9412638902962138, "grad_norm": 0.21966460347175598, "learning_rate": 2.6249821767191376e-08, "loss": 0.0066, "step": 114950 }, { "epoch": 1.9414327692775357, "grad_norm": 0.29509204626083374, "learning_rate": 2.6099222244057587e-08, "loss": 0.0066, "step": 114960 }, { "epoch": 1.9416016482588576, "grad_norm": 0.3048444986343384, "learning_rate": 2.5949054841235556e-08, "loss": 0.0062, "step": 114970 }, { "epoch": 1.9417705272401797, "grad_norm": 0.3693940043449402, "learning_rate": 2.579931957177151e-08, "loss": 0.0074, "step": 114980 }, { "epoch": 1.9419394062215016, "grad_norm": 0.16283577680587769, "learning_rate": 2.565001644867393e-08, "loss": 0.0065, "step": 114990 }, { "epoch": 1.9421082852028237, "grad_norm": 0.20012634992599487, "learning_rate": 2.550114548491356e-08, "loss": 0.0078, "step": 115000 }, { "epoch": 1.9422771641841456, "grad_norm": 0.1252926141023636, "learning_rate": 2.535270669342449e-08, "loss": 0.0062, "step": 115010 }, { "epoch": 1.9424460431654675, "grad_norm": 0.2448159009218216, "learning_rate": 2.520470008710252e-08, "loss": 0.0055, "step": 115020 }, { "epoch": 1.9426149221467897, "grad_norm": 0.18308666348457336, "learning_rate": 2.505712567880625e-08, "loss": 0.0068, "step": 115030 }, { "epoch": 1.9427838011281116, "grad_norm": 0.4853120446205139, "learning_rate": 2.4909983481356536e-08, "loss": 0.008, "step": 115040 }, { "epoch": 1.9429526801094337, "grad_norm": 0.37056243419647217, "learning_rate": 2.4763273507535934e-08, "loss": 0.0064, "step": 115050 }, { "epoch": 1.9431215590907556, "grad_norm": 0.19725340604782104, "learning_rate": 2.461699577009147e-08, "loss": 0.0079, "step": 115060 }, { "epoch": 1.9432904380720775, "grad_norm": 0.2567845284938812, "learning_rate": 2.4471150281730748e-08, "loss": 0.0071, "step": 115070 }, { "epoch": 1.9434593170533996, "grad_norm": 0.06979241222143173, "learning_rate": 2.4325737055124753e-08, "loss": 0.005, "step": 115080 }, { "epoch": 1.9436281960347215, "grad_norm": 0.07669134438037872, "learning_rate": 2.4180756102906157e-08, "loss": 0.0048, "step": 115090 }, { "epoch": 1.9437970750160436, "grad_norm": 0.4299726188182831, "learning_rate": 2.4036207437670987e-08, "loss": 0.0115, "step": 115100 }, { "epoch": 1.9439659539973655, "grad_norm": 0.4923119843006134, "learning_rate": 2.3892091071976988e-08, "loss": 0.0089, "step": 115110 }, { "epoch": 1.9441348329786874, "grad_norm": 0.12743982672691345, "learning_rate": 2.3748407018345244e-08, "loss": 0.0039, "step": 115120 }, { "epoch": 1.9443037119600095, "grad_norm": 0.23697005212306976, "learning_rate": 2.3605155289257442e-08, "loss": 0.008, "step": 115130 }, { "epoch": 1.9444725909413314, "grad_norm": 0.13425776362419128, "learning_rate": 2.3462335897160294e-08, "loss": 0.0082, "step": 115140 }, { "epoch": 1.9446414699226535, "grad_norm": 0.2987442910671234, "learning_rate": 2.3319948854461095e-08, "loss": 0.0074, "step": 115150 }, { "epoch": 1.9448103489039754, "grad_norm": 0.30571556091308594, "learning_rate": 2.3177994173529948e-08, "loss": 0.0064, "step": 115160 }, { "epoch": 1.9449792278852973, "grad_norm": 0.28374674916267395, "learning_rate": 2.3036471866699216e-08, "loss": 0.0064, "step": 115170 }, { "epoch": 1.9451481068666194, "grad_norm": 0.17455807328224182, "learning_rate": 2.2895381946264617e-08, "loss": 0.0056, "step": 115180 }, { "epoch": 1.9453169858479413, "grad_norm": 0.16515693068504333, "learning_rate": 2.2754724424483566e-08, "loss": 0.0046, "step": 115190 }, { "epoch": 1.9454858648292634, "grad_norm": 0.1856255680322647, "learning_rate": 2.2614499313575733e-08, "loss": 0.0036, "step": 115200 }, { "epoch": 1.9456547438105853, "grad_norm": 0.21458426117897034, "learning_rate": 2.2474706625723597e-08, "loss": 0.0094, "step": 115210 }, { "epoch": 1.9458236227919072, "grad_norm": 0.24673452973365784, "learning_rate": 2.2335346373072997e-08, "loss": 0.0063, "step": 115220 }, { "epoch": 1.9459925017732294, "grad_norm": 0.09657003730535507, "learning_rate": 2.219641856772925e-08, "loss": 0.0024, "step": 115230 }, { "epoch": 1.9461613807545513, "grad_norm": 0.2617625296115875, "learning_rate": 2.205792322176381e-08, "loss": 0.0059, "step": 115240 }, { "epoch": 1.9463302597358734, "grad_norm": 0.25922924280166626, "learning_rate": 2.1919860347208168e-08, "loss": 0.0053, "step": 115250 }, { "epoch": 1.9464991387171953, "grad_norm": 0.20391419529914856, "learning_rate": 2.1782229956057165e-08, "loss": 0.0066, "step": 115260 }, { "epoch": 1.9466680176985172, "grad_norm": 0.22267428040504456, "learning_rate": 2.16450320602668e-08, "loss": 0.0053, "step": 115270 }, { "epoch": 1.9468368966798393, "grad_norm": 0.15162836015224457, "learning_rate": 2.150826667175754e-08, "loss": 0.009, "step": 115280 }, { "epoch": 1.9470057756611612, "grad_norm": 0.13367529213428497, "learning_rate": 2.1371933802410983e-08, "loss": 0.0066, "step": 115290 }, { "epoch": 1.9471746546424833, "grad_norm": 0.16351117193698883, "learning_rate": 2.1236033464071547e-08, "loss": 0.0051, "step": 115300 }, { "epoch": 1.9473435336238052, "grad_norm": 0.0747457817196846, "learning_rate": 2.1100565668544793e-08, "loss": 0.0039, "step": 115310 }, { "epoch": 1.947512412605127, "grad_norm": 0.1963164210319519, "learning_rate": 2.09655304276013e-08, "loss": 0.0044, "step": 115320 }, { "epoch": 1.9476812915864492, "grad_norm": 0.250080943107605, "learning_rate": 2.083092775297224e-08, "loss": 0.006, "step": 115330 }, { "epoch": 1.947850170567771, "grad_norm": 0.29568782448768616, "learning_rate": 2.069675765635104e-08, "loss": 0.0057, "step": 115340 }, { "epoch": 1.9480190495490932, "grad_norm": 0.2500702738761902, "learning_rate": 2.0563020149394486e-08, "loss": 0.005, "step": 115350 }, { "epoch": 1.9481879285304151, "grad_norm": 0.1245436891913414, "learning_rate": 2.0429715243721614e-08, "loss": 0.0072, "step": 115360 }, { "epoch": 1.948356807511737, "grad_norm": 0.24393583834171295, "learning_rate": 2.029684295091261e-08, "loss": 0.0054, "step": 115370 }, { "epoch": 1.9485256864930591, "grad_norm": 0.24573330581188202, "learning_rate": 2.0164403282512124e-08, "loss": 0.0073, "step": 115380 }, { "epoch": 1.948694565474381, "grad_norm": 0.2580696642398834, "learning_rate": 2.0032396250025955e-08, "loss": 0.0046, "step": 115390 }, { "epoch": 1.9488634444557031, "grad_norm": 0.14785084128379822, "learning_rate": 1.9900821864922147e-08, "loss": 0.0045, "step": 115400 }, { "epoch": 1.949032323437025, "grad_norm": 0.2980978786945343, "learning_rate": 1.9769680138631565e-08, "loss": 0.0082, "step": 115410 }, { "epoch": 1.949201202418347, "grad_norm": 0.24226737022399902, "learning_rate": 1.963897108254842e-08, "loss": 0.0047, "step": 115420 }, { "epoch": 1.949370081399669, "grad_norm": 0.38623306155204773, "learning_rate": 1.9508694708026964e-08, "loss": 0.0103, "step": 115430 }, { "epoch": 1.949538960380991, "grad_norm": 0.11759011447429657, "learning_rate": 1.937885102638648e-08, "loss": 0.0061, "step": 115440 }, { "epoch": 1.949707839362313, "grad_norm": 0.28587663173675537, "learning_rate": 1.9249440048906833e-08, "loss": 0.0061, "step": 115450 }, { "epoch": 1.949876718343635, "grad_norm": 0.26926177740097046, "learning_rate": 1.9120461786831245e-08, "loss": 0.0075, "step": 115460 }, { "epoch": 1.9500455973249569, "grad_norm": 0.5522632002830505, "learning_rate": 1.8991916251365205e-08, "loss": 0.0076, "step": 115470 }, { "epoch": 1.9502144763062788, "grad_norm": 0.2955242693424225, "learning_rate": 1.886380345367589e-08, "loss": 0.0073, "step": 115480 }, { "epoch": 1.9503833552876009, "grad_norm": 0.14428119361400604, "learning_rate": 1.8736123404893835e-08, "loss": 0.0063, "step": 115490 }, { "epoch": 1.950552234268923, "grad_norm": 0.29034844040870667, "learning_rate": 1.8608876116111285e-08, "loss": 0.0072, "step": 115500 }, { "epoch": 1.9507211132502449, "grad_norm": 0.3695225119590759, "learning_rate": 1.848206159838328e-08, "loss": 0.0073, "step": 115510 }, { "epoch": 1.9508899922315668, "grad_norm": 0.20858705043792725, "learning_rate": 1.8355679862727127e-08, "loss": 0.0067, "step": 115520 }, { "epoch": 1.9510588712128887, "grad_norm": 0.1813095659017563, "learning_rate": 1.8229730920122922e-08, "loss": 0.0074, "step": 115530 }, { "epoch": 1.9512277501942108, "grad_norm": 0.3189837634563446, "learning_rate": 1.810421478151192e-08, "loss": 0.0056, "step": 115540 }, { "epoch": 1.951396629175533, "grad_norm": 0.1725086271762848, "learning_rate": 1.7979131457799282e-08, "loss": 0.0045, "step": 115550 }, { "epoch": 1.9515655081568548, "grad_norm": 0.23580291867256165, "learning_rate": 1.785448095985243e-08, "loss": 0.005, "step": 115560 }, { "epoch": 1.9517343871381767, "grad_norm": 0.18615636229515076, "learning_rate": 1.7730263298499918e-08, "loss": 0.0072, "step": 115570 }, { "epoch": 1.9519032661194986, "grad_norm": 0.19195789098739624, "learning_rate": 1.7606478484533672e-08, "loss": 0.0064, "step": 115580 }, { "epoch": 1.9520721451008207, "grad_norm": 0.10809890925884247, "learning_rate": 1.7483126528707873e-08, "loss": 0.0045, "step": 115590 }, { "epoch": 1.9522410240821428, "grad_norm": 0.5179917216300964, "learning_rate": 1.7360207441738943e-08, "loss": 0.0112, "step": 115600 }, { "epoch": 1.9524099030634647, "grad_norm": 0.23228438198566437, "learning_rate": 1.723772123430556e-08, "loss": 0.0068, "step": 115610 }, { "epoch": 1.9525787820447866, "grad_norm": 0.2273276150226593, "learning_rate": 1.711566791704977e-08, "loss": 0.0071, "step": 115620 }, { "epoch": 1.9527476610261085, "grad_norm": 0.42636674642562866, "learning_rate": 1.699404750057476e-08, "loss": 0.005, "step": 115630 }, { "epoch": 1.9529165400074306, "grad_norm": 0.21535636484622955, "learning_rate": 1.6872859995446523e-08, "loss": 0.0072, "step": 115640 }, { "epoch": 1.9530854189887528, "grad_norm": 0.2600725591182709, "learning_rate": 1.6752105412193854e-08, "loss": 0.0063, "step": 115650 }, { "epoch": 1.9532542979700747, "grad_norm": 0.10441213846206665, "learning_rate": 1.663178376130725e-08, "loss": 0.0044, "step": 115660 }, { "epoch": 1.9534231769513966, "grad_norm": 0.17206566035747528, "learning_rate": 1.6511895053240023e-08, "loss": 0.0038, "step": 115670 }, { "epoch": 1.9535920559327185, "grad_norm": 0.39849403500556946, "learning_rate": 1.639243929840828e-08, "loss": 0.0049, "step": 115680 }, { "epoch": 1.9537609349140406, "grad_norm": 0.4042329490184784, "learning_rate": 1.6273416507189276e-08, "loss": 0.0057, "step": 115690 }, { "epoch": 1.9539298138953627, "grad_norm": 0.31133347749710083, "learning_rate": 1.6154826689924186e-08, "loss": 0.0078, "step": 115700 }, { "epoch": 1.9540986928766846, "grad_norm": 0.3416605293750763, "learning_rate": 1.6036669856915877e-08, "loss": 0.0091, "step": 115710 }, { "epoch": 1.9542675718580065, "grad_norm": 0.2375125139951706, "learning_rate": 1.591894601842836e-08, "loss": 0.0084, "step": 115720 }, { "epoch": 1.9544364508393284, "grad_norm": 0.2522878646850586, "learning_rate": 1.5801655184690122e-08, "loss": 0.0054, "step": 115730 }, { "epoch": 1.9546053298206505, "grad_norm": 0.2906262278556824, "learning_rate": 1.5684797365891346e-08, "loss": 0.0082, "step": 115740 }, { "epoch": 1.9547742088019726, "grad_norm": 0.20702359080314636, "learning_rate": 1.5568372572183908e-08, "loss": 0.0078, "step": 115750 }, { "epoch": 1.9549430877832945, "grad_norm": 0.6245073080062866, "learning_rate": 1.5452380813681945e-08, "loss": 0.0079, "step": 115760 }, { "epoch": 1.9551119667646164, "grad_norm": 0.2650633752346039, "learning_rate": 1.53368221004635e-08, "loss": 0.0047, "step": 115770 }, { "epoch": 1.9552808457459383, "grad_norm": 0.2802079916000366, "learning_rate": 1.5221696442567768e-08, "loss": 0.0082, "step": 115780 }, { "epoch": 1.9554497247272604, "grad_norm": 0.08925654739141464, "learning_rate": 1.5107003849996195e-08, "loss": 0.0036, "step": 115790 }, { "epoch": 1.9556186037085825, "grad_norm": 0.15923017263412476, "learning_rate": 1.4992744332713584e-08, "loss": 0.0046, "step": 115800 }, { "epoch": 1.9557874826899044, "grad_norm": 0.37443968653678894, "learning_rate": 1.4878917900645883e-08, "loss": 0.0091, "step": 115810 }, { "epoch": 1.9559563616712263, "grad_norm": 0.23334479331970215, "learning_rate": 1.4765524563682409e-08, "loss": 0.008, "step": 115820 }, { "epoch": 1.9561252406525482, "grad_norm": 0.23032750189304352, "learning_rate": 1.4652564331674723e-08, "loss": 0.0065, "step": 115830 }, { "epoch": 1.9562941196338703, "grad_norm": 0.31857287883758545, "learning_rate": 1.4540037214436087e-08, "loss": 0.0068, "step": 115840 }, { "epoch": 1.9564629986151925, "grad_norm": 0.14989209175109863, "learning_rate": 1.4427943221742568e-08, "loss": 0.005, "step": 115850 }, { "epoch": 1.9566318775965144, "grad_norm": 0.37540706992149353, "learning_rate": 1.4316282363333045e-08, "loss": 0.0079, "step": 115860 }, { "epoch": 1.9568007565778363, "grad_norm": 0.2982536256313324, "learning_rate": 1.4205054648908089e-08, "loss": 0.0052, "step": 115870 }, { "epoch": 1.9569696355591581, "grad_norm": 0.10937417298555374, "learning_rate": 1.4094260088131084e-08, "loss": 0.0084, "step": 115880 }, { "epoch": 1.9571385145404803, "grad_norm": 0.46335864067077637, "learning_rate": 1.3983898690627662e-08, "loss": 0.0044, "step": 115890 }, { "epoch": 1.9573073935218024, "grad_norm": 0.17783789336681366, "learning_rate": 1.3873970465985153e-08, "loss": 0.0083, "step": 115900 }, { "epoch": 1.9574762725031243, "grad_norm": 0.6267194151878357, "learning_rate": 1.3764475423754253e-08, "loss": 0.0059, "step": 115910 }, { "epoch": 1.9576451514844462, "grad_norm": 0.19167742133140564, "learning_rate": 1.3655413573447352e-08, "loss": 0.0046, "step": 115920 }, { "epoch": 1.957814030465768, "grad_norm": 0.21962112188339233, "learning_rate": 1.3546784924539646e-08, "loss": 0.0048, "step": 115930 }, { "epoch": 1.9579829094470902, "grad_norm": 0.3748878538608551, "learning_rate": 1.3438589486469145e-08, "loss": 0.0066, "step": 115940 }, { "epoch": 1.9581517884284123, "grad_norm": 0.37272319197654724, "learning_rate": 1.333082726863444e-08, "loss": 0.0053, "step": 115950 }, { "epoch": 1.9583206674097342, "grad_norm": 0.17336991429328918, "learning_rate": 1.3223498280398594e-08, "loss": 0.0069, "step": 115960 }, { "epoch": 1.958489546391056, "grad_norm": 0.27620866894721985, "learning_rate": 1.311660253108582e-08, "loss": 0.0054, "step": 115970 }, { "epoch": 1.958658425372378, "grad_norm": 0.24745024740695953, "learning_rate": 1.3010140029983132e-08, "loss": 0.0097, "step": 115980 }, { "epoch": 1.9588273043537001, "grad_norm": 0.30148667097091675, "learning_rate": 1.2904110786338687e-08, "loss": 0.0149, "step": 115990 }, { "epoch": 1.9589961833350222, "grad_norm": 0.19712576270103455, "learning_rate": 1.279851480936567e-08, "loss": 0.0051, "step": 116000 }, { "epoch": 1.9591650623163441, "grad_norm": 0.2920369505882263, "learning_rate": 1.2693352108236745e-08, "loss": 0.0053, "step": 116010 }, { "epoch": 1.959333941297666, "grad_norm": 0.15206831693649292, "learning_rate": 1.2588622692088492e-08, "loss": 0.0033, "step": 116020 }, { "epoch": 1.959502820278988, "grad_norm": 0.21352489292621613, "learning_rate": 1.2484326570019745e-08, "loss": 0.0055, "step": 116030 }, { "epoch": 1.95967169926031, "grad_norm": 0.5439420938491821, "learning_rate": 1.2380463751091588e-08, "loss": 0.0071, "step": 116040 }, { "epoch": 1.9598405782416322, "grad_norm": 0.16231688857078552, "learning_rate": 1.2277034244327357e-08, "loss": 0.0061, "step": 116050 }, { "epoch": 1.960009457222954, "grad_norm": 0.31724098324775696, "learning_rate": 1.217403805871209e-08, "loss": 0.0073, "step": 116060 }, { "epoch": 1.960178336204276, "grad_norm": 0.2408655881881714, "learning_rate": 1.2071475203194737e-08, "loss": 0.0071, "step": 116070 }, { "epoch": 1.9603472151855978, "grad_norm": 0.18673595786094666, "learning_rate": 1.1969345686685397e-08, "loss": 0.0063, "step": 116080 }, { "epoch": 1.96051609416692, "grad_norm": 0.17401736974716187, "learning_rate": 1.1867649518056413e-08, "loss": 0.0071, "step": 116090 }, { "epoch": 1.960684973148242, "grad_norm": 0.32386869192123413, "learning_rate": 1.1766386706143495e-08, "loss": 0.0072, "step": 116100 }, { "epoch": 1.960853852129564, "grad_norm": 0.7703402042388916, "learning_rate": 1.1665557259744054e-08, "loss": 0.0115, "step": 116110 }, { "epoch": 1.9610227311108859, "grad_norm": 0.2048645317554474, "learning_rate": 1.156516118761719e-08, "loss": 0.0055, "step": 116120 }, { "epoch": 1.9611916100922078, "grad_norm": 0.1627485603094101, "learning_rate": 1.146519849848593e-08, "loss": 0.0049, "step": 116130 }, { "epoch": 1.96136048907353, "grad_norm": 0.4008253514766693, "learning_rate": 1.1365669201034435e-08, "loss": 0.0066, "step": 116140 }, { "epoch": 1.961529368054852, "grad_norm": 0.32952624559402466, "learning_rate": 1.1266573303909678e-08, "loss": 0.0089, "step": 116150 }, { "epoch": 1.961698247036174, "grad_norm": 0.2171364277601242, "learning_rate": 1.1167910815720328e-08, "loss": 0.007, "step": 116160 }, { "epoch": 1.9618671260174958, "grad_norm": 0.4509836733341217, "learning_rate": 1.1069681745038418e-08, "loss": 0.0056, "step": 116170 }, { "epoch": 1.9620360049988177, "grad_norm": 0.06687630712985992, "learning_rate": 1.0971886100398232e-08, "loss": 0.0055, "step": 116180 }, { "epoch": 1.9622048839801398, "grad_norm": 0.22540952265262604, "learning_rate": 1.0874523890295196e-08, "loss": 0.0043, "step": 116190 }, { "epoch": 1.962373762961462, "grad_norm": 0.2363918274641037, "learning_rate": 1.0777595123188656e-08, "loss": 0.0057, "step": 116200 }, { "epoch": 1.9625426419427838, "grad_norm": 0.15118171274662018, "learning_rate": 1.0681099807499096e-08, "loss": 0.006, "step": 116210 }, { "epoch": 1.9627115209241057, "grad_norm": 0.6254355907440186, "learning_rate": 1.058503795160981e-08, "loss": 0.0095, "step": 116220 }, { "epoch": 1.9628803999054276, "grad_norm": 0.07885121554136276, "learning_rate": 1.0489409563866349e-08, "loss": 0.0092, "step": 116230 }, { "epoch": 1.9630492788867497, "grad_norm": 0.3104415237903595, "learning_rate": 1.0394214652577061e-08, "loss": 0.0066, "step": 116240 }, { "epoch": 1.9632181578680719, "grad_norm": 0.17304790019989014, "learning_rate": 1.0299453226012002e-08, "loss": 0.0033, "step": 116250 }, { "epoch": 1.9633870368493938, "grad_norm": 0.11857690662145615, "learning_rate": 1.0205125292404028e-08, "loss": 0.005, "step": 116260 }, { "epoch": 1.9635559158307156, "grad_norm": 0.3302820920944214, "learning_rate": 1.0111230859947142e-08, "loss": 0.0072, "step": 116270 }, { "epoch": 1.9637247948120375, "grad_norm": 0.16701571643352509, "learning_rate": 1.0017769936799815e-08, "loss": 0.004, "step": 116280 }, { "epoch": 1.9638936737933597, "grad_norm": 0.2520110607147217, "learning_rate": 9.924742531081666e-09, "loss": 0.008, "step": 116290 }, { "epoch": 1.9640625527746818, "grad_norm": 0.18617837131023407, "learning_rate": 9.832148650874008e-09, "loss": 0.005, "step": 116300 }, { "epoch": 1.9642314317560037, "grad_norm": 0.1358814835548401, "learning_rate": 9.739988304221514e-09, "loss": 0.0048, "step": 116310 }, { "epoch": 1.9644003107373256, "grad_norm": 0.18504369258880615, "learning_rate": 9.648261499130562e-09, "loss": 0.0085, "step": 116320 }, { "epoch": 1.9645691897186475, "grad_norm": 0.19036109745502472, "learning_rate": 9.556968243570886e-09, "loss": 0.0048, "step": 116330 }, { "epoch": 1.9647380686999696, "grad_norm": 0.19066792726516724, "learning_rate": 9.466108545473363e-09, "loss": 0.0073, "step": 116340 }, { "epoch": 1.9649069476812917, "grad_norm": 0.13790790736675262, "learning_rate": 9.375682412731124e-09, "loss": 0.0054, "step": 116350 }, { "epoch": 1.9650758266626136, "grad_norm": 0.36668750643730164, "learning_rate": 9.285689853201218e-09, "loss": 0.0076, "step": 116360 }, { "epoch": 1.9652447056439355, "grad_norm": 0.0902775228023529, "learning_rate": 9.196130874701282e-09, "loss": 0.0055, "step": 116370 }, { "epoch": 1.9654135846252574, "grad_norm": 0.173154816031456, "learning_rate": 9.107005485011755e-09, "loss": 0.0063, "step": 116380 }, { "epoch": 1.9655824636065795, "grad_norm": 0.27662304043769836, "learning_rate": 9.018313691876445e-09, "loss": 0.0057, "step": 116390 }, { "epoch": 1.9657513425879016, "grad_norm": 0.5737012028694153, "learning_rate": 8.9300555030003e-09, "loss": 0.0046, "step": 116400 }, { "epoch": 1.9659202215692235, "grad_norm": 0.1604272425174713, "learning_rate": 8.842230926051077e-09, "loss": 0.0059, "step": 116410 }, { "epoch": 1.9660891005505454, "grad_norm": 0.19834473729133606, "learning_rate": 8.754839968658224e-09, "loss": 0.0052, "step": 116420 }, { "epoch": 1.9662579795318673, "grad_norm": 0.29899054765701294, "learning_rate": 8.66788263841456e-09, "loss": 0.0065, "step": 116430 }, { "epoch": 1.9664268585131894, "grad_norm": 0.16714191436767578, "learning_rate": 8.581358942874595e-09, "loss": 0.0064, "step": 116440 }, { "epoch": 1.9665957374945116, "grad_norm": 0.2651560604572296, "learning_rate": 8.495268889555653e-09, "loss": 0.0108, "step": 116450 }, { "epoch": 1.9667646164758334, "grad_norm": 0.1255965381860733, "learning_rate": 8.409612485936746e-09, "loss": 0.0051, "step": 116460 }, { "epoch": 1.9669334954571553, "grad_norm": 0.13297414779663086, "learning_rate": 8.324389739459148e-09, "loss": 0.0085, "step": 116470 }, { "epoch": 1.9671023744384772, "grad_norm": 0.4601965546607971, "learning_rate": 8.239600657526936e-09, "loss": 0.0073, "step": 116480 }, { "epoch": 1.9672712534197994, "grad_norm": 0.3144341707229614, "learning_rate": 8.155245247506438e-09, "loss": 0.0097, "step": 116490 }, { "epoch": 1.9674401324011215, "grad_norm": 0.17605483531951904, "learning_rate": 8.071323516726792e-09, "loss": 0.005, "step": 116500 }, { "epoch": 1.9676090113824434, "grad_norm": 0.20358051359653473, "learning_rate": 7.987835472478277e-09, "loss": 0.0057, "step": 116510 }, { "epoch": 1.9677778903637653, "grad_norm": 0.17053179442882538, "learning_rate": 7.904781122014537e-09, "loss": 0.0087, "step": 116520 }, { "epoch": 1.9679467693450872, "grad_norm": 0.19702596962451935, "learning_rate": 7.822160472550356e-09, "loss": 0.0056, "step": 116530 }, { "epoch": 1.9681156483264093, "grad_norm": 0.4348117709159851, "learning_rate": 7.739973531264433e-09, "loss": 0.0056, "step": 116540 }, { "epoch": 1.9682845273077314, "grad_norm": 0.07276838272809982, "learning_rate": 7.658220305297171e-09, "loss": 0.0036, "step": 116550 }, { "epoch": 1.9684534062890533, "grad_norm": 0.39088737964630127, "learning_rate": 7.57690080175011e-09, "loss": 0.0062, "step": 116560 }, { "epoch": 1.9686222852703752, "grad_norm": 0.3411839008331299, "learning_rate": 7.496015027688708e-09, "loss": 0.0057, "step": 116570 }, { "epoch": 1.968791164251697, "grad_norm": 0.3473852276802063, "learning_rate": 7.4155629901401234e-09, "loss": 0.0063, "step": 116580 }, { "epoch": 1.9689600432330192, "grad_norm": 0.2881452441215515, "learning_rate": 7.335544696093211e-09, "loss": 0.008, "step": 116590 }, { "epoch": 1.9691289222143413, "grad_norm": 0.11453493684530258, "learning_rate": 7.255960152500741e-09, "loss": 0.0058, "step": 116600 }, { "epoch": 1.9692978011956632, "grad_norm": 0.16330820322036743, "learning_rate": 7.176809366276627e-09, "loss": 0.0044, "step": 116610 }, { "epoch": 1.9694666801769851, "grad_norm": 0.2255002111196518, "learning_rate": 7.098092344296481e-09, "loss": 0.0057, "step": 116620 }, { "epoch": 1.969635559158307, "grad_norm": 0.19844160974025726, "learning_rate": 7.019809093399832e-09, "loss": 0.0078, "step": 116630 }, { "epoch": 1.9698044381396291, "grad_norm": 0.13984420895576477, "learning_rate": 6.941959620387351e-09, "loss": 0.0117, "step": 116640 }, { "epoch": 1.9699733171209513, "grad_norm": 0.13065841794013977, "learning_rate": 6.8645439320230715e-09, "loss": 0.0132, "step": 116650 }, { "epoch": 1.9701421961022731, "grad_norm": 0.22282592952251434, "learning_rate": 6.787562035031614e-09, "loss": 0.0071, "step": 116660 }, { "epoch": 1.970311075083595, "grad_norm": 0.1284341812133789, "learning_rate": 6.711013936101518e-09, "loss": 0.0059, "step": 116670 }, { "epoch": 1.970479954064917, "grad_norm": 0.22729428112506866, "learning_rate": 6.6348996418830195e-09, "loss": 0.0088, "step": 116680 }, { "epoch": 1.970648833046239, "grad_norm": 0.15526717901229858, "learning_rate": 6.559219158989161e-09, "loss": 0.0065, "step": 116690 }, { "epoch": 1.9708177120275612, "grad_norm": 0.21119160950183868, "learning_rate": 6.4839724939946835e-09, "loss": 0.0052, "step": 116700 }, { "epoch": 1.970986591008883, "grad_norm": 0.20862603187561035, "learning_rate": 6.409159653436581e-09, "loss": 0.0045, "step": 116710 }, { "epoch": 1.971155469990205, "grad_norm": 0.16838853061199188, "learning_rate": 6.334780643814098e-09, "loss": 0.005, "step": 116720 }, { "epoch": 1.9713243489715269, "grad_norm": 0.3828986585140228, "learning_rate": 6.2608354715898436e-09, "loss": 0.0078, "step": 116730 }, { "epoch": 1.971493227952849, "grad_norm": 0.17363609373569489, "learning_rate": 6.187324143188122e-09, "loss": 0.0073, "step": 116740 }, { "epoch": 1.971662106934171, "grad_norm": 0.18093112111091614, "learning_rate": 6.1142466649943834e-09, "loss": 0.0066, "step": 116750 }, { "epoch": 1.971830985915493, "grad_norm": 0.06814879924058914, "learning_rate": 6.041603043357991e-09, "loss": 0.004, "step": 116760 }, { "epoch": 1.971999864896815, "grad_norm": 0.15489551424980164, "learning_rate": 5.969393284590008e-09, "loss": 0.0044, "step": 116770 }, { "epoch": 1.9721687438781368, "grad_norm": 0.10219524800777435, "learning_rate": 5.897617394964305e-09, "loss": 0.0078, "step": 116780 }, { "epoch": 1.972337622859459, "grad_norm": 0.3404405415058136, "learning_rate": 5.826275380715896e-09, "loss": 0.0055, "step": 116790 }, { "epoch": 1.972506501840781, "grad_norm": 0.07901757210493088, "learning_rate": 5.755367248043153e-09, "loss": 0.0049, "step": 116800 }, { "epoch": 1.972675380822103, "grad_norm": 0.4140038788318634, "learning_rate": 5.684893003106151e-09, "loss": 0.0046, "step": 116810 }, { "epoch": 1.9728442598034248, "grad_norm": 0.2794526219367981, "learning_rate": 5.614852652027769e-09, "loss": 0.0071, "step": 116820 }, { "epoch": 1.9730131387847467, "grad_norm": 0.18687205016613007, "learning_rate": 5.545246200893139e-09, "loss": 0.0068, "step": 116830 }, { "epoch": 1.9731820177660688, "grad_norm": 0.2298896610736847, "learning_rate": 5.476073655749092e-09, "loss": 0.0069, "step": 116840 }, { "epoch": 1.973350896747391, "grad_norm": 0.20584513247013092, "learning_rate": 5.407335022605265e-09, "loss": 0.0057, "step": 116850 }, { "epoch": 1.9735197757287128, "grad_norm": 0.5319679975509644, "learning_rate": 5.339030307433546e-09, "loss": 0.0099, "step": 116860 }, { "epoch": 1.9736886547100347, "grad_norm": 0.27525392174720764, "learning_rate": 5.271159516168078e-09, "loss": 0.0062, "step": 116870 }, { "epoch": 1.9738575336913566, "grad_norm": 0.17831571400165558, "learning_rate": 5.203722654705256e-09, "loss": 0.0058, "step": 116880 }, { "epoch": 1.9740264126726788, "grad_norm": 0.39537665247917175, "learning_rate": 5.136719728903727e-09, "loss": 0.0062, "step": 116890 }, { "epoch": 1.9741952916540009, "grad_norm": 0.10969408601522446, "learning_rate": 5.070150744584945e-09, "loss": 0.0094, "step": 116900 }, { "epoch": 1.9743641706353228, "grad_norm": 0.16818152368068695, "learning_rate": 5.0040157075320615e-09, "loss": 0.0053, "step": 116910 }, { "epoch": 1.9745330496166447, "grad_norm": 0.17623919248580933, "learning_rate": 4.938314623490481e-09, "loss": 0.0046, "step": 116920 }, { "epoch": 1.9747019285979666, "grad_norm": 0.35734692215919495, "learning_rate": 4.873047498168415e-09, "loss": 0.0065, "step": 116930 }, { "epoch": 1.9748708075792887, "grad_norm": 0.2988611161708832, "learning_rate": 4.808214337236327e-09, "loss": 0.0054, "step": 116940 }, { "epoch": 1.9750396865606108, "grad_norm": 0.3302677869796753, "learning_rate": 4.74381514632638e-09, "loss": 0.0063, "step": 116950 }, { "epoch": 1.9752085655419327, "grad_norm": 0.47149568796157837, "learning_rate": 4.679849931033542e-09, "loss": 0.0064, "step": 116960 }, { "epoch": 1.9753774445232546, "grad_norm": 0.18204420804977417, "learning_rate": 4.6163186969144795e-09, "loss": 0.0048, "step": 116970 }, { "epoch": 1.9755463235045765, "grad_norm": 0.17692619562149048, "learning_rate": 4.553221449489775e-09, "loss": 0.0077, "step": 116980 }, { "epoch": 1.9757152024858986, "grad_norm": 0.19388613104820251, "learning_rate": 4.490558194240047e-09, "loss": 0.007, "step": 116990 }, { "epoch": 1.9758840814672207, "grad_norm": 0.2220667004585266, "learning_rate": 4.428328936609827e-09, "loss": 0.0045, "step": 117000 }, { "epoch": 1.9760529604485426, "grad_norm": 0.2615431249141693, "learning_rate": 4.366533682005347e-09, "loss": 0.006, "step": 117010 }, { "epoch": 1.9762218394298645, "grad_norm": 0.16758863627910614, "learning_rate": 4.305172435795091e-09, "loss": 0.007, "step": 117020 }, { "epoch": 1.9763907184111864, "grad_norm": 0.25434768199920654, "learning_rate": 4.244245203310349e-09, "loss": 0.0063, "step": 117030 }, { "epoch": 1.9765595973925085, "grad_norm": 0.053858477622270584, "learning_rate": 4.183751989844109e-09, "loss": 0.0074, "step": 117040 }, { "epoch": 1.9767284763738306, "grad_norm": 0.20290936529636383, "learning_rate": 4.123692800652168e-09, "loss": 0.0043, "step": 117050 }, { "epoch": 1.9768973553551525, "grad_norm": 0.3917848765850067, "learning_rate": 4.064067640951463e-09, "loss": 0.0057, "step": 117060 }, { "epoch": 1.9770662343364744, "grad_norm": 0.16416679322719574, "learning_rate": 4.004876515922851e-09, "loss": 0.0058, "step": 117070 }, { "epoch": 1.9772351133177963, "grad_norm": 0.308495432138443, "learning_rate": 3.946119430708884e-09, "loss": 0.0056, "step": 117080 }, { "epoch": 1.9774039922991185, "grad_norm": 0.24431751668453217, "learning_rate": 3.8877963904132565e-09, "loss": 0.0052, "step": 117090 }, { "epoch": 1.9775728712804406, "grad_norm": 0.37270140647888184, "learning_rate": 3.829907400104138e-09, "loss": 0.0059, "step": 117100 }, { "epoch": 1.9777417502617625, "grad_norm": 0.23262427747249603, "learning_rate": 3.7724524648097285e-09, "loss": 0.0081, "step": 117110 }, { "epoch": 1.9779106292430844, "grad_norm": 0.20568248629570007, "learning_rate": 3.71543158952159e-09, "loss": 0.0048, "step": 117120 }, { "epoch": 1.9780795082244063, "grad_norm": 0.22205199301242828, "learning_rate": 3.658844779194648e-09, "loss": 0.0065, "step": 117130 }, { "epoch": 1.9782483872057284, "grad_norm": 0.206839919090271, "learning_rate": 3.602692038744415e-09, "loss": 0.0052, "step": 117140 }, { "epoch": 1.9784172661870505, "grad_norm": 0.5258845686912537, "learning_rate": 3.5469733730492116e-09, "loss": 0.0087, "step": 117150 }, { "epoch": 1.9785861451683724, "grad_norm": 0.39458540081977844, "learning_rate": 3.4916887869496095e-09, "loss": 0.0075, "step": 117160 }, { "epoch": 1.9787550241496943, "grad_norm": 0.20590537786483765, "learning_rate": 3.4368382852484338e-09, "loss": 0.0091, "step": 117170 }, { "epoch": 1.9789239031310162, "grad_norm": 0.4073411524295807, "learning_rate": 3.3824218727118718e-09, "loss": 0.0131, "step": 117180 }, { "epoch": 1.9790927821123383, "grad_norm": 0.1949354112148285, "learning_rate": 3.3284395540666982e-09, "loss": 0.0039, "step": 117190 }, { "epoch": 1.9792616610936604, "grad_norm": 0.24587637186050415, "learning_rate": 3.2748913340030496e-09, "loss": 0.0071, "step": 117200 }, { "epoch": 1.9794305400749823, "grad_norm": 0.1251748651266098, "learning_rate": 3.2217772171727612e-09, "loss": 0.0082, "step": 117210 }, { "epoch": 1.9795994190563042, "grad_norm": 0.17788203060626984, "learning_rate": 3.169097208190475e-09, "loss": 0.0045, "step": 117220 }, { "epoch": 1.979768298037626, "grad_norm": 0.25571370124816895, "learning_rate": 3.1168513116330844e-09, "loss": 0.0068, "step": 117230 }, { "epoch": 1.9799371770189482, "grad_norm": 0.4051472544670105, "learning_rate": 3.0650395320397374e-09, "loss": 0.0058, "step": 117240 }, { "epoch": 1.9801060560002703, "grad_norm": 0.11379631608724594, "learning_rate": 3.0136618739107225e-09, "loss": 0.0074, "step": 117250 }, { "epoch": 1.9802749349815922, "grad_norm": 0.3279997408390045, "learning_rate": 2.9627183417108018e-09, "loss": 0.0036, "step": 117260 }, { "epoch": 1.9804438139629141, "grad_norm": 0.23578840494155884, "learning_rate": 2.9122089398653243e-09, "loss": 0.0098, "step": 117270 }, { "epoch": 1.980612692944236, "grad_norm": 0.22082379460334778, "learning_rate": 2.862133672761891e-09, "loss": 0.0036, "step": 117280 }, { "epoch": 1.9807815719255581, "grad_norm": 0.077960304915905, "learning_rate": 2.8124925447520213e-09, "loss": 0.0037, "step": 117290 }, { "epoch": 1.9809504509068803, "grad_norm": 0.265077143907547, "learning_rate": 2.7632855601472662e-09, "loss": 0.0066, "step": 117300 }, { "epoch": 1.9811193298882022, "grad_norm": 0.24444708228111267, "learning_rate": 2.71451272322365e-09, "loss": 0.0051, "step": 117310 }, { "epoch": 1.981288208869524, "grad_norm": 0.14742979407310486, "learning_rate": 2.6661740382177834e-09, "loss": 0.0048, "step": 117320 }, { "epoch": 1.981457087850846, "grad_norm": 0.15731726586818695, "learning_rate": 2.6182695093290854e-09, "loss": 0.0064, "step": 117330 }, { "epoch": 1.981625966832168, "grad_norm": 0.21985168755054474, "learning_rate": 2.5707991407197817e-09, "loss": 0.0052, "step": 117340 }, { "epoch": 1.9817948458134902, "grad_norm": 0.3855508863925934, "learning_rate": 2.5237629365143513e-09, "loss": 0.0062, "step": 117350 }, { "epoch": 1.981963724794812, "grad_norm": 0.25701913237571716, "learning_rate": 2.477160900797859e-09, "loss": 0.005, "step": 117360 }, { "epoch": 1.982132603776134, "grad_norm": 0.15619871020317078, "learning_rate": 2.4309930376203995e-09, "loss": 0.0036, "step": 117370 }, { "epoch": 1.9823014827574559, "grad_norm": 0.1748032122850418, "learning_rate": 2.3852593509926526e-09, "loss": 0.0033, "step": 117380 }, { "epoch": 1.982470361738778, "grad_norm": 0.16197451949119568, "learning_rate": 2.3399598448869963e-09, "loss": 0.0038, "step": 117390 }, { "epoch": 1.9826392407201001, "grad_norm": 0.2798709571361542, "learning_rate": 2.295094523239727e-09, "loss": 0.0056, "step": 117400 }, { "epoch": 1.982808119701422, "grad_norm": 0.19117118418216705, "learning_rate": 2.250663389948282e-09, "loss": 0.0047, "step": 117410 }, { "epoch": 1.982976998682744, "grad_norm": 0.06575358659029007, "learning_rate": 2.206666448872907e-09, "loss": 0.006, "step": 117420 }, { "epoch": 1.9831458776640658, "grad_norm": 0.20879703760147095, "learning_rate": 2.163103703835545e-09, "loss": 0.008, "step": 117430 }, { "epoch": 1.983314756645388, "grad_norm": 0.34222736954689026, "learning_rate": 2.1199751586215013e-09, "loss": 0.0048, "step": 117440 }, { "epoch": 1.98348363562671, "grad_norm": 0.1955656111240387, "learning_rate": 2.0772808169772227e-09, "loss": 0.0065, "step": 117450 }, { "epoch": 1.983652514608032, "grad_norm": 0.21223846077919006, "learning_rate": 2.035020682612521e-09, "loss": 0.0068, "step": 117460 }, { "epoch": 1.9838213935893538, "grad_norm": 0.24648995697498322, "learning_rate": 1.993194759197792e-09, "loss": 0.0096, "step": 117470 }, { "epoch": 1.9839902725706757, "grad_norm": 0.27110934257507324, "learning_rate": 1.9518030503673513e-09, "loss": 0.0055, "step": 117480 }, { "epoch": 1.9841591515519978, "grad_norm": 0.1536409705877304, "learning_rate": 1.9108455597172117e-09, "loss": 0.006, "step": 117490 }, { "epoch": 1.98432803053332, "grad_norm": 0.3151039481163025, "learning_rate": 1.8703222908056374e-09, "loss": 0.0075, "step": 117500 }, { "epoch": 1.9844969095146419, "grad_norm": 0.32483533024787903, "learning_rate": 1.8302332471531458e-09, "loss": 0.0072, "step": 117510 }, { "epoch": 1.9846657884959638, "grad_norm": 0.22989924252033234, "learning_rate": 1.7905784322425068e-09, "loss": 0.0045, "step": 117520 }, { "epoch": 1.9848346674772857, "grad_norm": 0.16265329718589783, "learning_rate": 1.7513578495192974e-09, "loss": 0.0069, "step": 117530 }, { "epoch": 1.9850035464586078, "grad_norm": 0.3033107817173004, "learning_rate": 1.7125715023902367e-09, "loss": 0.0048, "step": 117540 }, { "epoch": 1.9851724254399299, "grad_norm": 0.4219973087310791, "learning_rate": 1.6742193942254071e-09, "loss": 0.0059, "step": 117550 }, { "epoch": 1.9853413044212518, "grad_norm": 0.13294249773025513, "learning_rate": 1.6363015283565876e-09, "loss": 0.0057, "step": 117560 }, { "epoch": 1.9855101834025737, "grad_norm": 0.1837974190711975, "learning_rate": 1.59881790807781e-09, "loss": 0.0048, "step": 117570 }, { "epoch": 1.9856790623838956, "grad_norm": 0.28592798113822937, "learning_rate": 1.5617685366459135e-09, "loss": 0.0093, "step": 117580 }, { "epoch": 1.9858479413652177, "grad_norm": 0.14292004704475403, "learning_rate": 1.5251534172794346e-09, "loss": 0.0038, "step": 117590 }, { "epoch": 1.9860168203465398, "grad_norm": 0.11799881607294083, "learning_rate": 1.4889725531597177e-09, "loss": 0.0057, "step": 117600 }, { "epoch": 1.9861856993278617, "grad_norm": 0.4460248053073883, "learning_rate": 1.4532259474298037e-09, "loss": 0.0079, "step": 117610 }, { "epoch": 1.9863545783091836, "grad_norm": 0.23983025550842285, "learning_rate": 1.417913603194987e-09, "loss": 0.0113, "step": 117620 }, { "epoch": 1.9865234572905055, "grad_norm": 0.17010413110256195, "learning_rate": 1.3830355235239235e-09, "loss": 0.0075, "step": 117630 }, { "epoch": 1.9866923362718276, "grad_norm": 0.2435818910598755, "learning_rate": 1.3485917114458569e-09, "loss": 0.0058, "step": 117640 }, { "epoch": 1.9868612152531497, "grad_norm": 0.11304343491792679, "learning_rate": 1.3145821699539486e-09, "loss": 0.0042, "step": 117650 }, { "epoch": 1.9870300942344716, "grad_norm": 0.17979663610458374, "learning_rate": 1.2810069020025018e-09, "loss": 0.0066, "step": 117660 }, { "epoch": 1.9871989732157935, "grad_norm": 0.3192386329174042, "learning_rate": 1.2478659105080727e-09, "loss": 0.0062, "step": 117670 }, { "epoch": 1.9873678521971154, "grad_norm": 0.20215538144111633, "learning_rate": 1.2151591983505794e-09, "loss": 0.0087, "step": 117680 }, { "epoch": 1.9875367311784375, "grad_norm": 0.2746070325374603, "learning_rate": 1.1828867683705281e-09, "loss": 0.0049, "step": 117690 }, { "epoch": 1.9877056101597597, "grad_norm": 0.2617490887641907, "learning_rate": 1.1510486233728967e-09, "loss": 0.0063, "step": 117700 }, { "epoch": 1.9878744891410816, "grad_norm": 0.35253965854644775, "learning_rate": 1.1196447661226962e-09, "loss": 0.0057, "step": 117710 }, { "epoch": 1.9880433681224035, "grad_norm": 0.2279784232378006, "learning_rate": 1.0886751993494093e-09, "loss": 0.0059, "step": 117720 }, { "epoch": 1.9882122471037253, "grad_norm": 0.26729434728622437, "learning_rate": 1.0581399257419967e-09, "loss": 0.0079, "step": 117730 }, { "epoch": 1.9883811260850475, "grad_norm": 0.22756001353263855, "learning_rate": 1.0280389479544462e-09, "loss": 0.0075, "step": 117740 }, { "epoch": 1.9885500050663696, "grad_norm": 0.1478089690208435, "learning_rate": 9.98372268601333e-10, "loss": 0.0047, "step": 117750 }, { "epoch": 1.9887188840476915, "grad_norm": 0.5549167394638062, "learning_rate": 9.691398902605953e-10, "loss": 0.005, "step": 117760 }, { "epoch": 1.9888877630290134, "grad_norm": 0.20114126801490784, "learning_rate": 9.403418154713128e-10, "loss": 0.0063, "step": 117770 }, { "epoch": 1.9890566420103353, "grad_norm": 0.21998204290866852, "learning_rate": 9.119780467353734e-10, "loss": 0.005, "step": 117780 }, { "epoch": 1.9892255209916574, "grad_norm": 0.20770691335201263, "learning_rate": 8.840485865169169e-10, "loss": 0.0062, "step": 117790 }, { "epoch": 1.9893943999729795, "grad_norm": 0.18724513053894043, "learning_rate": 8.565534372434459e-10, "loss": 0.0072, "step": 117800 }, { "epoch": 1.9895632789543014, "grad_norm": 0.24195441603660583, "learning_rate": 8.2949260130194e-10, "loss": 0.0067, "step": 117810 }, { "epoch": 1.9897321579356233, "grad_norm": 0.2021712362766266, "learning_rate": 8.028660810449618e-10, "loss": 0.0111, "step": 117820 }, { "epoch": 1.9899010369169452, "grad_norm": 0.07762087881565094, "learning_rate": 7.766738787845507e-10, "loss": 0.0053, "step": 117830 }, { "epoch": 1.9900699158982673, "grad_norm": 0.0644950270652771, "learning_rate": 7.50915996797219e-10, "loss": 0.0055, "step": 117840 }, { "epoch": 1.9902387948795894, "grad_norm": 0.269623726606369, "learning_rate": 7.255924373200662e-10, "loss": 0.0097, "step": 117850 }, { "epoch": 1.9904076738609113, "grad_norm": 0.7097355127334595, "learning_rate": 7.007032025529991e-10, "loss": 0.0073, "step": 117860 }, { "epoch": 1.9905765528422332, "grad_norm": 0.32838940620422363, "learning_rate": 6.762482946592875e-10, "loss": 0.0074, "step": 117870 }, { "epoch": 1.9907454318235551, "grad_norm": 0.35500186681747437, "learning_rate": 6.522277157622325e-10, "loss": 0.0105, "step": 117880 }, { "epoch": 1.9909143108048772, "grad_norm": 0.4740431010723114, "learning_rate": 6.28641467950164e-10, "loss": 0.0058, "step": 117890 }, { "epoch": 1.9910831897861994, "grad_norm": 0.690740168094635, "learning_rate": 6.054895532708882e-10, "loss": 0.0107, "step": 117900 }, { "epoch": 1.9912520687675213, "grad_norm": 0.8360962271690369, "learning_rate": 5.827719737361293e-10, "loss": 0.0108, "step": 117910 }, { "epoch": 1.9914209477488432, "grad_norm": 0.5777000784873962, "learning_rate": 5.604887313198637e-10, "loss": 0.0047, "step": 117920 }, { "epoch": 1.991589826730165, "grad_norm": 0.2684876620769501, "learning_rate": 5.386398279583205e-10, "loss": 0.005, "step": 117930 }, { "epoch": 1.9917587057114872, "grad_norm": 0.24772533774375916, "learning_rate": 5.172252655483157e-10, "loss": 0.0058, "step": 117940 }, { "epoch": 1.9919275846928093, "grad_norm": 0.1986977905035019, "learning_rate": 4.962450459516932e-10, "loss": 0.0041, "step": 117950 }, { "epoch": 1.9920964636741312, "grad_norm": 0.48206591606140137, "learning_rate": 4.756991709908843e-10, "loss": 0.0051, "step": 117960 }, { "epoch": 1.992265342655453, "grad_norm": 0.28733375668525696, "learning_rate": 4.5558764245001717e-10, "loss": 0.0055, "step": 117970 }, { "epoch": 1.992434221636775, "grad_norm": 0.12103351205587387, "learning_rate": 4.3591046207769327e-10, "loss": 0.0063, "step": 117980 }, { "epoch": 1.992603100618097, "grad_norm": 0.24164102971553802, "learning_rate": 4.1666763158199063e-10, "loss": 0.008, "step": 117990 }, { "epoch": 1.9927719795994192, "grad_norm": 0.2289208322763443, "learning_rate": 3.9785915263546024e-10, "loss": 0.0054, "step": 118000 }, { "epoch": 1.992940858580741, "grad_norm": 0.28553086519241333, "learning_rate": 3.794850268723504e-10, "loss": 0.0069, "step": 118010 }, { "epoch": 1.993109737562063, "grad_norm": 0.17946037650108337, "learning_rate": 3.615452558886068e-10, "loss": 0.0094, "step": 118020 }, { "epoch": 1.993278616543385, "grad_norm": 0.15509994328022003, "learning_rate": 3.440398412424273e-10, "loss": 0.0077, "step": 118030 }, { "epoch": 1.993447495524707, "grad_norm": 0.22062915563583374, "learning_rate": 3.2696878445537263e-10, "loss": 0.0083, "step": 118040 }, { "epoch": 1.9936163745060291, "grad_norm": 0.26123642921447754, "learning_rate": 3.103320870101456e-10, "loss": 0.0083, "step": 118050 }, { "epoch": 1.993785253487351, "grad_norm": 0.21151436865329742, "learning_rate": 2.9412975035170155e-10, "loss": 0.0051, "step": 118060 }, { "epoch": 1.993954132468673, "grad_norm": 0.2942645847797394, "learning_rate": 2.7836177588891347e-10, "loss": 0.0054, "step": 118070 }, { "epoch": 1.9941230114499948, "grad_norm": 0.11787573248147964, "learning_rate": 2.630281649901312e-10, "loss": 0.0063, "step": 118080 }, { "epoch": 1.994291890431317, "grad_norm": 0.15688230097293854, "learning_rate": 2.481289189887326e-10, "loss": 0.006, "step": 118090 }, { "epoch": 1.994460769412639, "grad_norm": 0.3148987591266632, "learning_rate": 2.336640391781275e-10, "loss": 0.0049, "step": 118100 }, { "epoch": 1.994629648393961, "grad_norm": 0.40203696489334106, "learning_rate": 2.1963352681619865e-10, "loss": 0.0095, "step": 118110 }, { "epoch": 1.9947985273752828, "grad_norm": 0.21383196115493774, "learning_rate": 2.0603738312086064e-10, "loss": 0.0073, "step": 118120 }, { "epoch": 1.9949674063566047, "grad_norm": 0.3491220772266388, "learning_rate": 1.9287560927339077e-10, "loss": 0.0092, "step": 118130 }, { "epoch": 1.9951362853379269, "grad_norm": 0.1306706815958023, "learning_rate": 1.801482064178739e-10, "loss": 0.0063, "step": 118140 }, { "epoch": 1.995305164319249, "grad_norm": 0.2227552831172943, "learning_rate": 1.678551756589819e-10, "loss": 0.0107, "step": 118150 }, { "epoch": 1.9954740433005709, "grad_norm": 0.11086760461330414, "learning_rate": 1.5599651806585957e-10, "loss": 0.007, "step": 118160 }, { "epoch": 1.9956429222818928, "grad_norm": 0.211228147149086, "learning_rate": 1.4457223466823877e-10, "loss": 0.0072, "step": 118170 }, { "epoch": 1.9958118012632147, "grad_norm": 0.29247942566871643, "learning_rate": 1.3358232645865888e-10, "loss": 0.0076, "step": 118180 }, { "epoch": 1.9959806802445368, "grad_norm": 0.267106294631958, "learning_rate": 1.2302679439191167e-10, "loss": 0.0063, "step": 118190 }, { "epoch": 1.996149559225859, "grad_norm": 0.5110334753990173, "learning_rate": 1.1290563938504139e-10, "loss": 0.0076, "step": 118200 }, { "epoch": 1.9963184382071808, "grad_norm": 0.18586978316307068, "learning_rate": 1.0321886231734468e-10, "loss": 0.0075, "step": 118210 }, { "epoch": 1.9964873171885027, "grad_norm": 0.18805880844593048, "learning_rate": 9.396646403037057e-11, "loss": 0.0067, "step": 118220 }, { "epoch": 1.9966561961698246, "grad_norm": 0.3551613390445709, "learning_rate": 8.514844532792055e-11, "loss": 0.0045, "step": 118230 }, { "epoch": 1.9968250751511467, "grad_norm": 0.15588371455669403, "learning_rate": 7.676480697604849e-11, "loss": 0.0069, "step": 118240 }, { "epoch": 1.9969939541324688, "grad_norm": 0.2875620126724243, "learning_rate": 6.88155497030607e-11, "loss": 0.01, "step": 118250 }, { "epoch": 1.9971628331137907, "grad_norm": 0.16960488259792328, "learning_rate": 6.130067420007101e-11, "loss": 0.0095, "step": 118260 }, { "epoch": 1.9973317120951126, "grad_norm": 0.19638262689113617, "learning_rate": 5.422018111933547e-11, "loss": 0.0073, "step": 118270 }, { "epoch": 1.9975005910764345, "grad_norm": 0.24100814759731293, "learning_rate": 4.7574071075917604e-11, "loss": 0.008, "step": 118280 }, { "epoch": 1.9976694700577566, "grad_norm": 0.2157314419746399, "learning_rate": 4.136234464768851e-11, "loss": 0.0058, "step": 118290 }, { "epoch": 1.9978383490390788, "grad_norm": 0.10750316083431244, "learning_rate": 3.558500237421658e-11, "loss": 0.0059, "step": 118300 }, { "epoch": 1.9980072280204006, "grad_norm": 0.16399963200092316, "learning_rate": 3.024204475732262e-11, "loss": 0.0064, "step": 118310 }, { "epoch": 1.9981761070017225, "grad_norm": 0.22525355219841003, "learning_rate": 2.533347226107985e-11, "loss": 0.0074, "step": 118320 }, { "epoch": 1.9983449859830444, "grad_norm": 0.2232823669910431, "learning_rate": 2.0859285312369026e-11, "loss": 0.0068, "step": 118330 }, { "epoch": 1.9985138649643666, "grad_norm": 0.2874949872493744, "learning_rate": 1.6819484299213095e-11, "loss": 0.0049, "step": 118340 }, { "epoch": 1.9986827439456887, "grad_norm": 0.245168998837471, "learning_rate": 1.3214069573552757e-11, "loss": 0.0059, "step": 118350 }, { "epoch": 1.9988516229270106, "grad_norm": 0.09125957638025284, "learning_rate": 1.004304144736068e-11, "loss": 0.0042, "step": 118360 }, { "epoch": 1.9990205019083325, "grad_norm": 0.2517250180244446, "learning_rate": 7.306400197082398e-12, "loss": 0.0071, "step": 118370 }, { "epoch": 1.9991893808896544, "grad_norm": 0.21378576755523682, "learning_rate": 5.004146059750525e-12, "loss": 0.005, "step": 118380 }, { "epoch": 1.9993582598709765, "grad_norm": 0.5021671056747437, "learning_rate": 3.1362792363154313e-12, "loss": 0.0064, "step": 118390 }, { "epoch": 1.9995271388522986, "grad_norm": 0.07221338152885437, "learning_rate": 1.7027998883145658e-12, "loss": 0.0081, "step": 118400 }, { "epoch": 1.9996960178336205, "grad_norm": 0.10598467290401459, "learning_rate": 7.037081406480184e-13, "loss": 0.0063, "step": 118410 }, { "epoch": 1.9998648968149424, "grad_norm": 0.32722949981689453, "learning_rate": 1.3900407991318533e-13, "loss": 0.0072, "step": 118420 }, { "epoch": 2.0, "step": 118428, "total_flos": 3.4287781995033395e+18, "train_loss": 0.01496461873482785, "train_runtime": 70597.4956, "train_samples_per_second": 13.42, "train_steps_per_second": 1.678 } ], "logging_steps": 10, "max_steps": 118428, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.4287781995033395e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }