{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9998704047691045, "eval_steps": 500, "global_step": 34722, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008639682059700203, "grad_norm": 11.771962803910544, "learning_rate": 9.596928982725528e-08, "loss": 0.6022, "step": 10 }, { "epoch": 0.0017279364119400407, "grad_norm": 9.020646240501524, "learning_rate": 1.9193857965451055e-07, "loss": 0.5688, "step": 20 }, { "epoch": 0.0025919046179100607, "grad_norm": 6.34090392679821, "learning_rate": 2.8790786948176586e-07, "loss": 0.4585, "step": 30 }, { "epoch": 0.0034558728238800814, "grad_norm": 2.8058492274624522, "learning_rate": 3.838771593090211e-07, "loss": 0.3803, "step": 40 }, { "epoch": 0.004319841029850101, "grad_norm": 2.6778181919123094, "learning_rate": 4.798464491362765e-07, "loss": 0.3372, "step": 50 }, { "epoch": 0.005183809235820121, "grad_norm": 2.66961075763771, "learning_rate": 5.758157389635317e-07, "loss": 0.316, "step": 60 }, { "epoch": 0.0060477774417901425, "grad_norm": 2.52693112946638, "learning_rate": 6.717850287907871e-07, "loss": 0.2944, "step": 70 }, { "epoch": 0.006911745647760163, "grad_norm": 2.687818570630057, "learning_rate": 7.677543186180422e-07, "loss": 0.2864, "step": 80 }, { "epoch": 0.007775713853730183, "grad_norm": 2.5730686332715154, "learning_rate": 8.637236084452976e-07, "loss": 0.2732, "step": 90 }, { "epoch": 0.008639682059700202, "grad_norm": 2.6580593069171905, "learning_rate": 9.59692898272553e-07, "loss": 0.2706, "step": 100 }, { "epoch": 0.009503650265670223, "grad_norm": 2.6483380675263417, "learning_rate": 1.0556621880998082e-06, "loss": 0.2658, "step": 110 }, { "epoch": 0.010367618471640243, "grad_norm": 3.2034183330693926, "learning_rate": 1.1516314779270634e-06, "loss": 0.2574, "step": 120 }, { "epoch": 0.011231586677610265, "grad_norm": 3.5452977484012838, "learning_rate": 1.2476007677543187e-06, "loss": 0.2564, "step": 130 }, { "epoch": 0.012095554883580285, "grad_norm": 2.8513462750096714, "learning_rate": 1.3435700575815741e-06, "loss": 0.2488, "step": 140 }, { "epoch": 0.012959523089550305, "grad_norm": 2.6333666283166988, "learning_rate": 1.4395393474088292e-06, "loss": 0.2462, "step": 150 }, { "epoch": 0.013823491295520325, "grad_norm": 2.6748238297309967, "learning_rate": 1.5355086372360844e-06, "loss": 0.2462, "step": 160 }, { "epoch": 0.014687459501490346, "grad_norm": 2.5303667333233997, "learning_rate": 1.63147792706334e-06, "loss": 0.2441, "step": 170 }, { "epoch": 0.015551427707460366, "grad_norm": 2.9751307770273425, "learning_rate": 1.7274472168905951e-06, "loss": 0.2391, "step": 180 }, { "epoch": 0.016415395913430386, "grad_norm": 2.758192405739691, "learning_rate": 1.8234165067178506e-06, "loss": 0.2352, "step": 190 }, { "epoch": 0.017279364119400405, "grad_norm": 2.5969506757432677, "learning_rate": 1.919385796545106e-06, "loss": 0.234, "step": 200 }, { "epoch": 0.018143332325370427, "grad_norm": 2.946805205411811, "learning_rate": 2.015355086372361e-06, "loss": 0.2368, "step": 210 }, { "epoch": 0.019007300531340445, "grad_norm": 2.375602226438953, "learning_rate": 2.1113243761996164e-06, "loss": 0.2292, "step": 220 }, { "epoch": 0.019871268737310467, "grad_norm": 2.479386341346247, "learning_rate": 2.2072936660268714e-06, "loss": 0.2282, "step": 230 }, { "epoch": 0.020735236943280486, "grad_norm": 2.530227027626686, "learning_rate": 2.303262955854127e-06, "loss": 0.2283, "step": 240 }, { "epoch": 0.021599205149250508, "grad_norm": 2.663697247032743, "learning_rate": 2.3992322456813823e-06, "loss": 0.2287, "step": 250 }, { "epoch": 0.02246317335522053, "grad_norm": 2.4409362189300823, "learning_rate": 2.4952015355086374e-06, "loss": 0.2283, "step": 260 }, { "epoch": 0.023327141561190548, "grad_norm": 2.4887477176122963, "learning_rate": 2.5911708253358924e-06, "loss": 0.2274, "step": 270 }, { "epoch": 0.02419110976716057, "grad_norm": 2.169049552229543, "learning_rate": 2.6871401151631483e-06, "loss": 0.2259, "step": 280 }, { "epoch": 0.02505507797313059, "grad_norm": 2.2321349960796644, "learning_rate": 2.7831094049904033e-06, "loss": 0.224, "step": 290 }, { "epoch": 0.02591904617910061, "grad_norm": 2.123716642203702, "learning_rate": 2.8790786948176584e-06, "loss": 0.2261, "step": 300 }, { "epoch": 0.02678301438507063, "grad_norm": 2.2304887712432158, "learning_rate": 2.975047984644914e-06, "loss": 0.2223, "step": 310 }, { "epoch": 0.02764698259104065, "grad_norm": 2.3168128684841958, "learning_rate": 3.071017274472169e-06, "loss": 0.2258, "step": 320 }, { "epoch": 0.02851095079701067, "grad_norm": 2.126683137374433, "learning_rate": 3.1669865642994248e-06, "loss": 0.2218, "step": 330 }, { "epoch": 0.02937491900298069, "grad_norm": 2.167002578947182, "learning_rate": 3.26295585412668e-06, "loss": 0.2267, "step": 340 }, { "epoch": 0.03023888720895071, "grad_norm": 2.3404259059137194, "learning_rate": 3.358925143953935e-06, "loss": 0.2154, "step": 350 }, { "epoch": 0.031102855414920732, "grad_norm": 2.1511856683733623, "learning_rate": 3.4548944337811903e-06, "loss": 0.221, "step": 360 }, { "epoch": 0.031966823620890754, "grad_norm": 2.158113277467384, "learning_rate": 3.5508637236084453e-06, "loss": 0.2235, "step": 370 }, { "epoch": 0.03283079182686077, "grad_norm": 2.110044048252369, "learning_rate": 3.6468330134357012e-06, "loss": 0.2204, "step": 380 }, { "epoch": 0.03369476003283079, "grad_norm": 2.0795923977620725, "learning_rate": 3.7428023032629563e-06, "loss": 0.2209, "step": 390 }, { "epoch": 0.03455872823880081, "grad_norm": 1.9794767623756446, "learning_rate": 3.838771593090212e-06, "loss": 0.2186, "step": 400 }, { "epoch": 0.035422696444770835, "grad_norm": 1.9015278291873687, "learning_rate": 3.934740882917467e-06, "loss": 0.217, "step": 410 }, { "epoch": 0.03628666465074085, "grad_norm": 2.123413533371326, "learning_rate": 4.030710172744722e-06, "loss": 0.2176, "step": 420 }, { "epoch": 0.03715063285671087, "grad_norm": 1.982609733635354, "learning_rate": 4.126679462571978e-06, "loss": 0.2161, "step": 430 }, { "epoch": 0.03801460106268089, "grad_norm": 1.873506879383276, "learning_rate": 4.222648752399233e-06, "loss": 0.218, "step": 440 }, { "epoch": 0.038878569268650916, "grad_norm": 1.9435107928689135, "learning_rate": 4.318618042226488e-06, "loss": 0.2177, "step": 450 }, { "epoch": 0.039742537474620934, "grad_norm": 2.0252961794361086, "learning_rate": 4.414587332053743e-06, "loss": 0.2154, "step": 460 }, { "epoch": 0.04060650568059095, "grad_norm": 1.700171956783739, "learning_rate": 4.510556621880998e-06, "loss": 0.2109, "step": 470 }, { "epoch": 0.04147047388656097, "grad_norm": 2.053940448238235, "learning_rate": 4.606525911708254e-06, "loss": 0.2131, "step": 480 }, { "epoch": 0.042334442092531, "grad_norm": 1.822531476696438, "learning_rate": 4.702495201535509e-06, "loss": 0.2124, "step": 490 }, { "epoch": 0.043198410298501015, "grad_norm": 1.9175265658812415, "learning_rate": 4.798464491362765e-06, "loss": 0.2162, "step": 500 }, { "epoch": 0.044062378504471034, "grad_norm": 1.766701105342602, "learning_rate": 4.89443378119002e-06, "loss": 0.2205, "step": 510 }, { "epoch": 0.04492634671044106, "grad_norm": 1.835679557041508, "learning_rate": 4.990403071017275e-06, "loss": 0.2153, "step": 520 }, { "epoch": 0.04579031491641108, "grad_norm": 1.7702803984984092, "learning_rate": 5.086372360844531e-06, "loss": 0.2133, "step": 530 }, { "epoch": 0.046654283122381096, "grad_norm": 1.7541287271210348, "learning_rate": 5.182341650671785e-06, "loss": 0.2111, "step": 540 }, { "epoch": 0.047518251328351115, "grad_norm": 1.679583479494552, "learning_rate": 5.278310940499041e-06, "loss": 0.218, "step": 550 }, { "epoch": 0.04838221953432114, "grad_norm": 1.7372110816101225, "learning_rate": 5.374280230326297e-06, "loss": 0.2111, "step": 560 }, { "epoch": 0.04924618774029116, "grad_norm": 1.718777295330965, "learning_rate": 5.470249520153551e-06, "loss": 0.2164, "step": 570 }, { "epoch": 0.05011015594626118, "grad_norm": 1.6716688118747165, "learning_rate": 5.566218809980807e-06, "loss": 0.2186, "step": 580 }, { "epoch": 0.050974124152231196, "grad_norm": 1.7059282422333841, "learning_rate": 5.662188099808062e-06, "loss": 0.2113, "step": 590 }, { "epoch": 0.05183809235820122, "grad_norm": 1.7518290879187177, "learning_rate": 5.758157389635317e-06, "loss": 0.2148, "step": 600 }, { "epoch": 0.05270206056417124, "grad_norm": 1.7399694944414077, "learning_rate": 5.854126679462573e-06, "loss": 0.2154, "step": 610 }, { "epoch": 0.05356602877014126, "grad_norm": 1.6592091944436103, "learning_rate": 5.950095969289828e-06, "loss": 0.2154, "step": 620 }, { "epoch": 0.054429996976111276, "grad_norm": 1.6428653305834138, "learning_rate": 6.0460652591170836e-06, "loss": 0.2122, "step": 630 }, { "epoch": 0.0552939651820813, "grad_norm": 1.5199474469183176, "learning_rate": 6.142034548944338e-06, "loss": 0.2137, "step": 640 }, { "epoch": 0.05615793338805132, "grad_norm": 1.495986683540699, "learning_rate": 6.238003838771594e-06, "loss": 0.207, "step": 650 }, { "epoch": 0.05702190159402134, "grad_norm": 1.6130097470231044, "learning_rate": 6.3339731285988495e-06, "loss": 0.2094, "step": 660 }, { "epoch": 0.05788586979999136, "grad_norm": 1.429539862570976, "learning_rate": 6.429942418426104e-06, "loss": 0.2144, "step": 670 }, { "epoch": 0.05874983800596138, "grad_norm": 1.4829396307386704, "learning_rate": 6.52591170825336e-06, "loss": 0.2112, "step": 680 }, { "epoch": 0.0596138062119314, "grad_norm": 1.5601103696909149, "learning_rate": 6.621880998080615e-06, "loss": 0.2146, "step": 690 }, { "epoch": 0.06047777441790142, "grad_norm": 1.5121395416193373, "learning_rate": 6.71785028790787e-06, "loss": 0.2085, "step": 700 }, { "epoch": 0.06134174262387144, "grad_norm": 1.5247176525676305, "learning_rate": 6.8138195777351256e-06, "loss": 0.2133, "step": 710 }, { "epoch": 0.062205710829841464, "grad_norm": 1.5231955702558029, "learning_rate": 6.909788867562381e-06, "loss": 0.2105, "step": 720 }, { "epoch": 0.06306967903581148, "grad_norm": 1.575431244960636, "learning_rate": 7.005758157389636e-06, "loss": 0.2112, "step": 730 }, { "epoch": 0.06393364724178151, "grad_norm": 1.4052670709962776, "learning_rate": 7.101727447216891e-06, "loss": 0.214, "step": 740 }, { "epoch": 0.06479761544775152, "grad_norm": 1.5981212939035538, "learning_rate": 7.1976967370441466e-06, "loss": 0.212, "step": 750 }, { "epoch": 0.06566158365372154, "grad_norm": 1.3490598266529032, "learning_rate": 7.2936660268714024e-06, "loss": 0.2156, "step": 760 }, { "epoch": 0.06652555185969157, "grad_norm": 1.469722333119062, "learning_rate": 7.389635316698657e-06, "loss": 0.2125, "step": 770 }, { "epoch": 0.06738952006566158, "grad_norm": 1.4540654642423125, "learning_rate": 7.4856046065259125e-06, "loss": 0.2103, "step": 780 }, { "epoch": 0.06825348827163161, "grad_norm": 1.5938247069436307, "learning_rate": 7.581573896353167e-06, "loss": 0.2074, "step": 790 }, { "epoch": 0.06911745647760162, "grad_norm": 1.419450236768579, "learning_rate": 7.677543186180423e-06, "loss": 0.2156, "step": 800 }, { "epoch": 0.06998142468357164, "grad_norm": 1.4817149124966364, "learning_rate": 7.773512476007678e-06, "loss": 0.2062, "step": 810 }, { "epoch": 0.07084539288954167, "grad_norm": 1.3562338115519248, "learning_rate": 7.869481765834934e-06, "loss": 0.2145, "step": 820 }, { "epoch": 0.07170936109551168, "grad_norm": 1.441182293209827, "learning_rate": 7.965451055662189e-06, "loss": 0.2096, "step": 830 }, { "epoch": 0.0725733293014817, "grad_norm": 1.4090583980671276, "learning_rate": 8.061420345489444e-06, "loss": 0.2088, "step": 840 }, { "epoch": 0.07343729750745173, "grad_norm": 1.3527688354129277, "learning_rate": 8.157389635316699e-06, "loss": 0.207, "step": 850 }, { "epoch": 0.07430126571342174, "grad_norm": 1.3845049446769717, "learning_rate": 8.253358925143955e-06, "loss": 0.2087, "step": 860 }, { "epoch": 0.07516523391939177, "grad_norm": 1.3620809094506507, "learning_rate": 8.34932821497121e-06, "loss": 0.2062, "step": 870 }, { "epoch": 0.07602920212536178, "grad_norm": 1.2449980327729386, "learning_rate": 8.445297504798465e-06, "loss": 0.2286, "step": 880 }, { "epoch": 0.0768931703313318, "grad_norm": 1.2459711984543538, "learning_rate": 8.54126679462572e-06, "loss": 0.2131, "step": 890 }, { "epoch": 0.07775713853730183, "grad_norm": 1.435496434836962, "learning_rate": 8.637236084452976e-06, "loss": 0.2132, "step": 900 }, { "epoch": 0.07862110674327184, "grad_norm": 1.2279448447576062, "learning_rate": 8.73320537428023e-06, "loss": 0.2058, "step": 910 }, { "epoch": 0.07948507494924187, "grad_norm": 1.3411791343537367, "learning_rate": 8.829174664107486e-06, "loss": 0.2104, "step": 920 }, { "epoch": 0.0803490431552119, "grad_norm": 1.3588577237929125, "learning_rate": 8.925143953934742e-06, "loss": 0.2086, "step": 930 }, { "epoch": 0.0812130113611819, "grad_norm": 1.215089486089123, "learning_rate": 9.021113243761996e-06, "loss": 0.2116, "step": 940 }, { "epoch": 0.08207697956715193, "grad_norm": 1.2690165783221579, "learning_rate": 9.117082533589252e-06, "loss": 0.2107, "step": 950 }, { "epoch": 0.08294094777312194, "grad_norm": 1.1716070949493718, "learning_rate": 9.213051823416507e-06, "loss": 0.2123, "step": 960 }, { "epoch": 0.08380491597909197, "grad_norm": 1.2018320494999504, "learning_rate": 9.309021113243763e-06, "loss": 0.2076, "step": 970 }, { "epoch": 0.084668884185062, "grad_norm": 1.253590789081327, "learning_rate": 9.404990403071018e-06, "loss": 0.2053, "step": 980 }, { "epoch": 0.085532852391032, "grad_norm": 1.3322390529767874, "learning_rate": 9.500959692898273e-06, "loss": 0.2109, "step": 990 }, { "epoch": 0.08639682059700203, "grad_norm": 1.1375523323508605, "learning_rate": 9.59692898272553e-06, "loss": 0.2102, "step": 1000 }, { "epoch": 0.08726078880297206, "grad_norm": 1.426192453982837, "learning_rate": 9.692898272552784e-06, "loss": 0.2174, "step": 1010 }, { "epoch": 0.08812475700894207, "grad_norm": 1.2098268943786776, "learning_rate": 9.78886756238004e-06, "loss": 0.2101, "step": 1020 }, { "epoch": 0.08898872521491209, "grad_norm": 1.2055452181294146, "learning_rate": 9.884836852207294e-06, "loss": 0.2049, "step": 1030 }, { "epoch": 0.08985269342088212, "grad_norm": 1.1566160374036807, "learning_rate": 9.98080614203455e-06, "loss": 0.2098, "step": 1040 }, { "epoch": 0.09071666162685213, "grad_norm": 1.2194302882557917, "learning_rate": 9.999998607883625e-06, "loss": 0.2079, "step": 1050 }, { "epoch": 0.09158062983282216, "grad_norm": 1.0589518495138135, "learning_rate": 9.999992952412176e-06, "loss": 0.2098, "step": 1060 }, { "epoch": 0.09244459803879217, "grad_norm": 1.1650993014300575, "learning_rate": 9.999982946583298e-06, "loss": 0.209, "step": 1070 }, { "epoch": 0.09330856624476219, "grad_norm": 1.1322274380889616, "learning_rate": 9.999968590405698e-06, "loss": 0.2067, "step": 1080 }, { "epoch": 0.09417253445073222, "grad_norm": 1.199347187742619, "learning_rate": 9.999949883891863e-06, "loss": 0.2043, "step": 1090 }, { "epoch": 0.09503650265670223, "grad_norm": 1.2200569547783202, "learning_rate": 9.999926827058076e-06, "loss": 0.2069, "step": 1100 }, { "epoch": 0.09590047086267225, "grad_norm": 1.157827031372808, "learning_rate": 9.999899419924391e-06, "loss": 0.2167, "step": 1110 }, { "epoch": 0.09676443906864228, "grad_norm": 1.0754344065487103, "learning_rate": 9.999867662514655e-06, "loss": 0.2039, "step": 1120 }, { "epoch": 0.09762840727461229, "grad_norm": 1.1937972053687231, "learning_rate": 9.999831554856503e-06, "loss": 0.2103, "step": 1130 }, { "epoch": 0.09849237548058232, "grad_norm": 1.1174871907395723, "learning_rate": 9.99979109698135e-06, "loss": 0.203, "step": 1140 }, { "epoch": 0.09935634368655233, "grad_norm": 1.080160161013022, "learning_rate": 9.999746288924396e-06, "loss": 0.2023, "step": 1150 }, { "epoch": 0.10022031189252235, "grad_norm": 1.0920396314412057, "learning_rate": 9.999697130724628e-06, "loss": 0.205, "step": 1160 }, { "epoch": 0.10108428009849238, "grad_norm": 1.173817518913107, "learning_rate": 9.999643622424817e-06, "loss": 0.2083, "step": 1170 }, { "epoch": 0.10194824830446239, "grad_norm": 1.0403379627522062, "learning_rate": 9.99958576407152e-06, "loss": 0.2004, "step": 1180 }, { "epoch": 0.10281221651043242, "grad_norm": 1.146331529719693, "learning_rate": 9.999523555715077e-06, "loss": 0.2089, "step": 1190 }, { "epoch": 0.10367618471640244, "grad_norm": 1.639211053109325, "learning_rate": 9.999456997409614e-06, "loss": 0.2056, "step": 1200 }, { "epoch": 0.10454015292237245, "grad_norm": 1.0282120502515748, "learning_rate": 9.999386089213042e-06, "loss": 0.2004, "step": 1210 }, { "epoch": 0.10540412112834248, "grad_norm": 1.1924096638792132, "learning_rate": 9.999310831187056e-06, "loss": 0.2038, "step": 1220 }, { "epoch": 0.10626808933431249, "grad_norm": 1.3235594482554829, "learning_rate": 9.999231223397136e-06, "loss": 0.2098, "step": 1230 }, { "epoch": 0.10713205754028252, "grad_norm": 1.0759458188187956, "learning_rate": 9.999147265912545e-06, "loss": 0.2045, "step": 1240 }, { "epoch": 0.10799602574625254, "grad_norm": 0.9954941962888572, "learning_rate": 9.999058958806337e-06, "loss": 0.2005, "step": 1250 }, { "epoch": 0.10885999395222255, "grad_norm": 1.1146268522277554, "learning_rate": 9.998966302155337e-06, "loss": 0.2018, "step": 1260 }, { "epoch": 0.10972396215819258, "grad_norm": 1.0822500532270867, "learning_rate": 9.998869296040172e-06, "loss": 0.2042, "step": 1270 }, { "epoch": 0.1105879303641626, "grad_norm": 1.0663634430114168, "learning_rate": 9.99876794054524e-06, "loss": 0.1999, "step": 1280 }, { "epoch": 0.11145189857013262, "grad_norm": 1.0487941810337988, "learning_rate": 9.998662235758726e-06, "loss": 0.2035, "step": 1290 }, { "epoch": 0.11231586677610264, "grad_norm": 1.0397680680828816, "learning_rate": 9.998552181772608e-06, "loss": 0.2008, "step": 1300 }, { "epoch": 0.11317983498207267, "grad_norm": 1.007838733865999, "learning_rate": 9.998437778682632e-06, "loss": 0.201, "step": 1310 }, { "epoch": 0.11404380318804268, "grad_norm": 0.9490225603657636, "learning_rate": 9.998319026588341e-06, "loss": 0.2013, "step": 1320 }, { "epoch": 0.1149077713940127, "grad_norm": 1.0866065144355808, "learning_rate": 9.99819592559306e-06, "loss": 0.2033, "step": 1330 }, { "epoch": 0.11577173959998271, "grad_norm": 1.0692981821008887, "learning_rate": 9.998068475803893e-06, "loss": 0.1964, "step": 1340 }, { "epoch": 0.11663570780595274, "grad_norm": 1.009125477008282, "learning_rate": 9.99793667733173e-06, "loss": 0.2033, "step": 1350 }, { "epoch": 0.11749967601192277, "grad_norm": 0.995413726812648, "learning_rate": 9.997800530291249e-06, "loss": 0.2022, "step": 1360 }, { "epoch": 0.11836364421789278, "grad_norm": 1.0153384644661287, "learning_rate": 9.997660034800904e-06, "loss": 0.2034, "step": 1370 }, { "epoch": 0.1192276124238628, "grad_norm": 1.0060721836065372, "learning_rate": 9.99751519098294e-06, "loss": 0.1981, "step": 1380 }, { "epoch": 0.12009158062983283, "grad_norm": 0.9395888677265961, "learning_rate": 9.997365998963378e-06, "loss": 0.2001, "step": 1390 }, { "epoch": 0.12095554883580284, "grad_norm": 0.9207516546879072, "learning_rate": 9.997212458872026e-06, "loss": 0.1958, "step": 1400 }, { "epoch": 0.12181951704177287, "grad_norm": 0.9562432904941996, "learning_rate": 9.997054570842476e-06, "loss": 0.1923, "step": 1410 }, { "epoch": 0.12268348524774288, "grad_norm": 0.9733048311881327, "learning_rate": 9.996892335012106e-06, "loss": 0.2021, "step": 1420 }, { "epoch": 0.1235474534537129, "grad_norm": 0.928328731096014, "learning_rate": 9.996725751522066e-06, "loss": 0.1962, "step": 1430 }, { "epoch": 0.12441142165968293, "grad_norm": 0.9312940268247356, "learning_rate": 9.996554820517302e-06, "loss": 0.1925, "step": 1440 }, { "epoch": 0.12527538986565295, "grad_norm": 0.9649248803993781, "learning_rate": 9.996379542146532e-06, "loss": 0.1953, "step": 1450 }, { "epoch": 0.12613935807162296, "grad_norm": 0.9689645983824585, "learning_rate": 9.996199916562263e-06, "loss": 0.1969, "step": 1460 }, { "epoch": 0.12700332627759298, "grad_norm": 1.0121475618102251, "learning_rate": 9.99601594392078e-06, "loss": 0.1944, "step": 1470 }, { "epoch": 0.12786729448356302, "grad_norm": 0.9444136452659068, "learning_rate": 9.995827624382157e-06, "loss": 0.1959, "step": 1480 }, { "epoch": 0.12873126268953303, "grad_norm": 1.0984999625010117, "learning_rate": 9.995634958110243e-06, "loss": 0.1979, "step": 1490 }, { "epoch": 0.12959523089550304, "grad_norm": 0.969715543160458, "learning_rate": 9.995437945272671e-06, "loss": 0.1948, "step": 1500 }, { "epoch": 0.13045919910147308, "grad_norm": 1.0241223672746604, "learning_rate": 9.995236586040857e-06, "loss": 0.1945, "step": 1510 }, { "epoch": 0.1313231673074431, "grad_norm": 0.964222667785021, "learning_rate": 9.995030880589998e-06, "loss": 0.1941, "step": 1520 }, { "epoch": 0.1321871355134131, "grad_norm": 0.9432330938575335, "learning_rate": 9.994820829099074e-06, "loss": 0.195, "step": 1530 }, { "epoch": 0.13305110371938314, "grad_norm": 0.9307652199560369, "learning_rate": 9.994606431750842e-06, "loss": 0.1854, "step": 1540 }, { "epoch": 0.13391507192535315, "grad_norm": 0.9310493339961098, "learning_rate": 9.994387688731847e-06, "loss": 0.1928, "step": 1550 }, { "epoch": 0.13477904013132316, "grad_norm": 0.9439479070573529, "learning_rate": 9.994164600232412e-06, "loss": 0.195, "step": 1560 }, { "epoch": 0.13564300833729317, "grad_norm": 0.9603028405137487, "learning_rate": 9.993937166446635e-06, "loss": 0.1898, "step": 1570 }, { "epoch": 0.13650697654326321, "grad_norm": 0.8970445894391301, "learning_rate": 9.993705387572404e-06, "loss": 0.193, "step": 1580 }, { "epoch": 0.13737094474923323, "grad_norm": 0.9137010906999963, "learning_rate": 9.993469263811383e-06, "loss": 0.1864, "step": 1590 }, { "epoch": 0.13823491295520324, "grad_norm": 0.8831586632426319, "learning_rate": 9.993228795369017e-06, "loss": 0.1872, "step": 1600 }, { "epoch": 0.13909888116117328, "grad_norm": 0.9134761502587991, "learning_rate": 9.992983982454528e-06, "loss": 0.1884, "step": 1610 }, { "epoch": 0.1399628493671433, "grad_norm": 0.9339429524570831, "learning_rate": 9.992734825280926e-06, "loss": 0.1917, "step": 1620 }, { "epoch": 0.1408268175731133, "grad_norm": 0.9113630951414461, "learning_rate": 9.992481324064991e-06, "loss": 0.1903, "step": 1630 }, { "epoch": 0.14169078577908334, "grad_norm": 0.9585246215583206, "learning_rate": 9.99222347902729e-06, "loss": 0.1902, "step": 1640 }, { "epoch": 0.14255475398505335, "grad_norm": 0.953890677401058, "learning_rate": 9.991961290392166e-06, "loss": 0.1875, "step": 1650 }, { "epoch": 0.14341872219102336, "grad_norm": 0.8503479328191382, "learning_rate": 9.991694758387744e-06, "loss": 0.1887, "step": 1660 }, { "epoch": 0.1442826903969934, "grad_norm": 0.9020194108982177, "learning_rate": 9.991423883245925e-06, "loss": 0.194, "step": 1670 }, { "epoch": 0.1451466586029634, "grad_norm": 0.9083223160164683, "learning_rate": 9.99114866520239e-06, "loss": 0.1938, "step": 1680 }, { "epoch": 0.14601062680893342, "grad_norm": 0.9090284777953003, "learning_rate": 9.9908691044966e-06, "loss": 0.189, "step": 1690 }, { "epoch": 0.14687459501490346, "grad_norm": 0.9353875805981315, "learning_rate": 9.99058520137179e-06, "loss": 0.1876, "step": 1700 }, { "epoch": 0.14773856322087348, "grad_norm": 0.9804296916341775, "learning_rate": 9.990296956074979e-06, "loss": 0.1882, "step": 1710 }, { "epoch": 0.1486025314268435, "grad_norm": 0.8978507801311811, "learning_rate": 9.99000436885696e-06, "loss": 0.1863, "step": 1720 }, { "epoch": 0.1494664996328135, "grad_norm": 0.9050475519755361, "learning_rate": 9.989707439972306e-06, "loss": 0.1833, "step": 1730 }, { "epoch": 0.15033046783878354, "grad_norm": 0.9315891329324208, "learning_rate": 9.989406169679367e-06, "loss": 0.1913, "step": 1740 }, { "epoch": 0.15119443604475355, "grad_norm": 0.922244731553486, "learning_rate": 9.98910055824027e-06, "loss": 0.1883, "step": 1750 }, { "epoch": 0.15205840425072356, "grad_norm": 0.8667210354836978, "learning_rate": 9.988790605920917e-06, "loss": 0.1852, "step": 1760 }, { "epoch": 0.1529223724566936, "grad_norm": 0.8814053572722416, "learning_rate": 9.988476312990994e-06, "loss": 0.1837, "step": 1770 }, { "epoch": 0.1537863406626636, "grad_norm": 0.8845096592429691, "learning_rate": 9.988157679723953e-06, "loss": 0.1827, "step": 1780 }, { "epoch": 0.15465030886863362, "grad_norm": 0.896642858191613, "learning_rate": 9.98783470639703e-06, "loss": 0.1853, "step": 1790 }, { "epoch": 0.15551427707460366, "grad_norm": 0.9515141760201947, "learning_rate": 9.987507393291238e-06, "loss": 0.186, "step": 1800 }, { "epoch": 0.15637824528057367, "grad_norm": 0.8838310226390965, "learning_rate": 9.98717574069136e-06, "loss": 0.1858, "step": 1810 }, { "epoch": 0.15724221348654369, "grad_norm": 1.0419827856105834, "learning_rate": 9.98683974888596e-06, "loss": 0.1857, "step": 1820 }, { "epoch": 0.15810618169251373, "grad_norm": 0.8554442292653766, "learning_rate": 9.986499418167373e-06, "loss": 0.1878, "step": 1830 }, { "epoch": 0.15897014989848374, "grad_norm": 0.9172557697246417, "learning_rate": 9.986154748831715e-06, "loss": 0.1892, "step": 1840 }, { "epoch": 0.15983411810445375, "grad_norm": 0.906975620762014, "learning_rate": 9.98580574117887e-06, "loss": 0.184, "step": 1850 }, { "epoch": 0.1606980863104238, "grad_norm": 0.8860481268543452, "learning_rate": 9.9854523955125e-06, "loss": 0.1891, "step": 1860 }, { "epoch": 0.1615620545163938, "grad_norm": 0.9766505643536513, "learning_rate": 9.985094712140044e-06, "loss": 0.1882, "step": 1870 }, { "epoch": 0.1624260227223638, "grad_norm": 0.8698571209777873, "learning_rate": 9.98473269137271e-06, "loss": 0.185, "step": 1880 }, { "epoch": 0.16328999092833385, "grad_norm": 0.8758942022132932, "learning_rate": 9.984366333525483e-06, "loss": 0.1875, "step": 1890 }, { "epoch": 0.16415395913430386, "grad_norm": 0.8078901518813743, "learning_rate": 9.983995638917122e-06, "loss": 0.186, "step": 1900 }, { "epoch": 0.16501792734027387, "grad_norm": 0.844074206643064, "learning_rate": 9.98362060787016e-06, "loss": 0.1829, "step": 1910 }, { "epoch": 0.16588189554624388, "grad_norm": 0.9462307510554506, "learning_rate": 9.983241240710897e-06, "loss": 0.1849, "step": 1920 }, { "epoch": 0.16674586375221392, "grad_norm": 0.8894512873356976, "learning_rate": 9.982857537769412e-06, "loss": 0.1804, "step": 1930 }, { "epoch": 0.16760983195818394, "grad_norm": 0.8862445385159019, "learning_rate": 9.982469499379556e-06, "loss": 0.1855, "step": 1940 }, { "epoch": 0.16847380016415395, "grad_norm": 0.8711563518645582, "learning_rate": 9.982077125878948e-06, "loss": 0.1814, "step": 1950 }, { "epoch": 0.169337768370124, "grad_norm": 0.9050034309741795, "learning_rate": 9.981680417608983e-06, "loss": 0.1803, "step": 1960 }, { "epoch": 0.170201736576094, "grad_norm": 0.9013350985231641, "learning_rate": 9.981279374914826e-06, "loss": 0.185, "step": 1970 }, { "epoch": 0.171065704782064, "grad_norm": 0.8654792583323618, "learning_rate": 9.980873998145413e-06, "loss": 0.1825, "step": 1980 }, { "epoch": 0.17192967298803405, "grad_norm": 0.8348173364643074, "learning_rate": 9.980464287653451e-06, "loss": 0.1798, "step": 1990 }, { "epoch": 0.17279364119400406, "grad_norm": 0.8524350735085091, "learning_rate": 9.980050243795418e-06, "loss": 0.182, "step": 2000 }, { "epoch": 0.17365760939997407, "grad_norm": 0.8823380446120698, "learning_rate": 9.979631866931562e-06, "loss": 0.1838, "step": 2010 }, { "epoch": 0.1745215776059441, "grad_norm": 0.8990809252489962, "learning_rate": 9.979209157425902e-06, "loss": 0.1832, "step": 2020 }, { "epoch": 0.17538554581191412, "grad_norm": 0.8737258448461562, "learning_rate": 9.978782115646226e-06, "loss": 0.1833, "step": 2030 }, { "epoch": 0.17624951401788413, "grad_norm": 0.8539185597232698, "learning_rate": 9.978350741964091e-06, "loss": 0.1821, "step": 2040 }, { "epoch": 0.17711348222385417, "grad_norm": 0.8718315063650803, "learning_rate": 9.977915036754822e-06, "loss": 0.1838, "step": 2050 }, { "epoch": 0.17797745042982419, "grad_norm": 0.9045138495180984, "learning_rate": 9.977475000397518e-06, "loss": 0.1828, "step": 2060 }, { "epoch": 0.1788414186357942, "grad_norm": 0.8655192511724848, "learning_rate": 9.97703063327504e-06, "loss": 0.1842, "step": 2070 }, { "epoch": 0.17970538684176424, "grad_norm": 0.9058187600514871, "learning_rate": 9.97658193577402e-06, "loss": 0.1813, "step": 2080 }, { "epoch": 0.18056935504773425, "grad_norm": 0.8534047113131205, "learning_rate": 9.976128908284857e-06, "loss": 0.1823, "step": 2090 }, { "epoch": 0.18143332325370426, "grad_norm": 0.7845439254440908, "learning_rate": 9.975671551201719e-06, "loss": 0.1796, "step": 2100 }, { "epoch": 0.18229729145967427, "grad_norm": 0.8348503967840605, "learning_rate": 9.97520986492254e-06, "loss": 0.1803, "step": 2110 }, { "epoch": 0.1831612596656443, "grad_norm": 0.8434028762996415, "learning_rate": 9.974743849849017e-06, "loss": 0.1793, "step": 2120 }, { "epoch": 0.18402522787161432, "grad_norm": 0.8447485487459954, "learning_rate": 9.974273506386623e-06, "loss": 0.1735, "step": 2130 }, { "epoch": 0.18488919607758433, "grad_norm": 0.8030215035923475, "learning_rate": 9.973798834944588e-06, "loss": 0.1826, "step": 2140 }, { "epoch": 0.18575316428355437, "grad_norm": 0.8518647928321847, "learning_rate": 9.97331983593591e-06, "loss": 0.1812, "step": 2150 }, { "epoch": 0.18661713248952438, "grad_norm": 0.8494439395574299, "learning_rate": 9.972836509777352e-06, "loss": 0.1796, "step": 2160 }, { "epoch": 0.1874811006954944, "grad_norm": 0.806243507038616, "learning_rate": 9.972348856889447e-06, "loss": 0.1803, "step": 2170 }, { "epoch": 0.18834506890146444, "grad_norm": 0.7838675544587994, "learning_rate": 9.971856877696483e-06, "loss": 0.1808, "step": 2180 }, { "epoch": 0.18920903710743445, "grad_norm": 0.8679707131997498, "learning_rate": 9.971360572626525e-06, "loss": 0.1795, "step": 2190 }, { "epoch": 0.19007300531340446, "grad_norm": 0.8359795113698925, "learning_rate": 9.970859942111387e-06, "loss": 0.1829, "step": 2200 }, { "epoch": 0.1909369735193745, "grad_norm": 0.8141606983614745, "learning_rate": 9.97035498658666e-06, "loss": 0.1775, "step": 2210 }, { "epoch": 0.1918009417253445, "grad_norm": 0.8511617271228508, "learning_rate": 9.969845706491686e-06, "loss": 0.1819, "step": 2220 }, { "epoch": 0.19266490993131452, "grad_norm": 0.8181597729741305, "learning_rate": 9.96933210226958e-06, "loss": 0.1816, "step": 2230 }, { "epoch": 0.19352887813728456, "grad_norm": 0.8536969679710033, "learning_rate": 9.968814174367214e-06, "loss": 0.1795, "step": 2240 }, { "epoch": 0.19439284634325457, "grad_norm": 0.8697086511401918, "learning_rate": 9.968291923235222e-06, "loss": 0.1799, "step": 2250 }, { "epoch": 0.19525681454922458, "grad_norm": 0.8874621853306252, "learning_rate": 9.967765349328003e-06, "loss": 0.1748, "step": 2260 }, { "epoch": 0.19612078275519462, "grad_norm": 0.828862563408282, "learning_rate": 9.967234453103712e-06, "loss": 0.1771, "step": 2270 }, { "epoch": 0.19698475096116463, "grad_norm": 0.7766510398391139, "learning_rate": 9.966699235024266e-06, "loss": 0.178, "step": 2280 }, { "epoch": 0.19784871916713465, "grad_norm": 0.854829690265069, "learning_rate": 9.966159695555349e-06, "loss": 0.177, "step": 2290 }, { "epoch": 0.19871268737310466, "grad_norm": 0.8222531787450125, "learning_rate": 9.965615835166396e-06, "loss": 0.1774, "step": 2300 }, { "epoch": 0.1995766555790747, "grad_norm": 0.8173464466969167, "learning_rate": 9.965067654330604e-06, "loss": 0.1749, "step": 2310 }, { "epoch": 0.2004406237850447, "grad_norm": 0.7951744192540607, "learning_rate": 9.964515153524932e-06, "loss": 0.1771, "step": 2320 }, { "epoch": 0.20130459199101472, "grad_norm": 0.8413750210733838, "learning_rate": 9.963958333230097e-06, "loss": 0.1792, "step": 2330 }, { "epoch": 0.20216856019698476, "grad_norm": 0.8451327183723681, "learning_rate": 9.96339719393057e-06, "loss": 0.1812, "step": 2340 }, { "epoch": 0.20303252840295477, "grad_norm": 0.8346875351231713, "learning_rate": 9.962831736114585e-06, "loss": 0.1732, "step": 2350 }, { "epoch": 0.20389649660892478, "grad_norm": 0.7717467067418516, "learning_rate": 9.962261960274132e-06, "loss": 0.1753, "step": 2360 }, { "epoch": 0.20476046481489482, "grad_norm": 0.8407722193490899, "learning_rate": 9.961687866904954e-06, "loss": 0.1747, "step": 2370 }, { "epoch": 0.20562443302086483, "grad_norm": 0.8444934823842906, "learning_rate": 9.961109456506559e-06, "loss": 0.1766, "step": 2380 }, { "epoch": 0.20648840122683484, "grad_norm": 0.8247517545324368, "learning_rate": 9.960526729582203e-06, "loss": 0.1708, "step": 2390 }, { "epoch": 0.20735236943280488, "grad_norm": 0.8276154600773534, "learning_rate": 9.959939686638901e-06, "loss": 0.1754, "step": 2400 }, { "epoch": 0.2082163376387749, "grad_norm": 0.7878727685300535, "learning_rate": 9.959348328187424e-06, "loss": 0.1769, "step": 2410 }, { "epoch": 0.2090803058447449, "grad_norm": 0.8280838186074931, "learning_rate": 9.958752654742296e-06, "loss": 0.1761, "step": 2420 }, { "epoch": 0.20994427405071495, "grad_norm": 0.8279865260833438, "learning_rate": 9.958152666821796e-06, "loss": 0.1788, "step": 2430 }, { "epoch": 0.21080824225668496, "grad_norm": 0.8203639460882185, "learning_rate": 9.957548364947959e-06, "loss": 0.1784, "step": 2440 }, { "epoch": 0.21167221046265497, "grad_norm": 0.7849791611006334, "learning_rate": 9.95693974964657e-06, "loss": 0.1761, "step": 2450 }, { "epoch": 0.21253617866862498, "grad_norm": 0.7544264491850907, "learning_rate": 9.956326821447168e-06, "loss": 0.1735, "step": 2460 }, { "epoch": 0.21340014687459502, "grad_norm": 0.7935822340896542, "learning_rate": 9.955709580883047e-06, "loss": 0.175, "step": 2470 }, { "epoch": 0.21426411508056503, "grad_norm": 0.7652213678508909, "learning_rate": 9.955088028491247e-06, "loss": 0.1738, "step": 2480 }, { "epoch": 0.21512808328653504, "grad_norm": 0.8065981747060027, "learning_rate": 9.954462164812568e-06, "loss": 0.1753, "step": 2490 }, { "epoch": 0.21599205149250508, "grad_norm": 0.7773019508262726, "learning_rate": 9.953831990391557e-06, "loss": 0.1773, "step": 2500 }, { "epoch": 0.2168560196984751, "grad_norm": 0.8167560180005594, "learning_rate": 9.95319750577651e-06, "loss": 0.1766, "step": 2510 }, { "epoch": 0.2177199879044451, "grad_norm": 0.7745012569374494, "learning_rate": 9.952558711519475e-06, "loss": 0.1733, "step": 2520 }, { "epoch": 0.21858395611041515, "grad_norm": 0.7882505435416458, "learning_rate": 9.951915608176247e-06, "loss": 0.173, "step": 2530 }, { "epoch": 0.21944792431638516, "grad_norm": 0.7535315864659419, "learning_rate": 9.951268196306379e-06, "loss": 0.1724, "step": 2540 }, { "epoch": 0.22031189252235517, "grad_norm": 0.7903619770040948, "learning_rate": 9.950616476473161e-06, "loss": 0.1776, "step": 2550 }, { "epoch": 0.2211758607283252, "grad_norm": 0.827402074944646, "learning_rate": 9.949960449243638e-06, "loss": 0.1786, "step": 2560 }, { "epoch": 0.22203982893429522, "grad_norm": 0.7786893208413252, "learning_rate": 9.9493001151886e-06, "loss": 0.177, "step": 2570 }, { "epoch": 0.22290379714026523, "grad_norm": 0.7907751212309051, "learning_rate": 9.94863547488259e-06, "loss": 0.1761, "step": 2580 }, { "epoch": 0.22376776534623527, "grad_norm": 0.8621582903262195, "learning_rate": 9.94796652890389e-06, "loss": 0.1759, "step": 2590 }, { "epoch": 0.22463173355220528, "grad_norm": 0.806757543369109, "learning_rate": 9.947293277834531e-06, "loss": 0.1754, "step": 2600 }, { "epoch": 0.2254957017581753, "grad_norm": 0.7527211655050535, "learning_rate": 9.946615722260291e-06, "loss": 0.1719, "step": 2610 }, { "epoch": 0.22635966996414533, "grad_norm": 0.8447146520227704, "learning_rate": 9.945933862770695e-06, "loss": 0.1747, "step": 2620 }, { "epoch": 0.22722363817011534, "grad_norm": 0.7430272978878533, "learning_rate": 9.94524769995901e-06, "loss": 0.1723, "step": 2630 }, { "epoch": 0.22808760637608536, "grad_norm": 0.7758344166204706, "learning_rate": 9.944557234422244e-06, "loss": 0.173, "step": 2640 }, { "epoch": 0.22895157458205537, "grad_norm": 0.7902705002047675, "learning_rate": 9.943862466761154e-06, "loss": 0.1776, "step": 2650 }, { "epoch": 0.2298155427880254, "grad_norm": 0.8419673410782607, "learning_rate": 9.943163397580237e-06, "loss": 0.1702, "step": 2660 }, { "epoch": 0.23067951099399542, "grad_norm": 0.8381561999082194, "learning_rate": 9.94246002748774e-06, "loss": 0.179, "step": 2670 }, { "epoch": 0.23154347919996543, "grad_norm": 0.7574228443477528, "learning_rate": 9.94175235709564e-06, "loss": 0.1731, "step": 2680 }, { "epoch": 0.23240744740593547, "grad_norm": 0.7911399295773064, "learning_rate": 9.941040387019663e-06, "loss": 0.1716, "step": 2690 }, { "epoch": 0.23327141561190548, "grad_norm": 0.7898506911596503, "learning_rate": 9.940324117879276e-06, "loss": 0.1731, "step": 2700 }, { "epoch": 0.2341353838178755, "grad_norm": 0.7981346247488054, "learning_rate": 9.939603550297684e-06, "loss": 0.1731, "step": 2710 }, { "epoch": 0.23499935202384553, "grad_norm": 0.7584900113832275, "learning_rate": 9.938878684901834e-06, "loss": 0.17, "step": 2720 }, { "epoch": 0.23586332022981554, "grad_norm": 0.803733635587881, "learning_rate": 9.938149522322411e-06, "loss": 0.1798, "step": 2730 }, { "epoch": 0.23672728843578555, "grad_norm": 0.8047802032077452, "learning_rate": 9.937416063193841e-06, "loss": 0.1754, "step": 2740 }, { "epoch": 0.2375912566417556, "grad_norm": 0.748847952157505, "learning_rate": 9.936678308154283e-06, "loss": 0.1721, "step": 2750 }, { "epoch": 0.2384552248477256, "grad_norm": 0.7198022855499399, "learning_rate": 9.935936257845643e-06, "loss": 0.1704, "step": 2760 }, { "epoch": 0.23931919305369562, "grad_norm": 1.0481929988558047, "learning_rate": 9.935189912913555e-06, "loss": 0.1702, "step": 2770 }, { "epoch": 0.24018316125966566, "grad_norm": 0.781807883224081, "learning_rate": 9.934439274007392e-06, "loss": 0.1752, "step": 2780 }, { "epoch": 0.24104712946563567, "grad_norm": 0.7830306240360945, "learning_rate": 9.93368434178027e-06, "loss": 0.1771, "step": 2790 }, { "epoch": 0.24191109767160568, "grad_norm": 0.7546245521114135, "learning_rate": 9.93292511688903e-06, "loss": 0.1726, "step": 2800 }, { "epoch": 0.24277506587757572, "grad_norm": 0.769112224734122, "learning_rate": 9.932161599994253e-06, "loss": 0.1732, "step": 2810 }, { "epoch": 0.24363903408354573, "grad_norm": 0.8248961320812401, "learning_rate": 9.931393791760258e-06, "loss": 0.1763, "step": 2820 }, { "epoch": 0.24450300228951574, "grad_norm": 0.7577418125024905, "learning_rate": 9.930621692855089e-06, "loss": 0.1715, "step": 2830 }, { "epoch": 0.24536697049548575, "grad_norm": 0.7300877318840987, "learning_rate": 9.929845303950533e-06, "loss": 0.1699, "step": 2840 }, { "epoch": 0.2462309387014558, "grad_norm": 0.7810888803907006, "learning_rate": 9.929064625722103e-06, "loss": 0.1727, "step": 2850 }, { "epoch": 0.2470949069074258, "grad_norm": 0.7601317930300447, "learning_rate": 9.928279658849044e-06, "loss": 0.1696, "step": 2860 }, { "epoch": 0.24795887511339582, "grad_norm": 0.7865052537791539, "learning_rate": 9.927490404014335e-06, "loss": 0.1749, "step": 2870 }, { "epoch": 0.24882284331936586, "grad_norm": 0.7668012354719749, "learning_rate": 9.926696861904688e-06, "loss": 0.1727, "step": 2880 }, { "epoch": 0.24968681152533587, "grad_norm": 0.7585293542729381, "learning_rate": 9.925899033210537e-06, "loss": 0.1687, "step": 2890 }, { "epoch": 0.2505507797313059, "grad_norm": 0.7357294583323162, "learning_rate": 9.925096918626057e-06, "loss": 0.1732, "step": 2900 }, { "epoch": 0.2514147479372759, "grad_norm": 0.7723496222824552, "learning_rate": 9.924290518849143e-06, "loss": 0.1667, "step": 2910 }, { "epoch": 0.25227871614324593, "grad_norm": 0.7719742079374611, "learning_rate": 9.92347983458142e-06, "loss": 0.1672, "step": 2920 }, { "epoch": 0.25314268434921594, "grad_norm": 0.7707056771464336, "learning_rate": 9.922664866528245e-06, "loss": 0.1746, "step": 2930 }, { "epoch": 0.25400665255518595, "grad_norm": 0.7292053975039586, "learning_rate": 9.921845615398696e-06, "loss": 0.1669, "step": 2940 }, { "epoch": 0.25487062076115596, "grad_norm": 0.7241856067663586, "learning_rate": 9.921022081905584e-06, "loss": 0.1663, "step": 2950 }, { "epoch": 0.25573458896712603, "grad_norm": 0.7466682681906929, "learning_rate": 9.920194266765443e-06, "loss": 0.172, "step": 2960 }, { "epoch": 0.25659855717309604, "grad_norm": 0.7453627408950962, "learning_rate": 9.919362170698535e-06, "loss": 0.1688, "step": 2970 }, { "epoch": 0.25746252537906605, "grad_norm": 0.7355063590474924, "learning_rate": 9.918525794428835e-06, "loss": 0.1724, "step": 2980 }, { "epoch": 0.25832649358503607, "grad_norm": 0.7790954819757584, "learning_rate": 9.917685138684061e-06, "loss": 0.1691, "step": 2990 }, { "epoch": 0.2591904617910061, "grad_norm": 0.765744357026, "learning_rate": 9.91684020419564e-06, "loss": 0.1706, "step": 3000 }, { "epoch": 0.2600544299969761, "grad_norm": 0.7430626764761369, "learning_rate": 9.915990991698725e-06, "loss": 0.1748, "step": 3010 }, { "epoch": 0.26091839820294616, "grad_norm": 0.7305081505894514, "learning_rate": 9.915137501932196e-06, "loss": 0.1694, "step": 3020 }, { "epoch": 0.26178236640891617, "grad_norm": 0.7738500765507736, "learning_rate": 9.91427973563865e-06, "loss": 0.1667, "step": 3030 }, { "epoch": 0.2626463346148862, "grad_norm": 0.7902256019188909, "learning_rate": 9.913417693564406e-06, "loss": 0.1696, "step": 3040 }, { "epoch": 0.2635103028208562, "grad_norm": 0.7646679714717656, "learning_rate": 9.912551376459502e-06, "loss": 0.1657, "step": 3050 }, { "epoch": 0.2643742710268262, "grad_norm": 0.7451146026894049, "learning_rate": 9.911680785077699e-06, "loss": 0.1703, "step": 3060 }, { "epoch": 0.2652382392327962, "grad_norm": 0.7807148744435659, "learning_rate": 9.910805920176472e-06, "loss": 0.1706, "step": 3070 }, { "epoch": 0.2661022074387663, "grad_norm": 0.7263055380070994, "learning_rate": 9.90992678251702e-06, "loss": 0.1681, "step": 3080 }, { "epoch": 0.2669661756447363, "grad_norm": 0.7663865032837028, "learning_rate": 9.909043372864256e-06, "loss": 0.1667, "step": 3090 }, { "epoch": 0.2678301438507063, "grad_norm": 0.7186567166331693, "learning_rate": 9.90815569198681e-06, "loss": 0.1643, "step": 3100 }, { "epoch": 0.2686941120566763, "grad_norm": 0.7614040177140725, "learning_rate": 9.90726374065703e-06, "loss": 0.1712, "step": 3110 }, { "epoch": 0.2695580802626463, "grad_norm": 0.7593484797688069, "learning_rate": 9.906367519650976e-06, "loss": 0.1727, "step": 3120 }, { "epoch": 0.27042204846861634, "grad_norm": 0.7784846405411727, "learning_rate": 9.905467029748427e-06, "loss": 0.1731, "step": 3130 }, { "epoch": 0.27128601667458635, "grad_norm": 0.6936575897699844, "learning_rate": 9.904562271732877e-06, "loss": 0.1709, "step": 3140 }, { "epoch": 0.2721499848805564, "grad_norm": 0.7455742600336882, "learning_rate": 9.903653246391526e-06, "loss": 0.1712, "step": 3150 }, { "epoch": 0.27301395308652643, "grad_norm": 0.7639542012395595, "learning_rate": 9.902739954515298e-06, "loss": 0.1692, "step": 3160 }, { "epoch": 0.27387792129249644, "grad_norm": 0.7654658583654708, "learning_rate": 9.90182239689882e-06, "loss": 0.1718, "step": 3170 }, { "epoch": 0.27474188949846645, "grad_norm": 0.7254925144092764, "learning_rate": 9.900900574340433e-06, "loss": 0.1711, "step": 3180 }, { "epoch": 0.27560585770443646, "grad_norm": 0.7784945029552791, "learning_rate": 9.899974487642191e-06, "loss": 0.1696, "step": 3190 }, { "epoch": 0.2764698259104065, "grad_norm": 0.7292362264828972, "learning_rate": 9.899044137609857e-06, "loss": 0.1682, "step": 3200 }, { "epoch": 0.27733379411637654, "grad_norm": 0.7488659895432636, "learning_rate": 9.898109525052904e-06, "loss": 0.1669, "step": 3210 }, { "epoch": 0.27819776232234655, "grad_norm": 0.7356583051338673, "learning_rate": 9.89717065078451e-06, "loss": 0.1619, "step": 3220 }, { "epoch": 0.27906173052831657, "grad_norm": 0.72285525580165, "learning_rate": 9.896227515621567e-06, "loss": 0.1661, "step": 3230 }, { "epoch": 0.2799256987342866, "grad_norm": 0.7339085499346532, "learning_rate": 9.89528012038467e-06, "loss": 0.1668, "step": 3240 }, { "epoch": 0.2807896669402566, "grad_norm": 0.7303332944076532, "learning_rate": 9.89432846589812e-06, "loss": 0.1686, "step": 3250 }, { "epoch": 0.2816536351462266, "grad_norm": 0.7801708995133046, "learning_rate": 9.893372552989928e-06, "loss": 0.167, "step": 3260 }, { "epoch": 0.28251760335219667, "grad_norm": 0.7841519896635663, "learning_rate": 9.892412382491808e-06, "loss": 0.1687, "step": 3270 }, { "epoch": 0.2833815715581667, "grad_norm": 0.7379472562953742, "learning_rate": 9.891447955239177e-06, "loss": 0.1678, "step": 3280 }, { "epoch": 0.2842455397641367, "grad_norm": 0.7477316830102703, "learning_rate": 9.890479272071156e-06, "loss": 0.1682, "step": 3290 }, { "epoch": 0.2851095079701067, "grad_norm": 0.7499765853953004, "learning_rate": 9.88950633383057e-06, "loss": 0.173, "step": 3300 }, { "epoch": 0.2859734761760767, "grad_norm": 0.7617572357950421, "learning_rate": 9.888529141363949e-06, "loss": 0.1692, "step": 3310 }, { "epoch": 0.2868374443820467, "grad_norm": 0.7928336156705718, "learning_rate": 9.887547695521518e-06, "loss": 0.1655, "step": 3320 }, { "epoch": 0.28770141258801674, "grad_norm": 0.7303417343810876, "learning_rate": 9.886561997157207e-06, "loss": 0.163, "step": 3330 }, { "epoch": 0.2885653807939868, "grad_norm": 0.7364254164460936, "learning_rate": 9.885572047128646e-06, "loss": 0.1717, "step": 3340 }, { "epoch": 0.2894293489999568, "grad_norm": 0.7948365440268766, "learning_rate": 9.884577846297163e-06, "loss": 0.1718, "step": 3350 }, { "epoch": 0.2902933172059268, "grad_norm": 0.7305274500423871, "learning_rate": 9.883579395527787e-06, "loss": 0.1641, "step": 3360 }, { "epoch": 0.29115728541189684, "grad_norm": 0.7134297636334582, "learning_rate": 9.882576695689239e-06, "loss": 0.1691, "step": 3370 }, { "epoch": 0.29202125361786685, "grad_norm": 0.6925354450672917, "learning_rate": 9.881569747653943e-06, "loss": 0.1683, "step": 3380 }, { "epoch": 0.29288522182383686, "grad_norm": 0.7386046590981896, "learning_rate": 9.880558552298018e-06, "loss": 0.1713, "step": 3390 }, { "epoch": 0.29374919002980693, "grad_norm": 0.721795755790784, "learning_rate": 9.879543110501276e-06, "loss": 0.1667, "step": 3400 }, { "epoch": 0.29461315823577694, "grad_norm": 0.7281554532294857, "learning_rate": 9.878523423147223e-06, "loss": 0.1667, "step": 3410 }, { "epoch": 0.29547712644174695, "grad_norm": 0.7232451772483279, "learning_rate": 9.877499491123066e-06, "loss": 0.1682, "step": 3420 }, { "epoch": 0.29634109464771696, "grad_norm": 0.712697831818986, "learning_rate": 9.876471315319699e-06, "loss": 0.1712, "step": 3430 }, { "epoch": 0.297205062853687, "grad_norm": 0.7314848374059167, "learning_rate": 9.875438896631706e-06, "loss": 0.1669, "step": 3440 }, { "epoch": 0.298069031059657, "grad_norm": 0.7223661801150361, "learning_rate": 9.87440223595737e-06, "loss": 0.1694, "step": 3450 }, { "epoch": 0.298932999265627, "grad_norm": 0.7297043299090169, "learning_rate": 9.873361334198661e-06, "loss": 0.1603, "step": 3460 }, { "epoch": 0.29979696747159706, "grad_norm": 0.7633676240410845, "learning_rate": 9.872316192261238e-06, "loss": 0.1657, "step": 3470 }, { "epoch": 0.3006609356775671, "grad_norm": 0.6987340528429398, "learning_rate": 9.871266811054449e-06, "loss": 0.1705, "step": 3480 }, { "epoch": 0.3015249038835371, "grad_norm": 0.7405716794127658, "learning_rate": 9.870213191491335e-06, "loss": 0.1653, "step": 3490 }, { "epoch": 0.3023888720895071, "grad_norm": 0.7479032250490073, "learning_rate": 9.869155334488622e-06, "loss": 0.1618, "step": 3500 }, { "epoch": 0.3032528402954771, "grad_norm": 0.7501069449128815, "learning_rate": 9.86809324096672e-06, "loss": 0.1619, "step": 3510 }, { "epoch": 0.3041168085014471, "grad_norm": 0.7702944945336244, "learning_rate": 9.867026911849728e-06, "loss": 0.1709, "step": 3520 }, { "epoch": 0.3049807767074172, "grad_norm": 0.7111948089944916, "learning_rate": 9.865956348065431e-06, "loss": 0.1695, "step": 3530 }, { "epoch": 0.3058447449133872, "grad_norm": 0.7294603405967993, "learning_rate": 9.864881550545296e-06, "loss": 0.1655, "step": 3540 }, { "epoch": 0.3067087131193572, "grad_norm": 0.7354788765353012, "learning_rate": 9.863802520224474e-06, "loss": 0.1651, "step": 3550 }, { "epoch": 0.3075726813253272, "grad_norm": 0.717913826033429, "learning_rate": 9.862719258041804e-06, "loss": 0.1623, "step": 3560 }, { "epoch": 0.30843664953129724, "grad_norm": 0.7612142871404554, "learning_rate": 9.8616317649398e-06, "loss": 0.1717, "step": 3570 }, { "epoch": 0.30930061773726725, "grad_norm": 0.7464556010648316, "learning_rate": 9.860540041864662e-06, "loss": 0.1642, "step": 3580 }, { "epoch": 0.3101645859432373, "grad_norm": 0.7331397054467885, "learning_rate": 9.859444089766264e-06, "loss": 0.1673, "step": 3590 }, { "epoch": 0.3110285541492073, "grad_norm": 0.7874566449044291, "learning_rate": 9.85834390959817e-06, "loss": 0.1658, "step": 3600 }, { "epoch": 0.31189252235517734, "grad_norm": 0.7535513082033617, "learning_rate": 9.85723950231761e-06, "loss": 0.1675, "step": 3610 }, { "epoch": 0.31275649056114735, "grad_norm": 0.7159751469299143, "learning_rate": 9.856130868885505e-06, "loss": 0.1679, "step": 3620 }, { "epoch": 0.31362045876711736, "grad_norm": 0.6813273679617754, "learning_rate": 9.855018010266443e-06, "loss": 0.1664, "step": 3630 }, { "epoch": 0.31448442697308737, "grad_norm": 0.7173235203424494, "learning_rate": 9.853900927428694e-06, "loss": 0.163, "step": 3640 }, { "epoch": 0.3153483951790574, "grad_norm": 0.7041005574822984, "learning_rate": 9.852779621344199e-06, "loss": 0.1648, "step": 3650 }, { "epoch": 0.31621236338502745, "grad_norm": 0.7814286351256942, "learning_rate": 9.851654092988578e-06, "loss": 0.1649, "step": 3660 }, { "epoch": 0.31707633159099746, "grad_norm": 0.726258361940594, "learning_rate": 9.850524343341121e-06, "loss": 0.1662, "step": 3670 }, { "epoch": 0.3179402997969675, "grad_norm": 0.7291712487787225, "learning_rate": 9.849390373384793e-06, "loss": 0.1616, "step": 3680 }, { "epoch": 0.3188042680029375, "grad_norm": 0.7562894585114789, "learning_rate": 9.84825218410623e-06, "loss": 0.1646, "step": 3690 }, { "epoch": 0.3196682362089075, "grad_norm": 0.7025509334909738, "learning_rate": 9.84710977649574e-06, "loss": 0.1649, "step": 3700 }, { "epoch": 0.3205322044148775, "grad_norm": 0.7193689188065979, "learning_rate": 9.845963151547302e-06, "loss": 0.167, "step": 3710 }, { "epoch": 0.3213961726208476, "grad_norm": 0.7177232302086264, "learning_rate": 9.84481231025856e-06, "loss": 0.1648, "step": 3720 }, { "epoch": 0.3222601408268176, "grad_norm": 0.7181865122617623, "learning_rate": 9.84365725363083e-06, "loss": 0.166, "step": 3730 }, { "epoch": 0.3231241090327876, "grad_norm": 0.7522519602597432, "learning_rate": 9.842497982669097e-06, "loss": 0.1665, "step": 3740 }, { "epoch": 0.3239880772387576, "grad_norm": 0.7487144507984922, "learning_rate": 9.84133449838201e-06, "loss": 0.1601, "step": 3750 }, { "epoch": 0.3248520454447276, "grad_norm": 0.7042965860824565, "learning_rate": 9.840166801781887e-06, "loss": 0.1638, "step": 3760 }, { "epoch": 0.32571601365069763, "grad_norm": 0.7187756482975587, "learning_rate": 9.838994893884705e-06, "loss": 0.1695, "step": 3770 }, { "epoch": 0.3265799818566677, "grad_norm": 0.6995611012661251, "learning_rate": 9.837818775710114e-06, "loss": 0.1648, "step": 3780 }, { "epoch": 0.3274439500626377, "grad_norm": 0.7608691305524844, "learning_rate": 9.836638448281417e-06, "loss": 0.1654, "step": 3790 }, { "epoch": 0.3283079182686077, "grad_norm": 0.7289093633796809, "learning_rate": 9.835453912625587e-06, "loss": 0.1667, "step": 3800 }, { "epoch": 0.32917188647457774, "grad_norm": 0.7629065996737572, "learning_rate": 9.834265169773259e-06, "loss": 0.1713, "step": 3810 }, { "epoch": 0.33003585468054775, "grad_norm": 0.6727524986501837, "learning_rate": 9.833072220758722e-06, "loss": 0.1646, "step": 3820 }, { "epoch": 0.33089982288651776, "grad_norm": 0.7491865346889776, "learning_rate": 9.831875066619929e-06, "loss": 0.1645, "step": 3830 }, { "epoch": 0.33176379109248777, "grad_norm": 0.7645752796321156, "learning_rate": 9.830673708398492e-06, "loss": 0.1633, "step": 3840 }, { "epoch": 0.33262775929845784, "grad_norm": 0.7036207961815555, "learning_rate": 9.829468147139681e-06, "loss": 0.1653, "step": 3850 }, { "epoch": 0.33349172750442785, "grad_norm": 0.6558514059493622, "learning_rate": 9.828258383892419e-06, "loss": 0.1593, "step": 3860 }, { "epoch": 0.33435569571039786, "grad_norm": 0.6523169696559058, "learning_rate": 9.827044419709289e-06, "loss": 0.1665, "step": 3870 }, { "epoch": 0.33521966391636787, "grad_norm": 0.719938636914537, "learning_rate": 9.825826255646532e-06, "loss": 0.1654, "step": 3880 }, { "epoch": 0.3360836321223379, "grad_norm": 0.9142436257642391, "learning_rate": 9.824603892764033e-06, "loss": 0.1655, "step": 3890 }, { "epoch": 0.3369476003283079, "grad_norm": 0.7308837755716424, "learning_rate": 9.82337733212534e-06, "loss": 0.1647, "step": 3900 }, { "epoch": 0.33781156853427796, "grad_norm": 0.7170945250305703, "learning_rate": 9.82214657479765e-06, "loss": 0.165, "step": 3910 }, { "epoch": 0.338675536740248, "grad_norm": 0.7101772423726191, "learning_rate": 9.820911621851813e-06, "loss": 0.1674, "step": 3920 }, { "epoch": 0.339539504946218, "grad_norm": 0.7105871752653151, "learning_rate": 9.819672474362324e-06, "loss": 0.1661, "step": 3930 }, { "epoch": 0.340403473152188, "grad_norm": 0.6928866893620583, "learning_rate": 9.818429133407332e-06, "loss": 0.1639, "step": 3940 }, { "epoch": 0.341267441358158, "grad_norm": 0.7070762196788623, "learning_rate": 9.817181600068636e-06, "loss": 0.1598, "step": 3950 }, { "epoch": 0.342131409564128, "grad_norm": 0.7131526299925895, "learning_rate": 9.81592987543168e-06, "loss": 0.1603, "step": 3960 }, { "epoch": 0.3429953777700981, "grad_norm": 0.6857882956396996, "learning_rate": 9.814673960585556e-06, "loss": 0.166, "step": 3970 }, { "epoch": 0.3438593459760681, "grad_norm": 0.7335700923086347, "learning_rate": 9.813413856623002e-06, "loss": 0.167, "step": 3980 }, { "epoch": 0.3447233141820381, "grad_norm": 0.6928897108991285, "learning_rate": 9.812149564640397e-06, "loss": 0.1612, "step": 3990 }, { "epoch": 0.3455872823880081, "grad_norm": 0.7266942679789092, "learning_rate": 9.810881085737769e-06, "loss": 0.1617, "step": 4000 }, { "epoch": 0.34645125059397813, "grad_norm": 0.7271458299087374, "learning_rate": 9.809608421018786e-06, "loss": 0.1606, "step": 4010 }, { "epoch": 0.34731521879994814, "grad_norm": 0.7087330966470788, "learning_rate": 9.80833157159076e-06, "loss": 0.1635, "step": 4020 }, { "epoch": 0.34817918700591816, "grad_norm": 0.6481443225031476, "learning_rate": 9.807050538564644e-06, "loss": 0.1577, "step": 4030 }, { "epoch": 0.3490431552118882, "grad_norm": 0.701382417957392, "learning_rate": 9.805765323055025e-06, "loss": 0.1603, "step": 4040 }, { "epoch": 0.34990712341785823, "grad_norm": 0.6975531043581854, "learning_rate": 9.804475926180139e-06, "loss": 0.1658, "step": 4050 }, { "epoch": 0.35077109162382825, "grad_norm": 0.738431014281204, "learning_rate": 9.803182349061853e-06, "loss": 0.1655, "step": 4060 }, { "epoch": 0.35163505982979826, "grad_norm": 0.7081474595206806, "learning_rate": 9.801884592825673e-06, "loss": 0.1632, "step": 4070 }, { "epoch": 0.35249902803576827, "grad_norm": 0.6865576639672578, "learning_rate": 9.80058265860074e-06, "loss": 0.1683, "step": 4080 }, { "epoch": 0.3533629962417383, "grad_norm": 0.7273806197401467, "learning_rate": 9.799276547519836e-06, "loss": 0.1663, "step": 4090 }, { "epoch": 0.35422696444770835, "grad_norm": 0.7164869319313873, "learning_rate": 9.797966260719369e-06, "loss": 0.1632, "step": 4100 }, { "epoch": 0.35509093265367836, "grad_norm": 0.6888892295698765, "learning_rate": 9.796651799339383e-06, "loss": 0.1642, "step": 4110 }, { "epoch": 0.35595490085964837, "grad_norm": 0.6597106187788412, "learning_rate": 9.795333164523557e-06, "loss": 0.1662, "step": 4120 }, { "epoch": 0.3568188690656184, "grad_norm": 0.7121146624840744, "learning_rate": 9.7940103574192e-06, "loss": 0.1629, "step": 4130 }, { "epoch": 0.3576828372715884, "grad_norm": 0.7300742422291335, "learning_rate": 9.792683379177249e-06, "loss": 0.1631, "step": 4140 }, { "epoch": 0.3585468054775584, "grad_norm": 0.7310423779575642, "learning_rate": 9.791352230952269e-06, "loss": 0.1669, "step": 4150 }, { "epoch": 0.3594107736835285, "grad_norm": 0.7040579907657136, "learning_rate": 9.790016913902458e-06, "loss": 0.1657, "step": 4160 }, { "epoch": 0.3602747418894985, "grad_norm": 0.6613582124056474, "learning_rate": 9.788677429189642e-06, "loss": 0.165, "step": 4170 }, { "epoch": 0.3611387100954685, "grad_norm": 0.7137351700890041, "learning_rate": 9.787333777979266e-06, "loss": 0.1628, "step": 4180 }, { "epoch": 0.3620026783014385, "grad_norm": 0.6950454669747205, "learning_rate": 9.785985961440405e-06, "loss": 0.166, "step": 4190 }, { "epoch": 0.3628666465074085, "grad_norm": 0.690613286107089, "learning_rate": 9.784633980745756e-06, "loss": 0.1644, "step": 4200 }, { "epoch": 0.36373061471337853, "grad_norm": 0.6770452196316469, "learning_rate": 9.783277837071647e-06, "loss": 0.1589, "step": 4210 }, { "epoch": 0.36459458291934854, "grad_norm": 0.7351025780843091, "learning_rate": 9.781917531598013e-06, "loss": 0.1605, "step": 4220 }, { "epoch": 0.3654585511253186, "grad_norm": 0.7029824747231594, "learning_rate": 9.780553065508424e-06, "loss": 0.1589, "step": 4230 }, { "epoch": 0.3663225193312886, "grad_norm": 0.7065773579594148, "learning_rate": 9.779184439990064e-06, "loss": 0.1671, "step": 4240 }, { "epoch": 0.36718648753725863, "grad_norm": 0.6659108075139156, "learning_rate": 9.777811656233738e-06, "loss": 0.1677, "step": 4250 }, { "epoch": 0.36805045574322864, "grad_norm": 0.699215116558182, "learning_rate": 9.776434715433863e-06, "loss": 0.163, "step": 4260 }, { "epoch": 0.36891442394919866, "grad_norm": 0.706583481787705, "learning_rate": 9.775053618788482e-06, "loss": 0.1647, "step": 4270 }, { "epoch": 0.36977839215516867, "grad_norm": 0.6874162890009886, "learning_rate": 9.773668367499246e-06, "loss": 0.1673, "step": 4280 }, { "epoch": 0.37064236036113873, "grad_norm": 0.7546642378109868, "learning_rate": 9.772278962771427e-06, "loss": 0.164, "step": 4290 }, { "epoch": 0.37150632856710875, "grad_norm": 0.6958279018093277, "learning_rate": 9.770885405813907e-06, "loss": 0.1639, "step": 4300 }, { "epoch": 0.37237029677307876, "grad_norm": 0.6863783205097246, "learning_rate": 9.769487697839184e-06, "loss": 0.1606, "step": 4310 }, { "epoch": 0.37323426497904877, "grad_norm": 0.7660038203574383, "learning_rate": 9.768085840063363e-06, "loss": 0.1658, "step": 4320 }, { "epoch": 0.3740982331850188, "grad_norm": 0.7310683895324374, "learning_rate": 9.766679833706163e-06, "loss": 0.165, "step": 4330 }, { "epoch": 0.3749622013909888, "grad_norm": 0.6682511713805321, "learning_rate": 9.765269679990913e-06, "loss": 0.1613, "step": 4340 }, { "epoch": 0.37582616959695886, "grad_norm": 0.7295984036992738, "learning_rate": 9.763855380144546e-06, "loss": 0.1627, "step": 4350 }, { "epoch": 0.37669013780292887, "grad_norm": 0.6661232017039217, "learning_rate": 9.762436935397608e-06, "loss": 0.1583, "step": 4360 }, { "epoch": 0.3775541060088989, "grad_norm": 0.6649991756043743, "learning_rate": 9.76101434698425e-06, "loss": 0.1618, "step": 4370 }, { "epoch": 0.3784180742148689, "grad_norm": 0.6923759634173425, "learning_rate": 9.759587616142225e-06, "loss": 0.163, "step": 4380 }, { "epoch": 0.3792820424208389, "grad_norm": 0.6871423621900502, "learning_rate": 9.758156744112895e-06, "loss": 0.1597, "step": 4390 }, { "epoch": 0.3801460106268089, "grad_norm": 0.7069305432340762, "learning_rate": 9.75672173214122e-06, "loss": 0.1674, "step": 4400 }, { "epoch": 0.38100997883277893, "grad_norm": 0.7103059022932815, "learning_rate": 9.755282581475769e-06, "loss": 0.1606, "step": 4410 }, { "epoch": 0.381873947038749, "grad_norm": 0.6899949710005489, "learning_rate": 9.753839293368704e-06, "loss": 0.1619, "step": 4420 }, { "epoch": 0.382737915244719, "grad_norm": 0.6866404341566841, "learning_rate": 9.752391869075791e-06, "loss": 0.1621, "step": 4430 }, { "epoch": 0.383601883450689, "grad_norm": 0.6747754230556255, "learning_rate": 9.750940309856393e-06, "loss": 0.1613, "step": 4440 }, { "epoch": 0.38446585165665903, "grad_norm": 0.7249657876338548, "learning_rate": 9.749484616973478e-06, "loss": 0.1613, "step": 4450 }, { "epoch": 0.38532981986262904, "grad_norm": 0.7105209288147724, "learning_rate": 9.748024791693598e-06, "loss": 0.1569, "step": 4460 }, { "epoch": 0.38619378806859905, "grad_norm": 0.6574038542965889, "learning_rate": 9.74656083528691e-06, "loss": 0.1597, "step": 4470 }, { "epoch": 0.3870577562745691, "grad_norm": 0.6480703817246564, "learning_rate": 9.745092749027163e-06, "loss": 0.1603, "step": 4480 }, { "epoch": 0.38792172448053913, "grad_norm": 0.6415644598250044, "learning_rate": 9.743620534191698e-06, "loss": 0.1588, "step": 4490 }, { "epoch": 0.38878569268650914, "grad_norm": 0.6805308455140375, "learning_rate": 9.74214419206145e-06, "loss": 0.1629, "step": 4500 }, { "epoch": 0.38964966089247915, "grad_norm": 0.7330716655075384, "learning_rate": 9.74066372392094e-06, "loss": 0.161, "step": 4510 }, { "epoch": 0.39051362909844917, "grad_norm": 0.6818706767824103, "learning_rate": 9.73917913105829e-06, "loss": 0.1591, "step": 4520 }, { "epoch": 0.3913775973044192, "grad_norm": 0.7047990410076855, "learning_rate": 9.737690414765198e-06, "loss": 0.1641, "step": 4530 }, { "epoch": 0.39224156551038925, "grad_norm": 0.6958885127192019, "learning_rate": 9.736197576336957e-06, "loss": 0.1598, "step": 4540 }, { "epoch": 0.39310553371635926, "grad_norm": 0.7029461177460052, "learning_rate": 9.734700617072444e-06, "loss": 0.1561, "step": 4550 }, { "epoch": 0.39396950192232927, "grad_norm": 0.6822164097545496, "learning_rate": 9.733199538274124e-06, "loss": 0.1576, "step": 4560 }, { "epoch": 0.3948334701282993, "grad_norm": 0.6805525304356631, "learning_rate": 9.731694341248045e-06, "loss": 0.1582, "step": 4570 }, { "epoch": 0.3956974383342693, "grad_norm": 0.7090666255186993, "learning_rate": 9.730185027303837e-06, "loss": 0.1625, "step": 4580 }, { "epoch": 0.3965614065402393, "grad_norm": 0.6775427538075873, "learning_rate": 9.728671597754715e-06, "loss": 0.1599, "step": 4590 }, { "epoch": 0.3974253747462093, "grad_norm": 0.7056790719470328, "learning_rate": 9.727154053917469e-06, "loss": 0.1628, "step": 4600 }, { "epoch": 0.3982893429521794, "grad_norm": 0.638674649902704, "learning_rate": 9.725632397112474e-06, "loss": 0.1629, "step": 4610 }, { "epoch": 0.3991533111581494, "grad_norm": 0.6550807852274794, "learning_rate": 9.724106628663683e-06, "loss": 0.16, "step": 4620 }, { "epoch": 0.4000172793641194, "grad_norm": 0.6460332536413842, "learning_rate": 9.722576749898624e-06, "loss": 0.1579, "step": 4630 }, { "epoch": 0.4008812475700894, "grad_norm": 0.6551551230297281, "learning_rate": 9.721042762148405e-06, "loss": 0.1622, "step": 4640 }, { "epoch": 0.40174521577605943, "grad_norm": 0.6626205641580281, "learning_rate": 9.719504666747704e-06, "loss": 0.1559, "step": 4650 }, { "epoch": 0.40260918398202944, "grad_norm": 0.7096427344280777, "learning_rate": 9.717962465034778e-06, "loss": 0.1658, "step": 4660 }, { "epoch": 0.4034731521879995, "grad_norm": 0.7129316785627647, "learning_rate": 9.716416158351454e-06, "loss": 0.1624, "step": 4670 }, { "epoch": 0.4043371203939695, "grad_norm": 0.6976638834788887, "learning_rate": 9.714865748043129e-06, "loss": 0.164, "step": 4680 }, { "epoch": 0.40520108859993953, "grad_norm": 0.6658249350744622, "learning_rate": 9.713311235458778e-06, "loss": 0.1631, "step": 4690 }, { "epoch": 0.40606505680590954, "grad_norm": 0.6840733659601906, "learning_rate": 9.711752621950936e-06, "loss": 0.1615, "step": 4700 }, { "epoch": 0.40692902501187955, "grad_norm": 0.6840753154130336, "learning_rate": 9.71018990887571e-06, "loss": 0.1617, "step": 4710 }, { "epoch": 0.40779299321784956, "grad_norm": 0.6630826297979107, "learning_rate": 9.708623097592775e-06, "loss": 0.1621, "step": 4720 }, { "epoch": 0.4086569614238196, "grad_norm": 0.7140002244423074, "learning_rate": 9.70705218946537e-06, "loss": 0.1622, "step": 4730 }, { "epoch": 0.40952092962978964, "grad_norm": 0.6758179638610594, "learning_rate": 9.705477185860302e-06, "loss": 0.1594, "step": 4740 }, { "epoch": 0.41038489783575965, "grad_norm": 0.6900535022098612, "learning_rate": 9.703898088147935e-06, "loss": 0.1632, "step": 4750 }, { "epoch": 0.41124886604172967, "grad_norm": 0.6761698450465825, "learning_rate": 9.702314897702203e-06, "loss": 0.1614, "step": 4760 }, { "epoch": 0.4121128342476997, "grad_norm": 0.6485182080321424, "learning_rate": 9.700727615900591e-06, "loss": 0.1604, "step": 4770 }, { "epoch": 0.4129768024536697, "grad_norm": 0.689619498277749, "learning_rate": 9.699136244124155e-06, "loss": 0.1613, "step": 4780 }, { "epoch": 0.4138407706596397, "grad_norm": 0.6848912848714906, "learning_rate": 9.697540783757502e-06, "loss": 0.1641, "step": 4790 }, { "epoch": 0.41470473886560977, "grad_norm": 0.6931500377479651, "learning_rate": 9.695941236188797e-06, "loss": 0.1619, "step": 4800 }, { "epoch": 0.4155687070715798, "grad_norm": 0.6851757299026764, "learning_rate": 9.694337602809765e-06, "loss": 0.1566, "step": 4810 }, { "epoch": 0.4164326752775498, "grad_norm": 0.6624506770445044, "learning_rate": 9.692729885015684e-06, "loss": 0.1599, "step": 4820 }, { "epoch": 0.4172966434835198, "grad_norm": 0.6387799436823667, "learning_rate": 9.691118084205382e-06, "loss": 0.1637, "step": 4830 }, { "epoch": 0.4181606116894898, "grad_norm": 0.6918645541241718, "learning_rate": 9.689502201781247e-06, "loss": 0.1596, "step": 4840 }, { "epoch": 0.4190245798954598, "grad_norm": 0.6718823552411884, "learning_rate": 9.68788223914921e-06, "loss": 0.1599, "step": 4850 }, { "epoch": 0.4198885481014299, "grad_norm": 0.6412918843776836, "learning_rate": 9.686258197718761e-06, "loss": 0.1577, "step": 4860 }, { "epoch": 0.4207525163073999, "grad_norm": 0.674420383357471, "learning_rate": 9.684630078902933e-06, "loss": 0.1607, "step": 4870 }, { "epoch": 0.4216164845133699, "grad_norm": 0.6704220676917114, "learning_rate": 9.682997884118303e-06, "loss": 0.1656, "step": 4880 }, { "epoch": 0.4224804527193399, "grad_norm": 0.6878812416032285, "learning_rate": 9.681361614785006e-06, "loss": 0.1594, "step": 4890 }, { "epoch": 0.42334442092530994, "grad_norm": 0.6553717926047634, "learning_rate": 9.679721272326709e-06, "loss": 0.1584, "step": 4900 }, { "epoch": 0.42420838913127995, "grad_norm": 0.6774054340189088, "learning_rate": 9.678076858170633e-06, "loss": 0.1576, "step": 4910 }, { "epoch": 0.42507235733724996, "grad_norm": 0.6738216331424101, "learning_rate": 9.676428373747538e-06, "loss": 0.1588, "step": 4920 }, { "epoch": 0.42593632554322003, "grad_norm": 0.6741765178104295, "learning_rate": 9.674775820491725e-06, "loss": 0.1573, "step": 4930 }, { "epoch": 0.42680029374919004, "grad_norm": 0.6760301728835072, "learning_rate": 9.673119199841033e-06, "loss": 0.1587, "step": 4940 }, { "epoch": 0.42766426195516005, "grad_norm": 0.683102732085449, "learning_rate": 9.671458513236845e-06, "loss": 0.1599, "step": 4950 }, { "epoch": 0.42852823016113006, "grad_norm": 0.6379446609802096, "learning_rate": 9.669793762124079e-06, "loss": 0.1565, "step": 4960 }, { "epoch": 0.4293921983671001, "grad_norm": 0.6798505709963193, "learning_rate": 9.668124947951187e-06, "loss": 0.1621, "step": 4970 }, { "epoch": 0.4302561665730701, "grad_norm": 0.6549161090260386, "learning_rate": 9.666452072170163e-06, "loss": 0.1582, "step": 4980 }, { "epoch": 0.43112013477904015, "grad_norm": 0.6852988744956113, "learning_rate": 9.664775136236528e-06, "loss": 0.1566, "step": 4990 }, { "epoch": 0.43198410298501017, "grad_norm": 0.6526290012979358, "learning_rate": 9.663094141609337e-06, "loss": 0.1587, "step": 5000 }, { "epoch": 0.4328480711909802, "grad_norm": 0.6668884578151805, "learning_rate": 9.661409089751179e-06, "loss": 0.1575, "step": 5010 }, { "epoch": 0.4337120393969502, "grad_norm": 0.6976587290426327, "learning_rate": 9.659719982128172e-06, "loss": 0.16, "step": 5020 }, { "epoch": 0.4345760076029202, "grad_norm": 0.7079200322711302, "learning_rate": 9.65802682020996e-06, "loss": 0.159, "step": 5030 }, { "epoch": 0.4354399758088902, "grad_norm": 0.6917877632076461, "learning_rate": 9.656329605469724e-06, "loss": 0.1547, "step": 5040 }, { "epoch": 0.4363039440148603, "grad_norm": 0.6797501891633531, "learning_rate": 9.654628339384154e-06, "loss": 0.161, "step": 5050 }, { "epoch": 0.4371679122208303, "grad_norm": 0.6635520032766078, "learning_rate": 9.652923023433483e-06, "loss": 0.1551, "step": 5060 }, { "epoch": 0.4380318804268003, "grad_norm": 0.6803906100574411, "learning_rate": 9.651213659101456e-06, "loss": 0.1606, "step": 5070 }, { "epoch": 0.4388958486327703, "grad_norm": 0.6700575525523713, "learning_rate": 9.649500247875347e-06, "loss": 0.1604, "step": 5080 }, { "epoch": 0.4397598168387403, "grad_norm": 0.6662538170197336, "learning_rate": 9.647782791245945e-06, "loss": 0.1568, "step": 5090 }, { "epoch": 0.44062378504471034, "grad_norm": 0.6624543848438776, "learning_rate": 9.646061290707566e-06, "loss": 0.1574, "step": 5100 }, { "epoch": 0.44148775325068035, "grad_norm": 0.7109126656654107, "learning_rate": 9.644335747758037e-06, "loss": 0.1591, "step": 5110 }, { "epoch": 0.4423517214566504, "grad_norm": 0.6473806141940632, "learning_rate": 9.642606163898708e-06, "loss": 0.1595, "step": 5120 }, { "epoch": 0.4432156896626204, "grad_norm": 0.7006474036535846, "learning_rate": 9.640872540634443e-06, "loss": 0.1557, "step": 5130 }, { "epoch": 0.44407965786859044, "grad_norm": 0.641365283402573, "learning_rate": 9.63913487947362e-06, "loss": 0.1624, "step": 5140 }, { "epoch": 0.44494362607456045, "grad_norm": 0.6959501153561491, "learning_rate": 9.63739318192813e-06, "loss": 0.1612, "step": 5150 }, { "epoch": 0.44580759428053046, "grad_norm": 0.6750876307270886, "learning_rate": 9.635647449513375e-06, "loss": 0.1594, "step": 5160 }, { "epoch": 0.4466715624865005, "grad_norm": 0.6462815274785941, "learning_rate": 9.633897683748271e-06, "loss": 0.1563, "step": 5170 }, { "epoch": 0.44753553069247054, "grad_norm": 0.6795189194974991, "learning_rate": 9.632143886155242e-06, "loss": 0.1569, "step": 5180 }, { "epoch": 0.44839949889844055, "grad_norm": 0.6386005245305387, "learning_rate": 9.630386058260219e-06, "loss": 0.1595, "step": 5190 }, { "epoch": 0.44926346710441056, "grad_norm": 0.6696051720370734, "learning_rate": 9.628624201592637e-06, "loss": 0.1559, "step": 5200 }, { "epoch": 0.4501274353103806, "grad_norm": 0.6525705870370956, "learning_rate": 9.626858317685446e-06, "loss": 0.1552, "step": 5210 }, { "epoch": 0.4509914035163506, "grad_norm": 0.6601726705245239, "learning_rate": 9.625088408075088e-06, "loss": 0.1547, "step": 5220 }, { "epoch": 0.4518553717223206, "grad_norm": 0.6949657010269852, "learning_rate": 9.623314474301513e-06, "loss": 0.1628, "step": 5230 }, { "epoch": 0.45271933992829066, "grad_norm": 0.6272986645491211, "learning_rate": 9.621536517908175e-06, "loss": 0.1584, "step": 5240 }, { "epoch": 0.4535833081342607, "grad_norm": 0.6564336061317336, "learning_rate": 9.619754540442023e-06, "loss": 0.1561, "step": 5250 }, { "epoch": 0.4544472763402307, "grad_norm": 0.6339087400986222, "learning_rate": 9.61796854345351e-06, "loss": 0.1584, "step": 5260 }, { "epoch": 0.4553112445462007, "grad_norm": 0.655855968908132, "learning_rate": 9.616178528496583e-06, "loss": 0.1553, "step": 5270 }, { "epoch": 0.4561752127521707, "grad_norm": 0.6860757996153211, "learning_rate": 9.61438449712868e-06, "loss": 0.16, "step": 5280 }, { "epoch": 0.4570391809581407, "grad_norm": 0.6406762286870573, "learning_rate": 9.612586450910744e-06, "loss": 0.1536, "step": 5290 }, { "epoch": 0.45790314916411073, "grad_norm": 0.6507725769700754, "learning_rate": 9.610784391407204e-06, "loss": 0.1602, "step": 5300 }, { "epoch": 0.4587671173700808, "grad_norm": 0.6513419928024363, "learning_rate": 9.608978320185985e-06, "loss": 0.154, "step": 5310 }, { "epoch": 0.4596310855760508, "grad_norm": 0.6557237617400382, "learning_rate": 9.607168238818496e-06, "loss": 0.1607, "step": 5320 }, { "epoch": 0.4604950537820208, "grad_norm": 0.6489510877793241, "learning_rate": 9.605354148879643e-06, "loss": 0.1543, "step": 5330 }, { "epoch": 0.46135902198799084, "grad_norm": 0.6518534162138759, "learning_rate": 9.603536051947815e-06, "loss": 0.1537, "step": 5340 }, { "epoch": 0.46222299019396085, "grad_norm": 0.6435402536124452, "learning_rate": 9.601713949604887e-06, "loss": 0.1602, "step": 5350 }, { "epoch": 0.46308695839993086, "grad_norm": 0.656285821302999, "learning_rate": 9.599887843436224e-06, "loss": 0.1575, "step": 5360 }, { "epoch": 0.4639509266059009, "grad_norm": 0.6734174544832082, "learning_rate": 9.598057735030668e-06, "loss": 0.1615, "step": 5370 }, { "epoch": 0.46481489481187094, "grad_norm": 0.6525119845384318, "learning_rate": 9.59622362598055e-06, "loss": 0.1555, "step": 5380 }, { "epoch": 0.46567886301784095, "grad_norm": 0.6424183453875718, "learning_rate": 9.594385517881673e-06, "loss": 0.1584, "step": 5390 }, { "epoch": 0.46654283122381096, "grad_norm": 0.6777539885626862, "learning_rate": 9.592543412333329e-06, "loss": 0.1623, "step": 5400 }, { "epoch": 0.467406799429781, "grad_norm": 0.6259425254530941, "learning_rate": 9.59069731093828e-06, "loss": 0.1599, "step": 5410 }, { "epoch": 0.468270767635751, "grad_norm": 0.6932018168621533, "learning_rate": 9.588847215302772e-06, "loss": 0.1592, "step": 5420 }, { "epoch": 0.46913473584172105, "grad_norm": 0.6477717328424021, "learning_rate": 9.586993127036522e-06, "loss": 0.1588, "step": 5430 }, { "epoch": 0.46999870404769106, "grad_norm": 0.6592836020610073, "learning_rate": 9.58513504775272e-06, "loss": 0.1635, "step": 5440 }, { "epoch": 0.4708626722536611, "grad_norm": 0.6457972290946014, "learning_rate": 9.583272979068032e-06, "loss": 0.1573, "step": 5450 }, { "epoch": 0.4717266404596311, "grad_norm": 0.6707595353255882, "learning_rate": 9.581406922602593e-06, "loss": 0.1528, "step": 5460 }, { "epoch": 0.4725906086656011, "grad_norm": 0.654924509203685, "learning_rate": 9.579536879980005e-06, "loss": 0.154, "step": 5470 }, { "epoch": 0.4734545768715711, "grad_norm": 0.6498507426859098, "learning_rate": 9.577662852827345e-06, "loss": 0.1554, "step": 5480 }, { "epoch": 0.4743185450775411, "grad_norm": 0.6503674843306472, "learning_rate": 9.575784842775152e-06, "loss": 0.1578, "step": 5490 }, { "epoch": 0.4751825132835112, "grad_norm": 0.6685006182692143, "learning_rate": 9.573902851457428e-06, "loss": 0.1579, "step": 5500 }, { "epoch": 0.4760464814894812, "grad_norm": 0.647438727181266, "learning_rate": 9.572016880511645e-06, "loss": 0.1581, "step": 5510 }, { "epoch": 0.4769104496954512, "grad_norm": 0.6524775973331061, "learning_rate": 9.570126931578734e-06, "loss": 0.1558, "step": 5520 }, { "epoch": 0.4777744179014212, "grad_norm": 0.6328300944860435, "learning_rate": 9.56823300630309e-06, "loss": 0.1514, "step": 5530 }, { "epoch": 0.47863838610739123, "grad_norm": 0.633087466179911, "learning_rate": 9.566335106332563e-06, "loss": 0.1595, "step": 5540 }, { "epoch": 0.47950235431336125, "grad_norm": 0.6531923998474526, "learning_rate": 9.564433233318466e-06, "loss": 0.1535, "step": 5550 }, { "epoch": 0.4803663225193313, "grad_norm": 0.635543450438123, "learning_rate": 9.562527388915565e-06, "loss": 0.1557, "step": 5560 }, { "epoch": 0.4812302907253013, "grad_norm": 0.628235427511297, "learning_rate": 9.560617574782085e-06, "loss": 0.1601, "step": 5570 }, { "epoch": 0.48209425893127134, "grad_norm": 0.6322211295045682, "learning_rate": 9.558703792579702e-06, "loss": 0.1598, "step": 5580 }, { "epoch": 0.48295822713724135, "grad_norm": 0.6423726739039197, "learning_rate": 9.556786043973547e-06, "loss": 0.1557, "step": 5590 }, { "epoch": 0.48382219534321136, "grad_norm": 0.6773207454316515, "learning_rate": 9.554864330632198e-06, "loss": 0.1585, "step": 5600 }, { "epoch": 0.48468616354918137, "grad_norm": 0.6499962659091628, "learning_rate": 9.55293865422769e-06, "loss": 0.1583, "step": 5610 }, { "epoch": 0.48555013175515144, "grad_norm": 0.6653000931931303, "learning_rate": 9.551009016435495e-06, "loss": 0.1604, "step": 5620 }, { "epoch": 0.48641409996112145, "grad_norm": 0.6360345932914004, "learning_rate": 9.549075418934543e-06, "loss": 0.1521, "step": 5630 }, { "epoch": 0.48727806816709146, "grad_norm": 0.5933880679418415, "learning_rate": 9.547137863407204e-06, "loss": 0.1577, "step": 5640 }, { "epoch": 0.48814203637306147, "grad_norm": 0.629981336471149, "learning_rate": 9.545196351539292e-06, "loss": 0.1532, "step": 5650 }, { "epoch": 0.4890060045790315, "grad_norm": 0.6555386097808079, "learning_rate": 9.543250885020061e-06, "loss": 0.156, "step": 5660 }, { "epoch": 0.4898699727850015, "grad_norm": 0.6842451255068056, "learning_rate": 9.54130146554221e-06, "loss": 0.1587, "step": 5670 }, { "epoch": 0.4907339409909715, "grad_norm": 0.6713064235267874, "learning_rate": 9.539348094801877e-06, "loss": 0.1552, "step": 5680 }, { "epoch": 0.4915979091969416, "grad_norm": 0.6564079624166437, "learning_rate": 9.537390774498637e-06, "loss": 0.1537, "step": 5690 }, { "epoch": 0.4924618774029116, "grad_norm": 0.6286290451414437, "learning_rate": 9.535429506335496e-06, "loss": 0.1566, "step": 5700 }, { "epoch": 0.4933258456088816, "grad_norm": 0.6385916147206167, "learning_rate": 9.533464292018906e-06, "loss": 0.1553, "step": 5710 }, { "epoch": 0.4941898138148516, "grad_norm": 0.6518930348205345, "learning_rate": 9.531495133258742e-06, "loss": 0.1544, "step": 5720 }, { "epoch": 0.4950537820208216, "grad_norm": 0.629072344541979, "learning_rate": 9.529522031768317e-06, "loss": 0.1613, "step": 5730 }, { "epoch": 0.49591775022679163, "grad_norm": 0.6572574558805094, "learning_rate": 9.527544989264375e-06, "loss": 0.1553, "step": 5740 }, { "epoch": 0.4967817184327617, "grad_norm": 0.631342912981415, "learning_rate": 9.525564007467082e-06, "loss": 0.1555, "step": 5750 }, { "epoch": 0.4976456866387317, "grad_norm": 0.610023752445402, "learning_rate": 9.523579088100041e-06, "loss": 0.1532, "step": 5760 }, { "epoch": 0.4985096548447017, "grad_norm": 0.6376457831648065, "learning_rate": 9.521590232890272e-06, "loss": 0.1577, "step": 5770 }, { "epoch": 0.49937362305067173, "grad_norm": 0.6406158974539737, "learning_rate": 9.519597443568227e-06, "loss": 0.1552, "step": 5780 }, { "epoch": 0.5002375912566418, "grad_norm": 0.6608118104247712, "learning_rate": 9.517600721867775e-06, "loss": 0.1617, "step": 5790 }, { "epoch": 0.5011015594626118, "grad_norm": 0.6153155011611277, "learning_rate": 9.51560006952621e-06, "loss": 0.1566, "step": 5800 }, { "epoch": 0.5019655276685818, "grad_norm": 0.6539057272210915, "learning_rate": 9.513595488284246e-06, "loss": 0.1549, "step": 5810 }, { "epoch": 0.5028294958745518, "grad_norm": 0.6531761984313647, "learning_rate": 9.511586979886013e-06, "loss": 0.1552, "step": 5820 }, { "epoch": 0.5036934640805218, "grad_norm": 0.638972805350294, "learning_rate": 9.509574546079061e-06, "loss": 0.1536, "step": 5830 }, { "epoch": 0.5045574322864919, "grad_norm": 0.6454598366800842, "learning_rate": 9.507558188614353e-06, "loss": 0.1564, "step": 5840 }, { "epoch": 0.5054214004924619, "grad_norm": 0.6179767848023813, "learning_rate": 9.505537909246266e-06, "loss": 0.1615, "step": 5850 }, { "epoch": 0.5062853686984319, "grad_norm": 0.6574241098778968, "learning_rate": 9.50351370973259e-06, "loss": 0.1506, "step": 5860 }, { "epoch": 0.5071493369044019, "grad_norm": 0.6180722558466789, "learning_rate": 9.501485591834525e-06, "loss": 0.1522, "step": 5870 }, { "epoch": 0.5080133051103719, "grad_norm": 0.6538193809641687, "learning_rate": 9.499453557316684e-06, "loss": 0.155, "step": 5880 }, { "epoch": 0.5088772733163419, "grad_norm": 0.6838416890378471, "learning_rate": 9.497417607947081e-06, "loss": 0.1547, "step": 5890 }, { "epoch": 0.5097412415223119, "grad_norm": 0.634166321930832, "learning_rate": 9.495377745497144e-06, "loss": 0.1538, "step": 5900 }, { "epoch": 0.510605209728282, "grad_norm": 0.6333864736473793, "learning_rate": 9.493333971741698e-06, "loss": 0.161, "step": 5910 }, { "epoch": 0.5114691779342521, "grad_norm": 0.6519770013554314, "learning_rate": 9.491286288458978e-06, "loss": 0.1507, "step": 5920 }, { "epoch": 0.5123331461402221, "grad_norm": 0.6532553915478285, "learning_rate": 9.489234697430613e-06, "loss": 0.1548, "step": 5930 }, { "epoch": 0.5131971143461921, "grad_norm": 0.6360296430234144, "learning_rate": 9.48717920044164e-06, "loss": 0.1523, "step": 5940 }, { "epoch": 0.5140610825521621, "grad_norm": 0.6562037627803998, "learning_rate": 9.485119799280491e-06, "loss": 0.1528, "step": 5950 }, { "epoch": 0.5149250507581321, "grad_norm": 0.6710431539588906, "learning_rate": 9.483056495738994e-06, "loss": 0.1525, "step": 5960 }, { "epoch": 0.5157890189641021, "grad_norm": 0.6421136526052125, "learning_rate": 9.480989291612372e-06, "loss": 0.1546, "step": 5970 }, { "epoch": 0.5166529871700721, "grad_norm": 0.6391902054075337, "learning_rate": 9.478918188699243e-06, "loss": 0.1542, "step": 5980 }, { "epoch": 0.5175169553760421, "grad_norm": 0.6556659472283152, "learning_rate": 9.47684318880162e-06, "loss": 0.158, "step": 5990 }, { "epoch": 0.5183809235820122, "grad_norm": 0.6546018179543279, "learning_rate": 9.474764293724898e-06, "loss": 0.1572, "step": 6000 }, { "epoch": 0.5192448917879822, "grad_norm": 0.6154480749406304, "learning_rate": 9.472681505277872e-06, "loss": 0.1543, "step": 6010 }, { "epoch": 0.5201088599939522, "grad_norm": 0.6403268547561236, "learning_rate": 9.470594825272719e-06, "loss": 0.151, "step": 6020 }, { "epoch": 0.5209728281999222, "grad_norm": 0.6236563458442675, "learning_rate": 9.468504255525e-06, "loss": 0.1542, "step": 6030 }, { "epoch": 0.5218367964058923, "grad_norm": 0.6877503233654196, "learning_rate": 9.466409797853665e-06, "loss": 0.1554, "step": 6040 }, { "epoch": 0.5227007646118623, "grad_norm": 0.643531814258169, "learning_rate": 9.464311454081041e-06, "loss": 0.1532, "step": 6050 }, { "epoch": 0.5235647328178323, "grad_norm": 0.6805286157046766, "learning_rate": 9.462209226032843e-06, "loss": 0.154, "step": 6060 }, { "epoch": 0.5244287010238023, "grad_norm": 0.61245472642869, "learning_rate": 9.460103115538161e-06, "loss": 0.1558, "step": 6070 }, { "epoch": 0.5252926692297724, "grad_norm": 0.6232098480791012, "learning_rate": 9.457993124429466e-06, "loss": 0.1546, "step": 6080 }, { "epoch": 0.5261566374357424, "grad_norm": 0.618449033777273, "learning_rate": 9.4558792545426e-06, "loss": 0.1557, "step": 6090 }, { "epoch": 0.5270206056417124, "grad_norm": 0.6397349484531721, "learning_rate": 9.453761507716787e-06, "loss": 0.1542, "step": 6100 }, { "epoch": 0.5278845738476824, "grad_norm": 0.6278611478363766, "learning_rate": 9.451639885794621e-06, "loss": 0.1574, "step": 6110 }, { "epoch": 0.5287485420536524, "grad_norm": 0.6235855930527535, "learning_rate": 9.449514390622062e-06, "loss": 0.1544, "step": 6120 }, { "epoch": 0.5296125102596224, "grad_norm": 0.6700763747048177, "learning_rate": 9.447385024048454e-06, "loss": 0.1521, "step": 6130 }, { "epoch": 0.5304764784655924, "grad_norm": 0.6090307268762309, "learning_rate": 9.445251787926492e-06, "loss": 0.1574, "step": 6140 }, { "epoch": 0.5313404466715624, "grad_norm": 0.6427231492601245, "learning_rate": 9.443114684112251e-06, "loss": 0.1492, "step": 6150 }, { "epoch": 0.5322044148775326, "grad_norm": 0.6555482109889144, "learning_rate": 9.440973714465167e-06, "loss": 0.1551, "step": 6160 }, { "epoch": 0.5330683830835026, "grad_norm": 0.6250258619357056, "learning_rate": 9.438828880848039e-06, "loss": 0.1524, "step": 6170 }, { "epoch": 0.5339323512894726, "grad_norm": 0.6288378897514569, "learning_rate": 9.436680185127026e-06, "loss": 0.1536, "step": 6180 }, { "epoch": 0.5347963194954426, "grad_norm": 0.6291255544505886, "learning_rate": 9.434527629171653e-06, "loss": 0.1584, "step": 6190 }, { "epoch": 0.5356602877014126, "grad_norm": 0.651577330866289, "learning_rate": 9.432371214854797e-06, "loss": 0.1522, "step": 6200 }, { "epoch": 0.5365242559073826, "grad_norm": 0.6177750730266219, "learning_rate": 9.430210944052696e-06, "loss": 0.1559, "step": 6210 }, { "epoch": 0.5373882241133526, "grad_norm": 0.6229158880730679, "learning_rate": 9.428046818644942e-06, "loss": 0.1574, "step": 6220 }, { "epoch": 0.5382521923193226, "grad_norm": 0.6628200588962239, "learning_rate": 9.425878840514487e-06, "loss": 0.1501, "step": 6230 }, { "epoch": 0.5391161605252927, "grad_norm": 0.6597060635008648, "learning_rate": 9.423707011547622e-06, "loss": 0.1474, "step": 6240 }, { "epoch": 0.5399801287312627, "grad_norm": 0.6543877462107659, "learning_rate": 9.421531333634e-06, "loss": 0.1562, "step": 6250 }, { "epoch": 0.5408440969372327, "grad_norm": 0.631809391193605, "learning_rate": 9.419351808666618e-06, "loss": 0.1532, "step": 6260 }, { "epoch": 0.5417080651432027, "grad_norm": 0.597981419686076, "learning_rate": 9.417168438541821e-06, "loss": 0.1501, "step": 6270 }, { "epoch": 0.5425720333491727, "grad_norm": 0.6143884751433082, "learning_rate": 9.414981225159303e-06, "loss": 0.1585, "step": 6280 }, { "epoch": 0.5434360015551428, "grad_norm": 0.6797038536136399, "learning_rate": 9.412790170422094e-06, "loss": 0.1538, "step": 6290 }, { "epoch": 0.5442999697611128, "grad_norm": 0.613381369055774, "learning_rate": 9.410595276236574e-06, "loss": 0.1505, "step": 6300 }, { "epoch": 0.5451639379670828, "grad_norm": 0.6353511536715715, "learning_rate": 9.408396544512459e-06, "loss": 0.1558, "step": 6310 }, { "epoch": 0.5460279061730529, "grad_norm": 0.6444380767578983, "learning_rate": 9.406193977162806e-06, "loss": 0.1492, "step": 6320 }, { "epoch": 0.5468918743790229, "grad_norm": 0.6205572841742492, "learning_rate": 9.403987576104009e-06, "loss": 0.1563, "step": 6330 }, { "epoch": 0.5477558425849929, "grad_norm": 0.5971746646391922, "learning_rate": 9.401777343255796e-06, "loss": 0.1528, "step": 6340 }, { "epoch": 0.5486198107909629, "grad_norm": 0.6233544276574221, "learning_rate": 9.399563280541233e-06, "loss": 0.1553, "step": 6350 }, { "epoch": 0.5494837789969329, "grad_norm": 0.6487060550718584, "learning_rate": 9.397345389886715e-06, "loss": 0.1523, "step": 6360 }, { "epoch": 0.5503477472029029, "grad_norm": 0.6296095891164247, "learning_rate": 9.395123673221965e-06, "loss": 0.1553, "step": 6370 }, { "epoch": 0.5512117154088729, "grad_norm": 0.7315998401798692, "learning_rate": 9.39289813248004e-06, "loss": 0.1567, "step": 6380 }, { "epoch": 0.5520756836148429, "grad_norm": 0.6450303515918221, "learning_rate": 9.390668769597323e-06, "loss": 0.1456, "step": 6390 }, { "epoch": 0.552939651820813, "grad_norm": 0.6353752953847911, "learning_rate": 9.38843558651352e-06, "loss": 0.1591, "step": 6400 }, { "epoch": 0.553803620026783, "grad_norm": 0.639900060325134, "learning_rate": 9.386198585171666e-06, "loss": 0.1582, "step": 6410 }, { "epoch": 0.5546675882327531, "grad_norm": 0.631243331570897, "learning_rate": 9.38395776751811e-06, "loss": 0.1535, "step": 6420 }, { "epoch": 0.5555315564387231, "grad_norm": 0.6120386993003224, "learning_rate": 9.381713135502531e-06, "loss": 0.1504, "step": 6430 }, { "epoch": 0.5563955246446931, "grad_norm": 0.6218134724182738, "learning_rate": 9.379464691077919e-06, "loss": 0.1523, "step": 6440 }, { "epoch": 0.5572594928506631, "grad_norm": 0.7523102118837981, "learning_rate": 9.377212436200587e-06, "loss": 0.1521, "step": 6450 }, { "epoch": 0.5581234610566331, "grad_norm": 0.6557050210004696, "learning_rate": 9.374956372830158e-06, "loss": 0.1561, "step": 6460 }, { "epoch": 0.5589874292626031, "grad_norm": 0.6201510619086964, "learning_rate": 9.37269650292957e-06, "loss": 0.155, "step": 6470 }, { "epoch": 0.5598513974685732, "grad_norm": 0.5811173881087803, "learning_rate": 9.370432828465079e-06, "loss": 0.1551, "step": 6480 }, { "epoch": 0.5607153656745432, "grad_norm": 0.6012143606073025, "learning_rate": 9.36816535140624e-06, "loss": 0.1543, "step": 6490 }, { "epoch": 0.5615793338805132, "grad_norm": 0.5825061635910226, "learning_rate": 9.365894073725929e-06, "loss": 0.1548, "step": 6500 }, { "epoch": 0.5624433020864832, "grad_norm": 0.6375174928498161, "learning_rate": 9.363618997400319e-06, "loss": 0.1542, "step": 6510 }, { "epoch": 0.5633072702924532, "grad_norm": 0.6218539072355832, "learning_rate": 9.361340124408893e-06, "loss": 0.1504, "step": 6520 }, { "epoch": 0.5641712384984232, "grad_norm": 0.6377893763114578, "learning_rate": 9.359057456734437e-06, "loss": 0.1528, "step": 6530 }, { "epoch": 0.5650352067043933, "grad_norm": 0.6059514862386471, "learning_rate": 9.356770996363034e-06, "loss": 0.1536, "step": 6540 }, { "epoch": 0.5658991749103633, "grad_norm": 0.6782562253690789, "learning_rate": 9.354480745284075e-06, "loss": 0.1522, "step": 6550 }, { "epoch": 0.5667631431163334, "grad_norm": 0.6530575495241211, "learning_rate": 9.352186705490245e-06, "loss": 0.1564, "step": 6560 }, { "epoch": 0.5676271113223034, "grad_norm": 0.6271306204208658, "learning_rate": 9.349888878977525e-06, "loss": 0.1579, "step": 6570 }, { "epoch": 0.5684910795282734, "grad_norm": 0.655379907159268, "learning_rate": 9.347587267745188e-06, "loss": 0.1587, "step": 6580 }, { "epoch": 0.5693550477342434, "grad_norm": 0.6207039064949125, "learning_rate": 9.345281873795807e-06, "loss": 0.1502, "step": 6590 }, { "epoch": 0.5702190159402134, "grad_norm": 0.6202852350840276, "learning_rate": 9.342972699135238e-06, "loss": 0.1482, "step": 6600 }, { "epoch": 0.5710829841461834, "grad_norm": 0.6170685026124321, "learning_rate": 9.340659745772635e-06, "loss": 0.1528, "step": 6610 }, { "epoch": 0.5719469523521534, "grad_norm": 0.6108278186924734, "learning_rate": 9.338343015720434e-06, "loss": 0.153, "step": 6620 }, { "epoch": 0.5728109205581234, "grad_norm": 0.6451428011838746, "learning_rate": 9.33602251099436e-06, "loss": 0.1483, "step": 6630 }, { "epoch": 0.5736748887640934, "grad_norm": 0.6672803417700074, "learning_rate": 9.33369823361342e-06, "loss": 0.1525, "step": 6640 }, { "epoch": 0.5745388569700635, "grad_norm": 0.638138381181463, "learning_rate": 9.331370185599902e-06, "loss": 0.1526, "step": 6650 }, { "epoch": 0.5754028251760335, "grad_norm": 0.6285291919757152, "learning_rate": 9.32903836897938e-06, "loss": 0.1579, "step": 6660 }, { "epoch": 0.5762667933820036, "grad_norm": 0.5894093953466101, "learning_rate": 9.326702785780704e-06, "loss": 0.1527, "step": 6670 }, { "epoch": 0.5771307615879736, "grad_norm": 0.6363863884972929, "learning_rate": 9.324363438035998e-06, "loss": 0.1522, "step": 6680 }, { "epoch": 0.5779947297939436, "grad_norm": 0.5954359232987835, "learning_rate": 9.322020327780667e-06, "loss": 0.147, "step": 6690 }, { "epoch": 0.5788586979999136, "grad_norm": 0.5924570670614575, "learning_rate": 9.319673457053389e-06, "loss": 0.1554, "step": 6700 }, { "epoch": 0.5797226662058836, "grad_norm": 0.6557729717170221, "learning_rate": 9.31732282789611e-06, "loss": 0.1548, "step": 6710 }, { "epoch": 0.5805866344118537, "grad_norm": 0.6252295859666467, "learning_rate": 9.314968442354048e-06, "loss": 0.1519, "step": 6720 }, { "epoch": 0.5814506026178237, "grad_norm": 0.6104984510258042, "learning_rate": 9.312610302475691e-06, "loss": 0.1545, "step": 6730 }, { "epoch": 0.5823145708237937, "grad_norm": 0.6084878036937621, "learning_rate": 9.31024841031279e-06, "loss": 0.1509, "step": 6740 }, { "epoch": 0.5831785390297637, "grad_norm": 0.615179111407142, "learning_rate": 9.307882767920365e-06, "loss": 0.1496, "step": 6750 }, { "epoch": 0.5840425072357337, "grad_norm": 0.6335012641621653, "learning_rate": 9.305513377356696e-06, "loss": 0.159, "step": 6760 }, { "epoch": 0.5849064754417037, "grad_norm": 0.6432893824981711, "learning_rate": 9.303140240683326e-06, "loss": 0.1558, "step": 6770 }, { "epoch": 0.5857704436476737, "grad_norm": 0.6288129691404086, "learning_rate": 9.300763359965057e-06, "loss": 0.1529, "step": 6780 }, { "epoch": 0.5866344118536437, "grad_norm": 0.5996719948442747, "learning_rate": 9.298382737269944e-06, "loss": 0.1561, "step": 6790 }, { "epoch": 0.5874983800596139, "grad_norm": 0.6318777413266485, "learning_rate": 9.295998374669307e-06, "loss": 0.1532, "step": 6800 }, { "epoch": 0.5883623482655839, "grad_norm": 0.6393146031927769, "learning_rate": 9.29361027423771e-06, "loss": 0.154, "step": 6810 }, { "epoch": 0.5892263164715539, "grad_norm": 0.6087589722272293, "learning_rate": 9.291218438052978e-06, "loss": 0.1449, "step": 6820 }, { "epoch": 0.5900902846775239, "grad_norm": 0.6053164128500995, "learning_rate": 9.28882286819618e-06, "loss": 0.1543, "step": 6830 }, { "epoch": 0.5909542528834939, "grad_norm": 0.6168383132645895, "learning_rate": 9.286423566751638e-06, "loss": 0.153, "step": 6840 }, { "epoch": 0.5918182210894639, "grad_norm": 0.6357370911729846, "learning_rate": 9.284020535806917e-06, "loss": 0.1493, "step": 6850 }, { "epoch": 0.5926821892954339, "grad_norm": 0.5856044394038176, "learning_rate": 9.28161377745283e-06, "loss": 0.1526, "step": 6860 }, { "epoch": 0.5935461575014039, "grad_norm": 0.595353891775203, "learning_rate": 9.27920329378343e-06, "loss": 0.1521, "step": 6870 }, { "epoch": 0.594410125707374, "grad_norm": 0.5879333218876476, "learning_rate": 9.276789086896015e-06, "loss": 0.1517, "step": 6880 }, { "epoch": 0.595274093913344, "grad_norm": 0.6437671621448833, "learning_rate": 9.274371158891117e-06, "loss": 0.1528, "step": 6890 }, { "epoch": 0.596138062119314, "grad_norm": 0.6362829862629101, "learning_rate": 9.271949511872514e-06, "loss": 0.1574, "step": 6900 }, { "epoch": 0.597002030325284, "grad_norm": 0.6324049229309815, "learning_rate": 9.269524147947214e-06, "loss": 0.1537, "step": 6910 }, { "epoch": 0.597865998531254, "grad_norm": 0.6329882824337885, "learning_rate": 9.267095069225456e-06, "loss": 0.1519, "step": 6920 }, { "epoch": 0.5987299667372241, "grad_norm": 0.6005996025691578, "learning_rate": 9.264662277820719e-06, "loss": 0.1512, "step": 6930 }, { "epoch": 0.5995939349431941, "grad_norm": 0.6040443399864707, "learning_rate": 9.262225775849707e-06, "loss": 0.1525, "step": 6940 }, { "epoch": 0.6004579031491641, "grad_norm": 0.6159608848677605, "learning_rate": 9.259785565432356e-06, "loss": 0.1533, "step": 6950 }, { "epoch": 0.6013218713551342, "grad_norm": 0.6149382032923227, "learning_rate": 9.257341648691822e-06, "loss": 0.1509, "step": 6960 }, { "epoch": 0.6021858395611042, "grad_norm": 0.6290543016885738, "learning_rate": 9.254894027754493e-06, "loss": 0.1539, "step": 6970 }, { "epoch": 0.6030498077670742, "grad_norm": 0.5930637622387478, "learning_rate": 9.25244270474998e-06, "loss": 0.1543, "step": 6980 }, { "epoch": 0.6039137759730442, "grad_norm": 0.5657882487334296, "learning_rate": 9.249987681811106e-06, "loss": 0.1492, "step": 6990 }, { "epoch": 0.6047777441790142, "grad_norm": 0.6095682916220933, "learning_rate": 9.247528961073925e-06, "loss": 0.1516, "step": 7000 }, { "epoch": 0.6056417123849842, "grad_norm": 0.5992451055616469, "learning_rate": 9.2450665446777e-06, "loss": 0.1534, "step": 7010 }, { "epoch": 0.6065056805909542, "grad_norm": 0.5997401846439975, "learning_rate": 9.242600434764912e-06, "loss": 0.1521, "step": 7020 }, { "epoch": 0.6073696487969242, "grad_norm": 0.5933488089940263, "learning_rate": 9.240130633481259e-06, "loss": 0.1518, "step": 7030 }, { "epoch": 0.6082336170028942, "grad_norm": 0.6313843488784524, "learning_rate": 9.237657142975643e-06, "loss": 0.1538, "step": 7040 }, { "epoch": 0.6090975852088644, "grad_norm": 0.6413521714283614, "learning_rate": 9.235179965400184e-06, "loss": 0.152, "step": 7050 }, { "epoch": 0.6099615534148344, "grad_norm": 0.6184470237558511, "learning_rate": 9.232699102910208e-06, "loss": 0.1504, "step": 7060 }, { "epoch": 0.6108255216208044, "grad_norm": 0.5707911485975334, "learning_rate": 9.230214557664241e-06, "loss": 0.1504, "step": 7070 }, { "epoch": 0.6116894898267744, "grad_norm": 0.5777559907197692, "learning_rate": 9.227726331824021e-06, "loss": 0.1496, "step": 7080 }, { "epoch": 0.6125534580327444, "grad_norm": 0.5954819404017806, "learning_rate": 9.225234427554485e-06, "loss": 0.1509, "step": 7090 }, { "epoch": 0.6134174262387144, "grad_norm": 0.6192745295620272, "learning_rate": 9.222738847023772e-06, "loss": 0.151, "step": 7100 }, { "epoch": 0.6142813944446844, "grad_norm": 0.6038949399030311, "learning_rate": 9.220239592403216e-06, "loss": 0.1491, "step": 7110 }, { "epoch": 0.6151453626506544, "grad_norm": 0.5824949970844027, "learning_rate": 9.217736665867352e-06, "loss": 0.1473, "step": 7120 }, { "epoch": 0.6160093308566245, "grad_norm": 0.5954051916023153, "learning_rate": 9.215230069593907e-06, "loss": 0.1525, "step": 7130 }, { "epoch": 0.6168732990625945, "grad_norm": 0.6320195114436121, "learning_rate": 9.212719805763806e-06, "loss": 0.1541, "step": 7140 }, { "epoch": 0.6177372672685645, "grad_norm": 0.6929376486569512, "learning_rate": 9.210205876561153e-06, "loss": 0.1493, "step": 7150 }, { "epoch": 0.6186012354745345, "grad_norm": 0.6159597724690932, "learning_rate": 9.207688284173257e-06, "loss": 0.1489, "step": 7160 }, { "epoch": 0.6194652036805045, "grad_norm": 0.5943323896784803, "learning_rate": 9.205167030790604e-06, "loss": 0.1486, "step": 7170 }, { "epoch": 0.6203291718864746, "grad_norm": 0.6204073730643097, "learning_rate": 9.202642118606866e-06, "loss": 0.1503, "step": 7180 }, { "epoch": 0.6211931400924446, "grad_norm": 0.6241579805429692, "learning_rate": 9.2001135498189e-06, "loss": 0.1532, "step": 7190 }, { "epoch": 0.6220571082984147, "grad_norm": 0.6030366378179512, "learning_rate": 9.19758132662675e-06, "loss": 0.149, "step": 7200 }, { "epoch": 0.6229210765043847, "grad_norm": 0.6071124192692541, "learning_rate": 9.195045451233627e-06, "loss": 0.1541, "step": 7210 }, { "epoch": 0.6237850447103547, "grad_norm": 0.6158999535197959, "learning_rate": 9.192505925845932e-06, "loss": 0.1527, "step": 7220 }, { "epoch": 0.6246490129163247, "grad_norm": 0.5640793686230424, "learning_rate": 9.189962752673234e-06, "loss": 0.1465, "step": 7230 }, { "epoch": 0.6255129811222947, "grad_norm": 0.5975619759720371, "learning_rate": 9.187415933928279e-06, "loss": 0.1525, "step": 7240 }, { "epoch": 0.6263769493282647, "grad_norm": 0.5968403447579432, "learning_rate": 9.184865471826988e-06, "loss": 0.1506, "step": 7250 }, { "epoch": 0.6272409175342347, "grad_norm": 0.6042528506000148, "learning_rate": 9.182311368588444e-06, "loss": 0.1486, "step": 7260 }, { "epoch": 0.6281048857402047, "grad_norm": 0.606460473515699, "learning_rate": 9.179753626434905e-06, "loss": 0.1505, "step": 7270 }, { "epoch": 0.6289688539461747, "grad_norm": 0.5903817090048041, "learning_rate": 9.17719224759179e-06, "loss": 0.1518, "step": 7280 }, { "epoch": 0.6298328221521448, "grad_norm": 0.5952765730532862, "learning_rate": 9.174627234287688e-06, "loss": 0.1492, "step": 7290 }, { "epoch": 0.6306967903581148, "grad_norm": 0.622409000843169, "learning_rate": 9.172058588754345e-06, "loss": 0.1529, "step": 7300 }, { "epoch": 0.6315607585640849, "grad_norm": 0.57758523085189, "learning_rate": 9.169486313226671e-06, "loss": 0.1506, "step": 7310 }, { "epoch": 0.6324247267700549, "grad_norm": 0.6142820795880717, "learning_rate": 9.166910409942731e-06, "loss": 0.1486, "step": 7320 }, { "epoch": 0.6332886949760249, "grad_norm": 0.5969544846944455, "learning_rate": 9.16433088114375e-06, "loss": 0.152, "step": 7330 }, { "epoch": 0.6341526631819949, "grad_norm": 0.6077472401893858, "learning_rate": 9.161747729074105e-06, "loss": 0.1546, "step": 7340 }, { "epoch": 0.6350166313879649, "grad_norm": 0.6153979176395339, "learning_rate": 9.159160955981326e-06, "loss": 0.1519, "step": 7350 }, { "epoch": 0.635880599593935, "grad_norm": 0.5856990458728484, "learning_rate": 9.156570564116092e-06, "loss": 0.1509, "step": 7360 }, { "epoch": 0.636744567799905, "grad_norm": 0.5805924301676306, "learning_rate": 9.153976555732233e-06, "loss": 0.1441, "step": 7370 }, { "epoch": 0.637608536005875, "grad_norm": 0.6405542270797587, "learning_rate": 9.151378933086728e-06, "loss": 0.1536, "step": 7380 }, { "epoch": 0.638472504211845, "grad_norm": 0.6087345540466436, "learning_rate": 9.148777698439695e-06, "loss": 0.1532, "step": 7390 }, { "epoch": 0.639336472417815, "grad_norm": 0.5881093647581587, "learning_rate": 9.146172854054395e-06, "loss": 0.1497, "step": 7400 }, { "epoch": 0.640200440623785, "grad_norm": 0.653373547737377, "learning_rate": 9.143564402197239e-06, "loss": 0.1577, "step": 7410 }, { "epoch": 0.641064408829755, "grad_norm": 0.5725873284917496, "learning_rate": 9.140952345137762e-06, "loss": 0.1482, "step": 7420 }, { "epoch": 0.6419283770357251, "grad_norm": 0.6062841375115021, "learning_rate": 9.138336685148648e-06, "loss": 0.1519, "step": 7430 }, { "epoch": 0.6427923452416952, "grad_norm": 0.6390131647406511, "learning_rate": 9.13571742450571e-06, "loss": 0.1472, "step": 7440 }, { "epoch": 0.6436563134476652, "grad_norm": 0.6325898539379184, "learning_rate": 9.133094565487894e-06, "loss": 0.153, "step": 7450 }, { "epoch": 0.6445202816536352, "grad_norm": 0.6244419302476996, "learning_rate": 9.130468110377283e-06, "loss": 0.1481, "step": 7460 }, { "epoch": 0.6453842498596052, "grad_norm": 0.5917216909133591, "learning_rate": 9.127838061459077e-06, "loss": 0.1481, "step": 7470 }, { "epoch": 0.6462482180655752, "grad_norm": 0.6155031693074737, "learning_rate": 9.125204421021616e-06, "loss": 0.1499, "step": 7480 }, { "epoch": 0.6471121862715452, "grad_norm": 0.602270438150178, "learning_rate": 9.122567191356355e-06, "loss": 0.1495, "step": 7490 }, { "epoch": 0.6479761544775152, "grad_norm": 0.6426993093044683, "learning_rate": 9.119926374757876e-06, "loss": 0.1527, "step": 7500 }, { "epoch": 0.6488401226834852, "grad_norm": 0.5912880315934217, "learning_rate": 9.117281973523882e-06, "loss": 0.1501, "step": 7510 }, { "epoch": 0.6497040908894552, "grad_norm": 0.6244924057208089, "learning_rate": 9.114633989955194e-06, "loss": 0.1519, "step": 7520 }, { "epoch": 0.6505680590954253, "grad_norm": 0.611934191875604, "learning_rate": 9.111982426355753e-06, "loss": 0.1469, "step": 7530 }, { "epoch": 0.6514320273013953, "grad_norm": 0.5864263801510056, "learning_rate": 9.109327285032607e-06, "loss": 0.149, "step": 7540 }, { "epoch": 0.6522959955073653, "grad_norm": 0.6010224495299872, "learning_rate": 9.106668568295927e-06, "loss": 0.1459, "step": 7550 }, { "epoch": 0.6531599637133354, "grad_norm": 0.6050030904357648, "learning_rate": 9.104006278458986e-06, "loss": 0.1458, "step": 7560 }, { "epoch": 0.6540239319193054, "grad_norm": 0.5676610160897111, "learning_rate": 9.101340417838171e-06, "loss": 0.1487, "step": 7570 }, { "epoch": 0.6548879001252754, "grad_norm": 0.6592513736662908, "learning_rate": 9.098670988752975e-06, "loss": 0.1479, "step": 7580 }, { "epoch": 0.6557518683312454, "grad_norm": 0.5927040039853829, "learning_rate": 9.095997993525999e-06, "loss": 0.1557, "step": 7590 }, { "epoch": 0.6566158365372154, "grad_norm": 0.6020603854415782, "learning_rate": 9.093321434482935e-06, "loss": 0.1571, "step": 7600 }, { "epoch": 0.6574798047431855, "grad_norm": 0.5872803841047225, "learning_rate": 9.09064131395259e-06, "loss": 0.1528, "step": 7610 }, { "epoch": 0.6583437729491555, "grad_norm": 0.5901679825406331, "learning_rate": 9.087957634266862e-06, "loss": 0.1502, "step": 7620 }, { "epoch": 0.6592077411551255, "grad_norm": 0.6311244536626748, "learning_rate": 9.085270397760748e-06, "loss": 0.1518, "step": 7630 }, { "epoch": 0.6600717093610955, "grad_norm": 0.5872882598881509, "learning_rate": 9.082579606772339e-06, "loss": 0.1528, "step": 7640 }, { "epoch": 0.6609356775670655, "grad_norm": 0.5882786648598662, "learning_rate": 9.079885263642818e-06, "loss": 0.1524, "step": 7650 }, { "epoch": 0.6617996457730355, "grad_norm": 0.5823254581363112, "learning_rate": 9.077187370716461e-06, "loss": 0.1505, "step": 7660 }, { "epoch": 0.6626636139790055, "grad_norm": 0.6065493971147171, "learning_rate": 9.074485930340631e-06, "loss": 0.1466, "step": 7670 }, { "epoch": 0.6635275821849755, "grad_norm": 0.6491173159325675, "learning_rate": 9.071780944865775e-06, "loss": 0.1472, "step": 7680 }, { "epoch": 0.6643915503909457, "grad_norm": 0.6409731127559477, "learning_rate": 9.06907241664543e-06, "loss": 0.1511, "step": 7690 }, { "epoch": 0.6652555185969157, "grad_norm": 0.581674663711164, "learning_rate": 9.066360348036211e-06, "loss": 0.1459, "step": 7700 }, { "epoch": 0.6661194868028857, "grad_norm": 0.6084235174422291, "learning_rate": 9.063644741397814e-06, "loss": 0.1534, "step": 7710 }, { "epoch": 0.6669834550088557, "grad_norm": 0.6142204723314244, "learning_rate": 9.060925599093015e-06, "loss": 0.1503, "step": 7720 }, { "epoch": 0.6678474232148257, "grad_norm": 0.6210363271358172, "learning_rate": 9.058202923487669e-06, "loss": 0.1501, "step": 7730 }, { "epoch": 0.6687113914207957, "grad_norm": 0.6328872498299684, "learning_rate": 9.055476716950697e-06, "loss": 0.15, "step": 7740 }, { "epoch": 0.6695753596267657, "grad_norm": 0.6076328150996245, "learning_rate": 9.052746981854097e-06, "loss": 0.1475, "step": 7750 }, { "epoch": 0.6704393278327357, "grad_norm": 0.5985368263170588, "learning_rate": 9.050013720572941e-06, "loss": 0.1479, "step": 7760 }, { "epoch": 0.6713032960387058, "grad_norm": 0.6388889436940819, "learning_rate": 9.04727693548536e-06, "loss": 0.1526, "step": 7770 }, { "epoch": 0.6721672642446758, "grad_norm": 0.603154034262408, "learning_rate": 9.04453662897256e-06, "loss": 0.1464, "step": 7780 }, { "epoch": 0.6730312324506458, "grad_norm": 0.5762078420887212, "learning_rate": 9.041792803418808e-06, "loss": 0.148, "step": 7790 }, { "epoch": 0.6738952006566158, "grad_norm": 0.6146161683014285, "learning_rate": 9.039045461211426e-06, "loss": 0.1521, "step": 7800 }, { "epoch": 0.6747591688625859, "grad_norm": 0.5739686729569367, "learning_rate": 9.036294604740805e-06, "loss": 0.1519, "step": 7810 }, { "epoch": 0.6756231370685559, "grad_norm": 0.5868335044090508, "learning_rate": 9.03354023640039e-06, "loss": 0.1497, "step": 7820 }, { "epoch": 0.6764871052745259, "grad_norm": 0.6111017627175641, "learning_rate": 9.030782358586684e-06, "loss": 0.1528, "step": 7830 }, { "epoch": 0.677351073480496, "grad_norm": 0.6252200831513823, "learning_rate": 9.028020973699237e-06, "loss": 0.1524, "step": 7840 }, { "epoch": 0.678215041686466, "grad_norm": 0.6345447586951666, "learning_rate": 9.025256084140656e-06, "loss": 0.1502, "step": 7850 }, { "epoch": 0.679079009892436, "grad_norm": 0.6300359296600807, "learning_rate": 9.022487692316599e-06, "loss": 0.1495, "step": 7860 }, { "epoch": 0.679942978098406, "grad_norm": 0.5918604092279216, "learning_rate": 9.019715800635764e-06, "loss": 0.1482, "step": 7870 }, { "epoch": 0.680806946304376, "grad_norm": 0.552573528938169, "learning_rate": 9.0169404115099e-06, "loss": 0.1503, "step": 7880 }, { "epoch": 0.681670914510346, "grad_norm": 0.6159373617370572, "learning_rate": 9.014161527353798e-06, "loss": 0.1482, "step": 7890 }, { "epoch": 0.682534882716316, "grad_norm": 0.5782099461611947, "learning_rate": 9.01137915058529e-06, "loss": 0.155, "step": 7900 }, { "epoch": 0.683398850922286, "grad_norm": 0.5935877354981174, "learning_rate": 9.008593283625242e-06, "loss": 0.149, "step": 7910 }, { "epoch": 0.684262819128256, "grad_norm": 0.5559859018366894, "learning_rate": 9.005803928897563e-06, "loss": 0.1466, "step": 7920 }, { "epoch": 0.685126787334226, "grad_norm": 0.6204828417543897, "learning_rate": 9.003011088829197e-06, "loss": 0.1509, "step": 7930 }, { "epoch": 0.6859907555401962, "grad_norm": 0.5691521373817178, "learning_rate": 9.000214765850115e-06, "loss": 0.1468, "step": 7940 }, { "epoch": 0.6868547237461662, "grad_norm": 0.5852804581194763, "learning_rate": 8.997414962393323e-06, "loss": 0.1463, "step": 7950 }, { "epoch": 0.6877186919521362, "grad_norm": 0.6049762063656603, "learning_rate": 8.994611680894853e-06, "loss": 0.1471, "step": 7960 }, { "epoch": 0.6885826601581062, "grad_norm": 0.5777602090413144, "learning_rate": 8.991804923793762e-06, "loss": 0.1494, "step": 7970 }, { "epoch": 0.6894466283640762, "grad_norm": 0.6044551263155569, "learning_rate": 8.988994693532136e-06, "loss": 0.1469, "step": 7980 }, { "epoch": 0.6903105965700462, "grad_norm": 0.5783047865363863, "learning_rate": 8.986180992555077e-06, "loss": 0.148, "step": 7990 }, { "epoch": 0.6911745647760162, "grad_norm": 0.6233075599008563, "learning_rate": 8.983363823310712e-06, "loss": 0.1525, "step": 8000 }, { "epoch": 0.6920385329819863, "grad_norm": 0.6006254139480321, "learning_rate": 8.98054318825018e-06, "loss": 0.1509, "step": 8010 }, { "epoch": 0.6929025011879563, "grad_norm": 0.5769597479586498, "learning_rate": 8.977719089827638e-06, "loss": 0.1428, "step": 8020 }, { "epoch": 0.6937664693939263, "grad_norm": 0.5885334854420028, "learning_rate": 8.974891530500263e-06, "loss": 0.1461, "step": 8030 }, { "epoch": 0.6946304375998963, "grad_norm": 0.58922621960749, "learning_rate": 8.97206051272823e-06, "loss": 0.1468, "step": 8040 }, { "epoch": 0.6954944058058663, "grad_norm": 0.5900619322694628, "learning_rate": 8.969226038974737e-06, "loss": 0.1489, "step": 8050 }, { "epoch": 0.6963583740118363, "grad_norm": 0.5721375599740359, "learning_rate": 8.966388111705977e-06, "loss": 0.1445, "step": 8060 }, { "epoch": 0.6972223422178064, "grad_norm": 0.6060535974988339, "learning_rate": 8.963546733391155e-06, "loss": 0.1513, "step": 8070 }, { "epoch": 0.6980863104237764, "grad_norm": 0.6121984155980555, "learning_rate": 8.960701906502476e-06, "loss": 0.1502, "step": 8080 }, { "epoch": 0.6989502786297465, "grad_norm": 0.5883564250085476, "learning_rate": 8.957853633515148e-06, "loss": 0.1481, "step": 8090 }, { "epoch": 0.6998142468357165, "grad_norm": 0.5995778157693623, "learning_rate": 8.955001916907376e-06, "loss": 0.1516, "step": 8100 }, { "epoch": 0.7006782150416865, "grad_norm": 0.6060847874402603, "learning_rate": 8.952146759160356e-06, "loss": 0.1488, "step": 8110 }, { "epoch": 0.7015421832476565, "grad_norm": 0.5781934980960773, "learning_rate": 8.949288162758287e-06, "loss": 0.1492, "step": 8120 }, { "epoch": 0.7024061514536265, "grad_norm": 0.6051536158517672, "learning_rate": 8.946426130188357e-06, "loss": 0.1503, "step": 8130 }, { "epoch": 0.7032701196595965, "grad_norm": 0.6139354449588857, "learning_rate": 8.943560663940739e-06, "loss": 0.1495, "step": 8140 }, { "epoch": 0.7041340878655665, "grad_norm": 0.6126263867946146, "learning_rate": 8.940691766508597e-06, "loss": 0.1476, "step": 8150 }, { "epoch": 0.7049980560715365, "grad_norm": 0.6302258584565575, "learning_rate": 8.937819440388086e-06, "loss": 0.1473, "step": 8160 }, { "epoch": 0.7058620242775066, "grad_norm": 0.5923426197318572, "learning_rate": 8.93494368807833e-06, "loss": 0.1448, "step": 8170 }, { "epoch": 0.7067259924834766, "grad_norm": 0.5819670560556821, "learning_rate": 8.93206451208145e-06, "loss": 0.1471, "step": 8180 }, { "epoch": 0.7075899606894466, "grad_norm": 0.5704325384697982, "learning_rate": 8.929181914902532e-06, "loss": 0.1484, "step": 8190 }, { "epoch": 0.7084539288954167, "grad_norm": 0.5854395681335803, "learning_rate": 8.926295899049651e-06, "loss": 0.1467, "step": 8200 }, { "epoch": 0.7093178971013867, "grad_norm": 0.5956571082587886, "learning_rate": 8.923406467033846e-06, "loss": 0.1502, "step": 8210 }, { "epoch": 0.7101818653073567, "grad_norm": 0.6013675361000554, "learning_rate": 8.920513621369138e-06, "loss": 0.1421, "step": 8220 }, { "epoch": 0.7110458335133267, "grad_norm": 0.6169527609070847, "learning_rate": 8.917617364572509e-06, "loss": 0.1457, "step": 8230 }, { "epoch": 0.7119098017192967, "grad_norm": 0.615664017102032, "learning_rate": 8.914717699163913e-06, "loss": 0.1528, "step": 8240 }, { "epoch": 0.7127737699252668, "grad_norm": 0.5929835165146116, "learning_rate": 8.91181462766627e-06, "loss": 0.1528, "step": 8250 }, { "epoch": 0.7136377381312368, "grad_norm": 0.5769838984731995, "learning_rate": 8.908908152605468e-06, "loss": 0.1467, "step": 8260 }, { "epoch": 0.7145017063372068, "grad_norm": 0.5979484694169591, "learning_rate": 8.905998276510347e-06, "loss": 0.1495, "step": 8270 }, { "epoch": 0.7153656745431768, "grad_norm": 0.6250913684486913, "learning_rate": 8.903085001912707e-06, "loss": 0.1503, "step": 8280 }, { "epoch": 0.7162296427491468, "grad_norm": 0.5743702279111842, "learning_rate": 8.90016833134732e-06, "loss": 0.1521, "step": 8290 }, { "epoch": 0.7170936109551168, "grad_norm": 0.5800789077997158, "learning_rate": 8.89724826735189e-06, "loss": 0.1488, "step": 8300 }, { "epoch": 0.7179575791610868, "grad_norm": 0.6123653987611494, "learning_rate": 8.894324812467092e-06, "loss": 0.1411, "step": 8310 }, { "epoch": 0.718821547367057, "grad_norm": 0.6413369452138997, "learning_rate": 8.891397969236541e-06, "loss": 0.1465, "step": 8320 }, { "epoch": 0.719685515573027, "grad_norm": 0.6314994629599093, "learning_rate": 8.888467740206805e-06, "loss": 0.1482, "step": 8330 }, { "epoch": 0.720549483778997, "grad_norm": 0.5891434300309477, "learning_rate": 8.885534127927397e-06, "loss": 0.1478, "step": 8340 }, { "epoch": 0.721413451984967, "grad_norm": 0.5858090399630496, "learning_rate": 8.882597134950772e-06, "loss": 0.1472, "step": 8350 }, { "epoch": 0.722277420190937, "grad_norm": 0.5638968847011544, "learning_rate": 8.879656763832327e-06, "loss": 0.1425, "step": 8360 }, { "epoch": 0.723141388396907, "grad_norm": 0.6067904865153183, "learning_rate": 8.876713017130398e-06, "loss": 0.1459, "step": 8370 }, { "epoch": 0.724005356602877, "grad_norm": 0.5851959853995922, "learning_rate": 8.87376589740626e-06, "loss": 0.1423, "step": 8380 }, { "epoch": 0.724869324808847, "grad_norm": 0.5670882649673757, "learning_rate": 8.870815407224121e-06, "loss": 0.1422, "step": 8390 }, { "epoch": 0.725733293014817, "grad_norm": 0.5792761245461205, "learning_rate": 8.867861549151123e-06, "loss": 0.1511, "step": 8400 }, { "epoch": 0.726597261220787, "grad_norm": 0.6384499720411182, "learning_rate": 8.864904325757336e-06, "loss": 0.1458, "step": 8410 }, { "epoch": 0.7274612294267571, "grad_norm": 0.6239567584341648, "learning_rate": 8.861943739615761e-06, "loss": 0.1481, "step": 8420 }, { "epoch": 0.7283251976327271, "grad_norm": 0.6526576277949593, "learning_rate": 8.85897979330232e-06, "loss": 0.1457, "step": 8430 }, { "epoch": 0.7291891658386971, "grad_norm": 0.6192375294210014, "learning_rate": 8.856012489395865e-06, "loss": 0.1515, "step": 8440 }, { "epoch": 0.7300531340446672, "grad_norm": 0.5829504676468671, "learning_rate": 8.853041830478165e-06, "loss": 0.1443, "step": 8450 }, { "epoch": 0.7309171022506372, "grad_norm": 0.6240893602044929, "learning_rate": 8.85006781913391e-06, "loss": 0.1468, "step": 8460 }, { "epoch": 0.7317810704566072, "grad_norm": 0.6402177545734322, "learning_rate": 8.847090457950704e-06, "loss": 0.1504, "step": 8470 }, { "epoch": 0.7326450386625772, "grad_norm": 0.6354668520522381, "learning_rate": 8.84410974951907e-06, "loss": 0.1463, "step": 8480 }, { "epoch": 0.7335090068685473, "grad_norm": 0.5727464227472098, "learning_rate": 8.841125696432438e-06, "loss": 0.1498, "step": 8490 }, { "epoch": 0.7343729750745173, "grad_norm": 0.6018868846257793, "learning_rate": 8.838138301287156e-06, "loss": 0.1467, "step": 8500 }, { "epoch": 0.7352369432804873, "grad_norm": 0.5644741322670542, "learning_rate": 8.835147566682472e-06, "loss": 0.1463, "step": 8510 }, { "epoch": 0.7361009114864573, "grad_norm": 0.5673974911271266, "learning_rate": 8.832153495220543e-06, "loss": 0.1475, "step": 8520 }, { "epoch": 0.7369648796924273, "grad_norm": 0.5964122079459192, "learning_rate": 8.829156089506427e-06, "loss": 0.1503, "step": 8530 }, { "epoch": 0.7378288478983973, "grad_norm": 0.5715091500499475, "learning_rate": 8.826155352148089e-06, "loss": 0.143, "step": 8540 }, { "epoch": 0.7386928161043673, "grad_norm": 0.6035211545064121, "learning_rate": 8.823151285756383e-06, "loss": 0.1494, "step": 8550 }, { "epoch": 0.7395567843103373, "grad_norm": 0.5289010319710973, "learning_rate": 8.820143892945072e-06, "loss": 0.1465, "step": 8560 }, { "epoch": 0.7404207525163073, "grad_norm": 0.6314524880705613, "learning_rate": 8.817133176330802e-06, "loss": 0.1486, "step": 8570 }, { "epoch": 0.7412847207222775, "grad_norm": 0.6019438096859134, "learning_rate": 8.814119138533117e-06, "loss": 0.149, "step": 8580 }, { "epoch": 0.7421486889282475, "grad_norm": 0.5685350683665455, "learning_rate": 8.811101782174447e-06, "loss": 0.1482, "step": 8590 }, { "epoch": 0.7430126571342175, "grad_norm": 0.5672312522177472, "learning_rate": 8.808081109880113e-06, "loss": 0.149, "step": 8600 }, { "epoch": 0.7438766253401875, "grad_norm": 0.6033392735761053, "learning_rate": 8.80505712427832e-06, "loss": 0.1478, "step": 8610 }, { "epoch": 0.7447405935461575, "grad_norm": 0.5852922151350154, "learning_rate": 8.802029828000157e-06, "loss": 0.1461, "step": 8620 }, { "epoch": 0.7456045617521275, "grad_norm": 0.5895046658401459, "learning_rate": 8.798999223679586e-06, "loss": 0.1462, "step": 8630 }, { "epoch": 0.7464685299580975, "grad_norm": 0.5961991481966116, "learning_rate": 8.79596531395346e-06, "loss": 0.1462, "step": 8640 }, { "epoch": 0.7473324981640675, "grad_norm": 0.5457241905915233, "learning_rate": 8.792928101461493e-06, "loss": 0.1482, "step": 8650 }, { "epoch": 0.7481964663700376, "grad_norm": 0.5709887805971291, "learning_rate": 8.789887588846288e-06, "loss": 0.1464, "step": 8660 }, { "epoch": 0.7490604345760076, "grad_norm": 0.6179251161053763, "learning_rate": 8.786843778753311e-06, "loss": 0.1512, "step": 8670 }, { "epoch": 0.7499244027819776, "grad_norm": 0.5417188053967987, "learning_rate": 8.783796673830896e-06, "loss": 0.1473, "step": 8680 }, { "epoch": 0.7507883709879476, "grad_norm": 0.565076720183294, "learning_rate": 8.780746276730246e-06, "loss": 0.145, "step": 8690 }, { "epoch": 0.7516523391939177, "grad_norm": 0.5855244277950163, "learning_rate": 8.777692590105429e-06, "loss": 0.1462, "step": 8700 }, { "epoch": 0.7525163073998877, "grad_norm": 0.5497821035086375, "learning_rate": 8.774635616613373e-06, "loss": 0.1442, "step": 8710 }, { "epoch": 0.7533802756058577, "grad_norm": 0.6039032637599129, "learning_rate": 8.771575358913871e-06, "loss": 0.1462, "step": 8720 }, { "epoch": 0.7542442438118278, "grad_norm": 0.5750476580302071, "learning_rate": 8.768511819669566e-06, "loss": 0.1453, "step": 8730 }, { "epoch": 0.7551082120177978, "grad_norm": 0.5568443722713766, "learning_rate": 8.765445001545961e-06, "loss": 0.1503, "step": 8740 }, { "epoch": 0.7559721802237678, "grad_norm": 0.6349949989616961, "learning_rate": 8.76237490721141e-06, "loss": 0.1458, "step": 8750 }, { "epoch": 0.7568361484297378, "grad_norm": 0.5900255153823822, "learning_rate": 8.75930153933712e-06, "loss": 0.1488, "step": 8760 }, { "epoch": 0.7577001166357078, "grad_norm": 0.5726499989030004, "learning_rate": 8.756224900597144e-06, "loss": 0.1463, "step": 8770 }, { "epoch": 0.7585640848416778, "grad_norm": 0.5794097752200494, "learning_rate": 8.75314499366838e-06, "loss": 0.1476, "step": 8780 }, { "epoch": 0.7594280530476478, "grad_norm": 0.5592726753611104, "learning_rate": 8.750061821230573e-06, "loss": 0.1456, "step": 8790 }, { "epoch": 0.7602920212536178, "grad_norm": 0.5642859448954136, "learning_rate": 8.746975385966305e-06, "loss": 0.1444, "step": 8800 }, { "epoch": 0.7611559894595878, "grad_norm": 0.602949892871976, "learning_rate": 8.743885690561002e-06, "loss": 0.1476, "step": 8810 }, { "epoch": 0.7620199576655579, "grad_norm": 0.578749309560873, "learning_rate": 8.740792737702921e-06, "loss": 0.1464, "step": 8820 }, { "epoch": 0.762883925871528, "grad_norm": 0.5827033788877268, "learning_rate": 8.737696530083158e-06, "loss": 0.1444, "step": 8830 }, { "epoch": 0.763747894077498, "grad_norm": 0.5737302603011082, "learning_rate": 8.734597070395635e-06, "loss": 0.1476, "step": 8840 }, { "epoch": 0.764611862283468, "grad_norm": 0.567276002499956, "learning_rate": 8.731494361337111e-06, "loss": 0.147, "step": 8850 }, { "epoch": 0.765475830489438, "grad_norm": 0.5785412025187946, "learning_rate": 8.72838840560717e-06, "loss": 0.1466, "step": 8860 }, { "epoch": 0.766339798695408, "grad_norm": 0.6021940515191849, "learning_rate": 8.725279205908214e-06, "loss": 0.1424, "step": 8870 }, { "epoch": 0.767203766901378, "grad_norm": 0.5678686047684466, "learning_rate": 8.722166764945476e-06, "loss": 0.1416, "step": 8880 }, { "epoch": 0.768067735107348, "grad_norm": 0.5983675801989564, "learning_rate": 8.719051085427007e-06, "loss": 0.1445, "step": 8890 }, { "epoch": 0.7689317033133181, "grad_norm": 0.5508988572074687, "learning_rate": 8.71593217006367e-06, "loss": 0.1466, "step": 8900 }, { "epoch": 0.7697956715192881, "grad_norm": 0.5884231126943428, "learning_rate": 8.712810021569153e-06, "loss": 0.1509, "step": 8910 }, { "epoch": 0.7706596397252581, "grad_norm": 0.5702397352103349, "learning_rate": 8.709684642659952e-06, "loss": 0.15, "step": 8920 }, { "epoch": 0.7715236079312281, "grad_norm": 0.5646379676204086, "learning_rate": 8.706556036055372e-06, "loss": 0.1448, "step": 8930 }, { "epoch": 0.7723875761371981, "grad_norm": 0.5908474945066845, "learning_rate": 8.703424204477527e-06, "loss": 0.1484, "step": 8940 }, { "epoch": 0.7732515443431681, "grad_norm": 0.5582469316100711, "learning_rate": 8.700289150651342e-06, "loss": 0.1452, "step": 8950 }, { "epoch": 0.7741155125491382, "grad_norm": 0.5758318223072699, "learning_rate": 8.69715087730454e-06, "loss": 0.1439, "step": 8960 }, { "epoch": 0.7749794807551083, "grad_norm": 0.5851791361562063, "learning_rate": 8.694009387167643e-06, "loss": 0.1461, "step": 8970 }, { "epoch": 0.7758434489610783, "grad_norm": 0.55539409751672, "learning_rate": 8.690864682973983e-06, "loss": 0.1429, "step": 8980 }, { "epoch": 0.7767074171670483, "grad_norm": 0.5972513387417647, "learning_rate": 8.687716767459677e-06, "loss": 0.1438, "step": 8990 }, { "epoch": 0.7775713853730183, "grad_norm": 0.5449750895753459, "learning_rate": 8.68456564336364e-06, "loss": 0.1423, "step": 9000 }, { "epoch": 0.7784353535789883, "grad_norm": 0.5629247273553248, "learning_rate": 8.681411313427584e-06, "loss": 0.1406, "step": 9010 }, { "epoch": 0.7792993217849583, "grad_norm": 0.5815650093028251, "learning_rate": 8.678253780395997e-06, "loss": 0.1498, "step": 9020 }, { "epoch": 0.7801632899909283, "grad_norm": 0.6061694848721607, "learning_rate": 8.67509304701617e-06, "loss": 0.147, "step": 9030 }, { "epoch": 0.7810272581968983, "grad_norm": 0.6055037881598447, "learning_rate": 8.671929116038167e-06, "loss": 0.1468, "step": 9040 }, { "epoch": 0.7818912264028683, "grad_norm": 0.5850345947610444, "learning_rate": 8.66876199021484e-06, "loss": 0.1484, "step": 9050 }, { "epoch": 0.7827551946088384, "grad_norm": 0.5542978640162831, "learning_rate": 8.665591672301816e-06, "loss": 0.1426, "step": 9060 }, { "epoch": 0.7836191628148084, "grad_norm": 0.5928333489110242, "learning_rate": 8.662418165057507e-06, "loss": 0.1429, "step": 9070 }, { "epoch": 0.7844831310207785, "grad_norm": 0.5648483381701337, "learning_rate": 8.659241471243088e-06, "loss": 0.1445, "step": 9080 }, { "epoch": 0.7853470992267485, "grad_norm": 0.6297067607299209, "learning_rate": 8.656061593622521e-06, "loss": 0.1486, "step": 9090 }, { "epoch": 0.7862110674327185, "grad_norm": 0.5783598666839724, "learning_rate": 8.652878534962523e-06, "loss": 0.1449, "step": 9100 }, { "epoch": 0.7870750356386885, "grad_norm": 0.5808145834168469, "learning_rate": 8.649692298032594e-06, "loss": 0.1452, "step": 9110 }, { "epoch": 0.7879390038446585, "grad_norm": 0.6257064540825436, "learning_rate": 8.646502885604988e-06, "loss": 0.1439, "step": 9120 }, { "epoch": 0.7888029720506285, "grad_norm": 0.5958553918867221, "learning_rate": 8.643310300454726e-06, "loss": 0.1498, "step": 9130 }, { "epoch": 0.7896669402565986, "grad_norm": 0.5718667220315514, "learning_rate": 8.640114545359589e-06, "loss": 0.1423, "step": 9140 }, { "epoch": 0.7905309084625686, "grad_norm": 0.5828008232292384, "learning_rate": 8.636915623100116e-06, "loss": 0.1427, "step": 9150 }, { "epoch": 0.7913948766685386, "grad_norm": 0.6078309638455678, "learning_rate": 8.633713536459603e-06, "loss": 0.1493, "step": 9160 }, { "epoch": 0.7922588448745086, "grad_norm": 0.5805499311435128, "learning_rate": 8.6305082882241e-06, "loss": 0.1452, "step": 9170 }, { "epoch": 0.7931228130804786, "grad_norm": 0.5523180415389615, "learning_rate": 8.627299881182402e-06, "loss": 0.1484, "step": 9180 }, { "epoch": 0.7939867812864486, "grad_norm": 0.5731511610127191, "learning_rate": 8.624088318126057e-06, "loss": 0.1472, "step": 9190 }, { "epoch": 0.7948507494924186, "grad_norm": 0.5711686241765782, "learning_rate": 8.620873601849362e-06, "loss": 0.1441, "step": 9200 }, { "epoch": 0.7957147176983888, "grad_norm": 0.5559112922512367, "learning_rate": 8.617655735149354e-06, "loss": 0.15, "step": 9210 }, { "epoch": 0.7965786859043588, "grad_norm": 0.5942008849380511, "learning_rate": 8.614434720825805e-06, "loss": 0.1453, "step": 9220 }, { "epoch": 0.7974426541103288, "grad_norm": 0.5826039170890981, "learning_rate": 8.611210561681236e-06, "loss": 0.147, "step": 9230 }, { "epoch": 0.7983066223162988, "grad_norm": 0.5809325892616363, "learning_rate": 8.6079832605209e-06, "loss": 0.1439, "step": 9240 }, { "epoch": 0.7991705905222688, "grad_norm": 0.551222305517412, "learning_rate": 8.604752820152782e-06, "loss": 0.1451, "step": 9250 }, { "epoch": 0.8000345587282388, "grad_norm": 0.569466216351549, "learning_rate": 8.601519243387602e-06, "loss": 0.1408, "step": 9260 }, { "epoch": 0.8008985269342088, "grad_norm": 0.5632939536324367, "learning_rate": 8.598282533038804e-06, "loss": 0.1414, "step": 9270 }, { "epoch": 0.8017624951401788, "grad_norm": 0.5945112711943039, "learning_rate": 8.595042691922564e-06, "loss": 0.149, "step": 9280 }, { "epoch": 0.8026264633461488, "grad_norm": 0.6257070347054505, "learning_rate": 8.591799722857779e-06, "loss": 0.1468, "step": 9290 }, { "epoch": 0.8034904315521189, "grad_norm": 0.5502878627500125, "learning_rate": 8.588553628666067e-06, "loss": 0.1459, "step": 9300 }, { "epoch": 0.8043543997580889, "grad_norm": 0.5542111041310896, "learning_rate": 8.585304412171767e-06, "loss": 0.143, "step": 9310 }, { "epoch": 0.8052183679640589, "grad_norm": 0.5326126029738462, "learning_rate": 8.582052076201932e-06, "loss": 0.1423, "step": 9320 }, { "epoch": 0.8060823361700289, "grad_norm": 0.5885736349948085, "learning_rate": 8.578796623586332e-06, "loss": 0.1447, "step": 9330 }, { "epoch": 0.806946304375999, "grad_norm": 0.584316686720796, "learning_rate": 8.575538057157448e-06, "loss": 0.1448, "step": 9340 }, { "epoch": 0.807810272581969, "grad_norm": 0.5664241111477385, "learning_rate": 8.572276379750468e-06, "loss": 0.145, "step": 9350 }, { "epoch": 0.808674240787939, "grad_norm": 0.5993574255434805, "learning_rate": 8.569011594203291e-06, "loss": 0.1458, "step": 9360 }, { "epoch": 0.809538208993909, "grad_norm": 0.5888740832156935, "learning_rate": 8.565743703356514e-06, "loss": 0.1422, "step": 9370 }, { "epoch": 0.8104021771998791, "grad_norm": 0.5360589804242816, "learning_rate": 8.562472710053444e-06, "loss": 0.1405, "step": 9380 }, { "epoch": 0.8112661454058491, "grad_norm": 0.576297123132208, "learning_rate": 8.559198617140081e-06, "loss": 0.1421, "step": 9390 }, { "epoch": 0.8121301136118191, "grad_norm": 0.589692619006291, "learning_rate": 8.555921427465124e-06, "loss": 0.1462, "step": 9400 }, { "epoch": 0.8129940818177891, "grad_norm": 0.586639880883335, "learning_rate": 8.552641143879965e-06, "loss": 0.1418, "step": 9410 }, { "epoch": 0.8138580500237591, "grad_norm": 0.5728715638136743, "learning_rate": 8.549357769238689e-06, "loss": 0.1482, "step": 9420 }, { "epoch": 0.8147220182297291, "grad_norm": 0.5577477890301851, "learning_rate": 8.546071306398075e-06, "loss": 0.142, "step": 9430 }, { "epoch": 0.8155859864356991, "grad_norm": 0.5773828818872149, "learning_rate": 8.54278175821758e-06, "loss": 0.1462, "step": 9440 }, { "epoch": 0.8164499546416691, "grad_norm": 0.6148625238176575, "learning_rate": 8.539489127559352e-06, "loss": 0.1439, "step": 9450 }, { "epoch": 0.8173139228476392, "grad_norm": 0.5809787205443636, "learning_rate": 8.53619341728822e-06, "loss": 0.1446, "step": 9460 }, { "epoch": 0.8181778910536093, "grad_norm": 0.5673805345122123, "learning_rate": 8.532894630271691e-06, "loss": 0.1464, "step": 9470 }, { "epoch": 0.8190418592595793, "grad_norm": 0.5714831001212896, "learning_rate": 8.529592769379947e-06, "loss": 0.1467, "step": 9480 }, { "epoch": 0.8199058274655493, "grad_norm": 0.8142466454645951, "learning_rate": 8.52628783748585e-06, "loss": 0.146, "step": 9490 }, { "epoch": 0.8207697956715193, "grad_norm": 0.5736789389590432, "learning_rate": 8.52297983746493e-06, "loss": 0.1467, "step": 9500 }, { "epoch": 0.8216337638774893, "grad_norm": 0.5758982922754664, "learning_rate": 8.519668772195389e-06, "loss": 0.1437, "step": 9510 }, { "epoch": 0.8224977320834593, "grad_norm": 0.5451983109559408, "learning_rate": 8.516354644558091e-06, "loss": 0.1428, "step": 9520 }, { "epoch": 0.8233617002894293, "grad_norm": 0.5781843496988237, "learning_rate": 8.513037457436571e-06, "loss": 0.1421, "step": 9530 }, { "epoch": 0.8242256684953994, "grad_norm": 0.5660860358702046, "learning_rate": 8.509717213717023e-06, "loss": 0.1466, "step": 9540 }, { "epoch": 0.8250896367013694, "grad_norm": 0.5875923545017205, "learning_rate": 8.5063939162883e-06, "loss": 0.1471, "step": 9550 }, { "epoch": 0.8259536049073394, "grad_norm": 0.5793638698834187, "learning_rate": 8.503067568041912e-06, "loss": 0.1435, "step": 9560 }, { "epoch": 0.8268175731133094, "grad_norm": 0.6082894408541321, "learning_rate": 8.499738171872024e-06, "loss": 0.1476, "step": 9570 }, { "epoch": 0.8276815413192794, "grad_norm": 0.5410052725196663, "learning_rate": 8.496405730675453e-06, "loss": 0.1413, "step": 9580 }, { "epoch": 0.8285455095252495, "grad_norm": 0.5668481687999716, "learning_rate": 8.493070247351665e-06, "loss": 0.1428, "step": 9590 }, { "epoch": 0.8294094777312195, "grad_norm": 0.5396188203926512, "learning_rate": 8.48973172480277e-06, "loss": 0.1452, "step": 9600 }, { "epoch": 0.8302734459371895, "grad_norm": 0.5944775835927977, "learning_rate": 8.48639016593353e-06, "loss": 0.1416, "step": 9610 }, { "epoch": 0.8311374141431596, "grad_norm": 0.5707292481605472, "learning_rate": 8.483045573651345e-06, "loss": 0.1443, "step": 9620 }, { "epoch": 0.8320013823491296, "grad_norm": 0.5596948995294987, "learning_rate": 8.479697950866249e-06, "loss": 0.1414, "step": 9630 }, { "epoch": 0.8328653505550996, "grad_norm": 0.5789700110781804, "learning_rate": 8.476347300490919e-06, "loss": 0.1428, "step": 9640 }, { "epoch": 0.8337293187610696, "grad_norm": 0.5816059812515278, "learning_rate": 8.472993625440666e-06, "loss": 0.1503, "step": 9650 }, { "epoch": 0.8345932869670396, "grad_norm": 0.5583523771021892, "learning_rate": 8.469636928633426e-06, "loss": 0.1464, "step": 9660 }, { "epoch": 0.8354572551730096, "grad_norm": 0.5783974918171014, "learning_rate": 8.466277212989778e-06, "loss": 0.1421, "step": 9670 }, { "epoch": 0.8363212233789796, "grad_norm": 0.5637539984292849, "learning_rate": 8.462914481432912e-06, "loss": 0.1407, "step": 9680 }, { "epoch": 0.8371851915849496, "grad_norm": 0.5469313058157097, "learning_rate": 8.459548736888651e-06, "loss": 0.1441, "step": 9690 }, { "epoch": 0.8380491597909197, "grad_norm": 0.5781885951173833, "learning_rate": 8.456179982285437e-06, "loss": 0.1423, "step": 9700 }, { "epoch": 0.8389131279968897, "grad_norm": 0.5929979133896947, "learning_rate": 8.452808220554332e-06, "loss": 0.1419, "step": 9710 }, { "epoch": 0.8397770962028598, "grad_norm": 0.5688459632744697, "learning_rate": 8.449433454629015e-06, "loss": 0.1462, "step": 9720 }, { "epoch": 0.8406410644088298, "grad_norm": 0.5603455231856802, "learning_rate": 8.446055687445774e-06, "loss": 0.1448, "step": 9730 }, { "epoch": 0.8415050326147998, "grad_norm": 0.5859270782944502, "learning_rate": 8.442674921943516e-06, "loss": 0.142, "step": 9740 }, { "epoch": 0.8423690008207698, "grad_norm": 0.5798335827513441, "learning_rate": 8.439291161063751e-06, "loss": 0.141, "step": 9750 }, { "epoch": 0.8432329690267398, "grad_norm": 0.5565217504939, "learning_rate": 8.4359044077506e-06, "loss": 0.1428, "step": 9760 }, { "epoch": 0.8440969372327098, "grad_norm": 0.6040073775552014, "learning_rate": 8.43251466495078e-06, "loss": 0.1396, "step": 9770 }, { "epoch": 0.8449609054386799, "grad_norm": 0.5687283030783332, "learning_rate": 8.429121935613614e-06, "loss": 0.1396, "step": 9780 }, { "epoch": 0.8458248736446499, "grad_norm": 0.5425553417122125, "learning_rate": 8.425726222691027e-06, "loss": 0.1434, "step": 9790 }, { "epoch": 0.8466888418506199, "grad_norm": 0.5518060633249242, "learning_rate": 8.422327529137534e-06, "loss": 0.1459, "step": 9800 }, { "epoch": 0.8475528100565899, "grad_norm": 0.5433804487253319, "learning_rate": 8.418925857910245e-06, "loss": 0.1408, "step": 9810 }, { "epoch": 0.8484167782625599, "grad_norm": 0.5442315827085944, "learning_rate": 8.415521211968862e-06, "loss": 0.1477, "step": 9820 }, { "epoch": 0.8492807464685299, "grad_norm": 0.5521307095439536, "learning_rate": 8.412113594275676e-06, "loss": 0.1432, "step": 9830 }, { "epoch": 0.8501447146744999, "grad_norm": 0.5785067887210904, "learning_rate": 8.408703007795559e-06, "loss": 0.1442, "step": 9840 }, { "epoch": 0.85100868288047, "grad_norm": 0.5888646780531525, "learning_rate": 8.405289455495971e-06, "loss": 0.1432, "step": 9850 }, { "epoch": 0.8518726510864401, "grad_norm": 0.5869356385591026, "learning_rate": 8.401872940346952e-06, "loss": 0.1435, "step": 9860 }, { "epoch": 0.8527366192924101, "grad_norm": 0.5647084533037493, "learning_rate": 8.398453465321115e-06, "loss": 0.1413, "step": 9870 }, { "epoch": 0.8536005874983801, "grad_norm": 0.5805929245861594, "learning_rate": 8.395031033393655e-06, "loss": 0.1471, "step": 9880 }, { "epoch": 0.8544645557043501, "grad_norm": 0.5818692090965515, "learning_rate": 8.391605647542336e-06, "loss": 0.1415, "step": 9890 }, { "epoch": 0.8553285239103201, "grad_norm": 0.5623579419758806, "learning_rate": 8.388177310747494e-06, "loss": 0.1445, "step": 9900 }, { "epoch": 0.8561924921162901, "grad_norm": 0.5842301010806707, "learning_rate": 8.384746025992026e-06, "loss": 0.1388, "step": 9910 }, { "epoch": 0.8570564603222601, "grad_norm": 0.552275823798508, "learning_rate": 8.381311796261407e-06, "loss": 0.1454, "step": 9920 }, { "epoch": 0.8579204285282301, "grad_norm": 0.5814597997265838, "learning_rate": 8.37787462454366e-06, "loss": 0.1426, "step": 9930 }, { "epoch": 0.8587843967342002, "grad_norm": 0.5391815636893464, "learning_rate": 8.374434513829377e-06, "loss": 0.1425, "step": 9940 }, { "epoch": 0.8596483649401702, "grad_norm": 0.5909735682466266, "learning_rate": 8.370991467111705e-06, "loss": 0.1457, "step": 9950 }, { "epoch": 0.8605123331461402, "grad_norm": 0.5908399739792674, "learning_rate": 8.367545487386345e-06, "loss": 0.1423, "step": 9960 }, { "epoch": 0.8613763013521103, "grad_norm": 0.5702025368686541, "learning_rate": 8.36409657765155e-06, "loss": 0.1407, "step": 9970 }, { "epoch": 0.8622402695580803, "grad_norm": 0.616393750494224, "learning_rate": 8.36064474090812e-06, "loss": 0.1454, "step": 9980 }, { "epoch": 0.8631042377640503, "grad_norm": 0.5793933000509822, "learning_rate": 8.357189980159405e-06, "loss": 0.142, "step": 9990 }, { "epoch": 0.8639682059700203, "grad_norm": 0.6120735127654011, "learning_rate": 8.353732298411298e-06, "loss": 0.143, "step": 10000 }, { "epoch": 0.8648321741759903, "grad_norm": 0.5672142424827209, "learning_rate": 8.350271698672236e-06, "loss": 0.1465, "step": 10010 }, { "epoch": 0.8656961423819604, "grad_norm": 0.5902729900228314, "learning_rate": 8.34680818395319e-06, "loss": 0.1435, "step": 10020 }, { "epoch": 0.8665601105879304, "grad_norm": 0.5691844420715534, "learning_rate": 8.343341757267671e-06, "loss": 0.1439, "step": 10030 }, { "epoch": 0.8674240787939004, "grad_norm": 0.5525245356489779, "learning_rate": 8.339872421631718e-06, "loss": 0.1421, "step": 10040 }, { "epoch": 0.8682880469998704, "grad_norm": 0.5633902551254112, "learning_rate": 8.336400180063913e-06, "loss": 0.1405, "step": 10050 }, { "epoch": 0.8691520152058404, "grad_norm": 0.5522452533333609, "learning_rate": 8.332925035585351e-06, "loss": 0.1406, "step": 10060 }, { "epoch": 0.8700159834118104, "grad_norm": 0.5683843284307902, "learning_rate": 8.329446991219665e-06, "loss": 0.1414, "step": 10070 }, { "epoch": 0.8708799516177804, "grad_norm": 0.8877940351300201, "learning_rate": 8.325966049993004e-06, "loss": 0.1443, "step": 10080 }, { "epoch": 0.8717439198237504, "grad_norm": 0.6050744894035649, "learning_rate": 8.322482214934044e-06, "loss": 0.1441, "step": 10090 }, { "epoch": 0.8726078880297206, "grad_norm": 0.5650088110025322, "learning_rate": 8.318995489073968e-06, "loss": 0.1416, "step": 10100 }, { "epoch": 0.8734718562356906, "grad_norm": 0.5571218501034207, "learning_rate": 8.315505875446489e-06, "loss": 0.1452, "step": 10110 }, { "epoch": 0.8743358244416606, "grad_norm": 0.531945157846496, "learning_rate": 8.31201337708782e-06, "loss": 0.1413, "step": 10120 }, { "epoch": 0.8751997926476306, "grad_norm": 0.5793899013617272, "learning_rate": 8.308517997036687e-06, "loss": 0.1419, "step": 10130 }, { "epoch": 0.8760637608536006, "grad_norm": 0.5522987674972991, "learning_rate": 8.305019738334328e-06, "loss": 0.1429, "step": 10140 }, { "epoch": 0.8769277290595706, "grad_norm": 0.599533080514142, "learning_rate": 8.301518604024481e-06, "loss": 0.1483, "step": 10150 }, { "epoch": 0.8777916972655406, "grad_norm": 0.5820611165673861, "learning_rate": 8.298014597153387e-06, "loss": 0.1449, "step": 10160 }, { "epoch": 0.8786556654715106, "grad_norm": 0.552105008952654, "learning_rate": 8.294507720769789e-06, "loss": 0.1422, "step": 10170 }, { "epoch": 0.8795196336774807, "grad_norm": 0.589431986283032, "learning_rate": 8.290997977924922e-06, "loss": 0.142, "step": 10180 }, { "epoch": 0.8803836018834507, "grad_norm": 0.5912403064747072, "learning_rate": 8.287485371672518e-06, "loss": 0.1455, "step": 10190 }, { "epoch": 0.8812475700894207, "grad_norm": 0.5835849936868334, "learning_rate": 8.283969905068803e-06, "loss": 0.141, "step": 10200 }, { "epoch": 0.8821115382953907, "grad_norm": 0.5560030512889691, "learning_rate": 8.280451581172484e-06, "loss": 0.1424, "step": 10210 }, { "epoch": 0.8829755065013607, "grad_norm": 0.5646674155359306, "learning_rate": 8.276930403044759e-06, "loss": 0.1457, "step": 10220 }, { "epoch": 0.8838394747073308, "grad_norm": 0.5737466726861257, "learning_rate": 8.273406373749313e-06, "loss": 0.1441, "step": 10230 }, { "epoch": 0.8847034429133008, "grad_norm": 0.5523276034253872, "learning_rate": 8.269879496352304e-06, "loss": 0.1405, "step": 10240 }, { "epoch": 0.8855674111192708, "grad_norm": 0.5534688383692349, "learning_rate": 8.266349773922372e-06, "loss": 0.137, "step": 10250 }, { "epoch": 0.8864313793252409, "grad_norm": 0.601059059729315, "learning_rate": 8.262817209530636e-06, "loss": 0.1398, "step": 10260 }, { "epoch": 0.8872953475312109, "grad_norm": 0.548502422051078, "learning_rate": 8.259281806250678e-06, "loss": 0.1411, "step": 10270 }, { "epoch": 0.8881593157371809, "grad_norm": 0.5333366385953059, "learning_rate": 8.255743567158561e-06, "loss": 0.142, "step": 10280 }, { "epoch": 0.8890232839431509, "grad_norm": 0.5154244022617628, "learning_rate": 8.252202495332808e-06, "loss": 0.1385, "step": 10290 }, { "epoch": 0.8898872521491209, "grad_norm": 0.5613558968123625, "learning_rate": 8.248658593854408e-06, "loss": 0.138, "step": 10300 }, { "epoch": 0.8907512203550909, "grad_norm": 0.5449671665400901, "learning_rate": 8.245111865806816e-06, "loss": 0.1427, "step": 10310 }, { "epoch": 0.8916151885610609, "grad_norm": 0.6077993286632034, "learning_rate": 8.24156231427594e-06, "loss": 0.1397, "step": 10320 }, { "epoch": 0.8924791567670309, "grad_norm": 0.5669974577554227, "learning_rate": 8.23800994235015e-06, "loss": 0.1409, "step": 10330 }, { "epoch": 0.893343124973001, "grad_norm": 0.553947870504879, "learning_rate": 8.234454753120268e-06, "loss": 0.1427, "step": 10340 }, { "epoch": 0.8942070931789711, "grad_norm": 0.5480355609367276, "learning_rate": 8.230896749679566e-06, "loss": 0.1445, "step": 10350 }, { "epoch": 0.8950710613849411, "grad_norm": 0.567683345822826, "learning_rate": 8.227335935123766e-06, "loss": 0.1417, "step": 10360 }, { "epoch": 0.8959350295909111, "grad_norm": 0.5923492791517332, "learning_rate": 8.223772312551035e-06, "loss": 0.1452, "step": 10370 }, { "epoch": 0.8967989977968811, "grad_norm": 0.559847173910797, "learning_rate": 8.220205885061986e-06, "loss": 0.142, "step": 10380 }, { "epoch": 0.8976629660028511, "grad_norm": 0.52678466638974, "learning_rate": 8.216636655759666e-06, "loss": 0.1388, "step": 10390 }, { "epoch": 0.8985269342088211, "grad_norm": 0.5679754532984375, "learning_rate": 8.213064627749567e-06, "loss": 0.1431, "step": 10400 }, { "epoch": 0.8993909024147911, "grad_norm": 0.535769991144621, "learning_rate": 8.209489804139614e-06, "loss": 0.142, "step": 10410 }, { "epoch": 0.9002548706207611, "grad_norm": 0.5849730580217068, "learning_rate": 8.205912188040164e-06, "loss": 0.1458, "step": 10420 }, { "epoch": 0.9011188388267312, "grad_norm": 0.5895526919570224, "learning_rate": 8.202331782564e-06, "loss": 0.1412, "step": 10430 }, { "epoch": 0.9019828070327012, "grad_norm": 0.5626343062982044, "learning_rate": 8.198748590826336e-06, "loss": 0.1417, "step": 10440 }, { "epoch": 0.9028467752386712, "grad_norm": 0.5514554215350627, "learning_rate": 8.195162615944809e-06, "loss": 0.1388, "step": 10450 }, { "epoch": 0.9037107434446412, "grad_norm": 0.596576561211133, "learning_rate": 8.191573861039481e-06, "loss": 0.138, "step": 10460 }, { "epoch": 0.9045747116506112, "grad_norm": 0.5608018302461737, "learning_rate": 8.187982329232826e-06, "loss": 0.1406, "step": 10470 }, { "epoch": 0.9054386798565813, "grad_norm": 0.5922211818250301, "learning_rate": 8.18438802364974e-06, "loss": 0.1407, "step": 10480 }, { "epoch": 0.9063026480625513, "grad_norm": 0.5626611609158868, "learning_rate": 8.18079094741753e-06, "loss": 0.1403, "step": 10490 }, { "epoch": 0.9071666162685214, "grad_norm": 0.55739369913719, "learning_rate": 8.177191103665912e-06, "loss": 0.1428, "step": 10500 }, { "epoch": 0.9080305844744914, "grad_norm": 0.5589210208749367, "learning_rate": 8.173588495527013e-06, "loss": 0.1444, "step": 10510 }, { "epoch": 0.9088945526804614, "grad_norm": 0.5948997818661523, "learning_rate": 8.169983126135366e-06, "loss": 0.1406, "step": 10520 }, { "epoch": 0.9097585208864314, "grad_norm": 0.5637575295315885, "learning_rate": 8.166374998627903e-06, "loss": 0.1419, "step": 10530 }, { "epoch": 0.9106224890924014, "grad_norm": 0.5520322935532269, "learning_rate": 8.162764116143956e-06, "loss": 0.1428, "step": 10540 }, { "epoch": 0.9114864572983714, "grad_norm": 0.5476065819654563, "learning_rate": 8.159150481825256e-06, "loss": 0.1445, "step": 10550 }, { "epoch": 0.9123504255043414, "grad_norm": 0.5607845386057297, "learning_rate": 8.155534098815929e-06, "loss": 0.1409, "step": 10560 }, { "epoch": 0.9132143937103114, "grad_norm": 0.5895145175268459, "learning_rate": 8.15191497026249e-06, "loss": 0.1392, "step": 10570 }, { "epoch": 0.9140783619162814, "grad_norm": 0.5894062695205331, "learning_rate": 8.148293099313843e-06, "loss": 0.1353, "step": 10580 }, { "epoch": 0.9149423301222515, "grad_norm": 0.5743939777288788, "learning_rate": 8.14466848912128e-06, "loss": 0.1382, "step": 10590 }, { "epoch": 0.9158062983282215, "grad_norm": 0.5738967722486941, "learning_rate": 8.141041142838475e-06, "loss": 0.1437, "step": 10600 }, { "epoch": 0.9166702665341916, "grad_norm": 0.5508385159709146, "learning_rate": 8.137411063621488e-06, "loss": 0.1395, "step": 10610 }, { "epoch": 0.9175342347401616, "grad_norm": 0.5549344070840297, "learning_rate": 8.133778254628744e-06, "loss": 0.1391, "step": 10620 }, { "epoch": 0.9183982029461316, "grad_norm": 0.5557337487994121, "learning_rate": 8.130142719021055e-06, "loss": 0.1428, "step": 10630 }, { "epoch": 0.9192621711521016, "grad_norm": 0.5335590128180864, "learning_rate": 8.126504459961601e-06, "loss": 0.1435, "step": 10640 }, { "epoch": 0.9201261393580716, "grad_norm": 0.5618082432643633, "learning_rate": 8.122863480615932e-06, "loss": 0.1434, "step": 10650 }, { "epoch": 0.9209901075640416, "grad_norm": 0.5483727536324223, "learning_rate": 8.119219784151964e-06, "loss": 0.1407, "step": 10660 }, { "epoch": 0.9218540757700117, "grad_norm": 0.5225716204079307, "learning_rate": 8.11557337373998e-06, "loss": 0.1426, "step": 10670 }, { "epoch": 0.9227180439759817, "grad_norm": 0.5802251936874245, "learning_rate": 8.11192425255262e-06, "loss": 0.1414, "step": 10680 }, { "epoch": 0.9235820121819517, "grad_norm": 0.549636716436263, "learning_rate": 8.108272423764883e-06, "loss": 0.1439, "step": 10690 }, { "epoch": 0.9244459803879217, "grad_norm": 0.5654550374218119, "learning_rate": 8.104617890554129e-06, "loss": 0.1427, "step": 10700 }, { "epoch": 0.9253099485938917, "grad_norm": 0.5673693824798013, "learning_rate": 8.100960656100069e-06, "loss": 0.1429, "step": 10710 }, { "epoch": 0.9261739167998617, "grad_norm": 0.5132529576114598, "learning_rate": 8.097300723584757e-06, "loss": 0.1377, "step": 10720 }, { "epoch": 0.9270378850058317, "grad_norm": 0.5334736844206937, "learning_rate": 8.093638096192606e-06, "loss": 0.1405, "step": 10730 }, { "epoch": 0.9279018532118019, "grad_norm": 0.5510539851521975, "learning_rate": 8.089972777110366e-06, "loss": 0.1383, "step": 10740 }, { "epoch": 0.9287658214177719, "grad_norm": 0.543077012923459, "learning_rate": 8.08630476952713e-06, "loss": 0.1408, "step": 10750 }, { "epoch": 0.9296297896237419, "grad_norm": 0.5386576487008168, "learning_rate": 8.082634076634334e-06, "loss": 0.142, "step": 10760 }, { "epoch": 0.9304937578297119, "grad_norm": 0.5565406495408177, "learning_rate": 8.078960701625746e-06, "loss": 0.1373, "step": 10770 }, { "epoch": 0.9313577260356819, "grad_norm": 0.5948123321042771, "learning_rate": 8.07528464769747e-06, "loss": 0.1382, "step": 10780 }, { "epoch": 0.9322216942416519, "grad_norm": 0.5446315002732925, "learning_rate": 8.071605918047938e-06, "loss": 0.1379, "step": 10790 }, { "epoch": 0.9330856624476219, "grad_norm": 0.5539597975139731, "learning_rate": 8.067924515877914e-06, "loss": 0.138, "step": 10800 }, { "epoch": 0.9339496306535919, "grad_norm": 0.5552871380668192, "learning_rate": 8.064240444390487e-06, "loss": 0.1402, "step": 10810 }, { "epoch": 0.934813598859562, "grad_norm": 0.5673123265243116, "learning_rate": 8.060553706791066e-06, "loss": 0.1431, "step": 10820 }, { "epoch": 0.935677567065532, "grad_norm": 0.5909182056894116, "learning_rate": 8.05686430628738e-06, "loss": 0.1429, "step": 10830 }, { "epoch": 0.936541535271502, "grad_norm": 0.5504559786897197, "learning_rate": 8.053172246089476e-06, "loss": 0.1454, "step": 10840 }, { "epoch": 0.937405503477472, "grad_norm": 0.5806631353235165, "learning_rate": 8.049477529409712e-06, "loss": 0.1472, "step": 10850 }, { "epoch": 0.9382694716834421, "grad_norm": 0.5709906117492967, "learning_rate": 8.045780159462769e-06, "loss": 0.1392, "step": 10860 }, { "epoch": 0.9391334398894121, "grad_norm": 0.5940409325929809, "learning_rate": 8.042080139465617e-06, "loss": 0.1371, "step": 10870 }, { "epoch": 0.9399974080953821, "grad_norm": 0.5644194425180563, "learning_rate": 8.03837747263755e-06, "loss": 0.1401, "step": 10880 }, { "epoch": 0.9408613763013521, "grad_norm": 0.5513413048942039, "learning_rate": 8.034672162200153e-06, "loss": 0.1433, "step": 10890 }, { "epoch": 0.9417253445073221, "grad_norm": 0.5451574269752696, "learning_rate": 8.030964211377317e-06, "loss": 0.1409, "step": 10900 }, { "epoch": 0.9425893127132922, "grad_norm": 0.5651146180037745, "learning_rate": 8.027253623395231e-06, "loss": 0.1382, "step": 10910 }, { "epoch": 0.9434532809192622, "grad_norm": 0.5398822960393462, "learning_rate": 8.023540401482373e-06, "loss": 0.1431, "step": 10920 }, { "epoch": 0.9443172491252322, "grad_norm": 0.5437530423433509, "learning_rate": 8.019824548869518e-06, "loss": 0.1425, "step": 10930 }, { "epoch": 0.9451812173312022, "grad_norm": 0.5546789213874745, "learning_rate": 8.016106068789727e-06, "loss": 0.1373, "step": 10940 }, { "epoch": 0.9460451855371722, "grad_norm": 0.5471020932209029, "learning_rate": 8.012384964478346e-06, "loss": 0.138, "step": 10950 }, { "epoch": 0.9469091537431422, "grad_norm": 0.5697798890296741, "learning_rate": 8.008661239173012e-06, "loss": 0.1407, "step": 10960 }, { "epoch": 0.9477731219491122, "grad_norm": 0.5575286625262722, "learning_rate": 8.004934896113633e-06, "loss": 0.1397, "step": 10970 }, { "epoch": 0.9486370901550822, "grad_norm": 0.5265828755856463, "learning_rate": 8.001205938542398e-06, "loss": 0.1424, "step": 10980 }, { "epoch": 0.9495010583610524, "grad_norm": 0.551199244101437, "learning_rate": 7.997474369703772e-06, "loss": 0.1419, "step": 10990 }, { "epoch": 0.9503650265670224, "grad_norm": 0.5545419227985952, "learning_rate": 7.993740192844493e-06, "loss": 0.1408, "step": 11000 }, { "epoch": 0.9512289947729924, "grad_norm": 0.5783342185948568, "learning_rate": 7.990003411213562e-06, "loss": 0.1387, "step": 11010 }, { "epoch": 0.9520929629789624, "grad_norm": 0.5520321411170842, "learning_rate": 7.986264028062256e-06, "loss": 0.1414, "step": 11020 }, { "epoch": 0.9529569311849324, "grad_norm": 0.5376112537460771, "learning_rate": 7.982522046644106e-06, "loss": 0.1407, "step": 11030 }, { "epoch": 0.9538208993909024, "grad_norm": 0.5973209607733642, "learning_rate": 7.97877747021491e-06, "loss": 0.1381, "step": 11040 }, { "epoch": 0.9546848675968724, "grad_norm": 0.5722729711850041, "learning_rate": 7.975030302032722e-06, "loss": 0.143, "step": 11050 }, { "epoch": 0.9555488358028424, "grad_norm": 0.5505304492880758, "learning_rate": 7.971280545357851e-06, "loss": 0.1429, "step": 11060 }, { "epoch": 0.9564128040088125, "grad_norm": 0.5430702520493753, "learning_rate": 7.967528203452856e-06, "loss": 0.1377, "step": 11070 }, { "epoch": 0.9572767722147825, "grad_norm": 0.5379484526659242, "learning_rate": 7.963773279582548e-06, "loss": 0.1425, "step": 11080 }, { "epoch": 0.9581407404207525, "grad_norm": 0.59660987229937, "learning_rate": 7.960015777013984e-06, "loss": 0.1404, "step": 11090 }, { "epoch": 0.9590047086267225, "grad_norm": 0.5421623266199536, "learning_rate": 7.956255699016466e-06, "loss": 0.1424, "step": 11100 }, { "epoch": 0.9598686768326925, "grad_norm": 0.5322756578697195, "learning_rate": 7.952493048861534e-06, "loss": 0.1396, "step": 11110 }, { "epoch": 0.9607326450386626, "grad_norm": 0.6187852901428197, "learning_rate": 7.948727829822967e-06, "loss": 0.141, "step": 11120 }, { "epoch": 0.9615966132446326, "grad_norm": 0.6065087922615199, "learning_rate": 7.94496004517678e-06, "loss": 0.1411, "step": 11130 }, { "epoch": 0.9624605814506026, "grad_norm": 0.5659172316900728, "learning_rate": 7.941189698201218e-06, "loss": 0.1396, "step": 11140 }, { "epoch": 0.9633245496565727, "grad_norm": 0.5317942345273444, "learning_rate": 7.937416792176758e-06, "loss": 0.141, "step": 11150 }, { "epoch": 0.9641885178625427, "grad_norm": 0.554066543556405, "learning_rate": 7.933641330386104e-06, "loss": 0.1397, "step": 11160 }, { "epoch": 0.9650524860685127, "grad_norm": 0.5254678736887252, "learning_rate": 7.929863316114179e-06, "loss": 0.1388, "step": 11170 }, { "epoch": 0.9659164542744827, "grad_norm": 0.5302754296193825, "learning_rate": 7.926082752648135e-06, "loss": 0.1444, "step": 11180 }, { "epoch": 0.9667804224804527, "grad_norm": 0.5698851699270783, "learning_rate": 7.922299643277331e-06, "loss": 0.1409, "step": 11190 }, { "epoch": 0.9676443906864227, "grad_norm": 0.5464315412902958, "learning_rate": 7.918513991293352e-06, "loss": 0.1414, "step": 11200 }, { "epoch": 0.9685083588923927, "grad_norm": 0.5599688882235971, "learning_rate": 7.91472579998999e-06, "loss": 0.1402, "step": 11210 }, { "epoch": 0.9693723270983627, "grad_norm": 0.5753649910885886, "learning_rate": 7.910935072663245e-06, "loss": 0.1409, "step": 11220 }, { "epoch": 0.9702362953043328, "grad_norm": 0.5646681396820311, "learning_rate": 7.907141812611325e-06, "loss": 0.141, "step": 11230 }, { "epoch": 0.9711002635103029, "grad_norm": 0.5455395876393425, "learning_rate": 7.903346023134645e-06, "loss": 0.14, "step": 11240 }, { "epoch": 0.9719642317162729, "grad_norm": 0.5643721404005844, "learning_rate": 7.899547707535816e-06, "loss": 0.1405, "step": 11250 }, { "epoch": 0.9728281999222429, "grad_norm": 0.5734263716016804, "learning_rate": 7.895746869119647e-06, "loss": 0.1386, "step": 11260 }, { "epoch": 0.9736921681282129, "grad_norm": 0.5754019383795476, "learning_rate": 7.891943511193148e-06, "loss": 0.1412, "step": 11270 }, { "epoch": 0.9745561363341829, "grad_norm": 0.5357745666067263, "learning_rate": 7.888137637065514e-06, "loss": 0.1407, "step": 11280 }, { "epoch": 0.9754201045401529, "grad_norm": 0.5463072339025498, "learning_rate": 7.884329250048134e-06, "loss": 0.1369, "step": 11290 }, { "epoch": 0.9762840727461229, "grad_norm": 0.563329757491908, "learning_rate": 7.880518353454576e-06, "loss": 0.1366, "step": 11300 }, { "epoch": 0.977148040952093, "grad_norm": 0.5750546590943822, "learning_rate": 7.876704950600607e-06, "loss": 0.1379, "step": 11310 }, { "epoch": 0.978012009158063, "grad_norm": 0.5285988056427497, "learning_rate": 7.872889044804155e-06, "loss": 0.1399, "step": 11320 }, { "epoch": 0.978875977364033, "grad_norm": 0.532439311490372, "learning_rate": 7.869070639385343e-06, "loss": 0.1422, "step": 11330 }, { "epoch": 0.979739945570003, "grad_norm": 0.5398829784687519, "learning_rate": 7.865249737666458e-06, "loss": 0.1375, "step": 11340 }, { "epoch": 0.980603913775973, "grad_norm": 0.5315198727011683, "learning_rate": 7.861426342971962e-06, "loss": 0.1333, "step": 11350 }, { "epoch": 0.981467881981943, "grad_norm": 0.5484806587096321, "learning_rate": 7.857600458628485e-06, "loss": 0.1383, "step": 11360 }, { "epoch": 0.9823318501879131, "grad_norm": 0.5632626864322672, "learning_rate": 7.85377208796483e-06, "loss": 0.1408, "step": 11370 }, { "epoch": 0.9831958183938831, "grad_norm": 0.5756731418315327, "learning_rate": 7.849941234311952e-06, "loss": 0.1423, "step": 11380 }, { "epoch": 0.9840597865998532, "grad_norm": 0.569984058319945, "learning_rate": 7.846107901002976e-06, "loss": 0.1418, "step": 11390 }, { "epoch": 0.9849237548058232, "grad_norm": 0.5394728565154906, "learning_rate": 7.84227209137318e-06, "loss": 0.1397, "step": 11400 }, { "epoch": 0.9857877230117932, "grad_norm": 0.5294475608511082, "learning_rate": 7.838433808759994e-06, "loss": 0.1417, "step": 11410 }, { "epoch": 0.9866516912177632, "grad_norm": 0.5788220534347303, "learning_rate": 7.834593056503009e-06, "loss": 0.1403, "step": 11420 }, { "epoch": 0.9875156594237332, "grad_norm": 0.5495378815031298, "learning_rate": 7.830749837943952e-06, "loss": 0.1438, "step": 11430 }, { "epoch": 0.9883796276297032, "grad_norm": 0.5602649448652921, "learning_rate": 7.826904156426706e-06, "loss": 0.1349, "step": 11440 }, { "epoch": 0.9892435958356732, "grad_norm": 0.510550749078522, "learning_rate": 7.823056015297294e-06, "loss": 0.1389, "step": 11450 }, { "epoch": 0.9901075640416432, "grad_norm": 0.5618787773731678, "learning_rate": 7.819205417903879e-06, "loss": 0.1391, "step": 11460 }, { "epoch": 0.9909715322476133, "grad_norm": 0.5415142910705364, "learning_rate": 7.81535236759676e-06, "loss": 0.1383, "step": 11470 }, { "epoch": 0.9918355004535833, "grad_norm": 0.5378807964604567, "learning_rate": 7.81149686772837e-06, "loss": 0.1381, "step": 11480 }, { "epoch": 0.9926994686595533, "grad_norm": 0.5086407896763732, "learning_rate": 7.807638921653275e-06, "loss": 0.1396, "step": 11490 }, { "epoch": 0.9935634368655234, "grad_norm": 0.5763401451203161, "learning_rate": 7.803778532728168e-06, "loss": 0.1385, "step": 11500 }, { "epoch": 0.9944274050714934, "grad_norm": 0.5283421801533311, "learning_rate": 7.799915704311869e-06, "loss": 0.1392, "step": 11510 }, { "epoch": 0.9952913732774634, "grad_norm": 0.5407041178210695, "learning_rate": 7.79605043976532e-06, "loss": 0.1378, "step": 11520 }, { "epoch": 0.9961553414834334, "grad_norm": 0.5444185941688278, "learning_rate": 7.792182742451579e-06, "loss": 0.1389, "step": 11530 }, { "epoch": 0.9970193096894034, "grad_norm": 0.5155116418138258, "learning_rate": 7.78831261573583e-06, "loss": 0.1362, "step": 11540 }, { "epoch": 0.9978832778953735, "grad_norm": 0.5558368170116909, "learning_rate": 7.784440062985357e-06, "loss": 0.1402, "step": 11550 }, { "epoch": 0.9987472461013435, "grad_norm": 0.5370499131190705, "learning_rate": 7.78056508756957e-06, "loss": 0.1378, "step": 11560 }, { "epoch": 0.9996112143073135, "grad_norm": 0.5314120316897529, "learning_rate": 7.776687692859972e-06, "loss": 0.1361, "step": 11570 }, { "epoch": 1.0004751825132836, "grad_norm": 0.6484295050544892, "learning_rate": 7.772807882230184e-06, "loss": 0.1253, "step": 11580 }, { "epoch": 1.0013391507192535, "grad_norm": 0.5288373554942792, "learning_rate": 7.768925659055922e-06, "loss": 0.1179, "step": 11590 }, { "epoch": 1.0022031189252236, "grad_norm": 0.5226558697799325, "learning_rate": 7.765041026715e-06, "loss": 0.1162, "step": 11600 }, { "epoch": 1.0030670871311935, "grad_norm": 0.5559986175018743, "learning_rate": 7.761153988587336e-06, "loss": 0.1136, "step": 11610 }, { "epoch": 1.0039310553371636, "grad_norm": 0.5738333391751079, "learning_rate": 7.757264548054931e-06, "loss": 0.1129, "step": 11620 }, { "epoch": 1.0047950235431335, "grad_norm": 0.5874896405994214, "learning_rate": 7.75337270850188e-06, "loss": 0.1155, "step": 11630 }, { "epoch": 1.0056589917491037, "grad_norm": 0.57700834782927, "learning_rate": 7.749478473314371e-06, "loss": 0.114, "step": 11640 }, { "epoch": 1.0065229599550736, "grad_norm": 0.5253879750024364, "learning_rate": 7.745581845880668e-06, "loss": 0.1147, "step": 11650 }, { "epoch": 1.0073869281610437, "grad_norm": 0.578799776263045, "learning_rate": 7.741682829591124e-06, "loss": 0.1162, "step": 11660 }, { "epoch": 1.0082508963670136, "grad_norm": 0.5520505063573035, "learning_rate": 7.737781427838163e-06, "loss": 0.1111, "step": 11670 }, { "epoch": 1.0091148645729837, "grad_norm": 0.5445981382109637, "learning_rate": 7.733877644016288e-06, "loss": 0.1146, "step": 11680 }, { "epoch": 1.0099788327789538, "grad_norm": 0.51984767612585, "learning_rate": 7.729971481522079e-06, "loss": 0.115, "step": 11690 }, { "epoch": 1.0108428009849237, "grad_norm": 0.579207833886054, "learning_rate": 7.726062943754177e-06, "loss": 0.1146, "step": 11700 }, { "epoch": 1.0117067691908939, "grad_norm": 0.6003541965292548, "learning_rate": 7.722152034113299e-06, "loss": 0.114, "step": 11710 }, { "epoch": 1.0125707373968638, "grad_norm": 0.5344757173700486, "learning_rate": 7.718238756002214e-06, "loss": 0.114, "step": 11720 }, { "epoch": 1.0134347056028339, "grad_norm": 0.5717675830192779, "learning_rate": 7.714323112825764e-06, "loss": 0.1142, "step": 11730 }, { "epoch": 1.0142986738088038, "grad_norm": 0.5589395318387775, "learning_rate": 7.710405107990841e-06, "loss": 0.1124, "step": 11740 }, { "epoch": 1.015162642014774, "grad_norm": 0.5191208223948365, "learning_rate": 7.706484744906394e-06, "loss": 0.1132, "step": 11750 }, { "epoch": 1.0160266102207438, "grad_norm": 0.5739974899486876, "learning_rate": 7.702562026983425e-06, "loss": 0.113, "step": 11760 }, { "epoch": 1.016890578426714, "grad_norm": 0.5538985992217317, "learning_rate": 7.698636957634984e-06, "loss": 0.1125, "step": 11770 }, { "epoch": 1.0177545466326838, "grad_norm": 0.5556788352650281, "learning_rate": 7.694709540276165e-06, "loss": 0.1086, "step": 11780 }, { "epoch": 1.018618514838654, "grad_norm": 0.557737388023279, "learning_rate": 7.690779778324106e-06, "loss": 0.1139, "step": 11790 }, { "epoch": 1.0194824830446239, "grad_norm": 0.5533298134289029, "learning_rate": 7.68684767519799e-06, "loss": 0.1166, "step": 11800 }, { "epoch": 1.020346451250594, "grad_norm": 0.5575744213533839, "learning_rate": 7.68291323431903e-06, "loss": 0.1123, "step": 11810 }, { "epoch": 1.021210419456564, "grad_norm": 0.5576624574436335, "learning_rate": 7.678976459110475e-06, "loss": 0.1147, "step": 11820 }, { "epoch": 1.022074387662534, "grad_norm": 0.5582794921471507, "learning_rate": 7.675037352997606e-06, "loss": 0.1128, "step": 11830 }, { "epoch": 1.0229383558685041, "grad_norm": 0.5765353203515869, "learning_rate": 7.67109591940773e-06, "loss": 0.1135, "step": 11840 }, { "epoch": 1.023802324074474, "grad_norm": 0.5594470994345416, "learning_rate": 7.667152161770185e-06, "loss": 0.1132, "step": 11850 }, { "epoch": 1.0246662922804441, "grad_norm": 0.5843630847176365, "learning_rate": 7.663206083516323e-06, "loss": 0.1148, "step": 11860 }, { "epoch": 1.025530260486414, "grad_norm": 0.5413541740109529, "learning_rate": 7.659257688079524e-06, "loss": 0.1113, "step": 11870 }, { "epoch": 1.0263942286923842, "grad_norm": 0.5822217976514313, "learning_rate": 7.655306978895173e-06, "loss": 0.116, "step": 11880 }, { "epoch": 1.027258196898354, "grad_norm": 0.5979006652450949, "learning_rate": 7.651353959400678e-06, "loss": 0.1135, "step": 11890 }, { "epoch": 1.0281221651043242, "grad_norm": 0.5665894234503989, "learning_rate": 7.647398633035452e-06, "loss": 0.1144, "step": 11900 }, { "epoch": 1.028986133310294, "grad_norm": 0.5375518216139221, "learning_rate": 7.643441003240918e-06, "loss": 0.1177, "step": 11910 }, { "epoch": 1.0298501015162642, "grad_norm": 0.5546406845601831, "learning_rate": 7.639481073460501e-06, "loss": 0.1154, "step": 11920 }, { "epoch": 1.0307140697222341, "grad_norm": 0.5670480535796274, "learning_rate": 7.635518847139627e-06, "loss": 0.112, "step": 11930 }, { "epoch": 1.0315780379282042, "grad_norm": 0.5887507475464324, "learning_rate": 7.631554327725722e-06, "loss": 0.1095, "step": 11940 }, { "epoch": 1.0324420061341744, "grad_norm": 0.5896886428008532, "learning_rate": 7.627587518668205e-06, "loss": 0.1166, "step": 11950 }, { "epoch": 1.0333059743401443, "grad_norm": 0.5690679992282057, "learning_rate": 7.623618423418488e-06, "loss": 0.1135, "step": 11960 }, { "epoch": 1.0341699425461144, "grad_norm": 0.5763884873413326, "learning_rate": 7.619647045429975e-06, "loss": 0.1145, "step": 11970 }, { "epoch": 1.0350339107520843, "grad_norm": 0.5547331293683502, "learning_rate": 7.615673388158052e-06, "loss": 0.1125, "step": 11980 }, { "epoch": 1.0358978789580544, "grad_norm": 0.5809540964348433, "learning_rate": 7.6116974550600895e-06, "loss": 0.1147, "step": 11990 }, { "epoch": 1.0367618471640243, "grad_norm": 0.5617878261436202, "learning_rate": 7.60771924959544e-06, "loss": 0.1152, "step": 12000 }, { "epoch": 1.0376258153699944, "grad_norm": 0.5691052166008522, "learning_rate": 7.603738775225429e-06, "loss": 0.1118, "step": 12010 }, { "epoch": 1.0384897835759643, "grad_norm": 0.5927470323848189, "learning_rate": 7.599756035413359e-06, "loss": 0.1142, "step": 12020 }, { "epoch": 1.0393537517819345, "grad_norm": 0.5348052360596652, "learning_rate": 7.595771033624507e-06, "loss": 0.1138, "step": 12030 }, { "epoch": 1.0402177199879044, "grad_norm": 0.5942881290559787, "learning_rate": 7.5917837733261104e-06, "loss": 0.1136, "step": 12040 }, { "epoch": 1.0410816881938745, "grad_norm": 0.5870254311070779, "learning_rate": 7.5877942579873755e-06, "loss": 0.1111, "step": 12050 }, { "epoch": 1.0419456563998444, "grad_norm": 0.5560655753288578, "learning_rate": 7.583802491079473e-06, "loss": 0.1166, "step": 12060 }, { "epoch": 1.0428096246058145, "grad_norm": 0.5812311683484401, "learning_rate": 7.579808476075529e-06, "loss": 0.1134, "step": 12070 }, { "epoch": 1.0436735928117846, "grad_norm": 0.587965901597796, "learning_rate": 7.575812216450626e-06, "loss": 0.1131, "step": 12080 }, { "epoch": 1.0445375610177545, "grad_norm": 0.5807654543387688, "learning_rate": 7.571813715681804e-06, "loss": 0.1107, "step": 12090 }, { "epoch": 1.0454015292237246, "grad_norm": 0.5819264052078913, "learning_rate": 7.567812977248046e-06, "loss": 0.1123, "step": 12100 }, { "epoch": 1.0462654974296945, "grad_norm": 0.5915930314280453, "learning_rate": 7.5638100046302855e-06, "loss": 0.1131, "step": 12110 }, { "epoch": 1.0471294656356647, "grad_norm": 0.5258533900711518, "learning_rate": 7.5598048013114015e-06, "loss": 0.1136, "step": 12120 }, { "epoch": 1.0479934338416346, "grad_norm": 0.5836672177555081, "learning_rate": 7.555797370776212e-06, "loss": 0.1181, "step": 12130 }, { "epoch": 1.0488574020476047, "grad_norm": 0.563741150677117, "learning_rate": 7.551787716511472e-06, "loss": 0.1143, "step": 12140 }, { "epoch": 1.0497213702535746, "grad_norm": 0.5745656695349525, "learning_rate": 7.547775842005871e-06, "loss": 0.114, "step": 12150 }, { "epoch": 1.0505853384595447, "grad_norm": 0.5502725240340415, "learning_rate": 7.543761750750034e-06, "loss": 0.114, "step": 12160 }, { "epoch": 1.0514493066655146, "grad_norm": 0.5573865072308392, "learning_rate": 7.53974544623651e-06, "loss": 0.1123, "step": 12170 }, { "epoch": 1.0523132748714847, "grad_norm": 0.5424078163988334, "learning_rate": 7.535726931959781e-06, "loss": 0.1125, "step": 12180 }, { "epoch": 1.0531772430774549, "grad_norm": 0.6021571647004282, "learning_rate": 7.531706211416239e-06, "loss": 0.1138, "step": 12190 }, { "epoch": 1.0540412112834248, "grad_norm": 0.5664122542057618, "learning_rate": 7.527683288104208e-06, "loss": 0.1115, "step": 12200 }, { "epoch": 1.0549051794893949, "grad_norm": 0.5716818115749401, "learning_rate": 7.523658165523924e-06, "loss": 0.1114, "step": 12210 }, { "epoch": 1.0557691476953648, "grad_norm": 0.6063170462255332, "learning_rate": 7.5196308471775345e-06, "loss": 0.1112, "step": 12220 }, { "epoch": 1.056633115901335, "grad_norm": 1.2632340648066684, "learning_rate": 7.5156013365691005e-06, "loss": 0.1161, "step": 12230 }, { "epoch": 1.0574970841073048, "grad_norm": 0.5509074426756472, "learning_rate": 7.51156963720459e-06, "loss": 0.1151, "step": 12240 }, { "epoch": 1.058361052313275, "grad_norm": 0.5466414245187583, "learning_rate": 7.5075357525918725e-06, "loss": 0.1128, "step": 12250 }, { "epoch": 1.0592250205192448, "grad_norm": 0.596160426575199, "learning_rate": 7.503499686240719e-06, "loss": 0.1114, "step": 12260 }, { "epoch": 1.060088988725215, "grad_norm": 0.5642196283298498, "learning_rate": 7.499461441662807e-06, "loss": 0.1136, "step": 12270 }, { "epoch": 1.0609529569311849, "grad_norm": 0.5849920421619905, "learning_rate": 7.495421022371697e-06, "loss": 0.1134, "step": 12280 }, { "epoch": 1.061816925137155, "grad_norm": 0.5787275769912021, "learning_rate": 7.491378431882851e-06, "loss": 0.1149, "step": 12290 }, { "epoch": 1.0626808933431249, "grad_norm": 0.5483503453537147, "learning_rate": 7.487333673713615e-06, "loss": 0.1167, "step": 12300 }, { "epoch": 1.063544861549095, "grad_norm": 0.5359573243330554, "learning_rate": 7.483286751383224e-06, "loss": 0.1146, "step": 12310 }, { "epoch": 1.064408829755065, "grad_norm": 0.5875705499528636, "learning_rate": 7.479237668412793e-06, "loss": 0.112, "step": 12320 }, { "epoch": 1.065272797961035, "grad_norm": 0.5561394006831806, "learning_rate": 7.47518642832532e-06, "loss": 0.1139, "step": 12330 }, { "epoch": 1.0661367661670051, "grad_norm": 0.5633666250557524, "learning_rate": 7.47113303464568e-06, "loss": 0.1125, "step": 12340 }, { "epoch": 1.067000734372975, "grad_norm": 0.5623851970677978, "learning_rate": 7.4670774909006174e-06, "loss": 0.1181, "step": 12350 }, { "epoch": 1.0678647025789452, "grad_norm": 0.5587675735584775, "learning_rate": 7.463019800618749e-06, "loss": 0.1133, "step": 12360 }, { "epoch": 1.068728670784915, "grad_norm": 0.584981094727008, "learning_rate": 7.458959967330565e-06, "loss": 0.1135, "step": 12370 }, { "epoch": 1.0695926389908852, "grad_norm": 0.6284088915860303, "learning_rate": 7.4548979945684105e-06, "loss": 0.1184, "step": 12380 }, { "epoch": 1.070456607196855, "grad_norm": 0.5931271494335132, "learning_rate": 7.450833885866502e-06, "loss": 0.1125, "step": 12390 }, { "epoch": 1.0713205754028252, "grad_norm": 0.5550532429183336, "learning_rate": 7.446767644760906e-06, "loss": 0.1127, "step": 12400 }, { "epoch": 1.0721845436087951, "grad_norm": 0.5711627384957843, "learning_rate": 7.4426992747895495e-06, "loss": 0.1144, "step": 12410 }, { "epoch": 1.0730485118147652, "grad_norm": 0.5497441584250377, "learning_rate": 7.43862877949221e-06, "loss": 0.1152, "step": 12420 }, { "epoch": 1.0739124800207351, "grad_norm": 0.5849304669143405, "learning_rate": 7.434556162410514e-06, "loss": 0.1133, "step": 12430 }, { "epoch": 1.0747764482267053, "grad_norm": 0.5584597859335196, "learning_rate": 7.430481427087935e-06, "loss": 0.114, "step": 12440 }, { "epoch": 1.0756404164326754, "grad_norm": 0.5509002638468832, "learning_rate": 7.426404577069789e-06, "loss": 0.1137, "step": 12450 }, { "epoch": 1.0765043846386453, "grad_norm": 0.5694890975843275, "learning_rate": 7.42232561590323e-06, "loss": 0.1143, "step": 12460 }, { "epoch": 1.0773683528446154, "grad_norm": 0.6020473634715721, "learning_rate": 7.418244547137254e-06, "loss": 0.1139, "step": 12470 }, { "epoch": 1.0782323210505853, "grad_norm": 0.5867109716162718, "learning_rate": 7.414161374322686e-06, "loss": 0.1159, "step": 12480 }, { "epoch": 1.0790962892565554, "grad_norm": 0.5644144690792395, "learning_rate": 7.410076101012184e-06, "loss": 0.1146, "step": 12490 }, { "epoch": 1.0799602574625253, "grad_norm": 0.6177197577941457, "learning_rate": 7.405988730760231e-06, "loss": 0.116, "step": 12500 }, { "epoch": 1.0808242256684955, "grad_norm": 0.5853046102952433, "learning_rate": 7.401899267123137e-06, "loss": 0.1145, "step": 12510 }, { "epoch": 1.0816881938744654, "grad_norm": 0.5697893771470309, "learning_rate": 7.397807713659034e-06, "loss": 0.1162, "step": 12520 }, { "epoch": 1.0825521620804355, "grad_norm": 0.573783818853617, "learning_rate": 7.39371407392787e-06, "loss": 0.1159, "step": 12530 }, { "epoch": 1.0834161302864054, "grad_norm": 0.5563259533540806, "learning_rate": 7.38961835149141e-06, "loss": 0.1156, "step": 12540 }, { "epoch": 1.0842800984923755, "grad_norm": 0.5776456652808264, "learning_rate": 7.38552054991323e-06, "loss": 0.1168, "step": 12550 }, { "epoch": 1.0851440666983454, "grad_norm": 0.5605064163789284, "learning_rate": 7.381420672758714e-06, "loss": 0.1129, "step": 12560 }, { "epoch": 1.0860080349043155, "grad_norm": 0.5724005520433846, "learning_rate": 7.377318723595055e-06, "loss": 0.1141, "step": 12570 }, { "epoch": 1.0868720031102856, "grad_norm": 0.5763764037044753, "learning_rate": 7.373214705991245e-06, "loss": 0.1143, "step": 12580 }, { "epoch": 1.0877359713162555, "grad_norm": 0.5517897969244039, "learning_rate": 7.36910862351808e-06, "loss": 0.1128, "step": 12590 }, { "epoch": 1.0885999395222257, "grad_norm": 0.5459275054821126, "learning_rate": 7.36500047974815e-06, "loss": 0.1148, "step": 12600 }, { "epoch": 1.0894639077281956, "grad_norm": 0.5503423920751671, "learning_rate": 7.360890278255838e-06, "loss": 0.1112, "step": 12610 }, { "epoch": 1.0903278759341657, "grad_norm": 0.5640084967541655, "learning_rate": 7.356778022617318e-06, "loss": 0.1112, "step": 12620 }, { "epoch": 1.0911918441401356, "grad_norm": 0.5875745843307169, "learning_rate": 7.352663716410553e-06, "loss": 0.118, "step": 12630 }, { "epoch": 1.0920558123461057, "grad_norm": 0.5323955570867941, "learning_rate": 7.3485473632152844e-06, "loss": 0.1147, "step": 12640 }, { "epoch": 1.0929197805520756, "grad_norm": 0.5483966268609873, "learning_rate": 7.344428966613044e-06, "loss": 0.114, "step": 12650 }, { "epoch": 1.0937837487580457, "grad_norm": 0.549406276310017, "learning_rate": 7.340308530187134e-06, "loss": 0.1108, "step": 12660 }, { "epoch": 1.0946477169640156, "grad_norm": 0.5433161641383483, "learning_rate": 7.336186057522633e-06, "loss": 0.1159, "step": 12670 }, { "epoch": 1.0955116851699858, "grad_norm": 0.5842351306204783, "learning_rate": 7.332061552206393e-06, "loss": 0.114, "step": 12680 }, { "epoch": 1.0963756533759557, "grad_norm": 0.5616172693897599, "learning_rate": 7.327935017827034e-06, "loss": 0.1114, "step": 12690 }, { "epoch": 1.0972396215819258, "grad_norm": 0.6017902269821186, "learning_rate": 7.323806457974939e-06, "loss": 0.115, "step": 12700 }, { "epoch": 1.098103589787896, "grad_norm": 0.5724606965291857, "learning_rate": 7.319675876242256e-06, "loss": 0.1141, "step": 12710 }, { "epoch": 1.0989675579938658, "grad_norm": 0.5895805939601668, "learning_rate": 7.315543276222894e-06, "loss": 0.1133, "step": 12720 }, { "epoch": 1.099831526199836, "grad_norm": 0.5827595105149493, "learning_rate": 7.3114086615125125e-06, "loss": 0.115, "step": 12730 }, { "epoch": 1.1006954944058058, "grad_norm": 0.5300031140016738, "learning_rate": 7.3072720357085284e-06, "loss": 0.1141, "step": 12740 }, { "epoch": 1.101559462611776, "grad_norm": 0.5659944671796406, "learning_rate": 7.303133402410104e-06, "loss": 0.1103, "step": 12750 }, { "epoch": 1.1024234308177459, "grad_norm": 0.5575335822538651, "learning_rate": 7.298992765218156e-06, "loss": 0.1189, "step": 12760 }, { "epoch": 1.103287399023716, "grad_norm": 0.5816088261759306, "learning_rate": 7.294850127735336e-06, "loss": 0.1125, "step": 12770 }, { "epoch": 1.1041513672296859, "grad_norm": 0.5377912164914728, "learning_rate": 7.2907054935660415e-06, "loss": 0.1142, "step": 12780 }, { "epoch": 1.105015335435656, "grad_norm": 0.5171845913840188, "learning_rate": 7.286558866316405e-06, "loss": 0.1144, "step": 12790 }, { "epoch": 1.105879303641626, "grad_norm": 0.5625513547764542, "learning_rate": 7.282410249594294e-06, "loss": 0.1139, "step": 12800 }, { "epoch": 1.106743271847596, "grad_norm": 0.548817914014772, "learning_rate": 7.278259647009308e-06, "loss": 0.1125, "step": 12810 }, { "epoch": 1.1076072400535661, "grad_norm": 0.5619622892577025, "learning_rate": 7.27410706217277e-06, "loss": 0.1123, "step": 12820 }, { "epoch": 1.108471208259536, "grad_norm": 0.5958312479893616, "learning_rate": 7.269952498697734e-06, "loss": 0.1151, "step": 12830 }, { "epoch": 1.1093351764655062, "grad_norm": 0.6065189288813805, "learning_rate": 7.265795960198971e-06, "loss": 0.1145, "step": 12840 }, { "epoch": 1.110199144671476, "grad_norm": 0.574744744019306, "learning_rate": 7.261637450292972e-06, "loss": 0.1123, "step": 12850 }, { "epoch": 1.1110631128774462, "grad_norm": 0.5523033030399436, "learning_rate": 7.257476972597941e-06, "loss": 0.1169, "step": 12860 }, { "epoch": 1.111927081083416, "grad_norm": 0.5615343679114696, "learning_rate": 7.2533145307337995e-06, "loss": 0.1116, "step": 12870 }, { "epoch": 1.1127910492893862, "grad_norm": 0.5892480180243681, "learning_rate": 7.249150128322171e-06, "loss": 0.1167, "step": 12880 }, { "epoch": 1.1136550174953561, "grad_norm": 0.5835398687472092, "learning_rate": 7.244983768986391e-06, "loss": 0.1143, "step": 12890 }, { "epoch": 1.1145189857013262, "grad_norm": 0.5420813706717189, "learning_rate": 7.240815456351493e-06, "loss": 0.1142, "step": 12900 }, { "epoch": 1.1153829539072961, "grad_norm": 0.5560693442080733, "learning_rate": 7.236645194044215e-06, "loss": 0.1124, "step": 12910 }, { "epoch": 1.1162469221132663, "grad_norm": 0.582775487561968, "learning_rate": 7.232472985692985e-06, "loss": 0.1158, "step": 12920 }, { "epoch": 1.1171108903192362, "grad_norm": 0.5813772164210899, "learning_rate": 7.228298834927932e-06, "loss": 0.1151, "step": 12930 }, { "epoch": 1.1179748585252063, "grad_norm": 0.551749722566045, "learning_rate": 7.224122745380866e-06, "loss": 0.1144, "step": 12940 }, { "epoch": 1.1188388267311762, "grad_norm": 0.5987437110852261, "learning_rate": 7.2199447206852926e-06, "loss": 0.1131, "step": 12950 }, { "epoch": 1.1197027949371463, "grad_norm": 0.5823318038186402, "learning_rate": 7.215764764476392e-06, "loss": 0.1144, "step": 12960 }, { "epoch": 1.1205667631431164, "grad_norm": 0.562823286330617, "learning_rate": 7.211582880391036e-06, "loss": 0.1138, "step": 12970 }, { "epoch": 1.1214307313490863, "grad_norm": 0.5689016713168833, "learning_rate": 7.2073990720677625e-06, "loss": 0.1082, "step": 12980 }, { "epoch": 1.1222946995550565, "grad_norm": 0.5669520425360047, "learning_rate": 7.203213343146793e-06, "loss": 0.1123, "step": 12990 }, { "epoch": 1.1231586677610264, "grad_norm": 0.5909865997664461, "learning_rate": 7.199025697270014e-06, "loss": 0.1168, "step": 13000 }, { "epoch": 1.1240226359669965, "grad_norm": 0.6073490868130594, "learning_rate": 7.194836138080983e-06, "loss": 0.1164, "step": 13010 }, { "epoch": 1.1248866041729664, "grad_norm": 0.5738517459776369, "learning_rate": 7.19064466922492e-06, "loss": 0.1115, "step": 13020 }, { "epoch": 1.1257505723789365, "grad_norm": 0.5341322056648838, "learning_rate": 7.186451294348708e-06, "loss": 0.1127, "step": 13030 }, { "epoch": 1.1266145405849064, "grad_norm": 0.5659073456165932, "learning_rate": 7.182256017100888e-06, "loss": 0.1132, "step": 13040 }, { "epoch": 1.1274785087908765, "grad_norm": 0.5733078496207322, "learning_rate": 7.178058841131658e-06, "loss": 0.1152, "step": 13050 }, { "epoch": 1.1283424769968464, "grad_norm": 0.5917250921625605, "learning_rate": 7.173859770092863e-06, "loss": 0.1137, "step": 13060 }, { "epoch": 1.1292064452028165, "grad_norm": 0.5724064155200794, "learning_rate": 7.1696588076380025e-06, "loss": 0.1124, "step": 13070 }, { "epoch": 1.1300704134087867, "grad_norm": 0.5839270882164534, "learning_rate": 7.165455957422219e-06, "loss": 0.1173, "step": 13080 }, { "epoch": 1.1309343816147566, "grad_norm": 0.5818668132052538, "learning_rate": 7.161251223102297e-06, "loss": 0.1116, "step": 13090 }, { "epoch": 1.1317983498207267, "grad_norm": 0.562653394134604, "learning_rate": 7.15704460833666e-06, "loss": 0.1169, "step": 13100 }, { "epoch": 1.1326623180266966, "grad_norm": 0.5369258651613542, "learning_rate": 7.152836116785372e-06, "loss": 0.1144, "step": 13110 }, { "epoch": 1.1335262862326667, "grad_norm": 0.5609598754088422, "learning_rate": 7.148625752110125e-06, "loss": 0.1128, "step": 13120 }, { "epoch": 1.1343902544386366, "grad_norm": 0.5307583270088041, "learning_rate": 7.1444135179742424e-06, "loss": 0.1129, "step": 13130 }, { "epoch": 1.1352542226446067, "grad_norm": 0.5794753705639407, "learning_rate": 7.140199418042674e-06, "loss": 0.116, "step": 13140 }, { "epoch": 1.1361181908505766, "grad_norm": 0.5765150526409482, "learning_rate": 7.135983455981993e-06, "loss": 0.1161, "step": 13150 }, { "epoch": 1.1369821590565468, "grad_norm": 0.588585379901811, "learning_rate": 7.131765635460394e-06, "loss": 0.1156, "step": 13160 }, { "epoch": 1.1378461272625167, "grad_norm": 0.5268321880508435, "learning_rate": 7.127545960147685e-06, "loss": 0.1105, "step": 13170 }, { "epoch": 1.1387100954684868, "grad_norm": 0.5409759474589731, "learning_rate": 7.123324433715293e-06, "loss": 0.1139, "step": 13180 }, { "epoch": 1.1395740636744567, "grad_norm": 0.548753740483243, "learning_rate": 7.119101059836252e-06, "loss": 0.1115, "step": 13190 }, { "epoch": 1.1404380318804268, "grad_norm": 0.5473787441617439, "learning_rate": 7.114875842185205e-06, "loss": 0.1132, "step": 13200 }, { "epoch": 1.1413020000863967, "grad_norm": 0.5521716011944642, "learning_rate": 7.110648784438396e-06, "loss": 0.1132, "step": 13210 }, { "epoch": 1.1421659682923668, "grad_norm": 0.5452012339396002, "learning_rate": 7.1064198902736766e-06, "loss": 0.1136, "step": 13220 }, { "epoch": 1.143029936498337, "grad_norm": 0.573456706312834, "learning_rate": 7.10218916337049e-06, "loss": 0.1117, "step": 13230 }, { "epoch": 1.1438939047043069, "grad_norm": 0.5486166950146599, "learning_rate": 7.097956607409876e-06, "loss": 0.1138, "step": 13240 }, { "epoch": 1.144757872910277, "grad_norm": 0.5526828504425823, "learning_rate": 7.093722226074467e-06, "loss": 0.1099, "step": 13250 }, { "epoch": 1.1456218411162469, "grad_norm": 0.5772698801339367, "learning_rate": 7.089486023048482e-06, "loss": 0.1102, "step": 13260 }, { "epoch": 1.146485809322217, "grad_norm": 0.6049052486348215, "learning_rate": 7.0852480020177265e-06, "loss": 0.1124, "step": 13270 }, { "epoch": 1.147349777528187, "grad_norm": 0.5755929622844455, "learning_rate": 7.081008166669585e-06, "loss": 0.1113, "step": 13280 }, { "epoch": 1.148213745734157, "grad_norm": 0.5789717682270602, "learning_rate": 7.076766520693024e-06, "loss": 0.113, "step": 13290 }, { "epoch": 1.149077713940127, "grad_norm": 0.5590626048251914, "learning_rate": 7.072523067778583e-06, "loss": 0.1154, "step": 13300 }, { "epoch": 1.149941682146097, "grad_norm": 0.5562146192899278, "learning_rate": 7.068277811618376e-06, "loss": 0.1147, "step": 13310 }, { "epoch": 1.150805650352067, "grad_norm": 0.5391519600284189, "learning_rate": 7.064030755906084e-06, "loss": 0.1132, "step": 13320 }, { "epoch": 1.151669618558037, "grad_norm": 0.5689682085308547, "learning_rate": 7.059781904336953e-06, "loss": 0.1128, "step": 13330 }, { "epoch": 1.1525335867640072, "grad_norm": 0.5685437828904983, "learning_rate": 7.055531260607795e-06, "loss": 0.1151, "step": 13340 }, { "epoch": 1.153397554969977, "grad_norm": 0.5947489843605617, "learning_rate": 7.051278828416979e-06, "loss": 0.112, "step": 13350 }, { "epoch": 1.1542615231759472, "grad_norm": 0.5463410502004773, "learning_rate": 7.047024611464428e-06, "loss": 0.1138, "step": 13360 }, { "epoch": 1.1551254913819171, "grad_norm": 0.5374793646965029, "learning_rate": 7.042768613451623e-06, "loss": 0.1132, "step": 13370 }, { "epoch": 1.1559894595878872, "grad_norm": 0.5966329054253161, "learning_rate": 7.038510838081588e-06, "loss": 0.1125, "step": 13380 }, { "epoch": 1.1568534277938571, "grad_norm": 0.5531816864338223, "learning_rate": 7.0342512890589e-06, "loss": 0.1124, "step": 13390 }, { "epoch": 1.1577173959998273, "grad_norm": 0.6014056601060235, "learning_rate": 7.029989970089675e-06, "loss": 0.1121, "step": 13400 }, { "epoch": 1.1585813642057972, "grad_norm": 0.5984871684047993, "learning_rate": 7.025726884881572e-06, "loss": 0.1135, "step": 13410 }, { "epoch": 1.1594453324117673, "grad_norm": 0.5583075318226408, "learning_rate": 7.021462037143783e-06, "loss": 0.1119, "step": 13420 }, { "epoch": 1.1603093006177372, "grad_norm": 0.5502086604536702, "learning_rate": 7.017195430587037e-06, "loss": 0.1161, "step": 13430 }, { "epoch": 1.1611732688237073, "grad_norm": 0.5918176410941008, "learning_rate": 7.012927068923592e-06, "loss": 0.1145, "step": 13440 }, { "epoch": 1.1620372370296774, "grad_norm": 0.5545319587622841, "learning_rate": 7.008656955867232e-06, "loss": 0.1123, "step": 13450 }, { "epoch": 1.1629012052356473, "grad_norm": 0.5673106420372879, "learning_rate": 7.004385095133268e-06, "loss": 0.1152, "step": 13460 }, { "epoch": 1.1637651734416172, "grad_norm": 0.5840240557788225, "learning_rate": 7.000111490438527e-06, "loss": 0.1107, "step": 13470 }, { "epoch": 1.1646291416475874, "grad_norm": 0.543595956222494, "learning_rate": 6.9958361455013556e-06, "loss": 0.1145, "step": 13480 }, { "epoch": 1.1654931098535575, "grad_norm": 0.5490360713143707, "learning_rate": 6.991559064041618e-06, "loss": 0.1151, "step": 13490 }, { "epoch": 1.1663570780595274, "grad_norm": 0.5905952151929723, "learning_rate": 6.987280249780682e-06, "loss": 0.1123, "step": 13500 }, { "epoch": 1.1672210462654975, "grad_norm": 0.5708684897331763, "learning_rate": 6.9829997064414315e-06, "loss": 0.1117, "step": 13510 }, { "epoch": 1.1680850144714674, "grad_norm": 0.5707805140043376, "learning_rate": 6.9787174377482454e-06, "loss": 0.1129, "step": 13520 }, { "epoch": 1.1689489826774375, "grad_norm": 0.5734976835058482, "learning_rate": 6.9744334474270134e-06, "loss": 0.1127, "step": 13530 }, { "epoch": 1.1698129508834074, "grad_norm": 0.5754813896572345, "learning_rate": 6.970147739205115e-06, "loss": 0.1111, "step": 13540 }, { "epoch": 1.1706769190893775, "grad_norm": 0.5805142687488006, "learning_rate": 6.965860316811432e-06, "loss": 0.1096, "step": 13550 }, { "epoch": 1.1715408872953474, "grad_norm": 0.5677452272337598, "learning_rate": 6.96157118397633e-06, "loss": 0.1114, "step": 13560 }, { "epoch": 1.1724048555013176, "grad_norm": 0.5478940239346025, "learning_rate": 6.957280344431669e-06, "loss": 0.1139, "step": 13570 }, { "epoch": 1.1732688237072875, "grad_norm": 0.5705600986096185, "learning_rate": 6.9529878019107886e-06, "loss": 0.1141, "step": 13580 }, { "epoch": 1.1741327919132576, "grad_norm": 0.5671172827221018, "learning_rate": 6.948693560148515e-06, "loss": 0.1153, "step": 13590 }, { "epoch": 1.1749967601192277, "grad_norm": 0.5887655397357853, "learning_rate": 6.944397622881151e-06, "loss": 0.1166, "step": 13600 }, { "epoch": 1.1758607283251976, "grad_norm": 0.5712255557841519, "learning_rate": 6.940099993846472e-06, "loss": 0.1116, "step": 13610 }, { "epoch": 1.1767246965311677, "grad_norm": 0.5388407173904846, "learning_rate": 6.935800676783731e-06, "loss": 0.113, "step": 13620 }, { "epoch": 1.1775886647371376, "grad_norm": 0.5524086717947396, "learning_rate": 6.931499675433644e-06, "loss": 0.1106, "step": 13630 }, { "epoch": 1.1784526329431078, "grad_norm": 0.5900683311612895, "learning_rate": 6.927196993538396e-06, "loss": 0.1143, "step": 13640 }, { "epoch": 1.1793166011490777, "grad_norm": 0.5571666223561705, "learning_rate": 6.922892634841632e-06, "loss": 0.1105, "step": 13650 }, { "epoch": 1.1801805693550478, "grad_norm": 0.5449665981514987, "learning_rate": 6.91858660308846e-06, "loss": 0.1141, "step": 13660 }, { "epoch": 1.1810445375610177, "grad_norm": 0.5464355283512453, "learning_rate": 6.914278902025436e-06, "loss": 0.1123, "step": 13670 }, { "epoch": 1.1819085057669878, "grad_norm": 0.5519986042239243, "learning_rate": 6.909969535400578e-06, "loss": 0.1125, "step": 13680 }, { "epoch": 1.1827724739729577, "grad_norm": 0.5614519421544537, "learning_rate": 6.905658506963344e-06, "loss": 0.1141, "step": 13690 }, { "epoch": 1.1836364421789278, "grad_norm": 0.5704123213334359, "learning_rate": 6.901345820464647e-06, "loss": 0.1149, "step": 13700 }, { "epoch": 1.184500410384898, "grad_norm": 0.5776361497185494, "learning_rate": 6.897031479656833e-06, "loss": 0.1123, "step": 13710 }, { "epoch": 1.1853643785908679, "grad_norm": 0.5459654368909052, "learning_rate": 6.8927154882936955e-06, "loss": 0.1129, "step": 13720 }, { "epoch": 1.1862283467968378, "grad_norm": 0.5748613451436307, "learning_rate": 6.888397850130458e-06, "loss": 0.115, "step": 13730 }, { "epoch": 1.1870923150028079, "grad_norm": 0.5431693832897374, "learning_rate": 6.884078568923782e-06, "loss": 0.1123, "step": 13740 }, { "epoch": 1.187956283208778, "grad_norm": 0.5623779523522215, "learning_rate": 6.879757648431755e-06, "loss": 0.1127, "step": 13750 }, { "epoch": 1.188820251414748, "grad_norm": 0.592553622911199, "learning_rate": 6.875435092413894e-06, "loss": 0.1153, "step": 13760 }, { "epoch": 1.189684219620718, "grad_norm": 0.5595958783240502, "learning_rate": 6.871110904631136e-06, "loss": 0.1138, "step": 13770 }, { "epoch": 1.190548187826688, "grad_norm": 0.5633116470772369, "learning_rate": 6.866785088845838e-06, "loss": 0.1143, "step": 13780 }, { "epoch": 1.191412156032658, "grad_norm": 0.5909659506315208, "learning_rate": 6.862457648821773e-06, "loss": 0.1134, "step": 13790 }, { "epoch": 1.192276124238628, "grad_norm": 0.5739285382431173, "learning_rate": 6.858128588324131e-06, "loss": 0.1133, "step": 13800 }, { "epoch": 1.193140092444598, "grad_norm": 0.5721923307570116, "learning_rate": 6.853797911119509e-06, "loss": 0.1137, "step": 13810 }, { "epoch": 1.194004060650568, "grad_norm": 0.5613644285392295, "learning_rate": 6.84946562097591e-06, "loss": 0.1122, "step": 13820 }, { "epoch": 1.194868028856538, "grad_norm": 0.5542391369025897, "learning_rate": 6.845131721662741e-06, "loss": 0.114, "step": 13830 }, { "epoch": 1.195731997062508, "grad_norm": 0.5437931235251273, "learning_rate": 6.840796216950813e-06, "loss": 0.1104, "step": 13840 }, { "epoch": 1.1965959652684781, "grad_norm": 0.5727878132218367, "learning_rate": 6.836459110612326e-06, "loss": 0.1161, "step": 13850 }, { "epoch": 1.1974599334744482, "grad_norm": 0.5552817579336806, "learning_rate": 6.83212040642088e-06, "loss": 0.1136, "step": 13860 }, { "epoch": 1.1983239016804181, "grad_norm": 0.559250789005052, "learning_rate": 6.827780108151464e-06, "loss": 0.1125, "step": 13870 }, { "epoch": 1.1991878698863883, "grad_norm": 0.5937634902485177, "learning_rate": 6.823438219580452e-06, "loss": 0.1083, "step": 13880 }, { "epoch": 1.2000518380923582, "grad_norm": 0.5552214918377005, "learning_rate": 6.819094744485601e-06, "loss": 0.1129, "step": 13890 }, { "epoch": 1.2009158062983283, "grad_norm": 0.5655419429967947, "learning_rate": 6.8147496866460525e-06, "loss": 0.1101, "step": 13900 }, { "epoch": 1.2017797745042982, "grad_norm": 0.5839619215629936, "learning_rate": 6.810403049842324e-06, "loss": 0.1085, "step": 13910 }, { "epoch": 1.2026437427102683, "grad_norm": 0.5409388759672646, "learning_rate": 6.806054837856301e-06, "loss": 0.1138, "step": 13920 }, { "epoch": 1.2035077109162382, "grad_norm": 0.57144561714602, "learning_rate": 6.801705054471248e-06, "loss": 0.1145, "step": 13930 }, { "epoch": 1.2043716791222083, "grad_norm": 0.5424545215147586, "learning_rate": 6.79735370347179e-06, "loss": 0.1139, "step": 13940 }, { "epoch": 1.2052356473281782, "grad_norm": 0.5618561017996767, "learning_rate": 6.793000788643923e-06, "loss": 0.1116, "step": 13950 }, { "epoch": 1.2060996155341484, "grad_norm": 0.5676546554249075, "learning_rate": 6.788646313774996e-06, "loss": 0.1133, "step": 13960 }, { "epoch": 1.2069635837401185, "grad_norm": 0.6010536091884849, "learning_rate": 6.784290282653719e-06, "loss": 0.1125, "step": 13970 }, { "epoch": 1.2078275519460884, "grad_norm": 0.6034476266033024, "learning_rate": 6.779932699070157e-06, "loss": 0.109, "step": 13980 }, { "epoch": 1.2086915201520585, "grad_norm": 0.5223870934855904, "learning_rate": 6.775573566815725e-06, "loss": 0.1104, "step": 13990 }, { "epoch": 1.2095554883580284, "grad_norm": 0.5894416024611014, "learning_rate": 6.771212889683182e-06, "loss": 0.1118, "step": 14000 }, { "epoch": 1.2104194565639985, "grad_norm": 0.5336729589337562, "learning_rate": 6.766850671466637e-06, "loss": 0.1133, "step": 14010 }, { "epoch": 1.2112834247699684, "grad_norm": 0.6024966427336127, "learning_rate": 6.762486915961536e-06, "loss": 0.1095, "step": 14020 }, { "epoch": 1.2121473929759385, "grad_norm": 0.5413840123335933, "learning_rate": 6.758121626964665e-06, "loss": 0.1132, "step": 14030 }, { "epoch": 1.2130113611819084, "grad_norm": 0.582683552400572, "learning_rate": 6.753754808274139e-06, "loss": 0.1142, "step": 14040 }, { "epoch": 1.2138753293878786, "grad_norm": 0.5593554231560006, "learning_rate": 6.749386463689413e-06, "loss": 0.111, "step": 14050 }, { "epoch": 1.2147392975938485, "grad_norm": 0.5534453036111682, "learning_rate": 6.74501659701126e-06, "loss": 0.112, "step": 14060 }, { "epoch": 1.2156032657998186, "grad_norm": 0.5373879399272095, "learning_rate": 6.7406452120417846e-06, "loss": 0.1104, "step": 14070 }, { "epoch": 1.2164672340057887, "grad_norm": 0.5605673893423937, "learning_rate": 6.736272312584408e-06, "loss": 0.1146, "step": 14080 }, { "epoch": 1.2173312022117586, "grad_norm": 0.5282153572951253, "learning_rate": 6.7318979024438725e-06, "loss": 0.1108, "step": 14090 }, { "epoch": 1.2181951704177285, "grad_norm": 0.5738009147157338, "learning_rate": 6.7275219854262295e-06, "loss": 0.1101, "step": 14100 }, { "epoch": 1.2190591386236986, "grad_norm": 0.5537976410855348, "learning_rate": 6.7231445653388494e-06, "loss": 0.1117, "step": 14110 }, { "epoch": 1.2199231068296688, "grad_norm": 0.536439976576386, "learning_rate": 6.718765645990402e-06, "loss": 0.1126, "step": 14120 }, { "epoch": 1.2207870750356387, "grad_norm": 0.5614207764845589, "learning_rate": 6.7143852311908695e-06, "loss": 0.11, "step": 14130 }, { "epoch": 1.2216510432416088, "grad_norm": 0.5799281943109638, "learning_rate": 6.7100033247515265e-06, "loss": 0.1118, "step": 14140 }, { "epoch": 1.2225150114475787, "grad_norm": 0.5334465896125076, "learning_rate": 6.705619930484954e-06, "loss": 0.1137, "step": 14150 }, { "epoch": 1.2233789796535488, "grad_norm": 0.5418264694625892, "learning_rate": 6.701235052205023e-06, "loss": 0.1118, "step": 14160 }, { "epoch": 1.2242429478595187, "grad_norm": 0.5716467405913869, "learning_rate": 6.696848693726896e-06, "loss": 0.1137, "step": 14170 }, { "epoch": 1.2251069160654888, "grad_norm": 0.5600841783319294, "learning_rate": 6.692460858867025e-06, "loss": 0.1117, "step": 14180 }, { "epoch": 1.2259708842714587, "grad_norm": 0.5430963957009048, "learning_rate": 6.6880715514431424e-06, "loss": 0.11, "step": 14190 }, { "epoch": 1.2268348524774288, "grad_norm": 0.610965208613663, "learning_rate": 6.683680775274267e-06, "loss": 0.1161, "step": 14200 }, { "epoch": 1.2276988206833988, "grad_norm": 0.5428600711535735, "learning_rate": 6.679288534180692e-06, "loss": 0.114, "step": 14210 }, { "epoch": 1.2285627888893689, "grad_norm": 0.5257402970258083, "learning_rate": 6.67489483198399e-06, "loss": 0.1127, "step": 14220 }, { "epoch": 1.229426757095339, "grad_norm": 0.5695987153021643, "learning_rate": 6.670499672506996e-06, "loss": 0.109, "step": 14230 }, { "epoch": 1.230290725301309, "grad_norm": 0.5421509104065498, "learning_rate": 6.666103059573823e-06, "loss": 0.1144, "step": 14240 }, { "epoch": 1.231154693507279, "grad_norm": 0.5700089455833822, "learning_rate": 6.661704997009841e-06, "loss": 0.1096, "step": 14250 }, { "epoch": 1.232018661713249, "grad_norm": 0.5770791849329917, "learning_rate": 6.657305488641687e-06, "loss": 0.1124, "step": 14260 }, { "epoch": 1.232882629919219, "grad_norm": 0.5585275622314702, "learning_rate": 6.65290453829725e-06, "loss": 0.1153, "step": 14270 }, { "epoch": 1.233746598125189, "grad_norm": 0.5494171663494277, "learning_rate": 6.648502149805679e-06, "loss": 0.1166, "step": 14280 }, { "epoch": 1.234610566331159, "grad_norm": 0.5765287928053029, "learning_rate": 6.6440983269973725e-06, "loss": 0.1122, "step": 14290 }, { "epoch": 1.235474534537129, "grad_norm": 0.5763938405118092, "learning_rate": 6.639693073703974e-06, "loss": 0.1076, "step": 14300 }, { "epoch": 1.236338502743099, "grad_norm": 0.5494980151341401, "learning_rate": 6.635286393758376e-06, "loss": 0.1143, "step": 14310 }, { "epoch": 1.237202470949069, "grad_norm": 0.5680501144954855, "learning_rate": 6.63087829099471e-06, "loss": 0.1137, "step": 14320 }, { "epoch": 1.2380664391550391, "grad_norm": 0.5452528726216044, "learning_rate": 6.6264687692483455e-06, "loss": 0.111, "step": 14330 }, { "epoch": 1.2389304073610092, "grad_norm": 0.575161065532565, "learning_rate": 6.6220578323558885e-06, "loss": 0.1155, "step": 14340 }, { "epoch": 1.2397943755669791, "grad_norm": 0.5825666496800802, "learning_rate": 6.617645484155173e-06, "loss": 0.1138, "step": 14350 }, { "epoch": 1.240658343772949, "grad_norm": 0.5573343725422848, "learning_rate": 6.6132317284852656e-06, "loss": 0.1115, "step": 14360 }, { "epoch": 1.2415223119789192, "grad_norm": 0.5976405807104025, "learning_rate": 6.60881656918645e-06, "loss": 0.1119, "step": 14370 }, { "epoch": 1.2423862801848893, "grad_norm": 0.5773790829133977, "learning_rate": 6.60440001010024e-06, "loss": 0.1109, "step": 14380 }, { "epoch": 1.2432502483908592, "grad_norm": 0.5596129375427722, "learning_rate": 6.599982055069363e-06, "loss": 0.1133, "step": 14390 }, { "epoch": 1.2441142165968293, "grad_norm": 0.5986154505530558, "learning_rate": 6.595562707937759e-06, "loss": 0.1112, "step": 14400 }, { "epoch": 1.2449781848027992, "grad_norm": 0.5850475888093254, "learning_rate": 6.591141972550581e-06, "loss": 0.1122, "step": 14410 }, { "epoch": 1.2458421530087693, "grad_norm": 0.5438809542880516, "learning_rate": 6.5867198527541935e-06, "loss": 0.1124, "step": 14420 }, { "epoch": 1.2467061212147392, "grad_norm": 0.5854222234007489, "learning_rate": 6.582296352396158e-06, "loss": 0.1145, "step": 14430 }, { "epoch": 1.2475700894207093, "grad_norm": 0.5647614923048787, "learning_rate": 6.577871475325245e-06, "loss": 0.1131, "step": 14440 }, { "epoch": 1.2484340576266792, "grad_norm": 0.5446916876914634, "learning_rate": 6.573445225391417e-06, "loss": 0.1128, "step": 14450 }, { "epoch": 1.2492980258326494, "grad_norm": 0.556275506719302, "learning_rate": 6.569017606445836e-06, "loss": 0.1104, "step": 14460 }, { "epoch": 1.2501619940386193, "grad_norm": 0.5439613445971405, "learning_rate": 6.564588622340848e-06, "loss": 0.1085, "step": 14470 }, { "epoch": 1.2510259622445894, "grad_norm": 0.5628308747399765, "learning_rate": 6.560158276929996e-06, "loss": 0.119, "step": 14480 }, { "epoch": 1.2518899304505595, "grad_norm": 0.561991898239134, "learning_rate": 6.555726574068e-06, "loss": 0.1146, "step": 14490 }, { "epoch": 1.2527538986565294, "grad_norm": 0.5751250413019706, "learning_rate": 6.551293517610764e-06, "loss": 0.1085, "step": 14500 }, { "epoch": 1.2536178668624995, "grad_norm": 0.5640021015599359, "learning_rate": 6.546859111415371e-06, "loss": 0.1102, "step": 14510 }, { "epoch": 1.2544818350684694, "grad_norm": 0.5635763265614476, "learning_rate": 6.5424233593400735e-06, "loss": 0.1145, "step": 14520 }, { "epoch": 1.2553458032744396, "grad_norm": 0.591286004727335, "learning_rate": 6.537986265244302e-06, "loss": 0.111, "step": 14530 }, { "epoch": 1.2562097714804095, "grad_norm": 0.5522099040559655, "learning_rate": 6.533547832988647e-06, "loss": 0.1111, "step": 14540 }, { "epoch": 1.2570737396863796, "grad_norm": 0.5470523497296542, "learning_rate": 6.529108066434872e-06, "loss": 0.1114, "step": 14550 }, { "epoch": 1.2579377078923495, "grad_norm": 0.568333276620229, "learning_rate": 6.524666969445892e-06, "loss": 0.1119, "step": 14560 }, { "epoch": 1.2588016760983196, "grad_norm": 0.5850616288956318, "learning_rate": 6.520224545885789e-06, "loss": 0.1096, "step": 14570 }, { "epoch": 1.2596656443042895, "grad_norm": 0.5773097418715036, "learning_rate": 6.515780799619791e-06, "loss": 0.1135, "step": 14580 }, { "epoch": 1.2605296125102596, "grad_norm": 0.5388132754358779, "learning_rate": 6.511335734514283e-06, "loss": 0.1119, "step": 14590 }, { "epoch": 1.2613935807162298, "grad_norm": 0.5577823715228171, "learning_rate": 6.506889354436792e-06, "loss": 0.1176, "step": 14600 }, { "epoch": 1.2622575489221997, "grad_norm": 0.5339935975649758, "learning_rate": 6.5024416632559935e-06, "loss": 0.107, "step": 14610 }, { "epoch": 1.2631215171281696, "grad_norm": 0.5732228640887302, "learning_rate": 6.4979926648417025e-06, "loss": 0.1124, "step": 14620 }, { "epoch": 1.2639854853341397, "grad_norm": 0.5903353302881912, "learning_rate": 6.493542363064871e-06, "loss": 0.1145, "step": 14630 }, { "epoch": 1.2648494535401098, "grad_norm": 0.5470991075480361, "learning_rate": 6.489090761797583e-06, "loss": 0.108, "step": 14640 }, { "epoch": 1.2657134217460797, "grad_norm": 0.5625592769987781, "learning_rate": 6.484637864913059e-06, "loss": 0.1114, "step": 14650 }, { "epoch": 1.2665773899520498, "grad_norm": 0.5689791373198402, "learning_rate": 6.4801836762856406e-06, "loss": 0.1133, "step": 14660 }, { "epoch": 1.2674413581580197, "grad_norm": 0.5444276374757011, "learning_rate": 6.475728199790796e-06, "loss": 0.1094, "step": 14670 }, { "epoch": 1.2683053263639898, "grad_norm": 0.5667653808009618, "learning_rate": 6.471271439305115e-06, "loss": 0.112, "step": 14680 }, { "epoch": 1.2691692945699597, "grad_norm": 0.5344168071211037, "learning_rate": 6.466813398706302e-06, "loss": 0.1128, "step": 14690 }, { "epoch": 1.2700332627759299, "grad_norm": 0.5510709964262109, "learning_rate": 6.462354081873177e-06, "loss": 0.1101, "step": 14700 }, { "epoch": 1.2708972309819, "grad_norm": 0.5653351061563336, "learning_rate": 6.457893492685671e-06, "loss": 0.1126, "step": 14710 }, { "epoch": 1.27176119918787, "grad_norm": 0.5565782210839264, "learning_rate": 6.453431635024817e-06, "loss": 0.111, "step": 14720 }, { "epoch": 1.2726251673938398, "grad_norm": 0.5547676333122443, "learning_rate": 6.4489685127727606e-06, "loss": 0.1086, "step": 14730 }, { "epoch": 1.27348913559981, "grad_norm": 0.544001901072926, "learning_rate": 6.44450412981274e-06, "loss": 0.1111, "step": 14740 }, { "epoch": 1.27435310380578, "grad_norm": 0.5267941742084936, "learning_rate": 6.440038490029092e-06, "loss": 0.112, "step": 14750 }, { "epoch": 1.27521707201175, "grad_norm": 0.5453459720243373, "learning_rate": 6.435571597307251e-06, "loss": 0.1119, "step": 14760 }, { "epoch": 1.27608104021772, "grad_norm": 0.5647611308613276, "learning_rate": 6.431103455533735e-06, "loss": 0.1155, "step": 14770 }, { "epoch": 1.27694500842369, "grad_norm": 0.5702592798618671, "learning_rate": 6.426634068596154e-06, "loss": 0.1077, "step": 14780 }, { "epoch": 1.27780897662966, "grad_norm": 0.6036801878221308, "learning_rate": 6.422163440383202e-06, "loss": 0.1168, "step": 14790 }, { "epoch": 1.27867294483563, "grad_norm": 0.5962193986965203, "learning_rate": 6.417691574784647e-06, "loss": 0.1155, "step": 14800 }, { "epoch": 1.2795369130416, "grad_norm": 0.593769084774705, "learning_rate": 6.413218475691337e-06, "loss": 0.1136, "step": 14810 }, { "epoch": 1.28040088124757, "grad_norm": 0.5378674069612178, "learning_rate": 6.408744146995197e-06, "loss": 0.1127, "step": 14820 }, { "epoch": 1.2812648494535401, "grad_norm": 0.552753211413115, "learning_rate": 6.404268592589214e-06, "loss": 0.1095, "step": 14830 }, { "epoch": 1.28212881765951, "grad_norm": 0.5624083853062832, "learning_rate": 6.39979181636745e-06, "loss": 0.1084, "step": 14840 }, { "epoch": 1.2829927858654802, "grad_norm": 0.5601341611146992, "learning_rate": 6.395313822225022e-06, "loss": 0.1125, "step": 14850 }, { "epoch": 1.2838567540714503, "grad_norm": 0.5172364101610524, "learning_rate": 6.390834614058114e-06, "loss": 0.1114, "step": 14860 }, { "epoch": 1.2847207222774202, "grad_norm": 0.5229962139350934, "learning_rate": 6.38635419576396e-06, "loss": 0.1132, "step": 14870 }, { "epoch": 1.28558469048339, "grad_norm": 0.5756669235276612, "learning_rate": 6.381872571240852e-06, "loss": 0.1147, "step": 14880 }, { "epoch": 1.2864486586893602, "grad_norm": 0.5563681551307071, "learning_rate": 6.377389744388127e-06, "loss": 0.1095, "step": 14890 }, { "epoch": 1.2873126268953303, "grad_norm": 0.5854403852531498, "learning_rate": 6.372905719106172e-06, "loss": 0.1128, "step": 14900 }, { "epoch": 1.2881765951013002, "grad_norm": 0.6036186605098479, "learning_rate": 6.368420499296413e-06, "loss": 0.1126, "step": 14910 }, { "epoch": 1.2890405633072703, "grad_norm": 0.593748655103996, "learning_rate": 6.36393408886132e-06, "loss": 0.1148, "step": 14920 }, { "epoch": 1.2899045315132402, "grad_norm": 0.5552533246969583, "learning_rate": 6.359446491704394e-06, "loss": 0.11, "step": 14930 }, { "epoch": 1.2907684997192104, "grad_norm": 0.5879665603388455, "learning_rate": 6.3549577117301735e-06, "loss": 0.1121, "step": 14940 }, { "epoch": 1.2916324679251803, "grad_norm": 0.553609513620935, "learning_rate": 6.35046775284422e-06, "loss": 0.1108, "step": 14950 }, { "epoch": 1.2924964361311504, "grad_norm": 0.5804539813996858, "learning_rate": 6.345976618953127e-06, "loss": 0.1125, "step": 14960 }, { "epoch": 1.2933604043371205, "grad_norm": 0.5404050350479398, "learning_rate": 6.341484313964506e-06, "loss": 0.108, "step": 14970 }, { "epoch": 1.2942243725430904, "grad_norm": 0.5717453327587577, "learning_rate": 6.33699084178699e-06, "loss": 0.1132, "step": 14980 }, { "epoch": 1.2950883407490603, "grad_norm": 0.5571547661988168, "learning_rate": 6.332496206330227e-06, "loss": 0.1118, "step": 14990 }, { "epoch": 1.2959523089550304, "grad_norm": 0.5894118107294782, "learning_rate": 6.328000411504876e-06, "loss": 0.1119, "step": 15000 }, { "epoch": 1.2968162771610006, "grad_norm": 0.6271319736229325, "learning_rate": 6.323503461222602e-06, "loss": 0.1093, "step": 15010 }, { "epoch": 1.2976802453669705, "grad_norm": 0.5714163744986458, "learning_rate": 6.319005359396084e-06, "loss": 0.1163, "step": 15020 }, { "epoch": 1.2985442135729406, "grad_norm": 0.570485713384322, "learning_rate": 6.314506109938996e-06, "loss": 0.1133, "step": 15030 }, { "epoch": 1.2994081817789105, "grad_norm": 0.6031381548468219, "learning_rate": 6.31000571676601e-06, "loss": 0.1148, "step": 15040 }, { "epoch": 1.3002721499848806, "grad_norm": 0.5782702057393322, "learning_rate": 6.305504183792799e-06, "loss": 0.1112, "step": 15050 }, { "epoch": 1.3011361181908505, "grad_norm": 0.5618969513353681, "learning_rate": 6.30100151493602e-06, "loss": 0.1132, "step": 15060 }, { "epoch": 1.3020000863968206, "grad_norm": 0.5627948160701411, "learning_rate": 6.296497714113325e-06, "loss": 0.1113, "step": 15070 }, { "epoch": 1.3028640546027908, "grad_norm": 0.5534126614895483, "learning_rate": 6.291992785243348e-06, "loss": 0.112, "step": 15080 }, { "epoch": 1.3037280228087607, "grad_norm": 0.5912501317649836, "learning_rate": 6.287486732245705e-06, "loss": 0.112, "step": 15090 }, { "epoch": 1.3045919910147306, "grad_norm": 0.5600010807396356, "learning_rate": 6.282979559040988e-06, "loss": 0.1141, "step": 15100 }, { "epoch": 1.3054559592207007, "grad_norm": 0.5935733410748419, "learning_rate": 6.278471269550766e-06, "loss": 0.1139, "step": 15110 }, { "epoch": 1.3063199274266708, "grad_norm": 0.5600831456724733, "learning_rate": 6.2739618676975785e-06, "loss": 0.1116, "step": 15120 }, { "epoch": 1.3071838956326407, "grad_norm": 0.5599466998893358, "learning_rate": 6.2694513574049354e-06, "loss": 0.1102, "step": 15130 }, { "epoch": 1.3080478638386106, "grad_norm": 0.5559964600507826, "learning_rate": 6.264939742597304e-06, "loss": 0.1102, "step": 15140 }, { "epoch": 1.3089118320445807, "grad_norm": 0.5786344058859312, "learning_rate": 6.2604270272001235e-06, "loss": 0.1131, "step": 15150 }, { "epoch": 1.3097758002505508, "grad_norm": 0.5657515488388618, "learning_rate": 6.255913215139778e-06, "loss": 0.113, "step": 15160 }, { "epoch": 1.3106397684565207, "grad_norm": 0.5863407258722373, "learning_rate": 6.251398310343617e-06, "loss": 0.1118, "step": 15170 }, { "epoch": 1.3115037366624909, "grad_norm": 0.5442709891236638, "learning_rate": 6.246882316739932e-06, "loss": 0.1129, "step": 15180 }, { "epoch": 1.3123677048684608, "grad_norm": 0.6002637189048005, "learning_rate": 6.242365238257969e-06, "loss": 0.1137, "step": 15190 }, { "epoch": 1.313231673074431, "grad_norm": 0.5842893880761071, "learning_rate": 6.237847078827914e-06, "loss": 0.1126, "step": 15200 }, { "epoch": 1.3140956412804008, "grad_norm": 0.5830066962552539, "learning_rate": 6.233327842380894e-06, "loss": 0.1161, "step": 15210 }, { "epoch": 1.314959609486371, "grad_norm": 0.5373321121823259, "learning_rate": 6.228807532848973e-06, "loss": 0.111, "step": 15220 }, { "epoch": 1.315823577692341, "grad_norm": 0.6108127472256695, "learning_rate": 6.224286154165148e-06, "loss": 0.1101, "step": 15230 }, { "epoch": 1.316687545898311, "grad_norm": 0.5696760440842511, "learning_rate": 6.219763710263349e-06, "loss": 0.1081, "step": 15240 }, { "epoch": 1.3175515141042808, "grad_norm": 0.5489297133762001, "learning_rate": 6.21524020507843e-06, "loss": 0.1118, "step": 15250 }, { "epoch": 1.318415482310251, "grad_norm": 0.5613132623439822, "learning_rate": 6.21071564254617e-06, "loss": 0.1143, "step": 15260 }, { "epoch": 1.319279450516221, "grad_norm": 0.5526160450209798, "learning_rate": 6.206190026603267e-06, "loss": 0.1126, "step": 15270 }, { "epoch": 1.320143418722191, "grad_norm": 0.5769836694482383, "learning_rate": 6.201663361187336e-06, "loss": 0.1087, "step": 15280 }, { "epoch": 1.321007386928161, "grad_norm": 0.55598238886065, "learning_rate": 6.1971356502369065e-06, "loss": 0.1131, "step": 15290 }, { "epoch": 1.321871355134131, "grad_norm": 0.5735365686216224, "learning_rate": 6.192606897691414e-06, "loss": 0.1126, "step": 15300 }, { "epoch": 1.3227353233401011, "grad_norm": 0.5783112362966369, "learning_rate": 6.188077107491204e-06, "loss": 0.1112, "step": 15310 }, { "epoch": 1.323599291546071, "grad_norm": 0.5683067930605199, "learning_rate": 6.183546283577523e-06, "loss": 0.1121, "step": 15320 }, { "epoch": 1.3244632597520412, "grad_norm": 0.5464109155323802, "learning_rate": 6.1790144298925146e-06, "loss": 0.1137, "step": 15330 }, { "epoch": 1.3253272279580113, "grad_norm": 0.5585746473787109, "learning_rate": 6.174481550379225e-06, "loss": 0.1134, "step": 15340 }, { "epoch": 1.3261911961639812, "grad_norm": 0.5545762428074108, "learning_rate": 6.1699476489815876e-06, "loss": 0.1149, "step": 15350 }, { "epoch": 1.327055164369951, "grad_norm": 0.5748347832493429, "learning_rate": 6.165412729644426e-06, "loss": 0.109, "step": 15360 }, { "epoch": 1.3279191325759212, "grad_norm": 0.5842776295530736, "learning_rate": 6.1608767963134495e-06, "loss": 0.1072, "step": 15370 }, { "epoch": 1.3287831007818913, "grad_norm": 0.5575191047466767, "learning_rate": 6.156339852935251e-06, "loss": 0.1117, "step": 15380 }, { "epoch": 1.3296470689878612, "grad_norm": 0.5450731649894345, "learning_rate": 6.1518019034572995e-06, "loss": 0.1144, "step": 15390 }, { "epoch": 1.3305110371938313, "grad_norm": 0.5624842087427264, "learning_rate": 6.147262951827943e-06, "loss": 0.1107, "step": 15400 }, { "epoch": 1.3313750053998012, "grad_norm": 0.566230353504596, "learning_rate": 6.142723001996398e-06, "loss": 0.1078, "step": 15410 }, { "epoch": 1.3322389736057714, "grad_norm": 0.5596105650561354, "learning_rate": 6.138182057912751e-06, "loss": 0.1114, "step": 15420 }, { "epoch": 1.3331029418117413, "grad_norm": 0.5631168310453766, "learning_rate": 6.133640123527953e-06, "loss": 0.1094, "step": 15430 }, { "epoch": 1.3339669100177114, "grad_norm": 0.5757668723185331, "learning_rate": 6.12909720279382e-06, "loss": 0.1133, "step": 15440 }, { "epoch": 1.3348308782236813, "grad_norm": 0.5656227206741703, "learning_rate": 6.12455329966302e-06, "loss": 0.1102, "step": 15450 }, { "epoch": 1.3356948464296514, "grad_norm": 0.5675203492355643, "learning_rate": 6.120008418089082e-06, "loss": 0.1108, "step": 15460 }, { "epoch": 1.3365588146356213, "grad_norm": 0.5609540526824188, "learning_rate": 6.11546256202638e-06, "loss": 0.1083, "step": 15470 }, { "epoch": 1.3374227828415914, "grad_norm": 0.5453368748675613, "learning_rate": 6.110915735430142e-06, "loss": 0.1068, "step": 15480 }, { "epoch": 1.3382867510475616, "grad_norm": 0.5783208678504869, "learning_rate": 6.106367942256437e-06, "loss": 0.114, "step": 15490 }, { "epoch": 1.3391507192535315, "grad_norm": 0.5772617722487127, "learning_rate": 6.1018191864621764e-06, "loss": 0.1103, "step": 15500 }, { "epoch": 1.3400146874595014, "grad_norm": 0.5726131201792536, "learning_rate": 6.097269472005107e-06, "loss": 0.1108, "step": 15510 }, { "epoch": 1.3408786556654715, "grad_norm": 0.5397368294651602, "learning_rate": 6.092718802843814e-06, "loss": 0.1106, "step": 15520 }, { "epoch": 1.3417426238714416, "grad_norm": 0.5591733492489677, "learning_rate": 6.088167182937706e-06, "loss": 0.1129, "step": 15530 }, { "epoch": 1.3426065920774115, "grad_norm": 0.5561490349875766, "learning_rate": 6.083614616247028e-06, "loss": 0.112, "step": 15540 }, { "epoch": 1.3434705602833816, "grad_norm": 0.5819373721015771, "learning_rate": 6.0790611067328395e-06, "loss": 0.1072, "step": 15550 }, { "epoch": 1.3443345284893515, "grad_norm": 0.5359474980622055, "learning_rate": 6.074506658357029e-06, "loss": 0.1109, "step": 15560 }, { "epoch": 1.3451984966953217, "grad_norm": 0.5720230417354536, "learning_rate": 6.069951275082295e-06, "loss": 0.1078, "step": 15570 }, { "epoch": 1.3460624649012916, "grad_norm": 0.5432165606654724, "learning_rate": 6.065394960872154e-06, "loss": 0.1078, "step": 15580 }, { "epoch": 1.3469264331072617, "grad_norm": 0.5956848004967984, "learning_rate": 6.06083771969093e-06, "loss": 0.1087, "step": 15590 }, { "epoch": 1.3477904013132318, "grad_norm": 0.5831628365743788, "learning_rate": 6.056279555503752e-06, "loss": 0.1075, "step": 15600 }, { "epoch": 1.3486543695192017, "grad_norm": 0.5360439964712416, "learning_rate": 6.0517204722765585e-06, "loss": 0.1083, "step": 15610 }, { "epoch": 1.3495183377251716, "grad_norm": 0.5608239905295521, "learning_rate": 6.047160473976081e-06, "loss": 0.1145, "step": 15620 }, { "epoch": 1.3503823059311417, "grad_norm": 0.5681136455973719, "learning_rate": 6.04259956456985e-06, "loss": 0.1091, "step": 15630 }, { "epoch": 1.3512462741371118, "grad_norm": 0.5358581606501858, "learning_rate": 6.038037748026187e-06, "loss": 0.1095, "step": 15640 }, { "epoch": 1.3521102423430817, "grad_norm": 0.5761867356055901, "learning_rate": 6.0334750283142056e-06, "loss": 0.1113, "step": 15650 }, { "epoch": 1.3529742105490519, "grad_norm": 0.5717376371598626, "learning_rate": 6.028911409403801e-06, "loss": 0.1122, "step": 15660 }, { "epoch": 1.3538381787550218, "grad_norm": 0.5571214583689651, "learning_rate": 6.024346895265659e-06, "loss": 0.1127, "step": 15670 }, { "epoch": 1.354702146960992, "grad_norm": 0.5875745499147877, "learning_rate": 6.019781489871235e-06, "loss": 0.1083, "step": 15680 }, { "epoch": 1.3555661151669618, "grad_norm": 0.5546175269594011, "learning_rate": 6.015215197192763e-06, "loss": 0.1079, "step": 15690 }, { "epoch": 1.356430083372932, "grad_norm": 0.5450808281935141, "learning_rate": 6.010648021203252e-06, "loss": 0.1112, "step": 15700 }, { "epoch": 1.3572940515789018, "grad_norm": 0.549235755180578, "learning_rate": 6.006079965876476e-06, "loss": 0.1085, "step": 15710 }, { "epoch": 1.358158019784872, "grad_norm": 0.5623635978497271, "learning_rate": 6.001511035186975e-06, "loss": 0.1078, "step": 15720 }, { "epoch": 1.3590219879908418, "grad_norm": 0.5484310920570102, "learning_rate": 5.996941233110052e-06, "loss": 0.1092, "step": 15730 }, { "epoch": 1.359885956196812, "grad_norm": 0.5154476200776394, "learning_rate": 5.992370563621766e-06, "loss": 0.1085, "step": 15740 }, { "epoch": 1.360749924402782, "grad_norm": 0.5553387563055833, "learning_rate": 5.987799030698932e-06, "loss": 0.112, "step": 15750 }, { "epoch": 1.361613892608752, "grad_norm": 0.5580242137209885, "learning_rate": 5.9832266383191154e-06, "loss": 0.1118, "step": 15760 }, { "epoch": 1.3624778608147219, "grad_norm": 0.5283878918575788, "learning_rate": 5.978653390460632e-06, "loss": 0.1087, "step": 15770 }, { "epoch": 1.363341829020692, "grad_norm": 0.5785508559189512, "learning_rate": 5.974079291102538e-06, "loss": 0.1108, "step": 15780 }, { "epoch": 1.3642057972266621, "grad_norm": 0.55241795550289, "learning_rate": 5.969504344224635e-06, "loss": 0.1118, "step": 15790 }, { "epoch": 1.365069765432632, "grad_norm": 0.5351889601182599, "learning_rate": 5.964928553807455e-06, "loss": 0.1095, "step": 15800 }, { "epoch": 1.3659337336386022, "grad_norm": 0.5964305804829771, "learning_rate": 5.960351923832274e-06, "loss": 0.1085, "step": 15810 }, { "epoch": 1.366797701844572, "grad_norm": 0.5659343518769593, "learning_rate": 5.955774458281088e-06, "loss": 0.1119, "step": 15820 }, { "epoch": 1.3676616700505422, "grad_norm": 0.5387151801430248, "learning_rate": 5.951196161136629e-06, "loss": 0.1104, "step": 15830 }, { "epoch": 1.368525638256512, "grad_norm": 1.5548648788419452, "learning_rate": 5.946617036382344e-06, "loss": 0.1102, "step": 15840 }, { "epoch": 1.3693896064624822, "grad_norm": 0.5558801260199372, "learning_rate": 5.942037088002408e-06, "loss": 0.111, "step": 15850 }, { "epoch": 1.3702535746684523, "grad_norm": 0.575072919526527, "learning_rate": 5.937456319981706e-06, "loss": 0.1137, "step": 15860 }, { "epoch": 1.3711175428744222, "grad_norm": 0.5530395900840259, "learning_rate": 5.932874736305843e-06, "loss": 0.1101, "step": 15870 }, { "epoch": 1.3719815110803921, "grad_norm": 0.5983896943740621, "learning_rate": 5.9282923409611285e-06, "loss": 0.1075, "step": 15880 }, { "epoch": 1.3728454792863622, "grad_norm": 0.5768004665520459, "learning_rate": 5.9237091379345765e-06, "loss": 0.1107, "step": 15890 }, { "epoch": 1.3737094474923324, "grad_norm": 0.569097515223848, "learning_rate": 5.919125131213912e-06, "loss": 0.1089, "step": 15900 }, { "epoch": 1.3745734156983023, "grad_norm": 0.5312624276461287, "learning_rate": 5.91454032478755e-06, "loss": 0.11, "step": 15910 }, { "epoch": 1.3754373839042724, "grad_norm": 0.5328951454517763, "learning_rate": 5.909954722644608e-06, "loss": 0.1088, "step": 15920 }, { "epoch": 1.3763013521102423, "grad_norm": 0.5722786730159535, "learning_rate": 5.905368328774893e-06, "loss": 0.1071, "step": 15930 }, { "epoch": 1.3771653203162124, "grad_norm": 0.582982357808919, "learning_rate": 5.9007811471689e-06, "loss": 0.1107, "step": 15940 }, { "epoch": 1.3780292885221823, "grad_norm": 0.5409670522081398, "learning_rate": 5.896193181817811e-06, "loss": 0.1123, "step": 15950 }, { "epoch": 1.3788932567281524, "grad_norm": 0.5525488935001114, "learning_rate": 5.891604436713491e-06, "loss": 0.1101, "step": 15960 }, { "epoch": 1.3797572249341226, "grad_norm": 0.5840786377822249, "learning_rate": 5.887014915848478e-06, "loss": 0.1097, "step": 15970 }, { "epoch": 1.3806211931400925, "grad_norm": 0.5384412553004606, "learning_rate": 5.882424623215993e-06, "loss": 0.1088, "step": 15980 }, { "epoch": 1.3814851613460624, "grad_norm": 0.5833292422162293, "learning_rate": 5.877833562809922e-06, "loss": 0.1123, "step": 15990 }, { "epoch": 1.3823491295520325, "grad_norm": 0.5517932567301533, "learning_rate": 5.873241738624824e-06, "loss": 0.1057, "step": 16000 }, { "epoch": 1.3832130977580026, "grad_norm": 0.5756476697861506, "learning_rate": 5.8686491546559154e-06, "loss": 0.1086, "step": 16010 }, { "epoch": 1.3840770659639725, "grad_norm": 0.5785581813009524, "learning_rate": 5.8640558148990835e-06, "loss": 0.1115, "step": 16020 }, { "epoch": 1.3849410341699424, "grad_norm": 0.5943858429441137, "learning_rate": 5.8594617233508645e-06, "loss": 0.1092, "step": 16030 }, { "epoch": 1.3858050023759125, "grad_norm": 0.5745575971627046, "learning_rate": 5.854866884008452e-06, "loss": 0.1102, "step": 16040 }, { "epoch": 1.3866689705818827, "grad_norm": 0.5348702159362239, "learning_rate": 5.850271300869691e-06, "loss": 0.1096, "step": 16050 }, { "epoch": 1.3875329387878526, "grad_norm": 0.5694557334714356, "learning_rate": 5.845674977933074e-06, "loss": 0.1085, "step": 16060 }, { "epoch": 1.3883969069938227, "grad_norm": 0.5693489028372023, "learning_rate": 5.841077919197734e-06, "loss": 0.1086, "step": 16070 }, { "epoch": 1.3892608751997926, "grad_norm": 0.5593707766156559, "learning_rate": 5.8364801286634486e-06, "loss": 0.1095, "step": 16080 }, { "epoch": 1.3901248434057627, "grad_norm": 0.6124636333798967, "learning_rate": 5.831881610330628e-06, "loss": 0.1166, "step": 16090 }, { "epoch": 1.3909888116117326, "grad_norm": 0.5846656666878277, "learning_rate": 5.8272823682003196e-06, "loss": 0.1111, "step": 16100 }, { "epoch": 1.3918527798177027, "grad_norm": 0.5720609742683443, "learning_rate": 5.822682406274197e-06, "loss": 0.1097, "step": 16110 }, { "epoch": 1.3927167480236728, "grad_norm": 0.5513132732611473, "learning_rate": 5.818081728554562e-06, "loss": 0.1106, "step": 16120 }, { "epoch": 1.3935807162296427, "grad_norm": 0.5149649775459123, "learning_rate": 5.81348033904434e-06, "loss": 0.1114, "step": 16130 }, { "epoch": 1.3944446844356126, "grad_norm": 0.5869210395463293, "learning_rate": 5.808878241747071e-06, "loss": 0.1049, "step": 16140 }, { "epoch": 1.3953086526415828, "grad_norm": 0.5625464154152859, "learning_rate": 5.804275440666918e-06, "loss": 0.1097, "step": 16150 }, { "epoch": 1.396172620847553, "grad_norm": 0.5813811467929083, "learning_rate": 5.7996719398086485e-06, "loss": 0.1119, "step": 16160 }, { "epoch": 1.3970365890535228, "grad_norm": 0.5725158526835552, "learning_rate": 5.795067743177648e-06, "loss": 0.1099, "step": 16170 }, { "epoch": 1.397900557259493, "grad_norm": 0.5720353741750168, "learning_rate": 5.790462854779898e-06, "loss": 0.1083, "step": 16180 }, { "epoch": 1.3987645254654628, "grad_norm": 0.5757257456663112, "learning_rate": 5.785857278621989e-06, "loss": 0.1074, "step": 16190 }, { "epoch": 1.399628493671433, "grad_norm": 0.5751688492722273, "learning_rate": 5.7812510187111045e-06, "loss": 0.111, "step": 16200 }, { "epoch": 1.4004924618774028, "grad_norm": 0.5308149515056841, "learning_rate": 5.776644079055029e-06, "loss": 0.1086, "step": 16210 }, { "epoch": 1.401356430083373, "grad_norm": 0.5457555860479729, "learning_rate": 5.772036463662133e-06, "loss": 0.1095, "step": 16220 }, { "epoch": 1.402220398289343, "grad_norm": 0.5525027323997239, "learning_rate": 5.7674281765413775e-06, "loss": 0.1098, "step": 16230 }, { "epoch": 1.403084366495313, "grad_norm": 0.5552174838299228, "learning_rate": 5.7628192217023075e-06, "loss": 0.109, "step": 16240 }, { "epoch": 1.4039483347012829, "grad_norm": 0.6052659498732853, "learning_rate": 5.758209603155047e-06, "loss": 0.1072, "step": 16250 }, { "epoch": 1.404812302907253, "grad_norm": 0.5614718168621989, "learning_rate": 5.7535993249103e-06, "loss": 0.1106, "step": 16260 }, { "epoch": 1.4056762711132231, "grad_norm": 0.5536105288949624, "learning_rate": 5.748988390979346e-06, "loss": 0.11, "step": 16270 }, { "epoch": 1.406540239319193, "grad_norm": 0.5611334021841203, "learning_rate": 5.74437680537403e-06, "loss": 0.1101, "step": 16280 }, { "epoch": 1.4074042075251632, "grad_norm": 0.5573869486007782, "learning_rate": 5.73976457210677e-06, "loss": 0.1105, "step": 16290 }, { "epoch": 1.408268175731133, "grad_norm": 0.5516200215230308, "learning_rate": 5.73515169519054e-06, "loss": 0.1082, "step": 16300 }, { "epoch": 1.4091321439371032, "grad_norm": 0.5526404729155173, "learning_rate": 5.730538178638881e-06, "loss": 0.1101, "step": 16310 }, { "epoch": 1.409996112143073, "grad_norm": 0.5719197743944662, "learning_rate": 5.725924026465888e-06, "loss": 0.1112, "step": 16320 }, { "epoch": 1.4108600803490432, "grad_norm": 0.5694236895947357, "learning_rate": 5.721309242686209e-06, "loss": 0.1093, "step": 16330 }, { "epoch": 1.411724048555013, "grad_norm": 0.6141799055072968, "learning_rate": 5.716693831315041e-06, "loss": 0.1126, "step": 16340 }, { "epoch": 1.4125880167609832, "grad_norm": 0.5781600972939118, "learning_rate": 5.7120777963681276e-06, "loss": 0.1133, "step": 16350 }, { "epoch": 1.4134519849669531, "grad_norm": 0.5489573576838745, "learning_rate": 5.707461141861753e-06, "loss": 0.1085, "step": 16360 }, { "epoch": 1.4143159531729232, "grad_norm": 0.5511980848938117, "learning_rate": 5.702843871812745e-06, "loss": 0.1093, "step": 16370 }, { "epoch": 1.4151799213788934, "grad_norm": 0.5694496019360336, "learning_rate": 5.698225990238463e-06, "loss": 0.1069, "step": 16380 }, { "epoch": 1.4160438895848633, "grad_norm": 0.587017896333731, "learning_rate": 5.6936075011568e-06, "loss": 0.1109, "step": 16390 }, { "epoch": 1.4169078577908332, "grad_norm": 0.568063496878268, "learning_rate": 5.688988408586177e-06, "loss": 0.1093, "step": 16400 }, { "epoch": 1.4177718259968033, "grad_norm": 0.5679995822925583, "learning_rate": 5.684368716545541e-06, "loss": 0.106, "step": 16410 }, { "epoch": 1.4186357942027734, "grad_norm": 0.5734866434303718, "learning_rate": 5.6797484290543594e-06, "loss": 0.1119, "step": 16420 }, { "epoch": 1.4194997624087433, "grad_norm": 0.5431629848080799, "learning_rate": 5.6751275501326185e-06, "loss": 0.11, "step": 16430 }, { "epoch": 1.4203637306147134, "grad_norm": 0.5720416624760498, "learning_rate": 5.67050608380082e-06, "loss": 0.1087, "step": 16440 }, { "epoch": 1.4212276988206833, "grad_norm": 0.580302420152164, "learning_rate": 5.665884034079974e-06, "loss": 0.1087, "step": 16450 }, { "epoch": 1.4220916670266535, "grad_norm": 0.5840176383870129, "learning_rate": 5.6612614049916e-06, "loss": 0.1068, "step": 16460 }, { "epoch": 1.4229556352326234, "grad_norm": 0.5619479511353946, "learning_rate": 5.656638200557723e-06, "loss": 0.1064, "step": 16470 }, { "epoch": 1.4238196034385935, "grad_norm": 0.563149143445443, "learning_rate": 5.652014424800865e-06, "loss": 0.112, "step": 16480 }, { "epoch": 1.4246835716445636, "grad_norm": 0.554179718385933, "learning_rate": 5.647390081744047e-06, "loss": 0.1118, "step": 16490 }, { "epoch": 1.4255475398505335, "grad_norm": 0.5790448425729009, "learning_rate": 5.6427651754107856e-06, "loss": 0.113, "step": 16500 }, { "epoch": 1.4264115080565034, "grad_norm": 0.5413167122533629, "learning_rate": 5.638139709825085e-06, "loss": 0.1105, "step": 16510 }, { "epoch": 1.4272754762624735, "grad_norm": 0.5403591817971234, "learning_rate": 5.633513689011436e-06, "loss": 0.1094, "step": 16520 }, { "epoch": 1.4281394444684437, "grad_norm": 0.5719167784121851, "learning_rate": 5.628887116994812e-06, "loss": 0.1069, "step": 16530 }, { "epoch": 1.4290034126744136, "grad_norm": 0.5571643381657813, "learning_rate": 5.624259997800671e-06, "loss": 0.1082, "step": 16540 }, { "epoch": 1.4298673808803837, "grad_norm": 0.5539391800128624, "learning_rate": 5.61963233545494e-06, "loss": 0.1076, "step": 16550 }, { "epoch": 1.4307313490863536, "grad_norm": 0.5737886682031937, "learning_rate": 5.615004133984022e-06, "loss": 0.1087, "step": 16560 }, { "epoch": 1.4315953172923237, "grad_norm": 0.5466801931023841, "learning_rate": 5.610375397414788e-06, "loss": 0.1101, "step": 16570 }, { "epoch": 1.4324592854982936, "grad_norm": 0.5797886336713219, "learning_rate": 5.605746129774577e-06, "loss": 0.1114, "step": 16580 }, { "epoch": 1.4333232537042637, "grad_norm": 0.5529740818600277, "learning_rate": 5.601116335091189e-06, "loss": 0.1074, "step": 16590 }, { "epoch": 1.4341872219102336, "grad_norm": 0.5715134987232192, "learning_rate": 5.59648601739288e-06, "loss": 0.1084, "step": 16600 }, { "epoch": 1.4350511901162037, "grad_norm": 0.5565474264744146, "learning_rate": 5.591855180708365e-06, "loss": 0.1082, "step": 16610 }, { "epoch": 1.4359151583221736, "grad_norm": 0.5771393921573549, "learning_rate": 5.587223829066807e-06, "loss": 0.1132, "step": 16620 }, { "epoch": 1.4367791265281438, "grad_norm": 0.5354337704232355, "learning_rate": 5.582591966497818e-06, "loss": 0.1077, "step": 16630 }, { "epoch": 1.437643094734114, "grad_norm": 0.5704092054091621, "learning_rate": 5.577959597031459e-06, "loss": 0.1084, "step": 16640 }, { "epoch": 1.4385070629400838, "grad_norm": 0.547835523974159, "learning_rate": 5.573326724698225e-06, "loss": 0.1069, "step": 16650 }, { "epoch": 1.4393710311460537, "grad_norm": 0.5550047278667364, "learning_rate": 5.568693353529053e-06, "loss": 0.1092, "step": 16660 }, { "epoch": 1.4402349993520238, "grad_norm": 0.5598159535674883, "learning_rate": 5.564059487555311e-06, "loss": 0.11, "step": 16670 }, { "epoch": 1.441098967557994, "grad_norm": 0.5500359408388827, "learning_rate": 5.559425130808802e-06, "loss": 0.1093, "step": 16680 }, { "epoch": 1.4419629357639638, "grad_norm": 0.5748371580710966, "learning_rate": 5.554790287321752e-06, "loss": 0.1095, "step": 16690 }, { "epoch": 1.442826903969934, "grad_norm": 0.5589845183126114, "learning_rate": 5.550154961126812e-06, "loss": 0.11, "step": 16700 }, { "epoch": 1.4436908721759039, "grad_norm": 0.5596557149578248, "learning_rate": 5.5455191562570535e-06, "loss": 0.1084, "step": 16710 }, { "epoch": 1.444554840381874, "grad_norm": 0.555957437652107, "learning_rate": 5.540882876745962e-06, "loss": 0.1099, "step": 16720 }, { "epoch": 1.4454188085878439, "grad_norm": 0.5697811632279, "learning_rate": 5.53624612662744e-06, "loss": 0.11, "step": 16730 }, { "epoch": 1.446282776793814, "grad_norm": 0.540863041698352, "learning_rate": 5.531608909935795e-06, "loss": 0.1078, "step": 16740 }, { "epoch": 1.4471467449997841, "grad_norm": 0.5439712322944413, "learning_rate": 5.526971230705744e-06, "loss": 0.1092, "step": 16750 }, { "epoch": 1.448010713205754, "grad_norm": 0.5539653073070816, "learning_rate": 5.522333092972406e-06, "loss": 0.1067, "step": 16760 }, { "epoch": 1.448874681411724, "grad_norm": 0.5757732852795685, "learning_rate": 5.517694500771298e-06, "loss": 0.1077, "step": 16770 }, { "epoch": 1.449738649617694, "grad_norm": 0.5523898348981371, "learning_rate": 5.513055458138329e-06, "loss": 0.1098, "step": 16780 }, { "epoch": 1.4506026178236642, "grad_norm": 0.53039159499405, "learning_rate": 5.508415969109808e-06, "loss": 0.1091, "step": 16790 }, { "epoch": 1.451466586029634, "grad_norm": 0.575717777450302, "learning_rate": 5.503776037722425e-06, "loss": 0.1115, "step": 16800 }, { "epoch": 1.4523305542356042, "grad_norm": 0.5295410373537287, "learning_rate": 5.4991356680132576e-06, "loss": 0.1106, "step": 16810 }, { "epoch": 1.453194522441574, "grad_norm": 0.5637895554867309, "learning_rate": 5.494494864019767e-06, "loss": 0.1106, "step": 16820 }, { "epoch": 1.4540584906475442, "grad_norm": 0.5647698345641413, "learning_rate": 5.489853629779789e-06, "loss": 0.1113, "step": 16830 }, { "epoch": 1.4549224588535141, "grad_norm": 0.5480780997759342, "learning_rate": 5.485211969331536e-06, "loss": 0.1104, "step": 16840 }, { "epoch": 1.4557864270594842, "grad_norm": 0.5962694168716901, "learning_rate": 5.480569886713586e-06, "loss": 0.1083, "step": 16850 }, { "epoch": 1.4566503952654544, "grad_norm": 0.5468143792266299, "learning_rate": 5.475927385964892e-06, "loss": 0.105, "step": 16860 }, { "epoch": 1.4575143634714243, "grad_norm": 0.5627954667617832, "learning_rate": 5.4712844711247685e-06, "loss": 0.1097, "step": 16870 }, { "epoch": 1.4583783316773942, "grad_norm": 0.5498247339749494, "learning_rate": 5.4666411462328835e-06, "loss": 0.1073, "step": 16880 }, { "epoch": 1.4592422998833643, "grad_norm": 0.5593645586338002, "learning_rate": 5.461997415329274e-06, "loss": 0.1099, "step": 16890 }, { "epoch": 1.4601062680893344, "grad_norm": 0.5707187551887513, "learning_rate": 5.457353282454318e-06, "loss": 0.1121, "step": 16900 }, { "epoch": 1.4609702362953043, "grad_norm": 0.553904006667131, "learning_rate": 5.452708751648753e-06, "loss": 0.1072, "step": 16910 }, { "epoch": 1.4618342045012742, "grad_norm": 0.529572172260642, "learning_rate": 5.448063826953654e-06, "loss": 0.1091, "step": 16920 }, { "epoch": 1.4626981727072443, "grad_norm": 0.5472381570541359, "learning_rate": 5.443418512410449e-06, "loss": 0.1072, "step": 16930 }, { "epoch": 1.4635621409132145, "grad_norm": 0.5704255859503703, "learning_rate": 5.438772812060895e-06, "loss": 0.1099, "step": 16940 }, { "epoch": 1.4644261091191844, "grad_norm": 0.5661130823236894, "learning_rate": 5.434126729947091e-06, "loss": 0.1067, "step": 16950 }, { "epoch": 1.4652900773251545, "grad_norm": 0.5824585865876896, "learning_rate": 5.4294802701114645e-06, "loss": 0.1075, "step": 16960 }, { "epoch": 1.4661540455311244, "grad_norm": 0.5712779312011059, "learning_rate": 5.424833436596774e-06, "loss": 0.1106, "step": 16970 }, { "epoch": 1.4670180137370945, "grad_norm": 0.556641188854497, "learning_rate": 5.420186233446104e-06, "loss": 0.1087, "step": 16980 }, { "epoch": 1.4678819819430644, "grad_norm": 0.5747662022205271, "learning_rate": 5.415538664702858e-06, "loss": 0.1073, "step": 16990 }, { "epoch": 1.4687459501490345, "grad_norm": 0.5379635353303245, "learning_rate": 5.410890734410761e-06, "loss": 0.1053, "step": 17000 }, { "epoch": 1.4696099183550047, "grad_norm": 0.6016166805731681, "learning_rate": 5.406242446613845e-06, "loss": 0.1103, "step": 17010 }, { "epoch": 1.4704738865609746, "grad_norm": 0.5651647377002924, "learning_rate": 5.401593805356464e-06, "loss": 0.1066, "step": 17020 }, { "epoch": 1.4713378547669445, "grad_norm": 0.53397079179655, "learning_rate": 5.39694481468327e-06, "loss": 0.1108, "step": 17030 }, { "epoch": 1.4722018229729146, "grad_norm": 0.6010434080095977, "learning_rate": 5.392295478639226e-06, "loss": 0.1098, "step": 17040 }, { "epoch": 1.4730657911788847, "grad_norm": 0.5546090791053712, "learning_rate": 5.38764580126959e-06, "loss": 0.1114, "step": 17050 }, { "epoch": 1.4739297593848546, "grad_norm": 0.5273120526879177, "learning_rate": 5.382995786619921e-06, "loss": 0.1088, "step": 17060 }, { "epoch": 1.4747937275908247, "grad_norm": 0.5567574055473559, "learning_rate": 5.378345438736068e-06, "loss": 0.1085, "step": 17070 }, { "epoch": 1.4756576957967946, "grad_norm": 0.5714232269488697, "learning_rate": 5.373694761664174e-06, "loss": 0.1074, "step": 17080 }, { "epoch": 1.4765216640027647, "grad_norm": 0.5534188993303146, "learning_rate": 5.369043759450664e-06, "loss": 0.1112, "step": 17090 }, { "epoch": 1.4773856322087346, "grad_norm": 0.5533405667659507, "learning_rate": 5.364392436142251e-06, "loss": 0.1084, "step": 17100 }, { "epoch": 1.4782496004147048, "grad_norm": 0.5715199286886745, "learning_rate": 5.359740795785921e-06, "loss": 0.1088, "step": 17110 }, { "epoch": 1.479113568620675, "grad_norm": 0.57254667157946, "learning_rate": 5.3550888424289415e-06, "loss": 0.1056, "step": 17120 }, { "epoch": 1.4799775368266448, "grad_norm": 0.5199024237592625, "learning_rate": 5.3504365801188495e-06, "loss": 0.1052, "step": 17130 }, { "epoch": 1.4808415050326147, "grad_norm": 0.5813582218704558, "learning_rate": 5.3457840129034535e-06, "loss": 0.1047, "step": 17140 }, { "epoch": 1.4817054732385848, "grad_norm": 0.5390351359241751, "learning_rate": 5.341131144830822e-06, "loss": 0.1068, "step": 17150 }, { "epoch": 1.482569441444555, "grad_norm": 0.5829892827363654, "learning_rate": 5.336477979949291e-06, "loss": 0.1063, "step": 17160 }, { "epoch": 1.4834334096505248, "grad_norm": 0.5613668918351362, "learning_rate": 5.33182452230745e-06, "loss": 0.1093, "step": 17170 }, { "epoch": 1.484297377856495, "grad_norm": 0.5668262800654991, "learning_rate": 5.327170775954149e-06, "loss": 0.1067, "step": 17180 }, { "epoch": 1.4851613460624649, "grad_norm": 0.563123194809761, "learning_rate": 5.322516744938482e-06, "loss": 0.1068, "step": 17190 }, { "epoch": 1.486025314268435, "grad_norm": 0.5702164031662597, "learning_rate": 5.317862433309797e-06, "loss": 0.1123, "step": 17200 }, { "epoch": 1.4868892824744049, "grad_norm": 0.5573428134086994, "learning_rate": 5.3132078451176815e-06, "loss": 0.1092, "step": 17210 }, { "epoch": 1.487753250680375, "grad_norm": 0.5423066018915117, "learning_rate": 5.308552984411968e-06, "loss": 0.1065, "step": 17220 }, { "epoch": 1.488617218886345, "grad_norm": 0.5895533833702804, "learning_rate": 5.30389785524272e-06, "loss": 0.1121, "step": 17230 }, { "epoch": 1.489481187092315, "grad_norm": 0.5609587275234417, "learning_rate": 5.299242461660243e-06, "loss": 0.1065, "step": 17240 }, { "epoch": 1.490345155298285, "grad_norm": 0.575671022001389, "learning_rate": 5.294586807715066e-06, "loss": 0.1087, "step": 17250 }, { "epoch": 1.491209123504255, "grad_norm": 0.5345702719825773, "learning_rate": 5.289930897457946e-06, "loss": 0.1077, "step": 17260 }, { "epoch": 1.4920730917102252, "grad_norm": 0.5371496978514604, "learning_rate": 5.285274734939864e-06, "loss": 0.1103, "step": 17270 }, { "epoch": 1.492937059916195, "grad_norm": 0.5593334573221135, "learning_rate": 5.280618324212018e-06, "loss": 0.1074, "step": 17280 }, { "epoch": 1.493801028122165, "grad_norm": 0.5295295244997914, "learning_rate": 5.275961669325828e-06, "loss": 0.1063, "step": 17290 }, { "epoch": 1.494664996328135, "grad_norm": 0.5383691997834555, "learning_rate": 5.271304774332917e-06, "loss": 0.1089, "step": 17300 }, { "epoch": 1.4955289645341052, "grad_norm": 0.5378075832729008, "learning_rate": 5.266647643285126e-06, "loss": 0.1071, "step": 17310 }, { "epoch": 1.4963929327400751, "grad_norm": 0.5864711882360478, "learning_rate": 5.261990280234498e-06, "loss": 0.1082, "step": 17320 }, { "epoch": 1.4972569009460452, "grad_norm": 0.5833725727517236, "learning_rate": 5.257332689233275e-06, "loss": 0.109, "step": 17330 }, { "epoch": 1.4981208691520151, "grad_norm": 0.617406542429736, "learning_rate": 5.252674874333902e-06, "loss": 0.1103, "step": 17340 }, { "epoch": 1.4989848373579853, "grad_norm": 0.5393750705119263, "learning_rate": 5.248016839589015e-06, "loss": 0.1065, "step": 17350 }, { "epoch": 1.4998488055639552, "grad_norm": 0.5525154054728131, "learning_rate": 5.243358589051445e-06, "loss": 0.1086, "step": 17360 }, { "epoch": 1.5007127737699253, "grad_norm": 0.5663281890236571, "learning_rate": 5.238700126774208e-06, "loss": 0.1106, "step": 17370 }, { "epoch": 1.5015767419758954, "grad_norm": 0.5639045716521068, "learning_rate": 5.234041456810501e-06, "loss": 0.1099, "step": 17380 }, { "epoch": 1.5024407101818653, "grad_norm": 0.5501571786531163, "learning_rate": 5.229382583213713e-06, "loss": 0.1084, "step": 17390 }, { "epoch": 1.5033046783878352, "grad_norm": 0.5555533506524184, "learning_rate": 5.224723510037395e-06, "loss": 0.1096, "step": 17400 }, { "epoch": 1.5041686465938053, "grad_norm": 0.5565015152184756, "learning_rate": 5.220064241335286e-06, "loss": 0.1115, "step": 17410 }, { "epoch": 1.5050326147997755, "grad_norm": 0.5594405883971818, "learning_rate": 5.215404781161283e-06, "loss": 0.1086, "step": 17420 }, { "epoch": 1.5058965830057454, "grad_norm": 0.560011821779313, "learning_rate": 5.2107451335694595e-06, "loss": 0.1075, "step": 17430 }, { "epoch": 1.5067605512117153, "grad_norm": 0.543119102461241, "learning_rate": 5.206085302614045e-06, "loss": 0.1071, "step": 17440 }, { "epoch": 1.5076245194176854, "grad_norm": 0.5357688437385529, "learning_rate": 5.201425292349434e-06, "loss": 0.1114, "step": 17450 }, { "epoch": 1.5084884876236555, "grad_norm": 0.5504332290569045, "learning_rate": 5.196765106830171e-06, "loss": 0.1096, "step": 17460 }, { "epoch": 1.5093524558296254, "grad_norm": 0.567852538652432, "learning_rate": 5.192104750110959e-06, "loss": 0.1074, "step": 17470 }, { "epoch": 1.5102164240355955, "grad_norm": 0.5322729979849891, "learning_rate": 5.187444226246645e-06, "loss": 0.1099, "step": 17480 }, { "epoch": 1.5110803922415657, "grad_norm": 0.5756617595017037, "learning_rate": 5.182783539292226e-06, "loss": 0.1086, "step": 17490 }, { "epoch": 1.5119443604475356, "grad_norm": 0.5678975606126713, "learning_rate": 5.178122693302835e-06, "loss": 0.1057, "step": 17500 }, { "epoch": 1.5128083286535055, "grad_norm": 0.5461058418335905, "learning_rate": 5.173461692333751e-06, "loss": 0.1068, "step": 17510 }, { "epoch": 1.5136722968594756, "grad_norm": 0.571376612212014, "learning_rate": 5.168800540440381e-06, "loss": 0.1075, "step": 17520 }, { "epoch": 1.5145362650654457, "grad_norm": 0.5497908782485466, "learning_rate": 5.164139241678267e-06, "loss": 0.1072, "step": 17530 }, { "epoch": 1.5154002332714156, "grad_norm": 0.5914438759722356, "learning_rate": 5.159477800103079e-06, "loss": 0.1085, "step": 17540 }, { "epoch": 1.5162642014773855, "grad_norm": 0.5316510326275006, "learning_rate": 5.1548162197706075e-06, "loss": 0.1075, "step": 17550 }, { "epoch": 1.5171281696833556, "grad_norm": 0.5621679449680816, "learning_rate": 5.150154504736768e-06, "loss": 0.1071, "step": 17560 }, { "epoch": 1.5179921378893257, "grad_norm": 0.5372659772513624, "learning_rate": 5.145492659057593e-06, "loss": 0.1061, "step": 17570 }, { "epoch": 1.5188561060952956, "grad_norm": 0.5710203008791778, "learning_rate": 5.140830686789224e-06, "loss": 0.1069, "step": 17580 }, { "epoch": 1.5197200743012658, "grad_norm": 0.5481229869659677, "learning_rate": 5.1361685919879175e-06, "loss": 0.1087, "step": 17590 }, { "epoch": 1.520584042507236, "grad_norm": 0.5589983135098491, "learning_rate": 5.131506378710035e-06, "loss": 0.1109, "step": 17600 }, { "epoch": 1.5214480107132058, "grad_norm": 0.5635199037596682, "learning_rate": 5.126844051012041e-06, "loss": 0.1103, "step": 17610 }, { "epoch": 1.5223119789191757, "grad_norm": 0.5500151319574998, "learning_rate": 5.122181612950499e-06, "loss": 0.108, "step": 17620 }, { "epoch": 1.5231759471251458, "grad_norm": 0.5371844113385824, "learning_rate": 5.11751906858207e-06, "loss": 0.1072, "step": 17630 }, { "epoch": 1.524039915331116, "grad_norm": 0.5485761580061022, "learning_rate": 5.112856421963507e-06, "loss": 0.1072, "step": 17640 }, { "epoch": 1.5249038835370858, "grad_norm": 0.5700920471032753, "learning_rate": 5.108193677151648e-06, "loss": 0.1085, "step": 17650 }, { "epoch": 1.5257678517430557, "grad_norm": 0.5514566079923485, "learning_rate": 5.103530838203427e-06, "loss": 0.1071, "step": 17660 }, { "epoch": 1.5266318199490259, "grad_norm": 0.5712587946627912, "learning_rate": 5.0988679091758465e-06, "loss": 0.1057, "step": 17670 }, { "epoch": 1.527495788154996, "grad_norm": 0.523278063242322, "learning_rate": 5.094204894125997e-06, "loss": 0.1055, "step": 17680 }, { "epoch": 1.5283597563609659, "grad_norm": 0.547975825208195, "learning_rate": 5.0895417971110384e-06, "loss": 0.1069, "step": 17690 }, { "epoch": 1.5292237245669358, "grad_norm": 0.5649900655239077, "learning_rate": 5.0848786221882065e-06, "loss": 0.1071, "step": 17700 }, { "epoch": 1.530087692772906, "grad_norm": 0.5235846263354882, "learning_rate": 5.0802153734148e-06, "loss": 0.1086, "step": 17710 }, { "epoch": 1.530951660978876, "grad_norm": 0.558486058207212, "learning_rate": 5.075552054848188e-06, "loss": 0.108, "step": 17720 }, { "epoch": 1.531815629184846, "grad_norm": 0.5591344019040175, "learning_rate": 5.070888670545794e-06, "loss": 0.1078, "step": 17730 }, { "epoch": 1.532679597390816, "grad_norm": 0.5860722333653754, "learning_rate": 5.066225224565102e-06, "loss": 0.1085, "step": 17740 }, { "epoch": 1.5335435655967862, "grad_norm": 0.5963012918305679, "learning_rate": 5.061561720963649e-06, "loss": 0.1098, "step": 17750 }, { "epoch": 1.534407533802756, "grad_norm": 0.5604735839188759, "learning_rate": 5.056898163799023e-06, "loss": 0.1081, "step": 17760 }, { "epoch": 1.535271502008726, "grad_norm": 0.5521582524965631, "learning_rate": 5.052234557128859e-06, "loss": 0.1056, "step": 17770 }, { "epoch": 1.536135470214696, "grad_norm": 0.5863548246000648, "learning_rate": 5.04757090501083e-06, "loss": 0.1073, "step": 17780 }, { "epoch": 1.5369994384206662, "grad_norm": 0.5505531729226412, "learning_rate": 5.042907211502654e-06, "loss": 0.1059, "step": 17790 }, { "epoch": 1.5378634066266361, "grad_norm": 0.5527561782797406, "learning_rate": 5.038243480662086e-06, "loss": 0.107, "step": 17800 }, { "epoch": 1.538727374832606, "grad_norm": 0.5403035943659682, "learning_rate": 5.033579716546908e-06, "loss": 0.1066, "step": 17810 }, { "epoch": 1.5395913430385761, "grad_norm": 0.6183154805975839, "learning_rate": 5.028915923214935e-06, "loss": 0.1063, "step": 17820 }, { "epoch": 1.5404553112445463, "grad_norm": 0.5501718905367079, "learning_rate": 5.0242521047240076e-06, "loss": 0.1036, "step": 17830 }, { "epoch": 1.5413192794505162, "grad_norm": 0.5332692223400205, "learning_rate": 5.019588265131984e-06, "loss": 0.1061, "step": 17840 }, { "epoch": 1.5421832476564863, "grad_norm": 0.5613621146761566, "learning_rate": 5.014924408496746e-06, "loss": 0.1068, "step": 17850 }, { "epoch": 1.5430472158624564, "grad_norm": 0.5581157336544168, "learning_rate": 5.010260538876187e-06, "loss": 0.1075, "step": 17860 }, { "epoch": 1.5439111840684263, "grad_norm": 0.559150234766518, "learning_rate": 5.0055966603282136e-06, "loss": 0.1046, "step": 17870 }, { "epoch": 1.5447751522743962, "grad_norm": 0.5523464636226372, "learning_rate": 5.000932776910739e-06, "loss": 0.1081, "step": 17880 }, { "epoch": 1.5456391204803663, "grad_norm": 0.5874851979850498, "learning_rate": 4.996268892681681e-06, "loss": 0.1059, "step": 17890 }, { "epoch": 1.5465030886863365, "grad_norm": 0.5599016847752923, "learning_rate": 4.991605011698957e-06, "loss": 0.105, "step": 17900 }, { "epoch": 1.5473670568923064, "grad_norm": 0.5536885812459234, "learning_rate": 4.9869411380204825e-06, "loss": 0.1114, "step": 17910 }, { "epoch": 1.5482310250982763, "grad_norm": 0.5850370040739463, "learning_rate": 4.982277275704169e-06, "loss": 0.108, "step": 17920 }, { "epoch": 1.5490949933042464, "grad_norm": 0.5476002826516605, "learning_rate": 4.977613428807913e-06, "loss": 0.1114, "step": 17930 }, { "epoch": 1.5499589615102165, "grad_norm": 0.5458827789684254, "learning_rate": 4.972949601389601e-06, "loss": 0.1055, "step": 17940 }, { "epoch": 1.5508229297161864, "grad_norm": 0.5413640872702045, "learning_rate": 4.968285797507104e-06, "loss": 0.1113, "step": 17950 }, { "epoch": 1.5516868979221563, "grad_norm": 0.5752827421319834, "learning_rate": 4.963622021218271e-06, "loss": 0.1087, "step": 17960 }, { "epoch": 1.5525508661281266, "grad_norm": 0.5860674587550517, "learning_rate": 4.958958276580922e-06, "loss": 0.1097, "step": 17970 }, { "epoch": 1.5534148343340966, "grad_norm": 0.5444099654254021, "learning_rate": 4.954294567652858e-06, "loss": 0.1082, "step": 17980 }, { "epoch": 1.5542788025400665, "grad_norm": 0.5578751187043065, "learning_rate": 4.949630898491842e-06, "loss": 0.1085, "step": 17990 }, { "epoch": 1.5551427707460366, "grad_norm": 0.5607469002061576, "learning_rate": 4.9449672731556095e-06, "loss": 0.1103, "step": 18000 }, { "epoch": 1.5560067389520067, "grad_norm": 0.597520663614332, "learning_rate": 4.940303695701848e-06, "loss": 0.1085, "step": 18010 }, { "epoch": 1.5568707071579766, "grad_norm": 0.5635599902531142, "learning_rate": 4.935640170188212e-06, "loss": 0.1044, "step": 18020 }, { "epoch": 1.5577346753639465, "grad_norm": 0.5477678866631618, "learning_rate": 4.930976700672308e-06, "loss": 0.1067, "step": 18030 }, { "epoch": 1.5585986435699166, "grad_norm": 0.5633674903097964, "learning_rate": 4.926313291211695e-06, "loss": 0.1074, "step": 18040 }, { "epoch": 1.5594626117758867, "grad_norm": 0.5457039922820861, "learning_rate": 4.921649945863875e-06, "loss": 0.1072, "step": 18050 }, { "epoch": 1.5603265799818566, "grad_norm": 0.5529102573041352, "learning_rate": 4.916986668686299e-06, "loss": 0.1028, "step": 18060 }, { "epoch": 1.5611905481878265, "grad_norm": 0.5526616247690039, "learning_rate": 4.912323463736358e-06, "loss": 0.1067, "step": 18070 }, { "epoch": 1.5620545163937967, "grad_norm": 0.5906101164405569, "learning_rate": 4.907660335071379e-06, "loss": 0.107, "step": 18080 }, { "epoch": 1.5629184845997668, "grad_norm": 0.5802445703142322, "learning_rate": 4.902997286748623e-06, "loss": 0.1073, "step": 18090 }, { "epoch": 1.5637824528057367, "grad_norm": 0.5497703406075652, "learning_rate": 4.898334322825279e-06, "loss": 0.1045, "step": 18100 }, { "epoch": 1.5646464210117068, "grad_norm": 0.5638475765108427, "learning_rate": 4.893671447358469e-06, "loss": 0.1069, "step": 18110 }, { "epoch": 1.565510389217677, "grad_norm": 0.5474191633068053, "learning_rate": 4.889008664405229e-06, "loss": 0.1067, "step": 18120 }, { "epoch": 1.5663743574236468, "grad_norm": 0.5726552269200086, "learning_rate": 4.8843459780225214e-06, "loss": 0.105, "step": 18130 }, { "epoch": 1.5672383256296167, "grad_norm": 0.57494985885623, "learning_rate": 4.879683392267223e-06, "loss": 0.108, "step": 18140 }, { "epoch": 1.5681022938355869, "grad_norm": 0.5758913701642095, "learning_rate": 4.875020911196123e-06, "loss": 0.1071, "step": 18150 }, { "epoch": 1.568966262041557, "grad_norm": 0.5750957605006866, "learning_rate": 4.870358538865916e-06, "loss": 0.1085, "step": 18160 }, { "epoch": 1.5698302302475269, "grad_norm": 0.5600079931570073, "learning_rate": 4.865696279333207e-06, "loss": 0.107, "step": 18170 }, { "epoch": 1.5706941984534968, "grad_norm": 0.5617150407874216, "learning_rate": 4.861034136654501e-06, "loss": 0.1086, "step": 18180 }, { "epoch": 1.571558166659467, "grad_norm": 0.5759016246754355, "learning_rate": 4.8563721148862015e-06, "loss": 0.1034, "step": 18190 }, { "epoch": 1.572422134865437, "grad_norm": 0.5381126057860631, "learning_rate": 4.851710218084605e-06, "loss": 0.1057, "step": 18200 }, { "epoch": 1.573286103071407, "grad_norm": 0.577124912521626, "learning_rate": 4.8470484503059e-06, "loss": 0.1053, "step": 18210 }, { "epoch": 1.5741500712773768, "grad_norm": 0.5509976895411456, "learning_rate": 4.8423868156061644e-06, "loss": 0.1045, "step": 18220 }, { "epoch": 1.5750140394833472, "grad_norm": 0.5820369404762893, "learning_rate": 4.837725318041362e-06, "loss": 0.1089, "step": 18230 }, { "epoch": 1.575878007689317, "grad_norm": 0.55949728961922, "learning_rate": 4.833063961667331e-06, "loss": 0.1073, "step": 18240 }, { "epoch": 1.576741975895287, "grad_norm": 0.5710029043616441, "learning_rate": 4.828402750539791e-06, "loss": 0.1062, "step": 18250 }, { "epoch": 1.577605944101257, "grad_norm": 0.5382770988006425, "learning_rate": 4.823741688714335e-06, "loss": 0.1075, "step": 18260 }, { "epoch": 1.5784699123072272, "grad_norm": 0.5658950513457499, "learning_rate": 4.8190807802464285e-06, "loss": 0.1083, "step": 18270 }, { "epoch": 1.5793338805131971, "grad_norm": 0.5364382299197196, "learning_rate": 4.814420029191395e-06, "loss": 0.106, "step": 18280 }, { "epoch": 1.580197848719167, "grad_norm": 0.5524618471037588, "learning_rate": 4.8097594396044326e-06, "loss": 0.106, "step": 18290 }, { "epoch": 1.5810618169251371, "grad_norm": 0.5628924598931844, "learning_rate": 4.8050990155405894e-06, "loss": 0.1069, "step": 18300 }, { "epoch": 1.5819257851311073, "grad_norm": 0.5420207388695794, "learning_rate": 4.800438761054775e-06, "loss": 0.1069, "step": 18310 }, { "epoch": 1.5827897533370772, "grad_norm": 0.5910637255202297, "learning_rate": 4.7957786802017485e-06, "loss": 0.105, "step": 18320 }, { "epoch": 1.583653721543047, "grad_norm": 0.5691128173632788, "learning_rate": 4.791118777036119e-06, "loss": 0.1067, "step": 18330 }, { "epoch": 1.5845176897490172, "grad_norm": 0.5519021523237372, "learning_rate": 4.786459055612341e-06, "loss": 0.1077, "step": 18340 }, { "epoch": 1.5853816579549873, "grad_norm": 0.5528173834315456, "learning_rate": 4.781799519984715e-06, "loss": 0.1051, "step": 18350 }, { "epoch": 1.5862456261609572, "grad_norm": 0.5221856325986058, "learning_rate": 4.7771401742073696e-06, "loss": 0.1053, "step": 18360 }, { "epoch": 1.5871095943669273, "grad_norm": 0.5619510960078813, "learning_rate": 4.772481022334278e-06, "loss": 0.1062, "step": 18370 }, { "epoch": 1.5879735625728975, "grad_norm": 0.5186024822105305, "learning_rate": 4.76782206841924e-06, "loss": 0.1062, "step": 18380 }, { "epoch": 1.5888375307788674, "grad_norm": 0.5525545768358348, "learning_rate": 4.763163316515886e-06, "loss": 0.1069, "step": 18390 }, { "epoch": 1.5897014989848373, "grad_norm": 0.5551666634710379, "learning_rate": 4.7585047706776674e-06, "loss": 0.1064, "step": 18400 }, { "epoch": 1.5905654671908074, "grad_norm": 0.5738819938747015, "learning_rate": 4.753846434957856e-06, "loss": 0.1084, "step": 18410 }, { "epoch": 1.5914294353967775, "grad_norm": 0.5471225790659157, "learning_rate": 4.749188313409548e-06, "loss": 0.104, "step": 18420 }, { "epoch": 1.5922934036027474, "grad_norm": 0.5619829238273804, "learning_rate": 4.744530410085641e-06, "loss": 0.1055, "step": 18430 }, { "epoch": 1.5931573718087173, "grad_norm": 0.5637950090825805, "learning_rate": 4.739872729038853e-06, "loss": 0.1086, "step": 18440 }, { "epoch": 1.5940213400146874, "grad_norm": 0.5903713369226199, "learning_rate": 4.735215274321706e-06, "loss": 0.1063, "step": 18450 }, { "epoch": 1.5948853082206575, "grad_norm": 0.5685279528763301, "learning_rate": 4.7305580499865256e-06, "loss": 0.1077, "step": 18460 }, { "epoch": 1.5957492764266274, "grad_norm": 0.5384106682889425, "learning_rate": 4.725901060085431e-06, "loss": 0.1061, "step": 18470 }, { "epoch": 1.5966132446325976, "grad_norm": 0.5777818816201741, "learning_rate": 4.721244308670344e-06, "loss": 0.1084, "step": 18480 }, { "epoch": 1.5974772128385677, "grad_norm": 0.5523159068046266, "learning_rate": 4.71658779979298e-06, "loss": 0.1057, "step": 18490 }, { "epoch": 1.5983411810445376, "grad_norm": 0.5948813436945792, "learning_rate": 4.711931537504836e-06, "loss": 0.1085, "step": 18500 }, { "epoch": 1.5992051492505075, "grad_norm": 0.5407671177901976, "learning_rate": 4.7072755258572014e-06, "loss": 0.1044, "step": 18510 }, { "epoch": 1.6000691174564776, "grad_norm": 0.5731745458031607, "learning_rate": 4.7026197689011425e-06, "loss": 0.1058, "step": 18520 }, { "epoch": 1.6009330856624477, "grad_norm": 0.5522669562395803, "learning_rate": 4.697964270687507e-06, "loss": 0.1051, "step": 18530 }, { "epoch": 1.6017970538684176, "grad_norm": 0.5441669651971076, "learning_rate": 4.693309035266921e-06, "loss": 0.1058, "step": 18540 }, { "epoch": 1.6026610220743875, "grad_norm": 0.5659568723921515, "learning_rate": 4.6886540666897725e-06, "loss": 0.1097, "step": 18550 }, { "epoch": 1.6035249902803577, "grad_norm": 0.568976782651086, "learning_rate": 4.683999369006223e-06, "loss": 0.1057, "step": 18560 }, { "epoch": 1.6043889584863278, "grad_norm": 0.5368332076086545, "learning_rate": 4.679344946266202e-06, "loss": 0.1071, "step": 18570 }, { "epoch": 1.6052529266922977, "grad_norm": 0.5472796671016149, "learning_rate": 4.674690802519394e-06, "loss": 0.1059, "step": 18580 }, { "epoch": 1.6061168948982676, "grad_norm": 0.5871536707812725, "learning_rate": 4.67003694181524e-06, "loss": 0.1065, "step": 18590 }, { "epoch": 1.6069808631042377, "grad_norm": 0.5637887574564212, "learning_rate": 4.665383368202939e-06, "loss": 0.1044, "step": 18600 }, { "epoch": 1.6078448313102078, "grad_norm": 0.5616396771556457, "learning_rate": 4.660730085731438e-06, "loss": 0.1054, "step": 18610 }, { "epoch": 1.6087087995161777, "grad_norm": 0.5556472856134798, "learning_rate": 4.656077098449434e-06, "loss": 0.1032, "step": 18620 }, { "epoch": 1.6095727677221479, "grad_norm": 0.5683986382033807, "learning_rate": 4.65142441040536e-06, "loss": 0.1072, "step": 18630 }, { "epoch": 1.610436735928118, "grad_norm": 0.583395037447458, "learning_rate": 4.646772025647394e-06, "loss": 0.1051, "step": 18640 }, { "epoch": 1.6113007041340879, "grad_norm": 0.5816202934421579, "learning_rate": 4.642119948223449e-06, "loss": 0.1072, "step": 18650 }, { "epoch": 1.6121646723400578, "grad_norm": 0.5657989657195068, "learning_rate": 4.6374681821811745e-06, "loss": 0.1073, "step": 18660 }, { "epoch": 1.613028640546028, "grad_norm": 0.5791187780948374, "learning_rate": 4.6328167315679396e-06, "loss": 0.1034, "step": 18670 }, { "epoch": 1.613892608751998, "grad_norm": 0.5715678977702422, "learning_rate": 4.6281656004308464e-06, "loss": 0.1017, "step": 18680 }, { "epoch": 1.614756576957968, "grad_norm": 0.5734621932193357, "learning_rate": 4.62351479281672e-06, "loss": 0.1064, "step": 18690 }, { "epoch": 1.6156205451639378, "grad_norm": 0.5530026596429509, "learning_rate": 4.618864312772098e-06, "loss": 0.1079, "step": 18700 }, { "epoch": 1.616484513369908, "grad_norm": 0.5601065046640386, "learning_rate": 4.614214164343237e-06, "loss": 0.1058, "step": 18710 }, { "epoch": 1.617348481575878, "grad_norm": 0.5760129273781665, "learning_rate": 4.609564351576103e-06, "loss": 0.1059, "step": 18720 }, { "epoch": 1.618212449781848, "grad_norm": 0.5558664942203434, "learning_rate": 4.604914878516376e-06, "loss": 0.1055, "step": 18730 }, { "epoch": 1.619076417987818, "grad_norm": 0.5550058318089843, "learning_rate": 4.60026574920943e-06, "loss": 0.107, "step": 18740 }, { "epoch": 1.6199403861937882, "grad_norm": 0.5546104276887956, "learning_rate": 4.595616967700346e-06, "loss": 0.1058, "step": 18750 }, { "epoch": 1.6208043543997581, "grad_norm": 0.563362489721722, "learning_rate": 4.590968538033906e-06, "loss": 0.1028, "step": 18760 }, { "epoch": 1.621668322605728, "grad_norm": 0.5664210910466141, "learning_rate": 4.586320464254579e-06, "loss": 0.1062, "step": 18770 }, { "epoch": 1.6225322908116981, "grad_norm": 0.5604114011064872, "learning_rate": 4.581672750406527e-06, "loss": 0.1047, "step": 18780 }, { "epoch": 1.6233962590176683, "grad_norm": 0.5739443373151654, "learning_rate": 4.5770254005336005e-06, "loss": 0.1076, "step": 18790 }, { "epoch": 1.6242602272236382, "grad_norm": 0.5438322646921915, "learning_rate": 4.5723784186793305e-06, "loss": 0.1015, "step": 18800 }, { "epoch": 1.625124195429608, "grad_norm": 0.5713437054250342, "learning_rate": 4.56773180888693e-06, "loss": 0.1049, "step": 18810 }, { "epoch": 1.6259881636355782, "grad_norm": 0.5203624733377038, "learning_rate": 4.563085575199288e-06, "loss": 0.1033, "step": 18820 }, { "epoch": 1.6268521318415483, "grad_norm": 0.5345371509745694, "learning_rate": 4.558439721658962e-06, "loss": 0.1029, "step": 18830 }, { "epoch": 1.6277161000475182, "grad_norm": 0.5512000186661318, "learning_rate": 4.5537942523081856e-06, "loss": 0.1069, "step": 18840 }, { "epoch": 1.6285800682534881, "grad_norm": 0.5732026077183064, "learning_rate": 4.549149171188856e-06, "loss": 0.1055, "step": 18850 }, { "epoch": 1.6294440364594585, "grad_norm": 0.53897272246241, "learning_rate": 4.5445044823425285e-06, "loss": 0.1032, "step": 18860 }, { "epoch": 1.6303080046654284, "grad_norm": 0.5385728364611817, "learning_rate": 4.5398601898104215e-06, "loss": 0.1045, "step": 18870 }, { "epoch": 1.6311719728713983, "grad_norm": 0.5739375455349807, "learning_rate": 4.535216297633407e-06, "loss": 0.1042, "step": 18880 }, { "epoch": 1.6320359410773684, "grad_norm": 0.5851177785810917, "learning_rate": 4.53057280985201e-06, "loss": 0.1107, "step": 18890 }, { "epoch": 1.6328999092833385, "grad_norm": 0.5626896150925973, "learning_rate": 4.5259297305064006e-06, "loss": 0.1067, "step": 18900 }, { "epoch": 1.6337638774893084, "grad_norm": 0.5579093379958637, "learning_rate": 4.521287063636397e-06, "loss": 0.1035, "step": 18910 }, { "epoch": 1.6346278456952783, "grad_norm": 0.5463165649754007, "learning_rate": 4.516644813281455e-06, "loss": 0.1072, "step": 18920 }, { "epoch": 1.6354918139012484, "grad_norm": 0.5299271950173727, "learning_rate": 4.512002983480674e-06, "loss": 0.1045, "step": 18930 }, { "epoch": 1.6363557821072185, "grad_norm": 0.5295341692293466, "learning_rate": 4.507361578272779e-06, "loss": 0.1058, "step": 18940 }, { "epoch": 1.6372197503131884, "grad_norm": 0.5659514898847705, "learning_rate": 4.50272060169613e-06, "loss": 0.1056, "step": 18950 }, { "epoch": 1.6380837185191583, "grad_norm": 0.5355035407457457, "learning_rate": 4.49808005778872e-06, "loss": 0.1058, "step": 18960 }, { "epoch": 1.6389476867251285, "grad_norm": 0.5813365434148485, "learning_rate": 4.493439950588152e-06, "loss": 0.1048, "step": 18970 }, { "epoch": 1.6398116549310986, "grad_norm": 0.5465368834634429, "learning_rate": 4.48880028413166e-06, "loss": 0.1094, "step": 18980 }, { "epoch": 1.6406756231370685, "grad_norm": 0.5594639004899338, "learning_rate": 4.484161062456093e-06, "loss": 0.1077, "step": 18990 }, { "epoch": 1.6415395913430386, "grad_norm": 0.5619680445715887, "learning_rate": 4.479522289597909e-06, "loss": 0.1036, "step": 19000 }, { "epoch": 1.6424035595490087, "grad_norm": 0.5458294496106755, "learning_rate": 4.474883969593179e-06, "loss": 0.1042, "step": 19010 }, { "epoch": 1.6432675277549786, "grad_norm": 0.5718266897624316, "learning_rate": 4.470246106477575e-06, "loss": 0.1033, "step": 19020 }, { "epoch": 1.6441314959609485, "grad_norm": 0.5726300062386809, "learning_rate": 4.46560870428638e-06, "loss": 0.108, "step": 19030 }, { "epoch": 1.6449954641669187, "grad_norm": 0.5593823250439539, "learning_rate": 4.460971767054469e-06, "loss": 0.1054, "step": 19040 }, { "epoch": 1.6458594323728888, "grad_norm": 0.5771387893259979, "learning_rate": 4.456335298816314e-06, "loss": 0.1038, "step": 19050 }, { "epoch": 1.6467234005788587, "grad_norm": 0.5206178813990037, "learning_rate": 4.45169930360598e-06, "loss": 0.1047, "step": 19060 }, { "epoch": 1.6475873687848286, "grad_norm": 0.5546056481802352, "learning_rate": 4.4470637854571195e-06, "loss": 0.1053, "step": 19070 }, { "epoch": 1.6484513369907987, "grad_norm": 0.563121998454811, "learning_rate": 4.442428748402974e-06, "loss": 0.1025, "step": 19080 }, { "epoch": 1.6493153051967688, "grad_norm": 0.5740974281010763, "learning_rate": 4.437794196476357e-06, "loss": 0.1077, "step": 19090 }, { "epoch": 1.6501792734027387, "grad_norm": 0.5798995188610356, "learning_rate": 4.433160133709668e-06, "loss": 0.1077, "step": 19100 }, { "epoch": 1.6510432416087086, "grad_norm": 0.5399967033915721, "learning_rate": 4.428526564134879e-06, "loss": 0.1051, "step": 19110 }, { "epoch": 1.651907209814679, "grad_norm": 0.6047608211253362, "learning_rate": 4.423893491783535e-06, "loss": 0.1051, "step": 19120 }, { "epoch": 1.6527711780206489, "grad_norm": 0.5567507402078828, "learning_rate": 4.4192609206867395e-06, "loss": 0.1044, "step": 19130 }, { "epoch": 1.6536351462266188, "grad_norm": 0.5509176536860414, "learning_rate": 4.414628854875171e-06, "loss": 0.1082, "step": 19140 }, { "epoch": 1.654499114432589, "grad_norm": 0.5807724228673988, "learning_rate": 4.409997298379062e-06, "loss": 0.1021, "step": 19150 }, { "epoch": 1.655363082638559, "grad_norm": 0.5588511431747389, "learning_rate": 4.405366255228206e-06, "loss": 0.1057, "step": 19160 }, { "epoch": 1.656227050844529, "grad_norm": 0.5760437138864196, "learning_rate": 4.400735729451943e-06, "loss": 0.0994, "step": 19170 }, { "epoch": 1.6570910190504988, "grad_norm": 0.5862621716618907, "learning_rate": 4.396105725079169e-06, "loss": 0.1041, "step": 19180 }, { "epoch": 1.657954987256469, "grad_norm": 0.5555879959070747, "learning_rate": 4.391476246138326e-06, "loss": 0.1052, "step": 19190 }, { "epoch": 1.658818955462439, "grad_norm": 0.5952847226961522, "learning_rate": 4.386847296657396e-06, "loss": 0.1081, "step": 19200 }, { "epoch": 1.659682923668409, "grad_norm": 0.5348380660765963, "learning_rate": 4.382218880663902e-06, "loss": 0.107, "step": 19210 }, { "epoch": 1.6605468918743789, "grad_norm": 0.5151912291244257, "learning_rate": 4.3775910021849e-06, "loss": 0.1031, "step": 19220 }, { "epoch": 1.661410860080349, "grad_norm": 0.6003358328007403, "learning_rate": 4.372963665246986e-06, "loss": 0.1064, "step": 19230 }, { "epoch": 1.6622748282863191, "grad_norm": 0.5544041061975947, "learning_rate": 4.368336873876273e-06, "loss": 0.1014, "step": 19240 }, { "epoch": 1.663138796492289, "grad_norm": 0.54913783519371, "learning_rate": 4.36371063209841e-06, "loss": 0.1036, "step": 19250 }, { "epoch": 1.6640027646982591, "grad_norm": 0.5583738275839267, "learning_rate": 4.359084943938564e-06, "loss": 0.1064, "step": 19260 }, { "epoch": 1.6648667329042293, "grad_norm": 0.5738850263871381, "learning_rate": 4.35445981342142e-06, "loss": 0.1049, "step": 19270 }, { "epoch": 1.6657307011101992, "grad_norm": 0.5524237103340026, "learning_rate": 4.349835244571175e-06, "loss": 0.1042, "step": 19280 }, { "epoch": 1.666594669316169, "grad_norm": 0.5706676298342727, "learning_rate": 4.345211241411543e-06, "loss": 0.1093, "step": 19290 }, { "epoch": 1.6674586375221392, "grad_norm": 0.5848544135945213, "learning_rate": 4.340587807965743e-06, "loss": 0.1072, "step": 19300 }, { "epoch": 1.6683226057281093, "grad_norm": 0.6287334419221372, "learning_rate": 4.335964948256497e-06, "loss": 0.1074, "step": 19310 }, { "epoch": 1.6691865739340792, "grad_norm": 0.5777601253584108, "learning_rate": 4.331342666306029e-06, "loss": 0.1049, "step": 19320 }, { "epoch": 1.670050542140049, "grad_norm": 0.5518672383982035, "learning_rate": 4.32672096613606e-06, "loss": 0.1076, "step": 19330 }, { "epoch": 1.6709145103460192, "grad_norm": 0.5457043407209156, "learning_rate": 4.322099851767807e-06, "loss": 0.1063, "step": 19340 }, { "epoch": 1.6717784785519894, "grad_norm": 0.5424461831045879, "learning_rate": 4.317479327221976e-06, "loss": 0.1063, "step": 19350 }, { "epoch": 1.6726424467579593, "grad_norm": 0.5805324592882917, "learning_rate": 4.3128593965187555e-06, "loss": 0.1064, "step": 19360 }, { "epoch": 1.6735064149639294, "grad_norm": 0.6130304203689181, "learning_rate": 4.3082400636778236e-06, "loss": 0.1042, "step": 19370 }, { "epoch": 1.6743703831698995, "grad_norm": 0.5810864835602517, "learning_rate": 4.303621332718336e-06, "loss": 0.1037, "step": 19380 }, { "epoch": 1.6752343513758694, "grad_norm": 0.5318618871960004, "learning_rate": 4.299003207658926e-06, "loss": 0.1095, "step": 19390 }, { "epoch": 1.6760983195818393, "grad_norm": 0.5575880416342754, "learning_rate": 4.294385692517696e-06, "loss": 0.1069, "step": 19400 }, { "epoch": 1.6769622877878094, "grad_norm": 0.5760120387957547, "learning_rate": 4.289768791312219e-06, "loss": 0.1031, "step": 19410 }, { "epoch": 1.6778262559937795, "grad_norm": 0.6238448718107859, "learning_rate": 4.2851525080595356e-06, "loss": 0.1034, "step": 19420 }, { "epoch": 1.6786902241997494, "grad_norm": 0.5617405307505385, "learning_rate": 4.280536846776151e-06, "loss": 0.1054, "step": 19430 }, { "epoch": 1.6795541924057193, "grad_norm": 0.5540885463967568, "learning_rate": 4.275921811478021e-06, "loss": 0.1081, "step": 19440 }, { "epoch": 1.6804181606116895, "grad_norm": 0.5559250887793661, "learning_rate": 4.271307406180565e-06, "loss": 0.1091, "step": 19450 }, { "epoch": 1.6812821288176596, "grad_norm": 0.5462162389581664, "learning_rate": 4.266693634898648e-06, "loss": 0.1045, "step": 19460 }, { "epoch": 1.6821460970236295, "grad_norm": 0.5768486265339877, "learning_rate": 4.262080501646594e-06, "loss": 0.1079, "step": 19470 }, { "epoch": 1.6830100652295994, "grad_norm": 0.5679698629432213, "learning_rate": 4.257468010438156e-06, "loss": 0.1049, "step": 19480 }, { "epoch": 1.6838740334355695, "grad_norm": 0.560233750108615, "learning_rate": 4.25285616528654e-06, "loss": 0.1053, "step": 19490 }, { "epoch": 1.6847380016415396, "grad_norm": 0.5195629545137693, "learning_rate": 4.248244970204388e-06, "loss": 0.1025, "step": 19500 }, { "epoch": 1.6856019698475095, "grad_norm": 0.5613215865113323, "learning_rate": 4.243634429203774e-06, "loss": 0.1029, "step": 19510 }, { "epoch": 1.6864659380534797, "grad_norm": 0.5652931780024508, "learning_rate": 4.2390245462962035e-06, "loss": 0.1048, "step": 19520 }, { "epoch": 1.6873299062594498, "grad_norm": 0.5816856644432576, "learning_rate": 4.234415325492608e-06, "loss": 0.1057, "step": 19530 }, { "epoch": 1.6881938744654197, "grad_norm": 0.6059617188417905, "learning_rate": 4.229806770803349e-06, "loss": 0.1027, "step": 19540 }, { "epoch": 1.6890578426713896, "grad_norm": 0.5445694254477236, "learning_rate": 4.225198886238201e-06, "loss": 0.1041, "step": 19550 }, { "epoch": 1.6899218108773597, "grad_norm": 0.5581999508448194, "learning_rate": 4.220591675806359e-06, "loss": 0.1068, "step": 19560 }, { "epoch": 1.6907857790833298, "grad_norm": 0.5546346046115186, "learning_rate": 4.215985143516431e-06, "loss": 0.1044, "step": 19570 }, { "epoch": 1.6916497472892997, "grad_norm": 0.5439755044085209, "learning_rate": 4.211379293376438e-06, "loss": 0.1033, "step": 19580 }, { "epoch": 1.6925137154952696, "grad_norm": 0.5656359038198634, "learning_rate": 4.2067741293938e-06, "loss": 0.1048, "step": 19590 }, { "epoch": 1.6933776837012398, "grad_norm": 0.5590831186640056, "learning_rate": 4.202169655575347e-06, "loss": 0.1011, "step": 19600 }, { "epoch": 1.6942416519072099, "grad_norm": 0.5394716638115005, "learning_rate": 4.197565875927306e-06, "loss": 0.1027, "step": 19610 }, { "epoch": 1.6951056201131798, "grad_norm": 0.5634450989871348, "learning_rate": 4.1929627944552995e-06, "loss": 0.1058, "step": 19620 }, { "epoch": 1.69596958831915, "grad_norm": 0.5515417589498646, "learning_rate": 4.188360415164344e-06, "loss": 0.1039, "step": 19630 }, { "epoch": 1.69683355652512, "grad_norm": 0.5421289498514134, "learning_rate": 4.183758742058842e-06, "loss": 0.1052, "step": 19640 }, { "epoch": 1.69769752473109, "grad_norm": 0.5844255281676082, "learning_rate": 4.179157779142585e-06, "loss": 0.1006, "step": 19650 }, { "epoch": 1.6985614929370598, "grad_norm": 0.5736360543034765, "learning_rate": 4.174557530418748e-06, "loss": 0.1057, "step": 19660 }, { "epoch": 1.69942546114303, "grad_norm": 0.5552882278271722, "learning_rate": 4.169957999889877e-06, "loss": 0.1028, "step": 19670 }, { "epoch": 1.700289429349, "grad_norm": 0.544532249195041, "learning_rate": 4.165359191557901e-06, "loss": 0.103, "step": 19680 }, { "epoch": 1.70115339755497, "grad_norm": 0.5246566731943877, "learning_rate": 4.160761109424115e-06, "loss": 0.1018, "step": 19690 }, { "epoch": 1.7020173657609399, "grad_norm": 0.5417849366104356, "learning_rate": 4.15616375748919e-06, "loss": 0.1039, "step": 19700 }, { "epoch": 1.70288133396691, "grad_norm": 0.5741977557459841, "learning_rate": 4.151567139753152e-06, "loss": 0.1062, "step": 19710 }, { "epoch": 1.7037453021728801, "grad_norm": 0.5637437127823944, "learning_rate": 4.1469712602153935e-06, "loss": 0.1064, "step": 19720 }, { "epoch": 1.70460927037885, "grad_norm": 0.5947331247456926, "learning_rate": 4.142376122874664e-06, "loss": 0.1059, "step": 19730 }, { "epoch": 1.70547323858482, "grad_norm": 0.5566283619723043, "learning_rate": 4.137781731729069e-06, "loss": 0.1015, "step": 19740 }, { "epoch": 1.7063372067907903, "grad_norm": 0.5582794032645616, "learning_rate": 4.1331880907760595e-06, "loss": 0.1089, "step": 19750 }, { "epoch": 1.7072011749967602, "grad_norm": 0.5836144846557693, "learning_rate": 4.1285952040124375e-06, "loss": 0.1022, "step": 19760 }, { "epoch": 1.70806514320273, "grad_norm": 0.5676899148191863, "learning_rate": 4.124003075434351e-06, "loss": 0.1094, "step": 19770 }, { "epoch": 1.7089291114087002, "grad_norm": 0.5207672775373055, "learning_rate": 4.119411709037286e-06, "loss": 0.1022, "step": 19780 }, { "epoch": 1.7097930796146703, "grad_norm": 0.5410470442889012, "learning_rate": 4.114821108816063e-06, "loss": 0.1012, "step": 19790 }, { "epoch": 1.7106570478206402, "grad_norm": 0.5620634468728611, "learning_rate": 4.110231278764837e-06, "loss": 0.1015, "step": 19800 }, { "epoch": 1.71152101602661, "grad_norm": 0.5625517889943247, "learning_rate": 4.1056422228770986e-06, "loss": 0.1042, "step": 19810 }, { "epoch": 1.7123849842325802, "grad_norm": 0.5551478643367547, "learning_rate": 4.101053945145655e-06, "loss": 0.1054, "step": 19820 }, { "epoch": 1.7132489524385504, "grad_norm": 0.5961782039221952, "learning_rate": 4.0964664495626436e-06, "loss": 0.1068, "step": 19830 }, { "epoch": 1.7141129206445203, "grad_norm": 0.577866925877064, "learning_rate": 4.091879740119518e-06, "loss": 0.1051, "step": 19840 }, { "epoch": 1.7149768888504902, "grad_norm": 0.5938704854801597, "learning_rate": 4.087293820807054e-06, "loss": 0.1056, "step": 19850 }, { "epoch": 1.7158408570564603, "grad_norm": 0.5403360078049187, "learning_rate": 4.082708695615326e-06, "loss": 0.103, "step": 19860 }, { "epoch": 1.7167048252624304, "grad_norm": 0.5435756955040987, "learning_rate": 4.078124368533733e-06, "loss": 0.104, "step": 19870 }, { "epoch": 1.7175687934684003, "grad_norm": 0.5541908421361005, "learning_rate": 4.07354084355097e-06, "loss": 0.1004, "step": 19880 }, { "epoch": 1.7184327616743704, "grad_norm": 0.5886329233256653, "learning_rate": 4.06895812465504e-06, "loss": 0.1068, "step": 19890 }, { "epoch": 1.7192967298803405, "grad_norm": 0.5707722202962463, "learning_rate": 4.064376215833238e-06, "loss": 0.105, "step": 19900 }, { "epoch": 1.7201606980863104, "grad_norm": 0.5674191409460011, "learning_rate": 4.05979512107216e-06, "loss": 0.103, "step": 19910 }, { "epoch": 1.7210246662922803, "grad_norm": 0.5519385356483035, "learning_rate": 4.055214844357692e-06, "loss": 0.1028, "step": 19920 }, { "epoch": 1.7218886344982505, "grad_norm": 0.5548644777530287, "learning_rate": 4.050635389675006e-06, "loss": 0.1019, "step": 19930 }, { "epoch": 1.7227526027042206, "grad_norm": 0.5631549051283312, "learning_rate": 4.046056761008561e-06, "loss": 0.1054, "step": 19940 }, { "epoch": 1.7236165709101905, "grad_norm": 0.5606486954370578, "learning_rate": 4.041478962342098e-06, "loss": 0.1024, "step": 19950 }, { "epoch": 1.7244805391161604, "grad_norm": 0.5667956495178411, "learning_rate": 4.036901997658632e-06, "loss": 0.1031, "step": 19960 }, { "epoch": 1.7253445073221305, "grad_norm": 0.5455738969443668, "learning_rate": 4.03232587094046e-06, "loss": 0.1016, "step": 19970 }, { "epoch": 1.7262084755281006, "grad_norm": 0.5615537609092984, "learning_rate": 4.0277505861691405e-06, "loss": 0.1049, "step": 19980 }, { "epoch": 1.7270724437340705, "grad_norm": 0.6077668734633745, "learning_rate": 4.023176147325505e-06, "loss": 0.1054, "step": 19990 }, { "epoch": 1.7279364119400407, "grad_norm": 0.5353436552303115, "learning_rate": 4.018602558389648e-06, "loss": 0.1048, "step": 20000 }, { "epoch": 1.7288003801460108, "grad_norm": 0.547027306615421, "learning_rate": 4.014029823340928e-06, "loss": 0.1022, "step": 20010 }, { "epoch": 1.7296643483519807, "grad_norm": 0.5953265053533482, "learning_rate": 4.0094579461579505e-06, "loss": 0.1055, "step": 20020 }, { "epoch": 1.7305283165579506, "grad_norm": 0.5527840216631419, "learning_rate": 4.0048869308185856e-06, "loss": 0.104, "step": 20030 }, { "epoch": 1.7313922847639207, "grad_norm": 0.5631809649129795, "learning_rate": 4.0003167812999465e-06, "loss": 0.1031, "step": 20040 }, { "epoch": 1.7322562529698908, "grad_norm": 0.5420978278693646, "learning_rate": 3.9957475015784e-06, "loss": 0.1024, "step": 20050 }, { "epoch": 1.7331202211758607, "grad_norm": 0.5495208887748646, "learning_rate": 3.9911790956295455e-06, "loss": 0.099, "step": 20060 }, { "epoch": 1.7339841893818306, "grad_norm": 0.6077615563313703, "learning_rate": 3.986611567428231e-06, "loss": 0.1039, "step": 20070 }, { "epoch": 1.7348481575878008, "grad_norm": 0.5575610185779836, "learning_rate": 3.982044920948542e-06, "loss": 0.1041, "step": 20080 }, { "epoch": 1.7357121257937709, "grad_norm": 0.5712717988692463, "learning_rate": 3.977479160163786e-06, "loss": 0.1036, "step": 20090 }, { "epoch": 1.7365760939997408, "grad_norm": 0.5868440064927699, "learning_rate": 3.972914289046512e-06, "loss": 0.1051, "step": 20100 }, { "epoch": 1.7374400622057107, "grad_norm": 0.5571265568035261, "learning_rate": 3.968350311568487e-06, "loss": 0.1059, "step": 20110 }, { "epoch": 1.7383040304116808, "grad_norm": 0.5571341134249902, "learning_rate": 3.963787231700707e-06, "loss": 0.1009, "step": 20120 }, { "epoch": 1.739167998617651, "grad_norm": 0.5442273293247737, "learning_rate": 3.959225053413379e-06, "loss": 0.1056, "step": 20130 }, { "epoch": 1.7400319668236208, "grad_norm": 0.5392709739565602, "learning_rate": 3.954663780675932e-06, "loss": 0.1044, "step": 20140 }, { "epoch": 1.740895935029591, "grad_norm": 0.5714865715662484, "learning_rate": 3.950103417457004e-06, "loss": 0.1003, "step": 20150 }, { "epoch": 1.741759903235561, "grad_norm": 0.5611686800757524, "learning_rate": 3.945543967724444e-06, "loss": 0.1062, "step": 20160 }, { "epoch": 1.742623871441531, "grad_norm": 0.562876377608983, "learning_rate": 3.940985435445303e-06, "loss": 0.1041, "step": 20170 }, { "epoch": 1.7434878396475009, "grad_norm": 0.5365688293514245, "learning_rate": 3.936427824585836e-06, "loss": 0.1044, "step": 20180 }, { "epoch": 1.744351807853471, "grad_norm": 0.5784734187598308, "learning_rate": 3.931871139111497e-06, "loss": 0.1012, "step": 20190 }, { "epoch": 1.7452157760594411, "grad_norm": 0.56924590426923, "learning_rate": 3.927315382986935e-06, "loss": 0.1048, "step": 20200 }, { "epoch": 1.746079744265411, "grad_norm": 0.5868943193747296, "learning_rate": 3.922760560175984e-06, "loss": 0.103, "step": 20210 }, { "epoch": 1.746943712471381, "grad_norm": 0.5684990052889753, "learning_rate": 3.918206674641674e-06, "loss": 0.1054, "step": 20220 }, { "epoch": 1.747807680677351, "grad_norm": 0.5984985926149132, "learning_rate": 3.913653730346219e-06, "loss": 0.1005, "step": 20230 }, { "epoch": 1.7486716488833212, "grad_norm": 0.5719741375137701, "learning_rate": 3.909101731251008e-06, "loss": 0.1034, "step": 20240 }, { "epoch": 1.749535617089291, "grad_norm": 0.5445905911014346, "learning_rate": 3.904550681316613e-06, "loss": 0.1004, "step": 20250 }, { "epoch": 1.7503995852952612, "grad_norm": 0.5535256139758667, "learning_rate": 3.900000584502777e-06, "loss": 0.0999, "step": 20260 }, { "epoch": 1.7512635535012313, "grad_norm": 0.5764065311727894, "learning_rate": 3.8954514447684154e-06, "loss": 0.1007, "step": 20270 }, { "epoch": 1.7521275217072012, "grad_norm": 0.5458153301564745, "learning_rate": 3.890903266071614e-06, "loss": 0.1033, "step": 20280 }, { "epoch": 1.752991489913171, "grad_norm": 0.562777505638518, "learning_rate": 3.886356052369613e-06, "loss": 0.1022, "step": 20290 }, { "epoch": 1.7538554581191412, "grad_norm": 0.5552263607405525, "learning_rate": 3.881809807618822e-06, "loss": 0.1014, "step": 20300 }, { "epoch": 1.7547194263251114, "grad_norm": 0.5809887010445461, "learning_rate": 3.8772645357748055e-06, "loss": 0.1025, "step": 20310 }, { "epoch": 1.7555833945310813, "grad_norm": 0.5851596272145743, "learning_rate": 3.872720240792281e-06, "loss": 0.1038, "step": 20320 }, { "epoch": 1.7564473627370512, "grad_norm": 0.5919873050029621, "learning_rate": 3.868176926625111e-06, "loss": 0.1023, "step": 20330 }, { "epoch": 1.7573113309430213, "grad_norm": 0.5763379694868309, "learning_rate": 3.863634597226314e-06, "loss": 0.1012, "step": 20340 }, { "epoch": 1.7581752991489914, "grad_norm": 0.5913953702067776, "learning_rate": 3.859093256548044e-06, "loss": 0.1058, "step": 20350 }, { "epoch": 1.7590392673549613, "grad_norm": 0.5320930353822502, "learning_rate": 3.854552908541601e-06, "loss": 0.101, "step": 20360 }, { "epoch": 1.7599032355609312, "grad_norm": 0.5513593841607388, "learning_rate": 3.850013557157413e-06, "loss": 0.1017, "step": 20370 }, { "epoch": 1.7607672037669013, "grad_norm": 0.5272766571509331, "learning_rate": 3.845475206345048e-06, "loss": 0.105, "step": 20380 }, { "epoch": 1.7616311719728714, "grad_norm": 0.561909459049623, "learning_rate": 3.840937860053204e-06, "loss": 0.1056, "step": 20390 }, { "epoch": 1.7624951401788413, "grad_norm": 0.5623218739821189, "learning_rate": 3.836401522229698e-06, "loss": 0.1018, "step": 20400 }, { "epoch": 1.7633591083848115, "grad_norm": 0.5713647997114392, "learning_rate": 3.831866196821476e-06, "loss": 0.1009, "step": 20410 }, { "epoch": 1.7642230765907816, "grad_norm": 0.5537859362481026, "learning_rate": 3.827331887774599e-06, "loss": 0.1021, "step": 20420 }, { "epoch": 1.7650870447967515, "grad_norm": 0.5606600806201089, "learning_rate": 3.8227985990342495e-06, "loss": 0.0982, "step": 20430 }, { "epoch": 1.7659510130027214, "grad_norm": 0.5701330136609685, "learning_rate": 3.818266334544714e-06, "loss": 0.1042, "step": 20440 }, { "epoch": 1.7668149812086915, "grad_norm": 0.5731712287364021, "learning_rate": 3.813735098249395e-06, "loss": 0.1039, "step": 20450 }, { "epoch": 1.7676789494146616, "grad_norm": 0.5673002836546218, "learning_rate": 3.8092048940907944e-06, "loss": 0.1043, "step": 20460 }, { "epoch": 1.7685429176206315, "grad_norm": 0.5670893355009318, "learning_rate": 3.8046757260105244e-06, "loss": 0.1024, "step": 20470 }, { "epoch": 1.7694068858266014, "grad_norm": 0.5944192806216618, "learning_rate": 3.800147597949285e-06, "loss": 0.1017, "step": 20480 }, { "epoch": 1.7702708540325716, "grad_norm": 0.5670087860330885, "learning_rate": 3.7956205138468795e-06, "loss": 0.1015, "step": 20490 }, { "epoch": 1.7711348222385417, "grad_norm": 0.5817270080000759, "learning_rate": 3.7910944776422e-06, "loss": 0.1039, "step": 20500 }, { "epoch": 1.7719987904445116, "grad_norm": 0.5724007713380568, "learning_rate": 3.7865694932732296e-06, "loss": 0.1051, "step": 20510 }, { "epoch": 1.7728627586504817, "grad_norm": 0.5473492286689364, "learning_rate": 3.7820455646770284e-06, "loss": 0.1024, "step": 20520 }, { "epoch": 1.7737267268564518, "grad_norm": 0.5485247247016158, "learning_rate": 3.7775226957897465e-06, "loss": 0.1019, "step": 20530 }, { "epoch": 1.7745906950624217, "grad_norm": 0.5643486978024851, "learning_rate": 3.773000890546609e-06, "loss": 0.104, "step": 20540 }, { "epoch": 1.7754546632683916, "grad_norm": 0.5796798917661853, "learning_rate": 3.7684801528819155e-06, "loss": 0.1047, "step": 20550 }, { "epoch": 1.7763186314743618, "grad_norm": 0.545404357770531, "learning_rate": 3.763960486729035e-06, "loss": 0.1056, "step": 20560 }, { "epoch": 1.7771825996803319, "grad_norm": 0.5357278759624914, "learning_rate": 3.759441896020405e-06, "loss": 0.1036, "step": 20570 }, { "epoch": 1.7780465678863018, "grad_norm": 0.5918980827105917, "learning_rate": 3.7549243846875288e-06, "loss": 0.1016, "step": 20580 }, { "epoch": 1.7789105360922717, "grad_norm": 0.557293641693346, "learning_rate": 3.750407956660973e-06, "loss": 0.1025, "step": 20590 }, { "epoch": 1.7797745042982418, "grad_norm": 0.5442810225575131, "learning_rate": 3.745892615870353e-06, "loss": 0.0984, "step": 20600 }, { "epoch": 1.780638472504212, "grad_norm": 0.5844983519187161, "learning_rate": 3.741378366244346e-06, "loss": 0.1044, "step": 20610 }, { "epoch": 1.7815024407101818, "grad_norm": 0.6092327070617354, "learning_rate": 3.736865211710678e-06, "loss": 0.1032, "step": 20620 }, { "epoch": 1.7823664089161517, "grad_norm": 0.5845750451198817, "learning_rate": 3.732353156196123e-06, "loss": 0.106, "step": 20630 }, { "epoch": 1.783230377122122, "grad_norm": 0.5142430347585016, "learning_rate": 3.727842203626494e-06, "loss": 0.0974, "step": 20640 }, { "epoch": 1.784094345328092, "grad_norm": 0.5477628186400496, "learning_rate": 3.723332357926649e-06, "loss": 0.1003, "step": 20650 }, { "epoch": 1.7849583135340619, "grad_norm": 0.5303665814483901, "learning_rate": 3.7188236230204826e-06, "loss": 0.1077, "step": 20660 }, { "epoch": 1.785822281740032, "grad_norm": 0.5762679439825226, "learning_rate": 3.7143160028309215e-06, "loss": 0.1047, "step": 20670 }, { "epoch": 1.7866862499460021, "grad_norm": 0.5530133329338524, "learning_rate": 3.7098095012799216e-06, "loss": 0.0995, "step": 20680 }, { "epoch": 1.787550218151972, "grad_norm": 0.5482584661223466, "learning_rate": 3.7053041222884688e-06, "loss": 0.1026, "step": 20690 }, { "epoch": 1.788414186357942, "grad_norm": 0.580595778741035, "learning_rate": 3.7007998697765713e-06, "loss": 0.106, "step": 20700 }, { "epoch": 1.789278154563912, "grad_norm": 0.576869662947042, "learning_rate": 3.696296747663253e-06, "loss": 0.102, "step": 20710 }, { "epoch": 1.7901421227698822, "grad_norm": 0.5597875975808141, "learning_rate": 3.6917947598665593e-06, "loss": 0.1014, "step": 20720 }, { "epoch": 1.791006090975852, "grad_norm": 0.558351611350602, "learning_rate": 3.6872939103035465e-06, "loss": 0.1006, "step": 20730 }, { "epoch": 1.791870059181822, "grad_norm": 0.5606337066934608, "learning_rate": 3.682794202890284e-06, "loss": 0.105, "step": 20740 }, { "epoch": 1.792734027387792, "grad_norm": 0.5690262394802477, "learning_rate": 3.6782956415418404e-06, "loss": 0.1015, "step": 20750 }, { "epoch": 1.7935979955937622, "grad_norm": 0.5554666935458995, "learning_rate": 3.673798230172293e-06, "loss": 0.1012, "step": 20760 }, { "epoch": 1.794461963799732, "grad_norm": 0.5826019998816877, "learning_rate": 3.6693019726947154e-06, "loss": 0.1007, "step": 20770 }, { "epoch": 1.7953259320057022, "grad_norm": 0.552502735816188, "learning_rate": 3.6648068730211816e-06, "loss": 0.1005, "step": 20780 }, { "epoch": 1.7961899002116724, "grad_norm": 0.5604779925921183, "learning_rate": 3.660312935062752e-06, "loss": 0.1015, "step": 20790 }, { "epoch": 1.7970538684176423, "grad_norm": 0.553123810366442, "learning_rate": 3.65582016272948e-06, "loss": 0.1009, "step": 20800 }, { "epoch": 1.7979178366236122, "grad_norm": 0.574040004269223, "learning_rate": 3.651328559930404e-06, "loss": 0.1024, "step": 20810 }, { "epoch": 1.7987818048295823, "grad_norm": 0.5789047190106877, "learning_rate": 3.6468381305735485e-06, "loss": 0.1001, "step": 20820 }, { "epoch": 1.7996457730355524, "grad_norm": 0.5743348463122552, "learning_rate": 3.6423488785659085e-06, "loss": 0.1063, "step": 20830 }, { "epoch": 1.8005097412415223, "grad_norm": 0.568363700602923, "learning_rate": 3.637860807813462e-06, "loss": 0.1022, "step": 20840 }, { "epoch": 1.8013737094474922, "grad_norm": 0.5632244458817115, "learning_rate": 3.6333739222211576e-06, "loss": 0.1005, "step": 20850 }, { "epoch": 1.8022376776534623, "grad_norm": 0.5535634272232598, "learning_rate": 3.628888225692912e-06, "loss": 0.1038, "step": 20860 }, { "epoch": 1.8031016458594324, "grad_norm": 0.5617936512836789, "learning_rate": 3.6244037221316066e-06, "loss": 0.1015, "step": 20870 }, { "epoch": 1.8039656140654023, "grad_norm": 0.5676594590712748, "learning_rate": 3.619920415439084e-06, "loss": 0.0987, "step": 20880 }, { "epoch": 1.8048295822713725, "grad_norm": 0.5548626950647346, "learning_rate": 3.615438309516148e-06, "loss": 0.1008, "step": 20890 }, { "epoch": 1.8056935504773426, "grad_norm": 0.5472900095239701, "learning_rate": 3.610957408262561e-06, "loss": 0.103, "step": 20900 }, { "epoch": 1.8065575186833125, "grad_norm": 0.556208859315823, "learning_rate": 3.606477715577026e-06, "loss": 0.1029, "step": 20910 }, { "epoch": 1.8074214868892824, "grad_norm": 0.5843164924577277, "learning_rate": 3.6019992353572047e-06, "loss": 0.1014, "step": 20920 }, { "epoch": 1.8082854550952525, "grad_norm": 0.5261056588336221, "learning_rate": 3.5975219714997025e-06, "loss": 0.1008, "step": 20930 }, { "epoch": 1.8091494233012226, "grad_norm": 0.5291736647089412, "learning_rate": 3.5930459279000606e-06, "loss": 0.1035, "step": 20940 }, { "epoch": 1.8100133915071925, "grad_norm": 0.5657988364870209, "learning_rate": 3.588571108452764e-06, "loss": 0.1034, "step": 20950 }, { "epoch": 1.8108773597131624, "grad_norm": 0.5825009374567696, "learning_rate": 3.5840975170512314e-06, "loss": 0.1002, "step": 20960 }, { "epoch": 1.8117413279191326, "grad_norm": 0.5580209417771652, "learning_rate": 3.579625157587814e-06, "loss": 0.101, "step": 20970 }, { "epoch": 1.8126052961251027, "grad_norm": 0.523370115805435, "learning_rate": 3.575154033953787e-06, "loss": 0.1032, "step": 20980 }, { "epoch": 1.8134692643310726, "grad_norm": 0.5803826900106622, "learning_rate": 3.570684150039353e-06, "loss": 0.1014, "step": 20990 }, { "epoch": 1.8143332325370425, "grad_norm": 0.53633215338986, "learning_rate": 3.5662155097336378e-06, "loss": 0.1021, "step": 21000 }, { "epoch": 1.8151972007430126, "grad_norm": 0.5457685347468603, "learning_rate": 3.5617481169246845e-06, "loss": 0.1062, "step": 21010 }, { "epoch": 1.8160611689489827, "grad_norm": 0.5436165338718727, "learning_rate": 3.557281975499446e-06, "loss": 0.1, "step": 21020 }, { "epoch": 1.8169251371549526, "grad_norm": 0.5219347875259863, "learning_rate": 3.5528170893437918e-06, "loss": 0.1022, "step": 21030 }, { "epoch": 1.8177891053609228, "grad_norm": 0.5459117725002699, "learning_rate": 3.5483534623424988e-06, "loss": 0.1026, "step": 21040 }, { "epoch": 1.8186530735668929, "grad_norm": 0.5344167085133646, "learning_rate": 3.5438910983792465e-06, "loss": 0.0988, "step": 21050 }, { "epoch": 1.8195170417728628, "grad_norm": 0.533995913655794, "learning_rate": 3.539430001336614e-06, "loss": 0.1022, "step": 21060 }, { "epoch": 1.8203810099788327, "grad_norm": 0.5395042423170748, "learning_rate": 3.534970175096083e-06, "loss": 0.1031, "step": 21070 }, { "epoch": 1.8212449781848028, "grad_norm": 0.5508954228284384, "learning_rate": 3.5305116235380233e-06, "loss": 0.1036, "step": 21080 }, { "epoch": 1.822108946390773, "grad_norm": 0.5459122344449423, "learning_rate": 3.526054350541704e-06, "loss": 0.1028, "step": 21090 }, { "epoch": 1.8229729145967428, "grad_norm": 0.573507303035393, "learning_rate": 3.521598359985271e-06, "loss": 0.1016, "step": 21100 }, { "epoch": 1.8238368828027127, "grad_norm": 0.5437055323857766, "learning_rate": 3.5171436557457628e-06, "loss": 0.0984, "step": 21110 }, { "epoch": 1.8247008510086828, "grad_norm": 0.5670371375647348, "learning_rate": 3.512690241699096e-06, "loss": 0.1008, "step": 21120 }, { "epoch": 1.825564819214653, "grad_norm": 0.5628275899816054, "learning_rate": 3.5082381217200668e-06, "loss": 0.0983, "step": 21130 }, { "epoch": 1.8264287874206229, "grad_norm": 0.57987227106917, "learning_rate": 3.5037872996823384e-06, "loss": 0.1009, "step": 21140 }, { "epoch": 1.827292755626593, "grad_norm": 0.5597332618992171, "learning_rate": 3.4993377794584532e-06, "loss": 0.1042, "step": 21150 }, { "epoch": 1.8281567238325631, "grad_norm": 0.5527884910029922, "learning_rate": 3.4948895649198156e-06, "loss": 0.1002, "step": 21160 }, { "epoch": 1.829020692038533, "grad_norm": 0.5399126337821516, "learning_rate": 3.4904426599366985e-06, "loss": 0.0998, "step": 21170 }, { "epoch": 1.829884660244503, "grad_norm": 0.5533812170631007, "learning_rate": 3.4859970683782283e-06, "loss": 0.1029, "step": 21180 }, { "epoch": 1.830748628450473, "grad_norm": 0.570090142425241, "learning_rate": 3.481552794112395e-06, "loss": 0.1002, "step": 21190 }, { "epoch": 1.8316125966564432, "grad_norm": 0.576237314844463, "learning_rate": 3.4771098410060433e-06, "loss": 0.101, "step": 21200 }, { "epoch": 1.832476564862413, "grad_norm": 0.5785261238185759, "learning_rate": 3.47266821292486e-06, "loss": 0.1026, "step": 21210 }, { "epoch": 1.833340533068383, "grad_norm": 0.5391411473596741, "learning_rate": 3.4682279137333874e-06, "loss": 0.1009, "step": 21220 }, { "epoch": 1.834204501274353, "grad_norm": 0.5717021882218313, "learning_rate": 3.463788947295008e-06, "loss": 0.1017, "step": 21230 }, { "epoch": 1.8350684694803232, "grad_norm": 0.5641109625964653, "learning_rate": 3.459351317471948e-06, "loss": 0.1015, "step": 21240 }, { "epoch": 1.835932437686293, "grad_norm": 0.5841912468112753, "learning_rate": 3.4549150281252635e-06, "loss": 0.1009, "step": 21250 }, { "epoch": 1.836796405892263, "grad_norm": 0.5775667698271872, "learning_rate": 3.4504800831148523e-06, "loss": 0.1015, "step": 21260 }, { "epoch": 1.8376603740982334, "grad_norm": 0.5511909835861091, "learning_rate": 3.446046486299437e-06, "loss": 0.1001, "step": 21270 }, { "epoch": 1.8385243423042033, "grad_norm": 0.5344861997274931, "learning_rate": 3.4416142415365718e-06, "loss": 0.1046, "step": 21280 }, { "epoch": 1.8393883105101732, "grad_norm": 0.5756081848954616, "learning_rate": 3.437183352682628e-06, "loss": 0.1033, "step": 21290 }, { "epoch": 1.8402522787161433, "grad_norm": 0.5671991668813597, "learning_rate": 3.432753823592804e-06, "loss": 0.1046, "step": 21300 }, { "epoch": 1.8411162469221134, "grad_norm": 0.5782180776536016, "learning_rate": 3.42832565812111e-06, "loss": 0.0986, "step": 21310 }, { "epoch": 1.8419802151280833, "grad_norm": 0.5327885416091026, "learning_rate": 3.4238988601203766e-06, "loss": 0.103, "step": 21320 }, { "epoch": 1.8428441833340532, "grad_norm": 0.5301622627094087, "learning_rate": 3.4194734334422343e-06, "loss": 0.1022, "step": 21330 }, { "epoch": 1.8437081515400233, "grad_norm": 0.542883154178525, "learning_rate": 3.4150493819371282e-06, "loss": 0.1029, "step": 21340 }, { "epoch": 1.8445721197459934, "grad_norm": 0.5485117027957886, "learning_rate": 3.4106267094543068e-06, "loss": 0.1033, "step": 21350 }, { "epoch": 1.8454360879519633, "grad_norm": 0.547398116269584, "learning_rate": 3.4062054198418143e-06, "loss": 0.1027, "step": 21360 }, { "epoch": 1.8463000561579332, "grad_norm": 0.5467991450188698, "learning_rate": 3.401785516946495e-06, "loss": 0.1043, "step": 21370 }, { "epoch": 1.8471640243639034, "grad_norm": 0.5856305997104947, "learning_rate": 3.397367004613985e-06, "loss": 0.1013, "step": 21380 }, { "epoch": 1.8480279925698735, "grad_norm": 0.5382540415719735, "learning_rate": 3.3929498866887124e-06, "loss": 0.1011, "step": 21390 }, { "epoch": 1.8488919607758434, "grad_norm": 0.5519626832639604, "learning_rate": 3.3885341670138915e-06, "loss": 0.098, "step": 21400 }, { "epoch": 1.8497559289818135, "grad_norm": 0.5320256820466911, "learning_rate": 3.384119849431517e-06, "loss": 0.105, "step": 21410 }, { "epoch": 1.8506198971877836, "grad_norm": 0.568130486279502, "learning_rate": 3.3797069377823676e-06, "loss": 0.1002, "step": 21420 }, { "epoch": 1.8514838653937535, "grad_norm": 0.5478982214510582, "learning_rate": 3.3752954359059976e-06, "loss": 0.1039, "step": 21430 }, { "epoch": 1.8523478335997234, "grad_norm": 0.544232492931912, "learning_rate": 3.3708853476407365e-06, "loss": 0.1004, "step": 21440 }, { "epoch": 1.8532118018056936, "grad_norm": 0.5585103563773959, "learning_rate": 3.366476676823677e-06, "loss": 0.1019, "step": 21450 }, { "epoch": 1.8540757700116637, "grad_norm": 0.5455231576521419, "learning_rate": 3.3620694272906874e-06, "loss": 0.0977, "step": 21460 }, { "epoch": 1.8549397382176336, "grad_norm": 0.5379648194855917, "learning_rate": 3.357663602876392e-06, "loss": 0.0979, "step": 21470 }, { "epoch": 1.8558037064236035, "grad_norm": 0.5726162354617428, "learning_rate": 3.3532592074141823e-06, "loss": 0.0994, "step": 21480 }, { "epoch": 1.8566676746295736, "grad_norm": 0.5827018315751672, "learning_rate": 3.3488562447361978e-06, "loss": 0.1, "step": 21490 }, { "epoch": 1.8575316428355437, "grad_norm": 0.5635050703224438, "learning_rate": 3.344454718673339e-06, "loss": 0.1029, "step": 21500 }, { "epoch": 1.8583956110415136, "grad_norm": 0.5565811080368595, "learning_rate": 3.3400546330552554e-06, "loss": 0.1026, "step": 21510 }, { "epoch": 1.8592595792474835, "grad_norm": 0.564486228128983, "learning_rate": 3.3356559917103377e-06, "loss": 0.1019, "step": 21520 }, { "epoch": 1.8601235474534539, "grad_norm": 0.5605995453561945, "learning_rate": 3.3312587984657246e-06, "loss": 0.1006, "step": 21530 }, { "epoch": 1.8609875156594238, "grad_norm": 0.6100355923575964, "learning_rate": 3.326863057147295e-06, "loss": 0.1039, "step": 21540 }, { "epoch": 1.8618514838653937, "grad_norm": 0.5762711693846725, "learning_rate": 3.3224687715796656e-06, "loss": 0.1014, "step": 21550 }, { "epoch": 1.8627154520713638, "grad_norm": 0.5366489167927376, "learning_rate": 3.3180759455861798e-06, "loss": 0.1003, "step": 21560 }, { "epoch": 1.863579420277334, "grad_norm": 0.5662365857970131, "learning_rate": 3.3136845829889185e-06, "loss": 0.1027, "step": 21570 }, { "epoch": 1.8644433884833038, "grad_norm": 0.5459962285652393, "learning_rate": 3.309294687608685e-06, "loss": 0.1005, "step": 21580 }, { "epoch": 1.8653073566892737, "grad_norm": 0.573806154613363, "learning_rate": 3.3049062632650098e-06, "loss": 0.1011, "step": 21590 }, { "epoch": 1.8661713248952438, "grad_norm": 0.5266529570129646, "learning_rate": 3.3005193137761376e-06, "loss": 0.1023, "step": 21600 }, { "epoch": 1.867035293101214, "grad_norm": 0.5777135015426488, "learning_rate": 3.2961338429590364e-06, "loss": 0.1022, "step": 21610 }, { "epoch": 1.8678992613071839, "grad_norm": 0.5710616793282451, "learning_rate": 3.2917498546293823e-06, "loss": 0.1024, "step": 21620 }, { "epoch": 1.8687632295131538, "grad_norm": 0.5774894136897458, "learning_rate": 3.287367352601568e-06, "loss": 0.1003, "step": 21630 }, { "epoch": 1.869627197719124, "grad_norm": 0.5741261431440448, "learning_rate": 3.282986340688684e-06, "loss": 0.1028, "step": 21640 }, { "epoch": 1.870491165925094, "grad_norm": 0.5883458021179613, "learning_rate": 3.278606822702532e-06, "loss": 0.1008, "step": 21650 }, { "epoch": 1.871355134131064, "grad_norm": 0.574927450088088, "learning_rate": 3.274228802453611e-06, "loss": 0.1003, "step": 21660 }, { "epoch": 1.872219102337034, "grad_norm": 0.5477269695224006, "learning_rate": 3.269852283751117e-06, "loss": 0.1023, "step": 21670 }, { "epoch": 1.8730830705430042, "grad_norm": 0.5438420121822475, "learning_rate": 3.2654772704029385e-06, "loss": 0.1005, "step": 21680 }, { "epoch": 1.873947038748974, "grad_norm": 0.5773522247749471, "learning_rate": 3.2611037662156553e-06, "loss": 0.101, "step": 21690 }, { "epoch": 1.874811006954944, "grad_norm": 0.5270650909460688, "learning_rate": 3.256731774994534e-06, "loss": 0.1009, "step": 21700 }, { "epoch": 1.875674975160914, "grad_norm": 0.5697439029107149, "learning_rate": 3.2523613005435273e-06, "loss": 0.1018, "step": 21710 }, { "epoch": 1.8765389433668842, "grad_norm": 0.5889980436086851, "learning_rate": 3.247992346665262e-06, "loss": 0.103, "step": 21720 }, { "epoch": 1.877402911572854, "grad_norm": 0.5855966040283817, "learning_rate": 3.243624917161048e-06, "loss": 0.1004, "step": 21730 }, { "epoch": 1.878266879778824, "grad_norm": 0.5645627450678173, "learning_rate": 3.239259015830865e-06, "loss": 0.1003, "step": 21740 }, { "epoch": 1.8791308479847941, "grad_norm": 0.5607021768032766, "learning_rate": 3.234894646473368e-06, "loss": 0.1045, "step": 21750 }, { "epoch": 1.8799948161907643, "grad_norm": 0.6018622097238112, "learning_rate": 3.230531812885872e-06, "loss": 0.1023, "step": 21760 }, { "epoch": 1.8808587843967342, "grad_norm": 0.5716356881800182, "learning_rate": 3.2261705188643612e-06, "loss": 0.1022, "step": 21770 }, { "epoch": 1.8817227526027043, "grad_norm": 0.5520205261963792, "learning_rate": 3.2218107682034775e-06, "loss": 0.1013, "step": 21780 }, { "epoch": 1.8825867208086744, "grad_norm": 0.5495910812136405, "learning_rate": 3.217452564696522e-06, "loss": 0.1014, "step": 21790 }, { "epoch": 1.8834506890146443, "grad_norm": 0.5624942994289897, "learning_rate": 3.2130959121354475e-06, "loss": 0.1001, "step": 21800 }, { "epoch": 1.8843146572206142, "grad_norm": 0.6136256619750384, "learning_rate": 3.208740814310859e-06, "loss": 0.1002, "step": 21810 }, { "epoch": 1.8851786254265843, "grad_norm": 0.5520828631133291, "learning_rate": 3.20438727501201e-06, "loss": 0.1051, "step": 21820 }, { "epoch": 1.8860425936325544, "grad_norm": 0.6030386575456828, "learning_rate": 3.2000352980267936e-06, "loss": 0.1001, "step": 21830 }, { "epoch": 1.8869065618385243, "grad_norm": 0.5712910795331343, "learning_rate": 3.195684887141747e-06, "loss": 0.0997, "step": 21840 }, { "epoch": 1.8877705300444942, "grad_norm": 0.5486849114411699, "learning_rate": 3.191336046142044e-06, "loss": 0.101, "step": 21850 }, { "epoch": 1.8886344982504644, "grad_norm": 0.6141548602098744, "learning_rate": 3.1869887788114943e-06, "loss": 0.1029, "step": 21860 }, { "epoch": 1.8894984664564345, "grad_norm": 0.5775840450930885, "learning_rate": 3.1826430889325344e-06, "loss": 0.1029, "step": 21870 }, { "epoch": 1.8903624346624044, "grad_norm": 0.5540741540255073, "learning_rate": 3.1782989802862313e-06, "loss": 0.0997, "step": 21880 }, { "epoch": 1.8912264028683743, "grad_norm": 0.5645731722914674, "learning_rate": 3.173956456652274e-06, "loss": 0.1025, "step": 21890 }, { "epoch": 1.8920903710743444, "grad_norm": 0.5934655084949406, "learning_rate": 3.169615521808978e-06, "loss": 0.0969, "step": 21900 }, { "epoch": 1.8929543392803145, "grad_norm": 0.5541470186648709, "learning_rate": 3.165276179533267e-06, "loss": 0.1033, "step": 21910 }, { "epoch": 1.8938183074862844, "grad_norm": 0.5738429556296889, "learning_rate": 3.1609384336006866e-06, "loss": 0.1016, "step": 21920 }, { "epoch": 1.8946822756922546, "grad_norm": 0.546682642116901, "learning_rate": 3.156602287785391e-06, "loss": 0.1013, "step": 21930 }, { "epoch": 1.8955462438982247, "grad_norm": 0.5727258889036533, "learning_rate": 3.1522677458601447e-06, "loss": 0.1018, "step": 21940 }, { "epoch": 1.8964102121041946, "grad_norm": 0.5750188686261928, "learning_rate": 3.1479348115963105e-06, "loss": 0.0991, "step": 21950 }, { "epoch": 1.8972741803101645, "grad_norm": 0.5675659013009478, "learning_rate": 3.143603488763858e-06, "loss": 0.1002, "step": 21960 }, { "epoch": 1.8981381485161346, "grad_norm": 0.5432009469281879, "learning_rate": 3.1392737811313535e-06, "loss": 0.1008, "step": 21970 }, { "epoch": 1.8990021167221047, "grad_norm": 0.5323856224850143, "learning_rate": 3.134945692465957e-06, "loss": 0.1009, "step": 21980 }, { "epoch": 1.8998660849280746, "grad_norm": 0.5519361955646229, "learning_rate": 3.13061922653342e-06, "loss": 0.1026, "step": 21990 }, { "epoch": 1.9007300531340445, "grad_norm": 0.5620259952273624, "learning_rate": 3.1262943870980823e-06, "loss": 0.0992, "step": 22000 }, { "epoch": 1.9015940213400147, "grad_norm": 0.5686870753402948, "learning_rate": 3.121971177922869e-06, "loss": 0.1001, "step": 22010 }, { "epoch": 1.9024579895459848, "grad_norm": 0.6088252851839825, "learning_rate": 3.1176496027692886e-06, "loss": 0.1027, "step": 22020 }, { "epoch": 1.9033219577519547, "grad_norm": 0.5844603975272236, "learning_rate": 3.113329665397422e-06, "loss": 0.1015, "step": 22030 }, { "epoch": 1.9041859259579248, "grad_norm": 0.5606232894350783, "learning_rate": 3.1090113695659297e-06, "loss": 0.1022, "step": 22040 }, { "epoch": 1.905049894163895, "grad_norm": 0.5659034726528784, "learning_rate": 3.1046947190320463e-06, "loss": 0.1013, "step": 22050 }, { "epoch": 1.9059138623698648, "grad_norm": 0.5340403917026766, "learning_rate": 3.100379717551567e-06, "loss": 0.0991, "step": 22060 }, { "epoch": 1.9067778305758347, "grad_norm": 0.5618283207865042, "learning_rate": 3.0960663688788596e-06, "loss": 0.0996, "step": 22070 }, { "epoch": 1.9076417987818048, "grad_norm": 0.5531777403622569, "learning_rate": 3.0917546767668504e-06, "loss": 0.103, "step": 22080 }, { "epoch": 1.908505766987775, "grad_norm": 0.5497027735815795, "learning_rate": 3.0874446449670258e-06, "loss": 0.0972, "step": 22090 }, { "epoch": 1.9093697351937449, "grad_norm": 0.5697514613470495, "learning_rate": 3.083136277229426e-06, "loss": 0.1011, "step": 22100 }, { "epoch": 1.9102337033997148, "grad_norm": 0.5476742899641973, "learning_rate": 3.0788295773026437e-06, "loss": 0.1014, "step": 22110 }, { "epoch": 1.9110976716056849, "grad_norm": 0.5677127957502706, "learning_rate": 3.0745245489338217e-06, "loss": 0.1023, "step": 22120 }, { "epoch": 1.911961639811655, "grad_norm": 0.5983383473334517, "learning_rate": 3.0702211958686503e-06, "loss": 0.1004, "step": 22130 }, { "epoch": 1.912825608017625, "grad_norm": 0.5395467401934758, "learning_rate": 3.065919521851354e-06, "loss": 0.1011, "step": 22140 }, { "epoch": 1.9136895762235948, "grad_norm": 0.5372402138956013, "learning_rate": 3.0616195306247054e-06, "loss": 0.0996, "step": 22150 }, { "epoch": 1.9145535444295652, "grad_norm": 0.5403566249572875, "learning_rate": 3.057321225930009e-06, "loss": 0.0999, "step": 22160 }, { "epoch": 1.915417512635535, "grad_norm": 0.5544226990298059, "learning_rate": 3.0530246115071036e-06, "loss": 0.1, "step": 22170 }, { "epoch": 1.916281480841505, "grad_norm": 0.5826205818540283, "learning_rate": 3.0487296910943543e-06, "loss": 0.0996, "step": 22180 }, { "epoch": 1.917145449047475, "grad_norm": 0.5743500225747563, "learning_rate": 3.044436468428655e-06, "loss": 0.103, "step": 22190 }, { "epoch": 1.9180094172534452, "grad_norm": 0.5633148475444488, "learning_rate": 3.040144947245422e-06, "loss": 0.1002, "step": 22200 }, { "epoch": 1.918873385459415, "grad_norm": 0.5352323628595675, "learning_rate": 3.0358551312785912e-06, "loss": 0.0993, "step": 22210 }, { "epoch": 1.919737353665385, "grad_norm": 0.5599302140701811, "learning_rate": 3.0315670242606123e-06, "loss": 0.1013, "step": 22220 }, { "epoch": 1.9206013218713551, "grad_norm": 0.5660720954378141, "learning_rate": 3.02728062992245e-06, "loss": 0.1014, "step": 22230 }, { "epoch": 1.9214652900773252, "grad_norm": 0.5380748855737479, "learning_rate": 3.0229959519935814e-06, "loss": 0.101, "step": 22240 }, { "epoch": 1.9223292582832952, "grad_norm": 0.5884805058572077, "learning_rate": 3.018712994201989e-06, "loss": 0.102, "step": 22250 }, { "epoch": 1.923193226489265, "grad_norm": 0.5874042489492537, "learning_rate": 3.0144317602741535e-06, "loss": 0.1006, "step": 22260 }, { "epoch": 1.9240571946952352, "grad_norm": 0.5327072747468091, "learning_rate": 3.010152253935061e-06, "loss": 0.0965, "step": 22270 }, { "epoch": 1.9249211629012053, "grad_norm": 0.5672748667596161, "learning_rate": 3.005874478908195e-06, "loss": 0.0992, "step": 22280 }, { "epoch": 1.9257851311071752, "grad_norm": 0.5779026537091281, "learning_rate": 3.00159843891553e-06, "loss": 0.1008, "step": 22290 }, { "epoch": 1.9266490993131453, "grad_norm": 0.5775941103917845, "learning_rate": 2.9973241376775314e-06, "loss": 0.102, "step": 22300 }, { "epoch": 1.9275130675191154, "grad_norm": 0.5614889405016127, "learning_rate": 2.9930515789131507e-06, "loss": 0.1024, "step": 22310 }, { "epoch": 1.9283770357250853, "grad_norm": 0.5207831073526223, "learning_rate": 2.9887807663398283e-06, "loss": 0.0995, "step": 22320 }, { "epoch": 1.9292410039310552, "grad_norm": 0.5518036833957318, "learning_rate": 2.9845117036734773e-06, "loss": 0.0981, "step": 22330 }, { "epoch": 1.9301049721370254, "grad_norm": 0.5848049624329538, "learning_rate": 2.9802443946284944e-06, "loss": 0.1027, "step": 22340 }, { "epoch": 1.9309689403429955, "grad_norm": 0.5595521149024182, "learning_rate": 2.9759788429177493e-06, "loss": 0.0969, "step": 22350 }, { "epoch": 1.9318329085489654, "grad_norm": 0.5774209309801077, "learning_rate": 2.9717150522525838e-06, "loss": 0.098, "step": 22360 }, { "epoch": 1.9326968767549353, "grad_norm": 0.582103892789509, "learning_rate": 2.967453026342802e-06, "loss": 0.1037, "step": 22370 }, { "epoch": 1.9335608449609054, "grad_norm": 0.596134016903453, "learning_rate": 2.9631927688966783e-06, "loss": 0.1022, "step": 22380 }, { "epoch": 1.9344248131668755, "grad_norm": 0.5787978494309993, "learning_rate": 2.9589342836209473e-06, "loss": 0.1014, "step": 22390 }, { "epoch": 1.9352887813728454, "grad_norm": 0.5598636799101697, "learning_rate": 2.9546775742208e-06, "loss": 0.0982, "step": 22400 }, { "epoch": 1.9361527495788153, "grad_norm": 0.5457627399040831, "learning_rate": 2.9504226443998827e-06, "loss": 0.1003, "step": 22410 }, { "epoch": 1.9370167177847857, "grad_norm": 0.5273927080875033, "learning_rate": 2.946169497860294e-06, "loss": 0.0977, "step": 22420 }, { "epoch": 1.9378806859907556, "grad_norm": 0.5515535907865661, "learning_rate": 2.94191813830258e-06, "loss": 0.1016, "step": 22430 }, { "epoch": 1.9387446541967255, "grad_norm": 0.5858401856027835, "learning_rate": 2.9376685694257358e-06, "loss": 0.098, "step": 22440 }, { "epoch": 1.9396086224026956, "grad_norm": 0.5326821139702289, "learning_rate": 2.933420794927191e-06, "loss": 0.0999, "step": 22450 }, { "epoch": 1.9404725906086657, "grad_norm": 0.6165635449377432, "learning_rate": 2.9291748185028197e-06, "loss": 0.099, "step": 22460 }, { "epoch": 1.9413365588146356, "grad_norm": 0.5420456009991401, "learning_rate": 2.9249306438469305e-06, "loss": 0.0992, "step": 22470 }, { "epoch": 1.9422005270206055, "grad_norm": 0.57820086837121, "learning_rate": 2.9206882746522665e-06, "loss": 0.1002, "step": 22480 }, { "epoch": 1.9430644952265756, "grad_norm": 0.5818903495695285, "learning_rate": 2.9164477146099923e-06, "loss": 0.0996, "step": 22490 }, { "epoch": 1.9439284634325458, "grad_norm": 0.5443267800752883, "learning_rate": 2.9122089674097076e-06, "loss": 0.1014, "step": 22500 }, { "epoch": 1.9447924316385157, "grad_norm": 0.5500539383572447, "learning_rate": 2.907972036739427e-06, "loss": 0.1022, "step": 22510 }, { "epoch": 1.9456563998444856, "grad_norm": 0.6347343648821124, "learning_rate": 2.9037369262855947e-06, "loss": 0.1, "step": 22520 }, { "epoch": 1.9465203680504557, "grad_norm": 0.5514477780439476, "learning_rate": 2.8995036397330563e-06, "loss": 0.0992, "step": 22530 }, { "epoch": 1.9473843362564258, "grad_norm": 0.586202809665758, "learning_rate": 2.8952721807650854e-06, "loss": 0.105, "step": 22540 }, { "epoch": 1.9482483044623957, "grad_norm": 0.5545892197299369, "learning_rate": 2.8910425530633556e-06, "loss": 0.0984, "step": 22550 }, { "epoch": 1.9491122726683658, "grad_norm": 0.5730214745435419, "learning_rate": 2.886814760307952e-06, "loss": 0.0993, "step": 22560 }, { "epoch": 1.949976240874336, "grad_norm": 0.5458153748028479, "learning_rate": 2.8825888061773623e-06, "loss": 0.0988, "step": 22570 }, { "epoch": 1.9508402090803059, "grad_norm": 0.5921035939497012, "learning_rate": 2.87836469434847e-06, "loss": 0.0946, "step": 22580 }, { "epoch": 1.9517041772862758, "grad_norm": 0.5560783693469633, "learning_rate": 2.8741424284965645e-06, "loss": 0.1013, "step": 22590 }, { "epoch": 1.9525681454922459, "grad_norm": 0.549086003919069, "learning_rate": 2.869922012295322e-06, "loss": 0.1005, "step": 22600 }, { "epoch": 1.953432113698216, "grad_norm": 0.5793251391076313, "learning_rate": 2.865703449416812e-06, "loss": 0.1035, "step": 22610 }, { "epoch": 1.954296081904186, "grad_norm": 0.5570124709906242, "learning_rate": 2.8614867435314877e-06, "loss": 0.0954, "step": 22620 }, { "epoch": 1.9551600501101558, "grad_norm": 0.5329637013463601, "learning_rate": 2.8572718983081972e-06, "loss": 0.0993, "step": 22630 }, { "epoch": 1.956024018316126, "grad_norm": 0.5332785700544586, "learning_rate": 2.853058917414153e-06, "loss": 0.0956, "step": 22640 }, { "epoch": 1.956887986522096, "grad_norm": 0.570819195946033, "learning_rate": 2.848847804514962e-06, "loss": 0.1007, "step": 22650 }, { "epoch": 1.957751954728066, "grad_norm": 0.5495513030265846, "learning_rate": 2.844638563274596e-06, "loss": 0.1002, "step": 22660 }, { "epoch": 1.958615922934036, "grad_norm": 0.5975360173842512, "learning_rate": 2.840431197355401e-06, "loss": 0.1002, "step": 22670 }, { "epoch": 1.9594798911400062, "grad_norm": 0.560563297872084, "learning_rate": 2.836225710418091e-06, "loss": 0.0988, "step": 22680 }, { "epoch": 1.960343859345976, "grad_norm": 0.5717780583220105, "learning_rate": 2.8320221061217446e-06, "loss": 0.1013, "step": 22690 }, { "epoch": 1.961207827551946, "grad_norm": 0.5525346469001077, "learning_rate": 2.827820388123806e-06, "loss": 0.1004, "step": 22700 }, { "epoch": 1.9620717957579161, "grad_norm": 0.5827285043087085, "learning_rate": 2.823620560080074e-06, "loss": 0.1011, "step": 22710 }, { "epoch": 1.9629357639638862, "grad_norm": 0.5752788801952603, "learning_rate": 2.8194226256447033e-06, "loss": 0.099, "step": 22720 }, { "epoch": 1.9637997321698561, "grad_norm": 0.557491580653571, "learning_rate": 2.815226588470201e-06, "loss": 0.1018, "step": 22730 }, { "epoch": 1.964663700375826, "grad_norm": 0.5916884024884534, "learning_rate": 2.811032452207429e-06, "loss": 0.0974, "step": 22740 }, { "epoch": 1.9655276685817962, "grad_norm": 0.5386096553878962, "learning_rate": 2.8068402205055878e-06, "loss": 0.0989, "step": 22750 }, { "epoch": 1.9663916367877663, "grad_norm": 0.5710691676756725, "learning_rate": 2.8026498970122245e-06, "loss": 0.0955, "step": 22760 }, { "epoch": 1.9672556049937362, "grad_norm": 0.5556909406918205, "learning_rate": 2.798461485373225e-06, "loss": 0.1005, "step": 22770 }, { "epoch": 1.968119573199706, "grad_norm": 0.557444256716542, "learning_rate": 2.79427498923281e-06, "loss": 0.0969, "step": 22780 }, { "epoch": 1.9689835414056762, "grad_norm": 0.5905330627179569, "learning_rate": 2.7900904122335415e-06, "loss": 0.0987, "step": 22790 }, { "epoch": 1.9698475096116463, "grad_norm": 0.5826168610406894, "learning_rate": 2.7859077580162976e-06, "loss": 0.0997, "step": 22800 }, { "epoch": 1.9707114778176162, "grad_norm": 0.5600933255055477, "learning_rate": 2.781727030220298e-06, "loss": 0.0986, "step": 22810 }, { "epoch": 1.9715754460235864, "grad_norm": 0.54806352388108, "learning_rate": 2.777548232483075e-06, "loss": 0.0976, "step": 22820 }, { "epoch": 1.9724394142295565, "grad_norm": 0.5551221906142857, "learning_rate": 2.773371368440494e-06, "loss": 0.0983, "step": 22830 }, { "epoch": 1.9733033824355264, "grad_norm": 0.5723114218083482, "learning_rate": 2.7691964417267214e-06, "loss": 0.0996, "step": 22840 }, { "epoch": 1.9741673506414963, "grad_norm": 0.6044292557590281, "learning_rate": 2.765023455974253e-06, "loss": 0.0982, "step": 22850 }, { "epoch": 1.9750313188474664, "grad_norm": 0.5746034008271442, "learning_rate": 2.7608524148138878e-06, "loss": 0.0988, "step": 22860 }, { "epoch": 1.9758952870534365, "grad_norm": 0.6032867504998831, "learning_rate": 2.7566833218747353e-06, "loss": 0.0999, "step": 22870 }, { "epoch": 1.9767592552594064, "grad_norm": 0.5802365470670267, "learning_rate": 2.7525161807842095e-06, "loss": 0.1009, "step": 22880 }, { "epoch": 1.9776232234653763, "grad_norm": 0.5781767957801193, "learning_rate": 2.748350995168023e-06, "loss": 0.0985, "step": 22890 }, { "epoch": 1.9784871916713465, "grad_norm": 0.5519359594377506, "learning_rate": 2.744187768650198e-06, "loss": 0.0989, "step": 22900 }, { "epoch": 1.9793511598773166, "grad_norm": 0.573944538698511, "learning_rate": 2.7400265048530355e-06, "loss": 0.0983, "step": 22910 }, { "epoch": 1.9802151280832865, "grad_norm": 0.5723397613793625, "learning_rate": 2.7358672073971433e-06, "loss": 0.098, "step": 22920 }, { "epoch": 1.9810790962892566, "grad_norm": 0.5510971107060265, "learning_rate": 2.7317098799014086e-06, "loss": 0.1021, "step": 22930 }, { "epoch": 1.9819430644952267, "grad_norm": 0.5351223179335077, "learning_rate": 2.727554525983015e-06, "loss": 0.0958, "step": 22940 }, { "epoch": 1.9828070327011966, "grad_norm": 0.5911713108321606, "learning_rate": 2.723401149257414e-06, "loss": 0.0987, "step": 22950 }, { "epoch": 1.9836710009071665, "grad_norm": 0.555620541087196, "learning_rate": 2.7192497533383517e-06, "loss": 0.0974, "step": 22960 }, { "epoch": 1.9845349691131366, "grad_norm": 0.531489300609613, "learning_rate": 2.7151003418378426e-06, "loss": 0.0998, "step": 22970 }, { "epoch": 1.9853989373191068, "grad_norm": 0.56067493893783, "learning_rate": 2.710952918366176e-06, "loss": 0.0986, "step": 22980 }, { "epoch": 1.9862629055250767, "grad_norm": 0.5115950990313571, "learning_rate": 2.706807486531912e-06, "loss": 0.0993, "step": 22990 }, { "epoch": 1.9871268737310466, "grad_norm": 0.56036178938333, "learning_rate": 2.7026640499418764e-06, "loss": 0.0959, "step": 23000 }, { "epoch": 1.9879908419370167, "grad_norm": 0.5820731208668521, "learning_rate": 2.698522612201163e-06, "loss": 0.1015, "step": 23010 }, { "epoch": 1.9888548101429868, "grad_norm": 0.5561062988471792, "learning_rate": 2.6943831769131235e-06, "loss": 0.0998, "step": 23020 }, { "epoch": 1.9897187783489567, "grad_norm": 0.550576715080664, "learning_rate": 2.690245747679366e-06, "loss": 0.0995, "step": 23030 }, { "epoch": 1.9905827465549266, "grad_norm": 0.5607772814673408, "learning_rate": 2.686110328099753e-06, "loss": 0.0981, "step": 23040 }, { "epoch": 1.991446714760897, "grad_norm": 0.5649321440620626, "learning_rate": 2.681976921772405e-06, "loss": 0.0948, "step": 23050 }, { "epoch": 1.9923106829668669, "grad_norm": 0.5547901042849294, "learning_rate": 2.6778455322936837e-06, "loss": 0.1017, "step": 23060 }, { "epoch": 1.9931746511728368, "grad_norm": 0.5433895881280426, "learning_rate": 2.6737161632581983e-06, "loss": 0.0972, "step": 23070 }, { "epoch": 1.9940386193788069, "grad_norm": 0.5879816117729628, "learning_rate": 2.6695888182588005e-06, "loss": 0.0983, "step": 23080 }, { "epoch": 1.994902587584777, "grad_norm": 0.5639706915617735, "learning_rate": 2.665463500886577e-06, "loss": 0.0976, "step": 23090 }, { "epoch": 1.995766555790747, "grad_norm": 0.5597079533515157, "learning_rate": 2.661340214730862e-06, "loss": 0.0992, "step": 23100 }, { "epoch": 1.9966305239967168, "grad_norm": 0.5975735164456436, "learning_rate": 2.6572189633792052e-06, "loss": 0.0966, "step": 23110 }, { "epoch": 1.997494492202687, "grad_norm": 0.5431942288707596, "learning_rate": 2.6530997504174007e-06, "loss": 0.099, "step": 23120 }, { "epoch": 1.998358460408657, "grad_norm": 0.5610255705894158, "learning_rate": 2.64898257942946e-06, "loss": 0.1023, "step": 23130 }, { "epoch": 1.999222428614627, "grad_norm": 0.5565024056381163, "learning_rate": 2.6448674539976264e-06, "loss": 0.0952, "step": 23140 }, { "epoch": 2.000086396820597, "grad_norm": 0.5762272252445255, "learning_rate": 2.6407543777023514e-06, "loss": 0.096, "step": 23150 }, { "epoch": 2.000950365026567, "grad_norm": 0.5941202253662053, "learning_rate": 2.6366433541223156e-06, "loss": 0.0673, "step": 23160 }, { "epoch": 2.001814333232537, "grad_norm": 0.6016764334805269, "learning_rate": 2.6325343868344064e-06, "loss": 0.0668, "step": 23170 }, { "epoch": 2.002678301438507, "grad_norm": 0.5768547489229648, "learning_rate": 2.6284274794137243e-06, "loss": 0.0668, "step": 23180 }, { "epoch": 2.003542269644477, "grad_norm": 0.5653152032508831, "learning_rate": 2.624322635433577e-06, "loss": 0.0636, "step": 23190 }, { "epoch": 2.0044062378504472, "grad_norm": 0.6356562558227943, "learning_rate": 2.620219858465477e-06, "loss": 0.0653, "step": 23200 }, { "epoch": 2.005270206056417, "grad_norm": 0.6010359673980143, "learning_rate": 2.616119152079142e-06, "loss": 0.0668, "step": 23210 }, { "epoch": 2.006134174262387, "grad_norm": 0.5909469980255928, "learning_rate": 2.6120205198424786e-06, "loss": 0.0637, "step": 23220 }, { "epoch": 2.006998142468357, "grad_norm": 0.5797202598631477, "learning_rate": 2.6079239653215994e-06, "loss": 0.0665, "step": 23230 }, { "epoch": 2.0078621106743273, "grad_norm": 0.5595299434577446, "learning_rate": 2.603829492080802e-06, "loss": 0.0648, "step": 23240 }, { "epoch": 2.008726078880297, "grad_norm": 0.5868298303035886, "learning_rate": 2.5997371036825814e-06, "loss": 0.0649, "step": 23250 }, { "epoch": 2.009590047086267, "grad_norm": 0.6136273885472115, "learning_rate": 2.5956468036876046e-06, "loss": 0.0658, "step": 23260 }, { "epoch": 2.0104540152922374, "grad_norm": 0.6199336639686098, "learning_rate": 2.591558595654737e-06, "loss": 0.0661, "step": 23270 }, { "epoch": 2.0113179834982073, "grad_norm": 0.5663941452077751, "learning_rate": 2.587472483141015e-06, "loss": 0.0635, "step": 23280 }, { "epoch": 2.0121819517041772, "grad_norm": 0.5937353796830521, "learning_rate": 2.5833884697016513e-06, "loss": 0.0652, "step": 23290 }, { "epoch": 2.013045919910147, "grad_norm": 0.6077597290157493, "learning_rate": 2.5793065588900367e-06, "loss": 0.0635, "step": 23300 }, { "epoch": 2.0139098881161175, "grad_norm": 0.608614512624362, "learning_rate": 2.575226754257728e-06, "loss": 0.0648, "step": 23310 }, { "epoch": 2.0147738563220874, "grad_norm": 0.6411985030217807, "learning_rate": 2.571149059354456e-06, "loss": 0.065, "step": 23320 }, { "epoch": 2.0156378245280573, "grad_norm": 0.5980106388366775, "learning_rate": 2.5670734777281093e-06, "loss": 0.0645, "step": 23330 }, { "epoch": 2.016501792734027, "grad_norm": 0.6363825389790632, "learning_rate": 2.56300001292474e-06, "loss": 0.0625, "step": 23340 }, { "epoch": 2.0173657609399975, "grad_norm": 0.6223195747565223, "learning_rate": 2.5589286684885584e-06, "loss": 0.0647, "step": 23350 }, { "epoch": 2.0182297291459674, "grad_norm": 0.6163821451502053, "learning_rate": 2.5548594479619315e-06, "loss": 0.0653, "step": 23360 }, { "epoch": 2.0190936973519373, "grad_norm": 0.5917851425986639, "learning_rate": 2.5507923548853775e-06, "loss": 0.0661, "step": 23370 }, { "epoch": 2.0199576655579077, "grad_norm": 0.6484282253432566, "learning_rate": 2.546727392797561e-06, "loss": 0.0613, "step": 23380 }, { "epoch": 2.0208216337638776, "grad_norm": 0.6039945969146345, "learning_rate": 2.5426645652352968e-06, "loss": 0.0639, "step": 23390 }, { "epoch": 2.0216856019698475, "grad_norm": 0.6437986864420733, "learning_rate": 2.5386038757335364e-06, "loss": 0.0646, "step": 23400 }, { "epoch": 2.0225495701758174, "grad_norm": 0.6132933888487462, "learning_rate": 2.5345453278253818e-06, "loss": 0.0617, "step": 23410 }, { "epoch": 2.0234135383817877, "grad_norm": 0.6140098088819063, "learning_rate": 2.530488925042056e-06, "loss": 0.0637, "step": 23420 }, { "epoch": 2.0242775065877576, "grad_norm": 0.6116175505543225, "learning_rate": 2.5264346709129315e-06, "loss": 0.0656, "step": 23430 }, { "epoch": 2.0251414747937275, "grad_norm": 0.6161269270983905, "learning_rate": 2.5223825689655024e-06, "loss": 0.0633, "step": 23440 }, { "epoch": 2.0260054429996974, "grad_norm": 0.6097595430473298, "learning_rate": 2.5183326227253915e-06, "loss": 0.064, "step": 23450 }, { "epoch": 2.0268694112056678, "grad_norm": 0.6039768047331386, "learning_rate": 2.514284835716343e-06, "loss": 0.0651, "step": 23460 }, { "epoch": 2.0277333794116377, "grad_norm": 0.5964483205003952, "learning_rate": 2.5102392114602326e-06, "loss": 0.0626, "step": 23470 }, { "epoch": 2.0285973476176076, "grad_norm": 0.6335610652801382, "learning_rate": 2.506195753477043e-06, "loss": 0.0648, "step": 23480 }, { "epoch": 2.0294613158235775, "grad_norm": 0.6032288219480831, "learning_rate": 2.502154465284879e-06, "loss": 0.0647, "step": 23490 }, { "epoch": 2.030325284029548, "grad_norm": 0.6081416747954673, "learning_rate": 2.4981153503999536e-06, "loss": 0.0636, "step": 23500 }, { "epoch": 2.0311892522355177, "grad_norm": 0.6190433175441461, "learning_rate": 2.494078412336589e-06, "loss": 0.0626, "step": 23510 }, { "epoch": 2.0320532204414876, "grad_norm": 0.6352529516782883, "learning_rate": 2.4900436546072206e-06, "loss": 0.0657, "step": 23520 }, { "epoch": 2.032917188647458, "grad_norm": 0.60396342566828, "learning_rate": 2.486011080722374e-06, "loss": 0.0638, "step": 23530 }, { "epoch": 2.033781156853428, "grad_norm": 0.6321000602851499, "learning_rate": 2.4819806941906855e-06, "loss": 0.0632, "step": 23540 }, { "epoch": 2.0346451250593978, "grad_norm": 0.6408999563026007, "learning_rate": 2.477952498518883e-06, "loss": 0.0672, "step": 23550 }, { "epoch": 2.0355090932653677, "grad_norm": 0.6099633041788447, "learning_rate": 2.473926497211794e-06, "loss": 0.063, "step": 23560 }, { "epoch": 2.036373061471338, "grad_norm": 0.6080948793986606, "learning_rate": 2.469902693772325e-06, "loss": 0.0611, "step": 23570 }, { "epoch": 2.037237029677308, "grad_norm": 0.6341190621707892, "learning_rate": 2.4658810917014823e-06, "loss": 0.0637, "step": 23580 }, { "epoch": 2.038100997883278, "grad_norm": 0.6540975857228662, "learning_rate": 2.4618616944983525e-06, "loss": 0.0644, "step": 23590 }, { "epoch": 2.0389649660892477, "grad_norm": 0.6366781118625203, "learning_rate": 2.457844505660102e-06, "loss": 0.0648, "step": 23600 }, { "epoch": 2.039828934295218, "grad_norm": 0.6317871739227648, "learning_rate": 2.453829528681977e-06, "loss": 0.0639, "step": 23610 }, { "epoch": 2.040692902501188, "grad_norm": 0.625094462950415, "learning_rate": 2.4498167670572977e-06, "loss": 0.0661, "step": 23620 }, { "epoch": 2.041556870707158, "grad_norm": 0.6075843535052505, "learning_rate": 2.4458062242774627e-06, "loss": 0.0669, "step": 23630 }, { "epoch": 2.042420838913128, "grad_norm": 0.6231492470009503, "learning_rate": 2.441797903831934e-06, "loss": 0.0645, "step": 23640 }, { "epoch": 2.043284807119098, "grad_norm": 0.6013322781301382, "learning_rate": 2.437791809208241e-06, "loss": 0.0653, "step": 23650 }, { "epoch": 2.044148775325068, "grad_norm": 0.6332588139476704, "learning_rate": 2.433787943891976e-06, "loss": 0.0663, "step": 23660 }, { "epoch": 2.045012743531038, "grad_norm": 0.5952863252399502, "learning_rate": 2.429786311366796e-06, "loss": 0.065, "step": 23670 }, { "epoch": 2.0458767117370082, "grad_norm": 0.6412555707079038, "learning_rate": 2.4257869151144128e-06, "loss": 0.064, "step": 23680 }, { "epoch": 2.046740679942978, "grad_norm": 0.6266314277669499, "learning_rate": 2.421789758614589e-06, "loss": 0.0616, "step": 23690 }, { "epoch": 2.047604648148948, "grad_norm": 0.6024010395703102, "learning_rate": 2.417794845345142e-06, "loss": 0.0627, "step": 23700 }, { "epoch": 2.048468616354918, "grad_norm": 0.6333664605446039, "learning_rate": 2.4138021787819365e-06, "loss": 0.063, "step": 23710 }, { "epoch": 2.0493325845608883, "grad_norm": 0.640445584802518, "learning_rate": 2.409811762398887e-06, "loss": 0.0625, "step": 23720 }, { "epoch": 2.050196552766858, "grad_norm": 0.6389702484627462, "learning_rate": 2.405823599667939e-06, "loss": 0.0629, "step": 23730 }, { "epoch": 2.051060520972828, "grad_norm": 0.6205228444403665, "learning_rate": 2.40183769405909e-06, "loss": 0.0629, "step": 23740 }, { "epoch": 2.0519244891787984, "grad_norm": 0.6340486050763664, "learning_rate": 2.3978540490403645e-06, "loss": 0.062, "step": 23750 }, { "epoch": 2.0527884573847683, "grad_norm": 0.6367859468897559, "learning_rate": 2.393872668077824e-06, "loss": 0.0622, "step": 23760 }, { "epoch": 2.0536524255907382, "grad_norm": 0.5904483099597296, "learning_rate": 2.3898935546355584e-06, "loss": 0.0643, "step": 23770 }, { "epoch": 2.054516393796708, "grad_norm": 0.6290343818423563, "learning_rate": 2.385916712175688e-06, "loss": 0.0635, "step": 23780 }, { "epoch": 2.0553803620026785, "grad_norm": 0.6312722588263152, "learning_rate": 2.3819421441583545e-06, "loss": 0.0609, "step": 23790 }, { "epoch": 2.0562443302086484, "grad_norm": 0.634502209125537, "learning_rate": 2.3779698540417202e-06, "loss": 0.0624, "step": 23800 }, { "epoch": 2.0571082984146183, "grad_norm": 0.6736741707092652, "learning_rate": 2.373999845281966e-06, "loss": 0.0628, "step": 23810 }, { "epoch": 2.057972266620588, "grad_norm": 0.6568078983053083, "learning_rate": 2.3700321213332873e-06, "loss": 0.0664, "step": 23820 }, { "epoch": 2.0588362348265585, "grad_norm": 0.6025816101246598, "learning_rate": 2.3660666856478975e-06, "loss": 0.064, "step": 23830 }, { "epoch": 2.0597002030325284, "grad_norm": 0.6673143783562789, "learning_rate": 2.3621035416760062e-06, "loss": 0.0639, "step": 23840 }, { "epoch": 2.0605641712384983, "grad_norm": 0.5777579786946239, "learning_rate": 2.3581426928658436e-06, "loss": 0.0637, "step": 23850 }, { "epoch": 2.0614281394444682, "grad_norm": 0.5945588080678396, "learning_rate": 2.354184142663631e-06, "loss": 0.0631, "step": 23860 }, { "epoch": 2.0622921076504386, "grad_norm": 0.6272905472891154, "learning_rate": 2.3502278945136007e-06, "loss": 0.0663, "step": 23870 }, { "epoch": 2.0631560758564085, "grad_norm": 0.5814381114745243, "learning_rate": 2.34627395185797e-06, "loss": 0.0657, "step": 23880 }, { "epoch": 2.0640200440623784, "grad_norm": 0.6194731552781829, "learning_rate": 2.3423223181369613e-06, "loss": 0.0658, "step": 23890 }, { "epoch": 2.0648840122683487, "grad_norm": 0.6423636477498306, "learning_rate": 2.3383729967887814e-06, "loss": 0.0651, "step": 23900 }, { "epoch": 2.0657479804743186, "grad_norm": 0.6494081770950135, "learning_rate": 2.334425991249627e-06, "loss": 0.0648, "step": 23910 }, { "epoch": 2.0666119486802885, "grad_norm": 0.6163722203310215, "learning_rate": 2.3304813049536802e-06, "loss": 0.0624, "step": 23920 }, { "epoch": 2.0674759168862584, "grad_norm": 0.6247311437412909, "learning_rate": 2.3265389413331023e-06, "loss": 0.0632, "step": 23930 }, { "epoch": 2.0683398850922288, "grad_norm": 0.6531618747835833, "learning_rate": 2.3225989038180415e-06, "loss": 0.0634, "step": 23940 }, { "epoch": 2.0692038532981987, "grad_norm": 0.6648124763866536, "learning_rate": 2.3186611958366135e-06, "loss": 0.0625, "step": 23950 }, { "epoch": 2.0700678215041686, "grad_norm": 0.615739462809398, "learning_rate": 2.314725820814911e-06, "loss": 0.0641, "step": 23960 }, { "epoch": 2.0709317897101385, "grad_norm": 0.6131030682970773, "learning_rate": 2.3107927821769954e-06, "loss": 0.0638, "step": 23970 }, { "epoch": 2.071795757916109, "grad_norm": 0.6084449651691136, "learning_rate": 2.306862083344899e-06, "loss": 0.0641, "step": 23980 }, { "epoch": 2.0726597261220787, "grad_norm": 0.6477878147729919, "learning_rate": 2.3029337277386136e-06, "loss": 0.0635, "step": 23990 }, { "epoch": 2.0735236943280486, "grad_norm": 0.6254935837523002, "learning_rate": 2.299007718776096e-06, "loss": 0.0634, "step": 24000 }, { "epoch": 2.074387662534019, "grad_norm": 0.634446437880187, "learning_rate": 2.2950840598732572e-06, "loss": 0.0618, "step": 24010 }, { "epoch": 2.075251630739989, "grad_norm": 0.627483584074575, "learning_rate": 2.2911627544439668e-06, "loss": 0.0631, "step": 24020 }, { "epoch": 2.0761155989459588, "grad_norm": 0.6316220677935596, "learning_rate": 2.287243805900046e-06, "loss": 0.0646, "step": 24030 }, { "epoch": 2.0769795671519287, "grad_norm": 0.638069305465654, "learning_rate": 2.2833272176512626e-06, "loss": 0.0627, "step": 24040 }, { "epoch": 2.077843535357899, "grad_norm": 0.6679409341830631, "learning_rate": 2.2794129931053368e-06, "loss": 0.0614, "step": 24050 }, { "epoch": 2.078707503563869, "grad_norm": 0.6324265674564412, "learning_rate": 2.275501135667927e-06, "loss": 0.0622, "step": 24060 }, { "epoch": 2.079571471769839, "grad_norm": 0.6643281873207468, "learning_rate": 2.2715916487426338e-06, "loss": 0.0615, "step": 24070 }, { "epoch": 2.0804354399758087, "grad_norm": 0.647967935021621, "learning_rate": 2.2676845357309922e-06, "loss": 0.0631, "step": 24080 }, { "epoch": 2.081299408181779, "grad_norm": 0.6292387792234486, "learning_rate": 2.26377980003248e-06, "loss": 0.0619, "step": 24090 }, { "epoch": 2.082163376387749, "grad_norm": 0.5903159353895882, "learning_rate": 2.2598774450444976e-06, "loss": 0.0641, "step": 24100 }, { "epoch": 2.083027344593719, "grad_norm": 0.664983677188501, "learning_rate": 2.255977474162379e-06, "loss": 0.0641, "step": 24110 }, { "epoch": 2.0838913127996888, "grad_norm": 0.593897600532623, "learning_rate": 2.252079890779382e-06, "loss": 0.0635, "step": 24120 }, { "epoch": 2.084755281005659, "grad_norm": 0.6117589894042372, "learning_rate": 2.2481846982866843e-06, "loss": 0.061, "step": 24130 }, { "epoch": 2.085619249211629, "grad_norm": 0.6177291800970711, "learning_rate": 2.2442919000733947e-06, "loss": 0.0626, "step": 24140 }, { "epoch": 2.086483217417599, "grad_norm": 0.6117096845706067, "learning_rate": 2.240401499526522e-06, "loss": 0.0635, "step": 24150 }, { "epoch": 2.0873471856235692, "grad_norm": 0.6329557650601069, "learning_rate": 2.2365135000310018e-06, "loss": 0.0605, "step": 24160 }, { "epoch": 2.088211153829539, "grad_norm": 0.5882562424900543, "learning_rate": 2.232627904969677e-06, "loss": 0.0622, "step": 24170 }, { "epoch": 2.089075122035509, "grad_norm": 0.6579717668444609, "learning_rate": 2.2287447177232957e-06, "loss": 0.0622, "step": 24180 }, { "epoch": 2.089939090241479, "grad_norm": 0.6186116937347731, "learning_rate": 2.224863941670513e-06, "loss": 0.0629, "step": 24190 }, { "epoch": 2.0908030584474493, "grad_norm": 0.6228935783095523, "learning_rate": 2.2209855801878886e-06, "loss": 0.0613, "step": 24200 }, { "epoch": 2.091667026653419, "grad_norm": 0.6404729334159126, "learning_rate": 2.2171096366498785e-06, "loss": 0.0624, "step": 24210 }, { "epoch": 2.092530994859389, "grad_norm": 0.6022716371431075, "learning_rate": 2.213236114428834e-06, "loss": 0.0625, "step": 24220 }, { "epoch": 2.093394963065359, "grad_norm": 0.6422295848132793, "learning_rate": 2.2093650168950022e-06, "loss": 0.0611, "step": 24230 }, { "epoch": 2.0942589312713293, "grad_norm": 0.6411305363281238, "learning_rate": 2.2054963474165165e-06, "loss": 0.0631, "step": 24240 }, { "epoch": 2.0951228994772992, "grad_norm": 0.6223287089938836, "learning_rate": 2.2016301093594055e-06, "loss": 0.0631, "step": 24250 }, { "epoch": 2.095986867683269, "grad_norm": 0.6423015109263637, "learning_rate": 2.1977663060875746e-06, "loss": 0.0633, "step": 24260 }, { "epoch": 2.0968508358892395, "grad_norm": 0.623734559704998, "learning_rate": 2.1939049409628143e-06, "loss": 0.0635, "step": 24270 }, { "epoch": 2.0977148040952094, "grad_norm": 0.655630654823313, "learning_rate": 2.1900460173447925e-06, "loss": 0.062, "step": 24280 }, { "epoch": 2.0985787723011793, "grad_norm": 0.684247340874467, "learning_rate": 2.1861895385910535e-06, "loss": 0.0638, "step": 24290 }, { "epoch": 2.099442740507149, "grad_norm": 0.6820724809192963, "learning_rate": 2.1823355080570145e-06, "loss": 0.0621, "step": 24300 }, { "epoch": 2.1003067087131195, "grad_norm": 0.6534096381485632, "learning_rate": 2.178483929095961e-06, "loss": 0.0636, "step": 24310 }, { "epoch": 2.1011706769190894, "grad_norm": 0.6510147646848921, "learning_rate": 2.1746348050590486e-06, "loss": 0.0614, "step": 24320 }, { "epoch": 2.1020346451250593, "grad_norm": 0.6041154097070152, "learning_rate": 2.170788139295295e-06, "loss": 0.0634, "step": 24330 }, { "epoch": 2.1028986133310292, "grad_norm": 0.6120032247248519, "learning_rate": 2.166943935151578e-06, "loss": 0.0624, "step": 24340 }, { "epoch": 2.1037625815369996, "grad_norm": 0.6462962519025485, "learning_rate": 2.163102195972634e-06, "loss": 0.0668, "step": 24350 }, { "epoch": 2.1046265497429695, "grad_norm": 0.7056923665766052, "learning_rate": 2.159262925101058e-06, "loss": 0.0636, "step": 24360 }, { "epoch": 2.1054905179489394, "grad_norm": 0.643509016704401, "learning_rate": 2.1554261258772936e-06, "loss": 0.0661, "step": 24370 }, { "epoch": 2.1063544861549097, "grad_norm": 0.6549146285728764, "learning_rate": 2.151591801639635e-06, "loss": 0.0619, "step": 24380 }, { "epoch": 2.1072184543608796, "grad_norm": 0.6581508720395497, "learning_rate": 2.147759955724223e-06, "loss": 0.0618, "step": 24390 }, { "epoch": 2.1080824225668495, "grad_norm": 0.6517376056398865, "learning_rate": 2.1439305914650398e-06, "loss": 0.0612, "step": 24400 }, { "epoch": 2.1089463907728194, "grad_norm": 0.6663625422110055, "learning_rate": 2.1401037121939176e-06, "loss": 0.0629, "step": 24410 }, { "epoch": 2.1098103589787898, "grad_norm": 0.6199637112899523, "learning_rate": 2.1362793212405112e-06, "loss": 0.0606, "step": 24420 }, { "epoch": 2.1106743271847597, "grad_norm": 0.6068817093511195, "learning_rate": 2.1324574219323244e-06, "loss": 0.0633, "step": 24430 }, { "epoch": 2.1115382953907296, "grad_norm": 0.6391262425524984, "learning_rate": 2.128638017594685e-06, "loss": 0.0634, "step": 24440 }, { "epoch": 2.1124022635966995, "grad_norm": 0.6545074639548908, "learning_rate": 2.124821111550756e-06, "loss": 0.0635, "step": 24450 }, { "epoch": 2.11326623180267, "grad_norm": 0.6283092609786927, "learning_rate": 2.1210067071215174e-06, "loss": 0.0607, "step": 24460 }, { "epoch": 2.1141302000086397, "grad_norm": 0.6265855386605454, "learning_rate": 2.117194807625784e-06, "loss": 0.0606, "step": 24470 }, { "epoch": 2.1149941682146096, "grad_norm": 0.668275663546584, "learning_rate": 2.113385416380182e-06, "loss": 0.0641, "step": 24480 }, { "epoch": 2.1158581364205795, "grad_norm": 0.6468286051725382, "learning_rate": 2.1095785366991602e-06, "loss": 0.0612, "step": 24490 }, { "epoch": 2.11672210462655, "grad_norm": 0.6338607054600975, "learning_rate": 2.1057741718949803e-06, "loss": 0.0625, "step": 24500 }, { "epoch": 2.1175860728325198, "grad_norm": 0.6284795065484166, "learning_rate": 2.1019723252777152e-06, "loss": 0.0604, "step": 24510 }, { "epoch": 2.1184500410384897, "grad_norm": 0.6653757272633931, "learning_rate": 2.09817300015525e-06, "loss": 0.0639, "step": 24520 }, { "epoch": 2.11931400924446, "grad_norm": 0.6132171778029453, "learning_rate": 2.094376199833274e-06, "loss": 0.0595, "step": 24530 }, { "epoch": 2.12017797745043, "grad_norm": 0.6337994827967848, "learning_rate": 2.0905819276152777e-06, "loss": 0.0624, "step": 24540 }, { "epoch": 2.1210419456564, "grad_norm": 0.6200981039293022, "learning_rate": 2.086790186802554e-06, "loss": 0.0627, "step": 24550 }, { "epoch": 2.1219059138623697, "grad_norm": 0.607923399933756, "learning_rate": 2.0830009806941943e-06, "loss": 0.0633, "step": 24560 }, { "epoch": 2.12276988206834, "grad_norm": 0.6327477772847702, "learning_rate": 2.079214312587083e-06, "loss": 0.0628, "step": 24570 }, { "epoch": 2.12363385027431, "grad_norm": 0.6290055999012083, "learning_rate": 2.0754301857758968e-06, "loss": 0.0612, "step": 24580 }, { "epoch": 2.12449781848028, "grad_norm": 0.6093290964229778, "learning_rate": 2.0716486035531007e-06, "loss": 0.0608, "step": 24590 }, { "epoch": 2.1253617866862498, "grad_norm": 0.6269656092540906, "learning_rate": 2.0678695692089467e-06, "loss": 0.0602, "step": 24600 }, { "epoch": 2.12622575489222, "grad_norm": 0.6486656468551327, "learning_rate": 2.064093086031469e-06, "loss": 0.0598, "step": 24610 }, { "epoch": 2.12708972309819, "grad_norm": 0.631027633175365, "learning_rate": 2.0603191573064814e-06, "loss": 0.0631, "step": 24620 }, { "epoch": 2.12795369130416, "grad_norm": 0.6583211890310452, "learning_rate": 2.0565477863175785e-06, "loss": 0.063, "step": 24630 }, { "epoch": 2.12881765951013, "grad_norm": 0.6045310854163645, "learning_rate": 2.052778976346127e-06, "loss": 0.0634, "step": 24640 }, { "epoch": 2.1296816277161, "grad_norm": 0.6640903285647783, "learning_rate": 2.0490127306712656e-06, "loss": 0.064, "step": 24650 }, { "epoch": 2.13054559592207, "grad_norm": 0.6438233531716371, "learning_rate": 2.0452490525698986e-06, "loss": 0.0646, "step": 24660 }, { "epoch": 2.13140956412804, "grad_norm": 0.617131494530124, "learning_rate": 2.041487945316705e-06, "loss": 0.0629, "step": 24670 }, { "epoch": 2.1322735323340103, "grad_norm": 0.6065166880037826, "learning_rate": 2.037729412184118e-06, "loss": 0.0639, "step": 24680 }, { "epoch": 2.13313750053998, "grad_norm": 0.6478988234190833, "learning_rate": 2.0339734564423365e-06, "loss": 0.0632, "step": 24690 }, { "epoch": 2.13400146874595, "grad_norm": 0.6298992819336114, "learning_rate": 2.030220081359314e-06, "loss": 0.061, "step": 24700 }, { "epoch": 2.13486543695192, "grad_norm": 0.650993251243365, "learning_rate": 2.0264692902007576e-06, "loss": 0.06, "step": 24710 }, { "epoch": 2.1357294051578903, "grad_norm": 0.6637271378066566, "learning_rate": 2.022721086230135e-06, "loss": 0.0618, "step": 24720 }, { "epoch": 2.1365933733638602, "grad_norm": 0.6671955218278872, "learning_rate": 2.018975472708648e-06, "loss": 0.0624, "step": 24730 }, { "epoch": 2.13745734156983, "grad_norm": 0.6433645148089315, "learning_rate": 2.015232452895258e-06, "loss": 0.0612, "step": 24740 }, { "epoch": 2.1383213097758, "grad_norm": 0.6355581302816811, "learning_rate": 2.0114920300466624e-06, "loss": 0.0628, "step": 24750 }, { "epoch": 2.1391852779817704, "grad_norm": 0.6174139399477326, "learning_rate": 2.0077542074173044e-06, "loss": 0.0632, "step": 24760 }, { "epoch": 2.1400492461877403, "grad_norm": 0.667036387076024, "learning_rate": 2.0040189882593556e-06, "loss": 0.0626, "step": 24770 }, { "epoch": 2.14091321439371, "grad_norm": 0.6256015668288021, "learning_rate": 2.0002863758227332e-06, "loss": 0.0638, "step": 24780 }, { "epoch": 2.1417771825996805, "grad_norm": 0.6350675918812858, "learning_rate": 1.9965563733550797e-06, "loss": 0.0615, "step": 24790 }, { "epoch": 2.1426411508056504, "grad_norm": 0.6560277084131226, "learning_rate": 1.992828984101768e-06, "loss": 0.0607, "step": 24800 }, { "epoch": 2.1435051190116203, "grad_norm": 0.622302271239541, "learning_rate": 1.989104211305898e-06, "loss": 0.0607, "step": 24810 }, { "epoch": 2.1443690872175902, "grad_norm": 0.6220514071659232, "learning_rate": 1.985382058208292e-06, "loss": 0.0643, "step": 24820 }, { "epoch": 2.1452330554235606, "grad_norm": 0.6285288781254312, "learning_rate": 1.9816625280474966e-06, "loss": 0.0595, "step": 24830 }, { "epoch": 2.1460970236295305, "grad_norm": 0.5995374132946757, "learning_rate": 1.9779456240597718e-06, "loss": 0.0628, "step": 24840 }, { "epoch": 2.1469609918355004, "grad_norm": 0.6358530584409989, "learning_rate": 1.9742313494790943e-06, "loss": 0.0637, "step": 24850 }, { "epoch": 2.1478249600414703, "grad_norm": 0.6407208020987373, "learning_rate": 1.970519707537151e-06, "loss": 0.0616, "step": 24860 }, { "epoch": 2.1486889282474406, "grad_norm": 0.6356132639497752, "learning_rate": 1.9668107014633458e-06, "loss": 0.0626, "step": 24870 }, { "epoch": 2.1495528964534105, "grad_norm": 0.654976860526239, "learning_rate": 1.963104334484777e-06, "loss": 0.0613, "step": 24880 }, { "epoch": 2.1504168646593804, "grad_norm": 0.6227712775313601, "learning_rate": 1.9594006098262584e-06, "loss": 0.0625, "step": 24890 }, { "epoch": 2.1512808328653508, "grad_norm": 0.661425967242727, "learning_rate": 1.955699530710298e-06, "loss": 0.066, "step": 24900 }, { "epoch": 2.1521448010713207, "grad_norm": 0.6501765614101469, "learning_rate": 1.952001100357104e-06, "loss": 0.0617, "step": 24910 }, { "epoch": 2.1530087692772906, "grad_norm": 0.6565869828321924, "learning_rate": 1.9483053219845786e-06, "loss": 0.0609, "step": 24920 }, { "epoch": 2.1538727374832605, "grad_norm": 0.6170406429516456, "learning_rate": 1.9446121988083176e-06, "loss": 0.0618, "step": 24930 }, { "epoch": 2.154736705689231, "grad_norm": 0.6332346071442021, "learning_rate": 1.9409217340416094e-06, "loss": 0.0659, "step": 24940 }, { "epoch": 2.1556006738952007, "grad_norm": 0.6310607589991424, "learning_rate": 1.9372339308954243e-06, "loss": 0.0617, "step": 24950 }, { "epoch": 2.1564646421011706, "grad_norm": 0.631341824342489, "learning_rate": 1.9335487925784203e-06, "loss": 0.0619, "step": 24960 }, { "epoch": 2.1573286103071405, "grad_norm": 0.6055231058000567, "learning_rate": 1.9298663222969333e-06, "loss": 0.0611, "step": 24970 }, { "epoch": 2.158192578513111, "grad_norm": 0.6519031072151518, "learning_rate": 1.926186523254984e-06, "loss": 0.0623, "step": 24980 }, { "epoch": 2.1590565467190808, "grad_norm": 0.6532029733553798, "learning_rate": 1.9225093986542633e-06, "loss": 0.0628, "step": 24990 }, { "epoch": 2.1599205149250507, "grad_norm": 0.6371257726134606, "learning_rate": 1.9188349516941363e-06, "loss": 0.0617, "step": 25000 }, { "epoch": 2.160784483131021, "grad_norm": 0.6517130228940046, "learning_rate": 1.91516318557164e-06, "loss": 0.062, "step": 25010 }, { "epoch": 2.161648451336991, "grad_norm": 0.6314055320125311, "learning_rate": 1.911494103481476e-06, "loss": 0.0624, "step": 25020 }, { "epoch": 2.162512419542961, "grad_norm": 0.644728408364859, "learning_rate": 1.907827708616018e-06, "loss": 0.0611, "step": 25030 }, { "epoch": 2.1633763877489307, "grad_norm": 0.6783305329471674, "learning_rate": 1.904164004165288e-06, "loss": 0.063, "step": 25040 }, { "epoch": 2.164240355954901, "grad_norm": 0.6226296409212154, "learning_rate": 1.9005029933169815e-06, "loss": 0.0613, "step": 25050 }, { "epoch": 2.165104324160871, "grad_norm": 0.6627104553870933, "learning_rate": 1.8968446792564405e-06, "loss": 0.0611, "step": 25060 }, { "epoch": 2.165968292366841, "grad_norm": 0.6672474621528279, "learning_rate": 1.893189065166669e-06, "loss": 0.0611, "step": 25070 }, { "epoch": 2.1668322605728108, "grad_norm": 0.6591423831765176, "learning_rate": 1.8895361542283103e-06, "loss": 0.0618, "step": 25080 }, { "epoch": 2.167696228778781, "grad_norm": 0.6253784506850278, "learning_rate": 1.8858859496196685e-06, "loss": 0.0596, "step": 25090 }, { "epoch": 2.168560196984751, "grad_norm": 0.6112277593734251, "learning_rate": 1.8822384545166845e-06, "loss": 0.0614, "step": 25100 }, { "epoch": 2.169424165190721, "grad_norm": 0.6230633387448746, "learning_rate": 1.8785936720929437e-06, "loss": 0.0621, "step": 25110 }, { "epoch": 2.170288133396691, "grad_norm": 0.6411573873586922, "learning_rate": 1.874951605519673e-06, "loss": 0.0615, "step": 25120 }, { "epoch": 2.171152101602661, "grad_norm": 0.6496078251100311, "learning_rate": 1.8713122579657333e-06, "loss": 0.0624, "step": 25130 }, { "epoch": 2.172016069808631, "grad_norm": 0.6297655237072545, "learning_rate": 1.8676756325976265e-06, "loss": 0.0625, "step": 25140 }, { "epoch": 2.172880038014601, "grad_norm": 0.6170767552073277, "learning_rate": 1.8640417325794735e-06, "loss": 0.0604, "step": 25150 }, { "epoch": 2.1737440062205713, "grad_norm": 0.6272151430305527, "learning_rate": 1.8604105610730378e-06, "loss": 0.0613, "step": 25160 }, { "epoch": 2.174607974426541, "grad_norm": 0.6675092753122248, "learning_rate": 1.8567821212376986e-06, "loss": 0.0639, "step": 25170 }, { "epoch": 2.175471942632511, "grad_norm": 0.6224564596460632, "learning_rate": 1.8531564162304677e-06, "loss": 0.0627, "step": 25180 }, { "epoch": 2.176335910838481, "grad_norm": 0.6068390661175612, "learning_rate": 1.8495334492059653e-06, "loss": 0.0621, "step": 25190 }, { "epoch": 2.1771998790444513, "grad_norm": 0.6523381112434008, "learning_rate": 1.8459132233164407e-06, "loss": 0.0622, "step": 25200 }, { "epoch": 2.1780638472504212, "grad_norm": 0.6416349928882249, "learning_rate": 1.8422957417117531e-06, "loss": 0.0631, "step": 25210 }, { "epoch": 2.178927815456391, "grad_norm": 0.6530937293777992, "learning_rate": 1.8386810075393735e-06, "loss": 0.061, "step": 25220 }, { "epoch": 2.179791783662361, "grad_norm": 0.6660230625168588, "learning_rate": 1.8350690239443841e-06, "loss": 0.0602, "step": 25230 }, { "epoch": 2.1806557518683314, "grad_norm": 0.6125379807881822, "learning_rate": 1.8314597940694711e-06, "loss": 0.0613, "step": 25240 }, { "epoch": 2.1815197200743013, "grad_norm": 0.6556372780382884, "learning_rate": 1.8278533210549304e-06, "loss": 0.0648, "step": 25250 }, { "epoch": 2.182383688280271, "grad_norm": 0.6875735101398713, "learning_rate": 1.8242496080386546e-06, "loss": 0.0624, "step": 25260 }, { "epoch": 2.183247656486241, "grad_norm": 0.6386277390230637, "learning_rate": 1.8206486581561356e-06, "loss": 0.0639, "step": 25270 }, { "epoch": 2.1841116246922114, "grad_norm": 0.6383759095811737, "learning_rate": 1.8170504745404598e-06, "loss": 0.0635, "step": 25280 }, { "epoch": 2.1849755928981813, "grad_norm": 0.6291029502412889, "learning_rate": 1.8134550603223123e-06, "loss": 0.0615, "step": 25290 }, { "epoch": 2.1858395611041512, "grad_norm": 0.6597481436350051, "learning_rate": 1.8098624186299628e-06, "loss": 0.0637, "step": 25300 }, { "epoch": 2.1867035293101216, "grad_norm": 0.6060866620443429, "learning_rate": 1.8062725525892716e-06, "loss": 0.0593, "step": 25310 }, { "epoch": 2.1875674975160915, "grad_norm": 0.6594341783898489, "learning_rate": 1.802685465323682e-06, "loss": 0.062, "step": 25320 }, { "epoch": 2.1884314657220614, "grad_norm": 0.6967919531607281, "learning_rate": 1.7991011599542202e-06, "loss": 0.062, "step": 25330 }, { "epoch": 2.1892954339280313, "grad_norm": 0.6355968136614126, "learning_rate": 1.7955196395994967e-06, "loss": 0.0619, "step": 25340 }, { "epoch": 2.1901594021340016, "grad_norm": 0.6493195599762654, "learning_rate": 1.7919409073756883e-06, "loss": 0.0602, "step": 25350 }, { "epoch": 2.1910233703399715, "grad_norm": 0.6687212211442863, "learning_rate": 1.7883649663965574e-06, "loss": 0.0619, "step": 25360 }, { "epoch": 2.1918873385459414, "grad_norm": 0.6783993190646149, "learning_rate": 1.7847918197734293e-06, "loss": 0.0627, "step": 25370 }, { "epoch": 2.1927513067519113, "grad_norm": 0.6823882867138366, "learning_rate": 1.781221470615206e-06, "loss": 0.0609, "step": 25380 }, { "epoch": 2.1936152749578817, "grad_norm": 0.6046134509758881, "learning_rate": 1.7776539220283446e-06, "loss": 0.0629, "step": 25390 }, { "epoch": 2.1944792431638516, "grad_norm": 0.6632724484704178, "learning_rate": 1.774089177116876e-06, "loss": 0.0628, "step": 25400 }, { "epoch": 2.1953432113698215, "grad_norm": 0.6093237196847797, "learning_rate": 1.7705272389823869e-06, "loss": 0.0622, "step": 25410 }, { "epoch": 2.196207179575792, "grad_norm": 0.6369052508210911, "learning_rate": 1.7669681107240223e-06, "loss": 0.0607, "step": 25420 }, { "epoch": 2.1970711477817617, "grad_norm": 0.6407569635936984, "learning_rate": 1.763411795438482e-06, "loss": 0.0611, "step": 25430 }, { "epoch": 2.1979351159877316, "grad_norm": 0.6563720363166178, "learning_rate": 1.7598582962200172e-06, "loss": 0.0599, "step": 25440 }, { "epoch": 2.1987990841937015, "grad_norm": 0.673275092712552, "learning_rate": 1.7563076161604364e-06, "loss": 0.0598, "step": 25450 }, { "epoch": 2.199663052399672, "grad_norm": 0.6380876243254755, "learning_rate": 1.7527597583490825e-06, "loss": 0.0606, "step": 25460 }, { "epoch": 2.2005270206056418, "grad_norm": 0.6450355340439092, "learning_rate": 1.7492147258728538e-06, "loss": 0.0621, "step": 25470 }, { "epoch": 2.2013909888116117, "grad_norm": 0.6226050474103794, "learning_rate": 1.745672521816184e-06, "loss": 0.061, "step": 25480 }, { "epoch": 2.2022549570175816, "grad_norm": 0.6409167634399674, "learning_rate": 1.7421331492610533e-06, "loss": 0.0614, "step": 25490 }, { "epoch": 2.203118925223552, "grad_norm": 0.6730822678941681, "learning_rate": 1.7385966112869657e-06, "loss": 0.0591, "step": 25500 }, { "epoch": 2.203982893429522, "grad_norm": 0.6970778786367295, "learning_rate": 1.7350629109709715e-06, "loss": 0.0607, "step": 25510 }, { "epoch": 2.2048468616354917, "grad_norm": 0.6125821916693323, "learning_rate": 1.731532051387646e-06, "loss": 0.0596, "step": 25520 }, { "epoch": 2.205710829841462, "grad_norm": 0.6656767212696019, "learning_rate": 1.7280040356090933e-06, "loss": 0.0607, "step": 25530 }, { "epoch": 2.206574798047432, "grad_norm": 0.6411004681356632, "learning_rate": 1.724478866704944e-06, "loss": 0.062, "step": 25540 }, { "epoch": 2.207438766253402, "grad_norm": 0.6764489258462001, "learning_rate": 1.720956547742349e-06, "loss": 0.0612, "step": 25550 }, { "epoch": 2.2083027344593718, "grad_norm": 0.6128942114008559, "learning_rate": 1.7174370817859854e-06, "loss": 0.0608, "step": 25560 }, { "epoch": 2.209166702665342, "grad_norm": 0.6339329827203459, "learning_rate": 1.7139204718980434e-06, "loss": 0.0625, "step": 25570 }, { "epoch": 2.210030670871312, "grad_norm": 0.6452354260110682, "learning_rate": 1.710406721138229e-06, "loss": 0.0614, "step": 25580 }, { "epoch": 2.210894639077282, "grad_norm": 0.6166881292995755, "learning_rate": 1.7068958325637591e-06, "loss": 0.0616, "step": 25590 }, { "epoch": 2.211758607283252, "grad_norm": 0.6476715295266554, "learning_rate": 1.7033878092293655e-06, "loss": 0.061, "step": 25600 }, { "epoch": 2.212622575489222, "grad_norm": 0.6517855464246195, "learning_rate": 1.699882654187282e-06, "loss": 0.0627, "step": 25610 }, { "epoch": 2.213486543695192, "grad_norm": 0.6340260543769496, "learning_rate": 1.6963803704872478e-06, "loss": 0.0611, "step": 25620 }, { "epoch": 2.214350511901162, "grad_norm": 0.6298347310498605, "learning_rate": 1.6928809611765051e-06, "loss": 0.0616, "step": 25630 }, { "epoch": 2.2152144801071323, "grad_norm": 0.6243243135913467, "learning_rate": 1.6893844292997923e-06, "loss": 0.0615, "step": 25640 }, { "epoch": 2.216078448313102, "grad_norm": 0.6340777870224308, "learning_rate": 1.6858907778993516e-06, "loss": 0.0609, "step": 25650 }, { "epoch": 2.216942416519072, "grad_norm": 0.6083314908430885, "learning_rate": 1.6824000100149067e-06, "loss": 0.0596, "step": 25660 }, { "epoch": 2.217806384725042, "grad_norm": 0.6726539489645692, "learning_rate": 1.678912128683685e-06, "loss": 0.0621, "step": 25670 }, { "epoch": 2.2186703529310123, "grad_norm": 0.6194556913540952, "learning_rate": 1.6754271369403934e-06, "loss": 0.0613, "step": 25680 }, { "epoch": 2.2195343211369822, "grad_norm": 0.6207525296451107, "learning_rate": 1.671945037817233e-06, "loss": 0.0616, "step": 25690 }, { "epoch": 2.220398289342952, "grad_norm": 0.681932107424091, "learning_rate": 1.6684658343438769e-06, "loss": 0.0621, "step": 25700 }, { "epoch": 2.221262257548922, "grad_norm": 0.6855599096619783, "learning_rate": 1.6649895295474895e-06, "loss": 0.0619, "step": 25710 }, { "epoch": 2.2221262257548924, "grad_norm": 0.6129639845472115, "learning_rate": 1.6615161264527075e-06, "loss": 0.0607, "step": 25720 }, { "epoch": 2.2229901939608623, "grad_norm": 0.6626689168785096, "learning_rate": 1.6580456280816442e-06, "loss": 0.0599, "step": 25730 }, { "epoch": 2.223854162166832, "grad_norm": 0.6226827855598428, "learning_rate": 1.654578037453885e-06, "loss": 0.0602, "step": 25740 }, { "epoch": 2.224718130372802, "grad_norm": 0.6147897546663305, "learning_rate": 1.6511133575864846e-06, "loss": 0.0595, "step": 25750 }, { "epoch": 2.2255820985787724, "grad_norm": 0.6263674040275825, "learning_rate": 1.6476515914939718e-06, "loss": 0.0642, "step": 25760 }, { "epoch": 2.2264460667847423, "grad_norm": 0.6413262252107033, "learning_rate": 1.6441927421883274e-06, "loss": 0.0617, "step": 25770 }, { "epoch": 2.2273100349907122, "grad_norm": 0.6571817695841334, "learning_rate": 1.6407368126790074e-06, "loss": 0.0614, "step": 25780 }, { "epoch": 2.228174003196682, "grad_norm": 0.639650797211082, "learning_rate": 1.637283805972918e-06, "loss": 0.0625, "step": 25790 }, { "epoch": 2.2290379714026525, "grad_norm": 0.6241557566612796, "learning_rate": 1.6338337250744313e-06, "loss": 0.0594, "step": 25800 }, { "epoch": 2.2299019396086224, "grad_norm": 0.6436803684620909, "learning_rate": 1.6303865729853618e-06, "loss": 0.0591, "step": 25810 }, { "epoch": 2.2307659078145923, "grad_norm": 0.6593942452366395, "learning_rate": 1.6269423527049876e-06, "loss": 0.0605, "step": 25820 }, { "epoch": 2.2316298760205626, "grad_norm": 0.7094732125345027, "learning_rate": 1.623501067230029e-06, "loss": 0.0596, "step": 25830 }, { "epoch": 2.2324938442265325, "grad_norm": 0.639451168612842, "learning_rate": 1.6200627195546547e-06, "loss": 0.0587, "step": 25840 }, { "epoch": 2.2333578124325024, "grad_norm": 0.666192956241704, "learning_rate": 1.6166273126704773e-06, "loss": 0.0592, "step": 25850 }, { "epoch": 2.2342217806384723, "grad_norm": 0.651541166383784, "learning_rate": 1.6131948495665483e-06, "loss": 0.0627, "step": 25860 }, { "epoch": 2.2350857488444427, "grad_norm": 0.6671884346428812, "learning_rate": 1.609765333229364e-06, "loss": 0.0592, "step": 25870 }, { "epoch": 2.2359497170504126, "grad_norm": 0.6312027856638661, "learning_rate": 1.6063387666428514e-06, "loss": 0.0616, "step": 25880 }, { "epoch": 2.2368136852563825, "grad_norm": 0.6351034279727996, "learning_rate": 1.6029151527883724e-06, "loss": 0.0619, "step": 25890 }, { "epoch": 2.2376776534623524, "grad_norm": 0.650998398374031, "learning_rate": 1.5994944946447187e-06, "loss": 0.0596, "step": 25900 }, { "epoch": 2.2385416216683227, "grad_norm": 0.64185933391637, "learning_rate": 1.5960767951881146e-06, "loss": 0.0613, "step": 25910 }, { "epoch": 2.2394055898742926, "grad_norm": 0.6821387202412239, "learning_rate": 1.5926620573922048e-06, "loss": 0.0605, "step": 25920 }, { "epoch": 2.2402695580802625, "grad_norm": 0.6584422635160079, "learning_rate": 1.5892502842280605e-06, "loss": 0.0631, "step": 25930 }, { "epoch": 2.241133526286233, "grad_norm": 0.6719097501541714, "learning_rate": 1.5858414786641723e-06, "loss": 0.0598, "step": 25940 }, { "epoch": 2.2419974944922028, "grad_norm": 0.6524586802098732, "learning_rate": 1.5824356436664467e-06, "loss": 0.0604, "step": 25950 }, { "epoch": 2.2428614626981727, "grad_norm": 0.6516842713970796, "learning_rate": 1.5790327821982137e-06, "loss": 0.0625, "step": 25960 }, { "epoch": 2.2437254309041426, "grad_norm": 0.6683175188643405, "learning_rate": 1.5756328972202023e-06, "loss": 0.0591, "step": 25970 }, { "epoch": 2.244589399110113, "grad_norm": 0.625411302041235, "learning_rate": 1.5722359916905656e-06, "loss": 0.0593, "step": 25980 }, { "epoch": 2.245453367316083, "grad_norm": 0.6293011621106597, "learning_rate": 1.5688420685648565e-06, "loss": 0.0622, "step": 25990 }, { "epoch": 2.2463173355220527, "grad_norm": 0.6852578163384612, "learning_rate": 1.5654511307960346e-06, "loss": 0.0616, "step": 26000 }, { "epoch": 2.2471813037280226, "grad_norm": 0.6510391632446941, "learning_rate": 1.5620631813344611e-06, "loss": 0.0643, "step": 26010 }, { "epoch": 2.248045271933993, "grad_norm": 0.6375311713032594, "learning_rate": 1.5586782231279012e-06, "loss": 0.0628, "step": 26020 }, { "epoch": 2.248909240139963, "grad_norm": 0.6571909140263892, "learning_rate": 1.5552962591215137e-06, "loss": 0.0603, "step": 26030 }, { "epoch": 2.2497732083459328, "grad_norm": 0.6435249357904795, "learning_rate": 1.5519172922578529e-06, "loss": 0.0629, "step": 26040 }, { "epoch": 2.250637176551903, "grad_norm": 0.6516646258238982, "learning_rate": 1.5485413254768655e-06, "loss": 0.0583, "step": 26050 }, { "epoch": 2.251501144757873, "grad_norm": 0.6565838894870303, "learning_rate": 1.5451683617158864e-06, "loss": 0.0611, "step": 26060 }, { "epoch": 2.252365112963843, "grad_norm": 0.6896405697177518, "learning_rate": 1.541798403909644e-06, "loss": 0.0601, "step": 26070 }, { "epoch": 2.253229081169813, "grad_norm": 0.6312058690706274, "learning_rate": 1.5384314549902407e-06, "loss": 0.0598, "step": 26080 }, { "epoch": 2.254093049375783, "grad_norm": 0.689835159961576, "learning_rate": 1.5350675178871717e-06, "loss": 0.0618, "step": 26090 }, { "epoch": 2.254957017581753, "grad_norm": 0.6158946907902951, "learning_rate": 1.531706595527303e-06, "loss": 0.0604, "step": 26100 }, { "epoch": 2.255820985787723, "grad_norm": 0.6099539265032519, "learning_rate": 1.5283486908348872e-06, "loss": 0.0589, "step": 26110 }, { "epoch": 2.256684953993693, "grad_norm": 0.6450195523068343, "learning_rate": 1.5249938067315379e-06, "loss": 0.0613, "step": 26120 }, { "epoch": 2.257548922199663, "grad_norm": 0.6487167626847626, "learning_rate": 1.5216419461362542e-06, "loss": 0.0591, "step": 26130 }, { "epoch": 2.258412890405633, "grad_norm": 0.6753222278690582, "learning_rate": 1.5182931119653965e-06, "loss": 0.0595, "step": 26140 }, { "epoch": 2.259276858611603, "grad_norm": 0.7071305222388669, "learning_rate": 1.5149473071326941e-06, "loss": 0.0609, "step": 26150 }, { "epoch": 2.2601408268175733, "grad_norm": 0.664863282231861, "learning_rate": 1.5116045345492403e-06, "loss": 0.06, "step": 26160 }, { "epoch": 2.2610047950235432, "grad_norm": 0.6528376283630767, "learning_rate": 1.5082647971234886e-06, "loss": 0.0635, "step": 26170 }, { "epoch": 2.261868763229513, "grad_norm": 0.6492573255834764, "learning_rate": 1.5049280977612575e-06, "loss": 0.0627, "step": 26180 }, { "epoch": 2.262732731435483, "grad_norm": 0.6627021537499391, "learning_rate": 1.501594439365715e-06, "loss": 0.0598, "step": 26190 }, { "epoch": 2.2635966996414534, "grad_norm": 0.6806417650959726, "learning_rate": 1.4982638248373871e-06, "loss": 0.0637, "step": 26200 }, { "epoch": 2.2644606678474233, "grad_norm": 0.62424115966496, "learning_rate": 1.4949362570741493e-06, "loss": 0.0592, "step": 26210 }, { "epoch": 2.265324636053393, "grad_norm": 0.6688986783593902, "learning_rate": 1.4916117389712303e-06, "loss": 0.0603, "step": 26220 }, { "epoch": 2.266188604259363, "grad_norm": 0.7018681679297502, "learning_rate": 1.4882902734212013e-06, "loss": 0.0637, "step": 26230 }, { "epoch": 2.2670525724653334, "grad_norm": 0.5925240661361009, "learning_rate": 1.484971863313978e-06, "loss": 0.061, "step": 26240 }, { "epoch": 2.2679165406713033, "grad_norm": 0.6414504235333198, "learning_rate": 1.4816565115368199e-06, "loss": 0.064, "step": 26250 }, { "epoch": 2.2687805088772732, "grad_norm": 0.656382879329087, "learning_rate": 1.4783442209743225e-06, "loss": 0.0597, "step": 26260 }, { "epoch": 2.2696444770832436, "grad_norm": 0.6691090047775298, "learning_rate": 1.4750349945084209e-06, "loss": 0.0605, "step": 26270 }, { "epoch": 2.2705084452892135, "grad_norm": 0.657652046855535, "learning_rate": 1.4717288350183805e-06, "loss": 0.0599, "step": 26280 }, { "epoch": 2.2713724134951834, "grad_norm": 0.6423009118728417, "learning_rate": 1.4684257453808032e-06, "loss": 0.0593, "step": 26290 }, { "epoch": 2.2722363817011533, "grad_norm": 0.6485166849916811, "learning_rate": 1.4651257284696164e-06, "loss": 0.0595, "step": 26300 }, { "epoch": 2.273100349907123, "grad_norm": 0.6207446565216592, "learning_rate": 1.4618287871560737e-06, "loss": 0.0601, "step": 26310 }, { "epoch": 2.2739643181130935, "grad_norm": 0.6633360470389493, "learning_rate": 1.4585349243087538e-06, "loss": 0.061, "step": 26320 }, { "epoch": 2.2748282863190634, "grad_norm": 0.6355126266677902, "learning_rate": 1.4552441427935582e-06, "loss": 0.0585, "step": 26330 }, { "epoch": 2.2756922545250333, "grad_norm": 0.6310809829654427, "learning_rate": 1.4519564454737063e-06, "loss": 0.06, "step": 26340 }, { "epoch": 2.2765562227310037, "grad_norm": 0.67846250533861, "learning_rate": 1.4486718352097323e-06, "loss": 0.0604, "step": 26350 }, { "epoch": 2.2774201909369736, "grad_norm": 0.664061169733949, "learning_rate": 1.4453903148594866e-06, "loss": 0.0632, "step": 26360 }, { "epoch": 2.2782841591429435, "grad_norm": 0.6697299183841422, "learning_rate": 1.4421118872781291e-06, "loss": 0.0605, "step": 26370 }, { "epoch": 2.2791481273489134, "grad_norm": 0.6498208864924574, "learning_rate": 1.438836555318135e-06, "loss": 0.0639, "step": 26380 }, { "epoch": 2.2800120955548837, "grad_norm": 0.64671706463022, "learning_rate": 1.4355643218292742e-06, "loss": 0.0615, "step": 26390 }, { "epoch": 2.2808760637608536, "grad_norm": 0.7394098399800825, "learning_rate": 1.4322951896586334e-06, "loss": 0.0606, "step": 26400 }, { "epoch": 2.2817400319668235, "grad_norm": 0.642376609395183, "learning_rate": 1.4290291616505918e-06, "loss": 0.0605, "step": 26410 }, { "epoch": 2.2826040001727934, "grad_norm": 0.6566571466559815, "learning_rate": 1.4257662406468353e-06, "loss": 0.0573, "step": 26420 }, { "epoch": 2.2834679683787638, "grad_norm": 0.6520952949887607, "learning_rate": 1.4225064294863372e-06, "loss": 0.0606, "step": 26430 }, { "epoch": 2.2843319365847337, "grad_norm": 0.6838279782209414, "learning_rate": 1.4192497310053748e-06, "loss": 0.0623, "step": 26440 }, { "epoch": 2.2851959047907036, "grad_norm": 0.6446957305304464, "learning_rate": 1.41599614803751e-06, "loss": 0.0599, "step": 26450 }, { "epoch": 2.286059872996674, "grad_norm": 0.6410457377021014, "learning_rate": 1.4127456834135978e-06, "loss": 0.0625, "step": 26460 }, { "epoch": 2.286923841202644, "grad_norm": 0.6186720566777877, "learning_rate": 1.4094983399617784e-06, "loss": 0.0614, "step": 26470 }, { "epoch": 2.2877878094086137, "grad_norm": 0.6939594322670045, "learning_rate": 1.4062541205074742e-06, "loss": 0.0606, "step": 26480 }, { "epoch": 2.2886517776145836, "grad_norm": 0.6742929364984191, "learning_rate": 1.4030130278733967e-06, "loss": 0.058, "step": 26490 }, { "epoch": 2.289515745820554, "grad_norm": 0.6389179635631196, "learning_rate": 1.3997750648795295e-06, "loss": 0.0609, "step": 26500 }, { "epoch": 2.290379714026524, "grad_norm": 0.6664027390477426, "learning_rate": 1.3965402343431362e-06, "loss": 0.0593, "step": 26510 }, { "epoch": 2.2912436822324938, "grad_norm": 0.6522631321278886, "learning_rate": 1.3933085390787531e-06, "loss": 0.0611, "step": 26520 }, { "epoch": 2.2921076504384637, "grad_norm": 0.6604097749419889, "learning_rate": 1.3900799818981947e-06, "loss": 0.06, "step": 26530 }, { "epoch": 2.292971618644434, "grad_norm": 0.6406399475479331, "learning_rate": 1.3868545656105342e-06, "loss": 0.0586, "step": 26540 }, { "epoch": 2.293835586850404, "grad_norm": 0.59954172486234, "learning_rate": 1.3836322930221225e-06, "loss": 0.061, "step": 26550 }, { "epoch": 2.294699555056374, "grad_norm": 0.6664300784047551, "learning_rate": 1.3804131669365705e-06, "loss": 0.0595, "step": 26560 }, { "epoch": 2.295563523262344, "grad_norm": 0.6188454289593853, "learning_rate": 1.3771971901547515e-06, "loss": 0.06, "step": 26570 }, { "epoch": 2.296427491468314, "grad_norm": 0.6424177988440397, "learning_rate": 1.3739843654747986e-06, "loss": 0.0606, "step": 26580 }, { "epoch": 2.297291459674284, "grad_norm": 0.6688160131458641, "learning_rate": 1.370774695692102e-06, "loss": 0.0606, "step": 26590 }, { "epoch": 2.298155427880254, "grad_norm": 0.6683429083926847, "learning_rate": 1.3675681835993109e-06, "loss": 0.0613, "step": 26600 }, { "epoch": 2.299019396086224, "grad_norm": 0.7006858852613675, "learning_rate": 1.3643648319863222e-06, "loss": 0.0588, "step": 26610 }, { "epoch": 2.299883364292194, "grad_norm": 0.6778287862856338, "learning_rate": 1.3611646436402849e-06, "loss": 0.0626, "step": 26620 }, { "epoch": 2.300747332498164, "grad_norm": 0.6624181219982882, "learning_rate": 1.3579676213455934e-06, "loss": 0.0618, "step": 26630 }, { "epoch": 2.301611300704134, "grad_norm": 0.7008422542870012, "learning_rate": 1.3547737678838934e-06, "loss": 0.0607, "step": 26640 }, { "epoch": 2.3024752689101042, "grad_norm": 0.6624919987207697, "learning_rate": 1.3515830860340678e-06, "loss": 0.0606, "step": 26650 }, { "epoch": 2.303339237116074, "grad_norm": 0.6147145335368871, "learning_rate": 1.3483955785722418e-06, "loss": 0.0622, "step": 26660 }, { "epoch": 2.304203205322044, "grad_norm": 0.6281804015316972, "learning_rate": 1.3452112482717788e-06, "loss": 0.0572, "step": 26670 }, { "epoch": 2.3050671735280144, "grad_norm": 0.6451228796733127, "learning_rate": 1.3420300979032758e-06, "loss": 0.0606, "step": 26680 }, { "epoch": 2.3059311417339843, "grad_norm": 0.6169719002668468, "learning_rate": 1.3388521302345703e-06, "loss": 0.0607, "step": 26690 }, { "epoch": 2.306795109939954, "grad_norm": 0.685603442557472, "learning_rate": 1.3356773480307178e-06, "loss": 0.0618, "step": 26700 }, { "epoch": 2.307659078145924, "grad_norm": 0.6443542909589884, "learning_rate": 1.332505754054016e-06, "loss": 0.0596, "step": 26710 }, { "epoch": 2.3085230463518944, "grad_norm": 0.6542177335627326, "learning_rate": 1.3293373510639772e-06, "loss": 0.0599, "step": 26720 }, { "epoch": 2.3093870145578643, "grad_norm": 0.618466824135546, "learning_rate": 1.326172141817349e-06, "loss": 0.058, "step": 26730 }, { "epoch": 2.3102509827638342, "grad_norm": 0.6754024486903456, "learning_rate": 1.3230101290680859e-06, "loss": 0.0608, "step": 26740 }, { "epoch": 2.311114950969804, "grad_norm": 0.6259336996138705, "learning_rate": 1.3198513155673742e-06, "loss": 0.0602, "step": 26750 }, { "epoch": 2.3119789191757745, "grad_norm": 0.6952082803278199, "learning_rate": 1.3166957040636102e-06, "loss": 0.0611, "step": 26760 }, { "epoch": 2.3128428873817444, "grad_norm": 0.679152130889011, "learning_rate": 1.3135432973024044e-06, "loss": 0.0572, "step": 26770 }, { "epoch": 2.3137068555877143, "grad_norm": 0.6706211214233345, "learning_rate": 1.3103940980265805e-06, "loss": 0.0604, "step": 26780 }, { "epoch": 2.3145708237936846, "grad_norm": 0.617309011922249, "learning_rate": 1.3072481089761697e-06, "loss": 0.0575, "step": 26790 }, { "epoch": 2.3154347919996545, "grad_norm": 0.6615416949022743, "learning_rate": 1.3041053328884128e-06, "loss": 0.0616, "step": 26800 }, { "epoch": 2.3162987602056244, "grad_norm": 0.6747162247811561, "learning_rate": 1.3009657724977537e-06, "loss": 0.0619, "step": 26810 }, { "epoch": 2.3171627284115943, "grad_norm": 0.6664271188966158, "learning_rate": 1.2978294305358374e-06, "loss": 0.0615, "step": 26820 }, { "epoch": 2.3180266966175647, "grad_norm": 0.65748922479625, "learning_rate": 1.294696309731508e-06, "loss": 0.0623, "step": 26830 }, { "epoch": 2.3188906648235346, "grad_norm": 0.6685930838215557, "learning_rate": 1.2915664128108123e-06, "loss": 0.0604, "step": 26840 }, { "epoch": 2.3197546330295045, "grad_norm": 0.6127967842512623, "learning_rate": 1.288439742496984e-06, "loss": 0.0599, "step": 26850 }, { "epoch": 2.3206186012354744, "grad_norm": 0.6643011680928712, "learning_rate": 1.2853163015104563e-06, "loss": 0.063, "step": 26860 }, { "epoch": 2.3214825694414447, "grad_norm": 0.6399180186475122, "learning_rate": 1.2821960925688493e-06, "loss": 0.0601, "step": 26870 }, { "epoch": 2.3223465376474146, "grad_norm": 0.6309313709379819, "learning_rate": 1.2790791183869717e-06, "loss": 0.0597, "step": 26880 }, { "epoch": 2.3232105058533845, "grad_norm": 0.6292393124967992, "learning_rate": 1.2759653816768175e-06, "loss": 0.0613, "step": 26890 }, { "epoch": 2.324074474059355, "grad_norm": 0.6272135357664029, "learning_rate": 1.2728548851475625e-06, "loss": 0.0608, "step": 26900 }, { "epoch": 2.3249384422653248, "grad_norm": 0.6534930026134436, "learning_rate": 1.2697476315055673e-06, "loss": 0.0603, "step": 26910 }, { "epoch": 2.3258024104712947, "grad_norm": 0.6627039375093233, "learning_rate": 1.2666436234543684e-06, "loss": 0.0602, "step": 26920 }, { "epoch": 2.3266663786772646, "grad_norm": 0.6998958974545966, "learning_rate": 1.2635428636946773e-06, "loss": 0.0587, "step": 26930 }, { "epoch": 2.3275303468832345, "grad_norm": 0.664922356390914, "learning_rate": 1.2604453549243796e-06, "loss": 0.0607, "step": 26940 }, { "epoch": 2.328394315089205, "grad_norm": 0.6545529848823667, "learning_rate": 1.2573510998385357e-06, "loss": 0.0606, "step": 26950 }, { "epoch": 2.3292582832951747, "grad_norm": 0.6572234115129042, "learning_rate": 1.2542601011293714e-06, "loss": 0.0614, "step": 26960 }, { "epoch": 2.3301222515011446, "grad_norm": 0.623619780479768, "learning_rate": 1.2511723614862803e-06, "loss": 0.0586, "step": 26970 }, { "epoch": 2.330986219707115, "grad_norm": 0.6760745961536608, "learning_rate": 1.2480878835958199e-06, "loss": 0.0603, "step": 26980 }, { "epoch": 2.331850187913085, "grad_norm": 0.6446461921885446, "learning_rate": 1.245006670141709e-06, "loss": 0.0606, "step": 26990 }, { "epoch": 2.3327141561190547, "grad_norm": 0.6720018979198659, "learning_rate": 1.2419287238048327e-06, "loss": 0.0615, "step": 27000 }, { "epoch": 2.3335781243250246, "grad_norm": 0.6209878169214561, "learning_rate": 1.238854047263221e-06, "loss": 0.0624, "step": 27010 }, { "epoch": 2.334442092530995, "grad_norm": 0.6223483037469352, "learning_rate": 1.2357826431920706e-06, "loss": 0.059, "step": 27020 }, { "epoch": 2.335306060736965, "grad_norm": 0.6514212276691695, "learning_rate": 1.2327145142637237e-06, "loss": 0.0605, "step": 27030 }, { "epoch": 2.336170028942935, "grad_norm": 0.6010763844851291, "learning_rate": 1.2296496631476807e-06, "loss": 0.0606, "step": 27040 }, { "epoch": 2.3370339971489047, "grad_norm": 0.6531152999704667, "learning_rate": 1.2265880925105777e-06, "loss": 0.0591, "step": 27050 }, { "epoch": 2.337897965354875, "grad_norm": 0.6706702248713575, "learning_rate": 1.2235298050162093e-06, "loss": 0.0592, "step": 27060 }, { "epoch": 2.338761933560845, "grad_norm": 0.6501204240351136, "learning_rate": 1.2204748033255054e-06, "loss": 0.0601, "step": 27070 }, { "epoch": 2.339625901766815, "grad_norm": 0.6360109585574683, "learning_rate": 1.2174230900965407e-06, "loss": 0.0622, "step": 27080 }, { "epoch": 2.340489869972785, "grad_norm": 0.7292331220131177, "learning_rate": 1.214374667984527e-06, "loss": 0.0602, "step": 27090 }, { "epoch": 2.341353838178755, "grad_norm": 0.64431736347878, "learning_rate": 1.2113295396418124e-06, "loss": 0.06, "step": 27100 }, { "epoch": 2.342217806384725, "grad_norm": 0.6693597385480407, "learning_rate": 1.2082877077178834e-06, "loss": 0.0604, "step": 27110 }, { "epoch": 2.343081774590695, "grad_norm": 0.70203505820828, "learning_rate": 1.2052491748593492e-06, "loss": 0.0584, "step": 27120 }, { "epoch": 2.3439457427966652, "grad_norm": 0.6529968481749552, "learning_rate": 1.2022139437099601e-06, "loss": 0.0582, "step": 27130 }, { "epoch": 2.344809711002635, "grad_norm": 0.6292470117300617, "learning_rate": 1.1991820169105827e-06, "loss": 0.0591, "step": 27140 }, { "epoch": 2.345673679208605, "grad_norm": 0.6937281317516001, "learning_rate": 1.19615339709922e-06, "loss": 0.0611, "step": 27150 }, { "epoch": 2.346537647414575, "grad_norm": 0.667882030426861, "learning_rate": 1.193128086910985e-06, "loss": 0.0572, "step": 27160 }, { "epoch": 2.3474016156205453, "grad_norm": 0.6415713468157893, "learning_rate": 1.1901060889781224e-06, "loss": 0.0596, "step": 27170 }, { "epoch": 2.348265583826515, "grad_norm": 0.6686307087579261, "learning_rate": 1.1870874059299875e-06, "loss": 0.0594, "step": 27180 }, { "epoch": 2.349129552032485, "grad_norm": 0.6509117568868412, "learning_rate": 1.1840720403930555e-06, "loss": 0.0594, "step": 27190 }, { "epoch": 2.3499935202384554, "grad_norm": 0.6287633557030508, "learning_rate": 1.1810599949909124e-06, "loss": 0.0589, "step": 27200 }, { "epoch": 2.3508574884444253, "grad_norm": 0.6378929565344178, "learning_rate": 1.1780512723442556e-06, "loss": 0.0607, "step": 27210 }, { "epoch": 2.3517214566503952, "grad_norm": 0.6682225214396356, "learning_rate": 1.1750458750708959e-06, "loss": 0.0609, "step": 27220 }, { "epoch": 2.352585424856365, "grad_norm": 0.664213964785015, "learning_rate": 1.1720438057857458e-06, "loss": 0.0589, "step": 27230 }, { "epoch": 2.3534493930623355, "grad_norm": 0.7004254892208797, "learning_rate": 1.169045067100824e-06, "loss": 0.0593, "step": 27240 }, { "epoch": 2.3543133612683054, "grad_norm": 0.6778013453063583, "learning_rate": 1.1660496616252498e-06, "loss": 0.0593, "step": 27250 }, { "epoch": 2.3551773294742753, "grad_norm": 0.6625578892854007, "learning_rate": 1.1630575919652459e-06, "loss": 0.0592, "step": 27260 }, { "epoch": 2.356041297680245, "grad_norm": 0.6889032338164695, "learning_rate": 1.16006886072413e-06, "loss": 0.0598, "step": 27270 }, { "epoch": 2.3569052658862155, "grad_norm": 0.656085217318829, "learning_rate": 1.1570834705023148e-06, "loss": 0.058, "step": 27280 }, { "epoch": 2.3577692340921854, "grad_norm": 0.6322543933666873, "learning_rate": 1.1541014238973076e-06, "loss": 0.0594, "step": 27290 }, { "epoch": 2.3586332022981553, "grad_norm": 0.6621617238381241, "learning_rate": 1.1511227235037036e-06, "loss": 0.0603, "step": 27300 }, { "epoch": 2.3594971705041257, "grad_norm": 0.6675369069452474, "learning_rate": 1.1481473719131935e-06, "loss": 0.0591, "step": 27310 }, { "epoch": 2.3603611387100956, "grad_norm": 0.6403904619439585, "learning_rate": 1.1451753717145436e-06, "loss": 0.057, "step": 27320 }, { "epoch": 2.3612251069160655, "grad_norm": 0.6974195948916855, "learning_rate": 1.1422067254936148e-06, "loss": 0.0564, "step": 27330 }, { "epoch": 2.3620890751220354, "grad_norm": 0.6844415275124478, "learning_rate": 1.139241435833342e-06, "loss": 0.0605, "step": 27340 }, { "epoch": 2.3629530433280057, "grad_norm": 0.6308979148922937, "learning_rate": 1.1362795053137477e-06, "loss": 0.0591, "step": 27350 }, { "epoch": 2.3638170115339756, "grad_norm": 0.6868448909356765, "learning_rate": 1.1333209365119198e-06, "loss": 0.0606, "step": 27360 }, { "epoch": 2.3646809797399455, "grad_norm": 0.6814825334027916, "learning_rate": 1.130365732002034e-06, "loss": 0.0597, "step": 27370 }, { "epoch": 2.3655449479459154, "grad_norm": 0.6505325443513633, "learning_rate": 1.1274138943553303e-06, "loss": 0.0601, "step": 27380 }, { "epoch": 2.3664089161518858, "grad_norm": 0.6516230369514702, "learning_rate": 1.1244654261401233e-06, "loss": 0.0573, "step": 27390 }, { "epoch": 2.3672728843578557, "grad_norm": 0.6351111112315094, "learning_rate": 1.1215203299217941e-06, "loss": 0.0591, "step": 27400 }, { "epoch": 2.3681368525638256, "grad_norm": 0.6323350777816571, "learning_rate": 1.1185786082627897e-06, "loss": 0.0582, "step": 27410 }, { "epoch": 2.369000820769796, "grad_norm": 0.669953614513429, "learning_rate": 1.1156402637226254e-06, "loss": 0.0601, "step": 27420 }, { "epoch": 2.369864788975766, "grad_norm": 0.6477445841820543, "learning_rate": 1.112705298857869e-06, "loss": 0.0608, "step": 27430 }, { "epoch": 2.3707287571817357, "grad_norm": 0.6971705503297287, "learning_rate": 1.1097737162221584e-06, "loss": 0.0585, "step": 27440 }, { "epoch": 2.3715927253877056, "grad_norm": 0.6874010419235855, "learning_rate": 1.1068455183661796e-06, "loss": 0.0611, "step": 27450 }, { "epoch": 2.3724566935936755, "grad_norm": 0.6634814836649913, "learning_rate": 1.103920707837683e-06, "loss": 0.0573, "step": 27460 }, { "epoch": 2.373320661799646, "grad_norm": 0.6668055692462801, "learning_rate": 1.10099928718146e-06, "loss": 0.0606, "step": 27470 }, { "epoch": 2.3741846300056157, "grad_norm": 0.659507511140097, "learning_rate": 1.0980812589393637e-06, "loss": 0.0584, "step": 27480 }, { "epoch": 2.3750485982115856, "grad_norm": 0.7058506378776868, "learning_rate": 1.095166625650289e-06, "loss": 0.0605, "step": 27490 }, { "epoch": 2.375912566417556, "grad_norm": 0.6616478710874778, "learning_rate": 1.0922553898501799e-06, "loss": 0.0575, "step": 27500 }, { "epoch": 2.376776534623526, "grad_norm": 0.6416220397649288, "learning_rate": 1.0893475540720215e-06, "loss": 0.0574, "step": 27510 }, { "epoch": 2.377640502829496, "grad_norm": 0.6851881227890764, "learning_rate": 1.086443120845842e-06, "loss": 0.0603, "step": 27520 }, { "epoch": 2.378504471035466, "grad_norm": 0.643914935400431, "learning_rate": 1.0835420926987123e-06, "loss": 0.0594, "step": 27530 }, { "epoch": 2.379368439241436, "grad_norm": 0.7065001618716296, "learning_rate": 1.0806444721547367e-06, "loss": 0.0598, "step": 27540 }, { "epoch": 2.380232407447406, "grad_norm": 0.6531887811707487, "learning_rate": 1.077750261735055e-06, "loss": 0.0632, "step": 27550 }, { "epoch": 2.381096375653376, "grad_norm": 0.6782902569583262, "learning_rate": 1.0748594639578391e-06, "loss": 0.0597, "step": 27560 }, { "epoch": 2.3819603438593457, "grad_norm": 0.6590868743045272, "learning_rate": 1.0719720813382972e-06, "loss": 0.0596, "step": 27570 }, { "epoch": 2.382824312065316, "grad_norm": 0.6852303874619127, "learning_rate": 1.0690881163886602e-06, "loss": 0.0586, "step": 27580 }, { "epoch": 2.383688280271286, "grad_norm": 0.6691472842481307, "learning_rate": 1.066207571618187e-06, "loss": 0.0586, "step": 27590 }, { "epoch": 2.384552248477256, "grad_norm": 0.6758493976482691, "learning_rate": 1.0633304495331614e-06, "loss": 0.0587, "step": 27600 }, { "epoch": 2.3854162166832262, "grad_norm": 0.6360569860336877, "learning_rate": 1.0604567526368875e-06, "loss": 0.0598, "step": 27610 }, { "epoch": 2.386280184889196, "grad_norm": 0.6688496501564594, "learning_rate": 1.057586483429694e-06, "loss": 0.0602, "step": 27620 }, { "epoch": 2.387144153095166, "grad_norm": 0.6318508440444366, "learning_rate": 1.054719644408919e-06, "loss": 0.0599, "step": 27630 }, { "epoch": 2.388008121301136, "grad_norm": 0.6679409504648013, "learning_rate": 1.051856238068925e-06, "loss": 0.0589, "step": 27640 }, { "epoch": 2.3888720895071063, "grad_norm": 0.6845032317565701, "learning_rate": 1.0489962669010817e-06, "loss": 0.0584, "step": 27650 }, { "epoch": 2.389736057713076, "grad_norm": 0.6218207202922958, "learning_rate": 1.0461397333937728e-06, "loss": 0.0586, "step": 27660 }, { "epoch": 2.390600025919046, "grad_norm": 0.6504975420124806, "learning_rate": 1.0432866400323883e-06, "loss": 0.0593, "step": 27670 }, { "epoch": 2.391463994125016, "grad_norm": 0.6627505995283275, "learning_rate": 1.0404369892993299e-06, "loss": 0.0601, "step": 27680 }, { "epoch": 2.3923279623309863, "grad_norm": 0.6866333978458881, "learning_rate": 1.037590783673999e-06, "loss": 0.0603, "step": 27690 }, { "epoch": 2.3931919305369562, "grad_norm": 0.6274084673491703, "learning_rate": 1.0347480256328025e-06, "loss": 0.0597, "step": 27700 }, { "epoch": 2.394055898742926, "grad_norm": 0.6381370089501482, "learning_rate": 1.0319087176491455e-06, "loss": 0.0577, "step": 27710 }, { "epoch": 2.3949198669488965, "grad_norm": 0.6704993469854148, "learning_rate": 1.0290728621934315e-06, "loss": 0.0599, "step": 27720 }, { "epoch": 2.3957838351548664, "grad_norm": 0.6716724508184789, "learning_rate": 1.0262404617330652e-06, "loss": 0.0592, "step": 27730 }, { "epoch": 2.3966478033608363, "grad_norm": 0.6583842494843922, "learning_rate": 1.023411518732435e-06, "loss": 0.058, "step": 27740 }, { "epoch": 2.397511771566806, "grad_norm": 0.655274656221077, "learning_rate": 1.0205860356529318e-06, "loss": 0.059, "step": 27750 }, { "epoch": 2.3983757397727765, "grad_norm": 0.6339739798600978, "learning_rate": 1.0177640149529277e-06, "loss": 0.0609, "step": 27760 }, { "epoch": 2.3992397079787464, "grad_norm": 0.6884819006309391, "learning_rate": 1.014945459087791e-06, "loss": 0.0601, "step": 27770 }, { "epoch": 2.4001036761847163, "grad_norm": 0.6504434154351232, "learning_rate": 1.0121303705098646e-06, "loss": 0.0583, "step": 27780 }, { "epoch": 2.400967644390686, "grad_norm": 0.6478456711603433, "learning_rate": 1.0093187516684832e-06, "loss": 0.0624, "step": 27790 }, { "epoch": 2.4018316125966566, "grad_norm": 0.6253213623206553, "learning_rate": 1.0065106050099599e-06, "loss": 0.0587, "step": 27800 }, { "epoch": 2.4026955808026265, "grad_norm": 0.6790278170027676, "learning_rate": 1.003705932977585e-06, "loss": 0.0596, "step": 27810 }, { "epoch": 2.4035595490085964, "grad_norm": 0.6798115066515837, "learning_rate": 1.0009047380116283e-06, "loss": 0.0607, "step": 27820 }, { "epoch": 2.4044235172145667, "grad_norm": 0.6604675110327878, "learning_rate": 9.98107022549331e-07, "loss": 0.0604, "step": 27830 }, { "epoch": 2.4052874854205366, "grad_norm": 0.6790462682499643, "learning_rate": 9.95312789024912e-07, "loss": 0.0568, "step": 27840 }, { "epoch": 2.4061514536265065, "grad_norm": 0.6128078717477331, "learning_rate": 9.925220398695562e-07, "loss": 0.0596, "step": 27850 }, { "epoch": 2.4070154218324764, "grad_norm": 0.6753776573614281, "learning_rate": 9.897347775114185e-07, "loss": 0.0608, "step": 27860 }, { "epoch": 2.4078793900384468, "grad_norm": 0.645529040989447, "learning_rate": 9.86951004375618e-07, "loss": 0.0586, "step": 27870 }, { "epoch": 2.4087433582444167, "grad_norm": 0.6658258287934132, "learning_rate": 9.841707228842428e-07, "loss": 0.0596, "step": 27880 }, { "epoch": 2.4096073264503866, "grad_norm": 0.6567521212851223, "learning_rate": 9.81393935456339e-07, "loss": 0.0576, "step": 27890 }, { "epoch": 2.4104712946563565, "grad_norm": 0.660656906400927, "learning_rate": 9.786206445079127e-07, "loss": 0.058, "step": 27900 }, { "epoch": 2.411335262862327, "grad_norm": 0.6380899683509319, "learning_rate": 9.75850852451929e-07, "loss": 0.0589, "step": 27910 }, { "epoch": 2.4121992310682967, "grad_norm": 0.6517954005215502, "learning_rate": 9.73084561698307e-07, "loss": 0.0599, "step": 27920 }, { "epoch": 2.4130631992742666, "grad_norm": 0.6807352997523919, "learning_rate": 9.703217746539256e-07, "loss": 0.0604, "step": 27930 }, { "epoch": 2.413927167480237, "grad_norm": 0.633359951134548, "learning_rate": 9.675624937226037e-07, "loss": 0.0582, "step": 27940 }, { "epoch": 2.414791135686207, "grad_norm": 0.661588990732132, "learning_rate": 9.648067213051216e-07, "loss": 0.062, "step": 27950 }, { "epoch": 2.4156551038921767, "grad_norm": 0.6643855642016916, "learning_rate": 9.620544597991992e-07, "loss": 0.059, "step": 27960 }, { "epoch": 2.4165190720981466, "grad_norm": 0.6772126749791726, "learning_rate": 9.593057115995053e-07, "loss": 0.06, "step": 27970 }, { "epoch": 2.417383040304117, "grad_norm": 0.6194181138318597, "learning_rate": 9.565604790976485e-07, "loss": 0.0578, "step": 27980 }, { "epoch": 2.418247008510087, "grad_norm": 0.6764386780365282, "learning_rate": 9.53818764682184e-07, "loss": 0.058, "step": 27990 }, { "epoch": 2.419110976716057, "grad_norm": 0.6927664968419396, "learning_rate": 9.510805707386006e-07, "loss": 0.0599, "step": 28000 }, { "epoch": 2.4199749449220267, "grad_norm": 0.674259380191846, "learning_rate": 9.483458996493267e-07, "loss": 0.0599, "step": 28010 }, { "epoch": 2.420838913127997, "grad_norm": 0.6678938458437413, "learning_rate": 9.456147537937249e-07, "loss": 0.0607, "step": 28020 }, { "epoch": 2.421702881333967, "grad_norm": 0.6678011366556437, "learning_rate": 9.42887135548089e-07, "loss": 0.0579, "step": 28030 }, { "epoch": 2.422566849539937, "grad_norm": 0.6879787431287953, "learning_rate": 9.401630472856499e-07, "loss": 0.0601, "step": 28040 }, { "epoch": 2.423430817745907, "grad_norm": 0.6705502097546254, "learning_rate": 9.374424913765567e-07, "loss": 0.0579, "step": 28050 }, { "epoch": 2.424294785951877, "grad_norm": 0.6599368510358516, "learning_rate": 9.347254701878943e-07, "loss": 0.06, "step": 28060 }, { "epoch": 2.425158754157847, "grad_norm": 0.6826991683827245, "learning_rate": 9.320119860836674e-07, "loss": 0.0614, "step": 28070 }, { "epoch": 2.426022722363817, "grad_norm": 0.6762089597321239, "learning_rate": 9.293020414248072e-07, "loss": 0.0594, "step": 28080 }, { "epoch": 2.426886690569787, "grad_norm": 0.6460291522786121, "learning_rate": 9.265956385691583e-07, "loss": 0.0579, "step": 28090 }, { "epoch": 2.427750658775757, "grad_norm": 0.6506399156335623, "learning_rate": 9.238927798714908e-07, "loss": 0.0581, "step": 28100 }, { "epoch": 2.428614626981727, "grad_norm": 0.6684113157689684, "learning_rate": 9.211934676834882e-07, "loss": 0.0594, "step": 28110 }, { "epoch": 2.429478595187697, "grad_norm": 0.6356040848491912, "learning_rate": 9.184977043537474e-07, "loss": 0.0573, "step": 28120 }, { "epoch": 2.4303425633936673, "grad_norm": 0.6847493066510186, "learning_rate": 9.158054922277787e-07, "loss": 0.0605, "step": 28130 }, { "epoch": 2.431206531599637, "grad_norm": 0.7050255385387857, "learning_rate": 9.131168336480018e-07, "loss": 0.059, "step": 28140 }, { "epoch": 2.432070499805607, "grad_norm": 0.6719753806637919, "learning_rate": 9.104317309537469e-07, "loss": 0.0585, "step": 28150 }, { "epoch": 2.4329344680115774, "grad_norm": 0.6482007708428015, "learning_rate": 9.077501864812476e-07, "loss": 0.0606, "step": 28160 }, { "epoch": 2.4337984362175473, "grad_norm": 0.6634719349909597, "learning_rate": 9.050722025636427e-07, "loss": 0.0557, "step": 28170 }, { "epoch": 2.4346624044235172, "grad_norm": 0.6551058466757594, "learning_rate": 9.023977815309714e-07, "loss": 0.0594, "step": 28180 }, { "epoch": 2.435526372629487, "grad_norm": 0.6744881656835849, "learning_rate": 8.997269257101776e-07, "loss": 0.0607, "step": 28190 }, { "epoch": 2.436390340835457, "grad_norm": 0.6918028486996344, "learning_rate": 8.970596374250984e-07, "loss": 0.0606, "step": 28200 }, { "epoch": 2.4372543090414274, "grad_norm": 0.6315554234710237, "learning_rate": 8.94395918996468e-07, "loss": 0.0573, "step": 28210 }, { "epoch": 2.4381182772473973, "grad_norm": 0.6532510959054035, "learning_rate": 8.917357727419157e-07, "loss": 0.0581, "step": 28220 }, { "epoch": 2.438982245453367, "grad_norm": 0.6657451241588958, "learning_rate": 8.890792009759624e-07, "loss": 0.0607, "step": 28230 }, { "epoch": 2.4398462136593375, "grad_norm": 0.6798649150950511, "learning_rate": 8.864262060100182e-07, "loss": 0.0582, "step": 28240 }, { "epoch": 2.4407101818653074, "grad_norm": 0.6867048576047796, "learning_rate": 8.837767901523808e-07, "loss": 0.0588, "step": 28250 }, { "epoch": 2.4415741500712773, "grad_norm": 0.6368106790875463, "learning_rate": 8.81130955708237e-07, "loss": 0.0578, "step": 28260 }, { "epoch": 2.442438118277247, "grad_norm": 0.6514293184898988, "learning_rate": 8.784887049796537e-07, "loss": 0.0553, "step": 28270 }, { "epoch": 2.4433020864832176, "grad_norm": 0.6861528684747734, "learning_rate": 8.758500402655811e-07, "loss": 0.0583, "step": 28280 }, { "epoch": 2.4441660546891875, "grad_norm": 0.6408952542356624, "learning_rate": 8.732149638618481e-07, "loss": 0.0587, "step": 28290 }, { "epoch": 2.4450300228951574, "grad_norm": 0.6583448327327999, "learning_rate": 8.70583478061166e-07, "loss": 0.0599, "step": 28300 }, { "epoch": 2.4458939911011273, "grad_norm": 0.6841081059724552, "learning_rate": 8.679555851531168e-07, "loss": 0.058, "step": 28310 }, { "epoch": 2.4467579593070976, "grad_norm": 0.6287400345247842, "learning_rate": 8.653312874241587e-07, "loss": 0.0594, "step": 28320 }, { "epoch": 2.4476219275130675, "grad_norm": 0.6443569285000627, "learning_rate": 8.627105871576214e-07, "loss": 0.0575, "step": 28330 }, { "epoch": 2.4484858957190374, "grad_norm": 0.6537062566918245, "learning_rate": 8.600934866337035e-07, "loss": 0.0595, "step": 28340 }, { "epoch": 2.4493498639250078, "grad_norm": 0.6728442936950617, "learning_rate": 8.57479988129476e-07, "loss": 0.0608, "step": 28350 }, { "epoch": 2.4502138321309777, "grad_norm": 0.6392937359718057, "learning_rate": 8.548700939188686e-07, "loss": 0.0572, "step": 28360 }, { "epoch": 2.4510778003369476, "grad_norm": 0.6616002855883878, "learning_rate": 8.522638062726823e-07, "loss": 0.0582, "step": 28370 }, { "epoch": 2.4519417685429175, "grad_norm": 0.6671488566913149, "learning_rate": 8.496611274585759e-07, "loss": 0.0582, "step": 28380 }, { "epoch": 2.452805736748888, "grad_norm": 0.6564631235016541, "learning_rate": 8.470620597410689e-07, "loss": 0.061, "step": 28390 }, { "epoch": 2.4536697049548577, "grad_norm": 0.6637031889827743, "learning_rate": 8.444666053815375e-07, "loss": 0.0622, "step": 28400 }, { "epoch": 2.4545336731608276, "grad_norm": 0.7176647827953652, "learning_rate": 8.418747666382188e-07, "loss": 0.059, "step": 28410 }, { "epoch": 2.4553976413667975, "grad_norm": 0.6896783584801283, "learning_rate": 8.392865457662002e-07, "loss": 0.0604, "step": 28420 }, { "epoch": 2.456261609572768, "grad_norm": 0.6828733344280327, "learning_rate": 8.367019450174208e-07, "loss": 0.0603, "step": 28430 }, { "epoch": 2.4571255777787377, "grad_norm": 0.6659408708812785, "learning_rate": 8.341209666406724e-07, "loss": 0.0593, "step": 28440 }, { "epoch": 2.4579895459847076, "grad_norm": 0.6766561239252696, "learning_rate": 8.315436128815918e-07, "loss": 0.059, "step": 28450 }, { "epoch": 2.458853514190678, "grad_norm": 0.6530791811491967, "learning_rate": 8.289698859826667e-07, "loss": 0.0576, "step": 28460 }, { "epoch": 2.459717482396648, "grad_norm": 0.6605880314782795, "learning_rate": 8.263997881832258e-07, "loss": 0.0575, "step": 28470 }, { "epoch": 2.460581450602618, "grad_norm": 0.6717909641695353, "learning_rate": 8.238333217194411e-07, "loss": 0.0578, "step": 28480 }, { "epoch": 2.4614454188085877, "grad_norm": 0.6542064505610335, "learning_rate": 8.212704888243245e-07, "loss": 0.0576, "step": 28490 }, { "epoch": 2.462309387014558, "grad_norm": 0.7133803141730897, "learning_rate": 8.187112917277279e-07, "loss": 0.0575, "step": 28500 }, { "epoch": 2.463173355220528, "grad_norm": 0.6987238128077268, "learning_rate": 8.161557326563374e-07, "loss": 0.0601, "step": 28510 }, { "epoch": 2.464037323426498, "grad_norm": 0.658559293565418, "learning_rate": 8.136038138336754e-07, "loss": 0.0573, "step": 28520 }, { "epoch": 2.4649012916324677, "grad_norm": 0.6867165061226779, "learning_rate": 8.110555374800988e-07, "loss": 0.0612, "step": 28530 }, { "epoch": 2.465765259838438, "grad_norm": 0.6616775930013254, "learning_rate": 8.085109058127916e-07, "loss": 0.0589, "step": 28540 }, { "epoch": 2.466629228044408, "grad_norm": 0.6748442619397084, "learning_rate": 8.059699210457695e-07, "loss": 0.0602, "step": 28550 }, { "epoch": 2.467493196250378, "grad_norm": 0.6784319465044278, "learning_rate": 8.034325853898716e-07, "loss": 0.0601, "step": 28560 }, { "epoch": 2.4683571644563482, "grad_norm": 0.6696709196103713, "learning_rate": 8.008989010527674e-07, "loss": 0.0583, "step": 28570 }, { "epoch": 2.469221132662318, "grad_norm": 0.6752973689391882, "learning_rate": 7.983688702389447e-07, "loss": 0.0593, "step": 28580 }, { "epoch": 2.470085100868288, "grad_norm": 0.6121634874033778, "learning_rate": 7.958424951497157e-07, "loss": 0.0586, "step": 28590 }, { "epoch": 2.470949069074258, "grad_norm": 0.6580181912566206, "learning_rate": 7.933197779832091e-07, "loss": 0.0568, "step": 28600 }, { "epoch": 2.4718130372802283, "grad_norm": 0.6552538023898506, "learning_rate": 7.908007209343716e-07, "loss": 0.0578, "step": 28610 }, { "epoch": 2.472677005486198, "grad_norm": 0.6669667599185005, "learning_rate": 7.882853261949692e-07, "loss": 0.0593, "step": 28620 }, { "epoch": 2.473540973692168, "grad_norm": 0.6340279306362967, "learning_rate": 7.857735959535739e-07, "loss": 0.055, "step": 28630 }, { "epoch": 2.474404941898138, "grad_norm": 0.6530282367866861, "learning_rate": 7.832655323955773e-07, "loss": 0.0596, "step": 28640 }, { "epoch": 2.4752689101041083, "grad_norm": 0.66428227074611, "learning_rate": 7.807611377031738e-07, "loss": 0.0576, "step": 28650 }, { "epoch": 2.4761328783100782, "grad_norm": 0.5941794758457453, "learning_rate": 7.782604140553734e-07, "loss": 0.0583, "step": 28660 }, { "epoch": 2.476996846516048, "grad_norm": 0.6814535079374869, "learning_rate": 7.757633636279826e-07, "loss": 0.0605, "step": 28670 }, { "epoch": 2.4778608147220185, "grad_norm": 0.6802629053649004, "learning_rate": 7.732699885936201e-07, "loss": 0.0576, "step": 28680 }, { "epoch": 2.4787247829279884, "grad_norm": 0.6726871693359314, "learning_rate": 7.707802911217027e-07, "loss": 0.0591, "step": 28690 }, { "epoch": 2.4795887511339583, "grad_norm": 0.6870407955530046, "learning_rate": 7.682942733784476e-07, "loss": 0.0587, "step": 28700 }, { "epoch": 2.480452719339928, "grad_norm": 0.6289813857003265, "learning_rate": 7.658119375268714e-07, "loss": 0.0576, "step": 28710 }, { "epoch": 2.481316687545898, "grad_norm": 0.6398280282296972, "learning_rate": 7.633332857267856e-07, "loss": 0.0582, "step": 28720 }, { "epoch": 2.4821806557518684, "grad_norm": 0.643099284809275, "learning_rate": 7.608583201348002e-07, "loss": 0.0594, "step": 28730 }, { "epoch": 2.4830446239578383, "grad_norm": 0.6469507902323824, "learning_rate": 7.583870429043134e-07, "loss": 0.0557, "step": 28740 }, { "epoch": 2.483908592163808, "grad_norm": 0.664417794823054, "learning_rate": 7.55919456185516e-07, "loss": 0.0563, "step": 28750 }, { "epoch": 2.4847725603697786, "grad_norm": 0.654464666999886, "learning_rate": 7.534555621253875e-07, "loss": 0.0568, "step": 28760 }, { "epoch": 2.4856365285757485, "grad_norm": 0.6736856787614522, "learning_rate": 7.509953628676963e-07, "loss": 0.0591, "step": 28770 }, { "epoch": 2.4865004967817184, "grad_norm": 0.6689201937782084, "learning_rate": 7.485388605529942e-07, "loss": 0.0605, "step": 28780 }, { "epoch": 2.4873644649876883, "grad_norm": 0.671068501395764, "learning_rate": 7.460860573186168e-07, "loss": 0.0591, "step": 28790 }, { "epoch": 2.4882284331936586, "grad_norm": 0.6849956789413019, "learning_rate": 7.43636955298681e-07, "loss": 0.0605, "step": 28800 }, { "epoch": 2.4890924013996285, "grad_norm": 0.6174444958621187, "learning_rate": 7.411915566240835e-07, "loss": 0.0572, "step": 28810 }, { "epoch": 2.4899563696055984, "grad_norm": 0.6613532292211507, "learning_rate": 7.387498634224988e-07, "loss": 0.0612, "step": 28820 }, { "epoch": 2.4908203378115683, "grad_norm": 0.6281340387625589, "learning_rate": 7.36311877818377e-07, "loss": 0.057, "step": 28830 }, { "epoch": 2.4916843060175387, "grad_norm": 0.6472955772427801, "learning_rate": 7.338776019329452e-07, "loss": 0.0595, "step": 28840 }, { "epoch": 2.4925482742235086, "grad_norm": 0.6465898107352807, "learning_rate": 7.314470378841987e-07, "loss": 0.0579, "step": 28850 }, { "epoch": 2.4934122424294785, "grad_norm": 0.6872080656870608, "learning_rate": 7.290201877869052e-07, "loss": 0.0577, "step": 28860 }, { "epoch": 2.494276210635449, "grad_norm": 0.6546298045170468, "learning_rate": 7.26597053752599e-07, "loss": 0.0593, "step": 28870 }, { "epoch": 2.4951401788414187, "grad_norm": 0.6501124516401879, "learning_rate": 7.241776378895865e-07, "loss": 0.0567, "step": 28880 }, { "epoch": 2.4960041470473886, "grad_norm": 0.6489435699910041, "learning_rate": 7.217619423029332e-07, "loss": 0.0588, "step": 28890 }, { "epoch": 2.4968681152533585, "grad_norm": 0.6535221870925897, "learning_rate": 7.193499690944706e-07, "loss": 0.0592, "step": 28900 }, { "epoch": 2.497732083459329, "grad_norm": 0.6771730869265828, "learning_rate": 7.169417203627898e-07, "loss": 0.0579, "step": 28910 }, { "epoch": 2.4985960516652987, "grad_norm": 0.6359104906466674, "learning_rate": 7.145371982032423e-07, "loss": 0.0575, "step": 28920 }, { "epoch": 2.4994600198712686, "grad_norm": 0.6722845799249196, "learning_rate": 7.121364047079405e-07, "loss": 0.059, "step": 28930 }, { "epoch": 2.5003239880772385, "grad_norm": 0.6749302305814222, "learning_rate": 7.097393419657439e-07, "loss": 0.0584, "step": 28940 }, { "epoch": 2.501187956283209, "grad_norm": 0.6397819866220034, "learning_rate": 7.07346012062275e-07, "loss": 0.0596, "step": 28950 }, { "epoch": 2.502051924489179, "grad_norm": 0.649234976789809, "learning_rate": 7.049564170799034e-07, "loss": 0.0588, "step": 28960 }, { "epoch": 2.5029158926951487, "grad_norm": 0.6194457878662328, "learning_rate": 7.025705590977528e-07, "loss": 0.0586, "step": 28970 }, { "epoch": 2.503779860901119, "grad_norm": 0.6254725233188696, "learning_rate": 7.001884401916898e-07, "loss": 0.0584, "step": 28980 }, { "epoch": 2.504643829107089, "grad_norm": 0.6533137706948257, "learning_rate": 6.978100624343332e-07, "loss": 0.0568, "step": 28990 }, { "epoch": 2.505507797313059, "grad_norm": 0.6726580638707064, "learning_rate": 6.954354278950443e-07, "loss": 0.0582, "step": 29000 }, { "epoch": 2.5063717655190287, "grad_norm": 0.7037083198887285, "learning_rate": 6.930645386399277e-07, "loss": 0.0614, "step": 29010 }, { "epoch": 2.507235733724999, "grad_norm": 0.7033536212322138, "learning_rate": 6.906973967318287e-07, "loss": 0.0608, "step": 29020 }, { "epoch": 2.508099701930969, "grad_norm": 0.6409072157382765, "learning_rate": 6.883340042303333e-07, "loss": 0.0574, "step": 29030 }, { "epoch": 2.508963670136939, "grad_norm": 0.6445925601552681, "learning_rate": 6.859743631917653e-07, "loss": 0.0583, "step": 29040 }, { "epoch": 2.509827638342909, "grad_norm": 0.6992038269097037, "learning_rate": 6.836184756691838e-07, "loss": 0.0597, "step": 29050 }, { "epoch": 2.510691606548879, "grad_norm": 0.6687242847138208, "learning_rate": 6.812663437123823e-07, "loss": 0.058, "step": 29060 }, { "epoch": 2.511555574754849, "grad_norm": 0.6608479078655375, "learning_rate": 6.789179693678855e-07, "loss": 0.059, "step": 29070 }, { "epoch": 2.512419542960819, "grad_norm": 0.6961967420294427, "learning_rate": 6.765733546789527e-07, "loss": 0.0596, "step": 29080 }, { "epoch": 2.5132835111667893, "grad_norm": 0.6380443357234777, "learning_rate": 6.742325016855655e-07, "loss": 0.0579, "step": 29090 }, { "epoch": 2.514147479372759, "grad_norm": 0.6437761706113116, "learning_rate": 6.718954124244386e-07, "loss": 0.0576, "step": 29100 }, { "epoch": 2.515011447578729, "grad_norm": 0.7021053853712458, "learning_rate": 6.695620889290095e-07, "loss": 0.058, "step": 29110 }, { "epoch": 2.515875415784699, "grad_norm": 0.6825429370809767, "learning_rate": 6.672325332294383e-07, "loss": 0.0571, "step": 29120 }, { "epoch": 2.516739383990669, "grad_norm": 0.679356433703693, "learning_rate": 6.649067473526083e-07, "loss": 0.0574, "step": 29130 }, { "epoch": 2.517603352196639, "grad_norm": 0.658900017381107, "learning_rate": 6.625847333221213e-07, "loss": 0.0604, "step": 29140 }, { "epoch": 2.518467320402609, "grad_norm": 0.6650829530549738, "learning_rate": 6.602664931583008e-07, "loss": 0.0577, "step": 29150 }, { "epoch": 2.519331288608579, "grad_norm": 0.7048816158879708, "learning_rate": 6.579520288781826e-07, "loss": 0.059, "step": 29160 }, { "epoch": 2.5201952568145494, "grad_norm": 0.6645875550709922, "learning_rate": 6.556413424955188e-07, "loss": 0.0594, "step": 29170 }, { "epoch": 2.5210592250205193, "grad_norm": 0.6709488583655743, "learning_rate": 6.533344360207744e-07, "loss": 0.0603, "step": 29180 }, { "epoch": 2.521923193226489, "grad_norm": 0.6671497082111582, "learning_rate": 6.510313114611272e-07, "loss": 0.0593, "step": 29190 }, { "epoch": 2.5227871614324595, "grad_norm": 0.6281321494275784, "learning_rate": 6.487319708204625e-07, "loss": 0.0571, "step": 29200 }, { "epoch": 2.5236511296384294, "grad_norm": 0.6805803298969804, "learning_rate": 6.464364160993736e-07, "loss": 0.0593, "step": 29210 }, { "epoch": 2.5245150978443993, "grad_norm": 0.6850029853982884, "learning_rate": 6.441446492951597e-07, "loss": 0.06, "step": 29220 }, { "epoch": 2.525379066050369, "grad_norm": 0.6418791460040815, "learning_rate": 6.418566724018232e-07, "loss": 0.0552, "step": 29230 }, { "epoch": 2.526243034256339, "grad_norm": 0.6482480515608605, "learning_rate": 6.39572487410075e-07, "loss": 0.058, "step": 29240 }, { "epoch": 2.5271070024623095, "grad_norm": 0.6396174623886076, "learning_rate": 6.372920963073165e-07, "loss": 0.0599, "step": 29250 }, { "epoch": 2.5279709706682794, "grad_norm": 0.6809497391776808, "learning_rate": 6.350155010776576e-07, "loss": 0.0608, "step": 29260 }, { "epoch": 2.5288349388742493, "grad_norm": 0.6779821207469485, "learning_rate": 6.32742703701899e-07, "loss": 0.0572, "step": 29270 }, { "epoch": 2.5296989070802196, "grad_norm": 0.6653538492392551, "learning_rate": 6.304737061575438e-07, "loss": 0.0583, "step": 29280 }, { "epoch": 2.5305628752861895, "grad_norm": 0.6416808234550777, "learning_rate": 6.282085104187796e-07, "loss": 0.0614, "step": 29290 }, { "epoch": 2.5314268434921594, "grad_norm": 0.6550871279640734, "learning_rate": 6.259471184564952e-07, "loss": 0.0574, "step": 29300 }, { "epoch": 2.5322908116981298, "grad_norm": 0.6707751927794339, "learning_rate": 6.236895322382653e-07, "loss": 0.0583, "step": 29310 }, { "epoch": 2.5331547799040997, "grad_norm": 0.6483868684196846, "learning_rate": 6.214357537283527e-07, "loss": 0.0568, "step": 29320 }, { "epoch": 2.5340187481100696, "grad_norm": 0.6548519923942192, "learning_rate": 6.191857848877097e-07, "loss": 0.0569, "step": 29330 }, { "epoch": 2.5348827163160395, "grad_norm": 0.684998353287605, "learning_rate": 6.16939627673972e-07, "loss": 0.0592, "step": 29340 }, { "epoch": 2.5357466845220094, "grad_norm": 0.6359323968029438, "learning_rate": 6.146972840414623e-07, "loss": 0.0598, "step": 29350 }, { "epoch": 2.5366106527279797, "grad_norm": 0.6175361169902326, "learning_rate": 6.124587559411782e-07, "loss": 0.0557, "step": 29360 }, { "epoch": 2.5374746209339496, "grad_norm": 0.6772844589228746, "learning_rate": 6.102240453208052e-07, "loss": 0.0605, "step": 29370 }, { "epoch": 2.5383385891399195, "grad_norm": 0.6512332430117873, "learning_rate": 6.07993154124702e-07, "loss": 0.0605, "step": 29380 }, { "epoch": 2.53920255734589, "grad_norm": 0.6989309110098257, "learning_rate": 6.057660842939095e-07, "loss": 0.0592, "step": 29390 }, { "epoch": 2.5400665255518597, "grad_norm": 0.6773524899173674, "learning_rate": 6.035428377661362e-07, "loss": 0.0589, "step": 29400 }, { "epoch": 2.5409304937578296, "grad_norm": 0.668998479726973, "learning_rate": 6.013234164757709e-07, "loss": 0.0591, "step": 29410 }, { "epoch": 2.5417944619638, "grad_norm": 0.6703732268508873, "learning_rate": 5.99107822353871e-07, "loss": 0.057, "step": 29420 }, { "epoch": 2.54265843016977, "grad_norm": 0.6635245821998534, "learning_rate": 5.968960573281645e-07, "loss": 0.0595, "step": 29430 }, { "epoch": 2.54352239837574, "grad_norm": 0.6525266456918762, "learning_rate": 5.946881233230473e-07, "loss": 0.0564, "step": 29440 }, { "epoch": 2.5443863665817097, "grad_norm": 0.687436696980093, "learning_rate": 5.924840222595818e-07, "loss": 0.0583, "step": 29450 }, { "epoch": 2.5452503347876796, "grad_norm": 0.6639549603653401, "learning_rate": 5.902837560554981e-07, "loss": 0.062, "step": 29460 }, { "epoch": 2.54611430299365, "grad_norm": 0.664926820131592, "learning_rate": 5.880873266251869e-07, "loss": 0.0595, "step": 29470 }, { "epoch": 2.54697827119962, "grad_norm": 0.6871249087091793, "learning_rate": 5.858947358797018e-07, "loss": 0.0596, "step": 29480 }, { "epoch": 2.5478422394055897, "grad_norm": 0.667792891284496, "learning_rate": 5.837059857267546e-07, "loss": 0.058, "step": 29490 }, { "epoch": 2.54870620761156, "grad_norm": 0.6350186657516332, "learning_rate": 5.815210780707192e-07, "loss": 0.0599, "step": 29500 }, { "epoch": 2.54957017581753, "grad_norm": 0.6961370444377433, "learning_rate": 5.793400148126233e-07, "loss": 0.0583, "step": 29510 }, { "epoch": 2.5504341440235, "grad_norm": 0.6484114304314101, "learning_rate": 5.77162797850151e-07, "loss": 0.0579, "step": 29520 }, { "epoch": 2.5512981122294702, "grad_norm": 0.6489574041467437, "learning_rate": 5.749894290776381e-07, "loss": 0.0581, "step": 29530 }, { "epoch": 2.55216208043544, "grad_norm": 0.6704512119761233, "learning_rate": 5.728199103860738e-07, "loss": 0.0582, "step": 29540 }, { "epoch": 2.55302604864141, "grad_norm": 0.6880307479956702, "learning_rate": 5.70654243663099e-07, "loss": 0.0568, "step": 29550 }, { "epoch": 2.55389001684738, "grad_norm": 0.6705087029929916, "learning_rate": 5.684924307929984e-07, "loss": 0.0571, "step": 29560 }, { "epoch": 2.55475398505335, "grad_norm": 0.6728745082079638, "learning_rate": 5.663344736567083e-07, "loss": 0.0558, "step": 29570 }, { "epoch": 2.55561795325932, "grad_norm": 0.6830199354155625, "learning_rate": 5.641803741318069e-07, "loss": 0.0598, "step": 29580 }, { "epoch": 2.55648192146529, "grad_norm": 0.6522985920938468, "learning_rate": 5.620301340925199e-07, "loss": 0.0551, "step": 29590 }, { "epoch": 2.55734588967126, "grad_norm": 0.6888286792099857, "learning_rate": 5.598837554097092e-07, "loss": 0.0569, "step": 29600 }, { "epoch": 2.5582098578772303, "grad_norm": 0.6438224709357185, "learning_rate": 5.577412399508831e-07, "loss": 0.0581, "step": 29610 }, { "epoch": 2.5590738260832, "grad_norm": 0.7114816339042405, "learning_rate": 5.556025895801847e-07, "loss": 0.0572, "step": 29620 }, { "epoch": 2.55993779428917, "grad_norm": 0.6307847616008638, "learning_rate": 5.534678061583953e-07, "loss": 0.0565, "step": 29630 }, { "epoch": 2.56080176249514, "grad_norm": 0.665655599351334, "learning_rate": 5.513368915429318e-07, "loss": 0.0574, "step": 29640 }, { "epoch": 2.56166573070111, "grad_norm": 0.6160826997111912, "learning_rate": 5.492098475878432e-07, "loss": 0.0559, "step": 29650 }, { "epoch": 2.5625296989070803, "grad_norm": 0.6430126390669085, "learning_rate": 5.470866761438165e-07, "loss": 0.0571, "step": 29660 }, { "epoch": 2.56339366711305, "grad_norm": 0.6612250849594098, "learning_rate": 5.449673790581611e-07, "loss": 0.0578, "step": 29670 }, { "epoch": 2.56425763531902, "grad_norm": 0.6560418863856894, "learning_rate": 5.428519581748215e-07, "loss": 0.0563, "step": 29680 }, { "epoch": 2.5651216035249904, "grad_norm": 0.6643504836285753, "learning_rate": 5.40740415334367e-07, "loss": 0.0564, "step": 29690 }, { "epoch": 2.5659855717309603, "grad_norm": 0.6771315503647855, "learning_rate": 5.386327523739954e-07, "loss": 0.0576, "step": 29700 }, { "epoch": 2.56684953993693, "grad_norm": 0.6363414935150137, "learning_rate": 5.365289711275235e-07, "loss": 0.0576, "step": 29710 }, { "epoch": 2.5677135081429006, "grad_norm": 0.6531883331299517, "learning_rate": 5.34429073425396e-07, "loss": 0.0591, "step": 29720 }, { "epoch": 2.5685774763488705, "grad_norm": 0.720546804487095, "learning_rate": 5.323330610946769e-07, "loss": 0.0568, "step": 29730 }, { "epoch": 2.5694414445548404, "grad_norm": 0.6329942697006272, "learning_rate": 5.302409359590483e-07, "loss": 0.0587, "step": 29740 }, { "epoch": 2.5703054127608103, "grad_norm": 0.6687880309557287, "learning_rate": 5.281526998388115e-07, "loss": 0.0602, "step": 29750 }, { "epoch": 2.57116938096678, "grad_norm": 0.6776722689320893, "learning_rate": 5.260683545508827e-07, "loss": 0.0619, "step": 29760 }, { "epoch": 2.5720333491727505, "grad_norm": 0.6712745107615856, "learning_rate": 5.239879019087957e-07, "loss": 0.0601, "step": 29770 }, { "epoch": 2.5728973173787204, "grad_norm": 0.6886401833521799, "learning_rate": 5.219113437226946e-07, "loss": 0.0591, "step": 29780 }, { "epoch": 2.5737612855846903, "grad_norm": 0.6579663523334925, "learning_rate": 5.198386817993367e-07, "loss": 0.0585, "step": 29790 }, { "epoch": 2.5746252537906607, "grad_norm": 0.6409753322445424, "learning_rate": 5.17769917942087e-07, "loss": 0.0573, "step": 29800 }, { "epoch": 2.5754892219966306, "grad_norm": 0.6611922702209809, "learning_rate": 5.157050539509228e-07, "loss": 0.059, "step": 29810 }, { "epoch": 2.5763531902026005, "grad_norm": 0.6499977380134889, "learning_rate": 5.136440916224245e-07, "loss": 0.0572, "step": 29820 }, { "epoch": 2.577217158408571, "grad_norm": 0.6872934986169129, "learning_rate": 5.11587032749779e-07, "loss": 0.056, "step": 29830 }, { "epoch": 2.5780811266145407, "grad_norm": 0.6732910099436433, "learning_rate": 5.095338791227783e-07, "loss": 0.0585, "step": 29840 }, { "epoch": 2.5789450948205106, "grad_norm": 0.6708757861303539, "learning_rate": 5.074846325278127e-07, "loss": 0.0587, "step": 29850 }, { "epoch": 2.5798090630264805, "grad_norm": 0.6783891979386204, "learning_rate": 5.054392947478798e-07, "loss": 0.056, "step": 29860 }, { "epoch": 2.5806730312324504, "grad_norm": 0.6931804578103389, "learning_rate": 5.033978675625679e-07, "loss": 0.0572, "step": 29870 }, { "epoch": 2.5815369994384207, "grad_norm": 0.6481825220806289, "learning_rate": 5.013603527480704e-07, "loss": 0.0581, "step": 29880 }, { "epoch": 2.5824009676443906, "grad_norm": 0.6817496187695342, "learning_rate": 4.993267520771705e-07, "loss": 0.0566, "step": 29890 }, { "epoch": 2.5832649358503605, "grad_norm": 0.6841775081681513, "learning_rate": 4.972970673192529e-07, "loss": 0.0588, "step": 29900 }, { "epoch": 2.584128904056331, "grad_norm": 0.6475734993713166, "learning_rate": 4.952713002402859e-07, "loss": 0.0588, "step": 29910 }, { "epoch": 2.584992872262301, "grad_norm": 0.6662802149829226, "learning_rate": 4.93249452602837e-07, "loss": 0.0579, "step": 29920 }, { "epoch": 2.5858568404682707, "grad_norm": 0.6365663998049235, "learning_rate": 4.912315261660611e-07, "loss": 0.058, "step": 29930 }, { "epoch": 2.586720808674241, "grad_norm": 0.6486599808954889, "learning_rate": 4.892175226856994e-07, "loss": 0.0584, "step": 29940 }, { "epoch": 2.587584776880211, "grad_norm": 0.6658531724786169, "learning_rate": 4.872074439140817e-07, "loss": 0.0576, "step": 29950 }, { "epoch": 2.588448745086181, "grad_norm": 0.7116085843786459, "learning_rate": 4.852012916001225e-07, "loss": 0.0594, "step": 29960 }, { "epoch": 2.5893127132921507, "grad_norm": 0.6671752461840659, "learning_rate": 4.831990674893222e-07, "loss": 0.0573, "step": 29970 }, { "epoch": 2.5901766814981206, "grad_norm": 0.6356609240356549, "learning_rate": 4.812007733237583e-07, "loss": 0.0582, "step": 29980 }, { "epoch": 2.591040649704091, "grad_norm": 0.6244169993393772, "learning_rate": 4.792064108420941e-07, "loss": 0.0557, "step": 29990 }, { "epoch": 2.591904617910061, "grad_norm": 0.6540666502258284, "learning_rate": 4.772159817795685e-07, "loss": 0.0582, "step": 30000 }, { "epoch": 2.592768586116031, "grad_norm": 0.6358928816445137, "learning_rate": 4.752294878680025e-07, "loss": 0.0576, "step": 30010 }, { "epoch": 2.593632554322001, "grad_norm": 0.6572956423353731, "learning_rate": 4.7324693083578563e-07, "loss": 0.0584, "step": 30020 }, { "epoch": 2.594496522527971, "grad_norm": 0.6604128875290302, "learning_rate": 4.7126831240789097e-07, "loss": 0.059, "step": 30030 }, { "epoch": 2.595360490733941, "grad_norm": 0.656347890741661, "learning_rate": 4.692936343058579e-07, "loss": 0.0584, "step": 30040 }, { "epoch": 2.5962244589399113, "grad_norm": 0.6991884383243728, "learning_rate": 4.673228982478012e-07, "loss": 0.0579, "step": 30050 }, { "epoch": 2.597088427145881, "grad_norm": 0.6979127438966883, "learning_rate": 4.653561059484035e-07, "loss": 0.0582, "step": 30060 }, { "epoch": 2.597952395351851, "grad_norm": 0.674144107741082, "learning_rate": 4.633932591189172e-07, "loss": 0.0581, "step": 30070 }, { "epoch": 2.598816363557821, "grad_norm": 0.6871709263612947, "learning_rate": 4.6143435946716276e-07, "loss": 0.0614, "step": 30080 }, { "epoch": 2.599680331763791, "grad_norm": 0.6587668438432055, "learning_rate": 4.594794086975252e-07, "loss": 0.0581, "step": 30090 }, { "epoch": 2.600544299969761, "grad_norm": 0.6601269223981766, "learning_rate": 4.575284085109527e-07, "loss": 0.0566, "step": 30100 }, { "epoch": 2.601408268175731, "grad_norm": 0.6767525995745622, "learning_rate": 4.555813606049575e-07, "loss": 0.0554, "step": 30110 }, { "epoch": 2.602272236381701, "grad_norm": 0.6629565179971297, "learning_rate": 4.5363826667361443e-07, "loss": 0.0564, "step": 30120 }, { "epoch": 2.6031362045876714, "grad_norm": 0.7046146642092987, "learning_rate": 4.5169912840755505e-07, "loss": 0.0588, "step": 30130 }, { "epoch": 2.6040001727936413, "grad_norm": 0.6772719079025742, "learning_rate": 4.4976394749397076e-07, "loss": 0.0565, "step": 30140 }, { "epoch": 2.604864140999611, "grad_norm": 0.6742492402219806, "learning_rate": 4.478327256166104e-07, "loss": 0.0551, "step": 30150 }, { "epoch": 2.6057281092055815, "grad_norm": 0.670700818891851, "learning_rate": 4.459054644557759e-07, "loss": 0.0572, "step": 30160 }, { "epoch": 2.6065920774115514, "grad_norm": 0.6811115544062041, "learning_rate": 4.43982165688327e-07, "loss": 0.0583, "step": 30170 }, { "epoch": 2.6074560456175213, "grad_norm": 0.6771983529090304, "learning_rate": 4.4206283098767067e-07, "loss": 0.0569, "step": 30180 }, { "epoch": 2.608320013823491, "grad_norm": 0.7017740502312722, "learning_rate": 4.4014746202377e-07, "loss": 0.0585, "step": 30190 }, { "epoch": 2.609183982029461, "grad_norm": 0.6755203458518678, "learning_rate": 4.3823606046313415e-07, "loss": 0.0567, "step": 30200 }, { "epoch": 2.6100479502354315, "grad_norm": 0.6772379176657675, "learning_rate": 4.363286279688217e-07, "loss": 0.0582, "step": 30210 }, { "epoch": 2.6109119184414014, "grad_norm": 0.6695431410155791, "learning_rate": 4.3442516620043674e-07, "loss": 0.0597, "step": 30220 }, { "epoch": 2.6117758866473713, "grad_norm": 0.6567571285238, "learning_rate": 4.325256768141312e-07, "loss": 0.0574, "step": 30230 }, { "epoch": 2.6126398548533416, "grad_norm": 0.6599745824932721, "learning_rate": 4.306301614625979e-07, "loss": 0.057, "step": 30240 }, { "epoch": 2.6135038230593115, "grad_norm": 0.6378082167289888, "learning_rate": 4.287386217950734e-07, "loss": 0.0569, "step": 30250 }, { "epoch": 2.6143677912652814, "grad_norm": 0.6809435093701421, "learning_rate": 4.268510594573344e-07, "loss": 0.0572, "step": 30260 }, { "epoch": 2.6152317594712513, "grad_norm": 0.6741354796855408, "learning_rate": 4.249674760916961e-07, "loss": 0.057, "step": 30270 }, { "epoch": 2.616095727677221, "grad_norm": 0.6663299971055163, "learning_rate": 4.2308787333701697e-07, "loss": 0.0573, "step": 30280 }, { "epoch": 2.6169596958831916, "grad_norm": 0.6923628741436045, "learning_rate": 4.2121225282868273e-07, "loss": 0.0575, "step": 30290 }, { "epoch": 2.6178236640891615, "grad_norm": 0.6422042356059123, "learning_rate": 4.193406161986241e-07, "loss": 0.0567, "step": 30300 }, { "epoch": 2.6186876322951314, "grad_norm": 0.6971541286553281, "learning_rate": 4.174729650752979e-07, "loss": 0.059, "step": 30310 }, { "epoch": 2.6195516005011017, "grad_norm": 0.668983792614538, "learning_rate": 4.1560930108369925e-07, "loss": 0.0575, "step": 30320 }, { "epoch": 2.6204155687070716, "grad_norm": 0.6783496859385131, "learning_rate": 4.1374962584534886e-07, "loss": 0.0582, "step": 30330 }, { "epoch": 2.6212795369130415, "grad_norm": 0.6566315043249467, "learning_rate": 4.1189394097830073e-07, "loss": 0.0587, "step": 30340 }, { "epoch": 2.622143505119012, "grad_norm": 0.6665224926516164, "learning_rate": 4.1004224809713497e-07, "loss": 0.0569, "step": 30350 }, { "epoch": 2.6230074733249817, "grad_norm": 0.6610369842339688, "learning_rate": 4.081945488129602e-07, "loss": 0.0566, "step": 30360 }, { "epoch": 2.6238714415309516, "grad_norm": 0.6659586041314874, "learning_rate": 4.06350844733408e-07, "loss": 0.0584, "step": 30370 }, { "epoch": 2.6247354097369215, "grad_norm": 0.6998362957468676, "learning_rate": 4.0451113746263426e-07, "loss": 0.0572, "step": 30380 }, { "epoch": 2.6255993779428914, "grad_norm": 0.6629821943664789, "learning_rate": 4.0267542860132017e-07, "loss": 0.0604, "step": 30390 }, { "epoch": 2.626463346148862, "grad_norm": 0.6851188724926308, "learning_rate": 4.008437197466647e-07, "loss": 0.0576, "step": 30400 }, { "epoch": 2.6273273143548317, "grad_norm": 0.6358552887813863, "learning_rate": 3.990160124923875e-07, "loss": 0.0591, "step": 30410 }, { "epoch": 2.6281912825608016, "grad_norm": 0.6822769123702047, "learning_rate": 3.9719230842872714e-07, "loss": 0.058, "step": 30420 }, { "epoch": 2.629055250766772, "grad_norm": 0.6760968470919131, "learning_rate": 3.9537260914243924e-07, "loss": 0.058, "step": 30430 }, { "epoch": 2.629919218972742, "grad_norm": 0.679594017303257, "learning_rate": 3.9355691621679403e-07, "loss": 0.059, "step": 30440 }, { "epoch": 2.6307831871787117, "grad_norm": 0.6778234438718367, "learning_rate": 3.9174523123157617e-07, "loss": 0.0579, "step": 30450 }, { "epoch": 2.631647155384682, "grad_norm": 0.7006233738319552, "learning_rate": 3.8993755576308413e-07, "loss": 0.0608, "step": 30460 }, { "epoch": 2.632511123590652, "grad_norm": 0.6485879407092817, "learning_rate": 3.8813389138412595e-07, "loss": 0.0551, "step": 30470 }, { "epoch": 2.633375091796622, "grad_norm": 0.688314346867616, "learning_rate": 3.863342396640213e-07, "loss": 0.0578, "step": 30480 }, { "epoch": 2.634239060002592, "grad_norm": 0.6914084623518867, "learning_rate": 3.845386021685971e-07, "loss": 0.0587, "step": 30490 }, { "epoch": 2.6351030282085617, "grad_norm": 0.686323558943809, "learning_rate": 3.827469804601908e-07, "loss": 0.0569, "step": 30500 }, { "epoch": 2.635966996414532, "grad_norm": 0.6703731117375387, "learning_rate": 3.8095937609764157e-07, "loss": 0.0581, "step": 30510 }, { "epoch": 2.636830964620502, "grad_norm": 0.6690328063750695, "learning_rate": 3.791757906362958e-07, "loss": 0.0572, "step": 30520 }, { "epoch": 2.637694932826472, "grad_norm": 0.6455426316238811, "learning_rate": 3.7739622562800224e-07, "loss": 0.0565, "step": 30530 }, { "epoch": 2.638558901032442, "grad_norm": 0.6888587274112533, "learning_rate": 3.7562068262111286e-07, "loss": 0.0581, "step": 30540 }, { "epoch": 2.639422869238412, "grad_norm": 0.6825385826605149, "learning_rate": 3.738491631604779e-07, "loss": 0.0598, "step": 30550 }, { "epoch": 2.640286837444382, "grad_norm": 0.690570583937555, "learning_rate": 3.72081668787449e-07, "loss": 0.0562, "step": 30560 }, { "epoch": 2.6411508056503523, "grad_norm": 0.6766329158244576, "learning_rate": 3.703182010398748e-07, "loss": 0.058, "step": 30570 }, { "epoch": 2.642014773856322, "grad_norm": 0.67715272342871, "learning_rate": 3.6855876145209914e-07, "loss": 0.0575, "step": 30580 }, { "epoch": 2.642878742062292, "grad_norm": 0.6789455137223994, "learning_rate": 3.668033515549646e-07, "loss": 0.0574, "step": 30590 }, { "epoch": 2.643742710268262, "grad_norm": 0.6383316891492063, "learning_rate": 3.6505197287580285e-07, "loss": 0.0582, "step": 30600 }, { "epoch": 2.644606678474232, "grad_norm": 0.6629664084374244, "learning_rate": 3.633046269384427e-07, "loss": 0.0585, "step": 30610 }, { "epoch": 2.6454706466802023, "grad_norm": 0.6913305154208267, "learning_rate": 3.615613152632008e-07, "loss": 0.0585, "step": 30620 }, { "epoch": 2.646334614886172, "grad_norm": 0.6718079255061178, "learning_rate": 3.5982203936688776e-07, "loss": 0.0572, "step": 30630 }, { "epoch": 2.647198583092142, "grad_norm": 0.6686806554991838, "learning_rate": 3.5808680076279577e-07, "loss": 0.0553, "step": 30640 }, { "epoch": 2.6480625512981124, "grad_norm": 0.6736141930150831, "learning_rate": 3.563556009607122e-07, "loss": 0.0593, "step": 30650 }, { "epoch": 2.6489265195040823, "grad_norm": 0.6240399939775275, "learning_rate": 3.546284414669055e-07, "loss": 0.0564, "step": 30660 }, { "epoch": 2.649790487710052, "grad_norm": 0.6560981438220517, "learning_rate": 3.5290532378412956e-07, "loss": 0.0565, "step": 30670 }, { "epoch": 2.6506544559160226, "grad_norm": 0.6680071817104607, "learning_rate": 3.5118624941162303e-07, "loss": 0.0566, "step": 30680 }, { "epoch": 2.6515184241219925, "grad_norm": 0.6625073324317056, "learning_rate": 3.4947121984510393e-07, "loss": 0.0581, "step": 30690 }, { "epoch": 2.6523823923279624, "grad_norm": 0.6807614072790078, "learning_rate": 3.477602365767746e-07, "loss": 0.0559, "step": 30700 }, { "epoch": 2.6532463605339323, "grad_norm": 0.6908495010112696, "learning_rate": 3.460533010953138e-07, "loss": 0.0585, "step": 30710 }, { "epoch": 2.654110328739902, "grad_norm": 0.6892337877997854, "learning_rate": 3.4435041488588063e-07, "loss": 0.0581, "step": 30720 }, { "epoch": 2.6549742969458725, "grad_norm": 0.6433255896995709, "learning_rate": 3.4265157943010774e-07, "loss": 0.0557, "step": 30730 }, { "epoch": 2.6558382651518424, "grad_norm": 0.6486141164989712, "learning_rate": 3.4095679620610834e-07, "loss": 0.0585, "step": 30740 }, { "epoch": 2.6567022333578123, "grad_norm": 0.6826727156921892, "learning_rate": 3.3926606668846395e-07, "loss": 0.058, "step": 30750 }, { "epoch": 2.6575662015637826, "grad_norm": 0.658775821207378, "learning_rate": 3.375793923482351e-07, "loss": 0.0569, "step": 30760 }, { "epoch": 2.6584301697697525, "grad_norm": 0.7123926912059468, "learning_rate": 3.3589677465294957e-07, "loss": 0.0544, "step": 30770 }, { "epoch": 2.6592941379757224, "grad_norm": 0.693975415540836, "learning_rate": 3.3421821506660736e-07, "loss": 0.0586, "step": 30780 }, { "epoch": 2.6601581061816924, "grad_norm": 0.66409621627837, "learning_rate": 3.3254371504967744e-07, "loss": 0.057, "step": 30790 }, { "epoch": 2.6610220743876627, "grad_norm": 0.6408974196362415, "learning_rate": 3.3087327605909603e-07, "loss": 0.058, "step": 30800 }, { "epoch": 2.6618860425936326, "grad_norm": 0.6548935799283724, "learning_rate": 3.292068995482672e-07, "loss": 0.0583, "step": 30810 }, { "epoch": 2.6627500107996025, "grad_norm": 0.6114378558059209, "learning_rate": 3.2754458696705957e-07, "loss": 0.0583, "step": 30820 }, { "epoch": 2.6636139790055724, "grad_norm": 0.6532677500781641, "learning_rate": 3.2588633976180616e-07, "loss": 0.0565, "step": 30830 }, { "epoch": 2.6644779472115427, "grad_norm": 0.6621994992284654, "learning_rate": 3.242321593753017e-07, "loss": 0.0578, "step": 30840 }, { "epoch": 2.6653419154175126, "grad_norm": 0.6659908441376852, "learning_rate": 3.225820472468044e-07, "loss": 0.0548, "step": 30850 }, { "epoch": 2.6662058836234825, "grad_norm": 0.6884289390924854, "learning_rate": 3.2093600481203135e-07, "loss": 0.0566, "step": 30860 }, { "epoch": 2.667069851829453, "grad_norm": 0.662687602293797, "learning_rate": 3.1929403350315913e-07, "loss": 0.0578, "step": 30870 }, { "epoch": 2.667933820035423, "grad_norm": 0.6549345644376964, "learning_rate": 3.176561347488227e-07, "loss": 0.0571, "step": 30880 }, { "epoch": 2.6687977882413927, "grad_norm": 0.6848731424249099, "learning_rate": 3.1602230997411166e-07, "loss": 0.0591, "step": 30890 }, { "epoch": 2.6696617564473626, "grad_norm": 0.6238420529746252, "learning_rate": 3.1439256060057486e-07, "loss": 0.0594, "step": 30900 }, { "epoch": 2.6705257246533325, "grad_norm": 0.6593033839201444, "learning_rate": 3.1276688804620983e-07, "loss": 0.058, "step": 30910 }, { "epoch": 2.671389692859303, "grad_norm": 0.6324222514200223, "learning_rate": 3.111452937254722e-07, "loss": 0.0555, "step": 30920 }, { "epoch": 2.6722536610652727, "grad_norm": 0.6712429491334161, "learning_rate": 3.095277790492646e-07, "loss": 0.059, "step": 30930 }, { "epoch": 2.6731176292712426, "grad_norm": 0.6485249395528295, "learning_rate": 3.0791434542494615e-07, "loss": 0.057, "step": 30940 }, { "epoch": 2.673981597477213, "grad_norm": 0.6944594330088194, "learning_rate": 3.063049942563173e-07, "loss": 0.0562, "step": 30950 }, { "epoch": 2.674845565683183, "grad_norm": 0.6280925503430318, "learning_rate": 3.04699726943633e-07, "loss": 0.056, "step": 30960 }, { "epoch": 2.675709533889153, "grad_norm": 0.7066752206732005, "learning_rate": 3.0309854488359213e-07, "loss": 0.0578, "step": 30970 }, { "epoch": 2.676573502095123, "grad_norm": 0.6903537649768017, "learning_rate": 3.015014494693391e-07, "loss": 0.0554, "step": 30980 }, { "epoch": 2.677437470301093, "grad_norm": 0.6860432662236696, "learning_rate": 2.9990844209046355e-07, "loss": 0.057, "step": 30990 }, { "epoch": 2.678301438507063, "grad_norm": 0.6777716875811373, "learning_rate": 2.983195241329967e-07, "loss": 0.0569, "step": 31000 }, { "epoch": 2.679165406713033, "grad_norm": 0.6624799407879325, "learning_rate": 2.967346969794138e-07, "loss": 0.0558, "step": 31010 }, { "epoch": 2.6800293749190027, "grad_norm": 0.6752161383911821, "learning_rate": 2.9515396200862964e-07, "loss": 0.0567, "step": 31020 }, { "epoch": 2.680893343124973, "grad_norm": 0.6767628777851237, "learning_rate": 2.935773205959985e-07, "loss": 0.0583, "step": 31030 }, { "epoch": 2.681757311330943, "grad_norm": 0.6467792303059987, "learning_rate": 2.9200477411331197e-07, "loss": 0.0576, "step": 31040 }, { "epoch": 2.682621279536913, "grad_norm": 0.6338846148774004, "learning_rate": 2.904363239288022e-07, "loss": 0.0557, "step": 31050 }, { "epoch": 2.683485247742883, "grad_norm": 0.6403686317193569, "learning_rate": 2.8887197140713144e-07, "loss": 0.0581, "step": 31060 }, { "epoch": 2.684349215948853, "grad_norm": 0.6463691594607338, "learning_rate": 2.873117179094037e-07, "loss": 0.0608, "step": 31070 }, { "epoch": 2.685213184154823, "grad_norm": 0.6908061227463191, "learning_rate": 2.857555647931509e-07, "loss": 0.0581, "step": 31080 }, { "epoch": 2.6860771523607934, "grad_norm": 0.6817739973491456, "learning_rate": 2.8420351341234e-07, "loss": 0.0579, "step": 31090 }, { "epoch": 2.6869411205667633, "grad_norm": 0.7034592398074475, "learning_rate": 2.8265556511736846e-07, "loss": 0.0578, "step": 31100 }, { "epoch": 2.687805088772733, "grad_norm": 0.7360862346410634, "learning_rate": 2.811117212550629e-07, "loss": 0.0572, "step": 31110 }, { "epoch": 2.688669056978703, "grad_norm": 0.6841861146827901, "learning_rate": 2.7957198316868164e-07, "loss": 0.0592, "step": 31120 }, { "epoch": 2.689533025184673, "grad_norm": 0.6698510034953965, "learning_rate": 2.7803635219790736e-07, "loss": 0.0585, "step": 31130 }, { "epoch": 2.6903969933906433, "grad_norm": 0.6243962845040457, "learning_rate": 2.765048296788514e-07, "loss": 0.0563, "step": 31140 }, { "epoch": 2.691260961596613, "grad_norm": 0.6725898416366625, "learning_rate": 2.749774169440489e-07, "loss": 0.0578, "step": 31150 }, { "epoch": 2.692124929802583, "grad_norm": 0.6513848193495176, "learning_rate": 2.7345411532246126e-07, "loss": 0.0585, "step": 31160 }, { "epoch": 2.6929888980085535, "grad_norm": 0.6539524109515839, "learning_rate": 2.719349261394705e-07, "loss": 0.0596, "step": 31170 }, { "epoch": 2.6938528662145234, "grad_norm": 0.6501080679911405, "learning_rate": 2.7041985071688257e-07, "loss": 0.0562, "step": 31180 }, { "epoch": 2.6947168344204933, "grad_norm": 0.680056582828761, "learning_rate": 2.6890889037292255e-07, "loss": 0.0589, "step": 31190 }, { "epoch": 2.6955808026264636, "grad_norm": 0.6577180730454457, "learning_rate": 2.674020464222349e-07, "loss": 0.0573, "step": 31200 }, { "epoch": 2.6964447708324335, "grad_norm": 0.6778652955187711, "learning_rate": 2.658993201758864e-07, "loss": 0.0557, "step": 31210 }, { "epoch": 2.6973087390384034, "grad_norm": 0.6401257423410647, "learning_rate": 2.6440071294135504e-07, "loss": 0.0572, "step": 31220 }, { "epoch": 2.6981727072443733, "grad_norm": 0.6664094217724789, "learning_rate": 2.629062260225396e-07, "loss": 0.0564, "step": 31230 }, { "epoch": 2.699036675450343, "grad_norm": 0.7071794832969107, "learning_rate": 2.6141586071975146e-07, "loss": 0.0577, "step": 31240 }, { "epoch": 2.6999006436563135, "grad_norm": 0.6731307049372357, "learning_rate": 2.5992961832971897e-07, "loss": 0.0587, "step": 31250 }, { "epoch": 2.7007646118622834, "grad_norm": 0.7126246677195216, "learning_rate": 2.584475001455783e-07, "loss": 0.0567, "step": 31260 }, { "epoch": 2.7016285800682533, "grad_norm": 0.6443499323094868, "learning_rate": 2.5696950745688175e-07, "loss": 0.0607, "step": 31270 }, { "epoch": 2.7024925482742237, "grad_norm": 0.6613790530887171, "learning_rate": 2.554956415495902e-07, "loss": 0.056, "step": 31280 }, { "epoch": 2.7033565164801936, "grad_norm": 0.6526390569684382, "learning_rate": 2.540259037060738e-07, "loss": 0.0558, "step": 31290 }, { "epoch": 2.7042204846861635, "grad_norm": 0.6491466055505781, "learning_rate": 2.525602952051115e-07, "loss": 0.0565, "step": 31300 }, { "epoch": 2.705084452892134, "grad_norm": 0.6477798234747194, "learning_rate": 2.510988173218881e-07, "loss": 0.0566, "step": 31310 }, { "epoch": 2.7059484210981037, "grad_norm": 0.678937957222222, "learning_rate": 2.496414713279988e-07, "loss": 0.0594, "step": 31320 }, { "epoch": 2.7068123893040736, "grad_norm": 0.6481312566936305, "learning_rate": 2.481882584914369e-07, "loss": 0.0559, "step": 31330 }, { "epoch": 2.7076763575100435, "grad_norm": 0.6400669772772863, "learning_rate": 2.467391800766056e-07, "loss": 0.055, "step": 31340 }, { "epoch": 2.7085403257160134, "grad_norm": 0.6899428068381959, "learning_rate": 2.452942373443068e-07, "loss": 0.0598, "step": 31350 }, { "epoch": 2.709404293921984, "grad_norm": 0.7131334900456067, "learning_rate": 2.438534315517482e-07, "loss": 0.0579, "step": 31360 }, { "epoch": 2.7102682621279537, "grad_norm": 0.6774543991643118, "learning_rate": 2.424167639525327e-07, "loss": 0.0574, "step": 31370 }, { "epoch": 2.7111322303339236, "grad_norm": 0.6601832620044917, "learning_rate": 2.40984235796668e-07, "loss": 0.0566, "step": 31380 }, { "epoch": 2.711996198539894, "grad_norm": 0.6510071598504059, "learning_rate": 2.3955584833055655e-07, "loss": 0.0571, "step": 31390 }, { "epoch": 2.712860166745864, "grad_norm": 0.686821661165841, "learning_rate": 2.381316027969993e-07, "loss": 0.0583, "step": 31400 }, { "epoch": 2.7137241349518337, "grad_norm": 0.6850970538518514, "learning_rate": 2.3671150043519388e-07, "loss": 0.0573, "step": 31410 }, { "epoch": 2.7145881031578036, "grad_norm": 0.6976938553504183, "learning_rate": 2.3529554248073127e-07, "loss": 0.0571, "step": 31420 }, { "epoch": 2.715452071363774, "grad_norm": 0.6707899675092523, "learning_rate": 2.3388373016559995e-07, "loss": 0.054, "step": 31430 }, { "epoch": 2.716316039569744, "grad_norm": 0.6525033797859318, "learning_rate": 2.324760647181784e-07, "loss": 0.0563, "step": 31440 }, { "epoch": 2.717180007775714, "grad_norm": 0.6712410318989885, "learning_rate": 2.3107254736323747e-07, "loss": 0.0584, "step": 31450 }, { "epoch": 2.7180439759816837, "grad_norm": 0.6489822837447691, "learning_rate": 2.2967317932193868e-07, "loss": 0.0568, "step": 31460 }, { "epoch": 2.718907944187654, "grad_norm": 0.6943226651728736, "learning_rate": 2.282779618118358e-07, "loss": 0.0566, "step": 31470 }, { "epoch": 2.719771912393624, "grad_norm": 0.7089036129137106, "learning_rate": 2.2688689604686899e-07, "loss": 0.0576, "step": 31480 }, { "epoch": 2.720635880599594, "grad_norm": 0.6762859711461554, "learning_rate": 2.254999832373661e-07, "loss": 0.0571, "step": 31490 }, { "epoch": 2.721499848805564, "grad_norm": 0.6544346615382407, "learning_rate": 2.2411722459004192e-07, "loss": 0.0566, "step": 31500 }, { "epoch": 2.722363817011534, "grad_norm": 0.668302454393643, "learning_rate": 2.2273862130799684e-07, "loss": 0.0569, "step": 31510 }, { "epoch": 2.723227785217504, "grad_norm": 0.6942525287151675, "learning_rate": 2.2136417459071802e-07, "loss": 0.0566, "step": 31520 }, { "epoch": 2.724091753423474, "grad_norm": 0.6509997629051298, "learning_rate": 2.199938856340711e-07, "loss": 0.0574, "step": 31530 }, { "epoch": 2.7249557216294438, "grad_norm": 0.6723635409838065, "learning_rate": 2.1862775563030903e-07, "loss": 0.0596, "step": 31540 }, { "epoch": 2.725819689835414, "grad_norm": 0.670764933007711, "learning_rate": 2.1726578576806324e-07, "loss": 0.0551, "step": 31550 }, { "epoch": 2.726683658041384, "grad_norm": 0.6629743678908288, "learning_rate": 2.1590797723234802e-07, "loss": 0.0553, "step": 31560 }, { "epoch": 2.727547626247354, "grad_norm": 0.6327215531773591, "learning_rate": 2.145543312045534e-07, "loss": 0.0566, "step": 31570 }, { "epoch": 2.7284115944533243, "grad_norm": 0.6781526282634827, "learning_rate": 2.1320484886245163e-07, "loss": 0.0585, "step": 31580 }, { "epoch": 2.729275562659294, "grad_norm": 0.6622262488922581, "learning_rate": 2.1185953138019022e-07, "loss": 0.0541, "step": 31590 }, { "epoch": 2.730139530865264, "grad_norm": 0.667475826799447, "learning_rate": 2.1051837992829226e-07, "loss": 0.0548, "step": 31600 }, { "epoch": 2.7310034990712344, "grad_norm": 0.657088121258104, "learning_rate": 2.091813956736577e-07, "loss": 0.0568, "step": 31610 }, { "epoch": 2.7318674672772043, "grad_norm": 0.6498889038053293, "learning_rate": 2.0784857977955931e-07, "loss": 0.057, "step": 31620 }, { "epoch": 2.732731435483174, "grad_norm": 0.6733409297948825, "learning_rate": 2.0651993340564614e-07, "loss": 0.0584, "step": 31630 }, { "epoch": 2.733595403689144, "grad_norm": 0.6852886398578109, "learning_rate": 2.0519545770793348e-07, "loss": 0.0574, "step": 31640 }, { "epoch": 2.734459371895114, "grad_norm": 0.6792628939751382, "learning_rate": 2.0387515383881452e-07, "loss": 0.0572, "step": 31650 }, { "epoch": 2.7353233401010844, "grad_norm": 0.6599160690463196, "learning_rate": 2.025590229470481e-07, "loss": 0.0566, "step": 31660 }, { "epoch": 2.7361873083070543, "grad_norm": 0.629698984875205, "learning_rate": 2.0124706617776546e-07, "loss": 0.0568, "step": 31670 }, { "epoch": 2.737051276513024, "grad_norm": 0.6599918753484036, "learning_rate": 1.999392846724618e-07, "loss": 0.0568, "step": 31680 }, { "epoch": 2.7379152447189945, "grad_norm": 0.6543758042897919, "learning_rate": 1.986356795690042e-07, "loss": 0.0556, "step": 31690 }, { "epoch": 2.7387792129249644, "grad_norm": 0.6784143602920379, "learning_rate": 1.973362520016231e-07, "loss": 0.0582, "step": 31700 }, { "epoch": 2.7396431811309343, "grad_norm": 0.6903678628469099, "learning_rate": 1.9604100310091523e-07, "loss": 0.0581, "step": 31710 }, { "epoch": 2.7405071493369046, "grad_norm": 0.6463342523156203, "learning_rate": 1.947499339938408e-07, "loss": 0.0567, "step": 31720 }, { "epoch": 2.7413711175428745, "grad_norm": 0.7025200315096658, "learning_rate": 1.93463045803724e-07, "loss": 0.0577, "step": 31730 }, { "epoch": 2.7422350857488444, "grad_norm": 0.6884195548334803, "learning_rate": 1.9218033965025195e-07, "loss": 0.0578, "step": 31740 }, { "epoch": 2.7430990539548143, "grad_norm": 0.6444704425358405, "learning_rate": 1.9090181664947137e-07, "loss": 0.0575, "step": 31750 }, { "epoch": 2.7439630221607842, "grad_norm": 0.6725158461214576, "learning_rate": 1.8962747791379023e-07, "loss": 0.0572, "step": 31760 }, { "epoch": 2.7448269903667546, "grad_norm": 0.6760315296837612, "learning_rate": 1.8835732455197596e-07, "loss": 0.0571, "step": 31770 }, { "epoch": 2.7456909585727245, "grad_norm": 0.6515448653414146, "learning_rate": 1.870913576691552e-07, "loss": 0.0574, "step": 31780 }, { "epoch": 2.7465549267786944, "grad_norm": 0.702263272691988, "learning_rate": 1.8582957836681015e-07, "loss": 0.0576, "step": 31790 }, { "epoch": 2.7474188949846647, "grad_norm": 0.674403943690754, "learning_rate": 1.845719877427815e-07, "loss": 0.0567, "step": 31800 }, { "epoch": 2.7482828631906346, "grad_norm": 0.6919511526668807, "learning_rate": 1.8331858689126348e-07, "loss": 0.0577, "step": 31810 }, { "epoch": 2.7491468313966045, "grad_norm": 0.669158032499984, "learning_rate": 1.820693769028059e-07, "loss": 0.057, "step": 31820 }, { "epoch": 2.750010799602575, "grad_norm": 0.6565133834585644, "learning_rate": 1.8082435886431383e-07, "loss": 0.0549, "step": 31830 }, { "epoch": 2.750874767808545, "grad_norm": 0.6700558974120085, "learning_rate": 1.7958353385904126e-07, "loss": 0.0581, "step": 31840 }, { "epoch": 2.7517387360145147, "grad_norm": 0.6799608539206533, "learning_rate": 1.7834690296659852e-07, "loss": 0.0588, "step": 31850 }, { "epoch": 2.7526027042204846, "grad_norm": 0.6342124927694704, "learning_rate": 1.7711446726294267e-07, "loss": 0.0586, "step": 31860 }, { "epoch": 2.7534666724264545, "grad_norm": 0.6311819082310265, "learning_rate": 1.7588622782038378e-07, "loss": 0.0562, "step": 31870 }, { "epoch": 2.754330640632425, "grad_norm": 0.674920565799908, "learning_rate": 1.7466218570757754e-07, "loss": 0.0595, "step": 31880 }, { "epoch": 2.7551946088383947, "grad_norm": 0.6635343214254527, "learning_rate": 1.7344234198953146e-07, "loss": 0.0574, "step": 31890 }, { "epoch": 2.7560585770443646, "grad_norm": 0.6525627692019093, "learning_rate": 1.7222669772759716e-07, "loss": 0.0547, "step": 31900 }, { "epoch": 2.756922545250335, "grad_norm": 0.6896387409372627, "learning_rate": 1.7101525397947406e-07, "loss": 0.0579, "step": 31910 }, { "epoch": 2.757786513456305, "grad_norm": 0.6720002525440155, "learning_rate": 1.6980801179920681e-07, "loss": 0.0573, "step": 31920 }, { "epoch": 2.758650481662275, "grad_norm": 0.6655959204666657, "learning_rate": 1.6860497223718188e-07, "loss": 0.056, "step": 31930 }, { "epoch": 2.759514449868245, "grad_norm": 0.6723784296940033, "learning_rate": 1.6740613634013413e-07, "loss": 0.0564, "step": 31940 }, { "epoch": 2.760378418074215, "grad_norm": 0.6816159314364688, "learning_rate": 1.6621150515113538e-07, "loss": 0.0548, "step": 31950 }, { "epoch": 2.761242386280185, "grad_norm": 0.6597189697816395, "learning_rate": 1.6502107970960246e-07, "loss": 0.0581, "step": 31960 }, { "epoch": 2.762106354486155, "grad_norm": 0.7127449095465125, "learning_rate": 1.6383486105129243e-07, "loss": 0.0564, "step": 31970 }, { "epoch": 2.7629703226921247, "grad_norm": 0.6623458323343064, "learning_rate": 1.6265285020830245e-07, "loss": 0.0588, "step": 31980 }, { "epoch": 2.763834290898095, "grad_norm": 0.6524127223074473, "learning_rate": 1.6147504820906546e-07, "loss": 0.0568, "step": 31990 }, { "epoch": 2.764698259104065, "grad_norm": 0.6452682493379834, "learning_rate": 1.603014560783578e-07, "loss": 0.0574, "step": 32000 }, { "epoch": 2.765562227310035, "grad_norm": 0.6838304105116794, "learning_rate": 1.5913207483728765e-07, "loss": 0.0565, "step": 32010 }, { "epoch": 2.766426195516005, "grad_norm": 0.7006017396514543, "learning_rate": 1.579669055033034e-07, "loss": 0.0563, "step": 32020 }, { "epoch": 2.767290163721975, "grad_norm": 0.63390922464745, "learning_rate": 1.5680594909018575e-07, "loss": 0.0574, "step": 32030 }, { "epoch": 2.768154131927945, "grad_norm": 0.630174268226932, "learning_rate": 1.5564920660805115e-07, "loss": 0.0563, "step": 32040 }, { "epoch": 2.769018100133915, "grad_norm": 0.6950850152884743, "learning_rate": 1.544966790633512e-07, "loss": 0.055, "step": 32050 }, { "epoch": 2.769882068339885, "grad_norm": 0.686069735603795, "learning_rate": 1.5334836745886762e-07, "loss": 0.0555, "step": 32060 }, { "epoch": 2.770746036545855, "grad_norm": 0.6924302699941641, "learning_rate": 1.5220427279371507e-07, "loss": 0.0579, "step": 32070 }, { "epoch": 2.771610004751825, "grad_norm": 0.6763504845141571, "learning_rate": 1.51064396063339e-07, "loss": 0.059, "step": 32080 }, { "epoch": 2.772473972957795, "grad_norm": 0.6855868771736366, "learning_rate": 1.4992873825951548e-07, "loss": 0.0581, "step": 32090 }, { "epoch": 2.7733379411637653, "grad_norm": 0.6702095553630669, "learning_rate": 1.4879730037034857e-07, "loss": 0.058, "step": 32100 }, { "epoch": 2.774201909369735, "grad_norm": 0.6798131090156784, "learning_rate": 1.47670083380273e-07, "loss": 0.0562, "step": 32110 }, { "epoch": 2.775065877575705, "grad_norm": 0.6878985668508981, "learning_rate": 1.465470882700476e-07, "loss": 0.0587, "step": 32120 }, { "epoch": 2.7759298457816755, "grad_norm": 0.6956681523253542, "learning_rate": 1.4542831601676078e-07, "loss": 0.0569, "step": 32130 }, { "epoch": 2.7767938139876454, "grad_norm": 0.6749804800774928, "learning_rate": 1.4431376759382664e-07, "loss": 0.0552, "step": 32140 }, { "epoch": 2.7776577821936153, "grad_norm": 0.6820298790434, "learning_rate": 1.432034439709812e-07, "loss": 0.0601, "step": 32150 }, { "epoch": 2.778521750399585, "grad_norm": 0.7023866826285801, "learning_rate": 1.4209734611428882e-07, "loss": 0.0588, "step": 32160 }, { "epoch": 2.779385718605555, "grad_norm": 0.7251283545475257, "learning_rate": 1.4099547498613365e-07, "loss": 0.0611, "step": 32170 }, { "epoch": 2.7802496868115254, "grad_norm": 0.6794399022502436, "learning_rate": 1.398978315452243e-07, "loss": 0.0586, "step": 32180 }, { "epoch": 2.7811136550174953, "grad_norm": 0.6829730156934579, "learning_rate": 1.3880441674659017e-07, "loss": 0.0578, "step": 32190 }, { "epoch": 2.781977623223465, "grad_norm": 0.6780607571604497, "learning_rate": 1.3771523154158194e-07, "loss": 0.0582, "step": 32200 }, { "epoch": 2.7828415914294355, "grad_norm": 0.6944067360719522, "learning_rate": 1.366302768778699e-07, "loss": 0.0563, "step": 32210 }, { "epoch": 2.7837055596354054, "grad_norm": 0.6926254817665085, "learning_rate": 1.3554955369944334e-07, "loss": 0.0575, "step": 32220 }, { "epoch": 2.7845695278413753, "grad_norm": 0.6412247121885293, "learning_rate": 1.3447306294661012e-07, "loss": 0.0589, "step": 32230 }, { "epoch": 2.7854334960473457, "grad_norm": 0.6625032725187675, "learning_rate": 1.334008055559949e-07, "loss": 0.057, "step": 32240 }, { "epoch": 2.7862974642533156, "grad_norm": 0.6890514579031258, "learning_rate": 1.3233278246054081e-07, "loss": 0.0575, "step": 32250 }, { "epoch": 2.7871614324592855, "grad_norm": 0.6749984423954664, "learning_rate": 1.3126899458950403e-07, "loss": 0.0567, "step": 32260 }, { "epoch": 2.7880254006652554, "grad_norm": 0.6058312889602515, "learning_rate": 1.302094428684586e-07, "loss": 0.0576, "step": 32270 }, { "epoch": 2.7888893688712253, "grad_norm": 0.6640679135768991, "learning_rate": 1.2915412821929096e-07, "loss": 0.0549, "step": 32280 }, { "epoch": 2.7897533370771956, "grad_norm": 0.6667923213308632, "learning_rate": 1.281030515602022e-07, "loss": 0.0542, "step": 32290 }, { "epoch": 2.7906173052831655, "grad_norm": 0.6763644991293586, "learning_rate": 1.2705621380570465e-07, "loss": 0.0564, "step": 32300 }, { "epoch": 2.7914812734891354, "grad_norm": 0.671158560568952, "learning_rate": 1.2601361586662364e-07, "loss": 0.0585, "step": 32310 }, { "epoch": 2.792345241695106, "grad_norm": 0.6522339042934434, "learning_rate": 1.249752586500952e-07, "loss": 0.0581, "step": 32320 }, { "epoch": 2.7932092099010757, "grad_norm": 0.6979271307967363, "learning_rate": 1.239411430595655e-07, "loss": 0.0586, "step": 32330 }, { "epoch": 2.7940731781070456, "grad_norm": 0.6799894206330298, "learning_rate": 1.229112699947904e-07, "loss": 0.0563, "step": 32340 }, { "epoch": 2.794937146313016, "grad_norm": 0.677619561225624, "learning_rate": 1.2188564035183425e-07, "loss": 0.0556, "step": 32350 }, { "epoch": 2.795801114518986, "grad_norm": 0.6297337136303034, "learning_rate": 1.2086425502306986e-07, "loss": 0.0553, "step": 32360 }, { "epoch": 2.7966650827249557, "grad_norm": 0.6450798875927606, "learning_rate": 1.1984711489717583e-07, "loss": 0.057, "step": 32370 }, { "epoch": 2.7975290509309256, "grad_norm": 0.6710396032708422, "learning_rate": 1.188342208591392e-07, "loss": 0.0572, "step": 32380 }, { "epoch": 2.7983930191368955, "grad_norm": 0.6870288667869697, "learning_rate": 1.1782557379025005e-07, "loss": 0.0566, "step": 32390 }, { "epoch": 2.799256987342866, "grad_norm": 0.6549415421772309, "learning_rate": 1.1682117456810583e-07, "loss": 0.0591, "step": 32400 }, { "epoch": 2.8001209555488358, "grad_norm": 0.6412098336825817, "learning_rate": 1.1582102406660634e-07, "loss": 0.0572, "step": 32410 }, { "epoch": 2.8009849237548057, "grad_norm": 0.6752618412945657, "learning_rate": 1.1482512315595607e-07, "loss": 0.058, "step": 32420 }, { "epoch": 2.801848891960776, "grad_norm": 0.6359954770406422, "learning_rate": 1.1383347270265965e-07, "loss": 0.0553, "step": 32430 }, { "epoch": 2.802712860166746, "grad_norm": 0.6876557978296493, "learning_rate": 1.1284607356952638e-07, "loss": 0.0575, "step": 32440 }, { "epoch": 2.803576828372716, "grad_norm": 0.7021487811787359, "learning_rate": 1.1186292661566511e-07, "loss": 0.0566, "step": 32450 }, { "epoch": 2.804440796578686, "grad_norm": 0.6750019608519389, "learning_rate": 1.108840326964844e-07, "loss": 0.0576, "step": 32460 }, { "epoch": 2.805304764784656, "grad_norm": 0.6277430752290388, "learning_rate": 1.0990939266369404e-07, "loss": 0.0587, "step": 32470 }, { "epoch": 2.806168732990626, "grad_norm": 0.6530520547027956, "learning_rate": 1.0893900736530127e-07, "loss": 0.0573, "step": 32480 }, { "epoch": 2.807032701196596, "grad_norm": 0.6696408985828085, "learning_rate": 1.0797287764561238e-07, "loss": 0.0566, "step": 32490 }, { "epoch": 2.8078966694025658, "grad_norm": 0.6628118939310904, "learning_rate": 1.0701100434522882e-07, "loss": 0.056, "step": 32500 }, { "epoch": 2.808760637608536, "grad_norm": 0.6358518359835514, "learning_rate": 1.0605338830105283e-07, "loss": 0.0567, "step": 32510 }, { "epoch": 2.809624605814506, "grad_norm": 0.6552015344909092, "learning_rate": 1.0510003034627848e-07, "loss": 0.0558, "step": 32520 }, { "epoch": 2.810488574020476, "grad_norm": 0.6786865741009189, "learning_rate": 1.0415093131039666e-07, "loss": 0.0542, "step": 32530 }, { "epoch": 2.8113525422264463, "grad_norm": 0.6708262449590945, "learning_rate": 1.0320609201919241e-07, "loss": 0.0546, "step": 32540 }, { "epoch": 2.812216510432416, "grad_norm": 0.6871889215796761, "learning_rate": 1.0226551329474477e-07, "loss": 0.0591, "step": 32550 }, { "epoch": 2.813080478638386, "grad_norm": 0.6796148770244584, "learning_rate": 1.0132919595542634e-07, "loss": 0.0572, "step": 32560 }, { "epoch": 2.813944446844356, "grad_norm": 0.6448854547531903, "learning_rate": 1.0039714081589991e-07, "loss": 0.057, "step": 32570 }, { "epoch": 2.8148084150503263, "grad_norm": 0.7214571409269863, "learning_rate": 9.946934868712176e-08, "loss": 0.0566, "step": 32580 }, { "epoch": 2.815672383256296, "grad_norm": 0.6864676012136871, "learning_rate": 9.854582037633953e-08, "loss": 0.0559, "step": 32590 }, { "epoch": 2.816536351462266, "grad_norm": 0.6875452157455118, "learning_rate": 9.762655668708876e-08, "loss": 0.0574, "step": 32600 }, { "epoch": 2.817400319668236, "grad_norm": 0.6471250629240326, "learning_rate": 9.671155841919577e-08, "loss": 0.0574, "step": 32610 }, { "epoch": 2.8182642878742064, "grad_norm": 0.6830004863111572, "learning_rate": 9.580082636877652e-08, "loss": 0.0569, "step": 32620 }, { "epoch": 2.8191282560801763, "grad_norm": 0.6419472467128323, "learning_rate": 9.489436132823326e-08, "loss": 0.057, "step": 32630 }, { "epoch": 2.819992224286146, "grad_norm": 0.657306933977155, "learning_rate": 9.399216408625678e-08, "loss": 0.0585, "step": 32640 }, { "epoch": 2.8208561924921165, "grad_norm": 0.6432005216885947, "learning_rate": 9.309423542782414e-08, "loss": 0.0573, "step": 32650 }, { "epoch": 2.8217201606980864, "grad_norm": 0.7002865300654664, "learning_rate": 9.22005761341982e-08, "loss": 0.0588, "step": 32660 }, { "epoch": 2.8225841289040563, "grad_norm": 0.6612868667391867, "learning_rate": 9.131118698292863e-08, "loss": 0.0566, "step": 32670 }, { "epoch": 2.823448097110026, "grad_norm": 0.6761202883755489, "learning_rate": 9.042606874784809e-08, "loss": 0.059, "step": 32680 }, { "epoch": 2.824312065315996, "grad_norm": 0.6862611791080319, "learning_rate": 8.954522219907392e-08, "loss": 0.0547, "step": 32690 }, { "epoch": 2.8251760335219664, "grad_norm": 0.6254683022957856, "learning_rate": 8.866864810300579e-08, "loss": 0.0564, "step": 32700 }, { "epoch": 2.8260400017279363, "grad_norm": 0.6541988728358744, "learning_rate": 8.77963472223281e-08, "loss": 0.0576, "step": 32710 }, { "epoch": 2.8269039699339062, "grad_norm": 0.7137230691053326, "learning_rate": 8.692832031600539e-08, "loss": 0.0568, "step": 32720 }, { "epoch": 2.8277679381398766, "grad_norm": 0.6744846313172326, "learning_rate": 8.60645681392841e-08, "loss": 0.0563, "step": 32730 }, { "epoch": 2.8286319063458465, "grad_norm": 0.7290262081800398, "learning_rate": 8.520509144369194e-08, "loss": 0.0582, "step": 32740 }, { "epoch": 2.8294958745518164, "grad_norm": 0.6928234087420588, "learning_rate": 8.434989097703572e-08, "loss": 0.0565, "step": 32750 }, { "epoch": 2.8303598427577867, "grad_norm": 0.6537822863085431, "learning_rate": 8.349896748340191e-08, "loss": 0.0576, "step": 32760 }, { "epoch": 2.8312238109637566, "grad_norm": 0.6664152071722941, "learning_rate": 8.265232170315607e-08, "loss": 0.0584, "step": 32770 }, { "epoch": 2.8320877791697265, "grad_norm": 0.6543148772010279, "learning_rate": 8.180995437294226e-08, "loss": 0.0564, "step": 32780 }, { "epoch": 2.8329517473756964, "grad_norm": 0.6469114237292819, "learning_rate": 8.097186622568032e-08, "loss": 0.0566, "step": 32790 }, { "epoch": 2.8338157155816663, "grad_norm": 0.6486680998913869, "learning_rate": 8.013805799056806e-08, "loss": 0.0559, "step": 32800 }, { "epoch": 2.8346796837876367, "grad_norm": 0.6528582438625301, "learning_rate": 7.930853039307962e-08, "loss": 0.0569, "step": 32810 }, { "epoch": 2.8355436519936066, "grad_norm": 0.6647853143387031, "learning_rate": 7.848328415496375e-08, "loss": 0.0584, "step": 32820 }, { "epoch": 2.8364076201995765, "grad_norm": 0.6857635854830215, "learning_rate": 7.766231999424556e-08, "loss": 0.0569, "step": 32830 }, { "epoch": 2.837271588405547, "grad_norm": 0.6888594940329839, "learning_rate": 7.684563862522254e-08, "loss": 0.0581, "step": 32840 }, { "epoch": 2.8381355566115167, "grad_norm": 0.6483498063174805, "learning_rate": 7.603324075846741e-08, "loss": 0.0548, "step": 32850 }, { "epoch": 2.8389995248174866, "grad_norm": 0.6961287471485743, "learning_rate": 7.522512710082531e-08, "loss": 0.0572, "step": 32860 }, { "epoch": 2.839863493023457, "grad_norm": 0.6713877134589054, "learning_rate": 7.442129835541433e-08, "loss": 0.0558, "step": 32870 }, { "epoch": 2.840727461229427, "grad_norm": 0.6447047687390621, "learning_rate": 7.362175522162284e-08, "loss": 0.0558, "step": 32880 }, { "epoch": 2.8415914294353968, "grad_norm": 0.6950714605507896, "learning_rate": 7.282649839511269e-08, "loss": 0.0562, "step": 32890 }, { "epoch": 2.8424553976413667, "grad_norm": 0.662844122297223, "learning_rate": 7.203552856781482e-08, "loss": 0.057, "step": 32900 }, { "epoch": 2.8433193658473366, "grad_norm": 0.6764003819400013, "learning_rate": 7.124884642793039e-08, "loss": 0.0576, "step": 32910 }, { "epoch": 2.844183334053307, "grad_norm": 0.6665223091511387, "learning_rate": 7.046645265992968e-08, "loss": 0.0546, "step": 32920 }, { "epoch": 2.845047302259277, "grad_norm": 0.7105587069690044, "learning_rate": 6.968834794455203e-08, "loss": 0.0583, "step": 32930 }, { "epoch": 2.8459112704652467, "grad_norm": 0.6579588776400764, "learning_rate": 6.891453295880591e-08, "loss": 0.0572, "step": 32940 }, { "epoch": 2.846775238671217, "grad_norm": 0.6500028386919731, "learning_rate": 6.81450083759666e-08, "loss": 0.0572, "step": 32950 }, { "epoch": 2.847639206877187, "grad_norm": 0.7149086521713711, "learning_rate": 6.737977486557578e-08, "loss": 0.0543, "step": 32960 }, { "epoch": 2.848503175083157, "grad_norm": 0.6856207174938993, "learning_rate": 6.661883309344253e-08, "loss": 0.0568, "step": 32970 }, { "epoch": 2.849367143289127, "grad_norm": 0.6891817002441347, "learning_rate": 6.586218372164166e-08, "loss": 0.0571, "step": 32980 }, { "epoch": 2.850231111495097, "grad_norm": 0.6777405340120837, "learning_rate": 6.510982740851269e-08, "loss": 0.0564, "step": 32990 }, { "epoch": 2.851095079701067, "grad_norm": 0.6673915333144583, "learning_rate": 6.436176480866141e-08, "loss": 0.0569, "step": 33000 }, { "epoch": 2.851959047907037, "grad_norm": 0.6734418146878922, "learning_rate": 6.361799657295552e-08, "loss": 0.0586, "step": 33010 }, { "epoch": 2.852823016113007, "grad_norm": 0.6904229506397264, "learning_rate": 6.287852334852795e-08, "loss": 0.06, "step": 33020 }, { "epoch": 2.853686984318977, "grad_norm": 0.6830159237337381, "learning_rate": 6.214334577877401e-08, "loss": 0.0554, "step": 33030 }, { "epoch": 2.854550952524947, "grad_norm": 0.7220761887506169, "learning_rate": 6.141246450335148e-08, "loss": 0.0586, "step": 33040 }, { "epoch": 2.855414920730917, "grad_norm": 0.6791045442446241, "learning_rate": 6.068588015818055e-08, "loss": 0.0579, "step": 33050 }, { "epoch": 2.8562788889368873, "grad_norm": 0.6860422169888754, "learning_rate": 5.996359337544277e-08, "loss": 0.0554, "step": 33060 }, { "epoch": 2.857142857142857, "grad_norm": 0.6926889419675112, "learning_rate": 5.924560478357988e-08, "loss": 0.0572, "step": 33070 }, { "epoch": 2.858006825348827, "grad_norm": 0.6572432697985694, "learning_rate": 5.853191500729327e-08, "loss": 0.0589, "step": 33080 }, { "epoch": 2.8588707935547975, "grad_norm": 0.6959692528825726, "learning_rate": 5.782252466754623e-08, "loss": 0.0577, "step": 33090 }, { "epoch": 2.8597347617607674, "grad_norm": 0.6563165677197141, "learning_rate": 5.711743438155892e-08, "loss": 0.0583, "step": 33100 }, { "epoch": 2.8605987299667373, "grad_norm": 0.6539622559768795, "learning_rate": 5.6416644762812277e-08, "loss": 0.0578, "step": 33110 }, { "epoch": 2.861462698172707, "grad_norm": 0.6369086566201649, "learning_rate": 5.5720156421043556e-08, "loss": 0.0564, "step": 33120 }, { "epoch": 2.862326666378677, "grad_norm": 0.6420979588342911, "learning_rate": 5.502796996224746e-08, "loss": 0.0563, "step": 33130 }, { "epoch": 2.8631906345846474, "grad_norm": 0.6695727199391603, "learning_rate": 5.434008598867835e-08, "loss": 0.0554, "step": 33140 }, { "epoch": 2.8640546027906173, "grad_norm": 0.6600310210773601, "learning_rate": 5.365650509884357e-08, "loss": 0.0586, "step": 33150 }, { "epoch": 2.864918570996587, "grad_norm": 0.6484929852915966, "learning_rate": 5.297722788750958e-08, "loss": 0.0556, "step": 33160 }, { "epoch": 2.8657825392025575, "grad_norm": 0.7134033307243018, "learning_rate": 5.230225494569585e-08, "loss": 0.0543, "step": 33170 }, { "epoch": 2.8666465074085274, "grad_norm": 0.6946163701427388, "learning_rate": 5.1631586860678703e-08, "loss": 0.0557, "step": 33180 }, { "epoch": 2.8675104756144973, "grad_norm": 0.6743236939216565, "learning_rate": 5.0965224215987484e-08, "loss": 0.0557, "step": 33190 }, { "epoch": 2.8683744438204672, "grad_norm": 0.6385782451925563, "learning_rate": 5.030316759140674e-08, "loss": 0.0543, "step": 33200 }, { "epoch": 2.8692384120264376, "grad_norm": 0.6894700813928674, "learning_rate": 4.9645417562973476e-08, "loss": 0.056, "step": 33210 }, { "epoch": 2.8701023802324075, "grad_norm": 0.6675629300734072, "learning_rate": 4.899197470297823e-08, "loss": 0.056, "step": 33220 }, { "epoch": 2.8709663484383774, "grad_norm": 0.668713281409164, "learning_rate": 4.834283957996344e-08, "loss": 0.0547, "step": 33230 }, { "epoch": 2.8718303166443473, "grad_norm": 0.6665885336321217, "learning_rate": 4.7698012758724547e-08, "loss": 0.0559, "step": 33240 }, { "epoch": 2.8726942848503176, "grad_norm": 0.6821105530534564, "learning_rate": 4.705749480030719e-08, "loss": 0.0572, "step": 33250 }, { "epoch": 2.8735582530562875, "grad_norm": 0.6708658577242343, "learning_rate": 4.6421286262008924e-08, "loss": 0.0572, "step": 33260 }, { "epoch": 2.8744222212622574, "grad_norm": 0.647767125351178, "learning_rate": 4.578938769737751e-08, "loss": 0.0584, "step": 33270 }, { "epoch": 2.875286189468228, "grad_norm": 0.6780529409748565, "learning_rate": 4.5161799656209814e-08, "loss": 0.0559, "step": 33280 }, { "epoch": 2.8761501576741977, "grad_norm": 0.649348742633913, "learning_rate": 4.453852268455461e-08, "loss": 0.0593, "step": 33290 }, { "epoch": 2.8770141258801676, "grad_norm": 0.6676144330672698, "learning_rate": 4.391955732470643e-08, "loss": 0.0566, "step": 33300 }, { "epoch": 2.8778780940861375, "grad_norm": 0.686248518816948, "learning_rate": 4.330490411521116e-08, "loss": 0.0577, "step": 33310 }, { "epoch": 2.8787420622921074, "grad_norm": 0.6842785616361624, "learning_rate": 4.2694563590861546e-08, "loss": 0.0562, "step": 33320 }, { "epoch": 2.8796060304980777, "grad_norm": 0.6437126649798809, "learning_rate": 4.2088536282698376e-08, "loss": 0.057, "step": 33330 }, { "epoch": 2.8804699987040476, "grad_norm": 0.6942606890668203, "learning_rate": 4.14868227180093e-08, "loss": 0.0588, "step": 33340 }, { "epoch": 2.8813339669100175, "grad_norm": 0.6577488295499587, "learning_rate": 4.0889423420328866e-08, "loss": 0.0559, "step": 33350 }, { "epoch": 2.882197935115988, "grad_norm": 0.6658072223920517, "learning_rate": 4.029633890943796e-08, "loss": 0.0557, "step": 33360 }, { "epoch": 2.8830619033219578, "grad_norm": 0.6897837859797347, "learning_rate": 3.97075697013638e-08, "loss": 0.0577, "step": 33370 }, { "epoch": 2.8839258715279277, "grad_norm": 0.6755459921707765, "learning_rate": 3.912311630837717e-08, "loss": 0.0582, "step": 33380 }, { "epoch": 2.884789839733898, "grad_norm": 0.688782499527184, "learning_rate": 3.8542979238995746e-08, "loss": 0.0589, "step": 33390 }, { "epoch": 2.885653807939868, "grad_norm": 0.6655676381591664, "learning_rate": 3.7967158997981315e-08, "loss": 0.0576, "step": 33400 }, { "epoch": 2.886517776145838, "grad_norm": 0.6986677921599695, "learning_rate": 3.739565608633866e-08, "loss": 0.0558, "step": 33410 }, { "epoch": 2.8873817443518077, "grad_norm": 0.663380414330348, "learning_rate": 3.6828471001317254e-08, "loss": 0.0577, "step": 33420 }, { "epoch": 2.8882457125577776, "grad_norm": 0.6936533010310243, "learning_rate": 3.626560423640957e-08, "loss": 0.0589, "step": 33430 }, { "epoch": 2.889109680763748, "grad_norm": 0.6477438704961663, "learning_rate": 3.570705628134996e-08, "loss": 0.0577, "step": 33440 }, { "epoch": 2.889973648969718, "grad_norm": 0.6871551024773646, "learning_rate": 3.5152827622116356e-08, "loss": 0.0571, "step": 33450 }, { "epoch": 2.8908376171756878, "grad_norm": 0.6764162456797573, "learning_rate": 3.460291874092747e-08, "loss": 0.0584, "step": 33460 }, { "epoch": 2.891701585381658, "grad_norm": 0.6676655057360613, "learning_rate": 3.405733011624446e-08, "loss": 0.0578, "step": 33470 }, { "epoch": 2.892565553587628, "grad_norm": 0.662029257328337, "learning_rate": 3.3516062222768706e-08, "loss": 0.0547, "step": 33480 }, { "epoch": 2.893429521793598, "grad_norm": 0.7215604812631449, "learning_rate": 3.297911553144295e-08, "loss": 0.0572, "step": 33490 }, { "epoch": 2.8942934899995683, "grad_norm": 0.6623796053756021, "learning_rate": 3.2446490509450145e-08, "loss": 0.0557, "step": 33500 }, { "epoch": 2.895157458205538, "grad_norm": 0.6771863576032046, "learning_rate": 3.1918187620211816e-08, "loss": 0.0571, "step": 33510 }, { "epoch": 2.896021426411508, "grad_norm": 0.6760263824674185, "learning_rate": 3.1394207323390826e-08, "loss": 0.0572, "step": 33520 }, { "epoch": 2.896885394617478, "grad_norm": 0.664126918625583, "learning_rate": 3.087455007488804e-08, "loss": 0.0575, "step": 33530 }, { "epoch": 2.897749362823448, "grad_norm": 0.695653549487125, "learning_rate": 3.035921632684236e-08, "loss": 0.0578, "step": 33540 }, { "epoch": 2.898613331029418, "grad_norm": 0.6849873662405368, "learning_rate": 2.984820652763232e-08, "loss": 0.0565, "step": 33550 }, { "epoch": 2.899477299235388, "grad_norm": 0.6401444792075645, "learning_rate": 2.9341521121873385e-08, "loss": 0.0587, "step": 33560 }, { "epoch": 2.900341267441358, "grad_norm": 0.6562321741503842, "learning_rate": 2.8839160550418466e-08, "loss": 0.0578, "step": 33570 }, { "epoch": 2.9012052356473284, "grad_norm": 0.7165707023965725, "learning_rate": 2.8341125250357928e-08, "loss": 0.056, "step": 33580 }, { "epoch": 2.9020692038532983, "grad_norm": 0.6535414446662702, "learning_rate": 2.7847415655019026e-08, "loss": 0.0562, "step": 33590 }, { "epoch": 2.902933172059268, "grad_norm": 0.6590199643709393, "learning_rate": 2.7358032193964824e-08, "loss": 0.0568, "step": 33600 }, { "epoch": 2.9037971402652385, "grad_norm": 0.7038395574717661, "learning_rate": 2.6872975292994153e-08, "loss": 0.0574, "step": 33610 }, { "epoch": 2.9046611084712084, "grad_norm": 0.6603831559878955, "learning_rate": 2.63922453741422e-08, "loss": 0.0537, "step": 33620 }, { "epoch": 2.9055250766771783, "grad_norm": 0.6809259145907269, "learning_rate": 2.5915842855678276e-08, "loss": 0.0561, "step": 33630 }, { "epoch": 2.906389044883148, "grad_norm": 0.6675281253432127, "learning_rate": 2.5443768152108583e-08, "loss": 0.0545, "step": 33640 }, { "epoch": 2.907253013089118, "grad_norm": 0.6681893173881671, "learning_rate": 2.4976021674171236e-08, "loss": 0.0547, "step": 33650 }, { "epoch": 2.9081169812950884, "grad_norm": 0.6592809611380828, "learning_rate": 2.4512603828839575e-08, "loss": 0.0561, "step": 33660 }, { "epoch": 2.9089809495010583, "grad_norm": 0.6490028684571275, "learning_rate": 2.4053515019322184e-08, "loss": 0.0565, "step": 33670 }, { "epoch": 2.9098449177070282, "grad_norm": 0.656601858138169, "learning_rate": 2.3598755645057868e-08, "loss": 0.0571, "step": 33680 }, { "epoch": 2.9107088859129986, "grad_norm": 0.6941640271804507, "learning_rate": 2.3148326101722352e-08, "loss": 0.0574, "step": 33690 }, { "epoch": 2.9115728541189685, "grad_norm": 0.6603126085653375, "learning_rate": 2.2702226781219916e-08, "loss": 0.0581, "step": 33700 }, { "epoch": 2.9124368223249384, "grad_norm": 0.668590890656347, "learning_rate": 2.2260458071691194e-08, "loss": 0.0568, "step": 33710 }, { "epoch": 2.9133007905309087, "grad_norm": 0.6493466506588934, "learning_rate": 2.1823020357506497e-08, "loss": 0.0569, "step": 33720 }, { "epoch": 2.9141647587368786, "grad_norm": 0.6539909184550178, "learning_rate": 2.1389914019268045e-08, "loss": 0.0574, "step": 33730 }, { "epoch": 2.9150287269428485, "grad_norm": 0.6527098662158097, "learning_rate": 2.0961139433810508e-08, "loss": 0.0556, "step": 33740 }, { "epoch": 2.9158926951488184, "grad_norm": 0.7010404064747029, "learning_rate": 2.0536696974198245e-08, "loss": 0.0556, "step": 33750 }, { "epoch": 2.9167566633547883, "grad_norm": 0.6286359295433286, "learning_rate": 2.0116587009727516e-08, "loss": 0.0565, "step": 33760 }, { "epoch": 2.9176206315607587, "grad_norm": 0.6462394665804463, "learning_rate": 1.9700809905924267e-08, "loss": 0.0551, "step": 33770 }, { "epoch": 2.9184845997667286, "grad_norm": 0.6467936095668956, "learning_rate": 1.928936602454523e-08, "loss": 0.0566, "step": 33780 }, { "epoch": 2.9193485679726985, "grad_norm": 0.6431999964981738, "learning_rate": 1.888225572357627e-08, "loss": 0.0527, "step": 33790 }, { "epoch": 2.920212536178669, "grad_norm": 0.677044077943376, "learning_rate": 1.8479479357232933e-08, "loss": 0.0584, "step": 33800 }, { "epoch": 2.9210765043846387, "grad_norm": 0.6490379738984648, "learning_rate": 1.808103727595989e-08, "loss": 0.0556, "step": 33810 }, { "epoch": 2.9219404725906086, "grad_norm": 0.6365858333329392, "learning_rate": 1.7686929826430944e-08, "loss": 0.0579, "step": 33820 }, { "epoch": 2.9228044407965785, "grad_norm": 0.6736372234260222, "learning_rate": 1.7297157351547912e-08, "loss": 0.055, "step": 33830 }, { "epoch": 2.9236684090025484, "grad_norm": 0.6796407556520421, "learning_rate": 1.6911720190441183e-08, "loss": 0.0543, "step": 33840 }, { "epoch": 2.9245323772085188, "grad_norm": 0.6906670089068052, "learning_rate": 1.6530618678469723e-08, "loss": 0.0585, "step": 33850 }, { "epoch": 2.9253963454144887, "grad_norm": 0.6885910897171846, "learning_rate": 1.6153853147218846e-08, "loss": 0.058, "step": 33860 }, { "epoch": 2.9262603136204586, "grad_norm": 0.6780701321572589, "learning_rate": 1.5781423924502438e-08, "loss": 0.0557, "step": 33870 }, { "epoch": 2.927124281826429, "grad_norm": 0.6588324510313825, "learning_rate": 1.541333133436018e-08, "loss": 0.058, "step": 33880 }, { "epoch": 2.927988250032399, "grad_norm": 0.6491208370689091, "learning_rate": 1.5049575697060337e-08, "loss": 0.0569, "step": 33890 }, { "epoch": 2.9288522182383687, "grad_norm": 0.6517897930275939, "learning_rate": 1.4690157329096399e-08, "loss": 0.0555, "step": 33900 }, { "epoch": 2.929716186444339, "grad_norm": 0.7042225264467061, "learning_rate": 1.4335076543188776e-08, "loss": 0.0555, "step": 33910 }, { "epoch": 2.930580154650309, "grad_norm": 0.6580352339265412, "learning_rate": 1.398433364828311e-08, "loss": 0.0579, "step": 33920 }, { "epoch": 2.931444122856279, "grad_norm": 0.6628295724247529, "learning_rate": 1.3637928949551404e-08, "loss": 0.0563, "step": 33930 }, { "epoch": 2.9323080910622488, "grad_norm": 0.6664636967953322, "learning_rate": 1.3295862748390898e-08, "loss": 0.0564, "step": 33940 }, { "epoch": 2.9331720592682187, "grad_norm": 0.684321235610932, "learning_rate": 1.2958135342424071e-08, "loss": 0.0549, "step": 33950 }, { "epoch": 2.934036027474189, "grad_norm": 0.678915128766411, "learning_rate": 1.262474702549865e-08, "loss": 0.0569, "step": 33960 }, { "epoch": 2.934899995680159, "grad_norm": 0.6801977159105094, "learning_rate": 1.2295698087685937e-08, "loss": 0.0582, "step": 33970 }, { "epoch": 2.935763963886129, "grad_norm": 0.630776425498901, "learning_rate": 1.1970988815283025e-08, "loss": 0.0563, "step": 33980 }, { "epoch": 2.936627932092099, "grad_norm": 0.6857107028827767, "learning_rate": 1.1650619490810588e-08, "loss": 0.0551, "step": 33990 }, { "epoch": 2.937491900298069, "grad_norm": 0.6480468923534133, "learning_rate": 1.1334590393013434e-08, "loss": 0.0559, "step": 34000 }, { "epoch": 2.938355868504039, "grad_norm": 0.6734319616204011, "learning_rate": 1.1022901796858832e-08, "loss": 0.0559, "step": 34010 }, { "epoch": 2.9392198367100093, "grad_norm": 0.6603543423140258, "learning_rate": 1.0715553973539849e-08, "loss": 0.0588, "step": 34020 }, { "epoch": 2.940083804915979, "grad_norm": 0.7978284748396589, "learning_rate": 1.0412547190470912e-08, "loss": 0.0564, "step": 34030 }, { "epoch": 2.940947773121949, "grad_norm": 0.681328579342505, "learning_rate": 1.0113881711289464e-08, "loss": 0.0581, "step": 34040 }, { "epoch": 2.941811741327919, "grad_norm": 0.6893855973861117, "learning_rate": 9.819557795857082e-09, "loss": 0.058, "step": 34050 }, { "epoch": 2.942675709533889, "grad_norm": 0.6615292039101736, "learning_rate": 9.52957570025559e-09, "loss": 0.0558, "step": 34060 }, { "epoch": 2.9435396777398593, "grad_norm": 0.6897433502814333, "learning_rate": 9.243935676792603e-09, "loss": 0.0573, "step": 34070 }, { "epoch": 2.944403645945829, "grad_norm": 0.6570832913653472, "learning_rate": 8.96263797399377e-09, "loss": 0.0556, "step": 34080 }, { "epoch": 2.945267614151799, "grad_norm": 0.6792304176789006, "learning_rate": 8.685682836609421e-09, "loss": 0.0543, "step": 34090 }, { "epoch": 2.9461315823577694, "grad_norm": 0.6579739271767348, "learning_rate": 8.413070505610132e-09, "loss": 0.058, "step": 34100 }, { "epoch": 2.9469955505637393, "grad_norm": 0.669809562415206, "learning_rate": 8.144801218189502e-09, "loss": 0.0578, "step": 34110 }, { "epoch": 2.947859518769709, "grad_norm": 0.6799320416148782, "learning_rate": 7.880875207760264e-09, "loss": 0.0567, "step": 34120 }, { "epoch": 2.9487234869756795, "grad_norm": 0.6629897134562075, "learning_rate": 7.621292703957617e-09, "loss": 0.0578, "step": 34130 }, { "epoch": 2.9495874551816494, "grad_norm": 0.6552021162116146, "learning_rate": 7.366053932637563e-09, "loss": 0.0538, "step": 34140 }, { "epoch": 2.9504514233876193, "grad_norm": 0.683065500854779, "learning_rate": 7.115159115875792e-09, "loss": 0.0553, "step": 34150 }, { "epoch": 2.9513153915935892, "grad_norm": 0.6874739280286147, "learning_rate": 6.8686084719693515e-09, "loss": 0.0597, "step": 34160 }, { "epoch": 2.952179359799559, "grad_norm": 0.6422584639658296, "learning_rate": 6.626402215434979e-09, "loss": 0.0569, "step": 34170 }, { "epoch": 2.9530433280055295, "grad_norm": 0.7158995024434536, "learning_rate": 6.388540557010214e-09, "loss": 0.0574, "step": 34180 }, { "epoch": 2.9539072962114994, "grad_norm": 0.6680913688006366, "learning_rate": 6.15502370365173e-09, "loss": 0.0564, "step": 34190 }, { "epoch": 2.9547712644174693, "grad_norm": 0.7093620408628543, "learning_rate": 5.925851858535891e-09, "loss": 0.0567, "step": 34200 }, { "epoch": 2.9556352326234396, "grad_norm": 0.6286177131295109, "learning_rate": 5.7010252210593085e-09, "loss": 0.0561, "step": 34210 }, { "epoch": 2.9564992008294095, "grad_norm": 0.6348257609589285, "learning_rate": 5.480543986837172e-09, "loss": 0.0525, "step": 34220 }, { "epoch": 2.9573631690353794, "grad_norm": 0.7138653645631411, "learning_rate": 5.264408347704364e-09, "loss": 0.0563, "step": 34230 }, { "epoch": 2.95822713724135, "grad_norm": 0.6958979162027398, "learning_rate": 5.0526184917149e-09, "loss": 0.0545, "step": 34240 }, { "epoch": 2.9590911054473197, "grad_norm": 0.6673351506192208, "learning_rate": 4.845174603140823e-09, "loss": 0.0573, "step": 34250 }, { "epoch": 2.9599550736532896, "grad_norm": 0.6597776756569177, "learning_rate": 4.6420768624738654e-09, "loss": 0.0543, "step": 34260 }, { "epoch": 2.9608190418592595, "grad_norm": 0.6907853197848071, "learning_rate": 4.44332544642323e-09, "loss": 0.0561, "step": 34270 }, { "epoch": 2.9616830100652294, "grad_norm": 0.6367120740998353, "learning_rate": 4.2489205279172555e-09, "loss": 0.0556, "step": 34280 }, { "epoch": 2.9625469782711997, "grad_norm": 0.6835548939710773, "learning_rate": 4.05886227610286e-09, "loss": 0.0563, "step": 34290 }, { "epoch": 2.9634109464771696, "grad_norm": 0.6656890164686422, "learning_rate": 3.873150856344432e-09, "loss": 0.0568, "step": 34300 }, { "epoch": 2.9642749146831395, "grad_norm": 0.6556734265025637, "learning_rate": 3.6917864302238315e-09, "loss": 0.0564, "step": 34310 }, { "epoch": 2.96513888288911, "grad_norm": 0.7248595471350836, "learning_rate": 3.5147691555420526e-09, "loss": 0.057, "step": 34320 }, { "epoch": 2.9660028510950798, "grad_norm": 0.662123530396553, "learning_rate": 3.34209918631645e-09, "loss": 0.0583, "step": 34330 }, { "epoch": 2.9668668193010497, "grad_norm": 0.6509452814103437, "learning_rate": 3.1737766727824028e-09, "loss": 0.0582, "step": 34340 }, { "epoch": 2.96773078750702, "grad_norm": 0.7050813393203283, "learning_rate": 3.0098017613933163e-09, "loss": 0.0564, "step": 34350 }, { "epoch": 2.96859475571299, "grad_norm": 0.6654292686780983, "learning_rate": 2.8501745948184e-09, "loss": 0.0559, "step": 34360 }, { "epoch": 2.96945872391896, "grad_norm": 0.6821160988216692, "learning_rate": 2.6948953119459997e-09, "loss": 0.0571, "step": 34370 }, { "epoch": 2.9703226921249297, "grad_norm": 0.684418049892257, "learning_rate": 2.54396404787971e-09, "loss": 0.0555, "step": 34380 }, { "epoch": 2.9711866603308996, "grad_norm": 0.6814190481168771, "learning_rate": 2.3973809339405963e-09, "loss": 0.0571, "step": 34390 }, { "epoch": 2.97205062853687, "grad_norm": 0.6795433479173877, "learning_rate": 2.2551460976666385e-09, "loss": 0.0565, "step": 34400 }, { "epoch": 2.97291459674284, "grad_norm": 0.6785623293407695, "learning_rate": 2.1172596628127316e-09, "loss": 0.0567, "step": 34410 }, { "epoch": 2.9737785649488098, "grad_norm": 0.6809051694327195, "learning_rate": 1.9837217493501313e-09, "loss": 0.0555, "step": 34420 }, { "epoch": 2.97464253315478, "grad_norm": 0.6369055902211925, "learning_rate": 1.8545324734664526e-09, "loss": 0.0569, "step": 34430 }, { "epoch": 2.97550650136075, "grad_norm": 0.6910310396003512, "learning_rate": 1.7296919475662254e-09, "loss": 0.0562, "step": 34440 }, { "epoch": 2.97637046956672, "grad_norm": 0.7202616011304915, "learning_rate": 1.6092002802686747e-09, "loss": 0.0578, "step": 34450 }, { "epoch": 2.97723443777269, "grad_norm": 0.6564232420344026, "learning_rate": 1.49305757641105e-09, "loss": 0.0561, "step": 34460 }, { "epoch": 2.9780984059786597, "grad_norm": 0.6755704437504937, "learning_rate": 1.3812639370458514e-09, "loss": 0.0579, "step": 34470 }, { "epoch": 2.97896237418463, "grad_norm": 0.6502855806426288, "learning_rate": 1.2738194594419384e-09, "loss": 0.0557, "step": 34480 }, { "epoch": 2.9798263423906, "grad_norm": 0.6439414003128155, "learning_rate": 1.1707242370834204e-09, "loss": 0.0575, "step": 34490 }, { "epoch": 2.98069031059657, "grad_norm": 0.6340317895115861, "learning_rate": 1.0719783596707666e-09, "loss": 0.0564, "step": 34500 }, { "epoch": 2.98155427880254, "grad_norm": 0.6753564648972211, "learning_rate": 9.775819131202513e-10, "loss": 0.0556, "step": 34510 }, { "epoch": 2.98241824700851, "grad_norm": 0.6455724063217956, "learning_rate": 8.87534979562843e-10, "loss": 0.0559, "step": 34520 }, { "epoch": 2.98328221521448, "grad_norm": 0.6909467453570447, "learning_rate": 8.018376373469805e-10, "loss": 0.0557, "step": 34530 }, { "epoch": 2.9841461834204503, "grad_norm": 0.6744291133600068, "learning_rate": 7.204899610352423e-10, "loss": 0.0538, "step": 34540 }, { "epoch": 2.9850101516264202, "grad_norm": 0.6400069292789241, "learning_rate": 6.434920214060114e-10, "loss": 0.0565, "step": 34550 }, { "epoch": 2.98587411983239, "grad_norm": 0.6660665422309995, "learning_rate": 5.708438854523656e-10, "loss": 0.0567, "step": 34560 }, { "epoch": 2.98673808803836, "grad_norm": 0.6877542654558588, "learning_rate": 5.02545616384853e-10, "loss": 0.0589, "step": 34570 }, { "epoch": 2.98760205624433, "grad_norm": 0.6685372840389431, "learning_rate": 4.385972736264954e-10, "loss": 0.0567, "step": 34580 }, { "epoch": 2.9884660244503003, "grad_norm": 0.6579606458219801, "learning_rate": 3.7899891281834025e-10, "loss": 0.0568, "step": 34590 }, { "epoch": 2.98932999265627, "grad_norm": 0.6810902715442537, "learning_rate": 3.2375058581446407e-10, "loss": 0.0585, "step": 34600 }, { "epoch": 2.99019396086224, "grad_norm": 0.6969850812240518, "learning_rate": 2.728523406847483e-10, "loss": 0.0569, "step": 34610 }, { "epoch": 2.9910579290682104, "grad_norm": 0.6943564708722185, "learning_rate": 2.263042217148792e-10, "loss": 0.0569, "step": 34620 }, { "epoch": 2.9919218972741803, "grad_norm": 0.7012718186319877, "learning_rate": 1.8410626940523757e-10, "loss": 0.0564, "step": 34630 }, { "epoch": 2.9927858654801502, "grad_norm": 0.6768713971350516, "learning_rate": 1.4625852047034373e-10, "loss": 0.0591, "step": 34640 }, { "epoch": 2.9936498336861206, "grad_norm": 0.6859084073565257, "learning_rate": 1.1276100784163302e-10, "loss": 0.0576, "step": 34650 }, { "epoch": 2.9945138018920905, "grad_norm": 0.6851362984991908, "learning_rate": 8.361376066357007e-11, "loss": 0.0553, "step": 34660 }, { "epoch": 2.9953777700980604, "grad_norm": 0.6529437264727286, "learning_rate": 5.881680429586922e-11, "loss": 0.0546, "step": 34670 }, { "epoch": 2.9962417383040303, "grad_norm": 0.6852860443468494, "learning_rate": 3.8370160315159834e-11, "loss": 0.0563, "step": 34680 }, { "epoch": 2.99710570651, "grad_norm": 0.6575050306681262, "learning_rate": 2.2273846509990314e-11, "loss": 0.0579, "step": 34690 }, { "epoch": 2.9979696747159705, "grad_norm": 0.6579715634072224, "learning_rate": 1.052787688637924e-11, "loss": 0.0549, "step": 34700 }, { "epoch": 2.9988336429219404, "grad_norm": 0.7080004725681195, "learning_rate": 3.132261663929548e-12, "loss": 0.06, "step": 34710 }, { "epoch": 2.9996976111279103, "grad_norm": 0.6662699930555676, "learning_rate": 8.700727749388904e-14, "loss": 0.058, "step": 34720 }, { "epoch": 2.9998704047691045, "step": 34722, "total_flos": 7939707523891200.0, "train_loss": 0.11055064558756254, "train_runtime": 74830.8535, "train_samples_per_second": 59.395, "train_steps_per_second": 0.464 } ], "logging_steps": 10, "max_steps": 34722, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7939707523891200.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }