{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 31359, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003188877196339169, "grad_norm": 2.681196928024292, "learning_rate": 1.5944385981695843e-05, "loss": 9.6714, "step": 10 }, { "epoch": 0.0006377754392678338, "grad_norm": 2.1921708583831787, "learning_rate": 3.1888771963391685e-05, "loss": 9.1417, "step": 20 }, { "epoch": 0.0009566631589017507, "grad_norm": 2.14097261428833, "learning_rate": 4.7833157945087525e-05, "loss": 8.6391, "step": 30 }, { "epoch": 0.0012755508785356677, "grad_norm": 1.5710511207580566, "learning_rate": 6.377754392678337e-05, "loss": 8.037, "step": 40 }, { "epoch": 0.0015944385981695845, "grad_norm": 1.208488941192627, "learning_rate": 7.972192990847922e-05, "loss": 7.4423, "step": 50 }, { "epoch": 0.0019133263178035013, "grad_norm": 0.9196066856384277, "learning_rate": 9.566631589017505e-05, "loss": 7.0456, "step": 60 }, { "epoch": 0.0022322140374374183, "grad_norm": 0.9510890245437622, "learning_rate": 0.00011161070187187091, "loss": 6.6663, "step": 70 }, { "epoch": 0.0025511017570713354, "grad_norm": 0.6426137089729309, "learning_rate": 0.00012755508785356674, "loss": 6.3935, "step": 80 }, { "epoch": 0.002869989476705252, "grad_norm": 0.5806668996810913, "learning_rate": 0.0001434994738352626, "loss": 6.2414, "step": 90 }, { "epoch": 0.003188877196339169, "grad_norm": 0.7059349417686462, "learning_rate": 0.00015944385981695843, "loss": 6.1057, "step": 100 }, { "epoch": 0.003507764915973086, "grad_norm": 0.8364503383636475, "learning_rate": 0.00017538824579865428, "loss": 5.9646, "step": 110 }, { "epoch": 0.0038266526356070026, "grad_norm": 0.9244943857192993, "learning_rate": 0.0001913326317803501, "loss": 5.8405, "step": 120 }, { "epoch": 0.00414554035524092, "grad_norm": 0.7110545039176941, "learning_rate": 0.00020727701776204597, "loss": 5.7436, "step": 130 }, { "epoch": 0.004464428074874837, "grad_norm": 0.7149806618690491, "learning_rate": 0.00022322140374374182, "loss": 5.606, "step": 140 }, { "epoch": 0.004783315794508754, "grad_norm": 0.8666573166847229, "learning_rate": 0.00023916578972543764, "loss": 5.4951, "step": 150 }, { "epoch": 0.005102203514142671, "grad_norm": 0.8655577898025513, "learning_rate": 0.0002551101757071335, "loss": 5.3836, "step": 160 }, { "epoch": 0.005421091233776587, "grad_norm": 0.9628815650939941, "learning_rate": 0.00027105456168882936, "loss": 5.308, "step": 170 }, { "epoch": 0.005739978953410504, "grad_norm": 0.9321894645690918, "learning_rate": 0.0002869989476705252, "loss": 5.194, "step": 180 }, { "epoch": 0.006058866673044421, "grad_norm": 1.1412914991378784, "learning_rate": 0.00030294333365222105, "loss": 5.1022, "step": 190 }, { "epoch": 0.006377754392678338, "grad_norm": 0.960950493812561, "learning_rate": 0.00031888771963391687, "loss": 4.9813, "step": 200 }, { "epoch": 0.006696642112312255, "grad_norm": 0.8928449749946594, "learning_rate": 0.0003348321056156127, "loss": 4.8914, "step": 210 }, { "epoch": 0.007015529831946172, "grad_norm": 0.876811146736145, "learning_rate": 0.00035077649159730856, "loss": 4.7856, "step": 220 }, { "epoch": 0.007334417551580088, "grad_norm": 1.1284464597702026, "learning_rate": 0.00036672087757900443, "loss": 4.7037, "step": 230 }, { "epoch": 0.007653305271214005, "grad_norm": 0.9718582630157471, "learning_rate": 0.0003826652635607002, "loss": 4.6136, "step": 240 }, { "epoch": 0.007972192990847922, "grad_norm": 1.113908052444458, "learning_rate": 0.00039860964954239607, "loss": 4.5741, "step": 250 }, { "epoch": 0.00829108071048184, "grad_norm": 1.2405577898025513, "learning_rate": 0.00041455403552409194, "loss": 4.4705, "step": 260 }, { "epoch": 0.008609968430115756, "grad_norm": 0.8793233633041382, "learning_rate": 0.0004304984215057878, "loss": 4.4333, "step": 270 }, { "epoch": 0.008928856149749673, "grad_norm": 0.8734621405601501, "learning_rate": 0.00044644280748748364, "loss": 4.3445, "step": 280 }, { "epoch": 0.00924774386938359, "grad_norm": 0.9244428277015686, "learning_rate": 0.0004623871934691794, "loss": 4.3066, "step": 290 }, { "epoch": 0.009566631589017507, "grad_norm": 0.8140283226966858, "learning_rate": 0.0004783315794508753, "loss": 4.2334, "step": 300 }, { "epoch": 0.009885519308651424, "grad_norm": 1.2293978929519653, "learning_rate": 0.0004942759654325711, "loss": 4.1764, "step": 310 }, { "epoch": 0.010204407028285341, "grad_norm": 1.0468558073043823, "learning_rate": 0.0005, "loss": 4.1287, "step": 320 }, { "epoch": 0.010523294747919257, "grad_norm": 1.0311610698699951, "learning_rate": 0.0005, "loss": 4.0477, "step": 330 }, { "epoch": 0.010842182467553174, "grad_norm": 0.8697277307510376, "learning_rate": 0.0005, "loss": 4.0209, "step": 340 }, { "epoch": 0.01116107018718709, "grad_norm": 0.8703891634941101, "learning_rate": 0.0005, "loss": 3.9472, "step": 350 }, { "epoch": 0.011479957906821008, "grad_norm": 0.8960039019584656, "learning_rate": 0.0005, "loss": 3.9306, "step": 360 }, { "epoch": 0.011798845626454925, "grad_norm": 0.8864967823028564, "learning_rate": 0.0005, "loss": 3.8554, "step": 370 }, { "epoch": 0.012117733346088842, "grad_norm": 0.9100608825683594, "learning_rate": 0.0005, "loss": 3.8203, "step": 380 }, { "epoch": 0.012436621065722759, "grad_norm": 0.9239365458488464, "learning_rate": 0.0005, "loss": 3.7934, "step": 390 }, { "epoch": 0.012755508785356676, "grad_norm": 1.0264390707015991, "learning_rate": 0.0005, "loss": 3.7561, "step": 400 }, { "epoch": 0.013074396504990593, "grad_norm": 0.9991441369056702, "learning_rate": 0.0005, "loss": 3.704, "step": 410 }, { "epoch": 0.01339328422462451, "grad_norm": 1.0525001287460327, "learning_rate": 0.0005, "loss": 3.6787, "step": 420 }, { "epoch": 0.013712171944258427, "grad_norm": 0.9851738214492798, "learning_rate": 0.0005, "loss": 3.6323, "step": 430 }, { "epoch": 0.014031059663892344, "grad_norm": 1.1018930673599243, "learning_rate": 0.0005, "loss": 3.5947, "step": 440 }, { "epoch": 0.014349947383526261, "grad_norm": 0.9886248111724854, "learning_rate": 0.0005, "loss": 3.5393, "step": 450 }, { "epoch": 0.014668835103160176, "grad_norm": 0.9339925050735474, "learning_rate": 0.0005, "loss": 3.5243, "step": 460 }, { "epoch": 0.014987722822794093, "grad_norm": 0.9865897297859192, "learning_rate": 0.0005, "loss": 3.5089, "step": 470 }, { "epoch": 0.01530661054242801, "grad_norm": 1.0084065198898315, "learning_rate": 0.0005, "loss": 3.4716, "step": 480 }, { "epoch": 0.01562549826206193, "grad_norm": 1.1506084203720093, "learning_rate": 0.0005, "loss": 3.4007, "step": 490 }, { "epoch": 0.015944385981695845, "grad_norm": 1.1100022792816162, "learning_rate": 0.0005, "loss": 3.4015, "step": 500 }, { "epoch": 0.016263273701329763, "grad_norm": 1.1417492628097534, "learning_rate": 0.0005, "loss": 3.3523, "step": 510 }, { "epoch": 0.01658216142096368, "grad_norm": 1.2564970254898071, "learning_rate": 0.0005, "loss": 3.3233, "step": 520 }, { "epoch": 0.016901049140597594, "grad_norm": 1.175044298171997, "learning_rate": 0.0005, "loss": 3.3223, "step": 530 }, { "epoch": 0.017219936860231513, "grad_norm": 1.1329243183135986, "learning_rate": 0.0005, "loss": 3.2792, "step": 540 }, { "epoch": 0.017538824579865428, "grad_norm": 0.9119124412536621, "learning_rate": 0.0005, "loss": 3.2103, "step": 550 }, { "epoch": 0.017857712299499347, "grad_norm": 1.143915057182312, "learning_rate": 0.0005, "loss": 3.2056, "step": 560 }, { "epoch": 0.018176600019133262, "grad_norm": 1.0789998769760132, "learning_rate": 0.0005, "loss": 3.18, "step": 570 }, { "epoch": 0.01849548773876718, "grad_norm": 1.4629336595535278, "learning_rate": 0.0005, "loss": 3.1569, "step": 580 }, { "epoch": 0.018814375458401096, "grad_norm": 1.2161282300949097, "learning_rate": 0.0005, "loss": 3.1021, "step": 590 }, { "epoch": 0.019133263178035015, "grad_norm": 1.2440675497055054, "learning_rate": 0.0005, "loss": 3.1033, "step": 600 }, { "epoch": 0.01945215089766893, "grad_norm": 1.496854543685913, "learning_rate": 0.0005, "loss": 3.0736, "step": 610 }, { "epoch": 0.01977103861730285, "grad_norm": 1.3418179750442505, "learning_rate": 0.0005, "loss": 3.0, "step": 620 }, { "epoch": 0.020089926336936764, "grad_norm": 1.0596439838409424, "learning_rate": 0.0005, "loss": 2.9936, "step": 630 }, { "epoch": 0.020408814056570683, "grad_norm": 1.0319935083389282, "learning_rate": 0.0005, "loss": 2.9885, "step": 640 }, { "epoch": 0.0207277017762046, "grad_norm": 1.1878321170806885, "learning_rate": 0.0005, "loss": 2.9625, "step": 650 }, { "epoch": 0.021046589495838514, "grad_norm": 1.1297259330749512, "learning_rate": 0.0005, "loss": 2.9487, "step": 660 }, { "epoch": 0.021365477215472432, "grad_norm": 1.0326006412506104, "learning_rate": 0.0005, "loss": 2.9224, "step": 670 }, { "epoch": 0.021684364935106348, "grad_norm": 0.9553630352020264, "learning_rate": 0.0005, "loss": 2.8889, "step": 680 }, { "epoch": 0.022003252654740266, "grad_norm": 1.092453956604004, "learning_rate": 0.0005, "loss": 2.8851, "step": 690 }, { "epoch": 0.02232214037437418, "grad_norm": 1.2691729068756104, "learning_rate": 0.0005, "loss": 2.8428, "step": 700 }, { "epoch": 0.0226410280940081, "grad_norm": 1.1289976835250854, "learning_rate": 0.0005, "loss": 2.8537, "step": 710 }, { "epoch": 0.022959915813642016, "grad_norm": 1.206107258796692, "learning_rate": 0.0005, "loss": 2.7994, "step": 720 }, { "epoch": 0.023278803533275935, "grad_norm": 1.0612897872924805, "learning_rate": 0.0005, "loss": 2.7953, "step": 730 }, { "epoch": 0.02359769125290985, "grad_norm": 1.0839524269104004, "learning_rate": 0.0005, "loss": 2.7583, "step": 740 }, { "epoch": 0.02391657897254377, "grad_norm": 0.9679558277130127, "learning_rate": 0.0005, "loss": 2.7797, "step": 750 }, { "epoch": 0.024235466692177684, "grad_norm": 1.139195442199707, "learning_rate": 0.0005, "loss": 2.7543, "step": 760 }, { "epoch": 0.024554354411811603, "grad_norm": 1.2014336585998535, "learning_rate": 0.0005, "loss": 2.7655, "step": 770 }, { "epoch": 0.024873242131445518, "grad_norm": 1.0808125734329224, "learning_rate": 0.0005, "loss": 2.6973, "step": 780 }, { "epoch": 0.025192129851079433, "grad_norm": 1.2096153497695923, "learning_rate": 0.0005, "loss": 2.7071, "step": 790 }, { "epoch": 0.025511017570713352, "grad_norm": 1.1014368534088135, "learning_rate": 0.0005, "loss": 2.7005, "step": 800 }, { "epoch": 0.025829905290347267, "grad_norm": 1.1239562034606934, "learning_rate": 0.0005, "loss": 2.6559, "step": 810 }, { "epoch": 0.026148793009981186, "grad_norm": 1.0517128705978394, "learning_rate": 0.0005, "loss": 2.6809, "step": 820 }, { "epoch": 0.0264676807296151, "grad_norm": 1.086920142173767, "learning_rate": 0.0005, "loss": 2.6294, "step": 830 }, { "epoch": 0.02678656844924902, "grad_norm": 1.189956784248352, "learning_rate": 0.0005, "loss": 2.6253, "step": 840 }, { "epoch": 0.027105456168882935, "grad_norm": 1.3793237209320068, "learning_rate": 0.0005, "loss": 2.6066, "step": 850 }, { "epoch": 0.027424343888516854, "grad_norm": 1.0952047109603882, "learning_rate": 0.0005, "loss": 2.6004, "step": 860 }, { "epoch": 0.02774323160815077, "grad_norm": 1.0428546667099, "learning_rate": 0.0005, "loss": 2.5666, "step": 870 }, { "epoch": 0.028062119327784688, "grad_norm": 1.1217527389526367, "learning_rate": 0.0005, "loss": 2.5656, "step": 880 }, { "epoch": 0.028381007047418604, "grad_norm": 1.186268925666809, "learning_rate": 0.0005, "loss": 2.5476, "step": 890 }, { "epoch": 0.028699894767052522, "grad_norm": 1.3410747051239014, "learning_rate": 0.0005, "loss": 2.5374, "step": 900 }, { "epoch": 0.029018782486686438, "grad_norm": 1.0802406072616577, "learning_rate": 0.0005, "loss": 2.4922, "step": 910 }, { "epoch": 0.029337670206320353, "grad_norm": 1.1122218370437622, "learning_rate": 0.0005, "loss": 2.5051, "step": 920 }, { "epoch": 0.02965655792595427, "grad_norm": 1.207636833190918, "learning_rate": 0.0005, "loss": 2.5154, "step": 930 }, { "epoch": 0.029975445645588187, "grad_norm": 1.0092180967330933, "learning_rate": 0.0005, "loss": 2.5097, "step": 940 }, { "epoch": 0.030294333365222106, "grad_norm": 0.9906138777732849, "learning_rate": 0.0005, "loss": 2.4812, "step": 950 }, { "epoch": 0.03061322108485602, "grad_norm": 1.0704057216644287, "learning_rate": 0.0005, "loss": 2.475, "step": 960 }, { "epoch": 0.03093210880448994, "grad_norm": 1.0282485485076904, "learning_rate": 0.0005, "loss": 2.4569, "step": 970 }, { "epoch": 0.03125099652412386, "grad_norm": 1.147625207901001, "learning_rate": 0.0005, "loss": 2.4775, "step": 980 }, { "epoch": 0.03156988424375777, "grad_norm": 1.1026866436004639, "learning_rate": 0.0005, "loss": 2.4594, "step": 990 }, { "epoch": 0.03188877196339169, "grad_norm": 1.0910698175430298, "learning_rate": 0.0005, "loss": 2.4308, "step": 1000 }, { "epoch": 0.03220765968302561, "grad_norm": 0.9975622296333313, "learning_rate": 0.0005, "loss": 2.422, "step": 1010 }, { "epoch": 0.03252654740265953, "grad_norm": 1.1360799074172974, "learning_rate": 0.0005, "loss": 2.4286, "step": 1020 }, { "epoch": 0.03284543512229344, "grad_norm": 0.971255362033844, "learning_rate": 0.0005, "loss": 2.4026, "step": 1030 }, { "epoch": 0.03316432284192736, "grad_norm": 0.9612915515899658, "learning_rate": 0.0005, "loss": 2.3425, "step": 1040 }, { "epoch": 0.033483210561561276, "grad_norm": 0.983697772026062, "learning_rate": 0.0005, "loss": 2.3794, "step": 1050 }, { "epoch": 0.03380209828119519, "grad_norm": 1.234744668006897, "learning_rate": 0.0005, "loss": 2.3309, "step": 1060 }, { "epoch": 0.03412098600082911, "grad_norm": 1.040803074836731, "learning_rate": 0.0005, "loss": 2.3611, "step": 1070 }, { "epoch": 0.034439873720463025, "grad_norm": 0.9408783912658691, "learning_rate": 0.0005, "loss": 2.352, "step": 1080 }, { "epoch": 0.034758761440096944, "grad_norm": 0.9767993688583374, "learning_rate": 0.0005, "loss": 2.3517, "step": 1090 }, { "epoch": 0.035077649159730856, "grad_norm": 1.0135048627853394, "learning_rate": 0.0005, "loss": 2.3566, "step": 1100 }, { "epoch": 0.035396536879364775, "grad_norm": 1.091168761253357, "learning_rate": 0.0005, "loss": 2.3013, "step": 1110 }, { "epoch": 0.035715424598998693, "grad_norm": 1.0776761770248413, "learning_rate": 0.0005, "loss": 2.3057, "step": 1120 }, { "epoch": 0.03603431231863261, "grad_norm": 1.0987141132354736, "learning_rate": 0.0005, "loss": 2.3045, "step": 1130 }, { "epoch": 0.036353200038266524, "grad_norm": 1.1936005353927612, "learning_rate": 0.0005, "loss": 2.3158, "step": 1140 }, { "epoch": 0.03667208775790044, "grad_norm": 0.9826390147209167, "learning_rate": 0.0005, "loss": 2.2981, "step": 1150 }, { "epoch": 0.03699097547753436, "grad_norm": 1.0962214469909668, "learning_rate": 0.0005, "loss": 2.2778, "step": 1160 }, { "epoch": 0.03730986319716828, "grad_norm": 1.0983978509902954, "learning_rate": 0.0005, "loss": 2.2461, "step": 1170 }, { "epoch": 0.03762875091680219, "grad_norm": 1.0521897077560425, "learning_rate": 0.0005, "loss": 2.2119, "step": 1180 }, { "epoch": 0.03794763863643611, "grad_norm": 0.9314139485359192, "learning_rate": 0.0005, "loss": 2.2284, "step": 1190 }, { "epoch": 0.03826652635607003, "grad_norm": 0.9761852622032166, "learning_rate": 0.0005, "loss": 2.2425, "step": 1200 }, { "epoch": 0.03858541407570394, "grad_norm": 0.9609031081199646, "learning_rate": 0.0005, "loss": 2.2046, "step": 1210 }, { "epoch": 0.03890430179533786, "grad_norm": 1.3822377920150757, "learning_rate": 0.0005, "loss": 2.2251, "step": 1220 }, { "epoch": 0.03922318951497178, "grad_norm": 0.9732682108879089, "learning_rate": 0.0005, "loss": 2.2095, "step": 1230 }, { "epoch": 0.0395420772346057, "grad_norm": 1.0873984098434448, "learning_rate": 0.0005, "loss": 2.2142, "step": 1240 }, { "epoch": 0.03986096495423961, "grad_norm": 1.0694262981414795, "learning_rate": 0.0005, "loss": 2.179, "step": 1250 }, { "epoch": 0.04017985267387353, "grad_norm": 1.1795161962509155, "learning_rate": 0.0005, "loss": 2.1788, "step": 1260 }, { "epoch": 0.04049874039350745, "grad_norm": 0.96063631772995, "learning_rate": 0.0005, "loss": 2.1825, "step": 1270 }, { "epoch": 0.040817628113141366, "grad_norm": 0.9153825044631958, "learning_rate": 0.0005, "loss": 2.1649, "step": 1280 }, { "epoch": 0.04113651583277528, "grad_norm": 1.3356082439422607, "learning_rate": 0.0005, "loss": 2.1648, "step": 1290 }, { "epoch": 0.0414554035524092, "grad_norm": 0.9260676503181458, "learning_rate": 0.0005, "loss": 2.1341, "step": 1300 }, { "epoch": 0.041774291272043115, "grad_norm": 0.9380478262901306, "learning_rate": 0.0005, "loss": 2.117, "step": 1310 }, { "epoch": 0.04209317899167703, "grad_norm": 0.9406986832618713, "learning_rate": 0.0005, "loss": 2.1629, "step": 1320 }, { "epoch": 0.042412066711310946, "grad_norm": 1.151871681213379, "learning_rate": 0.0005, "loss": 2.1444, "step": 1330 }, { "epoch": 0.042730954430944865, "grad_norm": 1.0236380100250244, "learning_rate": 0.0005, "loss": 2.1186, "step": 1340 }, { "epoch": 0.04304984215057878, "grad_norm": 0.9131529331207275, "learning_rate": 0.0005, "loss": 2.1427, "step": 1350 }, { "epoch": 0.043368729870212695, "grad_norm": 0.9835687279701233, "learning_rate": 0.0005, "loss": 2.1181, "step": 1360 }, { "epoch": 0.043687617589846614, "grad_norm": 0.9309093356132507, "learning_rate": 0.0005, "loss": 2.1202, "step": 1370 }, { "epoch": 0.04400650530948053, "grad_norm": 0.9279321432113647, "learning_rate": 0.0005, "loss": 2.1264, "step": 1380 }, { "epoch": 0.04432539302911445, "grad_norm": 0.8885673880577087, "learning_rate": 0.0005, "loss": 2.0761, "step": 1390 }, { "epoch": 0.04464428074874836, "grad_norm": 0.9342918992042542, "learning_rate": 0.0005, "loss": 2.1036, "step": 1400 }, { "epoch": 0.04496316846838228, "grad_norm": 0.9131391644477844, "learning_rate": 0.0005, "loss": 2.104, "step": 1410 }, { "epoch": 0.0452820561880162, "grad_norm": 0.974519670009613, "learning_rate": 0.0005, "loss": 2.0614, "step": 1420 }, { "epoch": 0.04560094390765012, "grad_norm": 0.9178261756896973, "learning_rate": 0.0005, "loss": 2.0875, "step": 1430 }, { "epoch": 0.04591983162728403, "grad_norm": 0.9592669606208801, "learning_rate": 0.0005, "loss": 2.0546, "step": 1440 }, { "epoch": 0.04623871934691795, "grad_norm": 0.9199544191360474, "learning_rate": 0.0005, "loss": 2.1031, "step": 1450 }, { "epoch": 0.04655760706655187, "grad_norm": 1.066640019416809, "learning_rate": 0.0005, "loss": 2.0426, "step": 1460 }, { "epoch": 0.04687649478618578, "grad_norm": 0.919484555721283, "learning_rate": 0.0005, "loss": 2.0447, "step": 1470 }, { "epoch": 0.0471953825058197, "grad_norm": 0.9945185780525208, "learning_rate": 0.0005, "loss": 2.0507, "step": 1480 }, { "epoch": 0.04751427022545362, "grad_norm": 0.9459637403488159, "learning_rate": 0.0005, "loss": 2.0422, "step": 1490 }, { "epoch": 0.04783315794508754, "grad_norm": 0.9146264791488647, "learning_rate": 0.0005, "loss": 2.0266, "step": 1500 }, { "epoch": 0.04815204566472145, "grad_norm": 0.912291407585144, "learning_rate": 0.0005, "loss": 2.0012, "step": 1510 }, { "epoch": 0.04847093338435537, "grad_norm": 0.9094820618629456, "learning_rate": 0.0005, "loss": 2.0285, "step": 1520 }, { "epoch": 0.048789821103989287, "grad_norm": 0.9189174771308899, "learning_rate": 0.0005, "loss": 2.0342, "step": 1530 }, { "epoch": 0.049108708823623205, "grad_norm": 0.985211193561554, "learning_rate": 0.0005, "loss": 1.9833, "step": 1540 }, { "epoch": 0.04942759654325712, "grad_norm": 0.9880468845367432, "learning_rate": 0.0005, "loss": 2.0074, "step": 1550 }, { "epoch": 0.049746484262891036, "grad_norm": 1.0333455801010132, "learning_rate": 0.0005, "loss": 1.9974, "step": 1560 }, { "epoch": 0.050065371982524955, "grad_norm": 1.1017787456512451, "learning_rate": 0.0005, "loss": 2.001, "step": 1570 }, { "epoch": 0.050384259702158866, "grad_norm": 0.8883527517318726, "learning_rate": 0.0005, "loss": 1.968, "step": 1580 }, { "epoch": 0.050703147421792785, "grad_norm": 0.8553420901298523, "learning_rate": 0.0005, "loss": 1.9722, "step": 1590 }, { "epoch": 0.051022035141426704, "grad_norm": 1.0707831382751465, "learning_rate": 0.0005, "loss": 1.9713, "step": 1600 }, { "epoch": 0.05134092286106062, "grad_norm": 1.1193410158157349, "learning_rate": 0.0005, "loss": 1.9747, "step": 1610 }, { "epoch": 0.051659810580694535, "grad_norm": 1.1910367012023926, "learning_rate": 0.0005, "loss": 1.9869, "step": 1620 }, { "epoch": 0.05197869830032845, "grad_norm": 1.4951293468475342, "learning_rate": 0.0005, "loss": 1.9484, "step": 1630 }, { "epoch": 0.05229758601996237, "grad_norm": 0.9338844418525696, "learning_rate": 0.0005, "loss": 1.9554, "step": 1640 }, { "epoch": 0.05261647373959629, "grad_norm": 1.0976824760437012, "learning_rate": 0.0005, "loss": 1.9719, "step": 1650 }, { "epoch": 0.0529353614592302, "grad_norm": 1.0786077976226807, "learning_rate": 0.0005, "loss": 1.9556, "step": 1660 }, { "epoch": 0.05325424917886412, "grad_norm": 0.9365764260292053, "learning_rate": 0.0005, "loss": 1.9511, "step": 1670 }, { "epoch": 0.05357313689849804, "grad_norm": 0.8856982588768005, "learning_rate": 0.0005, "loss": 1.903, "step": 1680 }, { "epoch": 0.05389202461813196, "grad_norm": 0.8451250791549683, "learning_rate": 0.0005, "loss": 1.9223, "step": 1690 }, { "epoch": 0.05421091233776587, "grad_norm": 0.8898938894271851, "learning_rate": 0.0005, "loss": 1.9358, "step": 1700 }, { "epoch": 0.05452980005739979, "grad_norm": 0.9035238027572632, "learning_rate": 0.0005, "loss": 1.9235, "step": 1710 }, { "epoch": 0.05484868777703371, "grad_norm": 0.8509615659713745, "learning_rate": 0.0005, "loss": 1.9259, "step": 1720 }, { "epoch": 0.05516757549666762, "grad_norm": 0.8624535799026489, "learning_rate": 0.0005, "loss": 1.9197, "step": 1730 }, { "epoch": 0.05548646321630154, "grad_norm": 1.032548427581787, "learning_rate": 0.0005, "loss": 1.9275, "step": 1740 }, { "epoch": 0.05580535093593546, "grad_norm": 0.8983070254325867, "learning_rate": 0.0005, "loss": 1.9079, "step": 1750 }, { "epoch": 0.056124238655569376, "grad_norm": 0.8418633341789246, "learning_rate": 0.0005, "loss": 1.9123, "step": 1760 }, { "epoch": 0.05644312637520329, "grad_norm": 0.9653736352920532, "learning_rate": 0.0005, "loss": 1.8994, "step": 1770 }, { "epoch": 0.05676201409483721, "grad_norm": 0.9427167177200317, "learning_rate": 0.0005, "loss": 1.9248, "step": 1780 }, { "epoch": 0.057080901814471126, "grad_norm": 1.0386487245559692, "learning_rate": 0.0005, "loss": 1.895, "step": 1790 }, { "epoch": 0.057399789534105045, "grad_norm": 1.0085606575012207, "learning_rate": 0.0005, "loss": 1.8912, "step": 1800 }, { "epoch": 0.057718677253738956, "grad_norm": 1.0900158882141113, "learning_rate": 0.0005, "loss": 1.9122, "step": 1810 }, { "epoch": 0.058037564973372875, "grad_norm": 0.8093986511230469, "learning_rate": 0.0005, "loss": 1.8888, "step": 1820 }, { "epoch": 0.058356452693006794, "grad_norm": 1.0416687726974487, "learning_rate": 0.0005, "loss": 1.8784, "step": 1830 }, { "epoch": 0.058675340412640706, "grad_norm": 0.825915515422821, "learning_rate": 0.0005, "loss": 1.8781, "step": 1840 }, { "epoch": 0.058994228132274625, "grad_norm": 0.896038293838501, "learning_rate": 0.0005, "loss": 1.8479, "step": 1850 }, { "epoch": 0.05931311585190854, "grad_norm": 0.9250373840332031, "learning_rate": 0.0005, "loss": 1.8707, "step": 1860 }, { "epoch": 0.05963200357154246, "grad_norm": 0.8338049054145813, "learning_rate": 0.0005, "loss": 1.8715, "step": 1870 }, { "epoch": 0.059950891291176374, "grad_norm": 0.8659617900848389, "learning_rate": 0.0005, "loss": 1.8735, "step": 1880 }, { "epoch": 0.06026977901081029, "grad_norm": 1.0219095945358276, "learning_rate": 0.0005, "loss": 1.8642, "step": 1890 }, { "epoch": 0.06058866673044421, "grad_norm": 0.8440826535224915, "learning_rate": 0.0005, "loss": 1.8672, "step": 1900 }, { "epoch": 0.06090755445007813, "grad_norm": 0.8432860970497131, "learning_rate": 0.0005, "loss": 1.8538, "step": 1910 }, { "epoch": 0.06122644216971204, "grad_norm": 0.8388710021972656, "learning_rate": 0.0005, "loss": 1.8308, "step": 1920 }, { "epoch": 0.06154532988934596, "grad_norm": 0.9477251768112183, "learning_rate": 0.0005, "loss": 1.8575, "step": 1930 }, { "epoch": 0.06186421760897988, "grad_norm": 0.8459080457687378, "learning_rate": 0.0005, "loss": 1.8327, "step": 1940 }, { "epoch": 0.0621831053286138, "grad_norm": 0.8614171743392944, "learning_rate": 0.0005, "loss": 1.8447, "step": 1950 }, { "epoch": 0.06250199304824772, "grad_norm": 0.8767685294151306, "learning_rate": 0.0005, "loss": 1.8365, "step": 1960 }, { "epoch": 0.06282088076788163, "grad_norm": 0.856082558631897, "learning_rate": 0.0005, "loss": 1.847, "step": 1970 }, { "epoch": 0.06313976848751554, "grad_norm": 0.8638346791267395, "learning_rate": 0.0005, "loss": 1.8228, "step": 1980 }, { "epoch": 0.06345865620714947, "grad_norm": 0.8353111743927002, "learning_rate": 0.0005, "loss": 1.8124, "step": 1990 }, { "epoch": 0.06377754392678338, "grad_norm": 0.8533226847648621, "learning_rate": 0.0005, "loss": 1.8204, "step": 2000 }, { "epoch": 0.06409643164641729, "grad_norm": 0.7952243089675903, "learning_rate": 0.0005, "loss": 1.8185, "step": 2010 }, { "epoch": 0.06441531936605122, "grad_norm": 0.8410241007804871, "learning_rate": 0.0005, "loss": 1.8369, "step": 2020 }, { "epoch": 0.06473420708568513, "grad_norm": 1.3236920833587646, "learning_rate": 0.0005, "loss": 1.8089, "step": 2030 }, { "epoch": 0.06505309480531905, "grad_norm": 0.9753334522247314, "learning_rate": 0.0005, "loss": 1.8173, "step": 2040 }, { "epoch": 0.06537198252495297, "grad_norm": 0.811911940574646, "learning_rate": 0.0005, "loss": 1.7852, "step": 2050 }, { "epoch": 0.06569087024458688, "grad_norm": 0.8044120669364929, "learning_rate": 0.0005, "loss": 1.7763, "step": 2060 }, { "epoch": 0.0660097579642208, "grad_norm": 0.8142799735069275, "learning_rate": 0.0005, "loss": 1.8074, "step": 2070 }, { "epoch": 0.06632864568385471, "grad_norm": 0.8504486680030823, "learning_rate": 0.0005, "loss": 1.7901, "step": 2080 }, { "epoch": 0.06664753340348863, "grad_norm": 0.9170723557472229, "learning_rate": 0.0005, "loss": 1.8103, "step": 2090 }, { "epoch": 0.06696642112312255, "grad_norm": 0.8451781272888184, "learning_rate": 0.0005, "loss": 1.8068, "step": 2100 }, { "epoch": 0.06728530884275646, "grad_norm": 0.8535498976707458, "learning_rate": 0.0005, "loss": 1.7775, "step": 2110 }, { "epoch": 0.06760419656239038, "grad_norm": 0.852803111076355, "learning_rate": 0.0005, "loss": 1.7843, "step": 2120 }, { "epoch": 0.0679230842820243, "grad_norm": 0.8039234280586243, "learning_rate": 0.0005, "loss": 1.785, "step": 2130 }, { "epoch": 0.06824197200165821, "grad_norm": 0.8931598663330078, "learning_rate": 0.0005, "loss": 1.7751, "step": 2140 }, { "epoch": 0.06856085972129214, "grad_norm": 0.8621709942817688, "learning_rate": 0.0005, "loss": 1.7848, "step": 2150 }, { "epoch": 0.06887974744092605, "grad_norm": 0.8556991815567017, "learning_rate": 0.0005, "loss": 1.7845, "step": 2160 }, { "epoch": 0.06919863516055996, "grad_norm": 0.8265063166618347, "learning_rate": 0.0005, "loss": 1.7709, "step": 2170 }, { "epoch": 0.06951752288019389, "grad_norm": 0.9165987372398376, "learning_rate": 0.0005, "loss": 1.7663, "step": 2180 }, { "epoch": 0.0698364105998278, "grad_norm": 0.8655464053153992, "learning_rate": 0.0005, "loss": 1.7503, "step": 2190 }, { "epoch": 0.07015529831946171, "grad_norm": 0.8358378410339355, "learning_rate": 0.0005, "loss": 1.767, "step": 2200 }, { "epoch": 0.07047418603909564, "grad_norm": 0.7970449328422546, "learning_rate": 0.0005, "loss": 1.7433, "step": 2210 }, { "epoch": 0.07079307375872955, "grad_norm": 0.8039758205413818, "learning_rate": 0.0005, "loss": 1.7564, "step": 2220 }, { "epoch": 0.07111196147836348, "grad_norm": 0.7957394123077393, "learning_rate": 0.0005, "loss": 1.7397, "step": 2230 }, { "epoch": 0.07143084919799739, "grad_norm": 0.8606018424034119, "learning_rate": 0.0005, "loss": 1.7447, "step": 2240 }, { "epoch": 0.0717497369176313, "grad_norm": 0.7819696664810181, "learning_rate": 0.0005, "loss": 1.7233, "step": 2250 }, { "epoch": 0.07206862463726522, "grad_norm": 0.775804877281189, "learning_rate": 0.0005, "loss": 1.7132, "step": 2260 }, { "epoch": 0.07238751235689914, "grad_norm": 0.9183884859085083, "learning_rate": 0.0005, "loss": 1.7403, "step": 2270 }, { "epoch": 0.07270640007653305, "grad_norm": 0.8144393563270569, "learning_rate": 0.0005, "loss": 1.7333, "step": 2280 }, { "epoch": 0.07302528779616697, "grad_norm": 0.8179895281791687, "learning_rate": 0.0005, "loss": 1.714, "step": 2290 }, { "epoch": 0.07334417551580089, "grad_norm": 0.7930556535720825, "learning_rate": 0.0005, "loss": 1.7198, "step": 2300 }, { "epoch": 0.0736630632354348, "grad_norm": 0.7923799157142639, "learning_rate": 0.0005, "loss": 1.7112, "step": 2310 }, { "epoch": 0.07398195095506872, "grad_norm": 0.8188552856445312, "learning_rate": 0.0005, "loss": 1.7261, "step": 2320 }, { "epoch": 0.07430083867470264, "grad_norm": 0.7819402813911438, "learning_rate": 0.0005, "loss": 1.7271, "step": 2330 }, { "epoch": 0.07461972639433656, "grad_norm": 0.7663988471031189, "learning_rate": 0.0005, "loss": 1.7076, "step": 2340 }, { "epoch": 0.07493861411397047, "grad_norm": 0.7735258340835571, "learning_rate": 0.0005, "loss": 1.7281, "step": 2350 }, { "epoch": 0.07525750183360438, "grad_norm": 0.7962582111358643, "learning_rate": 0.0005, "loss": 1.717, "step": 2360 }, { "epoch": 0.07557638955323831, "grad_norm": 0.7835531830787659, "learning_rate": 0.0005, "loss": 1.6795, "step": 2370 }, { "epoch": 0.07589527727287222, "grad_norm": 0.7508684396743774, "learning_rate": 0.0005, "loss": 1.6853, "step": 2380 }, { "epoch": 0.07621416499250613, "grad_norm": 0.9591211080551147, "learning_rate": 0.0005, "loss": 1.722, "step": 2390 }, { "epoch": 0.07653305271214006, "grad_norm": 0.7573085427284241, "learning_rate": 0.0005, "loss": 1.7091, "step": 2400 }, { "epoch": 0.07685194043177397, "grad_norm": 0.791395902633667, "learning_rate": 0.0005, "loss": 1.7169, "step": 2410 }, { "epoch": 0.07717082815140788, "grad_norm": 0.7925633788108826, "learning_rate": 0.0005, "loss": 1.6926, "step": 2420 }, { "epoch": 0.07748971587104181, "grad_norm": 0.7916731238365173, "learning_rate": 0.0005, "loss": 1.6913, "step": 2430 }, { "epoch": 0.07780860359067572, "grad_norm": 0.7813157439231873, "learning_rate": 0.0005, "loss": 1.6846, "step": 2440 }, { "epoch": 0.07812749131030965, "grad_norm": 0.9070016145706177, "learning_rate": 0.0005, "loss": 1.6771, "step": 2450 }, { "epoch": 0.07844637902994356, "grad_norm": 0.8243926763534546, "learning_rate": 0.0005, "loss": 1.6888, "step": 2460 }, { "epoch": 0.07876526674957747, "grad_norm": 0.8692016005516052, "learning_rate": 0.0005, "loss": 1.6894, "step": 2470 }, { "epoch": 0.0790841544692114, "grad_norm": 0.8020526170730591, "learning_rate": 0.0005, "loss": 1.6661, "step": 2480 }, { "epoch": 0.07940304218884531, "grad_norm": 0.739294171333313, "learning_rate": 0.0005, "loss": 1.7, "step": 2490 }, { "epoch": 0.07972192990847922, "grad_norm": 0.7623685598373413, "learning_rate": 0.0005, "loss": 1.6818, "step": 2500 }, { "epoch": 0.08004081762811315, "grad_norm": 0.7918365597724915, "learning_rate": 0.0005, "loss": 1.6575, "step": 2510 }, { "epoch": 0.08035970534774706, "grad_norm": 0.8381354212760925, "learning_rate": 0.0005, "loss": 1.6799, "step": 2520 }, { "epoch": 0.08067859306738097, "grad_norm": 0.7383261919021606, "learning_rate": 0.0005, "loss": 1.6723, "step": 2530 }, { "epoch": 0.0809974807870149, "grad_norm": 0.74249267578125, "learning_rate": 0.0005, "loss": 1.6751, "step": 2540 }, { "epoch": 0.0813163685066488, "grad_norm": 0.8358879685401917, "learning_rate": 0.0005, "loss": 1.6605, "step": 2550 }, { "epoch": 0.08163525622628273, "grad_norm": 0.8204383850097656, "learning_rate": 0.0005, "loss": 1.6672, "step": 2560 }, { "epoch": 0.08195414394591664, "grad_norm": 0.7568264007568359, "learning_rate": 0.0005, "loss": 1.6383, "step": 2570 }, { "epoch": 0.08227303166555056, "grad_norm": 0.7871072292327881, "learning_rate": 0.0005, "loss": 1.6717, "step": 2580 }, { "epoch": 0.08259191938518448, "grad_norm": 0.7703539133071899, "learning_rate": 0.0005, "loss": 1.6437, "step": 2590 }, { "epoch": 0.0829108071048184, "grad_norm": 0.7762648463249207, "learning_rate": 0.0005, "loss": 1.6584, "step": 2600 }, { "epoch": 0.0832296948244523, "grad_norm": 0.8607839345932007, "learning_rate": 0.0005, "loss": 1.6598, "step": 2610 }, { "epoch": 0.08354858254408623, "grad_norm": 0.7817122936248779, "learning_rate": 0.0005, "loss": 1.6381, "step": 2620 }, { "epoch": 0.08386747026372014, "grad_norm": 0.9235209226608276, "learning_rate": 0.0005, "loss": 1.6452, "step": 2630 }, { "epoch": 0.08418635798335405, "grad_norm": 0.7671082019805908, "learning_rate": 0.0005, "loss": 1.6419, "step": 2640 }, { "epoch": 0.08450524570298798, "grad_norm": 0.7085024118423462, "learning_rate": 0.0005, "loss": 1.6466, "step": 2650 }, { "epoch": 0.08482413342262189, "grad_norm": 0.7406449913978577, "learning_rate": 0.0005, "loss": 1.6507, "step": 2660 }, { "epoch": 0.08514302114225582, "grad_norm": 0.8121161460876465, "learning_rate": 0.0005, "loss": 1.6288, "step": 2670 }, { "epoch": 0.08546190886188973, "grad_norm": 0.766755998134613, "learning_rate": 0.0005, "loss": 1.6614, "step": 2680 }, { "epoch": 0.08578079658152364, "grad_norm": 0.8837841153144836, "learning_rate": 0.0005, "loss": 1.6366, "step": 2690 }, { "epoch": 0.08609968430115757, "grad_norm": 0.7676401734352112, "learning_rate": 0.0005, "loss": 1.6415, "step": 2700 }, { "epoch": 0.08641857202079148, "grad_norm": 0.8545228838920593, "learning_rate": 0.0005, "loss": 1.6338, "step": 2710 }, { "epoch": 0.08673745974042539, "grad_norm": 0.8082299828529358, "learning_rate": 0.0005, "loss": 1.6483, "step": 2720 }, { "epoch": 0.08705634746005932, "grad_norm": 0.802437961101532, "learning_rate": 0.0005, "loss": 1.6479, "step": 2730 }, { "epoch": 0.08737523517969323, "grad_norm": 0.8295302391052246, "learning_rate": 0.0005, "loss": 1.6678, "step": 2740 }, { "epoch": 0.08769412289932715, "grad_norm": 0.8102624416351318, "learning_rate": 0.0005, "loss": 1.6217, "step": 2750 }, { "epoch": 0.08801301061896107, "grad_norm": 0.7187849879264832, "learning_rate": 0.0005, "loss": 1.6188, "step": 2760 }, { "epoch": 0.08833189833859498, "grad_norm": 0.810950756072998, "learning_rate": 0.0005, "loss": 1.6191, "step": 2770 }, { "epoch": 0.0886507860582289, "grad_norm": 0.7218251824378967, "learning_rate": 0.0005, "loss": 1.6483, "step": 2780 }, { "epoch": 0.08896967377786281, "grad_norm": 0.7232185006141663, "learning_rate": 0.0005, "loss": 1.5982, "step": 2790 }, { "epoch": 0.08928856149749673, "grad_norm": 0.7348928451538086, "learning_rate": 0.0005, "loss": 1.6335, "step": 2800 }, { "epoch": 0.08960744921713065, "grad_norm": 0.793999195098877, "learning_rate": 0.0005, "loss": 1.5883, "step": 2810 }, { "epoch": 0.08992633693676456, "grad_norm": 0.7381916642189026, "learning_rate": 0.0005, "loss": 1.6178, "step": 2820 }, { "epoch": 0.09024522465639848, "grad_norm": 0.735526442527771, "learning_rate": 0.0005, "loss": 1.6145, "step": 2830 }, { "epoch": 0.0905641123760324, "grad_norm": 0.7207257747650146, "learning_rate": 0.0005, "loss": 1.6203, "step": 2840 }, { "epoch": 0.09088300009566631, "grad_norm": 0.7474569082260132, "learning_rate": 0.0005, "loss": 1.596, "step": 2850 }, { "epoch": 0.09120188781530024, "grad_norm": 0.7070997953414917, "learning_rate": 0.0005, "loss": 1.6022, "step": 2860 }, { "epoch": 0.09152077553493415, "grad_norm": 0.7811879515647888, "learning_rate": 0.0005, "loss": 1.6031, "step": 2870 }, { "epoch": 0.09183966325456806, "grad_norm": 0.7370976209640503, "learning_rate": 0.0005, "loss": 1.6109, "step": 2880 }, { "epoch": 0.09215855097420199, "grad_norm": 0.733187198638916, "learning_rate": 0.0005, "loss": 1.5982, "step": 2890 }, { "epoch": 0.0924774386938359, "grad_norm": 0.7401499152183533, "learning_rate": 0.0005, "loss": 1.6011, "step": 2900 }, { "epoch": 0.09279632641346981, "grad_norm": 0.7699506282806396, "learning_rate": 0.0005, "loss": 1.6043, "step": 2910 }, { "epoch": 0.09311521413310374, "grad_norm": 0.7591161727905273, "learning_rate": 0.0005, "loss": 1.6081, "step": 2920 }, { "epoch": 0.09343410185273765, "grad_norm": 0.7230544090270996, "learning_rate": 0.0005, "loss": 1.6115, "step": 2930 }, { "epoch": 0.09375298957237156, "grad_norm": 0.7355446219444275, "learning_rate": 0.0005, "loss": 1.6163, "step": 2940 }, { "epoch": 0.09407187729200549, "grad_norm": 0.7396198511123657, "learning_rate": 0.0005, "loss": 1.5915, "step": 2950 }, { "epoch": 0.0943907650116394, "grad_norm": 0.7075987458229065, "learning_rate": 0.0005, "loss": 1.5886, "step": 2960 }, { "epoch": 0.09470965273127332, "grad_norm": 0.7205303311347961, "learning_rate": 0.0005, "loss": 1.6006, "step": 2970 }, { "epoch": 0.09502854045090724, "grad_norm": 0.7048115730285645, "learning_rate": 0.0005, "loss": 1.5907, "step": 2980 }, { "epoch": 0.09534742817054115, "grad_norm": 0.7107388377189636, "learning_rate": 0.0005, "loss": 1.5916, "step": 2990 }, { "epoch": 0.09566631589017507, "grad_norm": 0.7625985741615295, "learning_rate": 0.0005, "loss": 1.6023, "step": 3000 }, { "epoch": 0.09598520360980899, "grad_norm": 0.7164791822433472, "learning_rate": 0.0005, "loss": 1.5883, "step": 3010 }, { "epoch": 0.0963040913294429, "grad_norm": 0.7049804925918579, "learning_rate": 0.0005, "loss": 1.5835, "step": 3020 }, { "epoch": 0.09662297904907682, "grad_norm": 0.7338706254959106, "learning_rate": 0.0005, "loss": 1.5845, "step": 3030 }, { "epoch": 0.09694186676871074, "grad_norm": 0.7438425421714783, "learning_rate": 0.0005, "loss": 1.5869, "step": 3040 }, { "epoch": 0.09726075448834465, "grad_norm": 0.7559459209442139, "learning_rate": 0.0005, "loss": 1.6004, "step": 3050 }, { "epoch": 0.09757964220797857, "grad_norm": 0.7225502729415894, "learning_rate": 0.0005, "loss": 1.5889, "step": 3060 }, { "epoch": 0.09789852992761248, "grad_norm": 0.7186091542243958, "learning_rate": 0.0005, "loss": 1.5739, "step": 3070 }, { "epoch": 0.09821741764724641, "grad_norm": 0.7074151635169983, "learning_rate": 0.0005, "loss": 1.5841, "step": 3080 }, { "epoch": 0.09853630536688032, "grad_norm": 0.7287281155586243, "learning_rate": 0.0005, "loss": 1.5719, "step": 3090 }, { "epoch": 0.09885519308651423, "grad_norm": 0.700652539730072, "learning_rate": 0.0005, "loss": 1.5617, "step": 3100 }, { "epoch": 0.09917408080614816, "grad_norm": 0.7762500047683716, "learning_rate": 0.0005, "loss": 1.5547, "step": 3110 }, { "epoch": 0.09949296852578207, "grad_norm": 0.7286017537117004, "learning_rate": 0.0005, "loss": 1.5845, "step": 3120 }, { "epoch": 0.09981185624541598, "grad_norm": 0.7398425340652466, "learning_rate": 0.0005, "loss": 1.5928, "step": 3130 }, { "epoch": 0.10013074396504991, "grad_norm": 0.6964101195335388, "learning_rate": 0.0005, "loss": 1.5711, "step": 3140 }, { "epoch": 0.10044963168468382, "grad_norm": 0.7937759160995483, "learning_rate": 0.0005, "loss": 1.5783, "step": 3150 }, { "epoch": 0.10076851940431773, "grad_norm": 0.7517949342727661, "learning_rate": 0.0005, "loss": 1.5573, "step": 3160 }, { "epoch": 0.10108740712395166, "grad_norm": 0.7467934489250183, "learning_rate": 0.0005, "loss": 1.5653, "step": 3170 }, { "epoch": 0.10140629484358557, "grad_norm": 0.686786949634552, "learning_rate": 0.0005, "loss": 1.5551, "step": 3180 }, { "epoch": 0.1017251825632195, "grad_norm": 0.7808279991149902, "learning_rate": 0.0005, "loss": 1.5439, "step": 3190 }, { "epoch": 0.10204407028285341, "grad_norm": 0.7051215767860413, "learning_rate": 0.0005, "loss": 1.5463, "step": 3200 }, { "epoch": 0.10236295800248732, "grad_norm": 0.7086171507835388, "learning_rate": 0.0005, "loss": 1.5796, "step": 3210 }, { "epoch": 0.10268184572212125, "grad_norm": 0.7291361093521118, "learning_rate": 0.0005, "loss": 1.5741, "step": 3220 }, { "epoch": 0.10300073344175516, "grad_norm": 0.7728156447410583, "learning_rate": 0.0005, "loss": 1.5405, "step": 3230 }, { "epoch": 0.10331962116138907, "grad_norm": 0.7691132426261902, "learning_rate": 0.0005, "loss": 1.5261, "step": 3240 }, { "epoch": 0.103638508881023, "grad_norm": 0.7408841252326965, "learning_rate": 0.0005, "loss": 1.5516, "step": 3250 }, { "epoch": 0.1039573966006569, "grad_norm": 0.7222586870193481, "learning_rate": 0.0005, "loss": 1.5657, "step": 3260 }, { "epoch": 0.10427628432029083, "grad_norm": 0.7752788066864014, "learning_rate": 0.0005, "loss": 1.5175, "step": 3270 }, { "epoch": 0.10459517203992474, "grad_norm": 0.7558481097221375, "learning_rate": 0.0005, "loss": 1.5609, "step": 3280 }, { "epoch": 0.10491405975955866, "grad_norm": 0.6986261010169983, "learning_rate": 0.0005, "loss": 1.5228, "step": 3290 }, { "epoch": 0.10523294747919258, "grad_norm": 0.7217074632644653, "learning_rate": 0.0005, "loss": 1.5521, "step": 3300 }, { "epoch": 0.1055518351988265, "grad_norm": 0.7102519869804382, "learning_rate": 0.0005, "loss": 1.542, "step": 3310 }, { "epoch": 0.1058707229184604, "grad_norm": 0.7186601161956787, "learning_rate": 0.0005, "loss": 1.5246, "step": 3320 }, { "epoch": 0.10618961063809433, "grad_norm": 0.7427906394004822, "learning_rate": 0.0005, "loss": 1.5897, "step": 3330 }, { "epoch": 0.10650849835772824, "grad_norm": 0.6721486449241638, "learning_rate": 0.0005, "loss": 1.555, "step": 3340 }, { "epoch": 0.10682738607736215, "grad_norm": 0.7453601360321045, "learning_rate": 0.0005, "loss": 1.5611, "step": 3350 }, { "epoch": 0.10714627379699608, "grad_norm": 0.733101487159729, "learning_rate": 0.0005, "loss": 1.5424, "step": 3360 }, { "epoch": 0.10746516151662999, "grad_norm": 0.6932695508003235, "learning_rate": 0.0005, "loss": 1.5567, "step": 3370 }, { "epoch": 0.10778404923626392, "grad_norm": 0.7290396690368652, "learning_rate": 0.0005, "loss": 1.5409, "step": 3380 }, { "epoch": 0.10810293695589783, "grad_norm": 0.749410092830658, "learning_rate": 0.0005, "loss": 1.5056, "step": 3390 }, { "epoch": 0.10842182467553174, "grad_norm": 0.7252135276794434, "learning_rate": 0.0005, "loss": 1.526, "step": 3400 }, { "epoch": 0.10874071239516567, "grad_norm": 0.6941826939582825, "learning_rate": 0.0005, "loss": 1.5365, "step": 3410 }, { "epoch": 0.10905960011479958, "grad_norm": 0.6843234896659851, "learning_rate": 0.0005, "loss": 1.5437, "step": 3420 }, { "epoch": 0.10937848783443349, "grad_norm": 0.7058086395263672, "learning_rate": 0.0005, "loss": 1.5404, "step": 3430 }, { "epoch": 0.10969737555406742, "grad_norm": 0.7085135579109192, "learning_rate": 0.0005, "loss": 1.5432, "step": 3440 }, { "epoch": 0.11001626327370133, "grad_norm": 0.6775384545326233, "learning_rate": 0.0005, "loss": 1.5483, "step": 3450 }, { "epoch": 0.11033515099333524, "grad_norm": 0.6983676552772522, "learning_rate": 0.0005, "loss": 1.5062, "step": 3460 }, { "epoch": 0.11065403871296917, "grad_norm": 0.7024519443511963, "learning_rate": 0.0005, "loss": 1.5343, "step": 3470 }, { "epoch": 0.11097292643260308, "grad_norm": 0.7096553444862366, "learning_rate": 0.0005, "loss": 1.5145, "step": 3480 }, { "epoch": 0.111291814152237, "grad_norm": 0.7284001111984253, "learning_rate": 0.0005, "loss": 1.5124, "step": 3490 }, { "epoch": 0.11161070187187092, "grad_norm": 0.6964322328567505, "learning_rate": 0.0005, "loss": 1.5246, "step": 3500 }, { "epoch": 0.11192958959150483, "grad_norm": 0.6839665174484253, "learning_rate": 0.0005, "loss": 1.5239, "step": 3510 }, { "epoch": 0.11224847731113875, "grad_norm": 0.665112316608429, "learning_rate": 0.0005, "loss": 1.5158, "step": 3520 }, { "epoch": 0.11256736503077266, "grad_norm": 0.7121447920799255, "learning_rate": 0.0005, "loss": 1.5107, "step": 3530 }, { "epoch": 0.11288625275040658, "grad_norm": 0.6824846267700195, "learning_rate": 0.0005, "loss": 1.5239, "step": 3540 }, { "epoch": 0.1132051404700405, "grad_norm": 0.6695664525032043, "learning_rate": 0.0005, "loss": 1.5204, "step": 3550 }, { "epoch": 0.11352402818967441, "grad_norm": 0.6862232089042664, "learning_rate": 0.0005, "loss": 1.5173, "step": 3560 }, { "epoch": 0.11384291590930833, "grad_norm": 0.6916778087615967, "learning_rate": 0.0005, "loss": 1.4952, "step": 3570 }, { "epoch": 0.11416180362894225, "grad_norm": 0.6827712655067444, "learning_rate": 0.0005, "loss": 1.4932, "step": 3580 }, { "epoch": 0.11448069134857616, "grad_norm": 0.7031466960906982, "learning_rate": 0.0005, "loss": 1.5347, "step": 3590 }, { "epoch": 0.11479957906821009, "grad_norm": 0.7130076289176941, "learning_rate": 0.0005, "loss": 1.5229, "step": 3600 }, { "epoch": 0.115118466787844, "grad_norm": 0.6662548184394836, "learning_rate": 0.0005, "loss": 1.4887, "step": 3610 }, { "epoch": 0.11543735450747791, "grad_norm": 0.6988325715065002, "learning_rate": 0.0005, "loss": 1.4849, "step": 3620 }, { "epoch": 0.11575624222711184, "grad_norm": 0.7220348715782166, "learning_rate": 0.0005, "loss": 1.5171, "step": 3630 }, { "epoch": 0.11607512994674575, "grad_norm": 0.7088020443916321, "learning_rate": 0.0005, "loss": 1.5283, "step": 3640 }, { "epoch": 0.11639401766637966, "grad_norm": 0.7373560070991516, "learning_rate": 0.0005, "loss": 1.5044, "step": 3650 }, { "epoch": 0.11671290538601359, "grad_norm": 0.6741558313369751, "learning_rate": 0.0005, "loss": 1.4895, "step": 3660 }, { "epoch": 0.1170317931056475, "grad_norm": 0.6753981113433838, "learning_rate": 0.0005, "loss": 1.5154, "step": 3670 }, { "epoch": 0.11735068082528141, "grad_norm": 0.6715815663337708, "learning_rate": 0.0005, "loss": 1.5322, "step": 3680 }, { "epoch": 0.11766956854491534, "grad_norm": 0.6624891757965088, "learning_rate": 0.0005, "loss": 1.4974, "step": 3690 }, { "epoch": 0.11798845626454925, "grad_norm": 0.6958360075950623, "learning_rate": 0.0005, "loss": 1.5063, "step": 3700 }, { "epoch": 0.11830734398418317, "grad_norm": 0.6877066493034363, "learning_rate": 0.0005, "loss": 1.5088, "step": 3710 }, { "epoch": 0.11862623170381709, "grad_norm": 0.6606530547142029, "learning_rate": 0.0005, "loss": 1.4886, "step": 3720 }, { "epoch": 0.118945119423451, "grad_norm": 0.6680471897125244, "learning_rate": 0.0005, "loss": 1.5079, "step": 3730 }, { "epoch": 0.11926400714308492, "grad_norm": 0.6819968819618225, "learning_rate": 0.0005, "loss": 1.5104, "step": 3740 }, { "epoch": 0.11958289486271884, "grad_norm": 0.6881066560745239, "learning_rate": 0.0005, "loss": 1.4819, "step": 3750 }, { "epoch": 0.11990178258235275, "grad_norm": 0.6798304915428162, "learning_rate": 0.0005, "loss": 1.4933, "step": 3760 }, { "epoch": 0.12022067030198667, "grad_norm": 0.7182119488716125, "learning_rate": 0.0005, "loss": 1.506, "step": 3770 }, { "epoch": 0.12053955802162059, "grad_norm": 0.6950844526290894, "learning_rate": 0.0005, "loss": 1.5218, "step": 3780 }, { "epoch": 0.12085844574125451, "grad_norm": 0.692387580871582, "learning_rate": 0.0005, "loss": 1.4787, "step": 3790 }, { "epoch": 0.12117733346088842, "grad_norm": 0.7074518203735352, "learning_rate": 0.0005, "loss": 1.4774, "step": 3800 }, { "epoch": 0.12149622118052233, "grad_norm": 0.6878761649131775, "learning_rate": 0.0005, "loss": 1.4782, "step": 3810 }, { "epoch": 0.12181510890015626, "grad_norm": 0.6472737789154053, "learning_rate": 0.0005, "loss": 1.4832, "step": 3820 }, { "epoch": 0.12213399661979017, "grad_norm": 0.6680033802986145, "learning_rate": 0.0005, "loss": 1.4781, "step": 3830 }, { "epoch": 0.12245288433942408, "grad_norm": 0.7118170857429504, "learning_rate": 0.0005, "loss": 1.4986, "step": 3840 }, { "epoch": 0.12277177205905801, "grad_norm": 0.6713599562644958, "learning_rate": 0.0005, "loss": 1.4971, "step": 3850 }, { "epoch": 0.12309065977869192, "grad_norm": 0.6707881093025208, "learning_rate": 0.0005, "loss": 1.4985, "step": 3860 }, { "epoch": 0.12340954749832583, "grad_norm": 0.7143165469169617, "learning_rate": 0.0005, "loss": 1.5075, "step": 3870 }, { "epoch": 0.12372843521795976, "grad_norm": 0.6780462265014648, "learning_rate": 0.0005, "loss": 1.4691, "step": 3880 }, { "epoch": 0.12404732293759367, "grad_norm": 0.7073563933372498, "learning_rate": 0.0005, "loss": 1.4906, "step": 3890 }, { "epoch": 0.1243662106572276, "grad_norm": 0.6700391173362732, "learning_rate": 0.0005, "loss": 1.4866, "step": 3900 }, { "epoch": 0.12468509837686151, "grad_norm": 0.6709848046302795, "learning_rate": 0.0005, "loss": 1.4813, "step": 3910 }, { "epoch": 0.12500398609649543, "grad_norm": 0.6658124923706055, "learning_rate": 0.0005, "loss": 1.4889, "step": 3920 }, { "epoch": 0.12532287381612933, "grad_norm": 0.6852616667747498, "learning_rate": 0.0005, "loss": 1.4599, "step": 3930 }, { "epoch": 0.12564176153576326, "grad_norm": 0.6966326832771301, "learning_rate": 0.0005, "loss": 1.4588, "step": 3940 }, { "epoch": 0.12596064925539718, "grad_norm": 0.6924886703491211, "learning_rate": 0.0005, "loss": 1.4731, "step": 3950 }, { "epoch": 0.12627953697503108, "grad_norm": 0.7122734785079956, "learning_rate": 0.0005, "loss": 1.4586, "step": 3960 }, { "epoch": 0.126598424694665, "grad_norm": 0.6784149408340454, "learning_rate": 0.0005, "loss": 1.4796, "step": 3970 }, { "epoch": 0.12691731241429893, "grad_norm": 0.6619938015937805, "learning_rate": 0.0005, "loss": 1.4619, "step": 3980 }, { "epoch": 0.12723620013393283, "grad_norm": 0.6966860890388489, "learning_rate": 0.0005, "loss": 1.4702, "step": 3990 }, { "epoch": 0.12755508785356676, "grad_norm": 0.6390230655670166, "learning_rate": 0.0005, "loss": 1.4623, "step": 4000 }, { "epoch": 0.12787397557320068, "grad_norm": 0.6684625744819641, "learning_rate": 0.0005, "loss": 1.48, "step": 4010 }, { "epoch": 0.12819286329283458, "grad_norm": 0.796931266784668, "learning_rate": 0.0005, "loss": 1.4648, "step": 4020 }, { "epoch": 0.1285117510124685, "grad_norm": 0.6581517457962036, "learning_rate": 0.0005, "loss": 1.4751, "step": 4030 }, { "epoch": 0.12883063873210243, "grad_norm": 0.6862014532089233, "learning_rate": 0.0005, "loss": 1.4666, "step": 4040 }, { "epoch": 0.12914952645173636, "grad_norm": 0.7265594005584717, "learning_rate": 0.0005, "loss": 1.4742, "step": 4050 }, { "epoch": 0.12946841417137026, "grad_norm": 0.667931079864502, "learning_rate": 0.0005, "loss": 1.4865, "step": 4060 }, { "epoch": 0.12978730189100418, "grad_norm": 0.6380496025085449, "learning_rate": 0.0005, "loss": 1.4761, "step": 4070 }, { "epoch": 0.1301061896106381, "grad_norm": 0.688275158405304, "learning_rate": 0.0005, "loss": 1.4506, "step": 4080 }, { "epoch": 0.130425077330272, "grad_norm": 0.6321500539779663, "learning_rate": 0.0005, "loss": 1.4676, "step": 4090 }, { "epoch": 0.13074396504990593, "grad_norm": 0.6487618088722229, "learning_rate": 0.0005, "loss": 1.464, "step": 4100 }, { "epoch": 0.13106285276953986, "grad_norm": 0.674380898475647, "learning_rate": 0.0005, "loss": 1.4641, "step": 4110 }, { "epoch": 0.13138174048917375, "grad_norm": 0.6674288511276245, "learning_rate": 0.0005, "loss": 1.4433, "step": 4120 }, { "epoch": 0.13170062820880768, "grad_norm": 0.611838161945343, "learning_rate": 0.0005, "loss": 1.4598, "step": 4130 }, { "epoch": 0.1320195159284416, "grad_norm": 0.6699022650718689, "learning_rate": 0.0005, "loss": 1.4584, "step": 4140 }, { "epoch": 0.1323384036480755, "grad_norm": 0.6623623371124268, "learning_rate": 0.0005, "loss": 1.4421, "step": 4150 }, { "epoch": 0.13265729136770943, "grad_norm": 0.648499608039856, "learning_rate": 0.0005, "loss": 1.4512, "step": 4160 }, { "epoch": 0.13297617908734335, "grad_norm": 0.6404657363891602, "learning_rate": 0.0005, "loss": 1.4548, "step": 4170 }, { "epoch": 0.13329506680697725, "grad_norm": 0.6383042335510254, "learning_rate": 0.0005, "loss": 1.431, "step": 4180 }, { "epoch": 0.13361395452661118, "grad_norm": 0.6430786848068237, "learning_rate": 0.0005, "loss": 1.4438, "step": 4190 }, { "epoch": 0.1339328422462451, "grad_norm": 0.643415629863739, "learning_rate": 0.0005, "loss": 1.4543, "step": 4200 }, { "epoch": 0.134251729965879, "grad_norm": 0.644809365272522, "learning_rate": 0.0005, "loss": 1.4467, "step": 4210 }, { "epoch": 0.13457061768551293, "grad_norm": 0.6084710955619812, "learning_rate": 0.0005, "loss": 1.4426, "step": 4220 }, { "epoch": 0.13488950540514685, "grad_norm": 0.6093063950538635, "learning_rate": 0.0005, "loss": 1.4329, "step": 4230 }, { "epoch": 0.13520839312478075, "grad_norm": 0.6573523879051208, "learning_rate": 0.0005, "loss": 1.4752, "step": 4240 }, { "epoch": 0.13552728084441468, "grad_norm": 0.6293638944625854, "learning_rate": 0.0005, "loss": 1.4492, "step": 4250 }, { "epoch": 0.1358461685640486, "grad_norm": 0.6632397174835205, "learning_rate": 0.0005, "loss": 1.4643, "step": 4260 }, { "epoch": 0.13616505628368253, "grad_norm": 0.6814329028129578, "learning_rate": 0.0005, "loss": 1.4011, "step": 4270 }, { "epoch": 0.13648394400331643, "grad_norm": 0.7331423163414001, "learning_rate": 0.0005, "loss": 1.4466, "step": 4280 }, { "epoch": 0.13680283172295035, "grad_norm": 0.645703136920929, "learning_rate": 0.0005, "loss": 1.4247, "step": 4290 }, { "epoch": 0.13712171944258428, "grad_norm": 0.6080441474914551, "learning_rate": 0.0005, "loss": 1.4398, "step": 4300 }, { "epoch": 0.13744060716221818, "grad_norm": 0.6814742684364319, "learning_rate": 0.0005, "loss": 1.4523, "step": 4310 }, { "epoch": 0.1377594948818521, "grad_norm": 0.6626353859901428, "learning_rate": 0.0005, "loss": 1.4323, "step": 4320 }, { "epoch": 0.13807838260148603, "grad_norm": 0.6317815184593201, "learning_rate": 0.0005, "loss": 1.4403, "step": 4330 }, { "epoch": 0.13839727032111993, "grad_norm": 0.6251382827758789, "learning_rate": 0.0005, "loss": 1.4377, "step": 4340 }, { "epoch": 0.13871615804075385, "grad_norm": 0.6542896032333374, "learning_rate": 0.0005, "loss": 1.44, "step": 4350 }, { "epoch": 0.13903504576038778, "grad_norm": 0.6480959057807922, "learning_rate": 0.0005, "loss": 1.4399, "step": 4360 }, { "epoch": 0.13935393348002167, "grad_norm": 0.6671744585037231, "learning_rate": 0.0005, "loss": 1.4552, "step": 4370 }, { "epoch": 0.1396728211996556, "grad_norm": 0.6613544225692749, "learning_rate": 0.0005, "loss": 1.4311, "step": 4380 }, { "epoch": 0.13999170891928953, "grad_norm": 0.6300452947616577, "learning_rate": 0.0005, "loss": 1.4346, "step": 4390 }, { "epoch": 0.14031059663892342, "grad_norm": 0.6960680484771729, "learning_rate": 0.0005, "loss": 1.4496, "step": 4400 }, { "epoch": 0.14062948435855735, "grad_norm": 0.6525735259056091, "learning_rate": 0.0005, "loss": 1.4524, "step": 4410 }, { "epoch": 0.14094837207819128, "grad_norm": 0.6144914031028748, "learning_rate": 0.0005, "loss": 1.4161, "step": 4420 }, { "epoch": 0.14126725979782517, "grad_norm": 0.6260567307472229, "learning_rate": 0.0005, "loss": 1.431, "step": 4430 }, { "epoch": 0.1415861475174591, "grad_norm": 0.6403346657752991, "learning_rate": 0.0005, "loss": 1.4194, "step": 4440 }, { "epoch": 0.14190503523709302, "grad_norm": 0.6182661652565002, "learning_rate": 0.0005, "loss": 1.4047, "step": 4450 }, { "epoch": 0.14222392295672695, "grad_norm": 0.6593475341796875, "learning_rate": 0.0005, "loss": 1.4063, "step": 4460 }, { "epoch": 0.14254281067636085, "grad_norm": 0.661416232585907, "learning_rate": 0.0005, "loss": 1.4229, "step": 4470 }, { "epoch": 0.14286169839599477, "grad_norm": 0.6097050905227661, "learning_rate": 0.0005, "loss": 1.4242, "step": 4480 }, { "epoch": 0.1431805861156287, "grad_norm": 0.6463915109634399, "learning_rate": 0.0005, "loss": 1.4195, "step": 4490 }, { "epoch": 0.1434994738352626, "grad_norm": 0.7001182436943054, "learning_rate": 0.0005, "loss": 1.4472, "step": 4500 }, { "epoch": 0.14381836155489652, "grad_norm": 0.6229003667831421, "learning_rate": 0.0005, "loss": 1.4525, "step": 4510 }, { "epoch": 0.14413724927453045, "grad_norm": 0.6146069169044495, "learning_rate": 0.0005, "loss": 1.4217, "step": 4520 }, { "epoch": 0.14445613699416435, "grad_norm": 0.6353657245635986, "learning_rate": 0.0005, "loss": 1.4177, "step": 4530 }, { "epoch": 0.14477502471379827, "grad_norm": 0.646435558795929, "learning_rate": 0.0005, "loss": 1.43, "step": 4540 }, { "epoch": 0.1450939124334322, "grad_norm": 0.6327551603317261, "learning_rate": 0.0005, "loss": 1.4269, "step": 4550 }, { "epoch": 0.1454128001530661, "grad_norm": 0.63446044921875, "learning_rate": 0.0005, "loss": 1.4469, "step": 4560 }, { "epoch": 0.14573168787270002, "grad_norm": 0.6193106770515442, "learning_rate": 0.0005, "loss": 1.4215, "step": 4570 }, { "epoch": 0.14605057559233395, "grad_norm": 0.607028067111969, "learning_rate": 0.0005, "loss": 1.4083, "step": 4580 }, { "epoch": 0.14636946331196785, "grad_norm": 0.6031425595283508, "learning_rate": 0.0005, "loss": 1.4102, "step": 4590 }, { "epoch": 0.14668835103160177, "grad_norm": 0.633690595626831, "learning_rate": 0.0005, "loss": 1.4287, "step": 4600 }, { "epoch": 0.1470072387512357, "grad_norm": 0.6302832365036011, "learning_rate": 0.0005, "loss": 1.4223, "step": 4610 }, { "epoch": 0.1473261264708696, "grad_norm": 0.6090842485427856, "learning_rate": 0.0005, "loss": 1.4273, "step": 4620 }, { "epoch": 0.14764501419050352, "grad_norm": 0.6385555863380432, "learning_rate": 0.0005, "loss": 1.4256, "step": 4630 }, { "epoch": 0.14796390191013745, "grad_norm": 0.6406751871109009, "learning_rate": 0.0005, "loss": 1.4308, "step": 4640 }, { "epoch": 0.14828278962977134, "grad_norm": 0.6482434868812561, "learning_rate": 0.0005, "loss": 1.4267, "step": 4650 }, { "epoch": 0.14860167734940527, "grad_norm": 0.7152848839759827, "learning_rate": 0.0005, "loss": 1.4185, "step": 4660 }, { "epoch": 0.1489205650690392, "grad_norm": 0.6866331100463867, "learning_rate": 0.0005, "loss": 1.4083, "step": 4670 }, { "epoch": 0.14923945278867312, "grad_norm": 0.6515335440635681, "learning_rate": 0.0005, "loss": 1.4259, "step": 4680 }, { "epoch": 0.14955834050830702, "grad_norm": 0.6101216673851013, "learning_rate": 0.0005, "loss": 1.4207, "step": 4690 }, { "epoch": 0.14987722822794095, "grad_norm": 0.6153760552406311, "learning_rate": 0.0005, "loss": 1.3919, "step": 4700 }, { "epoch": 0.15019611594757487, "grad_norm": 0.6293302178382874, "learning_rate": 0.0005, "loss": 1.4083, "step": 4710 }, { "epoch": 0.15051500366720877, "grad_norm": 0.6889540553092957, "learning_rate": 0.0005, "loss": 1.3989, "step": 4720 }, { "epoch": 0.1508338913868427, "grad_norm": 0.6246590614318848, "learning_rate": 0.0005, "loss": 1.4081, "step": 4730 }, { "epoch": 0.15115277910647662, "grad_norm": 0.6354957818984985, "learning_rate": 0.0005, "loss": 1.4072, "step": 4740 }, { "epoch": 0.15147166682611052, "grad_norm": 0.6348922252655029, "learning_rate": 0.0005, "loss": 1.3956, "step": 4750 }, { "epoch": 0.15179055454574444, "grad_norm": 0.6126857995986938, "learning_rate": 0.0005, "loss": 1.3893, "step": 4760 }, { "epoch": 0.15210944226537837, "grad_norm": 0.6216022372245789, "learning_rate": 0.0005, "loss": 1.4103, "step": 4770 }, { "epoch": 0.15242832998501227, "grad_norm": 0.5942848920822144, "learning_rate": 0.0005, "loss": 1.3894, "step": 4780 }, { "epoch": 0.1527472177046462, "grad_norm": 0.6769245862960815, "learning_rate": 0.0005, "loss": 1.4215, "step": 4790 }, { "epoch": 0.15306610542428012, "grad_norm": 0.6551666855812073, "learning_rate": 0.0005, "loss": 1.3935, "step": 4800 }, { "epoch": 0.15338499314391402, "grad_norm": 0.6036399006843567, "learning_rate": 0.0005, "loss": 1.4144, "step": 4810 }, { "epoch": 0.15370388086354794, "grad_norm": 0.5946652889251709, "learning_rate": 0.0005, "loss": 1.4069, "step": 4820 }, { "epoch": 0.15402276858318187, "grad_norm": 0.577434778213501, "learning_rate": 0.0005, "loss": 1.3909, "step": 4830 }, { "epoch": 0.15434165630281577, "grad_norm": 0.581184446811676, "learning_rate": 0.0005, "loss": 1.4093, "step": 4840 }, { "epoch": 0.1546605440224497, "grad_norm": 0.6142603158950806, "learning_rate": 0.0005, "loss": 1.4001, "step": 4850 }, { "epoch": 0.15497943174208362, "grad_norm": 0.6196522116661072, "learning_rate": 0.0005, "loss": 1.3984, "step": 4860 }, { "epoch": 0.15529831946171752, "grad_norm": 0.6150513291358948, "learning_rate": 0.0005, "loss": 1.3812, "step": 4870 }, { "epoch": 0.15561720718135144, "grad_norm": 0.6181188821792603, "learning_rate": 0.0005, "loss": 1.3767, "step": 4880 }, { "epoch": 0.15593609490098537, "grad_norm": 0.5967665910720825, "learning_rate": 0.0005, "loss": 1.4041, "step": 4890 }, { "epoch": 0.1562549826206193, "grad_norm": 0.6157842874526978, "learning_rate": 0.0005, "loss": 1.4045, "step": 4900 }, { "epoch": 0.1565738703402532, "grad_norm": 0.5986465215682983, "learning_rate": 0.0005, "loss": 1.4117, "step": 4910 }, { "epoch": 0.15689275805988712, "grad_norm": 0.6068465113639832, "learning_rate": 0.0005, "loss": 1.3989, "step": 4920 }, { "epoch": 0.15721164577952104, "grad_norm": 0.6013087630271912, "learning_rate": 0.0005, "loss": 1.4044, "step": 4930 }, { "epoch": 0.15753053349915494, "grad_norm": 0.5771439671516418, "learning_rate": 0.0005, "loss": 1.3809, "step": 4940 }, { "epoch": 0.15784942121878887, "grad_norm": 0.62718665599823, "learning_rate": 0.0005, "loss": 1.3753, "step": 4950 }, { "epoch": 0.1581683089384228, "grad_norm": 0.6010807752609253, "learning_rate": 0.0005, "loss": 1.4032, "step": 4960 }, { "epoch": 0.1584871966580567, "grad_norm": 0.5989909768104553, "learning_rate": 0.0005, "loss": 1.3964, "step": 4970 }, { "epoch": 0.15880608437769062, "grad_norm": 0.6056174635887146, "learning_rate": 0.0005, "loss": 1.3824, "step": 4980 }, { "epoch": 0.15912497209732454, "grad_norm": 0.645205557346344, "learning_rate": 0.0005, "loss": 1.385, "step": 4990 }, { "epoch": 0.15944385981695844, "grad_norm": 0.5958195328712463, "learning_rate": 0.0005, "loss": 1.3599, "step": 5000 }, { "epoch": 0.15976274753659236, "grad_norm": 0.6125824451446533, "learning_rate": 0.0005, "loss": 1.3972, "step": 5010 }, { "epoch": 0.1600816352562263, "grad_norm": 0.5644070506095886, "learning_rate": 0.0005, "loss": 1.3807, "step": 5020 }, { "epoch": 0.1604005229758602, "grad_norm": 0.6514316201210022, "learning_rate": 0.0005, "loss": 1.4205, "step": 5030 }, { "epoch": 0.1607194106954941, "grad_norm": 0.5961187481880188, "learning_rate": 0.0005, "loss": 1.3991, "step": 5040 }, { "epoch": 0.16103829841512804, "grad_norm": 0.5987887978553772, "learning_rate": 0.0005, "loss": 1.3844, "step": 5050 }, { "epoch": 0.16135718613476194, "grad_norm": 0.6017197370529175, "learning_rate": 0.0005, "loss": 1.3868, "step": 5060 }, { "epoch": 0.16167607385439586, "grad_norm": 0.6067407131195068, "learning_rate": 0.0005, "loss": 1.3791, "step": 5070 }, { "epoch": 0.1619949615740298, "grad_norm": 0.579651951789856, "learning_rate": 0.0005, "loss": 1.3845, "step": 5080 }, { "epoch": 0.16231384929366371, "grad_norm": 0.571557879447937, "learning_rate": 0.0005, "loss": 1.4063, "step": 5090 }, { "epoch": 0.1626327370132976, "grad_norm": 0.5880619287490845, "learning_rate": 0.0005, "loss": 1.3924, "step": 5100 }, { "epoch": 0.16295162473293154, "grad_norm": 0.6160964965820312, "learning_rate": 0.0005, "loss": 1.3861, "step": 5110 }, { "epoch": 0.16327051245256546, "grad_norm": 0.6288329362869263, "learning_rate": 0.0005, "loss": 1.3661, "step": 5120 }, { "epoch": 0.16358940017219936, "grad_norm": 0.5810098052024841, "learning_rate": 0.0005, "loss": 1.3856, "step": 5130 }, { "epoch": 0.1639082878918333, "grad_norm": 0.6061198115348816, "learning_rate": 0.0005, "loss": 1.3792, "step": 5140 }, { "epoch": 0.1642271756114672, "grad_norm": 0.5987685918807983, "learning_rate": 0.0005, "loss": 1.3813, "step": 5150 }, { "epoch": 0.1645460633311011, "grad_norm": 0.6192308068275452, "learning_rate": 0.0005, "loss": 1.3805, "step": 5160 }, { "epoch": 0.16486495105073504, "grad_norm": 0.6115210056304932, "learning_rate": 0.0005, "loss": 1.3873, "step": 5170 }, { "epoch": 0.16518383877036896, "grad_norm": 0.5993800759315491, "learning_rate": 0.0005, "loss": 1.3931, "step": 5180 }, { "epoch": 0.16550272649000286, "grad_norm": 0.5932030081748962, "learning_rate": 0.0005, "loss": 1.4059, "step": 5190 }, { "epoch": 0.1658216142096368, "grad_norm": 0.5708052515983582, "learning_rate": 0.0005, "loss": 1.361, "step": 5200 }, { "epoch": 0.1661405019292707, "grad_norm": 0.5772805213928223, "learning_rate": 0.0005, "loss": 1.3641, "step": 5210 }, { "epoch": 0.1664593896489046, "grad_norm": 0.5902208685874939, "learning_rate": 0.0005, "loss": 1.3697, "step": 5220 }, { "epoch": 0.16677827736853854, "grad_norm": 0.6566534042358398, "learning_rate": 0.0005, "loss": 1.3673, "step": 5230 }, { "epoch": 0.16709716508817246, "grad_norm": 0.5957956314086914, "learning_rate": 0.0005, "loss": 1.3819, "step": 5240 }, { "epoch": 0.16741605280780636, "grad_norm": 0.5835301280021667, "learning_rate": 0.0005, "loss": 1.3761, "step": 5250 }, { "epoch": 0.16773494052744028, "grad_norm": 0.6204524636268616, "learning_rate": 0.0005, "loss": 1.3636, "step": 5260 }, { "epoch": 0.1680538282470742, "grad_norm": 0.6161551475524902, "learning_rate": 0.0005, "loss": 1.3584, "step": 5270 }, { "epoch": 0.1683727159667081, "grad_norm": 0.5636275410652161, "learning_rate": 0.0005, "loss": 1.3727, "step": 5280 }, { "epoch": 0.16869160368634203, "grad_norm": 0.5892273187637329, "learning_rate": 0.0005, "loss": 1.3688, "step": 5290 }, { "epoch": 0.16901049140597596, "grad_norm": 0.5970210433006287, "learning_rate": 0.0005, "loss": 1.3697, "step": 5300 }, { "epoch": 0.16932937912560989, "grad_norm": 0.5779142379760742, "learning_rate": 0.0005, "loss": 1.3734, "step": 5310 }, { "epoch": 0.16964826684524378, "grad_norm": 0.6144216060638428, "learning_rate": 0.0005, "loss": 1.3671, "step": 5320 }, { "epoch": 0.1699671545648777, "grad_norm": 0.5949397683143616, "learning_rate": 0.0005, "loss": 1.383, "step": 5330 }, { "epoch": 0.17028604228451164, "grad_norm": 0.6158933043479919, "learning_rate": 0.0005, "loss": 1.385, "step": 5340 }, { "epoch": 0.17060493000414553, "grad_norm": 0.6164098381996155, "learning_rate": 0.0005, "loss": 1.3707, "step": 5350 }, { "epoch": 0.17092381772377946, "grad_norm": 0.6229051351547241, "learning_rate": 0.0005, "loss": 1.3695, "step": 5360 }, { "epoch": 0.17124270544341338, "grad_norm": 0.5980965495109558, "learning_rate": 0.0005, "loss": 1.3562, "step": 5370 }, { "epoch": 0.17156159316304728, "grad_norm": 0.5617382526397705, "learning_rate": 0.0005, "loss": 1.3436, "step": 5380 }, { "epoch": 0.1718804808826812, "grad_norm": 0.5663665533065796, "learning_rate": 0.0005, "loss": 1.3608, "step": 5390 }, { "epoch": 0.17219936860231513, "grad_norm": 0.5852437019348145, "learning_rate": 0.0005, "loss": 1.3885, "step": 5400 }, { "epoch": 0.17251825632194903, "grad_norm": 0.5675665140151978, "learning_rate": 0.0005, "loss": 1.3581, "step": 5410 }, { "epoch": 0.17283714404158296, "grad_norm": 0.5745238065719604, "learning_rate": 0.0005, "loss": 1.3736, "step": 5420 }, { "epoch": 0.17315603176121688, "grad_norm": 0.5912173390388489, "learning_rate": 0.0005, "loss": 1.3652, "step": 5430 }, { "epoch": 0.17347491948085078, "grad_norm": 0.5719927549362183, "learning_rate": 0.0005, "loss": 1.3722, "step": 5440 }, { "epoch": 0.1737938072004847, "grad_norm": 0.5705735683441162, "learning_rate": 0.0005, "loss": 1.3752, "step": 5450 }, { "epoch": 0.17411269492011863, "grad_norm": 0.6265610456466675, "learning_rate": 0.0005, "loss": 1.3506, "step": 5460 }, { "epoch": 0.17443158263975253, "grad_norm": 0.5637654662132263, "learning_rate": 0.0005, "loss": 1.3734, "step": 5470 }, { "epoch": 0.17475047035938646, "grad_norm": 0.5880090594291687, "learning_rate": 0.0005, "loss": 1.3779, "step": 5480 }, { "epoch": 0.17506935807902038, "grad_norm": 0.5817082524299622, "learning_rate": 0.0005, "loss": 1.3343, "step": 5490 }, { "epoch": 0.1753882457986543, "grad_norm": 0.5926219820976257, "learning_rate": 0.0005, "loss": 1.3365, "step": 5500 }, { "epoch": 0.1757071335182882, "grad_norm": 0.5910172462463379, "learning_rate": 0.0005, "loss": 1.3721, "step": 5510 }, { "epoch": 0.17602602123792213, "grad_norm": 0.5813695192337036, "learning_rate": 0.0005, "loss": 1.3704, "step": 5520 }, { "epoch": 0.17634490895755606, "grad_norm": 0.5902416110038757, "learning_rate": 0.0005, "loss": 1.3513, "step": 5530 }, { "epoch": 0.17666379667718995, "grad_norm": 0.6056077480316162, "learning_rate": 0.0005, "loss": 1.3543, "step": 5540 }, { "epoch": 0.17698268439682388, "grad_norm": 0.6056801080703735, "learning_rate": 0.0005, "loss": 1.3714, "step": 5550 }, { "epoch": 0.1773015721164578, "grad_norm": 0.5975509285926819, "learning_rate": 0.0005, "loss": 1.3634, "step": 5560 }, { "epoch": 0.1776204598360917, "grad_norm": 0.5552816390991211, "learning_rate": 0.0005, "loss": 1.3502, "step": 5570 }, { "epoch": 0.17793934755572563, "grad_norm": 0.5838658809661865, "learning_rate": 0.0005, "loss": 1.3397, "step": 5580 }, { "epoch": 0.17825823527535956, "grad_norm": 0.5876995921134949, "learning_rate": 0.0005, "loss": 1.3776, "step": 5590 }, { "epoch": 0.17857712299499345, "grad_norm": 0.5738474130630493, "learning_rate": 0.0005, "loss": 1.3667, "step": 5600 }, { "epoch": 0.17889601071462738, "grad_norm": 0.5683244466781616, "learning_rate": 0.0005, "loss": 1.3628, "step": 5610 }, { "epoch": 0.1792148984342613, "grad_norm": 0.5847506523132324, "learning_rate": 0.0005, "loss": 1.3759, "step": 5620 }, { "epoch": 0.1795337861538952, "grad_norm": 0.5738845467567444, "learning_rate": 0.0005, "loss": 1.3629, "step": 5630 }, { "epoch": 0.17985267387352913, "grad_norm": 0.5815554261207581, "learning_rate": 0.0005, "loss": 1.3511, "step": 5640 }, { "epoch": 0.18017156159316305, "grad_norm": 0.5679082870483398, "learning_rate": 0.0005, "loss": 1.37, "step": 5650 }, { "epoch": 0.18049044931279695, "grad_norm": 0.5690569877624512, "learning_rate": 0.0005, "loss": 1.3632, "step": 5660 }, { "epoch": 0.18080933703243088, "grad_norm": 0.5927101969718933, "learning_rate": 0.0005, "loss": 1.362, "step": 5670 }, { "epoch": 0.1811282247520648, "grad_norm": 0.5851690769195557, "learning_rate": 0.0005, "loss": 1.3414, "step": 5680 }, { "epoch": 0.1814471124716987, "grad_norm": 0.5578193664550781, "learning_rate": 0.0005, "loss": 1.3451, "step": 5690 }, { "epoch": 0.18176600019133263, "grad_norm": 0.5566641688346863, "learning_rate": 0.0005, "loss": 1.3481, "step": 5700 }, { "epoch": 0.18208488791096655, "grad_norm": 0.5803577899932861, "learning_rate": 0.0005, "loss": 1.3584, "step": 5710 }, { "epoch": 0.18240377563060048, "grad_norm": 0.574792206287384, "learning_rate": 0.0005, "loss": 1.3742, "step": 5720 }, { "epoch": 0.18272266335023438, "grad_norm": 0.5856817960739136, "learning_rate": 0.0005, "loss": 1.337, "step": 5730 }, { "epoch": 0.1830415510698683, "grad_norm": 0.6036618947982788, "learning_rate": 0.0005, "loss": 1.3275, "step": 5740 }, { "epoch": 0.18336043878950223, "grad_norm": 0.5599528551101685, "learning_rate": 0.0005, "loss": 1.3694, "step": 5750 }, { "epoch": 0.18367932650913613, "grad_norm": 0.573541522026062, "learning_rate": 0.0005, "loss": 1.3507, "step": 5760 }, { "epoch": 0.18399821422877005, "grad_norm": 0.617546558380127, "learning_rate": 0.0005, "loss": 1.3508, "step": 5770 }, { "epoch": 0.18431710194840398, "grad_norm": 0.5568141341209412, "learning_rate": 0.0005, "loss": 1.3327, "step": 5780 }, { "epoch": 0.18463598966803788, "grad_norm": 0.548579216003418, "learning_rate": 0.0005, "loss": 1.3349, "step": 5790 }, { "epoch": 0.1849548773876718, "grad_norm": 0.5528845191001892, "learning_rate": 0.0005, "loss": 1.3442, "step": 5800 }, { "epoch": 0.18527376510730573, "grad_norm": 0.5503314137458801, "learning_rate": 0.0005, "loss": 1.3472, "step": 5810 }, { "epoch": 0.18559265282693962, "grad_norm": 0.5958365797996521, "learning_rate": 0.0005, "loss": 1.3391, "step": 5820 }, { "epoch": 0.18591154054657355, "grad_norm": 0.5654858350753784, "learning_rate": 0.0005, "loss": 1.3448, "step": 5830 }, { "epoch": 0.18623042826620748, "grad_norm": 0.6166044473648071, "learning_rate": 0.0005, "loss": 1.3442, "step": 5840 }, { "epoch": 0.18654931598584137, "grad_norm": 0.5609534382820129, "learning_rate": 0.0005, "loss": 1.3509, "step": 5850 }, { "epoch": 0.1868682037054753, "grad_norm": 0.5806050300598145, "learning_rate": 0.0005, "loss": 1.3431, "step": 5860 }, { "epoch": 0.18718709142510923, "grad_norm": 0.6101890206336975, "learning_rate": 0.0005, "loss": 1.3339, "step": 5870 }, { "epoch": 0.18750597914474312, "grad_norm": 0.5922510027885437, "learning_rate": 0.0005, "loss": 1.3578, "step": 5880 }, { "epoch": 0.18782486686437705, "grad_norm": 0.5860453248023987, "learning_rate": 0.0005, "loss": 1.3468, "step": 5890 }, { "epoch": 0.18814375458401097, "grad_norm": 0.5527572631835938, "learning_rate": 0.0005, "loss": 1.3397, "step": 5900 }, { "epoch": 0.18846264230364487, "grad_norm": 0.5668513178825378, "learning_rate": 0.0005, "loss": 1.3356, "step": 5910 }, { "epoch": 0.1887815300232788, "grad_norm": 0.536500871181488, "learning_rate": 0.0005, "loss": 1.3372, "step": 5920 }, { "epoch": 0.18910041774291272, "grad_norm": 0.5639796853065491, "learning_rate": 0.0005, "loss": 1.3358, "step": 5930 }, { "epoch": 0.18941930546254665, "grad_norm": 0.5703504085540771, "learning_rate": 0.0005, "loss": 1.349, "step": 5940 }, { "epoch": 0.18973819318218055, "grad_norm": 0.573939859867096, "learning_rate": 0.0005, "loss": 1.3361, "step": 5950 }, { "epoch": 0.19005708090181447, "grad_norm": 0.5942139029502869, "learning_rate": 0.0005, "loss": 1.3656, "step": 5960 }, { "epoch": 0.1903759686214484, "grad_norm": 0.5792593359947205, "learning_rate": 0.0005, "loss": 1.3463, "step": 5970 }, { "epoch": 0.1906948563410823, "grad_norm": 0.5661564469337463, "learning_rate": 0.0005, "loss": 1.3356, "step": 5980 }, { "epoch": 0.19101374406071622, "grad_norm": 0.6026549339294434, "learning_rate": 0.0005, "loss": 1.3432, "step": 5990 }, { "epoch": 0.19133263178035015, "grad_norm": 0.5632230043411255, "learning_rate": 0.0005, "loss": 1.3223, "step": 6000 }, { "epoch": 0.19165151949998405, "grad_norm": 0.5511482357978821, "learning_rate": 0.0005, "loss": 1.3489, "step": 6010 }, { "epoch": 0.19197040721961797, "grad_norm": 0.5755233764648438, "learning_rate": 0.0005, "loss": 1.3262, "step": 6020 }, { "epoch": 0.1922892949392519, "grad_norm": 0.5578722953796387, "learning_rate": 0.0005, "loss": 1.3175, "step": 6030 }, { "epoch": 0.1926081826588858, "grad_norm": 0.5360584259033203, "learning_rate": 0.0005, "loss": 1.3301, "step": 6040 }, { "epoch": 0.19292707037851972, "grad_norm": 0.5476540923118591, "learning_rate": 0.0005, "loss": 1.3303, "step": 6050 }, { "epoch": 0.19324595809815365, "grad_norm": 0.5518861413002014, "learning_rate": 0.0005, "loss": 1.3216, "step": 6060 }, { "epoch": 0.19356484581778755, "grad_norm": 0.5956515669822693, "learning_rate": 0.0005, "loss": 1.3313, "step": 6070 }, { "epoch": 0.19388373353742147, "grad_norm": 0.5531336665153503, "learning_rate": 0.0005, "loss": 1.3255, "step": 6080 }, { "epoch": 0.1942026212570554, "grad_norm": 0.5420064330101013, "learning_rate": 0.0005, "loss": 1.3311, "step": 6090 }, { "epoch": 0.1945215089766893, "grad_norm": 0.539386510848999, "learning_rate": 0.0005, "loss": 1.3359, "step": 6100 }, { "epoch": 0.19484039669632322, "grad_norm": 0.5622890591621399, "learning_rate": 0.0005, "loss": 1.3406, "step": 6110 }, { "epoch": 0.19515928441595715, "grad_norm": 0.6167592406272888, "learning_rate": 0.0005, "loss": 1.3323, "step": 6120 }, { "epoch": 0.19547817213559107, "grad_norm": 0.5873464345932007, "learning_rate": 0.0005, "loss": 1.3123, "step": 6130 }, { "epoch": 0.19579705985522497, "grad_norm": 0.5778335928916931, "learning_rate": 0.0005, "loss": 1.3387, "step": 6140 }, { "epoch": 0.1961159475748589, "grad_norm": 0.5280761122703552, "learning_rate": 0.0005, "loss": 1.3042, "step": 6150 }, { "epoch": 0.19643483529449282, "grad_norm": 0.5577932000160217, "learning_rate": 0.0005, "loss": 1.3082, "step": 6160 }, { "epoch": 0.19675372301412672, "grad_norm": 0.554754376411438, "learning_rate": 0.0005, "loss": 1.3451, "step": 6170 }, { "epoch": 0.19707261073376064, "grad_norm": 0.5626440048217773, "learning_rate": 0.0005, "loss": 1.3236, "step": 6180 }, { "epoch": 0.19739149845339457, "grad_norm": 0.5483393669128418, "learning_rate": 0.0005, "loss": 1.3454, "step": 6190 }, { "epoch": 0.19771038617302847, "grad_norm": 0.5727918148040771, "learning_rate": 0.0005, "loss": 1.3288, "step": 6200 }, { "epoch": 0.1980292738926624, "grad_norm": 0.5533772706985474, "learning_rate": 0.0005, "loss": 1.3344, "step": 6210 }, { "epoch": 0.19834816161229632, "grad_norm": 0.5411686301231384, "learning_rate": 0.0005, "loss": 1.3095, "step": 6220 }, { "epoch": 0.19866704933193022, "grad_norm": 0.5486521124839783, "learning_rate": 0.0005, "loss": 1.3103, "step": 6230 }, { "epoch": 0.19898593705156414, "grad_norm": 0.5434985160827637, "learning_rate": 0.0005, "loss": 1.3179, "step": 6240 }, { "epoch": 0.19930482477119807, "grad_norm": 0.5697535872459412, "learning_rate": 0.0005, "loss": 1.3321, "step": 6250 }, { "epoch": 0.19962371249083197, "grad_norm": 0.5627325177192688, "learning_rate": 0.0005, "loss": 1.311, "step": 6260 }, { "epoch": 0.1999426002104659, "grad_norm": 0.5584254264831543, "learning_rate": 0.0005, "loss": 1.326, "step": 6270 }, { "epoch": 0.20026148793009982, "grad_norm": 0.5627668499946594, "learning_rate": 0.0005, "loss": 1.3252, "step": 6280 }, { "epoch": 0.20058037564973372, "grad_norm": 0.5576115250587463, "learning_rate": 0.0005, "loss": 1.3356, "step": 6290 }, { "epoch": 0.20089926336936764, "grad_norm": 0.5420365333557129, "learning_rate": 0.0005, "loss": 1.3271, "step": 6300 }, { "epoch": 0.20121815108900157, "grad_norm": 0.5424570441246033, "learning_rate": 0.0005, "loss": 1.322, "step": 6310 }, { "epoch": 0.20153703880863547, "grad_norm": 0.5431638360023499, "learning_rate": 0.0005, "loss": 1.3184, "step": 6320 }, { "epoch": 0.2018559265282694, "grad_norm": 0.5798773765563965, "learning_rate": 0.0005, "loss": 1.3165, "step": 6330 }, { "epoch": 0.20217481424790332, "grad_norm": 0.5577746033668518, "learning_rate": 0.0005, "loss": 1.3209, "step": 6340 }, { "epoch": 0.20249370196753724, "grad_norm": 0.5490031242370605, "learning_rate": 0.0005, "loss": 1.3127, "step": 6350 }, { "epoch": 0.20281258968717114, "grad_norm": 0.5594183802604675, "learning_rate": 0.0005, "loss": 1.3175, "step": 6360 }, { "epoch": 0.20313147740680507, "grad_norm": 0.5752307772636414, "learning_rate": 0.0005, "loss": 1.3382, "step": 6370 }, { "epoch": 0.203450365126439, "grad_norm": 0.5838162302970886, "learning_rate": 0.0005, "loss": 1.3274, "step": 6380 }, { "epoch": 0.2037692528460729, "grad_norm": 0.5440608859062195, "learning_rate": 0.0005, "loss": 1.314, "step": 6390 }, { "epoch": 0.20408814056570682, "grad_norm": 0.5637199282646179, "learning_rate": 0.0005, "loss": 1.301, "step": 6400 }, { "epoch": 0.20440702828534074, "grad_norm": 0.583897054195404, "learning_rate": 0.0005, "loss": 1.331, "step": 6410 }, { "epoch": 0.20472591600497464, "grad_norm": 0.5349193215370178, "learning_rate": 0.0005, "loss": 1.3085, "step": 6420 }, { "epoch": 0.20504480372460857, "grad_norm": 0.572115957736969, "learning_rate": 0.0005, "loss": 1.3353, "step": 6430 }, { "epoch": 0.2053636914442425, "grad_norm": 0.5371711254119873, "learning_rate": 0.0005, "loss": 1.3115, "step": 6440 }, { "epoch": 0.2056825791638764, "grad_norm": 0.5468522310256958, "learning_rate": 0.0005, "loss": 1.3058, "step": 6450 }, { "epoch": 0.20600146688351031, "grad_norm": 0.513854444026947, "learning_rate": 0.0005, "loss": 1.3067, "step": 6460 }, { "epoch": 0.20632035460314424, "grad_norm": 0.5438308715820312, "learning_rate": 0.0005, "loss": 1.3245, "step": 6470 }, { "epoch": 0.20663924232277814, "grad_norm": 0.5419757962226868, "learning_rate": 0.0005, "loss": 1.317, "step": 6480 }, { "epoch": 0.20695813004241206, "grad_norm": 0.5453127026557922, "learning_rate": 0.0005, "loss": 1.3188, "step": 6490 }, { "epoch": 0.207277017762046, "grad_norm": 0.5806515216827393, "learning_rate": 0.0005, "loss": 1.3101, "step": 6500 }, { "epoch": 0.2075959054816799, "grad_norm": 0.5650323033332825, "learning_rate": 0.0005, "loss": 1.31, "step": 6510 }, { "epoch": 0.2079147932013138, "grad_norm": 0.5299544334411621, "learning_rate": 0.0005, "loss": 1.3122, "step": 6520 }, { "epoch": 0.20823368092094774, "grad_norm": 0.5299350619316101, "learning_rate": 0.0005, "loss": 1.3182, "step": 6530 }, { "epoch": 0.20855256864058166, "grad_norm": 0.544809877872467, "learning_rate": 0.0005, "loss": 1.3349, "step": 6540 }, { "epoch": 0.20887145636021556, "grad_norm": 0.5426610112190247, "learning_rate": 0.0005, "loss": 1.3163, "step": 6550 }, { "epoch": 0.2091903440798495, "grad_norm": 0.5313812494277954, "learning_rate": 0.0005, "loss": 1.3082, "step": 6560 }, { "epoch": 0.20950923179948341, "grad_norm": 0.5384641289710999, "learning_rate": 0.0005, "loss": 1.3154, "step": 6570 }, { "epoch": 0.2098281195191173, "grad_norm": 0.5682090520858765, "learning_rate": 0.0005, "loss": 1.3104, "step": 6580 }, { "epoch": 0.21014700723875124, "grad_norm": 0.5284824371337891, "learning_rate": 0.0005, "loss": 1.3181, "step": 6590 }, { "epoch": 0.21046589495838516, "grad_norm": 0.546111524105072, "learning_rate": 0.0005, "loss": 1.3002, "step": 6600 }, { "epoch": 0.21078478267801906, "grad_norm": 0.5103371143341064, "learning_rate": 0.0005, "loss": 1.3159, "step": 6610 }, { "epoch": 0.211103670397653, "grad_norm": 0.5273628830909729, "learning_rate": 0.0005, "loss": 1.3016, "step": 6620 }, { "epoch": 0.2114225581172869, "grad_norm": 0.5440049767494202, "learning_rate": 0.0005, "loss": 1.3133, "step": 6630 }, { "epoch": 0.2117414458369208, "grad_norm": 0.5308232307434082, "learning_rate": 0.0005, "loss": 1.3247, "step": 6640 }, { "epoch": 0.21206033355655474, "grad_norm": 0.5194700360298157, "learning_rate": 0.0005, "loss": 1.2996, "step": 6650 }, { "epoch": 0.21237922127618866, "grad_norm": 0.5374641418457031, "learning_rate": 0.0005, "loss": 1.312, "step": 6660 }, { "epoch": 0.21269810899582256, "grad_norm": 0.5642796754837036, "learning_rate": 0.0005, "loss": 1.3089, "step": 6670 }, { "epoch": 0.21301699671545649, "grad_norm": 0.5580105781555176, "learning_rate": 0.0005, "loss": 1.3046, "step": 6680 }, { "epoch": 0.2133358844350904, "grad_norm": 0.5559312105178833, "learning_rate": 0.0005, "loss": 1.3164, "step": 6690 }, { "epoch": 0.2136547721547243, "grad_norm": 0.5587300062179565, "learning_rate": 0.0005, "loss": 1.3083, "step": 6700 }, { "epoch": 0.21397365987435824, "grad_norm": 0.5269460678100586, "learning_rate": 0.0005, "loss": 1.2957, "step": 6710 }, { "epoch": 0.21429254759399216, "grad_norm": 0.5704488158226013, "learning_rate": 0.0005, "loss": 1.3109, "step": 6720 }, { "epoch": 0.21461143531362606, "grad_norm": 0.5360540747642517, "learning_rate": 0.0005, "loss": 1.3009, "step": 6730 }, { "epoch": 0.21493032303325998, "grad_norm": 0.5284568071365356, "learning_rate": 0.0005, "loss": 1.3047, "step": 6740 }, { "epoch": 0.2152492107528939, "grad_norm": 0.5489094257354736, "learning_rate": 0.0005, "loss": 1.3223, "step": 6750 }, { "epoch": 0.21556809847252784, "grad_norm": 0.524774968624115, "learning_rate": 0.0005, "loss": 1.3144, "step": 6760 }, { "epoch": 0.21588698619216173, "grad_norm": 0.5359005928039551, "learning_rate": 0.0005, "loss": 1.3012, "step": 6770 }, { "epoch": 0.21620587391179566, "grad_norm": 0.5301958322525024, "learning_rate": 0.0005, "loss": 1.2909, "step": 6780 }, { "epoch": 0.21652476163142959, "grad_norm": 0.5353237390518188, "learning_rate": 0.0005, "loss": 1.2899, "step": 6790 }, { "epoch": 0.21684364935106348, "grad_norm": 0.551356852054596, "learning_rate": 0.0005, "loss": 1.2839, "step": 6800 }, { "epoch": 0.2171625370706974, "grad_norm": 0.5137448906898499, "learning_rate": 0.0005, "loss": 1.3146, "step": 6810 }, { "epoch": 0.21748142479033133, "grad_norm": 0.5484623312950134, "learning_rate": 0.0005, "loss": 1.3036, "step": 6820 }, { "epoch": 0.21780031250996523, "grad_norm": 0.5172665119171143, "learning_rate": 0.0005, "loss": 1.3037, "step": 6830 }, { "epoch": 0.21811920022959916, "grad_norm": 0.5307798981666565, "learning_rate": 0.0005, "loss": 1.3003, "step": 6840 }, { "epoch": 0.21843808794923308, "grad_norm": 0.5149516463279724, "learning_rate": 0.0005, "loss": 1.2833, "step": 6850 }, { "epoch": 0.21875697566886698, "grad_norm": 0.5275344252586365, "learning_rate": 0.0005, "loss": 1.2849, "step": 6860 }, { "epoch": 0.2190758633885009, "grad_norm": 0.5206189155578613, "learning_rate": 0.0005, "loss": 1.3039, "step": 6870 }, { "epoch": 0.21939475110813483, "grad_norm": 0.5311983823776245, "learning_rate": 0.0005, "loss": 1.3034, "step": 6880 }, { "epoch": 0.21971363882776873, "grad_norm": 0.5286182165145874, "learning_rate": 0.0005, "loss": 1.2961, "step": 6890 }, { "epoch": 0.22003252654740266, "grad_norm": 0.6045443415641785, "learning_rate": 0.0005, "loss": 1.3086, "step": 6900 }, { "epoch": 0.22035141426703658, "grad_norm": 0.5496406555175781, "learning_rate": 0.0005, "loss": 1.3021, "step": 6910 }, { "epoch": 0.22067030198667048, "grad_norm": 0.5401288270950317, "learning_rate": 0.0005, "loss": 1.3183, "step": 6920 }, { "epoch": 0.2209891897063044, "grad_norm": 0.5402335524559021, "learning_rate": 0.0005, "loss": 1.3063, "step": 6930 }, { "epoch": 0.22130807742593833, "grad_norm": 0.5452973246574402, "learning_rate": 0.0005, "loss": 1.2922, "step": 6940 }, { "epoch": 0.22162696514557226, "grad_norm": 0.5425865650177002, "learning_rate": 0.0005, "loss": 1.2902, "step": 6950 }, { "epoch": 0.22194585286520616, "grad_norm": 0.5472378134727478, "learning_rate": 0.0005, "loss": 1.2816, "step": 6960 }, { "epoch": 0.22226474058484008, "grad_norm": 0.5230280160903931, "learning_rate": 0.0005, "loss": 1.2981, "step": 6970 }, { "epoch": 0.222583628304474, "grad_norm": 0.5512002110481262, "learning_rate": 0.0005, "loss": 1.2964, "step": 6980 }, { "epoch": 0.2229025160241079, "grad_norm": 0.5232899785041809, "learning_rate": 0.0005, "loss": 1.3066, "step": 6990 }, { "epoch": 0.22322140374374183, "grad_norm": 0.5482252836227417, "learning_rate": 0.0005, "loss": 1.3177, "step": 7000 }, { "epoch": 0.22354029146337576, "grad_norm": 0.5405844449996948, "learning_rate": 0.0005, "loss": 1.2851, "step": 7010 }, { "epoch": 0.22385917918300965, "grad_norm": 0.5282372236251831, "learning_rate": 0.0005, "loss": 1.3024, "step": 7020 }, { "epoch": 0.22417806690264358, "grad_norm": 0.5448574423789978, "learning_rate": 0.0005, "loss": 1.2734, "step": 7030 }, { "epoch": 0.2244969546222775, "grad_norm": 0.5466793775558472, "learning_rate": 0.0005, "loss": 1.2765, "step": 7040 }, { "epoch": 0.2248158423419114, "grad_norm": 0.5259258151054382, "learning_rate": 0.0005, "loss": 1.2955, "step": 7050 }, { "epoch": 0.22513473006154533, "grad_norm": 0.507414698600769, "learning_rate": 0.0005, "loss": 1.29, "step": 7060 }, { "epoch": 0.22545361778117926, "grad_norm": 0.5376728177070618, "learning_rate": 0.0005, "loss": 1.2839, "step": 7070 }, { "epoch": 0.22577250550081315, "grad_norm": 0.5190555453300476, "learning_rate": 0.0005, "loss": 1.3078, "step": 7080 }, { "epoch": 0.22609139322044708, "grad_norm": 0.5176852941513062, "learning_rate": 0.0005, "loss": 1.3132, "step": 7090 }, { "epoch": 0.226410280940081, "grad_norm": 0.5273293852806091, "learning_rate": 0.0005, "loss": 1.2938, "step": 7100 }, { "epoch": 0.2267291686597149, "grad_norm": 0.5152580142021179, "learning_rate": 0.0005, "loss": 1.3058, "step": 7110 }, { "epoch": 0.22704805637934883, "grad_norm": 0.5310482382774353, "learning_rate": 0.0005, "loss": 1.2821, "step": 7120 }, { "epoch": 0.22736694409898275, "grad_norm": 0.5562300086021423, "learning_rate": 0.0005, "loss": 1.2986, "step": 7130 }, { "epoch": 0.22768583181861665, "grad_norm": 0.529692530632019, "learning_rate": 0.0005, "loss": 1.2916, "step": 7140 }, { "epoch": 0.22800471953825058, "grad_norm": 0.5387162566184998, "learning_rate": 0.0005, "loss": 1.2918, "step": 7150 }, { "epoch": 0.2283236072578845, "grad_norm": 0.5448839664459229, "learning_rate": 0.0005, "loss": 1.2934, "step": 7160 }, { "epoch": 0.22864249497751843, "grad_norm": 0.5419529676437378, "learning_rate": 0.0005, "loss": 1.3105, "step": 7170 }, { "epoch": 0.22896138269715233, "grad_norm": 0.5596801042556763, "learning_rate": 0.0005, "loss": 1.2759, "step": 7180 }, { "epoch": 0.22928027041678625, "grad_norm": 0.5463248491287231, "learning_rate": 0.0005, "loss": 1.285, "step": 7190 }, { "epoch": 0.22959915813642018, "grad_norm": 0.5241756439208984, "learning_rate": 0.0005, "loss": 1.2835, "step": 7200 }, { "epoch": 0.22991804585605408, "grad_norm": 0.5329312682151794, "learning_rate": 0.0005, "loss": 1.2818, "step": 7210 }, { "epoch": 0.230236933575688, "grad_norm": 0.49857091903686523, "learning_rate": 0.0005, "loss": 1.2621, "step": 7220 }, { "epoch": 0.23055582129532193, "grad_norm": 0.5254444479942322, "learning_rate": 0.0005, "loss": 1.2955, "step": 7230 }, { "epoch": 0.23087470901495583, "grad_norm": 0.5368390679359436, "learning_rate": 0.0005, "loss": 1.2988, "step": 7240 }, { "epoch": 0.23119359673458975, "grad_norm": 0.5087143778800964, "learning_rate": 0.0005, "loss": 1.2879, "step": 7250 }, { "epoch": 0.23151248445422368, "grad_norm": 0.5278485417366028, "learning_rate": 0.0005, "loss": 1.2773, "step": 7260 }, { "epoch": 0.23183137217385758, "grad_norm": 0.5035619735717773, "learning_rate": 0.0005, "loss": 1.2755, "step": 7270 }, { "epoch": 0.2321502598934915, "grad_norm": 0.5509461760520935, "learning_rate": 0.0005, "loss": 1.3007, "step": 7280 }, { "epoch": 0.23246914761312543, "grad_norm": 0.5227276682853699, "learning_rate": 0.0005, "loss": 1.2881, "step": 7290 }, { "epoch": 0.23278803533275932, "grad_norm": 0.5183719396591187, "learning_rate": 0.0005, "loss": 1.2799, "step": 7300 }, { "epoch": 0.23310692305239325, "grad_norm": 0.5770544409751892, "learning_rate": 0.0005, "loss": 1.2969, "step": 7310 }, { "epoch": 0.23342581077202718, "grad_norm": 0.5247523784637451, "learning_rate": 0.0005, "loss": 1.2864, "step": 7320 }, { "epoch": 0.23374469849166107, "grad_norm": 0.5339850187301636, "learning_rate": 0.0005, "loss": 1.271, "step": 7330 }, { "epoch": 0.234063586211295, "grad_norm": 0.513947069644928, "learning_rate": 0.0005, "loss": 1.2904, "step": 7340 }, { "epoch": 0.23438247393092893, "grad_norm": 0.507355809211731, "learning_rate": 0.0005, "loss": 1.2875, "step": 7350 }, { "epoch": 0.23470136165056282, "grad_norm": 0.5152395367622375, "learning_rate": 0.0005, "loss": 1.2773, "step": 7360 }, { "epoch": 0.23502024937019675, "grad_norm": 0.5163211822509766, "learning_rate": 0.0005, "loss": 1.2803, "step": 7370 }, { "epoch": 0.23533913708983067, "grad_norm": 0.5329576134681702, "learning_rate": 0.0005, "loss": 1.2877, "step": 7380 }, { "epoch": 0.2356580248094646, "grad_norm": 0.515319287776947, "learning_rate": 0.0005, "loss": 1.2851, "step": 7390 }, { "epoch": 0.2359769125290985, "grad_norm": 0.5044041872024536, "learning_rate": 0.0005, "loss": 1.2724, "step": 7400 }, { "epoch": 0.23629580024873242, "grad_norm": 0.5218659043312073, "learning_rate": 0.0005, "loss": 1.2765, "step": 7410 }, { "epoch": 0.23661468796836635, "grad_norm": 0.5328546166419983, "learning_rate": 0.0005, "loss": 1.3, "step": 7420 }, { "epoch": 0.23693357568800025, "grad_norm": 0.5284678339958191, "learning_rate": 0.0005, "loss": 1.2623, "step": 7430 }, { "epoch": 0.23725246340763417, "grad_norm": 0.5512279272079468, "learning_rate": 0.0005, "loss": 1.2746, "step": 7440 }, { "epoch": 0.2375713511272681, "grad_norm": 0.5096049308776855, "learning_rate": 0.0005, "loss": 1.298, "step": 7450 }, { "epoch": 0.237890238846902, "grad_norm": 0.5141339898109436, "learning_rate": 0.0005, "loss": 1.2937, "step": 7460 }, { "epoch": 0.23820912656653592, "grad_norm": 0.5120203495025635, "learning_rate": 0.0005, "loss": 1.2836, "step": 7470 }, { "epoch": 0.23852801428616985, "grad_norm": 0.5968315005302429, "learning_rate": 0.0005, "loss": 1.3015, "step": 7480 }, { "epoch": 0.23884690200580375, "grad_norm": 0.49347633123397827, "learning_rate": 0.0005, "loss": 1.2705, "step": 7490 }, { "epoch": 0.23916578972543767, "grad_norm": 0.50834721326828, "learning_rate": 0.0005, "loss": 1.3048, "step": 7500 }, { "epoch": 0.2394846774450716, "grad_norm": 0.5340325236320496, "learning_rate": 0.0005, "loss": 1.2792, "step": 7510 }, { "epoch": 0.2398035651647055, "grad_norm": 0.5159140825271606, "learning_rate": 0.0005, "loss": 1.2863, "step": 7520 }, { "epoch": 0.24012245288433942, "grad_norm": 0.5006542801856995, "learning_rate": 0.0005, "loss": 1.2877, "step": 7530 }, { "epoch": 0.24044134060397335, "grad_norm": 0.5034393072128296, "learning_rate": 0.0005, "loss": 1.2783, "step": 7540 }, { "epoch": 0.24076022832360724, "grad_norm": 0.5194485187530518, "learning_rate": 0.0005, "loss": 1.2617, "step": 7550 }, { "epoch": 0.24107911604324117, "grad_norm": 0.4872956871986389, "learning_rate": 0.0005, "loss": 1.2691, "step": 7560 }, { "epoch": 0.2413980037628751, "grad_norm": 0.5291786193847656, "learning_rate": 0.0005, "loss": 1.2955, "step": 7570 }, { "epoch": 0.24171689148250902, "grad_norm": 0.5270318388938904, "learning_rate": 0.0005, "loss": 1.2738, "step": 7580 }, { "epoch": 0.24203577920214292, "grad_norm": 0.5061559081077576, "learning_rate": 0.0005, "loss": 1.2767, "step": 7590 }, { "epoch": 0.24235466692177685, "grad_norm": 0.5161083936691284, "learning_rate": 0.0005, "loss": 1.2884, "step": 7600 }, { "epoch": 0.24267355464141077, "grad_norm": 0.5098254084587097, "learning_rate": 0.0005, "loss": 1.254, "step": 7610 }, { "epoch": 0.24299244236104467, "grad_norm": 0.5221891403198242, "learning_rate": 0.0005, "loss": 1.2629, "step": 7620 }, { "epoch": 0.2433113300806786, "grad_norm": 0.4903581142425537, "learning_rate": 0.0005, "loss": 1.2831, "step": 7630 }, { "epoch": 0.24363021780031252, "grad_norm": 0.5214158892631531, "learning_rate": 0.0005, "loss": 1.306, "step": 7640 }, { "epoch": 0.24394910551994642, "grad_norm": 0.5176328420639038, "learning_rate": 0.0005, "loss": 1.2712, "step": 7650 }, { "epoch": 0.24426799323958034, "grad_norm": 0.5082030296325684, "learning_rate": 0.0005, "loss": 1.2707, "step": 7660 }, { "epoch": 0.24458688095921427, "grad_norm": 0.5280209183692932, "learning_rate": 0.0005, "loss": 1.2687, "step": 7670 }, { "epoch": 0.24490576867884817, "grad_norm": 0.5040051341056824, "learning_rate": 0.0005, "loss": 1.2586, "step": 7680 }, { "epoch": 0.2452246563984821, "grad_norm": 0.5304771065711975, "learning_rate": 0.0005, "loss": 1.2823, "step": 7690 }, { "epoch": 0.24554354411811602, "grad_norm": 0.5364067554473877, "learning_rate": 0.0005, "loss": 1.2667, "step": 7700 }, { "epoch": 0.24586243183774992, "grad_norm": 0.5052260160446167, "learning_rate": 0.0005, "loss": 1.2513, "step": 7710 }, { "epoch": 0.24618131955738384, "grad_norm": 0.5323502421379089, "learning_rate": 0.0005, "loss": 1.2708, "step": 7720 }, { "epoch": 0.24650020727701777, "grad_norm": 0.49328145384788513, "learning_rate": 0.0005, "loss": 1.2792, "step": 7730 }, { "epoch": 0.24681909499665167, "grad_norm": 0.5066396594047546, "learning_rate": 0.0005, "loss": 1.2724, "step": 7740 }, { "epoch": 0.2471379827162856, "grad_norm": 0.5193277597427368, "learning_rate": 0.0005, "loss": 1.2937, "step": 7750 }, { "epoch": 0.24745687043591952, "grad_norm": 0.506821870803833, "learning_rate": 0.0005, "loss": 1.264, "step": 7760 }, { "epoch": 0.24777575815555342, "grad_norm": 0.4907330572605133, "learning_rate": 0.0005, "loss": 1.2785, "step": 7770 }, { "epoch": 0.24809464587518734, "grad_norm": 0.53122478723526, "learning_rate": 0.0005, "loss": 1.2495, "step": 7780 }, { "epoch": 0.24841353359482127, "grad_norm": 0.5209131836891174, "learning_rate": 0.0005, "loss": 1.2884, "step": 7790 }, { "epoch": 0.2487324213144552, "grad_norm": 0.5227766036987305, "learning_rate": 0.0005, "loss": 1.2676, "step": 7800 }, { "epoch": 0.2490513090340891, "grad_norm": 0.5297362208366394, "learning_rate": 0.0005, "loss": 1.2781, "step": 7810 }, { "epoch": 0.24937019675372302, "grad_norm": 0.5168874859809875, "learning_rate": 0.0005, "loss": 1.2637, "step": 7820 }, { "epoch": 0.24968908447335694, "grad_norm": 0.5064091682434082, "learning_rate": 0.0005, "loss": 1.2512, "step": 7830 }, { "epoch": 0.25000797219299087, "grad_norm": 0.49836963415145874, "learning_rate": 0.0005, "loss": 1.2539, "step": 7840 }, { "epoch": 0.25032685991262477, "grad_norm": 0.49791836738586426, "learning_rate": 0.0005, "loss": 1.2421, "step": 7850 }, { "epoch": 0.25064574763225866, "grad_norm": 0.5025899410247803, "learning_rate": 0.0005, "loss": 1.2452, "step": 7860 }, { "epoch": 0.2509646353518926, "grad_norm": 0.5179859399795532, "learning_rate": 0.0005, "loss": 1.2486, "step": 7870 }, { "epoch": 0.2512835230715265, "grad_norm": 0.47681480646133423, "learning_rate": 0.0005, "loss": 1.2545, "step": 7880 }, { "epoch": 0.2516024107911604, "grad_norm": 0.5042859315872192, "learning_rate": 0.0005, "loss": 1.2929, "step": 7890 }, { "epoch": 0.25192129851079437, "grad_norm": 0.49867138266563416, "learning_rate": 0.0005, "loss": 1.267, "step": 7900 }, { "epoch": 0.25224018623042826, "grad_norm": 0.5240023732185364, "learning_rate": 0.0005, "loss": 1.2651, "step": 7910 }, { "epoch": 0.25255907395006216, "grad_norm": 0.5321725606918335, "learning_rate": 0.0005, "loss": 1.2684, "step": 7920 }, { "epoch": 0.2528779616696961, "grad_norm": 0.534798800945282, "learning_rate": 0.0005, "loss": 1.2606, "step": 7930 }, { "epoch": 0.25319684938933, "grad_norm": 0.5044108033180237, "learning_rate": 0.0005, "loss": 1.2564, "step": 7940 }, { "epoch": 0.2535157371089639, "grad_norm": 0.48750004172325134, "learning_rate": 0.0005, "loss": 1.2609, "step": 7950 }, { "epoch": 0.25383462482859787, "grad_norm": 0.5071402192115784, "learning_rate": 0.0005, "loss": 1.2736, "step": 7960 }, { "epoch": 0.25415351254823176, "grad_norm": 0.49629124999046326, "learning_rate": 0.0005, "loss": 1.266, "step": 7970 }, { "epoch": 0.25447240026786566, "grad_norm": 0.5239920020103455, "learning_rate": 0.0005, "loss": 1.2882, "step": 7980 }, { "epoch": 0.2547912879874996, "grad_norm": 0.5081344842910767, "learning_rate": 0.0005, "loss": 1.2813, "step": 7990 }, { "epoch": 0.2551101757071335, "grad_norm": 0.49004441499710083, "learning_rate": 0.0005, "loss": 1.2561, "step": 8000 }, { "epoch": 0.2554290634267674, "grad_norm": 0.5031710267066956, "learning_rate": 0.0005, "loss": 1.249, "step": 8010 }, { "epoch": 0.25574795114640136, "grad_norm": 0.5036451816558838, "learning_rate": 0.0005, "loss": 1.2641, "step": 8020 }, { "epoch": 0.25606683886603526, "grad_norm": 0.4846326410770416, "learning_rate": 0.0005, "loss": 1.249, "step": 8030 }, { "epoch": 0.25638572658566916, "grad_norm": 0.5058959722518921, "learning_rate": 0.0005, "loss": 1.2644, "step": 8040 }, { "epoch": 0.2567046143053031, "grad_norm": 0.5171712636947632, "learning_rate": 0.0005, "loss": 1.2767, "step": 8050 }, { "epoch": 0.257023502024937, "grad_norm": 0.5237571597099304, "learning_rate": 0.0005, "loss": 1.2619, "step": 8060 }, { "epoch": 0.2573423897445709, "grad_norm": 0.4908455014228821, "learning_rate": 0.0005, "loss": 1.2743, "step": 8070 }, { "epoch": 0.25766127746420486, "grad_norm": 0.504867434501648, "learning_rate": 0.0005, "loss": 1.259, "step": 8080 }, { "epoch": 0.25798016518383876, "grad_norm": 0.5199716687202454, "learning_rate": 0.0005, "loss": 1.2925, "step": 8090 }, { "epoch": 0.2582990529034727, "grad_norm": 0.49130043387413025, "learning_rate": 0.0005, "loss": 1.2742, "step": 8100 }, { "epoch": 0.2586179406231066, "grad_norm": 0.517345666885376, "learning_rate": 0.0005, "loss": 1.2719, "step": 8110 }, { "epoch": 0.2589368283427405, "grad_norm": 0.49142032861709595, "learning_rate": 0.0005, "loss": 1.2468, "step": 8120 }, { "epoch": 0.25925571606237446, "grad_norm": 0.49685269594192505, "learning_rate": 0.0005, "loss": 1.2534, "step": 8130 }, { "epoch": 0.25957460378200836, "grad_norm": 0.48098668456077576, "learning_rate": 0.0005, "loss": 1.2317, "step": 8140 }, { "epoch": 0.25989349150164226, "grad_norm": 0.5050351619720459, "learning_rate": 0.0005, "loss": 1.2576, "step": 8150 }, { "epoch": 0.2602123792212762, "grad_norm": 0.506672739982605, "learning_rate": 0.0005, "loss": 1.2539, "step": 8160 }, { "epoch": 0.2605312669409101, "grad_norm": 0.49320903420448303, "learning_rate": 0.0005, "loss": 1.2421, "step": 8170 }, { "epoch": 0.260850154660544, "grad_norm": 0.5003274083137512, "learning_rate": 0.0005, "loss": 1.2658, "step": 8180 }, { "epoch": 0.26116904238017796, "grad_norm": 0.498389333486557, "learning_rate": 0.0005, "loss": 1.2512, "step": 8190 }, { "epoch": 0.26148793009981186, "grad_norm": 0.49021822214126587, "learning_rate": 0.0005, "loss": 1.2569, "step": 8200 }, { "epoch": 0.26180681781944576, "grad_norm": 0.49076321721076965, "learning_rate": 0.0005, "loss": 1.2751, "step": 8210 }, { "epoch": 0.2621257055390797, "grad_norm": 0.5223627686500549, "learning_rate": 0.0005, "loss": 1.2742, "step": 8220 }, { "epoch": 0.2624445932587136, "grad_norm": 0.4935097396373749, "learning_rate": 0.0005, "loss": 1.2523, "step": 8230 }, { "epoch": 0.2627634809783475, "grad_norm": 0.47696298360824585, "learning_rate": 0.0005, "loss": 1.246, "step": 8240 }, { "epoch": 0.26308236869798146, "grad_norm": 0.5192391276359558, "learning_rate": 0.0005, "loss": 1.2377, "step": 8250 }, { "epoch": 0.26340125641761536, "grad_norm": 0.47619664669036865, "learning_rate": 0.0005, "loss": 1.2697, "step": 8260 }, { "epoch": 0.26372014413724926, "grad_norm": 0.4984939992427826, "learning_rate": 0.0005, "loss": 1.2588, "step": 8270 }, { "epoch": 0.2640390318568832, "grad_norm": 0.49781128764152527, "learning_rate": 0.0005, "loss": 1.265, "step": 8280 }, { "epoch": 0.2643579195765171, "grad_norm": 0.5310728549957275, "learning_rate": 0.0005, "loss": 1.2626, "step": 8290 }, { "epoch": 0.264676807296151, "grad_norm": 0.4933306574821472, "learning_rate": 0.0005, "loss": 1.2506, "step": 8300 }, { "epoch": 0.26499569501578496, "grad_norm": 0.5005725622177124, "learning_rate": 0.0005, "loss": 1.2512, "step": 8310 }, { "epoch": 0.26531458273541886, "grad_norm": 0.4766061305999756, "learning_rate": 0.0005, "loss": 1.2401, "step": 8320 }, { "epoch": 0.26563347045505276, "grad_norm": 0.5212782025337219, "learning_rate": 0.0005, "loss": 1.2389, "step": 8330 }, { "epoch": 0.2659523581746867, "grad_norm": 0.49564340710639954, "learning_rate": 0.0005, "loss": 1.2662, "step": 8340 }, { "epoch": 0.2662712458943206, "grad_norm": 0.5133705139160156, "learning_rate": 0.0005, "loss": 1.2501, "step": 8350 }, { "epoch": 0.2665901336139545, "grad_norm": 0.4940152168273926, "learning_rate": 0.0005, "loss": 1.2794, "step": 8360 }, { "epoch": 0.26690902133358846, "grad_norm": 0.5306583642959595, "learning_rate": 0.0005, "loss": 1.2737, "step": 8370 }, { "epoch": 0.26722790905322236, "grad_norm": 0.4824816584587097, "learning_rate": 0.0005, "loss": 1.2582, "step": 8380 }, { "epoch": 0.26754679677285625, "grad_norm": 0.5017719268798828, "learning_rate": 0.0005, "loss": 1.2583, "step": 8390 }, { "epoch": 0.2678656844924902, "grad_norm": 0.5216484665870667, "learning_rate": 0.0005, "loss": 1.2525, "step": 8400 }, { "epoch": 0.2681845722121241, "grad_norm": 0.5009568333625793, "learning_rate": 0.0005, "loss": 1.2473, "step": 8410 }, { "epoch": 0.268503459931758, "grad_norm": 0.4954254627227783, "learning_rate": 0.0005, "loss": 1.2477, "step": 8420 }, { "epoch": 0.26882234765139196, "grad_norm": 0.4804369807243347, "learning_rate": 0.0005, "loss": 1.2432, "step": 8430 }, { "epoch": 0.26914123537102586, "grad_norm": 0.4917435050010681, "learning_rate": 0.0005, "loss": 1.2392, "step": 8440 }, { "epoch": 0.26946012309065975, "grad_norm": 0.49070221185684204, "learning_rate": 0.0005, "loss": 1.2515, "step": 8450 }, { "epoch": 0.2697790108102937, "grad_norm": 0.4955213665962219, "learning_rate": 0.0005, "loss": 1.2485, "step": 8460 }, { "epoch": 0.2700978985299276, "grad_norm": 0.4929785430431366, "learning_rate": 0.0005, "loss": 1.2606, "step": 8470 }, { "epoch": 0.2704167862495615, "grad_norm": 0.47558659315109253, "learning_rate": 0.0005, "loss": 1.2425, "step": 8480 }, { "epoch": 0.27073567396919546, "grad_norm": 0.5124787092208862, "learning_rate": 0.0005, "loss": 1.2464, "step": 8490 }, { "epoch": 0.27105456168882935, "grad_norm": 0.47737860679626465, "learning_rate": 0.0005, "loss": 1.2731, "step": 8500 }, { "epoch": 0.2713734494084633, "grad_norm": 0.4930284917354584, "learning_rate": 0.0005, "loss": 1.2417, "step": 8510 }, { "epoch": 0.2716923371280972, "grad_norm": 0.48435503244400024, "learning_rate": 0.0005, "loss": 1.238, "step": 8520 }, { "epoch": 0.2720112248477311, "grad_norm": 0.48783987760543823, "learning_rate": 0.0005, "loss": 1.2385, "step": 8530 }, { "epoch": 0.27233011256736506, "grad_norm": 0.48623397946357727, "learning_rate": 0.0005, "loss": 1.2498, "step": 8540 }, { "epoch": 0.27264900028699895, "grad_norm": 0.5029369592666626, "learning_rate": 0.0005, "loss": 1.2725, "step": 8550 }, { "epoch": 0.27296788800663285, "grad_norm": 0.4850176274776459, "learning_rate": 0.0005, "loss": 1.2601, "step": 8560 }, { "epoch": 0.2732867757262668, "grad_norm": 0.4850327968597412, "learning_rate": 0.0005, "loss": 1.2708, "step": 8570 }, { "epoch": 0.2736056634459007, "grad_norm": 0.48380863666534424, "learning_rate": 0.0005, "loss": 1.2322, "step": 8580 }, { "epoch": 0.2739245511655346, "grad_norm": 0.48448917269706726, "learning_rate": 0.0005, "loss": 1.2386, "step": 8590 }, { "epoch": 0.27424343888516856, "grad_norm": 0.5155131220817566, "learning_rate": 0.0005, "loss": 1.2579, "step": 8600 }, { "epoch": 0.27456232660480245, "grad_norm": 0.4893667697906494, "learning_rate": 0.0005, "loss": 1.2625, "step": 8610 }, { "epoch": 0.27488121432443635, "grad_norm": 0.4755896329879761, "learning_rate": 0.0005, "loss": 1.23, "step": 8620 }, { "epoch": 0.2752001020440703, "grad_norm": 0.4704909920692444, "learning_rate": 0.0005, "loss": 1.2391, "step": 8630 }, { "epoch": 0.2755189897637042, "grad_norm": 0.48995453119277954, "learning_rate": 0.0005, "loss": 1.2358, "step": 8640 }, { "epoch": 0.2758378774833381, "grad_norm": 0.4795413911342621, "learning_rate": 0.0005, "loss": 1.2648, "step": 8650 }, { "epoch": 0.27615676520297205, "grad_norm": 0.4703845977783203, "learning_rate": 0.0005, "loss": 1.2421, "step": 8660 }, { "epoch": 0.27647565292260595, "grad_norm": 0.48536381125450134, "learning_rate": 0.0005, "loss": 1.2467, "step": 8670 }, { "epoch": 0.27679454064223985, "grad_norm": 0.4863326847553253, "learning_rate": 0.0005, "loss": 1.2332, "step": 8680 }, { "epoch": 0.2771134283618738, "grad_norm": 0.4987434148788452, "learning_rate": 0.0005, "loss": 1.2286, "step": 8690 }, { "epoch": 0.2774323160815077, "grad_norm": 0.4930117726325989, "learning_rate": 0.0005, "loss": 1.2462, "step": 8700 }, { "epoch": 0.2777512038011416, "grad_norm": 0.47064122557640076, "learning_rate": 0.0005, "loss": 1.2494, "step": 8710 }, { "epoch": 0.27807009152077555, "grad_norm": 0.5006730556488037, "learning_rate": 0.0005, "loss": 1.2479, "step": 8720 }, { "epoch": 0.27838897924040945, "grad_norm": 0.4980914294719696, "learning_rate": 0.0005, "loss": 1.2202, "step": 8730 }, { "epoch": 0.27870786696004335, "grad_norm": 0.49676623940467834, "learning_rate": 0.0005, "loss": 1.2397, "step": 8740 }, { "epoch": 0.2790267546796773, "grad_norm": 0.5021325945854187, "learning_rate": 0.0005, "loss": 1.2487, "step": 8750 }, { "epoch": 0.2793456423993112, "grad_norm": 0.4874837100505829, "learning_rate": 0.0005, "loss": 1.2396, "step": 8760 }, { "epoch": 0.2796645301189451, "grad_norm": 0.4959019422531128, "learning_rate": 0.0005, "loss": 1.2452, "step": 8770 }, { "epoch": 0.27998341783857905, "grad_norm": 0.4884132146835327, "learning_rate": 0.0005, "loss": 1.2535, "step": 8780 }, { "epoch": 0.28030230555821295, "grad_norm": 0.47749945521354675, "learning_rate": 0.0005, "loss": 1.2397, "step": 8790 }, { "epoch": 0.28062119327784685, "grad_norm": 0.49802863597869873, "learning_rate": 0.0005, "loss": 1.2327, "step": 8800 }, { "epoch": 0.2809400809974808, "grad_norm": 0.4573667645454407, "learning_rate": 0.0005, "loss": 1.257, "step": 8810 }, { "epoch": 0.2812589687171147, "grad_norm": 0.47507792711257935, "learning_rate": 0.0005, "loss": 1.2424, "step": 8820 }, { "epoch": 0.2815778564367486, "grad_norm": 0.4829874336719513, "learning_rate": 0.0005, "loss": 1.2699, "step": 8830 }, { "epoch": 0.28189674415638255, "grad_norm": 0.48574453592300415, "learning_rate": 0.0005, "loss": 1.2402, "step": 8840 }, { "epoch": 0.28221563187601645, "grad_norm": 0.47372007369995117, "learning_rate": 0.0005, "loss": 1.2436, "step": 8850 }, { "epoch": 0.28253451959565035, "grad_norm": 0.4779689908027649, "learning_rate": 0.0005, "loss": 1.247, "step": 8860 }, { "epoch": 0.2828534073152843, "grad_norm": 0.5082475543022156, "learning_rate": 0.0005, "loss": 1.2308, "step": 8870 }, { "epoch": 0.2831722950349182, "grad_norm": 0.48237717151641846, "learning_rate": 0.0005, "loss": 1.2502, "step": 8880 }, { "epoch": 0.2834911827545521, "grad_norm": 0.4815846383571625, "learning_rate": 0.0005, "loss": 1.2288, "step": 8890 }, { "epoch": 0.28381007047418605, "grad_norm": 0.5010101199150085, "learning_rate": 0.0005, "loss": 1.2487, "step": 8900 }, { "epoch": 0.28412895819381995, "grad_norm": 0.49066150188446045, "learning_rate": 0.0005, "loss": 1.2343, "step": 8910 }, { "epoch": 0.2844478459134539, "grad_norm": 0.48220255970954895, "learning_rate": 0.0005, "loss": 1.2351, "step": 8920 }, { "epoch": 0.2847667336330878, "grad_norm": 0.4660911560058594, "learning_rate": 0.0005, "loss": 1.2439, "step": 8930 }, { "epoch": 0.2850856213527217, "grad_norm": 0.5226732492446899, "learning_rate": 0.0005, "loss": 1.239, "step": 8940 }, { "epoch": 0.28540450907235565, "grad_norm": 0.4776681065559387, "learning_rate": 0.0005, "loss": 1.2482, "step": 8950 }, { "epoch": 0.28572339679198955, "grad_norm": 0.4984127879142761, "learning_rate": 0.0005, "loss": 1.2289, "step": 8960 }, { "epoch": 0.28604228451162345, "grad_norm": 0.484502375125885, "learning_rate": 0.0005, "loss": 1.2339, "step": 8970 }, { "epoch": 0.2863611722312574, "grad_norm": 0.48515409231185913, "learning_rate": 0.0005, "loss": 1.2377, "step": 8980 }, { "epoch": 0.2866800599508913, "grad_norm": 0.48801496624946594, "learning_rate": 0.0005, "loss": 1.2372, "step": 8990 }, { "epoch": 0.2869989476705252, "grad_norm": 0.503757655620575, "learning_rate": 0.0005, "loss": 1.2642, "step": 9000 }, { "epoch": 0.28731783539015915, "grad_norm": 0.47584301233291626, "learning_rate": 0.0005, "loss": 1.2172, "step": 9010 }, { "epoch": 0.28763672310979305, "grad_norm": 0.48166683316230774, "learning_rate": 0.0005, "loss": 1.238, "step": 9020 }, { "epoch": 0.28795561082942694, "grad_norm": 0.47695672512054443, "learning_rate": 0.0005, "loss": 1.2382, "step": 9030 }, { "epoch": 0.2882744985490609, "grad_norm": 0.48136505484580994, "learning_rate": 0.0005, "loss": 1.2453, "step": 9040 }, { "epoch": 0.2885933862686948, "grad_norm": 0.4705105721950531, "learning_rate": 0.0005, "loss": 1.2316, "step": 9050 }, { "epoch": 0.2889122739883287, "grad_norm": 0.47545409202575684, "learning_rate": 0.0005, "loss": 1.2235, "step": 9060 }, { "epoch": 0.28923116170796265, "grad_norm": 0.48143270611763, "learning_rate": 0.0005, "loss": 1.2318, "step": 9070 }, { "epoch": 0.28955004942759655, "grad_norm": 0.4943726360797882, "learning_rate": 0.0005, "loss": 1.2389, "step": 9080 }, { "epoch": 0.28986893714723044, "grad_norm": 0.4849379062652588, "learning_rate": 0.0005, "loss": 1.2589, "step": 9090 }, { "epoch": 0.2901878248668644, "grad_norm": 0.4788464307785034, "learning_rate": 0.0005, "loss": 1.2294, "step": 9100 }, { "epoch": 0.2905067125864983, "grad_norm": 0.4739382266998291, "learning_rate": 0.0005, "loss": 1.2329, "step": 9110 }, { "epoch": 0.2908256003061322, "grad_norm": 0.5209506750106812, "learning_rate": 0.0005, "loss": 1.2454, "step": 9120 }, { "epoch": 0.29114448802576615, "grad_norm": 0.4819193184375763, "learning_rate": 0.0005, "loss": 1.2409, "step": 9130 }, { "epoch": 0.29146337574540004, "grad_norm": 0.4805019199848175, "learning_rate": 0.0005, "loss": 1.2297, "step": 9140 }, { "epoch": 0.29178226346503394, "grad_norm": 0.4745943248271942, "learning_rate": 0.0005, "loss": 1.2266, "step": 9150 }, { "epoch": 0.2921011511846679, "grad_norm": 0.47815170884132385, "learning_rate": 0.0005, "loss": 1.2464, "step": 9160 }, { "epoch": 0.2924200389043018, "grad_norm": 0.47933608293533325, "learning_rate": 0.0005, "loss": 1.2507, "step": 9170 }, { "epoch": 0.2927389266239357, "grad_norm": 0.4662920832633972, "learning_rate": 0.0005, "loss": 1.2414, "step": 9180 }, { "epoch": 0.29305781434356964, "grad_norm": 0.4747319519519806, "learning_rate": 0.0005, "loss": 1.2223, "step": 9190 }, { "epoch": 0.29337670206320354, "grad_norm": 0.4738784730434418, "learning_rate": 0.0005, "loss": 1.2254, "step": 9200 }, { "epoch": 0.29369558978283744, "grad_norm": 0.4642687141895294, "learning_rate": 0.0005, "loss": 1.2193, "step": 9210 }, { "epoch": 0.2940144775024714, "grad_norm": 0.48891007900238037, "learning_rate": 0.0005, "loss": 1.2275, "step": 9220 }, { "epoch": 0.2943333652221053, "grad_norm": 0.4732115864753723, "learning_rate": 0.0005, "loss": 1.2375, "step": 9230 }, { "epoch": 0.2946522529417392, "grad_norm": 0.4736773669719696, "learning_rate": 0.0005, "loss": 1.216, "step": 9240 }, { "epoch": 0.29497114066137314, "grad_norm": 0.4922406077384949, "learning_rate": 0.0005, "loss": 1.2357, "step": 9250 }, { "epoch": 0.29529002838100704, "grad_norm": 0.4949341416358948, "learning_rate": 0.0005, "loss": 1.2299, "step": 9260 }, { "epoch": 0.29560891610064094, "grad_norm": 0.4667598009109497, "learning_rate": 0.0005, "loss": 1.2184, "step": 9270 }, { "epoch": 0.2959278038202749, "grad_norm": 0.4855829179286957, "learning_rate": 0.0005, "loss": 1.2289, "step": 9280 }, { "epoch": 0.2962466915399088, "grad_norm": 0.4694145917892456, "learning_rate": 0.0005, "loss": 1.2085, "step": 9290 }, { "epoch": 0.2965655792595427, "grad_norm": 0.466633141040802, "learning_rate": 0.0005, "loss": 1.228, "step": 9300 }, { "epoch": 0.29688446697917664, "grad_norm": 0.4754934012889862, "learning_rate": 0.0005, "loss": 1.2398, "step": 9310 }, { "epoch": 0.29720335469881054, "grad_norm": 0.47169917821884155, "learning_rate": 0.0005, "loss": 1.2096, "step": 9320 }, { "epoch": 0.29752224241844444, "grad_norm": 0.476678729057312, "learning_rate": 0.0005, "loss": 1.2385, "step": 9330 }, { "epoch": 0.2978411301380784, "grad_norm": 0.4720135033130646, "learning_rate": 0.0005, "loss": 1.2181, "step": 9340 }, { "epoch": 0.2981600178577123, "grad_norm": 0.4739975035190582, "learning_rate": 0.0005, "loss": 1.218, "step": 9350 }, { "epoch": 0.29847890557734624, "grad_norm": 0.475588321685791, "learning_rate": 0.0005, "loss": 1.2289, "step": 9360 }, { "epoch": 0.29879779329698014, "grad_norm": 0.4978891909122467, "learning_rate": 0.0005, "loss": 1.233, "step": 9370 }, { "epoch": 0.29911668101661404, "grad_norm": 0.47417309880256653, "learning_rate": 0.0005, "loss": 1.2259, "step": 9380 }, { "epoch": 0.299435568736248, "grad_norm": 0.4477439522743225, "learning_rate": 0.0005, "loss": 1.2146, "step": 9390 }, { "epoch": 0.2997544564558819, "grad_norm": 0.4888128340244293, "learning_rate": 0.0005, "loss": 1.2264, "step": 9400 }, { "epoch": 0.3000733441755158, "grad_norm": 0.45956334471702576, "learning_rate": 0.0005, "loss": 1.2328, "step": 9410 }, { "epoch": 0.30039223189514974, "grad_norm": 0.4596233367919922, "learning_rate": 0.0005, "loss": 1.228, "step": 9420 }, { "epoch": 0.30071111961478364, "grad_norm": 0.4777589738368988, "learning_rate": 0.0005, "loss": 1.2207, "step": 9430 }, { "epoch": 0.30103000733441754, "grad_norm": 0.4956303834915161, "learning_rate": 0.0005, "loss": 1.2327, "step": 9440 }, { "epoch": 0.3013488950540515, "grad_norm": 0.4638616144657135, "learning_rate": 0.0005, "loss": 1.2341, "step": 9450 }, { "epoch": 0.3016677827736854, "grad_norm": 0.4854125678539276, "learning_rate": 0.0005, "loss": 1.2394, "step": 9460 }, { "epoch": 0.3019866704933193, "grad_norm": 0.4745231568813324, "learning_rate": 0.0005, "loss": 1.2479, "step": 9470 }, { "epoch": 0.30230555821295324, "grad_norm": 0.47391799092292786, "learning_rate": 0.0005, "loss": 1.2291, "step": 9480 }, { "epoch": 0.30262444593258714, "grad_norm": 0.4805321991443634, "learning_rate": 0.0005, "loss": 1.2325, "step": 9490 }, { "epoch": 0.30294333365222104, "grad_norm": 0.4893067479133606, "learning_rate": 0.0005, "loss": 1.2322, "step": 9500 }, { "epoch": 0.303262221371855, "grad_norm": 0.4834955036640167, "learning_rate": 0.0005, "loss": 1.2159, "step": 9510 }, { "epoch": 0.3035811090914889, "grad_norm": 0.4895293414592743, "learning_rate": 0.0005, "loss": 1.2223, "step": 9520 }, { "epoch": 0.3038999968111228, "grad_norm": 0.4753477871417999, "learning_rate": 0.0005, "loss": 1.2063, "step": 9530 }, { "epoch": 0.30421888453075674, "grad_norm": 0.47976627945899963, "learning_rate": 0.0005, "loss": 1.2204, "step": 9540 }, { "epoch": 0.30453777225039064, "grad_norm": 0.4710116386413574, "learning_rate": 0.0005, "loss": 1.1968, "step": 9550 }, { "epoch": 0.30485665997002453, "grad_norm": 0.46347519755363464, "learning_rate": 0.0005, "loss": 1.2224, "step": 9560 }, { "epoch": 0.3051755476896585, "grad_norm": 0.4651038646697998, "learning_rate": 0.0005, "loss": 1.2198, "step": 9570 }, { "epoch": 0.3054944354092924, "grad_norm": 0.4524023234844208, "learning_rate": 0.0005, "loss": 1.236, "step": 9580 }, { "epoch": 0.3058133231289263, "grad_norm": 0.4605560600757599, "learning_rate": 0.0005, "loss": 1.222, "step": 9590 }, { "epoch": 0.30613221084856024, "grad_norm": 0.5155693888664246, "learning_rate": 0.0005, "loss": 1.2167, "step": 9600 }, { "epoch": 0.30645109856819414, "grad_norm": 0.48311251401901245, "learning_rate": 0.0005, "loss": 1.2207, "step": 9610 }, { "epoch": 0.30676998628782803, "grad_norm": 0.47260165214538574, "learning_rate": 0.0005, "loss": 1.2305, "step": 9620 }, { "epoch": 0.307088874007462, "grad_norm": 0.4718555510044098, "learning_rate": 0.0005, "loss": 1.1954, "step": 9630 }, { "epoch": 0.3074077617270959, "grad_norm": 0.46655380725860596, "learning_rate": 0.0005, "loss": 1.2258, "step": 9640 }, { "epoch": 0.3077266494467298, "grad_norm": 0.4614108204841614, "learning_rate": 0.0005, "loss": 1.2205, "step": 9650 }, { "epoch": 0.30804553716636374, "grad_norm": 0.45910724997520447, "learning_rate": 0.0005, "loss": 1.2105, "step": 9660 }, { "epoch": 0.30836442488599763, "grad_norm": 0.47271105647087097, "learning_rate": 0.0005, "loss": 1.2095, "step": 9670 }, { "epoch": 0.30868331260563153, "grad_norm": 0.45809072256088257, "learning_rate": 0.0005, "loss": 1.2192, "step": 9680 }, { "epoch": 0.3090022003252655, "grad_norm": 0.4718875288963318, "learning_rate": 0.0005, "loss": 1.2293, "step": 9690 }, { "epoch": 0.3093210880448994, "grad_norm": 0.44941264390945435, "learning_rate": 0.0005, "loss": 1.2226, "step": 9700 }, { "epoch": 0.3096399757645333, "grad_norm": 0.46634870767593384, "learning_rate": 0.0005, "loss": 1.2337, "step": 9710 }, { "epoch": 0.30995886348416724, "grad_norm": 0.4822189211845398, "learning_rate": 0.0005, "loss": 1.2324, "step": 9720 }, { "epoch": 0.31027775120380113, "grad_norm": 0.47028934955596924, "learning_rate": 0.0005, "loss": 1.2305, "step": 9730 }, { "epoch": 0.31059663892343503, "grad_norm": 0.48364537954330444, "learning_rate": 0.0005, "loss": 1.2244, "step": 9740 }, { "epoch": 0.310915526643069, "grad_norm": 0.469500333070755, "learning_rate": 0.0005, "loss": 1.2086, "step": 9750 }, { "epoch": 0.3112344143627029, "grad_norm": 0.44105562567710876, "learning_rate": 0.0005, "loss": 1.222, "step": 9760 }, { "epoch": 0.31155330208233684, "grad_norm": 0.4658164978027344, "learning_rate": 0.0005, "loss": 1.2396, "step": 9770 }, { "epoch": 0.31187218980197073, "grad_norm": 0.46329885721206665, "learning_rate": 0.0005, "loss": 1.203, "step": 9780 }, { "epoch": 0.31219107752160463, "grad_norm": 0.46826115250587463, "learning_rate": 0.0005, "loss": 1.2125, "step": 9790 }, { "epoch": 0.3125099652412386, "grad_norm": 0.45736852288246155, "learning_rate": 0.0005, "loss": 1.2275, "step": 9800 }, { "epoch": 0.3128288529608725, "grad_norm": 0.4739522635936737, "learning_rate": 0.0005, "loss": 1.2263, "step": 9810 }, { "epoch": 0.3131477406805064, "grad_norm": 0.4789111316204071, "learning_rate": 0.0005, "loss": 1.2277, "step": 9820 }, { "epoch": 0.31346662840014033, "grad_norm": 0.46404367685317993, "learning_rate": 0.0005, "loss": 1.2127, "step": 9830 }, { "epoch": 0.31378551611977423, "grad_norm": 0.4566570520401001, "learning_rate": 0.0005, "loss": 1.2174, "step": 9840 }, { "epoch": 0.31410440383940813, "grad_norm": 0.46813470125198364, "learning_rate": 0.0005, "loss": 1.1996, "step": 9850 }, { "epoch": 0.3144232915590421, "grad_norm": 0.4678925573825836, "learning_rate": 0.0005, "loss": 1.2285, "step": 9860 }, { "epoch": 0.314742179278676, "grad_norm": 0.4692615866661072, "learning_rate": 0.0005, "loss": 1.2266, "step": 9870 }, { "epoch": 0.3150610669983099, "grad_norm": 0.46353766322135925, "learning_rate": 0.0005, "loss": 1.2248, "step": 9880 }, { "epoch": 0.31537995471794383, "grad_norm": 0.4622415006160736, "learning_rate": 0.0005, "loss": 1.2079, "step": 9890 }, { "epoch": 0.31569884243757773, "grad_norm": 0.44016581773757935, "learning_rate": 0.0005, "loss": 1.2117, "step": 9900 }, { "epoch": 0.31601773015721163, "grad_norm": 0.44085219502449036, "learning_rate": 0.0005, "loss": 1.2008, "step": 9910 }, { "epoch": 0.3163366178768456, "grad_norm": 0.473407119512558, "learning_rate": 0.0005, "loss": 1.2106, "step": 9920 }, { "epoch": 0.3166555055964795, "grad_norm": 0.4463644027709961, "learning_rate": 0.0005, "loss": 1.2212, "step": 9930 }, { "epoch": 0.3169743933161134, "grad_norm": 0.47022417187690735, "learning_rate": 0.0005, "loss": 1.2217, "step": 9940 }, { "epoch": 0.31729328103574733, "grad_norm": 0.45228978991508484, "learning_rate": 0.0005, "loss": 1.2263, "step": 9950 }, { "epoch": 0.31761216875538123, "grad_norm": 0.45226162672042847, "learning_rate": 0.0005, "loss": 1.2165, "step": 9960 }, { "epoch": 0.31793105647501513, "grad_norm": 0.46043747663497925, "learning_rate": 0.0005, "loss": 1.2378, "step": 9970 }, { "epoch": 0.3182499441946491, "grad_norm": 0.4622530937194824, "learning_rate": 0.0005, "loss": 1.2129, "step": 9980 }, { "epoch": 0.318568831914283, "grad_norm": 0.4723272919654846, "learning_rate": 0.0005, "loss": 1.2202, "step": 9990 }, { "epoch": 0.3188877196339169, "grad_norm": 0.4637812077999115, "learning_rate": 0.0005, "loss": 1.2209, "step": 10000 }, { "epoch": 0.31920660735355083, "grad_norm": 0.4611336886882782, "learning_rate": 0.0005, "loss": 1.2244, "step": 10010 }, { "epoch": 0.31952549507318473, "grad_norm": 0.47000154852867126, "learning_rate": 0.0005, "loss": 1.2047, "step": 10020 }, { "epoch": 0.3198443827928186, "grad_norm": 0.451267272233963, "learning_rate": 0.0005, "loss": 1.2125, "step": 10030 }, { "epoch": 0.3201632705124526, "grad_norm": 0.4722524881362915, "learning_rate": 0.0005, "loss": 1.2124, "step": 10040 }, { "epoch": 0.3204821582320865, "grad_norm": 0.48461806774139404, "learning_rate": 0.0005, "loss": 1.2339, "step": 10050 }, { "epoch": 0.3208010459517204, "grad_norm": 0.4575521647930145, "learning_rate": 0.0005, "loss": 1.2186, "step": 10060 }, { "epoch": 0.32111993367135433, "grad_norm": 0.4630667269229889, "learning_rate": 0.0005, "loss": 1.2015, "step": 10070 }, { "epoch": 0.3214388213909882, "grad_norm": 0.452785462141037, "learning_rate": 0.0005, "loss": 1.2178, "step": 10080 }, { "epoch": 0.3217577091106221, "grad_norm": 0.4500238001346588, "learning_rate": 0.0005, "loss": 1.2138, "step": 10090 }, { "epoch": 0.3220765968302561, "grad_norm": 0.46511325240135193, "learning_rate": 0.0005, "loss": 1.2234, "step": 10100 }, { "epoch": 0.32239548454989, "grad_norm": 0.4618381857872009, "learning_rate": 0.0005, "loss": 1.2198, "step": 10110 }, { "epoch": 0.3227143722695239, "grad_norm": 0.4588926434516907, "learning_rate": 0.0005, "loss": 1.1924, "step": 10120 }, { "epoch": 0.32303325998915783, "grad_norm": 0.49134373664855957, "learning_rate": 0.0005, "loss": 1.2022, "step": 10130 }, { "epoch": 0.3233521477087917, "grad_norm": 0.4511118233203888, "learning_rate": 0.0005, "loss": 1.2077, "step": 10140 }, { "epoch": 0.3236710354284256, "grad_norm": 0.45254966616630554, "learning_rate": 0.0005, "loss": 1.219, "step": 10150 }, { "epoch": 0.3239899231480596, "grad_norm": 0.4560336470603943, "learning_rate": 0.0005, "loss": 1.2097, "step": 10160 }, { "epoch": 0.3243088108676935, "grad_norm": 0.467703640460968, "learning_rate": 0.0005, "loss": 1.2075, "step": 10170 }, { "epoch": 0.32462769858732743, "grad_norm": 0.46083834767341614, "learning_rate": 0.0005, "loss": 1.2012, "step": 10180 }, { "epoch": 0.3249465863069613, "grad_norm": 0.5108883380889893, "learning_rate": 0.0005, "loss": 1.1997, "step": 10190 }, { "epoch": 0.3252654740265952, "grad_norm": 0.46303167939186096, "learning_rate": 0.0005, "loss": 1.209, "step": 10200 }, { "epoch": 0.3255843617462292, "grad_norm": 0.46116575598716736, "learning_rate": 0.0005, "loss": 1.2078, "step": 10210 }, { "epoch": 0.3259032494658631, "grad_norm": 0.46415525674819946, "learning_rate": 0.0005, "loss": 1.2107, "step": 10220 }, { "epoch": 0.326222137185497, "grad_norm": 0.4536409378051758, "learning_rate": 0.0005, "loss": 1.2397, "step": 10230 }, { "epoch": 0.3265410249051309, "grad_norm": 0.4473925530910492, "learning_rate": 0.0005, "loss": 1.2301, "step": 10240 }, { "epoch": 0.3268599126247648, "grad_norm": 0.46480435132980347, "learning_rate": 0.0005, "loss": 1.185, "step": 10250 }, { "epoch": 0.3271788003443987, "grad_norm": 0.4542619287967682, "learning_rate": 0.0005, "loss": 1.2121, "step": 10260 }, { "epoch": 0.3274976880640327, "grad_norm": 0.44392162561416626, "learning_rate": 0.0005, "loss": 1.2085, "step": 10270 }, { "epoch": 0.3278165757836666, "grad_norm": 0.465130478143692, "learning_rate": 0.0005, "loss": 1.2149, "step": 10280 }, { "epoch": 0.3281354635033005, "grad_norm": 0.4558584988117218, "learning_rate": 0.0005, "loss": 1.2166, "step": 10290 }, { "epoch": 0.3284543512229344, "grad_norm": 0.45904284715652466, "learning_rate": 0.0005, "loss": 1.2063, "step": 10300 }, { "epoch": 0.3287732389425683, "grad_norm": 0.48073023557662964, "learning_rate": 0.0005, "loss": 1.2313, "step": 10310 }, { "epoch": 0.3290921266622022, "grad_norm": 0.4648429751396179, "learning_rate": 0.0005, "loss": 1.2006, "step": 10320 }, { "epoch": 0.3294110143818362, "grad_norm": 0.4585430920124054, "learning_rate": 0.0005, "loss": 1.2345, "step": 10330 }, { "epoch": 0.3297299021014701, "grad_norm": 0.46392467617988586, "learning_rate": 0.0005, "loss": 1.2242, "step": 10340 }, { "epoch": 0.33004878982110397, "grad_norm": 0.4722246527671814, "learning_rate": 0.0005, "loss": 1.1988, "step": 10350 }, { "epoch": 0.3303676775407379, "grad_norm": 0.49293071031570435, "learning_rate": 0.0005, "loss": 1.2072, "step": 10360 }, { "epoch": 0.3306865652603718, "grad_norm": 0.4574640393257141, "learning_rate": 0.0005, "loss": 1.2284, "step": 10370 }, { "epoch": 0.3310054529800057, "grad_norm": 0.4533388614654541, "learning_rate": 0.0005, "loss": 1.2003, "step": 10380 }, { "epoch": 0.3313243406996397, "grad_norm": 0.461215078830719, "learning_rate": 0.0005, "loss": 1.1933, "step": 10390 }, { "epoch": 0.3316432284192736, "grad_norm": 0.4396399259567261, "learning_rate": 0.0005, "loss": 1.2125, "step": 10400 }, { "epoch": 0.33196211613890747, "grad_norm": 0.439049631357193, "learning_rate": 0.0005, "loss": 1.1929, "step": 10410 }, { "epoch": 0.3322810038585414, "grad_norm": 0.46211275458335876, "learning_rate": 0.0005, "loss": 1.2061, "step": 10420 }, { "epoch": 0.3325998915781753, "grad_norm": 0.4407380223274231, "learning_rate": 0.0005, "loss": 1.1955, "step": 10430 }, { "epoch": 0.3329187792978092, "grad_norm": 0.45931512117385864, "learning_rate": 0.0005, "loss": 1.2269, "step": 10440 }, { "epoch": 0.3332376670174432, "grad_norm": 0.4495917558670044, "learning_rate": 0.0005, "loss": 1.202, "step": 10450 }, { "epoch": 0.33355655473707707, "grad_norm": 0.4619958698749542, "learning_rate": 0.0005, "loss": 1.1961, "step": 10460 }, { "epoch": 0.33387544245671097, "grad_norm": 0.4782272279262543, "learning_rate": 0.0005, "loss": 1.217, "step": 10470 }, { "epoch": 0.3341943301763449, "grad_norm": 0.4582221210002899, "learning_rate": 0.0005, "loss": 1.2012, "step": 10480 }, { "epoch": 0.3345132178959788, "grad_norm": 0.44622093439102173, "learning_rate": 0.0005, "loss": 1.1942, "step": 10490 }, { "epoch": 0.3348321056156127, "grad_norm": 0.4802570343017578, "learning_rate": 0.0005, "loss": 1.2005, "step": 10500 }, { "epoch": 0.33515099333524667, "grad_norm": 0.46790650486946106, "learning_rate": 0.0005, "loss": 1.2128, "step": 10510 }, { "epoch": 0.33546988105488057, "grad_norm": 0.45268651843070984, "learning_rate": 0.0005, "loss": 1.2211, "step": 10520 }, { "epoch": 0.33578876877451447, "grad_norm": 0.45402753353118896, "learning_rate": 0.0005, "loss": 1.2109, "step": 10530 }, { "epoch": 0.3361076564941484, "grad_norm": 0.4552929401397705, "learning_rate": 0.0005, "loss": 1.1862, "step": 10540 }, { "epoch": 0.3364265442137823, "grad_norm": 0.4635280668735504, "learning_rate": 0.0005, "loss": 1.1947, "step": 10550 }, { "epoch": 0.3367454319334162, "grad_norm": 0.4471782147884369, "learning_rate": 0.0005, "loss": 1.2033, "step": 10560 }, { "epoch": 0.33706431965305017, "grad_norm": 0.46447694301605225, "learning_rate": 0.0005, "loss": 1.192, "step": 10570 }, { "epoch": 0.33738320737268407, "grad_norm": 0.4564967453479767, "learning_rate": 0.0005, "loss": 1.2017, "step": 10580 }, { "epoch": 0.337702095092318, "grad_norm": 0.451744019985199, "learning_rate": 0.0005, "loss": 1.1689, "step": 10590 }, { "epoch": 0.3380209828119519, "grad_norm": 0.46503663063049316, "learning_rate": 0.0005, "loss": 1.1923, "step": 10600 }, { "epoch": 0.3383398705315858, "grad_norm": 0.45748820900917053, "learning_rate": 0.0005, "loss": 1.2015, "step": 10610 }, { "epoch": 0.33865875825121977, "grad_norm": 0.43980541825294495, "learning_rate": 0.0005, "loss": 1.2261, "step": 10620 }, { "epoch": 0.33897764597085367, "grad_norm": 0.4510059058666229, "learning_rate": 0.0005, "loss": 1.1988, "step": 10630 }, { "epoch": 0.33929653369048757, "grad_norm": 0.45995068550109863, "learning_rate": 0.0005, "loss": 1.2083, "step": 10640 }, { "epoch": 0.3396154214101215, "grad_norm": 0.44615572690963745, "learning_rate": 0.0005, "loss": 1.2075, "step": 10650 }, { "epoch": 0.3399343091297554, "grad_norm": 0.4503238797187805, "learning_rate": 0.0005, "loss": 1.2034, "step": 10660 }, { "epoch": 0.3402531968493893, "grad_norm": 0.43003490567207336, "learning_rate": 0.0005, "loss": 1.207, "step": 10670 }, { "epoch": 0.34057208456902327, "grad_norm": 0.4652049243450165, "learning_rate": 0.0005, "loss": 1.2131, "step": 10680 }, { "epoch": 0.34089097228865717, "grad_norm": 0.4483892619609833, "learning_rate": 0.0005, "loss": 1.2003, "step": 10690 }, { "epoch": 0.34120986000829107, "grad_norm": 0.4728235602378845, "learning_rate": 0.0005, "loss": 1.2066, "step": 10700 }, { "epoch": 0.341528747727925, "grad_norm": 0.44875046610832214, "learning_rate": 0.0005, "loss": 1.2013, "step": 10710 }, { "epoch": 0.3418476354475589, "grad_norm": 0.4573599696159363, "learning_rate": 0.0005, "loss": 1.2079, "step": 10720 }, { "epoch": 0.3421665231671928, "grad_norm": 0.45679569244384766, "learning_rate": 0.0005, "loss": 1.1825, "step": 10730 }, { "epoch": 0.34248541088682677, "grad_norm": 0.4661482572555542, "learning_rate": 0.0005, "loss": 1.217, "step": 10740 }, { "epoch": 0.34280429860646067, "grad_norm": 0.44722360372543335, "learning_rate": 0.0005, "loss": 1.1993, "step": 10750 }, { "epoch": 0.34312318632609456, "grad_norm": 0.4567379653453827, "learning_rate": 0.0005, "loss": 1.1921, "step": 10760 }, { "epoch": 0.3434420740457285, "grad_norm": 0.4360160231590271, "learning_rate": 0.0005, "loss": 1.1815, "step": 10770 }, { "epoch": 0.3437609617653624, "grad_norm": 0.45101627707481384, "learning_rate": 0.0005, "loss": 1.2016, "step": 10780 }, { "epoch": 0.3440798494849963, "grad_norm": 0.4734707474708557, "learning_rate": 0.0005, "loss": 1.1938, "step": 10790 }, { "epoch": 0.34439873720463027, "grad_norm": 0.4517531096935272, "learning_rate": 0.0005, "loss": 1.2056, "step": 10800 }, { "epoch": 0.34471762492426417, "grad_norm": 0.46230632066726685, "learning_rate": 0.0005, "loss": 1.2213, "step": 10810 }, { "epoch": 0.34503651264389806, "grad_norm": 0.4498429000377655, "learning_rate": 0.0005, "loss": 1.21, "step": 10820 }, { "epoch": 0.345355400363532, "grad_norm": 0.4520126283168793, "learning_rate": 0.0005, "loss": 1.196, "step": 10830 }, { "epoch": 0.3456742880831659, "grad_norm": 0.4503188133239746, "learning_rate": 0.0005, "loss": 1.1855, "step": 10840 }, { "epoch": 0.3459931758027998, "grad_norm": 0.4529699683189392, "learning_rate": 0.0005, "loss": 1.1873, "step": 10850 }, { "epoch": 0.34631206352243377, "grad_norm": 0.4275098741054535, "learning_rate": 0.0005, "loss": 1.1872, "step": 10860 }, { "epoch": 0.34663095124206766, "grad_norm": 0.4628719985485077, "learning_rate": 0.0005, "loss": 1.2048, "step": 10870 }, { "epoch": 0.34694983896170156, "grad_norm": 0.4489217698574066, "learning_rate": 0.0005, "loss": 1.2196, "step": 10880 }, { "epoch": 0.3472687266813355, "grad_norm": 0.4392339885234833, "learning_rate": 0.0005, "loss": 1.204, "step": 10890 }, { "epoch": 0.3475876144009694, "grad_norm": 0.45925724506378174, "learning_rate": 0.0005, "loss": 1.21, "step": 10900 }, { "epoch": 0.3479065021206033, "grad_norm": 0.46018514037132263, "learning_rate": 0.0005, "loss": 1.1791, "step": 10910 }, { "epoch": 0.34822538984023726, "grad_norm": 0.46905317902565, "learning_rate": 0.0005, "loss": 1.204, "step": 10920 }, { "epoch": 0.34854427755987116, "grad_norm": 0.4454002380371094, "learning_rate": 0.0005, "loss": 1.209, "step": 10930 }, { "epoch": 0.34886316527950506, "grad_norm": 0.4870285987854004, "learning_rate": 0.0005, "loss": 1.189, "step": 10940 }, { "epoch": 0.349182052999139, "grad_norm": 0.45176053047180176, "learning_rate": 0.0005, "loss": 1.2131, "step": 10950 }, { "epoch": 0.3495009407187729, "grad_norm": 0.4306580722332001, "learning_rate": 0.0005, "loss": 1.1981, "step": 10960 }, { "epoch": 0.3498198284384068, "grad_norm": 0.4551098644733429, "learning_rate": 0.0005, "loss": 1.2007, "step": 10970 }, { "epoch": 0.35013871615804076, "grad_norm": 0.4566822350025177, "learning_rate": 0.0005, "loss": 1.2013, "step": 10980 }, { "epoch": 0.35045760387767466, "grad_norm": 0.46287766098976135, "learning_rate": 0.0005, "loss": 1.1762, "step": 10990 }, { "epoch": 0.3507764915973086, "grad_norm": 0.45999109745025635, "learning_rate": 0.0005, "loss": 1.196, "step": 11000 }, { "epoch": 0.3510953793169425, "grad_norm": 0.4592169523239136, "learning_rate": 0.0005, "loss": 1.2074, "step": 11010 }, { "epoch": 0.3514142670365764, "grad_norm": 0.4633693993091583, "learning_rate": 0.0005, "loss": 1.1902, "step": 11020 }, { "epoch": 0.35173315475621036, "grad_norm": 0.43769994378089905, "learning_rate": 0.0005, "loss": 1.1725, "step": 11030 }, { "epoch": 0.35205204247584426, "grad_norm": 0.4633389413356781, "learning_rate": 0.0005, "loss": 1.1902, "step": 11040 }, { "epoch": 0.35237093019547816, "grad_norm": 0.4522418975830078, "learning_rate": 0.0005, "loss": 1.1719, "step": 11050 }, { "epoch": 0.3526898179151121, "grad_norm": 0.4321923553943634, "learning_rate": 0.0005, "loss": 1.1958, "step": 11060 }, { "epoch": 0.353008705634746, "grad_norm": 0.4340358376502991, "learning_rate": 0.0005, "loss": 1.1893, "step": 11070 }, { "epoch": 0.3533275933543799, "grad_norm": 0.4427970051765442, "learning_rate": 0.0005, "loss": 1.184, "step": 11080 }, { "epoch": 0.35364648107401386, "grad_norm": 0.4384293854236603, "learning_rate": 0.0005, "loss": 1.174, "step": 11090 }, { "epoch": 0.35396536879364776, "grad_norm": 0.45408880710601807, "learning_rate": 0.0005, "loss": 1.177, "step": 11100 }, { "epoch": 0.35428425651328166, "grad_norm": 0.4425502121448517, "learning_rate": 0.0005, "loss": 1.2027, "step": 11110 }, { "epoch": 0.3546031442329156, "grad_norm": 0.4402698576450348, "learning_rate": 0.0005, "loss": 1.2067, "step": 11120 }, { "epoch": 0.3549220319525495, "grad_norm": 0.45279955863952637, "learning_rate": 0.0005, "loss": 1.2, "step": 11130 }, { "epoch": 0.3552409196721834, "grad_norm": 0.4827028214931488, "learning_rate": 0.0005, "loss": 1.1948, "step": 11140 }, { "epoch": 0.35555980739181736, "grad_norm": 0.4452892243862152, "learning_rate": 0.0005, "loss": 1.1916, "step": 11150 }, { "epoch": 0.35587869511145126, "grad_norm": 0.4474916160106659, "learning_rate": 0.0005, "loss": 1.199, "step": 11160 }, { "epoch": 0.35619758283108516, "grad_norm": 0.43313753604888916, "learning_rate": 0.0005, "loss": 1.1972, "step": 11170 }, { "epoch": 0.3565164705507191, "grad_norm": 0.4603961110115051, "learning_rate": 0.0005, "loss": 1.1911, "step": 11180 }, { "epoch": 0.356835358270353, "grad_norm": 0.4489038586616516, "learning_rate": 0.0005, "loss": 1.1858, "step": 11190 }, { "epoch": 0.3571542459899869, "grad_norm": 0.4428690969944, "learning_rate": 0.0005, "loss": 1.1666, "step": 11200 }, { "epoch": 0.35747313370962086, "grad_norm": 0.43662405014038086, "learning_rate": 0.0005, "loss": 1.2024, "step": 11210 }, { "epoch": 0.35779202142925476, "grad_norm": 0.42232009768486023, "learning_rate": 0.0005, "loss": 1.1872, "step": 11220 }, { "epoch": 0.35811090914888866, "grad_norm": 0.44732868671417236, "learning_rate": 0.0005, "loss": 1.2081, "step": 11230 }, { "epoch": 0.3584297968685226, "grad_norm": 0.44722238183021545, "learning_rate": 0.0005, "loss": 1.2073, "step": 11240 }, { "epoch": 0.3587486845881565, "grad_norm": 0.46360519528388977, "learning_rate": 0.0005, "loss": 1.1852, "step": 11250 }, { "epoch": 0.3590675723077904, "grad_norm": 0.46256133913993835, "learning_rate": 0.0005, "loss": 1.1761, "step": 11260 }, { "epoch": 0.35938646002742436, "grad_norm": 0.4595744013786316, "learning_rate": 0.0005, "loss": 1.2061, "step": 11270 }, { "epoch": 0.35970534774705826, "grad_norm": 0.4376492500305176, "learning_rate": 0.0005, "loss": 1.175, "step": 11280 }, { "epoch": 0.36002423546669216, "grad_norm": 0.47340521216392517, "learning_rate": 0.0005, "loss": 1.1821, "step": 11290 }, { "epoch": 0.3603431231863261, "grad_norm": 0.47642752528190613, "learning_rate": 0.0005, "loss": 1.1941, "step": 11300 }, { "epoch": 0.36066201090596, "grad_norm": 0.44139501452445984, "learning_rate": 0.0005, "loss": 1.1818, "step": 11310 }, { "epoch": 0.3609808986255939, "grad_norm": 0.4428239166736603, "learning_rate": 0.0005, "loss": 1.178, "step": 11320 }, { "epoch": 0.36129978634522786, "grad_norm": 0.4435330331325531, "learning_rate": 0.0005, "loss": 1.1774, "step": 11330 }, { "epoch": 0.36161867406486176, "grad_norm": 0.4315539300441742, "learning_rate": 0.0005, "loss": 1.2015, "step": 11340 }, { "epoch": 0.36193756178449565, "grad_norm": 0.4380136728286743, "learning_rate": 0.0005, "loss": 1.1874, "step": 11350 }, { "epoch": 0.3622564495041296, "grad_norm": 0.44613757729530334, "learning_rate": 0.0005, "loss": 1.1948, "step": 11360 }, { "epoch": 0.3625753372237635, "grad_norm": 0.439974844455719, "learning_rate": 0.0005, "loss": 1.1864, "step": 11370 }, { "epoch": 0.3628942249433974, "grad_norm": 0.4617900848388672, "learning_rate": 0.0005, "loss": 1.185, "step": 11380 }, { "epoch": 0.36321311266303136, "grad_norm": 0.4472865164279938, "learning_rate": 0.0005, "loss": 1.1888, "step": 11390 }, { "epoch": 0.36353200038266525, "grad_norm": 0.44490471482276917, "learning_rate": 0.0005, "loss": 1.1955, "step": 11400 }, { "epoch": 0.3638508881022992, "grad_norm": 0.43914979696273804, "learning_rate": 0.0005, "loss": 1.1781, "step": 11410 }, { "epoch": 0.3641697758219331, "grad_norm": 0.4476803243160248, "learning_rate": 0.0005, "loss": 1.1889, "step": 11420 }, { "epoch": 0.364488663541567, "grad_norm": 0.438223659992218, "learning_rate": 0.0005, "loss": 1.1659, "step": 11430 }, { "epoch": 0.36480755126120096, "grad_norm": 0.4376436173915863, "learning_rate": 0.0005, "loss": 1.1903, "step": 11440 }, { "epoch": 0.36512643898083486, "grad_norm": 0.44571664929389954, "learning_rate": 0.0005, "loss": 1.188, "step": 11450 }, { "epoch": 0.36544532670046875, "grad_norm": 0.44138994812965393, "learning_rate": 0.0005, "loss": 1.171, "step": 11460 }, { "epoch": 0.3657642144201027, "grad_norm": 0.458737313747406, "learning_rate": 0.0005, "loss": 1.1912, "step": 11470 }, { "epoch": 0.3660831021397366, "grad_norm": 0.4489744007587433, "learning_rate": 0.0005, "loss": 1.1821, "step": 11480 }, { "epoch": 0.3664019898593705, "grad_norm": 0.45018211007118225, "learning_rate": 0.0005, "loss": 1.1903, "step": 11490 }, { "epoch": 0.36672087757900446, "grad_norm": 0.45186272263526917, "learning_rate": 0.0005, "loss": 1.2084, "step": 11500 }, { "epoch": 0.36703976529863835, "grad_norm": 0.44593289494514465, "learning_rate": 0.0005, "loss": 1.1764, "step": 11510 }, { "epoch": 0.36735865301827225, "grad_norm": 0.45831361413002014, "learning_rate": 0.0005, "loss": 1.2031, "step": 11520 }, { "epoch": 0.3676775407379062, "grad_norm": 0.44360455870628357, "learning_rate": 0.0005, "loss": 1.1727, "step": 11530 }, { "epoch": 0.3679964284575401, "grad_norm": 0.4307442009449005, "learning_rate": 0.0005, "loss": 1.1638, "step": 11540 }, { "epoch": 0.368315316177174, "grad_norm": 0.4518921971321106, "learning_rate": 0.0005, "loss": 1.1875, "step": 11550 }, { "epoch": 0.36863420389680795, "grad_norm": 0.43437421321868896, "learning_rate": 0.0005, "loss": 1.1774, "step": 11560 }, { "epoch": 0.36895309161644185, "grad_norm": 0.4409539997577667, "learning_rate": 0.0005, "loss": 1.1971, "step": 11570 }, { "epoch": 0.36927197933607575, "grad_norm": 0.43100041151046753, "learning_rate": 0.0005, "loss": 1.1633, "step": 11580 }, { "epoch": 0.3695908670557097, "grad_norm": 0.441154807806015, "learning_rate": 0.0005, "loss": 1.1709, "step": 11590 }, { "epoch": 0.3699097547753436, "grad_norm": 0.44322308897972107, "learning_rate": 0.0005, "loss": 1.2069, "step": 11600 }, { "epoch": 0.3702286424949775, "grad_norm": 0.4248104989528656, "learning_rate": 0.0005, "loss": 1.1893, "step": 11610 }, { "epoch": 0.37054753021461145, "grad_norm": 0.46101078391075134, "learning_rate": 0.0005, "loss": 1.1997, "step": 11620 }, { "epoch": 0.37086641793424535, "grad_norm": 0.4464070200920105, "learning_rate": 0.0005, "loss": 1.1887, "step": 11630 }, { "epoch": 0.37118530565387925, "grad_norm": 0.4598291218280792, "learning_rate": 0.0005, "loss": 1.2104, "step": 11640 }, { "epoch": 0.3715041933735132, "grad_norm": 0.40784159302711487, "learning_rate": 0.0005, "loss": 1.1796, "step": 11650 }, { "epoch": 0.3718230810931471, "grad_norm": 0.4431672692298889, "learning_rate": 0.0005, "loss": 1.2027, "step": 11660 }, { "epoch": 0.372141968812781, "grad_norm": 0.45555850863456726, "learning_rate": 0.0005, "loss": 1.2067, "step": 11670 }, { "epoch": 0.37246085653241495, "grad_norm": 0.43322280049324036, "learning_rate": 0.0005, "loss": 1.1851, "step": 11680 }, { "epoch": 0.37277974425204885, "grad_norm": 0.4348566234111786, "learning_rate": 0.0005, "loss": 1.1799, "step": 11690 }, { "epoch": 0.37309863197168275, "grad_norm": 0.42040494084358215, "learning_rate": 0.0005, "loss": 1.1667, "step": 11700 }, { "epoch": 0.3734175196913167, "grad_norm": 0.4358799159526825, "learning_rate": 0.0005, "loss": 1.1818, "step": 11710 }, { "epoch": 0.3737364074109506, "grad_norm": 0.4317675828933716, "learning_rate": 0.0005, "loss": 1.1811, "step": 11720 }, { "epoch": 0.3740552951305845, "grad_norm": 0.4507165849208832, "learning_rate": 0.0005, "loss": 1.1913, "step": 11730 }, { "epoch": 0.37437418285021845, "grad_norm": 0.43800291419029236, "learning_rate": 0.0005, "loss": 1.1965, "step": 11740 }, { "epoch": 0.37469307056985235, "grad_norm": 0.45497605204582214, "learning_rate": 0.0005, "loss": 1.2025, "step": 11750 }, { "epoch": 0.37501195828948625, "grad_norm": 0.4357222020626068, "learning_rate": 0.0005, "loss": 1.1949, "step": 11760 }, { "epoch": 0.3753308460091202, "grad_norm": 0.46163061261177063, "learning_rate": 0.0005, "loss": 1.1907, "step": 11770 }, { "epoch": 0.3756497337287541, "grad_norm": 0.43153810501098633, "learning_rate": 0.0005, "loss": 1.1813, "step": 11780 }, { "epoch": 0.375968621448388, "grad_norm": 0.4308339059352875, "learning_rate": 0.0005, "loss": 1.2098, "step": 11790 }, { "epoch": 0.37628750916802195, "grad_norm": 0.4210801422595978, "learning_rate": 0.0005, "loss": 1.1825, "step": 11800 }, { "epoch": 0.37660639688765585, "grad_norm": 0.4504418671131134, "learning_rate": 0.0005, "loss": 1.1906, "step": 11810 }, { "epoch": 0.37692528460728975, "grad_norm": 0.42673376202583313, "learning_rate": 0.0005, "loss": 1.1925, "step": 11820 }, { "epoch": 0.3772441723269237, "grad_norm": 0.42398470640182495, "learning_rate": 0.0005, "loss": 1.1663, "step": 11830 }, { "epoch": 0.3775630600465576, "grad_norm": 0.437023401260376, "learning_rate": 0.0005, "loss": 1.1899, "step": 11840 }, { "epoch": 0.37788194776619155, "grad_norm": 0.4427235722541809, "learning_rate": 0.0005, "loss": 1.1861, "step": 11850 }, { "epoch": 0.37820083548582545, "grad_norm": 0.44343510270118713, "learning_rate": 0.0005, "loss": 1.1963, "step": 11860 }, { "epoch": 0.37851972320545935, "grad_norm": 0.43557801842689514, "learning_rate": 0.0005, "loss": 1.1906, "step": 11870 }, { "epoch": 0.3788386109250933, "grad_norm": 0.43895554542541504, "learning_rate": 0.0005, "loss": 1.1911, "step": 11880 }, { "epoch": 0.3791574986447272, "grad_norm": 0.4184170961380005, "learning_rate": 0.0005, "loss": 1.1792, "step": 11890 }, { "epoch": 0.3794763863643611, "grad_norm": 0.4252275824546814, "learning_rate": 0.0005, "loss": 1.1738, "step": 11900 }, { "epoch": 0.37979527408399505, "grad_norm": 0.43294379115104675, "learning_rate": 0.0005, "loss": 1.1898, "step": 11910 }, { "epoch": 0.38011416180362895, "grad_norm": 0.4419662654399872, "learning_rate": 0.0005, "loss": 1.1968, "step": 11920 }, { "epoch": 0.38043304952326285, "grad_norm": 0.42845383286476135, "learning_rate": 0.0005, "loss": 1.1602, "step": 11930 }, { "epoch": 0.3807519372428968, "grad_norm": 0.4342953860759735, "learning_rate": 0.0005, "loss": 1.1538, "step": 11940 }, { "epoch": 0.3810708249625307, "grad_norm": 0.4351995885372162, "learning_rate": 0.0005, "loss": 1.177, "step": 11950 }, { "epoch": 0.3813897126821646, "grad_norm": 0.43881794810295105, "learning_rate": 0.0005, "loss": 1.1927, "step": 11960 }, { "epoch": 0.38170860040179855, "grad_norm": 0.4251026213169098, "learning_rate": 0.0005, "loss": 1.1852, "step": 11970 }, { "epoch": 0.38202748812143245, "grad_norm": 0.4276167154312134, "learning_rate": 0.0005, "loss": 1.166, "step": 11980 }, { "epoch": 0.38234637584106634, "grad_norm": 0.44587334990501404, "learning_rate": 0.0005, "loss": 1.1749, "step": 11990 }, { "epoch": 0.3826652635607003, "grad_norm": 0.46067747473716736, "learning_rate": 0.0005, "loss": 1.1795, "step": 12000 }, { "epoch": 0.3829841512803342, "grad_norm": 0.43104439973831177, "learning_rate": 0.0005, "loss": 1.1913, "step": 12010 }, { "epoch": 0.3833030389999681, "grad_norm": 0.44496071338653564, "learning_rate": 0.0005, "loss": 1.2024, "step": 12020 }, { "epoch": 0.38362192671960205, "grad_norm": 0.4469740390777588, "learning_rate": 0.0005, "loss": 1.177, "step": 12030 }, { "epoch": 0.38394081443923594, "grad_norm": 0.4275093078613281, "learning_rate": 0.0005, "loss": 1.1733, "step": 12040 }, { "epoch": 0.38425970215886984, "grad_norm": 0.4296175539493561, "learning_rate": 0.0005, "loss": 1.1801, "step": 12050 }, { "epoch": 0.3845785898785038, "grad_norm": 0.43391743302345276, "learning_rate": 0.0005, "loss": 1.1688, "step": 12060 }, { "epoch": 0.3848974775981377, "grad_norm": 0.4239729940891266, "learning_rate": 0.0005, "loss": 1.1883, "step": 12070 }, { "epoch": 0.3852163653177716, "grad_norm": 0.4424794614315033, "learning_rate": 0.0005, "loss": 1.1819, "step": 12080 }, { "epoch": 0.38553525303740555, "grad_norm": 0.4417058229446411, "learning_rate": 0.0005, "loss": 1.1742, "step": 12090 }, { "epoch": 0.38585414075703944, "grad_norm": 0.43083709478378296, "learning_rate": 0.0005, "loss": 1.1843, "step": 12100 }, { "epoch": 0.38617302847667334, "grad_norm": 0.434842973947525, "learning_rate": 0.0005, "loss": 1.1875, "step": 12110 }, { "epoch": 0.3864919161963073, "grad_norm": 0.4427316188812256, "learning_rate": 0.0005, "loss": 1.1912, "step": 12120 }, { "epoch": 0.3868108039159412, "grad_norm": 0.44422024488449097, "learning_rate": 0.0005, "loss": 1.1893, "step": 12130 }, { "epoch": 0.3871296916355751, "grad_norm": 0.42151498794555664, "learning_rate": 0.0005, "loss": 1.1565, "step": 12140 }, { "epoch": 0.38744857935520904, "grad_norm": 0.42576828598976135, "learning_rate": 0.0005, "loss": 1.1992, "step": 12150 }, { "epoch": 0.38776746707484294, "grad_norm": 0.4259600341320038, "learning_rate": 0.0005, "loss": 1.1968, "step": 12160 }, { "epoch": 0.38808635479447684, "grad_norm": 0.41919681429862976, "learning_rate": 0.0005, "loss": 1.1566, "step": 12170 }, { "epoch": 0.3884052425141108, "grad_norm": 0.43111515045166016, "learning_rate": 0.0005, "loss": 1.1983, "step": 12180 }, { "epoch": 0.3887241302337447, "grad_norm": 0.4664030373096466, "learning_rate": 0.0005, "loss": 1.1738, "step": 12190 }, { "epoch": 0.3890430179533786, "grad_norm": 0.4208368957042694, "learning_rate": 0.0005, "loss": 1.1711, "step": 12200 }, { "epoch": 0.38936190567301254, "grad_norm": 0.4356335401535034, "learning_rate": 0.0005, "loss": 1.1787, "step": 12210 }, { "epoch": 0.38968079339264644, "grad_norm": 0.42046454548835754, "learning_rate": 0.0005, "loss": 1.1818, "step": 12220 }, { "epoch": 0.38999968111228034, "grad_norm": 0.41660618782043457, "learning_rate": 0.0005, "loss": 1.1674, "step": 12230 }, { "epoch": 0.3903185688319143, "grad_norm": 0.43841058015823364, "learning_rate": 0.0005, "loss": 1.1689, "step": 12240 }, { "epoch": 0.3906374565515482, "grad_norm": 0.4638073146343231, "learning_rate": 0.0005, "loss": 1.1595, "step": 12250 }, { "epoch": 0.39095634427118214, "grad_norm": 0.4431755840778351, "learning_rate": 0.0005, "loss": 1.1745, "step": 12260 }, { "epoch": 0.39127523199081604, "grad_norm": 0.4278421401977539, "learning_rate": 0.0005, "loss": 1.1848, "step": 12270 }, { "epoch": 0.39159411971044994, "grad_norm": 0.4424417316913605, "learning_rate": 0.0005, "loss": 1.1735, "step": 12280 }, { "epoch": 0.3919130074300839, "grad_norm": 0.43157055974006653, "learning_rate": 0.0005, "loss": 1.1764, "step": 12290 }, { "epoch": 0.3922318951497178, "grad_norm": 0.43115106225013733, "learning_rate": 0.0005, "loss": 1.1909, "step": 12300 }, { "epoch": 0.3925507828693517, "grad_norm": 0.4346446394920349, "learning_rate": 0.0005, "loss": 1.1777, "step": 12310 }, { "epoch": 0.39286967058898564, "grad_norm": 0.43510472774505615, "learning_rate": 0.0005, "loss": 1.1889, "step": 12320 }, { "epoch": 0.39318855830861954, "grad_norm": 0.4409896433353424, "learning_rate": 0.0005, "loss": 1.187, "step": 12330 }, { "epoch": 0.39350744602825344, "grad_norm": 0.4372926354408264, "learning_rate": 0.0005, "loss": 1.1625, "step": 12340 }, { "epoch": 0.3938263337478874, "grad_norm": 0.4218306839466095, "learning_rate": 0.0005, "loss": 1.1609, "step": 12350 }, { "epoch": 0.3941452214675213, "grad_norm": 0.4451087415218353, "learning_rate": 0.0005, "loss": 1.2059, "step": 12360 }, { "epoch": 0.3944641091871552, "grad_norm": 0.4136224687099457, "learning_rate": 0.0005, "loss": 1.1685, "step": 12370 }, { "epoch": 0.39478299690678914, "grad_norm": 0.4264117479324341, "learning_rate": 0.0005, "loss": 1.1674, "step": 12380 }, { "epoch": 0.39510188462642304, "grad_norm": 0.45373213291168213, "learning_rate": 0.0005, "loss": 1.1692, "step": 12390 }, { "epoch": 0.39542077234605694, "grad_norm": 0.4304463565349579, "learning_rate": 0.0005, "loss": 1.1847, "step": 12400 }, { "epoch": 0.3957396600656909, "grad_norm": 0.4379657506942749, "learning_rate": 0.0005, "loss": 1.1655, "step": 12410 }, { "epoch": 0.3960585477853248, "grad_norm": 0.4436817169189453, "learning_rate": 0.0005, "loss": 1.1784, "step": 12420 }, { "epoch": 0.3963774355049587, "grad_norm": 0.43348872661590576, "learning_rate": 0.0005, "loss": 1.1819, "step": 12430 }, { "epoch": 0.39669632322459264, "grad_norm": 0.42717644572257996, "learning_rate": 0.0005, "loss": 1.1569, "step": 12440 }, { "epoch": 0.39701521094422654, "grad_norm": 0.4594727158546448, "learning_rate": 0.0005, "loss": 1.1825, "step": 12450 }, { "epoch": 0.39733409866386044, "grad_norm": 0.41570910811424255, "learning_rate": 0.0005, "loss": 1.1639, "step": 12460 }, { "epoch": 0.3976529863834944, "grad_norm": 0.4306057095527649, "learning_rate": 0.0005, "loss": 1.1904, "step": 12470 }, { "epoch": 0.3979718741031283, "grad_norm": 0.4283396005630493, "learning_rate": 0.0005, "loss": 1.1881, "step": 12480 }, { "epoch": 0.3982907618227622, "grad_norm": 0.4277869164943695, "learning_rate": 0.0005, "loss": 1.1743, "step": 12490 }, { "epoch": 0.39860964954239614, "grad_norm": 0.4295739233493805, "learning_rate": 0.0005, "loss": 1.1919, "step": 12500 }, { "epoch": 0.39892853726203004, "grad_norm": 0.4237912893295288, "learning_rate": 0.0005, "loss": 1.1662, "step": 12510 }, { "epoch": 0.39924742498166393, "grad_norm": 0.42816078662872314, "learning_rate": 0.0005, "loss": 1.1684, "step": 12520 }, { "epoch": 0.3995663127012979, "grad_norm": 0.420441210269928, "learning_rate": 0.0005, "loss": 1.1875, "step": 12530 }, { "epoch": 0.3998852004209318, "grad_norm": 0.4237239956855774, "learning_rate": 0.0005, "loss": 1.1732, "step": 12540 }, { "epoch": 0.4002040881405657, "grad_norm": 0.43103668093681335, "learning_rate": 0.0005, "loss": 1.1517, "step": 12550 }, { "epoch": 0.40052297586019964, "grad_norm": 0.42024657130241394, "learning_rate": 0.0005, "loss": 1.168, "step": 12560 }, { "epoch": 0.40084186357983353, "grad_norm": 0.43330562114715576, "learning_rate": 0.0005, "loss": 1.1971, "step": 12570 }, { "epoch": 0.40116075129946743, "grad_norm": 0.4399755895137787, "learning_rate": 0.0005, "loss": 1.1461, "step": 12580 }, { "epoch": 0.4014796390191014, "grad_norm": 0.4258183538913727, "learning_rate": 0.0005, "loss": 1.1975, "step": 12590 }, { "epoch": 0.4017985267387353, "grad_norm": 0.4347604513168335, "learning_rate": 0.0005, "loss": 1.1687, "step": 12600 }, { "epoch": 0.4021174144583692, "grad_norm": 0.424478679895401, "learning_rate": 0.0005, "loss": 1.1737, "step": 12610 }, { "epoch": 0.40243630217800314, "grad_norm": 0.42958155274391174, "learning_rate": 0.0005, "loss": 1.1814, "step": 12620 }, { "epoch": 0.40275518989763703, "grad_norm": 0.426096111536026, "learning_rate": 0.0005, "loss": 1.1575, "step": 12630 }, { "epoch": 0.40307407761727093, "grad_norm": 0.4363478422164917, "learning_rate": 0.0005, "loss": 1.156, "step": 12640 }, { "epoch": 0.4033929653369049, "grad_norm": 0.41518527269363403, "learning_rate": 0.0005, "loss": 1.1786, "step": 12650 }, { "epoch": 0.4037118530565388, "grad_norm": 0.42658382654190063, "learning_rate": 0.0005, "loss": 1.1673, "step": 12660 }, { "epoch": 0.40403074077617274, "grad_norm": 0.42411237955093384, "learning_rate": 0.0005, "loss": 1.1663, "step": 12670 }, { "epoch": 0.40434962849580663, "grad_norm": 0.4455260634422302, "learning_rate": 0.0005, "loss": 1.1758, "step": 12680 }, { "epoch": 0.40466851621544053, "grad_norm": 0.428243488073349, "learning_rate": 0.0005, "loss": 1.1588, "step": 12690 }, { "epoch": 0.4049874039350745, "grad_norm": 0.4175855815410614, "learning_rate": 0.0005, "loss": 1.1768, "step": 12700 }, { "epoch": 0.4053062916547084, "grad_norm": 0.42678576707839966, "learning_rate": 0.0005, "loss": 1.1582, "step": 12710 }, { "epoch": 0.4056251793743423, "grad_norm": 0.44132933020591736, "learning_rate": 0.0005, "loss": 1.1794, "step": 12720 }, { "epoch": 0.40594406709397624, "grad_norm": 0.44395214319229126, "learning_rate": 0.0005, "loss": 1.1632, "step": 12730 }, { "epoch": 0.40626295481361013, "grad_norm": 0.44196462631225586, "learning_rate": 0.0005, "loss": 1.1761, "step": 12740 }, { "epoch": 0.40658184253324403, "grad_norm": 0.4145953059196472, "learning_rate": 0.0005, "loss": 1.1646, "step": 12750 }, { "epoch": 0.406900730252878, "grad_norm": 0.43945613503456116, "learning_rate": 0.0005, "loss": 1.1682, "step": 12760 }, { "epoch": 0.4072196179725119, "grad_norm": 0.42901989817619324, "learning_rate": 0.0005, "loss": 1.1838, "step": 12770 }, { "epoch": 0.4075385056921458, "grad_norm": 0.44105032086372375, "learning_rate": 0.0005, "loss": 1.1648, "step": 12780 }, { "epoch": 0.40785739341177973, "grad_norm": 0.4337145686149597, "learning_rate": 0.0005, "loss": 1.1676, "step": 12790 }, { "epoch": 0.40817628113141363, "grad_norm": 0.46507975459098816, "learning_rate": 0.0005, "loss": 1.1839, "step": 12800 }, { "epoch": 0.40849516885104753, "grad_norm": 0.43042483925819397, "learning_rate": 0.0005, "loss": 1.161, "step": 12810 }, { "epoch": 0.4088140565706815, "grad_norm": 0.42969202995300293, "learning_rate": 0.0005, "loss": 1.1644, "step": 12820 }, { "epoch": 0.4091329442903154, "grad_norm": 0.43208038806915283, "learning_rate": 0.0005, "loss": 1.1608, "step": 12830 }, { "epoch": 0.4094518320099493, "grad_norm": 0.4246330261230469, "learning_rate": 0.0005, "loss": 1.1395, "step": 12840 }, { "epoch": 0.40977071972958323, "grad_norm": 0.4385770857334137, "learning_rate": 0.0005, "loss": 1.1721, "step": 12850 }, { "epoch": 0.41008960744921713, "grad_norm": 0.40635982155799866, "learning_rate": 0.0005, "loss": 1.1554, "step": 12860 }, { "epoch": 0.41040849516885103, "grad_norm": 0.41713228821754456, "learning_rate": 0.0005, "loss": 1.1593, "step": 12870 }, { "epoch": 0.410727382888485, "grad_norm": 0.4230583608150482, "learning_rate": 0.0005, "loss": 1.172, "step": 12880 }, { "epoch": 0.4110462706081189, "grad_norm": 0.44719910621643066, "learning_rate": 0.0005, "loss": 1.1767, "step": 12890 }, { "epoch": 0.4113651583277528, "grad_norm": 0.42063602805137634, "learning_rate": 0.0005, "loss": 1.1557, "step": 12900 }, { "epoch": 0.41168404604738673, "grad_norm": 0.41372743248939514, "learning_rate": 0.0005, "loss": 1.1572, "step": 12910 }, { "epoch": 0.41200293376702063, "grad_norm": 0.4369075298309326, "learning_rate": 0.0005, "loss": 1.165, "step": 12920 }, { "epoch": 0.4123218214866545, "grad_norm": 0.43609854578971863, "learning_rate": 0.0005, "loss": 1.1567, "step": 12930 }, { "epoch": 0.4126407092062885, "grad_norm": 0.41011208295822144, "learning_rate": 0.0005, "loss": 1.1786, "step": 12940 }, { "epoch": 0.4129595969259224, "grad_norm": 0.43973419070243835, "learning_rate": 0.0005, "loss": 1.1755, "step": 12950 }, { "epoch": 0.4132784846455563, "grad_norm": 0.4214501678943634, "learning_rate": 0.0005, "loss": 1.1744, "step": 12960 }, { "epoch": 0.41359737236519023, "grad_norm": 0.42284494638442993, "learning_rate": 0.0005, "loss": 1.1742, "step": 12970 }, { "epoch": 0.41391626008482413, "grad_norm": 0.43984121084213257, "learning_rate": 0.0005, "loss": 1.1808, "step": 12980 }, { "epoch": 0.414235147804458, "grad_norm": 0.4616177976131439, "learning_rate": 0.0005, "loss": 1.1607, "step": 12990 }, { "epoch": 0.414554035524092, "grad_norm": 0.420259565114975, "learning_rate": 0.0005, "loss": 1.1679, "step": 13000 }, { "epoch": 0.4148729232437259, "grad_norm": 0.4331679046154022, "learning_rate": 0.0005, "loss": 1.1488, "step": 13010 }, { "epoch": 0.4151918109633598, "grad_norm": 0.42250847816467285, "learning_rate": 0.0005, "loss": 1.1873, "step": 13020 }, { "epoch": 0.41551069868299373, "grad_norm": 0.45015767216682434, "learning_rate": 0.0005, "loss": 1.1737, "step": 13030 }, { "epoch": 0.4158295864026276, "grad_norm": 0.4316963255405426, "learning_rate": 0.0005, "loss": 1.1608, "step": 13040 }, { "epoch": 0.4161484741222615, "grad_norm": 0.4264684021472931, "learning_rate": 0.0005, "loss": 1.1651, "step": 13050 }, { "epoch": 0.4164673618418955, "grad_norm": 0.44561827182769775, "learning_rate": 0.0005, "loss": 1.1813, "step": 13060 }, { "epoch": 0.4167862495615294, "grad_norm": 0.42165082693099976, "learning_rate": 0.0005, "loss": 1.167, "step": 13070 }, { "epoch": 0.41710513728116333, "grad_norm": 0.4280150532722473, "learning_rate": 0.0005, "loss": 1.1726, "step": 13080 }, { "epoch": 0.4174240250007972, "grad_norm": 0.43624866008758545, "learning_rate": 0.0005, "loss": 1.1699, "step": 13090 }, { "epoch": 0.4177429127204311, "grad_norm": 0.429521381855011, "learning_rate": 0.0005, "loss": 1.159, "step": 13100 }, { "epoch": 0.4180618004400651, "grad_norm": 0.4358668625354767, "learning_rate": 0.0005, "loss": 1.1704, "step": 13110 }, { "epoch": 0.418380688159699, "grad_norm": 0.42937684059143066, "learning_rate": 0.0005, "loss": 1.1778, "step": 13120 }, { "epoch": 0.4186995758793329, "grad_norm": 0.4294481575489044, "learning_rate": 0.0005, "loss": 1.1718, "step": 13130 }, { "epoch": 0.41901846359896683, "grad_norm": 0.42481085658073425, "learning_rate": 0.0005, "loss": 1.1659, "step": 13140 }, { "epoch": 0.4193373513186007, "grad_norm": 0.41510918736457825, "learning_rate": 0.0005, "loss": 1.1796, "step": 13150 }, { "epoch": 0.4196562390382346, "grad_norm": 0.43382757902145386, "learning_rate": 0.0005, "loss": 1.1604, "step": 13160 }, { "epoch": 0.4199751267578686, "grad_norm": 0.4185143709182739, "learning_rate": 0.0005, "loss": 1.1658, "step": 13170 }, { "epoch": 0.4202940144775025, "grad_norm": 0.4401332437992096, "learning_rate": 0.0005, "loss": 1.1515, "step": 13180 }, { "epoch": 0.4206129021971364, "grad_norm": 0.4322357773780823, "learning_rate": 0.0005, "loss": 1.1659, "step": 13190 }, { "epoch": 0.4209317899167703, "grad_norm": 0.42740195989608765, "learning_rate": 0.0005, "loss": 1.1863, "step": 13200 }, { "epoch": 0.4212506776364042, "grad_norm": 0.4310891032218933, "learning_rate": 0.0005, "loss": 1.1719, "step": 13210 }, { "epoch": 0.4215695653560381, "grad_norm": 0.44095975160598755, "learning_rate": 0.0005, "loss": 1.1635, "step": 13220 }, { "epoch": 0.4218884530756721, "grad_norm": 0.4069562554359436, "learning_rate": 0.0005, "loss": 1.1734, "step": 13230 }, { "epoch": 0.422207340795306, "grad_norm": 0.42801713943481445, "learning_rate": 0.0005, "loss": 1.1558, "step": 13240 }, { "epoch": 0.42252622851493987, "grad_norm": 0.4137992262840271, "learning_rate": 0.0005, "loss": 1.1467, "step": 13250 }, { "epoch": 0.4228451162345738, "grad_norm": 0.42337340116500854, "learning_rate": 0.0005, "loss": 1.1541, "step": 13260 }, { "epoch": 0.4231640039542077, "grad_norm": 0.4387548863887787, "learning_rate": 0.0005, "loss": 1.1738, "step": 13270 }, { "epoch": 0.4234828916738416, "grad_norm": 0.423705130815506, "learning_rate": 0.0005, "loss": 1.1468, "step": 13280 }, { "epoch": 0.4238017793934756, "grad_norm": 0.418472021818161, "learning_rate": 0.0005, "loss": 1.1587, "step": 13290 }, { "epoch": 0.4241206671131095, "grad_norm": 0.4126065671443939, "learning_rate": 0.0005, "loss": 1.179, "step": 13300 }, { "epoch": 0.42443955483274337, "grad_norm": 0.42028361558914185, "learning_rate": 0.0005, "loss": 1.1812, "step": 13310 }, { "epoch": 0.4247584425523773, "grad_norm": 0.4209747612476349, "learning_rate": 0.0005, "loss": 1.1574, "step": 13320 }, { "epoch": 0.4250773302720112, "grad_norm": 0.4240000545978546, "learning_rate": 0.0005, "loss": 1.1689, "step": 13330 }, { "epoch": 0.4253962179916451, "grad_norm": 0.42597630620002747, "learning_rate": 0.0005, "loss": 1.1598, "step": 13340 }, { "epoch": 0.4257151057112791, "grad_norm": 0.43621891736984253, "learning_rate": 0.0005, "loss": 1.1601, "step": 13350 }, { "epoch": 0.42603399343091297, "grad_norm": 0.44976115226745605, "learning_rate": 0.0005, "loss": 1.1533, "step": 13360 }, { "epoch": 0.42635288115054687, "grad_norm": 0.425903856754303, "learning_rate": 0.0005, "loss": 1.1671, "step": 13370 }, { "epoch": 0.4266717688701808, "grad_norm": 0.41458699107170105, "learning_rate": 0.0005, "loss": 1.1479, "step": 13380 }, { "epoch": 0.4269906565898147, "grad_norm": 0.4458717703819275, "learning_rate": 0.0005, "loss": 1.1533, "step": 13390 }, { "epoch": 0.4273095443094486, "grad_norm": 0.4254353642463684, "learning_rate": 0.0005, "loss": 1.1509, "step": 13400 }, { "epoch": 0.4276284320290826, "grad_norm": 0.4332568347454071, "learning_rate": 0.0005, "loss": 1.1423, "step": 13410 }, { "epoch": 0.42794731974871647, "grad_norm": 0.4396291971206665, "learning_rate": 0.0005, "loss": 1.175, "step": 13420 }, { "epoch": 0.42826620746835037, "grad_norm": 0.41766542196273804, "learning_rate": 0.0005, "loss": 1.1629, "step": 13430 }, { "epoch": 0.4285850951879843, "grad_norm": 0.44327792525291443, "learning_rate": 0.0005, "loss": 1.1706, "step": 13440 }, { "epoch": 0.4289039829076182, "grad_norm": 0.4321768283843994, "learning_rate": 0.0005, "loss": 1.1568, "step": 13450 }, { "epoch": 0.4292228706272521, "grad_norm": 0.4065990746021271, "learning_rate": 0.0005, "loss": 1.1606, "step": 13460 }, { "epoch": 0.42954175834688607, "grad_norm": 0.41537582874298096, "learning_rate": 0.0005, "loss": 1.1647, "step": 13470 }, { "epoch": 0.42986064606651997, "grad_norm": 0.4257226586341858, "learning_rate": 0.0005, "loss": 1.1768, "step": 13480 }, { "epoch": 0.4301795337861539, "grad_norm": 0.3980241119861603, "learning_rate": 0.0005, "loss": 1.154, "step": 13490 }, { "epoch": 0.4304984215057878, "grad_norm": 0.42201271653175354, "learning_rate": 0.0005, "loss": 1.1632, "step": 13500 }, { "epoch": 0.4308173092254217, "grad_norm": 0.41972705721855164, "learning_rate": 0.0005, "loss": 1.152, "step": 13510 }, { "epoch": 0.43113619694505567, "grad_norm": 0.4092382788658142, "learning_rate": 0.0005, "loss": 1.1834, "step": 13520 }, { "epoch": 0.43145508466468957, "grad_norm": 0.41219362616539, "learning_rate": 0.0005, "loss": 1.1528, "step": 13530 }, { "epoch": 0.43177397238432347, "grad_norm": 0.41763296723365784, "learning_rate": 0.0005, "loss": 1.1608, "step": 13540 }, { "epoch": 0.4320928601039574, "grad_norm": 0.4221011698246002, "learning_rate": 0.0005, "loss": 1.1705, "step": 13550 }, { "epoch": 0.4324117478235913, "grad_norm": 0.4206273853778839, "learning_rate": 0.0005, "loss": 1.1581, "step": 13560 }, { "epoch": 0.4327306355432252, "grad_norm": 0.42883896827697754, "learning_rate": 0.0005, "loss": 1.1612, "step": 13570 }, { "epoch": 0.43304952326285917, "grad_norm": 0.4313361942768097, "learning_rate": 0.0005, "loss": 1.1689, "step": 13580 }, { "epoch": 0.43336841098249307, "grad_norm": 0.41933777928352356, "learning_rate": 0.0005, "loss": 1.1562, "step": 13590 }, { "epoch": 0.43368729870212697, "grad_norm": 0.44052955508232117, "learning_rate": 0.0005, "loss": 1.1537, "step": 13600 }, { "epoch": 0.4340061864217609, "grad_norm": 0.4265698194503784, "learning_rate": 0.0005, "loss": 1.1595, "step": 13610 }, { "epoch": 0.4343250741413948, "grad_norm": 0.4011492133140564, "learning_rate": 0.0005, "loss": 1.1793, "step": 13620 }, { "epoch": 0.4346439618610287, "grad_norm": 0.4022427499294281, "learning_rate": 0.0005, "loss": 1.1432, "step": 13630 }, { "epoch": 0.43496284958066267, "grad_norm": 0.4209270775318146, "learning_rate": 0.0005, "loss": 1.165, "step": 13640 }, { "epoch": 0.43528173730029657, "grad_norm": 0.4407374858856201, "learning_rate": 0.0005, "loss": 1.1607, "step": 13650 }, { "epoch": 0.43560062501993047, "grad_norm": 0.4113706648349762, "learning_rate": 0.0005, "loss": 1.1457, "step": 13660 }, { "epoch": 0.4359195127395644, "grad_norm": 0.42725327610969543, "learning_rate": 0.0005, "loss": 1.1668, "step": 13670 }, { "epoch": 0.4362384004591983, "grad_norm": 0.42654532194137573, "learning_rate": 0.0005, "loss": 1.1522, "step": 13680 }, { "epoch": 0.4365572881788322, "grad_norm": 0.41542568802833557, "learning_rate": 0.0005, "loss": 1.1496, "step": 13690 }, { "epoch": 0.43687617589846617, "grad_norm": 0.4379822611808777, "learning_rate": 0.0005, "loss": 1.1668, "step": 13700 }, { "epoch": 0.43719506361810007, "grad_norm": 0.42327746748924255, "learning_rate": 0.0005, "loss": 1.1498, "step": 13710 }, { "epoch": 0.43751395133773396, "grad_norm": 0.41067299246788025, "learning_rate": 0.0005, "loss": 1.1342, "step": 13720 }, { "epoch": 0.4378328390573679, "grad_norm": 0.4195924997329712, "learning_rate": 0.0005, "loss": 1.1741, "step": 13730 }, { "epoch": 0.4381517267770018, "grad_norm": 0.41079947352409363, "learning_rate": 0.0005, "loss": 1.1619, "step": 13740 }, { "epoch": 0.4384706144966357, "grad_norm": 0.41595518589019775, "learning_rate": 0.0005, "loss": 1.1534, "step": 13750 }, { "epoch": 0.43878950221626967, "grad_norm": 0.4261476397514343, "learning_rate": 0.0005, "loss": 1.1495, "step": 13760 }, { "epoch": 0.43910838993590356, "grad_norm": 0.4288196265697479, "learning_rate": 0.0005, "loss": 1.1637, "step": 13770 }, { "epoch": 0.43942727765553746, "grad_norm": 0.44342705607414246, "learning_rate": 0.0005, "loss": 1.165, "step": 13780 }, { "epoch": 0.4397461653751714, "grad_norm": 0.4130489230155945, "learning_rate": 0.0005, "loss": 1.1606, "step": 13790 }, { "epoch": 0.4400650530948053, "grad_norm": 0.418384313583374, "learning_rate": 0.0005, "loss": 1.1607, "step": 13800 }, { "epoch": 0.4403839408144392, "grad_norm": 0.4078846573829651, "learning_rate": 0.0005, "loss": 1.146, "step": 13810 }, { "epoch": 0.44070282853407317, "grad_norm": 0.427979439496994, "learning_rate": 0.0005, "loss": 1.1497, "step": 13820 }, { "epoch": 0.44102171625370706, "grad_norm": 0.4124217629432678, "learning_rate": 0.0005, "loss": 1.1528, "step": 13830 }, { "epoch": 0.44134060397334096, "grad_norm": 0.42145681381225586, "learning_rate": 0.0005, "loss": 1.1374, "step": 13840 }, { "epoch": 0.4416594916929749, "grad_norm": 0.41087475419044495, "learning_rate": 0.0005, "loss": 1.1522, "step": 13850 }, { "epoch": 0.4419783794126088, "grad_norm": 0.42248791456222534, "learning_rate": 0.0005, "loss": 1.1567, "step": 13860 }, { "epoch": 0.4422972671322427, "grad_norm": 0.4094279408454895, "learning_rate": 0.0005, "loss": 1.1404, "step": 13870 }, { "epoch": 0.44261615485187666, "grad_norm": 0.4195246398448944, "learning_rate": 0.0005, "loss": 1.1353, "step": 13880 }, { "epoch": 0.44293504257151056, "grad_norm": 0.39682644605636597, "learning_rate": 0.0005, "loss": 1.1392, "step": 13890 }, { "epoch": 0.4432539302911445, "grad_norm": 0.4020282030105591, "learning_rate": 0.0005, "loss": 1.1631, "step": 13900 }, { "epoch": 0.4435728180107784, "grad_norm": 0.4157191216945648, "learning_rate": 0.0005, "loss": 1.144, "step": 13910 }, { "epoch": 0.4438917057304123, "grad_norm": 0.4178768992424011, "learning_rate": 0.0005, "loss": 1.1582, "step": 13920 }, { "epoch": 0.44421059345004626, "grad_norm": 0.4138433039188385, "learning_rate": 0.0005, "loss": 1.1669, "step": 13930 }, { "epoch": 0.44452948116968016, "grad_norm": 0.4343213737010956, "learning_rate": 0.0005, "loss": 1.1389, "step": 13940 }, { "epoch": 0.44484836888931406, "grad_norm": 0.413717657327652, "learning_rate": 0.0005, "loss": 1.1524, "step": 13950 }, { "epoch": 0.445167256608948, "grad_norm": 0.409458190202713, "learning_rate": 0.0005, "loss": 1.1737, "step": 13960 }, { "epoch": 0.4454861443285819, "grad_norm": 0.4400709569454193, "learning_rate": 0.0005, "loss": 1.1473, "step": 13970 }, { "epoch": 0.4458050320482158, "grad_norm": 0.4103112518787384, "learning_rate": 0.0005, "loss": 1.152, "step": 13980 }, { "epoch": 0.44612391976784976, "grad_norm": 0.4030466079711914, "learning_rate": 0.0005, "loss": 1.1472, "step": 13990 }, { "epoch": 0.44644280748748366, "grad_norm": 0.42554450035095215, "learning_rate": 0.0005, "loss": 1.1455, "step": 14000 }, { "epoch": 0.44676169520711756, "grad_norm": 0.41038069128990173, "learning_rate": 0.0005, "loss": 1.1594, "step": 14010 }, { "epoch": 0.4470805829267515, "grad_norm": 0.4226815402507782, "learning_rate": 0.0005, "loss": 1.1603, "step": 14020 }, { "epoch": 0.4473994706463854, "grad_norm": 0.4165605306625366, "learning_rate": 0.0005, "loss": 1.1596, "step": 14030 }, { "epoch": 0.4477183583660193, "grad_norm": 0.41343143582344055, "learning_rate": 0.0005, "loss": 1.1435, "step": 14040 }, { "epoch": 0.44803724608565326, "grad_norm": 0.4170566499233246, "learning_rate": 0.0005, "loss": 1.1341, "step": 14050 }, { "epoch": 0.44835613380528716, "grad_norm": 0.4268888235092163, "learning_rate": 0.0005, "loss": 1.143, "step": 14060 }, { "epoch": 0.44867502152492106, "grad_norm": 0.41298648715019226, "learning_rate": 0.0005, "loss": 1.1344, "step": 14070 }, { "epoch": 0.448993909244555, "grad_norm": 0.4087713658809662, "learning_rate": 0.0005, "loss": 1.1482, "step": 14080 }, { "epoch": 0.4493127969641889, "grad_norm": 0.40675875544548035, "learning_rate": 0.0005, "loss": 1.1469, "step": 14090 }, { "epoch": 0.4496316846838228, "grad_norm": 0.418449342250824, "learning_rate": 0.0005, "loss": 1.1489, "step": 14100 }, { "epoch": 0.44995057240345676, "grad_norm": 0.42261627316474915, "learning_rate": 0.0005, "loss": 1.1529, "step": 14110 }, { "epoch": 0.45026946012309066, "grad_norm": 0.40711814165115356, "learning_rate": 0.0005, "loss": 1.1418, "step": 14120 }, { "epoch": 0.45058834784272456, "grad_norm": 0.407012015581131, "learning_rate": 0.0005, "loss": 1.1438, "step": 14130 }, { "epoch": 0.4509072355623585, "grad_norm": 0.41482147574424744, "learning_rate": 0.0005, "loss": 1.14, "step": 14140 }, { "epoch": 0.4512261232819924, "grad_norm": 0.4099246561527252, "learning_rate": 0.0005, "loss": 1.1732, "step": 14150 }, { "epoch": 0.4515450110016263, "grad_norm": 0.4071347713470459, "learning_rate": 0.0005, "loss": 1.1456, "step": 14160 }, { "epoch": 0.45186389872126026, "grad_norm": 0.4065478444099426, "learning_rate": 0.0005, "loss": 1.1378, "step": 14170 }, { "epoch": 0.45218278644089416, "grad_norm": 0.40051156282424927, "learning_rate": 0.0005, "loss": 1.1486, "step": 14180 }, { "epoch": 0.45250167416052806, "grad_norm": 0.40654802322387695, "learning_rate": 0.0005, "loss": 1.1315, "step": 14190 }, { "epoch": 0.452820561880162, "grad_norm": 0.41403114795684814, "learning_rate": 0.0005, "loss": 1.1609, "step": 14200 }, { "epoch": 0.4531394495997959, "grad_norm": 0.4000602066516876, "learning_rate": 0.0005, "loss": 1.1606, "step": 14210 }, { "epoch": 0.4534583373194298, "grad_norm": 0.39985981583595276, "learning_rate": 0.0005, "loss": 1.1508, "step": 14220 }, { "epoch": 0.45377722503906376, "grad_norm": 0.4208848774433136, "learning_rate": 0.0005, "loss": 1.1375, "step": 14230 }, { "epoch": 0.45409611275869766, "grad_norm": 0.41846519708633423, "learning_rate": 0.0005, "loss": 1.1314, "step": 14240 }, { "epoch": 0.45441500047833155, "grad_norm": 0.41302967071533203, "learning_rate": 0.0005, "loss": 1.1531, "step": 14250 }, { "epoch": 0.4547338881979655, "grad_norm": 0.41688963770866394, "learning_rate": 0.0005, "loss": 1.1639, "step": 14260 }, { "epoch": 0.4550527759175994, "grad_norm": 0.4071559011936188, "learning_rate": 0.0005, "loss": 1.1474, "step": 14270 }, { "epoch": 0.4553716636372333, "grad_norm": 0.41632965207099915, "learning_rate": 0.0005, "loss": 1.1551, "step": 14280 }, { "epoch": 0.45569055135686726, "grad_norm": 0.4219265878200531, "learning_rate": 0.0005, "loss": 1.1587, "step": 14290 }, { "epoch": 0.45600943907650116, "grad_norm": 0.42590487003326416, "learning_rate": 0.0005, "loss": 1.163, "step": 14300 }, { "epoch": 0.45632832679613505, "grad_norm": 0.4112999141216278, "learning_rate": 0.0005, "loss": 1.1649, "step": 14310 }, { "epoch": 0.456647214515769, "grad_norm": 0.4211723208427429, "learning_rate": 0.0005, "loss": 1.1522, "step": 14320 }, { "epoch": 0.4569661022354029, "grad_norm": 0.4247067868709564, "learning_rate": 0.0005, "loss": 1.145, "step": 14330 }, { "epoch": 0.45728498995503686, "grad_norm": 0.4237869381904602, "learning_rate": 0.0005, "loss": 1.1372, "step": 14340 }, { "epoch": 0.45760387767467076, "grad_norm": 0.40753382444381714, "learning_rate": 0.0005, "loss": 1.1495, "step": 14350 }, { "epoch": 0.45792276539430465, "grad_norm": 0.3949471414089203, "learning_rate": 0.0005, "loss": 1.1419, "step": 14360 }, { "epoch": 0.4582416531139386, "grad_norm": 0.41907766461372375, "learning_rate": 0.0005, "loss": 1.172, "step": 14370 }, { "epoch": 0.4585605408335725, "grad_norm": 0.4017127752304077, "learning_rate": 0.0005, "loss": 1.159, "step": 14380 }, { "epoch": 0.4588794285532064, "grad_norm": 0.4082666337490082, "learning_rate": 0.0005, "loss": 1.1411, "step": 14390 }, { "epoch": 0.45919831627284036, "grad_norm": 0.4150290787220001, "learning_rate": 0.0005, "loss": 1.166, "step": 14400 }, { "epoch": 0.45951720399247425, "grad_norm": 0.3951573967933655, "learning_rate": 0.0005, "loss": 1.1188, "step": 14410 }, { "epoch": 0.45983609171210815, "grad_norm": 0.40606120228767395, "learning_rate": 0.0005, "loss": 1.1425, "step": 14420 }, { "epoch": 0.4601549794317421, "grad_norm": 0.4190419316291809, "learning_rate": 0.0005, "loss": 1.1442, "step": 14430 }, { "epoch": 0.460473867151376, "grad_norm": 0.42822325229644775, "learning_rate": 0.0005, "loss": 1.163, "step": 14440 }, { "epoch": 0.4607927548710099, "grad_norm": 0.40327173471450806, "learning_rate": 0.0005, "loss": 1.1391, "step": 14450 }, { "epoch": 0.46111164259064386, "grad_norm": 0.42737719416618347, "learning_rate": 0.0005, "loss": 1.1554, "step": 14460 }, { "epoch": 0.46143053031027775, "grad_norm": 0.4340229034423828, "learning_rate": 0.0005, "loss": 1.1435, "step": 14470 }, { "epoch": 0.46174941802991165, "grad_norm": 0.4117223024368286, "learning_rate": 0.0005, "loss": 1.1304, "step": 14480 }, { "epoch": 0.4620683057495456, "grad_norm": 0.4109940826892853, "learning_rate": 0.0005, "loss": 1.1399, "step": 14490 }, { "epoch": 0.4623871934691795, "grad_norm": 0.4243199825286865, "learning_rate": 0.0005, "loss": 1.1481, "step": 14500 }, { "epoch": 0.4627060811888134, "grad_norm": 0.43547868728637695, "learning_rate": 0.0005, "loss": 1.14, "step": 14510 }, { "epoch": 0.46302496890844735, "grad_norm": 0.41224968433380127, "learning_rate": 0.0005, "loss": 1.1398, "step": 14520 }, { "epoch": 0.46334385662808125, "grad_norm": 0.41750672459602356, "learning_rate": 0.0005, "loss": 1.1661, "step": 14530 }, { "epoch": 0.46366274434771515, "grad_norm": 0.39538687467575073, "learning_rate": 0.0005, "loss": 1.1334, "step": 14540 }, { "epoch": 0.4639816320673491, "grad_norm": 0.4226230978965759, "learning_rate": 0.0005, "loss": 1.1621, "step": 14550 }, { "epoch": 0.464300519786983, "grad_norm": 0.41002535820007324, "learning_rate": 0.0005, "loss": 1.1453, "step": 14560 }, { "epoch": 0.4646194075066169, "grad_norm": 0.41372770071029663, "learning_rate": 0.0005, "loss": 1.1478, "step": 14570 }, { "epoch": 0.46493829522625085, "grad_norm": 0.410127192735672, "learning_rate": 0.0005, "loss": 1.1447, "step": 14580 }, { "epoch": 0.46525718294588475, "grad_norm": 0.40535253286361694, "learning_rate": 0.0005, "loss": 1.1455, "step": 14590 }, { "epoch": 0.46557607066551865, "grad_norm": 0.4096167981624603, "learning_rate": 0.0005, "loss": 1.1449, "step": 14600 }, { "epoch": 0.4658949583851526, "grad_norm": 0.41067060828208923, "learning_rate": 0.0005, "loss": 1.155, "step": 14610 }, { "epoch": 0.4662138461047865, "grad_norm": 0.40875697135925293, "learning_rate": 0.0005, "loss": 1.1477, "step": 14620 }, { "epoch": 0.4665327338244204, "grad_norm": 0.4016030430793762, "learning_rate": 0.0005, "loss": 1.152, "step": 14630 }, { "epoch": 0.46685162154405435, "grad_norm": 0.41118061542510986, "learning_rate": 0.0005, "loss": 1.1248, "step": 14640 }, { "epoch": 0.46717050926368825, "grad_norm": 0.4004136323928833, "learning_rate": 0.0005, "loss": 1.149, "step": 14650 }, { "epoch": 0.46748939698332215, "grad_norm": 0.4095616042613983, "learning_rate": 0.0005, "loss": 1.1405, "step": 14660 }, { "epoch": 0.4678082847029561, "grad_norm": 0.41689032316207886, "learning_rate": 0.0005, "loss": 1.144, "step": 14670 }, { "epoch": 0.46812717242259, "grad_norm": 0.397847980260849, "learning_rate": 0.0005, "loss": 1.1308, "step": 14680 }, { "epoch": 0.4684460601422239, "grad_norm": 0.410165935754776, "learning_rate": 0.0005, "loss": 1.1371, "step": 14690 }, { "epoch": 0.46876494786185785, "grad_norm": 0.4155512750148773, "learning_rate": 0.0005, "loss": 1.1417, "step": 14700 }, { "epoch": 0.46908383558149175, "grad_norm": 0.4086609184741974, "learning_rate": 0.0005, "loss": 1.1567, "step": 14710 }, { "epoch": 0.46940272330112565, "grad_norm": 0.4168750047683716, "learning_rate": 0.0005, "loss": 1.1477, "step": 14720 }, { "epoch": 0.4697216110207596, "grad_norm": 0.4083302915096283, "learning_rate": 0.0005, "loss": 1.1445, "step": 14730 }, { "epoch": 0.4700404987403935, "grad_norm": 0.3982124924659729, "learning_rate": 0.0005, "loss": 1.1555, "step": 14740 }, { "epoch": 0.47035938646002745, "grad_norm": 0.4231313467025757, "learning_rate": 0.0005, "loss": 1.1589, "step": 14750 }, { "epoch": 0.47067827417966135, "grad_norm": 0.3778389096260071, "learning_rate": 0.0005, "loss": 1.1284, "step": 14760 }, { "epoch": 0.47099716189929525, "grad_norm": 0.40142810344696045, "learning_rate": 0.0005, "loss": 1.1164, "step": 14770 }, { "epoch": 0.4713160496189292, "grad_norm": 0.4071197807788849, "learning_rate": 0.0005, "loss": 1.1509, "step": 14780 }, { "epoch": 0.4716349373385631, "grad_norm": 0.4330447316169739, "learning_rate": 0.0005, "loss": 1.1559, "step": 14790 }, { "epoch": 0.471953825058197, "grad_norm": 0.4079609513282776, "learning_rate": 0.0005, "loss": 1.1519, "step": 14800 }, { "epoch": 0.47227271277783095, "grad_norm": 0.4308188557624817, "learning_rate": 0.0005, "loss": 1.1414, "step": 14810 }, { "epoch": 0.47259160049746485, "grad_norm": 0.44308769702911377, "learning_rate": 0.0005, "loss": 1.1535, "step": 14820 }, { "epoch": 0.47291048821709875, "grad_norm": 0.404365599155426, "learning_rate": 0.0005, "loss": 1.1549, "step": 14830 }, { "epoch": 0.4732293759367327, "grad_norm": 0.4012412428855896, "learning_rate": 0.0005, "loss": 1.1512, "step": 14840 }, { "epoch": 0.4735482636563666, "grad_norm": 0.3904338479042053, "learning_rate": 0.0005, "loss": 1.1456, "step": 14850 }, { "epoch": 0.4738671513760005, "grad_norm": 0.4106074571609497, "learning_rate": 0.0005, "loss": 1.1322, "step": 14860 }, { "epoch": 0.47418603909563445, "grad_norm": 0.41735175251960754, "learning_rate": 0.0005, "loss": 1.133, "step": 14870 }, { "epoch": 0.47450492681526835, "grad_norm": 0.40341079235076904, "learning_rate": 0.0005, "loss": 1.1376, "step": 14880 }, { "epoch": 0.47482381453490224, "grad_norm": 0.42312002182006836, "learning_rate": 0.0005, "loss": 1.1462, "step": 14890 }, { "epoch": 0.4751427022545362, "grad_norm": 0.4028998613357544, "learning_rate": 0.0005, "loss": 1.1457, "step": 14900 }, { "epoch": 0.4754615899741701, "grad_norm": 0.41534656286239624, "learning_rate": 0.0005, "loss": 1.1572, "step": 14910 }, { "epoch": 0.475780477693804, "grad_norm": 0.39137589931488037, "learning_rate": 0.0005, "loss": 1.1413, "step": 14920 }, { "epoch": 0.47609936541343795, "grad_norm": 0.3952145576477051, "learning_rate": 0.0005, "loss": 1.1538, "step": 14930 }, { "epoch": 0.47641825313307185, "grad_norm": 0.42215418815612793, "learning_rate": 0.0005, "loss": 1.158, "step": 14940 }, { "epoch": 0.47673714085270574, "grad_norm": 0.40796101093292236, "learning_rate": 0.0005, "loss": 1.1366, "step": 14950 }, { "epoch": 0.4770560285723397, "grad_norm": 0.387132465839386, "learning_rate": 0.0005, "loss": 1.1482, "step": 14960 }, { "epoch": 0.4773749162919736, "grad_norm": 0.4274901747703552, "learning_rate": 0.0005, "loss": 1.151, "step": 14970 }, { "epoch": 0.4776938040116075, "grad_norm": 0.4018956124782562, "learning_rate": 0.0005, "loss": 1.1365, "step": 14980 }, { "epoch": 0.47801269173124145, "grad_norm": 0.3979787528514862, "learning_rate": 0.0005, "loss": 1.1255, "step": 14990 }, { "epoch": 0.47833157945087534, "grad_norm": 0.43101415038108826, "learning_rate": 0.0005, "loss": 1.158, "step": 15000 }, { "epoch": 0.47865046717050924, "grad_norm": 0.4156745672225952, "learning_rate": 0.0005, "loss": 1.1584, "step": 15010 }, { "epoch": 0.4789693548901432, "grad_norm": 0.4341460168361664, "learning_rate": 0.0005, "loss": 1.1339, "step": 15020 }, { "epoch": 0.4792882426097771, "grad_norm": 0.4072680175304413, "learning_rate": 0.0005, "loss": 1.1539, "step": 15030 }, { "epoch": 0.479607130329411, "grad_norm": 0.3964672386646271, "learning_rate": 0.0005, "loss": 1.1333, "step": 15040 }, { "epoch": 0.47992601804904494, "grad_norm": 0.41430017352104187, "learning_rate": 0.0005, "loss": 1.1457, "step": 15050 }, { "epoch": 0.48024490576867884, "grad_norm": 0.3992692828178406, "learning_rate": 0.0005, "loss": 1.1347, "step": 15060 }, { "epoch": 0.48056379348831274, "grad_norm": 0.4212576448917389, "learning_rate": 0.0005, "loss": 1.1504, "step": 15070 }, { "epoch": 0.4808826812079467, "grad_norm": 0.4008728861808777, "learning_rate": 0.0005, "loss": 1.1461, "step": 15080 }, { "epoch": 0.4812015689275806, "grad_norm": 0.4172374904155731, "learning_rate": 0.0005, "loss": 1.1416, "step": 15090 }, { "epoch": 0.4815204566472145, "grad_norm": 0.4110826849937439, "learning_rate": 0.0005, "loss": 1.1534, "step": 15100 }, { "epoch": 0.48183934436684844, "grad_norm": 0.4187602698802948, "learning_rate": 0.0005, "loss": 1.1269, "step": 15110 }, { "epoch": 0.48215823208648234, "grad_norm": 0.39021268486976624, "learning_rate": 0.0005, "loss": 1.1219, "step": 15120 }, { "epoch": 0.48247711980611624, "grad_norm": 0.4110153317451477, "learning_rate": 0.0005, "loss": 1.1314, "step": 15130 }, { "epoch": 0.4827960075257502, "grad_norm": 0.4028952121734619, "learning_rate": 0.0005, "loss": 1.1363, "step": 15140 }, { "epoch": 0.4831148952453841, "grad_norm": 0.39289188385009766, "learning_rate": 0.0005, "loss": 1.1257, "step": 15150 }, { "epoch": 0.48343378296501804, "grad_norm": 0.4015601873397827, "learning_rate": 0.0005, "loss": 1.132, "step": 15160 }, { "epoch": 0.48375267068465194, "grad_norm": 0.4219626188278198, "learning_rate": 0.0005, "loss": 1.1365, "step": 15170 }, { "epoch": 0.48407155840428584, "grad_norm": 0.41227683424949646, "learning_rate": 0.0005, "loss": 1.1337, "step": 15180 }, { "epoch": 0.4843904461239198, "grad_norm": 0.4118657112121582, "learning_rate": 0.0005, "loss": 1.1277, "step": 15190 }, { "epoch": 0.4847093338435537, "grad_norm": 0.42577946186065674, "learning_rate": 0.0005, "loss": 1.1668, "step": 15200 }, { "epoch": 0.4850282215631876, "grad_norm": 0.39030152559280396, "learning_rate": 0.0005, "loss": 1.127, "step": 15210 }, { "epoch": 0.48534710928282154, "grad_norm": 0.3986493945121765, "learning_rate": 0.0005, "loss": 1.1388, "step": 15220 }, { "epoch": 0.48566599700245544, "grad_norm": 0.40662774443626404, "learning_rate": 0.0005, "loss": 1.1502, "step": 15230 }, { "epoch": 0.48598488472208934, "grad_norm": 0.4098156690597534, "learning_rate": 0.0005, "loss": 1.1467, "step": 15240 }, { "epoch": 0.4863037724417233, "grad_norm": 0.38814619183540344, "learning_rate": 0.0005, "loss": 1.132, "step": 15250 }, { "epoch": 0.4866226601613572, "grad_norm": 0.394285649061203, "learning_rate": 0.0005, "loss": 1.1384, "step": 15260 }, { "epoch": 0.4869415478809911, "grad_norm": 0.39842134714126587, "learning_rate": 0.0005, "loss": 1.1355, "step": 15270 }, { "epoch": 0.48726043560062504, "grad_norm": 0.3929298222064972, "learning_rate": 0.0005, "loss": 1.1205, "step": 15280 }, { "epoch": 0.48757932332025894, "grad_norm": 0.4007817804813385, "learning_rate": 0.0005, "loss": 1.1399, "step": 15290 }, { "epoch": 0.48789821103989284, "grad_norm": 0.39471328258514404, "learning_rate": 0.0005, "loss": 1.1271, "step": 15300 }, { "epoch": 0.4882170987595268, "grad_norm": 0.4145445227622986, "learning_rate": 0.0005, "loss": 1.122, "step": 15310 }, { "epoch": 0.4885359864791607, "grad_norm": 0.4210784137248993, "learning_rate": 0.0005, "loss": 1.1379, "step": 15320 }, { "epoch": 0.4888548741987946, "grad_norm": 0.4189470708370209, "learning_rate": 0.0005, "loss": 1.1394, "step": 15330 }, { "epoch": 0.48917376191842854, "grad_norm": 0.41203248500823975, "learning_rate": 0.0005, "loss": 1.1239, "step": 15340 }, { "epoch": 0.48949264963806244, "grad_norm": 0.4104352593421936, "learning_rate": 0.0005, "loss": 1.1285, "step": 15350 }, { "epoch": 0.48981153735769634, "grad_norm": 0.39657625555992126, "learning_rate": 0.0005, "loss": 1.1247, "step": 15360 }, { "epoch": 0.4901304250773303, "grad_norm": 0.4017868936061859, "learning_rate": 0.0005, "loss": 1.1461, "step": 15370 }, { "epoch": 0.4904493127969642, "grad_norm": 0.4098726511001587, "learning_rate": 0.0005, "loss": 1.141, "step": 15380 }, { "epoch": 0.4907682005165981, "grad_norm": 0.4074094891548157, "learning_rate": 0.0005, "loss": 1.1486, "step": 15390 }, { "epoch": 0.49108708823623204, "grad_norm": 0.42989563941955566, "learning_rate": 0.0005, "loss": 1.1252, "step": 15400 }, { "epoch": 0.49140597595586594, "grad_norm": 0.41132497787475586, "learning_rate": 0.0005, "loss": 1.1439, "step": 15410 }, { "epoch": 0.49172486367549983, "grad_norm": 0.41117823123931885, "learning_rate": 0.0005, "loss": 1.145, "step": 15420 }, { "epoch": 0.4920437513951338, "grad_norm": 0.4057653844356537, "learning_rate": 0.0005, "loss": 1.1377, "step": 15430 }, { "epoch": 0.4923626391147677, "grad_norm": 0.39292091131210327, "learning_rate": 0.0005, "loss": 1.1527, "step": 15440 }, { "epoch": 0.4926815268344016, "grad_norm": 0.41839951276779175, "learning_rate": 0.0005, "loss": 1.1633, "step": 15450 }, { "epoch": 0.49300041455403554, "grad_norm": 0.4018307030200958, "learning_rate": 0.0005, "loss": 1.1584, "step": 15460 }, { "epoch": 0.49331930227366944, "grad_norm": 0.42629310488700867, "learning_rate": 0.0005, "loss": 1.1557, "step": 15470 }, { "epoch": 0.49363818999330333, "grad_norm": 0.4066459536552429, "learning_rate": 0.0005, "loss": 1.1268, "step": 15480 }, { "epoch": 0.4939570777129373, "grad_norm": 0.4150345027446747, "learning_rate": 0.0005, "loss": 1.1552, "step": 15490 }, { "epoch": 0.4942759654325712, "grad_norm": 0.4128059446811676, "learning_rate": 0.0005, "loss": 1.1308, "step": 15500 }, { "epoch": 0.4945948531522051, "grad_norm": 0.4207080006599426, "learning_rate": 0.0005, "loss": 1.1431, "step": 15510 }, { "epoch": 0.49491374087183904, "grad_norm": 0.39305001497268677, "learning_rate": 0.0005, "loss": 1.1484, "step": 15520 }, { "epoch": 0.49523262859147293, "grad_norm": 0.40596818923950195, "learning_rate": 0.0005, "loss": 1.1378, "step": 15530 }, { "epoch": 0.49555151631110683, "grad_norm": 0.3958894610404968, "learning_rate": 0.0005, "loss": 1.1372, "step": 15540 }, { "epoch": 0.4958704040307408, "grad_norm": 0.39964666962623596, "learning_rate": 0.0005, "loss": 1.1452, "step": 15550 }, { "epoch": 0.4961892917503747, "grad_norm": 0.3986499607563019, "learning_rate": 0.0005, "loss": 1.1357, "step": 15560 }, { "epoch": 0.49650817947000864, "grad_norm": 0.38713058829307556, "learning_rate": 0.0005, "loss": 1.146, "step": 15570 }, { "epoch": 0.49682706718964254, "grad_norm": 0.4117546081542969, "learning_rate": 0.0005, "loss": 1.1424, "step": 15580 }, { "epoch": 0.49714595490927643, "grad_norm": 0.41599640250205994, "learning_rate": 0.0005, "loss": 1.1458, "step": 15590 }, { "epoch": 0.4974648426289104, "grad_norm": 0.4060603678226471, "learning_rate": 0.0005, "loss": 1.1298, "step": 15600 }, { "epoch": 0.4977837303485443, "grad_norm": 0.40494588017463684, "learning_rate": 0.0005, "loss": 1.1315, "step": 15610 }, { "epoch": 0.4981026180681782, "grad_norm": 0.39388027787208557, "learning_rate": 0.0005, "loss": 1.1406, "step": 15620 }, { "epoch": 0.49842150578781214, "grad_norm": 0.4042576849460602, "learning_rate": 0.0005, "loss": 1.1253, "step": 15630 }, { "epoch": 0.49874039350744603, "grad_norm": 0.40436872839927673, "learning_rate": 0.0005, "loss": 1.1388, "step": 15640 }, { "epoch": 0.49905928122707993, "grad_norm": 0.40439584851264954, "learning_rate": 0.0005, "loss": 1.139, "step": 15650 }, { "epoch": 0.4993781689467139, "grad_norm": 0.3996795117855072, "learning_rate": 0.0005, "loss": 1.1563, "step": 15660 }, { "epoch": 0.4996970566663478, "grad_norm": 0.415203720331192, "learning_rate": 0.0005, "loss": 1.1362, "step": 15670 }, { "epoch": 0.5000159443859817, "grad_norm": 0.40682661533355713, "learning_rate": 0.0005, "loss": 1.1387, "step": 15680 }, { "epoch": 0.5003348321056156, "grad_norm": 0.38640740513801575, "learning_rate": 0.0005, "loss": 1.1269, "step": 15690 }, { "epoch": 0.5006537198252495, "grad_norm": 0.4006939232349396, "learning_rate": 0.0005, "loss": 1.1346, "step": 15700 }, { "epoch": 0.5009726075448835, "grad_norm": 0.3977952003479004, "learning_rate": 0.0005, "loss": 1.1391, "step": 15710 }, { "epoch": 0.5012914952645173, "grad_norm": 0.3980969786643982, "learning_rate": 0.0005, "loss": 1.1486, "step": 15720 }, { "epoch": 0.5016103829841513, "grad_norm": 0.4002665877342224, "learning_rate": 0.0005, "loss": 1.1279, "step": 15730 }, { "epoch": 0.5019292707037852, "grad_norm": 0.39593833684921265, "learning_rate": 0.0005, "loss": 1.1584, "step": 15740 }, { "epoch": 0.5022481584234191, "grad_norm": 0.3968457877635956, "learning_rate": 0.0005, "loss": 1.1389, "step": 15750 }, { "epoch": 0.502567046143053, "grad_norm": 0.4009703993797302, "learning_rate": 0.0005, "loss": 1.1151, "step": 15760 }, { "epoch": 0.502885933862687, "grad_norm": 0.40247270464897156, "learning_rate": 0.0005, "loss": 1.1419, "step": 15770 }, { "epoch": 0.5032048215823208, "grad_norm": 0.4238397181034088, "learning_rate": 0.0005, "loss": 1.1362, "step": 15780 }, { "epoch": 0.5035237093019548, "grad_norm": 0.3963928520679474, "learning_rate": 0.0005, "loss": 1.1311, "step": 15790 }, { "epoch": 0.5038425970215887, "grad_norm": 0.38498803973197937, "learning_rate": 0.0005, "loss": 1.1301, "step": 15800 }, { "epoch": 0.5041614847412226, "grad_norm": 0.40925782918930054, "learning_rate": 0.0005, "loss": 1.1447, "step": 15810 }, { "epoch": 0.5044803724608565, "grad_norm": 0.3941743075847626, "learning_rate": 0.0005, "loss": 1.1325, "step": 15820 }, { "epoch": 0.5047992601804905, "grad_norm": 0.3878701627254486, "learning_rate": 0.0005, "loss": 1.1247, "step": 15830 }, { "epoch": 0.5051181479001243, "grad_norm": 0.4078117609024048, "learning_rate": 0.0005, "loss": 1.1386, "step": 15840 }, { "epoch": 0.5054370356197583, "grad_norm": 0.40194058418273926, "learning_rate": 0.0005, "loss": 1.1454, "step": 15850 }, { "epoch": 0.5057559233393922, "grad_norm": 0.3996395170688629, "learning_rate": 0.0005, "loss": 1.1478, "step": 15860 }, { "epoch": 0.5060748110590261, "grad_norm": 0.41087689995765686, "learning_rate": 0.0005, "loss": 1.126, "step": 15870 }, { "epoch": 0.50639369877866, "grad_norm": 0.3939761817455292, "learning_rate": 0.0005, "loss": 1.1361, "step": 15880 }, { "epoch": 0.506712586498294, "grad_norm": 0.4000106453895569, "learning_rate": 0.0005, "loss": 1.1259, "step": 15890 }, { "epoch": 0.5070314742179278, "grad_norm": 0.4082937240600586, "learning_rate": 0.0005, "loss": 1.1292, "step": 15900 }, { "epoch": 0.5073503619375618, "grad_norm": 0.3926817774772644, "learning_rate": 0.0005, "loss": 1.1273, "step": 15910 }, { "epoch": 0.5076692496571957, "grad_norm": 0.41120755672454834, "learning_rate": 0.0005, "loss": 1.1279, "step": 15920 }, { "epoch": 0.5079881373768296, "grad_norm": 0.3957878351211548, "learning_rate": 0.0005, "loss": 1.1347, "step": 15930 }, { "epoch": 0.5083070250964635, "grad_norm": 0.37787875533103943, "learning_rate": 0.0005, "loss": 1.1279, "step": 15940 }, { "epoch": 0.5086259128160975, "grad_norm": 0.40983471274375916, "learning_rate": 0.0005, "loss": 1.1298, "step": 15950 }, { "epoch": 0.5089448005357313, "grad_norm": 0.417005717754364, "learning_rate": 0.0005, "loss": 1.1374, "step": 15960 }, { "epoch": 0.5092636882553653, "grad_norm": 0.4130145013332367, "learning_rate": 0.0005, "loss": 1.1416, "step": 15970 }, { "epoch": 0.5095825759749992, "grad_norm": 0.389676034450531, "learning_rate": 0.0005, "loss": 1.131, "step": 15980 }, { "epoch": 0.5099014636946331, "grad_norm": 0.4049873352050781, "learning_rate": 0.0005, "loss": 1.1198, "step": 15990 }, { "epoch": 0.510220351414267, "grad_norm": 0.40844064950942993, "learning_rate": 0.0005, "loss": 1.1209, "step": 16000 }, { "epoch": 0.510539239133901, "grad_norm": 0.4057811498641968, "learning_rate": 0.0005, "loss": 1.1303, "step": 16010 }, { "epoch": 0.5108581268535348, "grad_norm": 0.39924463629722595, "learning_rate": 0.0005, "loss": 1.1399, "step": 16020 }, { "epoch": 0.5111770145731688, "grad_norm": 0.39572224020957947, "learning_rate": 0.0005, "loss": 1.1104, "step": 16030 }, { "epoch": 0.5114959022928027, "grad_norm": 0.42480066418647766, "learning_rate": 0.0005, "loss": 1.1508, "step": 16040 }, { "epoch": 0.5118147900124366, "grad_norm": 0.40182217955589294, "learning_rate": 0.0005, "loss": 1.1243, "step": 16050 }, { "epoch": 0.5121336777320705, "grad_norm": 0.41160985827445984, "learning_rate": 0.0005, "loss": 1.1514, "step": 16060 }, { "epoch": 0.5124525654517045, "grad_norm": 0.40019845962524414, "learning_rate": 0.0005, "loss": 1.1029, "step": 16070 }, { "epoch": 0.5127714531713383, "grad_norm": 0.40229541063308716, "learning_rate": 0.0005, "loss": 1.1206, "step": 16080 }, { "epoch": 0.5130903408909723, "grad_norm": 0.39097946882247925, "learning_rate": 0.0005, "loss": 1.1369, "step": 16090 }, { "epoch": 0.5134092286106062, "grad_norm": 0.4011414051055908, "learning_rate": 0.0005, "loss": 1.1363, "step": 16100 }, { "epoch": 0.5137281163302401, "grad_norm": 0.3968360424041748, "learning_rate": 0.0005, "loss": 1.1295, "step": 16110 }, { "epoch": 0.514047004049874, "grad_norm": 0.39342719316482544, "learning_rate": 0.0005, "loss": 1.1258, "step": 16120 }, { "epoch": 0.514365891769508, "grad_norm": 0.38369807600975037, "learning_rate": 0.0005, "loss": 1.1257, "step": 16130 }, { "epoch": 0.5146847794891418, "grad_norm": 0.38821354508399963, "learning_rate": 0.0005, "loss": 1.1276, "step": 16140 }, { "epoch": 0.5150036672087758, "grad_norm": 0.41388481855392456, "learning_rate": 0.0005, "loss": 1.1349, "step": 16150 }, { "epoch": 0.5153225549284097, "grad_norm": 0.40181416273117065, "learning_rate": 0.0005, "loss": 1.1299, "step": 16160 }, { "epoch": 0.5156414426480436, "grad_norm": 0.4062349498271942, "learning_rate": 0.0005, "loss": 1.1163, "step": 16170 }, { "epoch": 0.5159603303676775, "grad_norm": 0.4037853181362152, "learning_rate": 0.0005, "loss": 1.1189, "step": 16180 }, { "epoch": 0.5162792180873115, "grad_norm": 0.3858471214771271, "learning_rate": 0.0005, "loss": 1.123, "step": 16190 }, { "epoch": 0.5165981058069454, "grad_norm": 0.3916098475456238, "learning_rate": 0.0005, "loss": 1.1359, "step": 16200 }, { "epoch": 0.5169169935265793, "grad_norm": 0.40132418274879456, "learning_rate": 0.0005, "loss": 1.1262, "step": 16210 }, { "epoch": 0.5172358812462132, "grad_norm": 0.3784230351448059, "learning_rate": 0.0005, "loss": 1.1295, "step": 16220 }, { "epoch": 0.5175547689658472, "grad_norm": 0.4010845720767975, "learning_rate": 0.0005, "loss": 1.1272, "step": 16230 }, { "epoch": 0.517873656685481, "grad_norm": 0.4017618000507355, "learning_rate": 0.0005, "loss": 1.128, "step": 16240 }, { "epoch": 0.518192544405115, "grad_norm": 0.3865583539009094, "learning_rate": 0.0005, "loss": 1.1378, "step": 16250 }, { "epoch": 0.5185114321247489, "grad_norm": 0.38878291845321655, "learning_rate": 0.0005, "loss": 1.1296, "step": 16260 }, { "epoch": 0.5188303198443828, "grad_norm": 0.3995644152164459, "learning_rate": 0.0005, "loss": 1.1167, "step": 16270 }, { "epoch": 0.5191492075640167, "grad_norm": 0.3930690586566925, "learning_rate": 0.0005, "loss": 1.1164, "step": 16280 }, { "epoch": 0.5194680952836507, "grad_norm": 0.4176512658596039, "learning_rate": 0.0005, "loss": 1.1214, "step": 16290 }, { "epoch": 0.5197869830032845, "grad_norm": 0.37958794832229614, "learning_rate": 0.0005, "loss": 1.1471, "step": 16300 }, { "epoch": 0.5201058707229185, "grad_norm": 0.3942332863807678, "learning_rate": 0.0005, "loss": 1.1451, "step": 16310 }, { "epoch": 0.5204247584425524, "grad_norm": 0.3994196355342865, "learning_rate": 0.0005, "loss": 1.125, "step": 16320 }, { "epoch": 0.5207436461621863, "grad_norm": 0.40982067584991455, "learning_rate": 0.0005, "loss": 1.1321, "step": 16330 }, { "epoch": 0.5210625338818202, "grad_norm": 0.4163874387741089, "learning_rate": 0.0005, "loss": 1.1433, "step": 16340 }, { "epoch": 0.5213814216014542, "grad_norm": 0.4115857183933258, "learning_rate": 0.0005, "loss": 1.1186, "step": 16350 }, { "epoch": 0.521700309321088, "grad_norm": 0.3939268887042999, "learning_rate": 0.0005, "loss": 1.1093, "step": 16360 }, { "epoch": 0.522019197040722, "grad_norm": 0.4047539234161377, "learning_rate": 0.0005, "loss": 1.1203, "step": 16370 }, { "epoch": 0.5223380847603559, "grad_norm": 0.4223722815513611, "learning_rate": 0.0005, "loss": 1.1159, "step": 16380 }, { "epoch": 0.5226569724799898, "grad_norm": 0.3986281454563141, "learning_rate": 0.0005, "loss": 1.1577, "step": 16390 }, { "epoch": 0.5229758601996237, "grad_norm": 0.40140584111213684, "learning_rate": 0.0005, "loss": 1.1186, "step": 16400 }, { "epoch": 0.5232947479192577, "grad_norm": 0.41486597061157227, "learning_rate": 0.0005, "loss": 1.1207, "step": 16410 }, { "epoch": 0.5236136356388915, "grad_norm": 0.39416739344596863, "learning_rate": 0.0005, "loss": 1.1158, "step": 16420 }, { "epoch": 0.5239325233585255, "grad_norm": 0.39388272166252136, "learning_rate": 0.0005, "loss": 1.1466, "step": 16430 }, { "epoch": 0.5242514110781594, "grad_norm": 0.40144652128219604, "learning_rate": 0.0005, "loss": 1.1374, "step": 16440 }, { "epoch": 0.5245702987977933, "grad_norm": 0.4018017053604126, "learning_rate": 0.0005, "loss": 1.1444, "step": 16450 }, { "epoch": 0.5248891865174272, "grad_norm": 0.40438687801361084, "learning_rate": 0.0005, "loss": 1.1275, "step": 16460 }, { "epoch": 0.5252080742370612, "grad_norm": 0.4010547697544098, "learning_rate": 0.0005, "loss": 1.1383, "step": 16470 }, { "epoch": 0.525526961956695, "grad_norm": 0.41225293278694153, "learning_rate": 0.0005, "loss": 1.1536, "step": 16480 }, { "epoch": 0.525845849676329, "grad_norm": 0.39326658844947815, "learning_rate": 0.0005, "loss": 1.1366, "step": 16490 }, { "epoch": 0.5261647373959629, "grad_norm": 0.3904922306537628, "learning_rate": 0.0005, "loss": 1.1103, "step": 16500 }, { "epoch": 0.5264836251155968, "grad_norm": 0.3908495008945465, "learning_rate": 0.0005, "loss": 1.1172, "step": 16510 }, { "epoch": 0.5268025128352307, "grad_norm": 0.412609726190567, "learning_rate": 0.0005, "loss": 1.1317, "step": 16520 }, { "epoch": 0.5271214005548647, "grad_norm": 0.38488397002220154, "learning_rate": 0.0005, "loss": 1.1207, "step": 16530 }, { "epoch": 0.5274402882744985, "grad_norm": 0.40790820121765137, "learning_rate": 0.0005, "loss": 1.1597, "step": 16540 }, { "epoch": 0.5277591759941325, "grad_norm": 0.4010877013206482, "learning_rate": 0.0005, "loss": 1.1119, "step": 16550 }, { "epoch": 0.5280780637137664, "grad_norm": 0.39443594217300415, "learning_rate": 0.0005, "loss": 1.1303, "step": 16560 }, { "epoch": 0.5283969514334003, "grad_norm": 0.41272708773612976, "learning_rate": 0.0005, "loss": 1.1182, "step": 16570 }, { "epoch": 0.5287158391530342, "grad_norm": 0.40090036392211914, "learning_rate": 0.0005, "loss": 1.1059, "step": 16580 }, { "epoch": 0.5290347268726682, "grad_norm": 0.3933262526988983, "learning_rate": 0.0005, "loss": 1.1184, "step": 16590 }, { "epoch": 0.529353614592302, "grad_norm": 0.3988369405269623, "learning_rate": 0.0005, "loss": 1.1239, "step": 16600 }, { "epoch": 0.529672502311936, "grad_norm": 0.3974078595638275, "learning_rate": 0.0005, "loss": 1.1132, "step": 16610 }, { "epoch": 0.5299913900315699, "grad_norm": 0.39606592059135437, "learning_rate": 0.0005, "loss": 1.1491, "step": 16620 }, { "epoch": 0.5303102777512038, "grad_norm": 0.39793723821640015, "learning_rate": 0.0005, "loss": 1.1312, "step": 16630 }, { "epoch": 0.5306291654708377, "grad_norm": 0.38791102170944214, "learning_rate": 0.0005, "loss": 1.1274, "step": 16640 }, { "epoch": 0.5309480531904717, "grad_norm": 0.4097859263420105, "learning_rate": 0.0005, "loss": 1.1315, "step": 16650 }, { "epoch": 0.5312669409101055, "grad_norm": 0.3862121105194092, "learning_rate": 0.0005, "loss": 1.1332, "step": 16660 }, { "epoch": 0.5315858286297395, "grad_norm": 0.3790407180786133, "learning_rate": 0.0005, "loss": 1.1102, "step": 16670 }, { "epoch": 0.5319047163493734, "grad_norm": 0.39420583844184875, "learning_rate": 0.0005, "loss": 1.1593, "step": 16680 }, { "epoch": 0.5322236040690073, "grad_norm": 0.38086998462677, "learning_rate": 0.0005, "loss": 1.1187, "step": 16690 }, { "epoch": 0.5325424917886412, "grad_norm": 0.3970024287700653, "learning_rate": 0.0005, "loss": 1.1196, "step": 16700 }, { "epoch": 0.5328613795082752, "grad_norm": 0.40243950486183167, "learning_rate": 0.0005, "loss": 1.1154, "step": 16710 }, { "epoch": 0.533180267227909, "grad_norm": 0.392729789018631, "learning_rate": 0.0005, "loss": 1.14, "step": 16720 }, { "epoch": 0.533499154947543, "grad_norm": 0.4108237028121948, "learning_rate": 0.0005, "loss": 1.1352, "step": 16730 }, { "epoch": 0.5338180426671769, "grad_norm": 0.3943747580051422, "learning_rate": 0.0005, "loss": 1.1215, "step": 16740 }, { "epoch": 0.5341369303868108, "grad_norm": 0.4041931927204132, "learning_rate": 0.0005, "loss": 1.1134, "step": 16750 }, { "epoch": 0.5344558181064447, "grad_norm": 0.3883357346057892, "learning_rate": 0.0005, "loss": 1.1344, "step": 16760 }, { "epoch": 0.5347747058260787, "grad_norm": 0.38386496901512146, "learning_rate": 0.0005, "loss": 1.1285, "step": 16770 }, { "epoch": 0.5350935935457125, "grad_norm": 0.3961685597896576, "learning_rate": 0.0005, "loss": 1.1328, "step": 16780 }, { "epoch": 0.5354124812653465, "grad_norm": 0.4196777939796448, "learning_rate": 0.0005, "loss": 1.1319, "step": 16790 }, { "epoch": 0.5357313689849804, "grad_norm": 0.39921364188194275, "learning_rate": 0.0005, "loss": 1.1217, "step": 16800 }, { "epoch": 0.5360502567046143, "grad_norm": 0.40605226159095764, "learning_rate": 0.0005, "loss": 1.1503, "step": 16810 }, { "epoch": 0.5363691444242482, "grad_norm": 0.4120405614376068, "learning_rate": 0.0005, "loss": 1.1284, "step": 16820 }, { "epoch": 0.5366880321438822, "grad_norm": 0.3855202794075012, "learning_rate": 0.0005, "loss": 1.1233, "step": 16830 }, { "epoch": 0.537006919863516, "grad_norm": 0.4094316065311432, "learning_rate": 0.0005, "loss": 1.1495, "step": 16840 }, { "epoch": 0.53732580758315, "grad_norm": 0.395519495010376, "learning_rate": 0.0005, "loss": 1.131, "step": 16850 }, { "epoch": 0.5376446953027839, "grad_norm": 0.3976709544658661, "learning_rate": 0.0005, "loss": 1.1316, "step": 16860 }, { "epoch": 0.5379635830224178, "grad_norm": 0.38913607597351074, "learning_rate": 0.0005, "loss": 1.1177, "step": 16870 }, { "epoch": 0.5382824707420517, "grad_norm": 0.3925860524177551, "learning_rate": 0.0005, "loss": 1.1052, "step": 16880 }, { "epoch": 0.5386013584616857, "grad_norm": 0.384029358625412, "learning_rate": 0.0005, "loss": 1.1369, "step": 16890 }, { "epoch": 0.5389202461813195, "grad_norm": 0.3836941123008728, "learning_rate": 0.0005, "loss": 1.1342, "step": 16900 }, { "epoch": 0.5392391339009535, "grad_norm": 0.39144232869148254, "learning_rate": 0.0005, "loss": 1.112, "step": 16910 }, { "epoch": 0.5395580216205874, "grad_norm": 0.3911305069923401, "learning_rate": 0.0005, "loss": 1.14, "step": 16920 }, { "epoch": 0.5398769093402213, "grad_norm": 0.39506804943084717, "learning_rate": 0.0005, "loss": 1.1223, "step": 16930 }, { "epoch": 0.5401957970598552, "grad_norm": 0.39567089080810547, "learning_rate": 0.0005, "loss": 1.1295, "step": 16940 }, { "epoch": 0.5405146847794892, "grad_norm": 0.40075957775115967, "learning_rate": 0.0005, "loss": 1.119, "step": 16950 }, { "epoch": 0.540833572499123, "grad_norm": 0.42073139548301697, "learning_rate": 0.0005, "loss": 1.1242, "step": 16960 }, { "epoch": 0.541152460218757, "grad_norm": 0.38740274310112, "learning_rate": 0.0005, "loss": 1.124, "step": 16970 }, { "epoch": 0.5414713479383909, "grad_norm": 0.39646822214126587, "learning_rate": 0.0005, "loss": 1.1167, "step": 16980 }, { "epoch": 0.5417902356580248, "grad_norm": 0.3964032232761383, "learning_rate": 0.0005, "loss": 1.1422, "step": 16990 }, { "epoch": 0.5421091233776587, "grad_norm": 0.38478758931159973, "learning_rate": 0.0005, "loss": 1.1057, "step": 17000 }, { "epoch": 0.5424280110972927, "grad_norm": 0.3826146423816681, "learning_rate": 0.0005, "loss": 1.1097, "step": 17010 }, { "epoch": 0.5427468988169266, "grad_norm": 0.3929329514503479, "learning_rate": 0.0005, "loss": 1.0978, "step": 17020 }, { "epoch": 0.5430657865365605, "grad_norm": 0.3981013000011444, "learning_rate": 0.0005, "loss": 1.1299, "step": 17030 }, { "epoch": 0.5433846742561944, "grad_norm": 0.3714570701122284, "learning_rate": 0.0005, "loss": 1.1061, "step": 17040 }, { "epoch": 0.5437035619758284, "grad_norm": 0.38850119709968567, "learning_rate": 0.0005, "loss": 1.1096, "step": 17050 }, { "epoch": 0.5440224496954622, "grad_norm": 0.3953388035297394, "learning_rate": 0.0005, "loss": 1.1347, "step": 17060 }, { "epoch": 0.5443413374150962, "grad_norm": 0.3949526846408844, "learning_rate": 0.0005, "loss": 1.1354, "step": 17070 }, { "epoch": 0.5446602251347301, "grad_norm": 0.39056915044784546, "learning_rate": 0.0005, "loss": 1.1398, "step": 17080 }, { "epoch": 0.544979112854364, "grad_norm": 0.3963756561279297, "learning_rate": 0.0005, "loss": 1.1321, "step": 17090 }, { "epoch": 0.5452980005739979, "grad_norm": 0.39296120405197144, "learning_rate": 0.0005, "loss": 1.118, "step": 17100 }, { "epoch": 0.5456168882936319, "grad_norm": 0.39938679337501526, "learning_rate": 0.0005, "loss": 1.1324, "step": 17110 }, { "epoch": 0.5459357760132657, "grad_norm": 0.4136159121990204, "learning_rate": 0.0005, "loss": 1.1346, "step": 17120 }, { "epoch": 0.5462546637328997, "grad_norm": 0.39979586005210876, "learning_rate": 0.0005, "loss": 1.1239, "step": 17130 }, { "epoch": 0.5465735514525336, "grad_norm": 0.3993443548679352, "learning_rate": 0.0005, "loss": 1.1163, "step": 17140 }, { "epoch": 0.5468924391721675, "grad_norm": 0.3846668601036072, "learning_rate": 0.0005, "loss": 1.1193, "step": 17150 }, { "epoch": 0.5472113268918014, "grad_norm": 0.3863378167152405, "learning_rate": 0.0005, "loss": 1.1212, "step": 17160 }, { "epoch": 0.5475302146114354, "grad_norm": 0.3799877166748047, "learning_rate": 0.0005, "loss": 1.1148, "step": 17170 }, { "epoch": 0.5478491023310692, "grad_norm": 0.40554550290107727, "learning_rate": 0.0005, "loss": 1.1378, "step": 17180 }, { "epoch": 0.5481679900507032, "grad_norm": 0.3773590922355652, "learning_rate": 0.0005, "loss": 1.1133, "step": 17190 }, { "epoch": 0.5484868777703371, "grad_norm": 0.39433354139328003, "learning_rate": 0.0005, "loss": 1.1343, "step": 17200 }, { "epoch": 0.548805765489971, "grad_norm": 0.3862454891204834, "learning_rate": 0.0005, "loss": 1.1232, "step": 17210 }, { "epoch": 0.5491246532096049, "grad_norm": 0.38670477271080017, "learning_rate": 0.0005, "loss": 1.1136, "step": 17220 }, { "epoch": 0.5494435409292389, "grad_norm": 0.39055192470550537, "learning_rate": 0.0005, "loss": 1.1129, "step": 17230 }, { "epoch": 0.5497624286488727, "grad_norm": 0.40744680166244507, "learning_rate": 0.0005, "loss": 1.13, "step": 17240 }, { "epoch": 0.5500813163685067, "grad_norm": 0.38350918889045715, "learning_rate": 0.0005, "loss": 1.1296, "step": 17250 }, { "epoch": 0.5504002040881406, "grad_norm": 0.4025372564792633, "learning_rate": 0.0005, "loss": 1.1229, "step": 17260 }, { "epoch": 0.5507190918077745, "grad_norm": 0.3817196190357208, "learning_rate": 0.0005, "loss": 1.1481, "step": 17270 }, { "epoch": 0.5510379795274084, "grad_norm": 0.37860438227653503, "learning_rate": 0.0005, "loss": 1.1058, "step": 17280 }, { "epoch": 0.5513568672470424, "grad_norm": 0.41163647174835205, "learning_rate": 0.0005, "loss": 1.1423, "step": 17290 }, { "epoch": 0.5516757549666762, "grad_norm": 0.389542818069458, "learning_rate": 0.0005, "loss": 1.1164, "step": 17300 }, { "epoch": 0.5519946426863102, "grad_norm": 0.39946138858795166, "learning_rate": 0.0005, "loss": 1.1228, "step": 17310 }, { "epoch": 0.5523135304059441, "grad_norm": 0.3970085084438324, "learning_rate": 0.0005, "loss": 1.1236, "step": 17320 }, { "epoch": 0.552632418125578, "grad_norm": 0.3923346996307373, "learning_rate": 0.0005, "loss": 1.1115, "step": 17330 }, { "epoch": 0.5529513058452119, "grad_norm": 0.3953569531440735, "learning_rate": 0.0005, "loss": 1.1243, "step": 17340 }, { "epoch": 0.5532701935648459, "grad_norm": 0.4045715034008026, "learning_rate": 0.0005, "loss": 1.1226, "step": 17350 }, { "epoch": 0.5535890812844797, "grad_norm": 0.38618502020835876, "learning_rate": 0.0005, "loss": 1.1002, "step": 17360 }, { "epoch": 0.5539079690041137, "grad_norm": 0.38182103633880615, "learning_rate": 0.0005, "loss": 1.1163, "step": 17370 }, { "epoch": 0.5542268567237476, "grad_norm": 0.3986895978450775, "learning_rate": 0.0005, "loss": 1.1312, "step": 17380 }, { "epoch": 0.5545457444433814, "grad_norm": 0.4195130169391632, "learning_rate": 0.0005, "loss": 1.1188, "step": 17390 }, { "epoch": 0.5548646321630154, "grad_norm": 0.3816862404346466, "learning_rate": 0.0005, "loss": 1.1481, "step": 17400 }, { "epoch": 0.5551835198826494, "grad_norm": 0.3993696868419647, "learning_rate": 0.0005, "loss": 1.1273, "step": 17410 }, { "epoch": 0.5555024076022832, "grad_norm": 0.38697922229766846, "learning_rate": 0.0005, "loss": 1.1092, "step": 17420 }, { "epoch": 0.5558212953219172, "grad_norm": 0.40851518511772156, "learning_rate": 0.0005, "loss": 1.1213, "step": 17430 }, { "epoch": 0.5561401830415511, "grad_norm": 0.3991389870643616, "learning_rate": 0.0005, "loss": 1.1257, "step": 17440 }, { "epoch": 0.556459070761185, "grad_norm": 0.392604798078537, "learning_rate": 0.0005, "loss": 1.1252, "step": 17450 }, { "epoch": 0.5567779584808189, "grad_norm": 0.3909078538417816, "learning_rate": 0.0005, "loss": 1.1249, "step": 17460 }, { "epoch": 0.5570968462004529, "grad_norm": 0.3915567100048065, "learning_rate": 0.0005, "loss": 1.1257, "step": 17470 }, { "epoch": 0.5574157339200867, "grad_norm": 0.38957881927490234, "learning_rate": 0.0005, "loss": 1.1027, "step": 17480 }, { "epoch": 0.5577346216397207, "grad_norm": 0.38382086157798767, "learning_rate": 0.0005, "loss": 1.1296, "step": 17490 }, { "epoch": 0.5580535093593546, "grad_norm": 0.39388954639434814, "learning_rate": 0.0005, "loss": 1.1249, "step": 17500 }, { "epoch": 0.5583723970789884, "grad_norm": 0.395594984292984, "learning_rate": 0.0005, "loss": 1.1243, "step": 17510 }, { "epoch": 0.5586912847986224, "grad_norm": 0.4204639792442322, "learning_rate": 0.0005, "loss": 1.1221, "step": 17520 }, { "epoch": 0.5590101725182564, "grad_norm": 0.41280484199523926, "learning_rate": 0.0005, "loss": 1.1094, "step": 17530 }, { "epoch": 0.5593290602378902, "grad_norm": 0.37822672724723816, "learning_rate": 0.0005, "loss": 1.1089, "step": 17540 }, { "epoch": 0.5596479479575242, "grad_norm": 0.38607358932495117, "learning_rate": 0.0005, "loss": 1.1461, "step": 17550 }, { "epoch": 0.5599668356771581, "grad_norm": 0.3974449038505554, "learning_rate": 0.0005, "loss": 1.1057, "step": 17560 }, { "epoch": 0.560285723396792, "grad_norm": 0.39886006712913513, "learning_rate": 0.0005, "loss": 1.1197, "step": 17570 }, { "epoch": 0.5606046111164259, "grad_norm": 0.4036238193511963, "learning_rate": 0.0005, "loss": 1.1223, "step": 17580 }, { "epoch": 0.5609234988360599, "grad_norm": 0.40054237842559814, "learning_rate": 0.0005, "loss": 1.1176, "step": 17590 }, { "epoch": 0.5612423865556937, "grad_norm": 0.38854557275772095, "learning_rate": 0.0005, "loss": 1.1162, "step": 17600 }, { "epoch": 0.5615612742753276, "grad_norm": 0.38694027066230774, "learning_rate": 0.0005, "loss": 1.121, "step": 17610 }, { "epoch": 0.5618801619949616, "grad_norm": 0.37990602850914, "learning_rate": 0.0005, "loss": 1.1177, "step": 17620 }, { "epoch": 0.5621990497145954, "grad_norm": 0.39579102396965027, "learning_rate": 0.0005, "loss": 1.1269, "step": 17630 }, { "epoch": 0.5625179374342294, "grad_norm": 0.39988967776298523, "learning_rate": 0.0005, "loss": 1.1112, "step": 17640 }, { "epoch": 0.5628368251538634, "grad_norm": 0.3899117410182953, "learning_rate": 0.0005, "loss": 1.0975, "step": 17650 }, { "epoch": 0.5631557128734972, "grad_norm": 0.391559362411499, "learning_rate": 0.0005, "loss": 1.1202, "step": 17660 }, { "epoch": 0.5634746005931311, "grad_norm": 0.3928084075450897, "learning_rate": 0.0005, "loss": 1.1191, "step": 17670 }, { "epoch": 0.5637934883127651, "grad_norm": 0.4055452048778534, "learning_rate": 0.0005, "loss": 1.114, "step": 17680 }, { "epoch": 0.5641123760323989, "grad_norm": 0.39144572615623474, "learning_rate": 0.0005, "loss": 1.1277, "step": 17690 }, { "epoch": 0.5644312637520329, "grad_norm": 0.3878569006919861, "learning_rate": 0.0005, "loss": 1.1205, "step": 17700 }, { "epoch": 0.5647501514716669, "grad_norm": 0.3888893127441406, "learning_rate": 0.0005, "loss": 1.1238, "step": 17710 }, { "epoch": 0.5650690391913007, "grad_norm": 0.38882070779800415, "learning_rate": 0.0005, "loss": 1.1176, "step": 17720 }, { "epoch": 0.5653879269109346, "grad_norm": 0.3775336444377899, "learning_rate": 0.0005, "loss": 1.0967, "step": 17730 }, { "epoch": 0.5657068146305686, "grad_norm": 0.386237233877182, "learning_rate": 0.0005, "loss": 1.1149, "step": 17740 }, { "epoch": 0.5660257023502024, "grad_norm": 0.38431134819984436, "learning_rate": 0.0005, "loss": 1.1073, "step": 17750 }, { "epoch": 0.5663445900698364, "grad_norm": 0.4077931344509125, "learning_rate": 0.0005, "loss": 1.1196, "step": 17760 }, { "epoch": 0.5666634777894703, "grad_norm": 0.397979736328125, "learning_rate": 0.0005, "loss": 1.1078, "step": 17770 }, { "epoch": 0.5669823655091042, "grad_norm": 0.40055903792381287, "learning_rate": 0.0005, "loss": 1.1186, "step": 17780 }, { "epoch": 0.5673012532287381, "grad_norm": 0.40928784012794495, "learning_rate": 0.0005, "loss": 1.1277, "step": 17790 }, { "epoch": 0.5676201409483721, "grad_norm": 0.38073426485061646, "learning_rate": 0.0005, "loss": 1.0975, "step": 17800 }, { "epoch": 0.5679390286680059, "grad_norm": 0.39049458503723145, "learning_rate": 0.0005, "loss": 1.1193, "step": 17810 }, { "epoch": 0.5682579163876399, "grad_norm": 0.39856138825416565, "learning_rate": 0.0005, "loss": 1.1151, "step": 17820 }, { "epoch": 0.5685768041072738, "grad_norm": 0.3846971094608307, "learning_rate": 0.0005, "loss": 1.1088, "step": 17830 }, { "epoch": 0.5688956918269078, "grad_norm": 0.4002894461154938, "learning_rate": 0.0005, "loss": 1.105, "step": 17840 }, { "epoch": 0.5692145795465416, "grad_norm": 0.3965441584587097, "learning_rate": 0.0005, "loss": 1.1151, "step": 17850 }, { "epoch": 0.5695334672661756, "grad_norm": 0.38790929317474365, "learning_rate": 0.0005, "loss": 1.109, "step": 17860 }, { "epoch": 0.5698523549858096, "grad_norm": 0.3736358880996704, "learning_rate": 0.0005, "loss": 1.1091, "step": 17870 }, { "epoch": 0.5701712427054434, "grad_norm": 0.3990086317062378, "learning_rate": 0.0005, "loss": 1.1215, "step": 17880 }, { "epoch": 0.5704901304250773, "grad_norm": 0.3745183050632477, "learning_rate": 0.0005, "loss": 1.1109, "step": 17890 }, { "epoch": 0.5708090181447113, "grad_norm": 0.39467811584472656, "learning_rate": 0.0005, "loss": 1.1141, "step": 17900 }, { "epoch": 0.5711279058643451, "grad_norm": 0.3837774395942688, "learning_rate": 0.0005, "loss": 1.1156, "step": 17910 }, { "epoch": 0.5714467935839791, "grad_norm": 0.3765852153301239, "learning_rate": 0.0005, "loss": 1.1126, "step": 17920 }, { "epoch": 0.571765681303613, "grad_norm": 0.37944474816322327, "learning_rate": 0.0005, "loss": 1.1152, "step": 17930 }, { "epoch": 0.5720845690232469, "grad_norm": 0.4042120575904846, "learning_rate": 0.0005, "loss": 1.1293, "step": 17940 }, { "epoch": 0.5724034567428808, "grad_norm": 0.39046669006347656, "learning_rate": 0.0005, "loss": 1.1078, "step": 17950 }, { "epoch": 0.5727223444625148, "grad_norm": 0.39052364230155945, "learning_rate": 0.0005, "loss": 1.0919, "step": 17960 }, { "epoch": 0.5730412321821486, "grad_norm": 0.3719790577888489, "learning_rate": 0.0005, "loss": 1.108, "step": 17970 }, { "epoch": 0.5733601199017826, "grad_norm": 0.4557461738586426, "learning_rate": 0.0005, "loss": 1.1243, "step": 17980 }, { "epoch": 0.5736790076214165, "grad_norm": 0.3853502571582794, "learning_rate": 0.0005, "loss": 1.1299, "step": 17990 }, { "epoch": 0.5739978953410504, "grad_norm": 0.4154195785522461, "learning_rate": 0.0005, "loss": 1.1106, "step": 18000 }, { "epoch": 0.5743167830606843, "grad_norm": 0.37731653451919556, "learning_rate": 0.0005, "loss": 1.105, "step": 18010 }, { "epoch": 0.5746356707803183, "grad_norm": 0.40308648347854614, "learning_rate": 0.0005, "loss": 1.1173, "step": 18020 }, { "epoch": 0.5749545584999521, "grad_norm": 0.3943016231060028, "learning_rate": 0.0005, "loss": 1.1209, "step": 18030 }, { "epoch": 0.5752734462195861, "grad_norm": 0.3769054114818573, "learning_rate": 0.0005, "loss": 1.0992, "step": 18040 }, { "epoch": 0.57559233393922, "grad_norm": 0.40331166982650757, "learning_rate": 0.0005, "loss": 1.1115, "step": 18050 }, { "epoch": 0.5759112216588539, "grad_norm": 0.40439677238464355, "learning_rate": 0.0005, "loss": 1.1066, "step": 18060 }, { "epoch": 0.5762301093784878, "grad_norm": 0.3903745710849762, "learning_rate": 0.0005, "loss": 1.1162, "step": 18070 }, { "epoch": 0.5765489970981218, "grad_norm": 0.388918936252594, "learning_rate": 0.0005, "loss": 1.1097, "step": 18080 }, { "epoch": 0.5768678848177556, "grad_norm": 0.40826553106307983, "learning_rate": 0.0005, "loss": 1.1326, "step": 18090 }, { "epoch": 0.5771867725373896, "grad_norm": 0.385471910238266, "learning_rate": 0.0005, "loss": 1.1257, "step": 18100 }, { "epoch": 0.5775056602570235, "grad_norm": 0.3771023750305176, "learning_rate": 0.0005, "loss": 1.1121, "step": 18110 }, { "epoch": 0.5778245479766574, "grad_norm": 0.40232816338539124, "learning_rate": 0.0005, "loss": 1.1218, "step": 18120 }, { "epoch": 0.5781434356962913, "grad_norm": 0.3964270353317261, "learning_rate": 0.0005, "loss": 1.0979, "step": 18130 }, { "epoch": 0.5784623234159253, "grad_norm": 0.3804101049900055, "learning_rate": 0.0005, "loss": 1.114, "step": 18140 }, { "epoch": 0.5787812111355591, "grad_norm": 0.3876723647117615, "learning_rate": 0.0005, "loss": 1.0882, "step": 18150 }, { "epoch": 0.5791000988551931, "grad_norm": 0.39598479866981506, "learning_rate": 0.0005, "loss": 1.1089, "step": 18160 }, { "epoch": 0.579418986574827, "grad_norm": 0.40432387590408325, "learning_rate": 0.0005, "loss": 1.0953, "step": 18170 }, { "epoch": 0.5797378742944609, "grad_norm": 0.37542155385017395, "learning_rate": 0.0005, "loss": 1.1234, "step": 18180 }, { "epoch": 0.5800567620140948, "grad_norm": 0.379314124584198, "learning_rate": 0.0005, "loss": 1.1158, "step": 18190 }, { "epoch": 0.5803756497337288, "grad_norm": 0.3933337330818176, "learning_rate": 0.0005, "loss": 1.1233, "step": 18200 }, { "epoch": 0.5806945374533626, "grad_norm": 0.3895704448223114, "learning_rate": 0.0005, "loss": 1.1099, "step": 18210 }, { "epoch": 0.5810134251729966, "grad_norm": 0.37216246128082275, "learning_rate": 0.0005, "loss": 1.1094, "step": 18220 }, { "epoch": 0.5813323128926305, "grad_norm": 0.37889426946640015, "learning_rate": 0.0005, "loss": 1.1331, "step": 18230 }, { "epoch": 0.5816512006122644, "grad_norm": 0.3779814839363098, "learning_rate": 0.0005, "loss": 1.1053, "step": 18240 }, { "epoch": 0.5819700883318983, "grad_norm": 0.384083092212677, "learning_rate": 0.0005, "loss": 1.1169, "step": 18250 }, { "epoch": 0.5822889760515323, "grad_norm": 0.39483460783958435, "learning_rate": 0.0005, "loss": 1.1075, "step": 18260 }, { "epoch": 0.5826078637711661, "grad_norm": 0.4070870280265808, "learning_rate": 0.0005, "loss": 1.1181, "step": 18270 }, { "epoch": 0.5829267514908001, "grad_norm": 0.396951287984848, "learning_rate": 0.0005, "loss": 1.1002, "step": 18280 }, { "epoch": 0.583245639210434, "grad_norm": 0.3835306763648987, "learning_rate": 0.0005, "loss": 1.1126, "step": 18290 }, { "epoch": 0.5835645269300679, "grad_norm": 0.3818987309932709, "learning_rate": 0.0005, "loss": 1.1075, "step": 18300 }, { "epoch": 0.5838834146497018, "grad_norm": 0.3779695928096771, "learning_rate": 0.0005, "loss": 1.0975, "step": 18310 }, { "epoch": 0.5842023023693358, "grad_norm": 0.3806639611721039, "learning_rate": 0.0005, "loss": 1.1089, "step": 18320 }, { "epoch": 0.5845211900889696, "grad_norm": 0.3965880870819092, "learning_rate": 0.0005, "loss": 1.1147, "step": 18330 }, { "epoch": 0.5848400778086036, "grad_norm": 0.38795900344848633, "learning_rate": 0.0005, "loss": 1.106, "step": 18340 }, { "epoch": 0.5851589655282375, "grad_norm": 0.3765144646167755, "learning_rate": 0.0005, "loss": 1.1132, "step": 18350 }, { "epoch": 0.5854778532478714, "grad_norm": 0.374661386013031, "learning_rate": 0.0005, "loss": 1.1295, "step": 18360 }, { "epoch": 0.5857967409675053, "grad_norm": 0.388966828584671, "learning_rate": 0.0005, "loss": 1.1295, "step": 18370 }, { "epoch": 0.5861156286871393, "grad_norm": 0.4160568416118622, "learning_rate": 0.0005, "loss": 1.1335, "step": 18380 }, { "epoch": 0.5864345164067731, "grad_norm": 0.39939290285110474, "learning_rate": 0.0005, "loss": 1.1024, "step": 18390 }, { "epoch": 0.5867534041264071, "grad_norm": 0.3849220275878906, "learning_rate": 0.0005, "loss": 1.1041, "step": 18400 }, { "epoch": 0.587072291846041, "grad_norm": 0.3832233250141144, "learning_rate": 0.0005, "loss": 1.1084, "step": 18410 }, { "epoch": 0.5873911795656749, "grad_norm": 0.3949020504951477, "learning_rate": 0.0005, "loss": 1.1269, "step": 18420 }, { "epoch": 0.5877100672853088, "grad_norm": 0.39243316650390625, "learning_rate": 0.0005, "loss": 1.1221, "step": 18430 }, { "epoch": 0.5880289550049428, "grad_norm": 0.3923371136188507, "learning_rate": 0.0005, "loss": 1.1198, "step": 18440 }, { "epoch": 0.5883478427245766, "grad_norm": 0.3877916932106018, "learning_rate": 0.0005, "loss": 1.1132, "step": 18450 }, { "epoch": 0.5886667304442106, "grad_norm": 0.3907372057437897, "learning_rate": 0.0005, "loss": 1.0928, "step": 18460 }, { "epoch": 0.5889856181638445, "grad_norm": 0.39084115624427795, "learning_rate": 0.0005, "loss": 1.1069, "step": 18470 }, { "epoch": 0.5893045058834784, "grad_norm": 0.374729186296463, "learning_rate": 0.0005, "loss": 1.1187, "step": 18480 }, { "epoch": 0.5896233936031123, "grad_norm": 0.36638641357421875, "learning_rate": 0.0005, "loss": 1.1038, "step": 18490 }, { "epoch": 0.5899422813227463, "grad_norm": 0.3777466416358948, "learning_rate": 0.0005, "loss": 1.1147, "step": 18500 }, { "epoch": 0.5902611690423801, "grad_norm": 0.3874850869178772, "learning_rate": 0.0005, "loss": 1.1116, "step": 18510 }, { "epoch": 0.5905800567620141, "grad_norm": 0.4039003252983093, "learning_rate": 0.0005, "loss": 1.1216, "step": 18520 }, { "epoch": 0.590898944481648, "grad_norm": 0.4028369188308716, "learning_rate": 0.0005, "loss": 1.1036, "step": 18530 }, { "epoch": 0.5912178322012819, "grad_norm": 0.39203813672065735, "learning_rate": 0.0005, "loss": 1.1149, "step": 18540 }, { "epoch": 0.5915367199209158, "grad_norm": 0.3853701651096344, "learning_rate": 0.0005, "loss": 1.1192, "step": 18550 }, { "epoch": 0.5918556076405498, "grad_norm": 0.389746755361557, "learning_rate": 0.0005, "loss": 1.132, "step": 18560 }, { "epoch": 0.5921744953601836, "grad_norm": 0.3897966742515564, "learning_rate": 0.0005, "loss": 1.0949, "step": 18570 }, { "epoch": 0.5924933830798176, "grad_norm": 0.40840062499046326, "learning_rate": 0.0005, "loss": 1.102, "step": 18580 }, { "epoch": 0.5928122707994515, "grad_norm": 0.3809534013271332, "learning_rate": 0.0005, "loss": 1.1128, "step": 18590 }, { "epoch": 0.5931311585190854, "grad_norm": 0.38073065876960754, "learning_rate": 0.0005, "loss": 1.1099, "step": 18600 }, { "epoch": 0.5934500462387193, "grad_norm": 0.4003779888153076, "learning_rate": 0.0005, "loss": 1.1182, "step": 18610 }, { "epoch": 0.5937689339583533, "grad_norm": 0.38311606645584106, "learning_rate": 0.0005, "loss": 1.1133, "step": 18620 }, { "epoch": 0.5940878216779871, "grad_norm": 0.3980417549610138, "learning_rate": 0.0005, "loss": 1.1108, "step": 18630 }, { "epoch": 0.5944067093976211, "grad_norm": 0.39308735728263855, "learning_rate": 0.0005, "loss": 1.112, "step": 18640 }, { "epoch": 0.594725597117255, "grad_norm": 0.3650916814804077, "learning_rate": 0.0005, "loss": 1.1127, "step": 18650 }, { "epoch": 0.5950444848368889, "grad_norm": 0.3832719027996063, "learning_rate": 0.0005, "loss": 1.086, "step": 18660 }, { "epoch": 0.5953633725565228, "grad_norm": 0.39278343319892883, "learning_rate": 0.0005, "loss": 1.1029, "step": 18670 }, { "epoch": 0.5956822602761568, "grad_norm": 0.3769834041595459, "learning_rate": 0.0005, "loss": 1.1116, "step": 18680 }, { "epoch": 0.5960011479957907, "grad_norm": 0.381989061832428, "learning_rate": 0.0005, "loss": 1.1047, "step": 18690 }, { "epoch": 0.5963200357154246, "grad_norm": 0.38305485248565674, "learning_rate": 0.0005, "loss": 1.0906, "step": 18700 }, { "epoch": 0.5966389234350585, "grad_norm": 0.3805263638496399, "learning_rate": 0.0005, "loss": 1.1158, "step": 18710 }, { "epoch": 0.5969578111546925, "grad_norm": 0.3875347673892975, "learning_rate": 0.0005, "loss": 1.1095, "step": 18720 }, { "epoch": 0.5972766988743263, "grad_norm": 0.3871545195579529, "learning_rate": 0.0005, "loss": 1.1108, "step": 18730 }, { "epoch": 0.5975955865939603, "grad_norm": 0.3692149519920349, "learning_rate": 0.0005, "loss": 1.1256, "step": 18740 }, { "epoch": 0.5979144743135942, "grad_norm": 0.3667621612548828, "learning_rate": 0.0005, "loss": 1.0938, "step": 18750 }, { "epoch": 0.5982333620332281, "grad_norm": 0.38246843218803406, "learning_rate": 0.0005, "loss": 1.1166, "step": 18760 }, { "epoch": 0.598552249752862, "grad_norm": 0.38617420196533203, "learning_rate": 0.0005, "loss": 1.1086, "step": 18770 }, { "epoch": 0.598871137472496, "grad_norm": 0.3871334195137024, "learning_rate": 0.0005, "loss": 1.1041, "step": 18780 }, { "epoch": 0.5991900251921298, "grad_norm": 0.38116347789764404, "learning_rate": 0.0005, "loss": 1.1395, "step": 18790 }, { "epoch": 0.5995089129117638, "grad_norm": 0.38512861728668213, "learning_rate": 0.0005, "loss": 1.0727, "step": 18800 }, { "epoch": 0.5998278006313977, "grad_norm": 0.3731023669242859, "learning_rate": 0.0005, "loss": 1.096, "step": 18810 }, { "epoch": 0.6001466883510316, "grad_norm": 0.3894040286540985, "learning_rate": 0.0005, "loss": 1.1203, "step": 18820 }, { "epoch": 0.6004655760706655, "grad_norm": 0.38910865783691406, "learning_rate": 0.0005, "loss": 1.1099, "step": 18830 }, { "epoch": 0.6007844637902995, "grad_norm": 0.39119401574134827, "learning_rate": 0.0005, "loss": 1.1067, "step": 18840 }, { "epoch": 0.6011033515099333, "grad_norm": 0.39401862025260925, "learning_rate": 0.0005, "loss": 1.1443, "step": 18850 }, { "epoch": 0.6014222392295673, "grad_norm": 0.38900884985923767, "learning_rate": 0.0005, "loss": 1.1052, "step": 18860 }, { "epoch": 0.6017411269492012, "grad_norm": 0.3946762681007385, "learning_rate": 0.0005, "loss": 1.1095, "step": 18870 }, { "epoch": 0.6020600146688351, "grad_norm": 0.38308626413345337, "learning_rate": 0.0005, "loss": 1.0929, "step": 18880 }, { "epoch": 0.602378902388469, "grad_norm": 0.3667078912258148, "learning_rate": 0.0005, "loss": 1.108, "step": 18890 }, { "epoch": 0.602697790108103, "grad_norm": 0.3811316192150116, "learning_rate": 0.0005, "loss": 1.1106, "step": 18900 }, { "epoch": 0.6030166778277368, "grad_norm": 0.3822799026966095, "learning_rate": 0.0005, "loss": 1.11, "step": 18910 }, { "epoch": 0.6033355655473708, "grad_norm": 0.37707287073135376, "learning_rate": 0.0005, "loss": 1.1164, "step": 18920 }, { "epoch": 0.6036544532670047, "grad_norm": 0.3722653388977051, "learning_rate": 0.0005, "loss": 1.087, "step": 18930 }, { "epoch": 0.6039733409866386, "grad_norm": 0.3983718454837799, "learning_rate": 0.0005, "loss": 1.0896, "step": 18940 }, { "epoch": 0.6042922287062725, "grad_norm": 0.3902481496334076, "learning_rate": 0.0005, "loss": 1.115, "step": 18950 }, { "epoch": 0.6046111164259065, "grad_norm": 0.3895500898361206, "learning_rate": 0.0005, "loss": 1.1194, "step": 18960 }, { "epoch": 0.6049300041455403, "grad_norm": 0.3960655629634857, "learning_rate": 0.0005, "loss": 1.1132, "step": 18970 }, { "epoch": 0.6052488918651743, "grad_norm": 0.39928701519966125, "learning_rate": 0.0005, "loss": 1.1145, "step": 18980 }, { "epoch": 0.6055677795848082, "grad_norm": 0.40216585993766785, "learning_rate": 0.0005, "loss": 1.1151, "step": 18990 }, { "epoch": 0.6058866673044421, "grad_norm": 0.37362903356552124, "learning_rate": 0.0005, "loss": 1.1073, "step": 19000 }, { "epoch": 0.606205555024076, "grad_norm": 0.40238964557647705, "learning_rate": 0.0005, "loss": 1.1007, "step": 19010 }, { "epoch": 0.60652444274371, "grad_norm": 0.38304927945137024, "learning_rate": 0.0005, "loss": 1.1208, "step": 19020 }, { "epoch": 0.6068433304633438, "grad_norm": 0.38120222091674805, "learning_rate": 0.0005, "loss": 1.1043, "step": 19030 }, { "epoch": 0.6071622181829778, "grad_norm": 0.3859049081802368, "learning_rate": 0.0005, "loss": 1.0943, "step": 19040 }, { "epoch": 0.6074811059026117, "grad_norm": 0.37868672609329224, "learning_rate": 0.0005, "loss": 1.0978, "step": 19050 }, { "epoch": 0.6077999936222456, "grad_norm": 0.3834395110607147, "learning_rate": 0.0005, "loss": 1.1088, "step": 19060 }, { "epoch": 0.6081188813418795, "grad_norm": 0.3883598744869232, "learning_rate": 0.0005, "loss": 1.1118, "step": 19070 }, { "epoch": 0.6084377690615135, "grad_norm": 0.3869861364364624, "learning_rate": 0.0005, "loss": 1.103, "step": 19080 }, { "epoch": 0.6087566567811473, "grad_norm": 0.3876883387565613, "learning_rate": 0.0005, "loss": 1.0852, "step": 19090 }, { "epoch": 0.6090755445007813, "grad_norm": 0.402549684047699, "learning_rate": 0.0005, "loss": 1.1133, "step": 19100 }, { "epoch": 0.6093944322204152, "grad_norm": 0.3869187831878662, "learning_rate": 0.0005, "loss": 1.0844, "step": 19110 }, { "epoch": 0.6097133199400491, "grad_norm": 0.40940797328948975, "learning_rate": 0.0005, "loss": 1.1096, "step": 19120 }, { "epoch": 0.610032207659683, "grad_norm": 0.3896699547767639, "learning_rate": 0.0005, "loss": 1.1233, "step": 19130 }, { "epoch": 0.610351095379317, "grad_norm": 0.38502994179725647, "learning_rate": 0.0005, "loss": 1.12, "step": 19140 }, { "epoch": 0.6106699830989508, "grad_norm": 0.37965127825737, "learning_rate": 0.0005, "loss": 1.1192, "step": 19150 }, { "epoch": 0.6109888708185848, "grad_norm": 0.3973645269870758, "learning_rate": 0.0005, "loss": 1.0942, "step": 19160 }, { "epoch": 0.6113077585382187, "grad_norm": 0.3911329209804535, "learning_rate": 0.0005, "loss": 1.121, "step": 19170 }, { "epoch": 0.6116266462578526, "grad_norm": 0.3806923031806946, "learning_rate": 0.0005, "loss": 1.1192, "step": 19180 }, { "epoch": 0.6119455339774865, "grad_norm": 0.38102033734321594, "learning_rate": 0.0005, "loss": 1.1168, "step": 19190 }, { "epoch": 0.6122644216971205, "grad_norm": 0.39193785190582275, "learning_rate": 0.0005, "loss": 1.0999, "step": 19200 }, { "epoch": 0.6125833094167543, "grad_norm": 0.365323007106781, "learning_rate": 0.0005, "loss": 1.0853, "step": 19210 }, { "epoch": 0.6129021971363883, "grad_norm": 0.39409881830215454, "learning_rate": 0.0005, "loss": 1.1065, "step": 19220 }, { "epoch": 0.6132210848560222, "grad_norm": 0.3841363787651062, "learning_rate": 0.0005, "loss": 1.094, "step": 19230 }, { "epoch": 0.6135399725756561, "grad_norm": 0.3903886079788208, "learning_rate": 0.0005, "loss": 1.1018, "step": 19240 }, { "epoch": 0.61385886029529, "grad_norm": 0.3933718800544739, "learning_rate": 0.0005, "loss": 1.0862, "step": 19250 }, { "epoch": 0.614177748014924, "grad_norm": 0.38507020473480225, "learning_rate": 0.0005, "loss": 1.1148, "step": 19260 }, { "epoch": 0.6144966357345578, "grad_norm": 0.3783412575721741, "learning_rate": 0.0005, "loss": 1.1174, "step": 19270 }, { "epoch": 0.6148155234541918, "grad_norm": 0.3768440783023834, "learning_rate": 0.0005, "loss": 1.1083, "step": 19280 }, { "epoch": 0.6151344111738257, "grad_norm": 0.37053754925727844, "learning_rate": 0.0005, "loss": 1.09, "step": 19290 }, { "epoch": 0.6154532988934596, "grad_norm": 0.3970535397529602, "learning_rate": 0.0005, "loss": 1.1147, "step": 19300 }, { "epoch": 0.6157721866130935, "grad_norm": 0.3832497298717499, "learning_rate": 0.0005, "loss": 1.1118, "step": 19310 }, { "epoch": 0.6160910743327275, "grad_norm": 0.41348445415496826, "learning_rate": 0.0005, "loss": 1.1003, "step": 19320 }, { "epoch": 0.6164099620523613, "grad_norm": 0.3903905749320984, "learning_rate": 0.0005, "loss": 1.1281, "step": 19330 }, { "epoch": 0.6167288497719953, "grad_norm": 0.3750537931919098, "learning_rate": 0.0005, "loss": 1.1033, "step": 19340 }, { "epoch": 0.6170477374916292, "grad_norm": 0.38821011781692505, "learning_rate": 0.0005, "loss": 1.1045, "step": 19350 }, { "epoch": 0.6173666252112631, "grad_norm": 0.385237455368042, "learning_rate": 0.0005, "loss": 1.1076, "step": 19360 }, { "epoch": 0.617685512930897, "grad_norm": 0.37394216656684875, "learning_rate": 0.0005, "loss": 1.1085, "step": 19370 }, { "epoch": 0.618004400650531, "grad_norm": 0.38589802384376526, "learning_rate": 0.0005, "loss": 1.114, "step": 19380 }, { "epoch": 0.6183232883701648, "grad_norm": 0.40107953548431396, "learning_rate": 0.0005, "loss": 1.1248, "step": 19390 }, { "epoch": 0.6186421760897988, "grad_norm": 0.371114045381546, "learning_rate": 0.0005, "loss": 1.0976, "step": 19400 }, { "epoch": 0.6189610638094327, "grad_norm": 0.3722735345363617, "learning_rate": 0.0005, "loss": 1.0997, "step": 19410 }, { "epoch": 0.6192799515290666, "grad_norm": 0.4006235599517822, "learning_rate": 0.0005, "loss": 1.1236, "step": 19420 }, { "epoch": 0.6195988392487005, "grad_norm": 0.3824571371078491, "learning_rate": 0.0005, "loss": 1.1023, "step": 19430 }, { "epoch": 0.6199177269683345, "grad_norm": 0.38375917077064514, "learning_rate": 0.0005, "loss": 1.1097, "step": 19440 }, { "epoch": 0.6202366146879683, "grad_norm": 0.36844027042388916, "learning_rate": 0.0005, "loss": 1.0865, "step": 19450 }, { "epoch": 0.6205555024076023, "grad_norm": 0.38515403866767883, "learning_rate": 0.0005, "loss": 1.1242, "step": 19460 }, { "epoch": 0.6208743901272362, "grad_norm": 0.39014798402786255, "learning_rate": 0.0005, "loss": 1.0959, "step": 19470 }, { "epoch": 0.6211932778468701, "grad_norm": 0.3807680904865265, "learning_rate": 0.0005, "loss": 1.1045, "step": 19480 }, { "epoch": 0.621512165566504, "grad_norm": 0.39916208386421204, "learning_rate": 0.0005, "loss": 1.1077, "step": 19490 }, { "epoch": 0.621831053286138, "grad_norm": 0.38831251859664917, "learning_rate": 0.0005, "loss": 1.109, "step": 19500 }, { "epoch": 0.6221499410057719, "grad_norm": 0.3683846592903137, "learning_rate": 0.0005, "loss": 1.1111, "step": 19510 }, { "epoch": 0.6224688287254058, "grad_norm": 0.3902853727340698, "learning_rate": 0.0005, "loss": 1.102, "step": 19520 }, { "epoch": 0.6227877164450397, "grad_norm": 0.39055487513542175, "learning_rate": 0.0005, "loss": 1.1183, "step": 19530 }, { "epoch": 0.6231066041646737, "grad_norm": 0.380648136138916, "learning_rate": 0.0005, "loss": 1.1002, "step": 19540 }, { "epoch": 0.6234254918843075, "grad_norm": 0.36444613337516785, "learning_rate": 0.0005, "loss": 1.1011, "step": 19550 }, { "epoch": 0.6237443796039415, "grad_norm": 0.39328739047050476, "learning_rate": 0.0005, "loss": 1.126, "step": 19560 }, { "epoch": 0.6240632673235754, "grad_norm": 0.3845846354961395, "learning_rate": 0.0005, "loss": 1.1174, "step": 19570 }, { "epoch": 0.6243821550432093, "grad_norm": 0.3922548294067383, "learning_rate": 0.0005, "loss": 1.1122, "step": 19580 }, { "epoch": 0.6247010427628432, "grad_norm": 0.3749217987060547, "learning_rate": 0.0005, "loss": 1.1073, "step": 19590 }, { "epoch": 0.6250199304824772, "grad_norm": 0.3772096335887909, "learning_rate": 0.0005, "loss": 1.1021, "step": 19600 }, { "epoch": 0.625338818202111, "grad_norm": 0.3921220600605011, "learning_rate": 0.0005, "loss": 1.1128, "step": 19610 }, { "epoch": 0.625657705921745, "grad_norm": 0.38062894344329834, "learning_rate": 0.0005, "loss": 1.1215, "step": 19620 }, { "epoch": 0.6259765936413789, "grad_norm": 0.3739464282989502, "learning_rate": 0.0005, "loss": 1.0983, "step": 19630 }, { "epoch": 0.6262954813610128, "grad_norm": 0.3996415436267853, "learning_rate": 0.0005, "loss": 1.1093, "step": 19640 }, { "epoch": 0.6266143690806467, "grad_norm": 0.37616029381752014, "learning_rate": 0.0005, "loss": 1.1107, "step": 19650 }, { "epoch": 0.6269332568002807, "grad_norm": 0.3891390860080719, "learning_rate": 0.0005, "loss": 1.1001, "step": 19660 }, { "epoch": 0.6272521445199145, "grad_norm": 0.38982507586479187, "learning_rate": 0.0005, "loss": 1.0998, "step": 19670 }, { "epoch": 0.6275710322395485, "grad_norm": 0.3861311376094818, "learning_rate": 0.0005, "loss": 1.1155, "step": 19680 }, { "epoch": 0.6278899199591824, "grad_norm": 0.38030752539634705, "learning_rate": 0.0005, "loss": 1.1024, "step": 19690 }, { "epoch": 0.6282088076788163, "grad_norm": 0.40073028206825256, "learning_rate": 0.0005, "loss": 1.1032, "step": 19700 }, { "epoch": 0.6285276953984502, "grad_norm": 0.38168585300445557, "learning_rate": 0.0005, "loss": 1.0865, "step": 19710 }, { "epoch": 0.6288465831180842, "grad_norm": 0.37104278802871704, "learning_rate": 0.0005, "loss": 1.1062, "step": 19720 }, { "epoch": 0.629165470837718, "grad_norm": 0.3792668879032135, "learning_rate": 0.0005, "loss": 1.098, "step": 19730 }, { "epoch": 0.629484358557352, "grad_norm": 0.38486728072166443, "learning_rate": 0.0005, "loss": 1.1056, "step": 19740 }, { "epoch": 0.6298032462769859, "grad_norm": 0.3664900064468384, "learning_rate": 0.0005, "loss": 1.0944, "step": 19750 }, { "epoch": 0.6301221339966198, "grad_norm": 0.36216047406196594, "learning_rate": 0.0005, "loss": 1.0908, "step": 19760 }, { "epoch": 0.6304410217162537, "grad_norm": 0.38465407490730286, "learning_rate": 0.0005, "loss": 1.0963, "step": 19770 }, { "epoch": 0.6307599094358877, "grad_norm": 0.39357104897499084, "learning_rate": 0.0005, "loss": 1.0946, "step": 19780 }, { "epoch": 0.6310787971555215, "grad_norm": 0.38242307305336, "learning_rate": 0.0005, "loss": 1.1089, "step": 19790 }, { "epoch": 0.6313976848751555, "grad_norm": 0.394965261220932, "learning_rate": 0.0005, "loss": 1.0989, "step": 19800 }, { "epoch": 0.6317165725947894, "grad_norm": 0.3696037530899048, "learning_rate": 0.0005, "loss": 1.1052, "step": 19810 }, { "epoch": 0.6320354603144233, "grad_norm": 0.39433643221855164, "learning_rate": 0.0005, "loss": 1.1002, "step": 19820 }, { "epoch": 0.6323543480340572, "grad_norm": 0.39929360151290894, "learning_rate": 0.0005, "loss": 1.1046, "step": 19830 }, { "epoch": 0.6326732357536912, "grad_norm": 0.385371595621109, "learning_rate": 0.0005, "loss": 1.1151, "step": 19840 }, { "epoch": 0.632992123473325, "grad_norm": 0.36684107780456543, "learning_rate": 0.0005, "loss": 1.0942, "step": 19850 }, { "epoch": 0.633311011192959, "grad_norm": 0.3931552469730377, "learning_rate": 0.0005, "loss": 1.104, "step": 19860 }, { "epoch": 0.6336298989125929, "grad_norm": 0.3704059422016144, "learning_rate": 0.0005, "loss": 1.0976, "step": 19870 }, { "epoch": 0.6339487866322268, "grad_norm": 0.3817865252494812, "learning_rate": 0.0005, "loss": 1.1033, "step": 19880 }, { "epoch": 0.6342676743518607, "grad_norm": 0.3732118606567383, "learning_rate": 0.0005, "loss": 1.1094, "step": 19890 }, { "epoch": 0.6345865620714947, "grad_norm": 0.3995611369609833, "learning_rate": 0.0005, "loss": 1.1045, "step": 19900 }, { "epoch": 0.6349054497911285, "grad_norm": 0.4252803325653076, "learning_rate": 0.0005, "loss": 1.1128, "step": 19910 }, { "epoch": 0.6352243375107625, "grad_norm": 0.3813573718070984, "learning_rate": 0.0005, "loss": 1.1065, "step": 19920 }, { "epoch": 0.6355432252303964, "grad_norm": 0.3913743197917938, "learning_rate": 0.0005, "loss": 1.0895, "step": 19930 }, { "epoch": 0.6358621129500303, "grad_norm": 0.39196574687957764, "learning_rate": 0.0005, "loss": 1.0954, "step": 19940 }, { "epoch": 0.6361810006696642, "grad_norm": 0.3680713176727295, "learning_rate": 0.0005, "loss": 1.0881, "step": 19950 }, { "epoch": 0.6364998883892982, "grad_norm": 0.3810781240463257, "learning_rate": 0.0005, "loss": 1.1045, "step": 19960 }, { "epoch": 0.636818776108932, "grad_norm": 0.3877175748348236, "learning_rate": 0.0005, "loss": 1.1149, "step": 19970 }, { "epoch": 0.637137663828566, "grad_norm": 0.370029479265213, "learning_rate": 0.0005, "loss": 1.1207, "step": 19980 }, { "epoch": 0.6374565515481999, "grad_norm": 0.3744882643222809, "learning_rate": 0.0005, "loss": 1.1102, "step": 19990 }, { "epoch": 0.6377754392678338, "grad_norm": 0.3977319896221161, "learning_rate": 0.0005, "loss": 1.1017, "step": 20000 }, { "epoch": 0.6380943269874677, "grad_norm": 0.37014520168304443, "learning_rate": 0.0005, "loss": 1.1124, "step": 20010 }, { "epoch": 0.6384132147071017, "grad_norm": 0.37795954942703247, "learning_rate": 0.0005, "loss": 1.1125, "step": 20020 }, { "epoch": 0.6387321024267355, "grad_norm": 0.38005727529525757, "learning_rate": 0.0005, "loss": 1.1233, "step": 20030 }, { "epoch": 0.6390509901463695, "grad_norm": 0.3620345890522003, "learning_rate": 0.0005, "loss": 1.0987, "step": 20040 }, { "epoch": 0.6393698778660034, "grad_norm": 0.3555848300457001, "learning_rate": 0.0005, "loss": 1.0906, "step": 20050 }, { "epoch": 0.6396887655856373, "grad_norm": 0.37240397930145264, "learning_rate": 0.0005, "loss": 1.1008, "step": 20060 }, { "epoch": 0.6400076533052712, "grad_norm": 0.37049400806427, "learning_rate": 0.0005, "loss": 1.1029, "step": 20070 }, { "epoch": 0.6403265410249052, "grad_norm": 0.37072935700416565, "learning_rate": 0.0005, "loss": 1.0949, "step": 20080 }, { "epoch": 0.640645428744539, "grad_norm": 0.3630337417125702, "learning_rate": 0.0005, "loss": 1.1095, "step": 20090 }, { "epoch": 0.640964316464173, "grad_norm": 0.38237258791923523, "learning_rate": 0.0005, "loss": 1.1151, "step": 20100 }, { "epoch": 0.6412832041838069, "grad_norm": 0.3834942579269409, "learning_rate": 0.0005, "loss": 1.088, "step": 20110 }, { "epoch": 0.6416020919034408, "grad_norm": 0.3801743686199188, "learning_rate": 0.0005, "loss": 1.1113, "step": 20120 }, { "epoch": 0.6419209796230747, "grad_norm": 0.36740273237228394, "learning_rate": 0.0005, "loss": 1.1085, "step": 20130 }, { "epoch": 0.6422398673427087, "grad_norm": 0.38204872608184814, "learning_rate": 0.0005, "loss": 1.1032, "step": 20140 }, { "epoch": 0.6425587550623425, "grad_norm": 0.38014906644821167, "learning_rate": 0.0005, "loss": 1.1098, "step": 20150 }, { "epoch": 0.6428776427819765, "grad_norm": 0.36667174100875854, "learning_rate": 0.0005, "loss": 1.109, "step": 20160 }, { "epoch": 0.6431965305016104, "grad_norm": 0.3805667459964752, "learning_rate": 0.0005, "loss": 1.1101, "step": 20170 }, { "epoch": 0.6435154182212443, "grad_norm": 0.36804214119911194, "learning_rate": 0.0005, "loss": 1.1068, "step": 20180 }, { "epoch": 0.6438343059408782, "grad_norm": 0.37310531735420227, "learning_rate": 0.0005, "loss": 1.0728, "step": 20190 }, { "epoch": 0.6441531936605122, "grad_norm": 0.3750821053981781, "learning_rate": 0.0005, "loss": 1.0997, "step": 20200 }, { "epoch": 0.644472081380146, "grad_norm": 0.396422803401947, "learning_rate": 0.0005, "loss": 1.1176, "step": 20210 }, { "epoch": 0.64479096909978, "grad_norm": 0.39535143971443176, "learning_rate": 0.0005, "loss": 1.0924, "step": 20220 }, { "epoch": 0.6451098568194139, "grad_norm": 0.3851791322231293, "learning_rate": 0.0005, "loss": 1.1042, "step": 20230 }, { "epoch": 0.6454287445390477, "grad_norm": 0.3816549777984619, "learning_rate": 0.0005, "loss": 1.0909, "step": 20240 }, { "epoch": 0.6457476322586817, "grad_norm": 0.3699096739292145, "learning_rate": 0.0005, "loss": 1.0845, "step": 20250 }, { "epoch": 0.6460665199783157, "grad_norm": 0.3914874196052551, "learning_rate": 0.0005, "loss": 1.098, "step": 20260 }, { "epoch": 0.6463854076979495, "grad_norm": 0.4025990664958954, "learning_rate": 0.0005, "loss": 1.1098, "step": 20270 }, { "epoch": 0.6467042954175835, "grad_norm": 0.3710470497608185, "learning_rate": 0.0005, "loss": 1.1193, "step": 20280 }, { "epoch": 0.6470231831372174, "grad_norm": 0.38040199875831604, "learning_rate": 0.0005, "loss": 1.0883, "step": 20290 }, { "epoch": 0.6473420708568512, "grad_norm": 0.38826727867126465, "learning_rate": 0.0005, "loss": 1.1073, "step": 20300 }, { "epoch": 0.6476609585764852, "grad_norm": 0.39304104447364807, "learning_rate": 0.0005, "loss": 1.0937, "step": 20310 }, { "epoch": 0.6479798462961192, "grad_norm": 0.3646523952484131, "learning_rate": 0.0005, "loss": 1.0911, "step": 20320 }, { "epoch": 0.6482987340157531, "grad_norm": 0.36644312739372253, "learning_rate": 0.0005, "loss": 1.0735, "step": 20330 }, { "epoch": 0.648617621735387, "grad_norm": 0.37986788153648376, "learning_rate": 0.0005, "loss": 1.1079, "step": 20340 }, { "epoch": 0.6489365094550209, "grad_norm": 0.3844556212425232, "learning_rate": 0.0005, "loss": 1.1149, "step": 20350 }, { "epoch": 0.6492553971746549, "grad_norm": 0.3708367347717285, "learning_rate": 0.0005, "loss": 1.1058, "step": 20360 }, { "epoch": 0.6495742848942887, "grad_norm": 0.3721873462200165, "learning_rate": 0.0005, "loss": 1.096, "step": 20370 }, { "epoch": 0.6498931726139227, "grad_norm": 0.36754587292671204, "learning_rate": 0.0005, "loss": 1.115, "step": 20380 }, { "epoch": 0.6502120603335566, "grad_norm": 0.3798966407775879, "learning_rate": 0.0004987293446490035, "loss": 1.0767, "step": 20390 }, { "epoch": 0.6505309480531904, "grad_norm": 0.40216562151908875, "learning_rate": 0.000496824661048599, "loss": 1.1075, "step": 20400 }, { "epoch": 0.6508498357728244, "grad_norm": 0.37482136487960815, "learning_rate": 0.0004949272515732417, "loss": 1.0795, "step": 20410 }, { "epoch": 0.6511687234924584, "grad_norm": 0.37605640292167664, "learning_rate": 0.0004930370884425194, "loss": 1.1036, "step": 20420 }, { "epoch": 0.6514876112120922, "grad_norm": 0.37845471501350403, "learning_rate": 0.0004911541439821156, "loss": 1.1062, "step": 20430 }, { "epoch": 0.6518064989317262, "grad_norm": 0.3742932379245758, "learning_rate": 0.000489278390623404, "loss": 1.1239, "step": 20440 }, { "epoch": 0.6521253866513601, "grad_norm": 0.3708976209163666, "learning_rate": 0.0004874098009030445, "loss": 1.0989, "step": 20450 }, { "epoch": 0.652444274370994, "grad_norm": 0.38528645038604736, "learning_rate": 0.0004855483474625822, "loss": 1.0936, "step": 20460 }, { "epoch": 0.6527631620906279, "grad_norm": 0.39533546566963196, "learning_rate": 0.0004836940030480454, "loss": 1.0938, "step": 20470 }, { "epoch": 0.6530820498102619, "grad_norm": 0.36083102226257324, "learning_rate": 0.0004818467405095477, "loss": 1.0641, "step": 20480 }, { "epoch": 0.6534009375298957, "grad_norm": 0.3747830092906952, "learning_rate": 0.00048000653280089, "loss": 1.0919, "step": 20490 }, { "epoch": 0.6537198252495297, "grad_norm": 0.38523662090301514, "learning_rate": 0.00047817335297916465, "loss": 1.0922, "step": 20500 }, { "epoch": 0.6540387129691636, "grad_norm": 0.3711490333080292, "learning_rate": 0.0004763471742043608, "loss": 1.0861, "step": 20510 }, { "epoch": 0.6543576006887974, "grad_norm": 0.37779441475868225, "learning_rate": 0.00047452796973897166, "loss": 1.0979, "step": 20520 }, { "epoch": 0.6546764884084314, "grad_norm": 0.37991273403167725, "learning_rate": 0.0004727157129476028, "loss": 1.082, "step": 20530 }, { "epoch": 0.6549953761280654, "grad_norm": 0.38536524772644043, "learning_rate": 0.0004709103772965826, "loss": 1.1007, "step": 20540 }, { "epoch": 0.6553142638476992, "grad_norm": 0.36973538994789124, "learning_rate": 0.0004691119363535731, "loss": 1.0886, "step": 20550 }, { "epoch": 0.6556331515673332, "grad_norm": 0.35994893312454224, "learning_rate": 0.0004673203637871834, "loss": 1.0805, "step": 20560 }, { "epoch": 0.6559520392869671, "grad_norm": 0.3755747973918915, "learning_rate": 0.0004655356333665843, "loss": 1.1021, "step": 20570 }, { "epoch": 0.656270927006601, "grad_norm": 0.3772231638431549, "learning_rate": 0.000463757718961124, "loss": 1.1012, "step": 20580 }, { "epoch": 0.6565898147262349, "grad_norm": 0.3790730834007263, "learning_rate": 0.00046198659453994544, "loss": 1.0853, "step": 20590 }, { "epoch": 0.6569087024458689, "grad_norm": 0.36576566100120544, "learning_rate": 0.0004602222341716053, "loss": 1.0923, "step": 20600 }, { "epoch": 0.6572275901655027, "grad_norm": 0.3624275326728821, "learning_rate": 0.0004584646120236944, "loss": 1.0946, "step": 20610 }, { "epoch": 0.6575464778851366, "grad_norm": 0.3723544776439667, "learning_rate": 0.0004567137023624594, "loss": 1.0819, "step": 20620 }, { "epoch": 0.6578653656047706, "grad_norm": 0.3639419376850128, "learning_rate": 0.0004549694795524261, "loss": 1.0697, "step": 20630 }, { "epoch": 0.6581842533244044, "grad_norm": 0.3809690475463867, "learning_rate": 0.0004532319180560237, "loss": 1.1277, "step": 20640 }, { "epoch": 0.6585031410440384, "grad_norm": 0.41316747665405273, "learning_rate": 0.00045150099243321175, "loss": 1.0938, "step": 20650 }, { "epoch": 0.6588220287636724, "grad_norm": 0.36777839064598083, "learning_rate": 0.00044977667734110685, "loss": 1.0731, "step": 20660 }, { "epoch": 0.6591409164833062, "grad_norm": 0.3870874047279358, "learning_rate": 0.00044805894753361183, "loss": 1.0922, "step": 20670 }, { "epoch": 0.6594598042029401, "grad_norm": 0.36223119497299194, "learning_rate": 0.0004463477778610465, "loss": 1.0844, "step": 20680 }, { "epoch": 0.6597786919225741, "grad_norm": 0.3737870752811432, "learning_rate": 0.0004446431432697789, "loss": 1.0826, "step": 20690 }, { "epoch": 0.6600975796422079, "grad_norm": 0.35541796684265137, "learning_rate": 0.0004429450188018587, "loss": 1.0889, "step": 20700 }, { "epoch": 0.6604164673618419, "grad_norm": 0.3769436180591583, "learning_rate": 0.00044125337959465206, "loss": 1.088, "step": 20710 }, { "epoch": 0.6607353550814759, "grad_norm": 0.3823726177215576, "learning_rate": 0.00043956820088047696, "loss": 1.0515, "step": 20720 }, { "epoch": 0.6610542428011097, "grad_norm": 0.36033788323402405, "learning_rate": 0.0004378894579862412, "loss": 1.0833, "step": 20730 }, { "epoch": 0.6613731305207436, "grad_norm": 0.3760935664176941, "learning_rate": 0.00043621712633308096, "loss": 1.0874, "step": 20740 }, { "epoch": 0.6616920182403776, "grad_norm": 0.3767266571521759, "learning_rate": 0.00043455118143600075, "loss": 1.0913, "step": 20750 }, { "epoch": 0.6620109059600114, "grad_norm": 0.3505248725414276, "learning_rate": 0.00043289159890351497, "loss": 1.0709, "step": 20760 }, { "epoch": 0.6623297936796454, "grad_norm": 0.3828575015068054, "learning_rate": 0.00043123835443729117, "loss": 1.0933, "step": 20770 }, { "epoch": 0.6626486813992793, "grad_norm": 0.38556039333343506, "learning_rate": 0.0004295914238317937, "loss": 1.0806, "step": 20780 }, { "epoch": 0.6629675691189132, "grad_norm": 0.36559784412384033, "learning_rate": 0.00042795078297392985, "loss": 1.0812, "step": 20790 }, { "epoch": 0.6632864568385471, "grad_norm": 0.36894142627716064, "learning_rate": 0.0004263164078426964, "loss": 1.1001, "step": 20800 }, { "epoch": 0.6636053445581811, "grad_norm": 0.37960314750671387, "learning_rate": 0.0004246882745088281, "loss": 1.0672, "step": 20810 }, { "epoch": 0.6639242322778149, "grad_norm": 0.394055038690567, "learning_rate": 0.0004230663591344474, "loss": 1.0909, "step": 20820 }, { "epoch": 0.6642431199974489, "grad_norm": 0.37684398889541626, "learning_rate": 0.00042145063797271515, "loss": 1.071, "step": 20830 }, { "epoch": 0.6645620077170828, "grad_norm": 0.36336904764175415, "learning_rate": 0.00041984108736748337, "loss": 1.0739, "step": 20840 }, { "epoch": 0.6648808954367167, "grad_norm": 0.3808850347995758, "learning_rate": 0.00041823768375294853, "loss": 1.0814, "step": 20850 }, { "epoch": 0.6651997831563506, "grad_norm": 0.36502712965011597, "learning_rate": 0.0004166404036533064, "loss": 1.0715, "step": 20860 }, { "epoch": 0.6655186708759846, "grad_norm": 0.3796021640300751, "learning_rate": 0.0004150492236824088, "loss": 1.0884, "step": 20870 }, { "epoch": 0.6658375585956184, "grad_norm": 0.3758836090564728, "learning_rate": 0.000413464120543421, "loss": 1.076, "step": 20880 }, { "epoch": 0.6661564463152524, "grad_norm": 0.35722362995147705, "learning_rate": 0.00041188507102848026, "loss": 1.073, "step": 20890 }, { "epoch": 0.6664753340348863, "grad_norm": 0.3790309429168701, "learning_rate": 0.00041031205201835666, "loss": 1.0811, "step": 20900 }, { "epoch": 0.6667942217545202, "grad_norm": 0.36754101514816284, "learning_rate": 0.0004087450404821141, "loss": 1.0704, "step": 20910 }, { "epoch": 0.6671131094741541, "grad_norm": 0.3516288995742798, "learning_rate": 0.0004071840134767734, "loss": 1.0562, "step": 20920 }, { "epoch": 0.6674319971937881, "grad_norm": 0.35020914673805237, "learning_rate": 0.0004056289481469763, "loss": 1.0726, "step": 20930 }, { "epoch": 0.6677508849134219, "grad_norm": 0.37123730778694153, "learning_rate": 0.00040407982172465073, "loss": 1.0619, "step": 20940 }, { "epoch": 0.6680697726330559, "grad_norm": 0.376804381608963, "learning_rate": 0.00040253661152867774, "loss": 1.0813, "step": 20950 }, { "epoch": 0.6683886603526898, "grad_norm": 0.36201369762420654, "learning_rate": 0.00040099929496455883, "loss": 1.0784, "step": 20960 }, { "epoch": 0.6687075480723237, "grad_norm": 0.3564017415046692, "learning_rate": 0.0003994678495240859, "loss": 1.0586, "step": 20970 }, { "epoch": 0.6690264357919576, "grad_norm": 0.3649623394012451, "learning_rate": 0.0003979422527850112, "loss": 1.0657, "step": 20980 }, { "epoch": 0.6693453235115916, "grad_norm": 0.3724578022956848, "learning_rate": 0.00039642248241071923, "loss": 1.0699, "step": 20990 }, { "epoch": 0.6696642112312254, "grad_norm": 0.37061256170272827, "learning_rate": 0.0003949085161498995, "loss": 1.0841, "step": 21000 }, { "epoch": 0.6699830989508594, "grad_norm": 0.3638942539691925, "learning_rate": 0.00039340033183622096, "loss": 1.067, "step": 21010 }, { "epoch": 0.6703019866704933, "grad_norm": 0.3802492916584015, "learning_rate": 0.00039189790738800755, "loss": 1.0667, "step": 21020 }, { "epoch": 0.6706208743901272, "grad_norm": 0.3844676911830902, "learning_rate": 0.00039040122080791445, "loss": 1.0759, "step": 21030 }, { "epoch": 0.6709397621097611, "grad_norm": 0.36140817403793335, "learning_rate": 0.0003889102501826066, "loss": 1.0672, "step": 21040 }, { "epoch": 0.6712586498293951, "grad_norm": 0.355047345161438, "learning_rate": 0.00038742497368243723, "loss": 1.0726, "step": 21050 }, { "epoch": 0.6715775375490289, "grad_norm": 0.39054790139198303, "learning_rate": 0.00038594536956112894, "loss": 1.0715, "step": 21060 }, { "epoch": 0.6718964252686629, "grad_norm": 0.3946945071220398, "learning_rate": 0.00038447141615545455, "loss": 1.0707, "step": 21070 }, { "epoch": 0.6722153129882968, "grad_norm": 0.3749053478240967, "learning_rate": 0.00038300309188492066, "loss": 1.0905, "step": 21080 }, { "epoch": 0.6725342007079307, "grad_norm": 0.36572372913360596, "learning_rate": 0.00038154037525145126, "loss": 1.0643, "step": 21090 }, { "epoch": 0.6728530884275646, "grad_norm": 0.3694365620613098, "learning_rate": 0.0003800832448390728, "loss": 1.0378, "step": 21100 }, { "epoch": 0.6731719761471986, "grad_norm": 0.36773478984832764, "learning_rate": 0.00037863167931360134, "loss": 1.0899, "step": 21110 }, { "epoch": 0.6734908638668324, "grad_norm": 0.3820645809173584, "learning_rate": 0.00037718565742232924, "loss": 1.0651, "step": 21120 }, { "epoch": 0.6738097515864664, "grad_norm": 0.3728492259979248, "learning_rate": 0.0003757451579937149, "loss": 1.0503, "step": 21130 }, { "epoch": 0.6741286393061003, "grad_norm": 0.3656805753707886, "learning_rate": 0.00037431015993707214, "loss": 1.0667, "step": 21140 }, { "epoch": 0.6744475270257342, "grad_norm": 0.3748362958431244, "learning_rate": 0.00037288064224226184, "loss": 1.0628, "step": 21150 }, { "epoch": 0.6747664147453681, "grad_norm": 0.36744022369384766, "learning_rate": 0.00037145658397938383, "loss": 1.086, "step": 21160 }, { "epoch": 0.6750853024650021, "grad_norm": 0.3650926351547241, "learning_rate": 0.0003700379642984711, "loss": 1.0481, "step": 21170 }, { "epoch": 0.675404190184636, "grad_norm": 0.3765900135040283, "learning_rate": 0.0003686247624291839, "loss": 1.0837, "step": 21180 }, { "epoch": 0.6757230779042699, "grad_norm": 0.37482020258903503, "learning_rate": 0.000367216957680506, "loss": 1.0429, "step": 21190 }, { "epoch": 0.6760419656239038, "grad_norm": 0.3568904995918274, "learning_rate": 0.00036581452944044174, "loss": 1.0468, "step": 21200 }, { "epoch": 0.6763608533435378, "grad_norm": 0.35615864396095276, "learning_rate": 0.0003644174571757142, "loss": 1.0304, "step": 21210 }, { "epoch": 0.6766797410631716, "grad_norm": 0.359070360660553, "learning_rate": 0.00036302572043146427, "loss": 1.0553, "step": 21220 }, { "epoch": 0.6769986287828056, "grad_norm": 0.35599997639656067, "learning_rate": 0.00036163929883095176, "loss": 1.0406, "step": 21230 }, { "epoch": 0.6773175165024395, "grad_norm": 0.3729799687862396, "learning_rate": 0.0003602581720752565, "loss": 1.0523, "step": 21240 }, { "epoch": 0.6776364042220734, "grad_norm": 0.35726866126060486, "learning_rate": 0.00035888231994298154, "loss": 1.0702, "step": 21250 }, { "epoch": 0.6779552919417073, "grad_norm": 0.36645516753196716, "learning_rate": 0.00035751172228995677, "loss": 1.051, "step": 21260 }, { "epoch": 0.6782741796613413, "grad_norm": 0.37250199913978577, "learning_rate": 0.00035614635904894417, "loss": 1.0621, "step": 21270 }, { "epoch": 0.6785930673809751, "grad_norm": 0.3667907118797302, "learning_rate": 0.000354786210229344, "loss": 1.0707, "step": 21280 }, { "epoch": 0.6789119551006091, "grad_norm": 0.35865122079849243, "learning_rate": 0.0003534312559169021, "loss": 1.0424, "step": 21290 }, { "epoch": 0.679230842820243, "grad_norm": 0.36637625098228455, "learning_rate": 0.00035208147627341824, "loss": 1.0598, "step": 21300 }, { "epoch": 0.6795497305398769, "grad_norm": 0.36743950843811035, "learning_rate": 0.0003507368515364557, "loss": 1.0601, "step": 21310 }, { "epoch": 0.6798686182595108, "grad_norm": 0.3567495048046112, "learning_rate": 0.00034939736201905193, "loss": 1.05, "step": 21320 }, { "epoch": 0.6801875059791448, "grad_norm": 0.3582441806793213, "learning_rate": 0.0003480629881094304, "loss": 1.0754, "step": 21330 }, { "epoch": 0.6805063936987786, "grad_norm": 0.36522796750068665, "learning_rate": 0.0003467337102707133, "loss": 1.0412, "step": 21340 }, { "epoch": 0.6808252814184126, "grad_norm": 0.3645346462726593, "learning_rate": 0.0003454095090406356, "loss": 1.0676, "step": 21350 }, { "epoch": 0.6811441691380465, "grad_norm": 0.36625224351882935, "learning_rate": 0.00034409036503126023, "loss": 1.0462, "step": 21360 }, { "epoch": 0.6814630568576804, "grad_norm": 0.3716904819011688, "learning_rate": 0.00034277625892869374, "loss": 1.0536, "step": 21370 }, { "epoch": 0.6817819445773143, "grad_norm": 0.36496251821517944, "learning_rate": 0.0003414671714928041, "loss": 1.0798, "step": 21380 }, { "epoch": 0.6821008322969483, "grad_norm": 0.386317640542984, "learning_rate": 0.00034016308355693865, "loss": 1.0535, "step": 21390 }, { "epoch": 0.6824197200165821, "grad_norm": 0.3683474361896515, "learning_rate": 0.0003388639760276436, "loss": 1.0552, "step": 21400 }, { "epoch": 0.6827386077362161, "grad_norm": 0.3529580533504486, "learning_rate": 0.0003375698298843843, "loss": 1.0574, "step": 21410 }, { "epoch": 0.68305749545585, "grad_norm": 0.3774348795413971, "learning_rate": 0.00033628062617926724, "loss": 1.0483, "step": 21420 }, { "epoch": 0.6833763831754839, "grad_norm": 0.3695205748081207, "learning_rate": 0.0003349963460367619, "loss": 1.0497, "step": 21430 }, { "epoch": 0.6836952708951178, "grad_norm": 0.3666650950908661, "learning_rate": 0.0003337169706534251, "loss": 1.0553, "step": 21440 }, { "epoch": 0.6840141586147518, "grad_norm": 0.3757452666759491, "learning_rate": 0.00033244248129762514, "loss": 1.0438, "step": 21450 }, { "epoch": 0.6843330463343856, "grad_norm": 0.3607637882232666, "learning_rate": 0.00033117285930926786, "loss": 1.0489, "step": 21460 }, { "epoch": 0.6846519340540196, "grad_norm": 0.3701781928539276, "learning_rate": 0.0003299080860995236, "loss": 1.0499, "step": 21470 }, { "epoch": 0.6849708217736535, "grad_norm": 0.3538482189178467, "learning_rate": 0.00032864814315055425, "loss": 1.0465, "step": 21480 }, { "epoch": 0.6852897094932874, "grad_norm": 0.3631685972213745, "learning_rate": 0.00032739301201524317, "loss": 1.0559, "step": 21490 }, { "epoch": 0.6856085972129213, "grad_norm": 0.3718867301940918, "learning_rate": 0.0003261426743169244, "loss": 1.0677, "step": 21500 }, { "epoch": 0.6859274849325553, "grad_norm": 0.35852405428886414, "learning_rate": 0.0003248971117491138, "loss": 1.0414, "step": 21510 }, { "epoch": 0.6862463726521891, "grad_norm": 0.3558112680912018, "learning_rate": 0.00032365630607524107, "loss": 1.0516, "step": 21520 }, { "epoch": 0.6865652603718231, "grad_norm": 0.3524588644504547, "learning_rate": 0.00032242023912838264, "loss": 1.0414, "step": 21530 }, { "epoch": 0.686884148091457, "grad_norm": 0.3678439259529114, "learning_rate": 0.00032118889281099574, "loss": 1.054, "step": 21540 }, { "epoch": 0.6872030358110909, "grad_norm": 0.34993577003479004, "learning_rate": 0.0003199622490946535, "loss": 1.0423, "step": 21550 }, { "epoch": 0.6875219235307248, "grad_norm": 0.36512652039527893, "learning_rate": 0.0003187402900197809, "loss": 1.0519, "step": 21560 }, { "epoch": 0.6878408112503588, "grad_norm": 0.37657028436660767, "learning_rate": 0.0003175229976953918, "loss": 1.0358, "step": 21570 }, { "epoch": 0.6881596989699926, "grad_norm": 0.3696797788143158, "learning_rate": 0.000316310354298827, "loss": 1.0247, "step": 21580 }, { "epoch": 0.6884785866896266, "grad_norm": 0.3626907467842102, "learning_rate": 0.00031510234207549347, "loss": 1.0446, "step": 21590 }, { "epoch": 0.6887974744092605, "grad_norm": 0.36002790927886963, "learning_rate": 0.0003138989433386042, "loss": 1.0376, "step": 21600 }, { "epoch": 0.6891163621288944, "grad_norm": 0.3639136552810669, "learning_rate": 0.0003127001404689193, "loss": 1.0464, "step": 21610 }, { "epoch": 0.6894352498485283, "grad_norm": 0.37046611309051514, "learning_rate": 0.0003115059159144878, "loss": 1.05, "step": 21620 }, { "epoch": 0.6897541375681623, "grad_norm": 0.3616935610771179, "learning_rate": 0.0003103162521903914, "loss": 1.0301, "step": 21630 }, { "epoch": 0.6900730252877961, "grad_norm": 0.37073081731796265, "learning_rate": 0.00030913113187848755, "loss": 1.0426, "step": 21640 }, { "epoch": 0.6903919130074301, "grad_norm": 0.36260053515434265, "learning_rate": 0.00030795053762715507, "loss": 1.0484, "step": 21650 }, { "epoch": 0.690710800727064, "grad_norm": 0.35945361852645874, "learning_rate": 0.00030677445215103975, "loss": 1.0465, "step": 21660 }, { "epoch": 0.6910296884466979, "grad_norm": 0.3656920790672302, "learning_rate": 0.00030560285823080156, "loss": 1.0339, "step": 21670 }, { "epoch": 0.6913485761663318, "grad_norm": 0.3640262186527252, "learning_rate": 0.00030443573871286224, "loss": 1.0495, "step": 21680 }, { "epoch": 0.6916674638859658, "grad_norm": 0.3602169454097748, "learning_rate": 0.0003032730765091543, "loss": 1.0446, "step": 21690 }, { "epoch": 0.6919863516055996, "grad_norm": 0.36435046792030334, "learning_rate": 0.000302114854596871, "loss": 1.0479, "step": 21700 }, { "epoch": 0.6923052393252336, "grad_norm": 0.3639476001262665, "learning_rate": 0.00030096105601821663, "loss": 1.0413, "step": 21710 }, { "epoch": 0.6926241270448675, "grad_norm": 0.37392833828926086, "learning_rate": 0.0002998116638801587, "loss": 1.0367, "step": 21720 }, { "epoch": 0.6929430147645014, "grad_norm": 0.3621789216995239, "learning_rate": 0.0002986666613541806, "loss": 1.0523, "step": 21730 }, { "epoch": 0.6932619024841353, "grad_norm": 0.36604568362236023, "learning_rate": 0.00029752603167603484, "loss": 1.0348, "step": 21740 }, { "epoch": 0.6935807902037693, "grad_norm": 0.351910799741745, "learning_rate": 0.00029638975814549775, "loss": 1.0403, "step": 21750 }, { "epoch": 0.6938996779234031, "grad_norm": 0.35878685116767883, "learning_rate": 0.0002952578241261252, "loss": 1.0458, "step": 21760 }, { "epoch": 0.6942185656430371, "grad_norm": 0.36928388476371765, "learning_rate": 0.00029413021304500875, "loss": 1.0391, "step": 21770 }, { "epoch": 0.694537453362671, "grad_norm": 0.3780742883682251, "learning_rate": 0.0002930069083925332, "loss": 1.0471, "step": 21780 }, { "epoch": 0.6948563410823049, "grad_norm": 0.36558210849761963, "learning_rate": 0.0002918878937221347, "loss": 1.0406, "step": 21790 }, { "epoch": 0.6951752288019388, "grad_norm": 0.36797404289245605, "learning_rate": 0.00029077315265005994, "loss": 1.0435, "step": 21800 }, { "epoch": 0.6954941165215728, "grad_norm": 0.363120436668396, "learning_rate": 0.00028966266885512655, "loss": 1.0339, "step": 21810 }, { "epoch": 0.6958130042412066, "grad_norm": 0.36143240332603455, "learning_rate": 0.0002885564260784838, "loss": 1.0464, "step": 21820 }, { "epoch": 0.6961318919608406, "grad_norm": 0.36355921626091003, "learning_rate": 0.00028745440812337464, "loss": 1.0251, "step": 21830 }, { "epoch": 0.6964507796804745, "grad_norm": 0.3523947596549988, "learning_rate": 0.00028635659885489876, "loss": 1.0296, "step": 21840 }, { "epoch": 0.6967696674001084, "grad_norm": 0.36901727318763733, "learning_rate": 0.00028526298219977615, "loss": 1.0425, "step": 21850 }, { "epoch": 0.6970885551197423, "grad_norm": 0.371360182762146, "learning_rate": 0.0002841735421461118, "loss": 1.0334, "step": 21860 }, { "epoch": 0.6974074428393763, "grad_norm": 0.35016298294067383, "learning_rate": 0.00028308826274316135, "loss": 1.0492, "step": 21870 }, { "epoch": 0.6977263305590101, "grad_norm": 0.3725981116294861, "learning_rate": 0.00028200712810109736, "loss": 1.0343, "step": 21880 }, { "epoch": 0.6980452182786441, "grad_norm": 0.36036965250968933, "learning_rate": 0.00028093012239077697, "loss": 1.0412, "step": 21890 }, { "epoch": 0.698364105998278, "grad_norm": 0.3615533411502838, "learning_rate": 0.0002798572298435098, "loss": 1.0385, "step": 21900 }, { "epoch": 0.6986829937179119, "grad_norm": 0.34284889698028564, "learning_rate": 0.00027878843475082743, "loss": 1.0215, "step": 21910 }, { "epoch": 0.6990018814375458, "grad_norm": 0.35769203305244446, "learning_rate": 0.00027772372146425304, "loss": 1.0215, "step": 21920 }, { "epoch": 0.6993207691571798, "grad_norm": 0.3554302155971527, "learning_rate": 0.00027666307439507253, "loss": 1.0346, "step": 21930 }, { "epoch": 0.6996396568768136, "grad_norm": 0.35582250356674194, "learning_rate": 0.0002756064780141064, "loss": 1.0164, "step": 21940 }, { "epoch": 0.6999585445964476, "grad_norm": 0.35530009865760803, "learning_rate": 0.00027455391685148186, "loss": 1.0241, "step": 21950 }, { "epoch": 0.7002774323160815, "grad_norm": 0.3660754859447479, "learning_rate": 0.0002735053754964071, "loss": 1.0366, "step": 21960 }, { "epoch": 0.7005963200357154, "grad_norm": 0.36314699053764343, "learning_rate": 0.00027246083859694485, "loss": 1.0118, "step": 21970 }, { "epoch": 0.7009152077553493, "grad_norm": 0.3499937355518341, "learning_rate": 0.0002714202908597884, "loss": 1.0371, "step": 21980 }, { "epoch": 0.7012340954749833, "grad_norm": 0.3634529411792755, "learning_rate": 0.00027038371705003693, "loss": 1.0425, "step": 21990 }, { "epoch": 0.7015529831946172, "grad_norm": 0.3665272295475006, "learning_rate": 0.0002693511019909731, "loss": 1.0308, "step": 22000 }, { "epoch": 0.7018718709142511, "grad_norm": 0.3651745617389679, "learning_rate": 0.0002683224305638403, "loss": 1.0397, "step": 22010 }, { "epoch": 0.702190758633885, "grad_norm": 0.371563196182251, "learning_rate": 0.0002672976877076218, "loss": 1.0231, "step": 22020 }, { "epoch": 0.702509646353519, "grad_norm": 0.3721901476383209, "learning_rate": 0.0002662768584188198, "loss": 1.0377, "step": 22030 }, { "epoch": 0.7028285340731528, "grad_norm": 0.36019212007522583, "learning_rate": 0.0002652599277512359, "loss": 1.0302, "step": 22040 }, { "epoch": 0.7031474217927868, "grad_norm": 0.3766479194164276, "learning_rate": 0.00026424688081575263, "loss": 1.0314, "step": 22050 }, { "epoch": 0.7034663095124207, "grad_norm": 0.34915393590927124, "learning_rate": 0.0002632377027801149, "loss": 1.0087, "step": 22060 }, { "epoch": 0.7037851972320546, "grad_norm": 0.36800262331962585, "learning_rate": 0.000262232378868713, "loss": 1.0457, "step": 22070 }, { "epoch": 0.7041040849516885, "grad_norm": 0.3524855673313141, "learning_rate": 0.00026123089436236655, "loss": 1.0279, "step": 22080 }, { "epoch": 0.7044229726713225, "grad_norm": 0.3520108461380005, "learning_rate": 0.0002602332345981087, "loss": 1.0168, "step": 22090 }, { "epoch": 0.7047418603909563, "grad_norm": 0.3598593771457672, "learning_rate": 0.00025923938496897155, "loss": 1.0387, "step": 22100 }, { "epoch": 0.7050607481105903, "grad_norm": 0.37009191513061523, "learning_rate": 0.0002582493309237722, "loss": 1.0246, "step": 22110 }, { "epoch": 0.7053796358302242, "grad_norm": 0.35536834597587585, "learning_rate": 0.0002572630579669, "loss": 1.0162, "step": 22120 }, { "epoch": 0.7056985235498581, "grad_norm": 0.3501632511615753, "learning_rate": 0.0002562805516581037, "loss": 1.0323, "step": 22130 }, { "epoch": 0.706017411269492, "grad_norm": 0.3461703658103943, "learning_rate": 0.00025530179761228084, "loss": 1.0198, "step": 22140 }, { "epoch": 0.706336298989126, "grad_norm": 0.34733515977859497, "learning_rate": 0.00025432678149926643, "loss": 1.0226, "step": 22150 }, { "epoch": 0.7066551867087598, "grad_norm": 0.3601519465446472, "learning_rate": 0.00025335548904362355, "loss": 1.0376, "step": 22160 }, { "epoch": 0.7069740744283938, "grad_norm": 0.3628826141357422, "learning_rate": 0.0002523879060244341, "loss": 1.0034, "step": 22170 }, { "epoch": 0.7072929621480277, "grad_norm": 0.3661800026893616, "learning_rate": 0.00025142401827509084, "loss": 1.0231, "step": 22180 }, { "epoch": 0.7076118498676616, "grad_norm": 0.36240875720977783, "learning_rate": 0.00025046381168308975, "loss": 1.032, "step": 22190 }, { "epoch": 0.7079307375872955, "grad_norm": 0.35451745986938477, "learning_rate": 0.00024950727218982346, "loss": 1.0213, "step": 22200 }, { "epoch": 0.7082496253069295, "grad_norm": 0.3512251079082489, "learning_rate": 0.00024855438579037556, "loss": 1.024, "step": 22210 }, { "epoch": 0.7085685130265633, "grad_norm": 0.349729984998703, "learning_rate": 0.0002476051385333152, "loss": 1.0192, "step": 22220 }, { "epoch": 0.7088874007461973, "grad_norm": 0.3506690561771393, "learning_rate": 0.0002466595165204934, "loss": 1.0379, "step": 22230 }, { "epoch": 0.7092062884658312, "grad_norm": 0.368215411901474, "learning_rate": 0.000245717505906839, "loss": 1.0402, "step": 22240 }, { "epoch": 0.7095251761854651, "grad_norm": 0.36005720496177673, "learning_rate": 0.00024477909290015614, "loss": 1.014, "step": 22250 }, { "epoch": 0.709844063905099, "grad_norm": 0.34494173526763916, "learning_rate": 0.00024384426376092245, "loss": 1.0154, "step": 22260 }, { "epoch": 0.710162951624733, "grad_norm": 0.3667897582054138, "learning_rate": 0.00024291300480208775, "loss": 1.0186, "step": 22270 }, { "epoch": 0.7104818393443668, "grad_norm": 0.36466264724731445, "learning_rate": 0.00024198530238887377, "loss": 1.0204, "step": 22280 }, { "epoch": 0.7108007270640008, "grad_norm": 0.35865655541419983, "learning_rate": 0.00024106114293857438, "loss": 1.0302, "step": 22290 }, { "epoch": 0.7111196147836347, "grad_norm": 0.3657973110675812, "learning_rate": 0.00024014051292035676, "loss": 1.0283, "step": 22300 }, { "epoch": 0.7114385025032686, "grad_norm": 0.36389386653900146, "learning_rate": 0.00023922339885506342, "loss": 1.0234, "step": 22310 }, { "epoch": 0.7117573902229025, "grad_norm": 0.3584628105163574, "learning_rate": 0.00023830978731501472, "loss": 1.0254, "step": 22320 }, { "epoch": 0.7120762779425365, "grad_norm": 0.36497706174850464, "learning_rate": 0.00023739966492381225, "loss": 1.0239, "step": 22330 }, { "epoch": 0.7123951656621703, "grad_norm": 0.36257028579711914, "learning_rate": 0.0002364930183561431, "loss": 1.0161, "step": 22340 }, { "epoch": 0.7127140533818043, "grad_norm": 0.3534430265426636, "learning_rate": 0.00023558983433758467, "loss": 1.0032, "step": 22350 }, { "epoch": 0.7130329411014382, "grad_norm": 0.36840540170669556, "learning_rate": 0.0002346900996444103, "loss": 1.0212, "step": 22360 }, { "epoch": 0.7133518288210721, "grad_norm": 0.35739049315452576, "learning_rate": 0.00023379380110339585, "loss": 1.0179, "step": 22370 }, { "epoch": 0.713670716540706, "grad_norm": 0.3666969835758209, "learning_rate": 0.00023290092559162653, "loss": 1.0044, "step": 22380 }, { "epoch": 0.71398960426034, "grad_norm": 0.35255569219589233, "learning_rate": 0.0002320114600363049, "loss": 1.01, "step": 22390 }, { "epoch": 0.7143084919799738, "grad_norm": 0.3584805130958557, "learning_rate": 0.0002311253914145597, "loss": 0.9974, "step": 22400 }, { "epoch": 0.7146273796996078, "grad_norm": 0.3672718405723572, "learning_rate": 0.00023024270675325468, "loss": 1.033, "step": 22410 }, { "epoch": 0.7149462674192417, "grad_norm": 0.3601375222206116, "learning_rate": 0.0002293633931287991, "loss": 1.0273, "step": 22420 }, { "epoch": 0.7152651551388756, "grad_norm": 0.35628193616867065, "learning_rate": 0.00022848743766695835, "loss": 1.0063, "step": 22430 }, { "epoch": 0.7155840428585095, "grad_norm": 0.3655938506126404, "learning_rate": 0.00022761482754266545, "loss": 1.0111, "step": 22440 }, { "epoch": 0.7159029305781435, "grad_norm": 0.37356406450271606, "learning_rate": 0.00022674554997983317, "loss": 1.0128, "step": 22450 }, { "epoch": 0.7162218182977773, "grad_norm": 0.3504394590854645, "learning_rate": 0.0002258795922511673, "loss": 1.0223, "step": 22460 }, { "epoch": 0.7165407060174113, "grad_norm": 0.359099417924881, "learning_rate": 0.00022501694167797987, "loss": 1.0093, "step": 22470 }, { "epoch": 0.7168595937370452, "grad_norm": 0.3635914921760559, "learning_rate": 0.00022415758563000388, "loss": 1.0067, "step": 22480 }, { "epoch": 0.7171784814566791, "grad_norm": 0.3575493097305298, "learning_rate": 0.0002233015115252082, "loss": 1.0181, "step": 22490 }, { "epoch": 0.717497369176313, "grad_norm": 0.3696117401123047, "learning_rate": 0.00022244870682961338, "loss": 1.0185, "step": 22500 }, { "epoch": 0.717816256895947, "grad_norm": 0.3546154201030731, "learning_rate": 0.00022159915905710817, "loss": 1.0165, "step": 22510 }, { "epoch": 0.7181351446155808, "grad_norm": 0.35239526629447937, "learning_rate": 0.00022075285576926662, "loss": 1.005, "step": 22520 }, { "epoch": 0.7184540323352148, "grad_norm": 0.3775861859321594, "learning_rate": 0.00021990978457516616, "loss": 1.0082, "step": 22530 }, { "epoch": 0.7187729200548487, "grad_norm": 0.3526906371116638, "learning_rate": 0.00021906993313120594, "loss": 1.0081, "step": 22540 }, { "epoch": 0.7190918077744826, "grad_norm": 0.36010169982910156, "learning_rate": 0.00021823328914092629, "loss": 1.0089, "step": 22550 }, { "epoch": 0.7194106954941165, "grad_norm": 0.3513311445713043, "learning_rate": 0.00021739984035482846, "loss": 1.0087, "step": 22560 }, { "epoch": 0.7197295832137505, "grad_norm": 0.36615613102912903, "learning_rate": 0.00021656957457019568, "loss": 1.0124, "step": 22570 }, { "epoch": 0.7200484709333843, "grad_norm": 0.36499765515327454, "learning_rate": 0.00021574247963091408, "loss": 1.0119, "step": 22580 }, { "epoch": 0.7203673586530183, "grad_norm": 0.37087002396583557, "learning_rate": 0.00021491854342729487, "loss": 1.0047, "step": 22590 }, { "epoch": 0.7206862463726522, "grad_norm": 0.3546287417411804, "learning_rate": 0.00021409775389589705, "loss": 1.0131, "step": 22600 }, { "epoch": 0.7210051340922861, "grad_norm": 0.3611092269420624, "learning_rate": 0.00021328009901935087, "loss": 0.9961, "step": 22610 }, { "epoch": 0.72132402181192, "grad_norm": 0.3478728234767914, "learning_rate": 0.00021246556682618162, "loss": 1.0134, "step": 22620 }, { "epoch": 0.721642909531554, "grad_norm": 0.3665063977241516, "learning_rate": 0.0002116541453906347, "loss": 1.0069, "step": 22630 }, { "epoch": 0.7219617972511878, "grad_norm": 0.35230499505996704, "learning_rate": 0.0002108458228325007, "loss": 0.9957, "step": 22640 }, { "epoch": 0.7222806849708218, "grad_norm": 0.3609510362148285, "learning_rate": 0.0002100405873169417, "loss": 1.0128, "step": 22650 }, { "epoch": 0.7225995726904557, "grad_norm": 0.38953086733818054, "learning_rate": 0.0002092384270543178, "loss": 1.0046, "step": 22660 }, { "epoch": 0.7229184604100896, "grad_norm": 0.36705484986305237, "learning_rate": 0.00020843933030001454, "loss": 0.9997, "step": 22670 }, { "epoch": 0.7232373481297235, "grad_norm": 0.35538023710250854, "learning_rate": 0.00020764328535427122, "loss": 1.005, "step": 22680 }, { "epoch": 0.7235562358493575, "grad_norm": 0.3577936589717865, "learning_rate": 0.0002068502805620091, "loss": 1.0213, "step": 22690 }, { "epoch": 0.7238751235689913, "grad_norm": 0.3556760549545288, "learning_rate": 0.0002060603043126613, "loss": 1.0144, "step": 22700 }, { "epoch": 0.7241940112886253, "grad_norm": 0.37856200337409973, "learning_rate": 0.00020527334504000228, "loss": 1.0109, "step": 22710 }, { "epoch": 0.7245128990082592, "grad_norm": 0.3712582290172577, "learning_rate": 0.00020448939122197894, "loss": 0.997, "step": 22720 }, { "epoch": 0.7248317867278931, "grad_norm": 0.3549712598323822, "learning_rate": 0.00020370843138054165, "loss": 1.0027, "step": 22730 }, { "epoch": 0.725150674447527, "grad_norm": 0.37729141116142273, "learning_rate": 0.00020293045408147637, "loss": 0.9918, "step": 22740 }, { "epoch": 0.725469562167161, "grad_norm": 0.36283665895462036, "learning_rate": 0.00020215544793423702, "loss": 0.9999, "step": 22750 }, { "epoch": 0.7257884498867948, "grad_norm": 0.37182876467704773, "learning_rate": 0.00020138340159177898, "loss": 1.0097, "step": 22760 }, { "epoch": 0.7261073376064288, "grad_norm": 0.36891797184944153, "learning_rate": 0.00020061430375039268, "loss": 1.0067, "step": 22770 }, { "epoch": 0.7264262253260627, "grad_norm": 0.36455395817756653, "learning_rate": 0.00019984814314953845, "loss": 0.9968, "step": 22780 }, { "epoch": 0.7267451130456966, "grad_norm": 0.36560770869255066, "learning_rate": 0.0001990849085716812, "loss": 1.0218, "step": 22790 }, { "epoch": 0.7270640007653305, "grad_norm": 0.3545674681663513, "learning_rate": 0.0001983245888421265, "loss": 1.001, "step": 22800 }, { "epoch": 0.7273828884849645, "grad_norm": 0.3545055687427521, "learning_rate": 0.000197567172828857, "loss": 1.0128, "step": 22810 }, { "epoch": 0.7277017762045984, "grad_norm": 0.36697012186050415, "learning_rate": 0.0001968126494423692, "loss": 1.0175, "step": 22820 }, { "epoch": 0.7280206639242323, "grad_norm": 0.35822004079818726, "learning_rate": 0.0001960610076355113, "loss": 1.0116, "step": 22830 }, { "epoch": 0.7283395516438662, "grad_norm": 0.3711821436882019, "learning_rate": 0.00019531223640332132, "loss": 1.0206, "step": 22840 }, { "epoch": 0.7286584393635002, "grad_norm": 0.3546949028968811, "learning_rate": 0.00019456632478286607, "loss": 1.0219, "step": 22850 }, { "epoch": 0.728977327083134, "grad_norm": 0.36350348591804504, "learning_rate": 0.00019382326185308054, "loss": 0.9975, "step": 22860 }, { "epoch": 0.729296214802768, "grad_norm": 0.3645276725292206, "learning_rate": 0.00019308303673460815, "loss": 1.0059, "step": 22870 }, { "epoch": 0.7296151025224019, "grad_norm": 0.3612558841705322, "learning_rate": 0.0001923456385896413, "loss": 0.9885, "step": 22880 }, { "epoch": 0.7299339902420358, "grad_norm": 0.35814574360847473, "learning_rate": 0.0001916110566217628, "loss": 0.9994, "step": 22890 }, { "epoch": 0.7302528779616697, "grad_norm": 0.36161062121391296, "learning_rate": 0.00019087928007578776, "loss": 1.0177, "step": 22900 }, { "epoch": 0.7305717656813037, "grad_norm": 0.350792795419693, "learning_rate": 0.0001901502982376061, "loss": 0.9979, "step": 22910 }, { "epoch": 0.7308906534009375, "grad_norm": 0.35586076974868774, "learning_rate": 0.00018942410043402573, "loss": 0.9949, "step": 22920 }, { "epoch": 0.7312095411205715, "grad_norm": 0.3612673282623291, "learning_rate": 0.0001887006760326162, "loss": 1.0101, "step": 22930 }, { "epoch": 0.7315284288402054, "grad_norm": 0.35979345440864563, "learning_rate": 0.00018798001444155318, "loss": 0.9986, "step": 22940 }, { "epoch": 0.7318473165598393, "grad_norm": 0.3546121418476105, "learning_rate": 0.00018726210510946323, "loss": 0.9955, "step": 22950 }, { "epoch": 0.7321662042794732, "grad_norm": 0.35720688104629517, "learning_rate": 0.00018654693752526939, "loss": 0.9991, "step": 22960 }, { "epoch": 0.7324850919991072, "grad_norm": 0.36118242144584656, "learning_rate": 0.00018583450121803722, "loss": 0.986, "step": 22970 }, { "epoch": 0.732803979718741, "grad_norm": 0.36114799976348877, "learning_rate": 0.00018512478575682153, "loss": 0.9957, "step": 22980 }, { "epoch": 0.733122867438375, "grad_norm": 0.35737699270248413, "learning_rate": 0.00018441778075051386, "loss": 0.9852, "step": 22990 }, { "epoch": 0.7334417551580089, "grad_norm": 0.3749620020389557, "learning_rate": 0.00018371347584768985, "loss": 1.0069, "step": 23000 }, { "epoch": 0.7337606428776428, "grad_norm": 0.35069161653518677, "learning_rate": 0.0001830118607364582, "loss": 0.9929, "step": 23010 }, { "epoch": 0.7340795305972767, "grad_norm": 0.36476099491119385, "learning_rate": 0.00018231292514430937, "loss": 1.0061, "step": 23020 }, { "epoch": 0.7343984183169107, "grad_norm": 0.35989847779273987, "learning_rate": 0.00018161665883796536, "loss": 1.0136, "step": 23030 }, { "epoch": 0.7347173060365445, "grad_norm": 0.353773295879364, "learning_rate": 0.0001809230516232297, "loss": 0.9859, "step": 23040 }, { "epoch": 0.7350361937561785, "grad_norm": 0.3702070713043213, "learning_rate": 0.0001802320933448384, "loss": 1.0134, "step": 23050 }, { "epoch": 0.7353550814758124, "grad_norm": 0.37036627531051636, "learning_rate": 0.0001795437738863111, "loss": 1.001, "step": 23060 }, { "epoch": 0.7356739691954463, "grad_norm": 0.355307936668396, "learning_rate": 0.000178858083169803, "loss": 0.9962, "step": 23070 }, { "epoch": 0.7359928569150802, "grad_norm": 0.35053226351737976, "learning_rate": 0.00017817501115595735, "loss": 1.0033, "step": 23080 }, { "epoch": 0.7363117446347142, "grad_norm": 0.3521139919757843, "learning_rate": 0.0001774945478437585, "loss": 0.9785, "step": 23090 }, { "epoch": 0.736630632354348, "grad_norm": 0.36836716532707214, "learning_rate": 0.00017681668327038525, "loss": 0.9966, "step": 23100 }, { "epoch": 0.736949520073982, "grad_norm": 0.3591693937778473, "learning_rate": 0.00017614140751106525, "loss": 0.9818, "step": 23110 }, { "epoch": 0.7372684077936159, "grad_norm": 0.36045387387275696, "learning_rate": 0.00017546871067892955, "loss": 0.9973, "step": 23120 }, { "epoch": 0.7375872955132498, "grad_norm": 0.3539069592952728, "learning_rate": 0.00017479858292486792, "loss": 0.99, "step": 23130 }, { "epoch": 0.7379061832328837, "grad_norm": 0.37071090936660767, "learning_rate": 0.0001741310144373845, "loss": 1.0186, "step": 23140 }, { "epoch": 0.7382250709525177, "grad_norm": 0.34401729702949524, "learning_rate": 0.00017346599544245428, "loss": 1.0128, "step": 23150 }, { "epoch": 0.7385439586721515, "grad_norm": 0.362400621175766, "learning_rate": 0.00017280351620338, "loss": 0.9964, "step": 23160 }, { "epoch": 0.7388628463917855, "grad_norm": 0.3564687669277191, "learning_rate": 0.00017214356702064952, "loss": 1.0032, "step": 23170 }, { "epoch": 0.7391817341114194, "grad_norm": 0.35708966851234436, "learning_rate": 0.00017148613823179387, "loss": 0.9913, "step": 23180 }, { "epoch": 0.7395006218310533, "grad_norm": 0.3605559170246124, "learning_rate": 0.00017083122021124575, "loss": 0.9906, "step": 23190 }, { "epoch": 0.7398195095506872, "grad_norm": 0.36472660303115845, "learning_rate": 0.00017017880337019854, "loss": 0.9896, "step": 23200 }, { "epoch": 0.7401383972703212, "grad_norm": 0.3542068302631378, "learning_rate": 0.00016952887815646604, "loss": 0.9998, "step": 23210 }, { "epoch": 0.740457284989955, "grad_norm": 0.3632705807685852, "learning_rate": 0.0001688814350543425, "loss": 0.9837, "step": 23220 }, { "epoch": 0.740776172709589, "grad_norm": 0.3601367473602295, "learning_rate": 0.00016823646458446337, "loss": 0.9773, "step": 23230 }, { "epoch": 0.7410950604292229, "grad_norm": 0.3728727102279663, "learning_rate": 0.00016759395730366647, "loss": 0.9863, "step": 23240 }, { "epoch": 0.7414139481488567, "grad_norm": 0.3813025951385498, "learning_rate": 0.00016695390380485372, "loss": 0.9903, "step": 23250 }, { "epoch": 0.7417328358684907, "grad_norm": 0.3660871088504791, "learning_rate": 0.00016631629471685345, "loss": 0.9877, "step": 23260 }, { "epoch": 0.7420517235881247, "grad_norm": 0.35876861214637756, "learning_rate": 0.0001656811207042832, "loss": 0.9885, "step": 23270 }, { "epoch": 0.7423706113077585, "grad_norm": 0.3620452284812927, "learning_rate": 0.00016504837246741303, "loss": 0.981, "step": 23280 }, { "epoch": 0.7426894990273925, "grad_norm": 0.3587225675582886, "learning_rate": 0.00016441804074202924, "loss": 0.9838, "step": 23290 }, { "epoch": 0.7430083867470264, "grad_norm": 0.3670322895050049, "learning_rate": 0.00016379011629929894, "loss": 1.0061, "step": 23300 }, { "epoch": 0.7433272744666602, "grad_norm": 0.36198362708091736, "learning_rate": 0.00016316458994563475, "loss": 0.9907, "step": 23310 }, { "epoch": 0.7436461621862942, "grad_norm": 0.3827613890171051, "learning_rate": 0.00016254145252256035, "loss": 1.0024, "step": 23320 }, { "epoch": 0.7439650499059282, "grad_norm": 0.3546290993690491, "learning_rate": 0.0001619206949065762, "loss": 0.999, "step": 23330 }, { "epoch": 0.744283937625562, "grad_norm": 0.36129066348075867, "learning_rate": 0.00016130230800902616, "loss": 0.9967, "step": 23340 }, { "epoch": 0.744602825345196, "grad_norm": 0.356289803981781, "learning_rate": 0.00016068628277596432, "loss": 0.9941, "step": 23350 }, { "epoch": 0.7449217130648299, "grad_norm": 0.3629865348339081, "learning_rate": 0.00016007261018802238, "loss": 0.9751, "step": 23360 }, { "epoch": 0.7452406007844637, "grad_norm": 0.34594714641571045, "learning_rate": 0.00015946128126027772, "loss": 0.9881, "step": 23370 }, { "epoch": 0.7455594885040977, "grad_norm": 0.3698471784591675, "learning_rate": 0.0001588522870421218, "loss": 0.9857, "step": 23380 }, { "epoch": 0.7458783762237317, "grad_norm": 0.37548011541366577, "learning_rate": 0.00015824561861712907, "loss": 0.9918, "step": 23390 }, { "epoch": 0.7461972639433655, "grad_norm": 0.3651987612247467, "learning_rate": 0.00015764126710292645, "loss": 0.9896, "step": 23400 }, { "epoch": 0.7465161516629994, "grad_norm": 0.34970924258232117, "learning_rate": 0.0001570392236510633, "loss": 1.003, "step": 23410 }, { "epoch": 0.7468350393826334, "grad_norm": 0.3621990978717804, "learning_rate": 0.00015643947944688188, "loss": 0.9941, "step": 23420 }, { "epoch": 0.7471539271022672, "grad_norm": 0.35395199060440063, "learning_rate": 0.00015584202570938826, "loss": 0.9834, "step": 23430 }, { "epoch": 0.7474728148219012, "grad_norm": 0.37184348702430725, "learning_rate": 0.00015524685369112376, "loss": 0.9715, "step": 23440 }, { "epoch": 0.7477917025415352, "grad_norm": 0.349755197763443, "learning_rate": 0.00015465395467803683, "loss": 0.976, "step": 23450 }, { "epoch": 0.748110590261169, "grad_norm": 0.37444648146629333, "learning_rate": 0.00015406331998935564, "loss": 1.0032, "step": 23460 }, { "epoch": 0.748429477980803, "grad_norm": 0.36452704668045044, "learning_rate": 0.00015347494097746068, "loss": 0.9924, "step": 23470 }, { "epoch": 0.7487483657004369, "grad_norm": 0.3637312352657318, "learning_rate": 0.0001528888090277585, "loss": 0.9791, "step": 23480 }, { "epoch": 0.7490672534200707, "grad_norm": 0.36109039187431335, "learning_rate": 0.0001523049155585553, "loss": 0.9841, "step": 23490 }, { "epoch": 0.7493861411397047, "grad_norm": 0.36957335472106934, "learning_rate": 0.00015172325202093142, "loss": 0.9906, "step": 23500 }, { "epoch": 0.7497050288593387, "grad_norm": 0.3457266688346863, "learning_rate": 0.00015114380989861618, "loss": 0.9777, "step": 23510 }, { "epoch": 0.7500239165789725, "grad_norm": 0.359375, "learning_rate": 0.0001505665807078631, "loss": 0.9763, "step": 23520 }, { "epoch": 0.7503428042986064, "grad_norm": 0.36092039942741394, "learning_rate": 0.00014999155599732582, "loss": 0.9858, "step": 23530 }, { "epoch": 0.7506616920182404, "grad_norm": 0.34542810916900635, "learning_rate": 0.00014941872734793423, "loss": 0.9811, "step": 23540 }, { "epoch": 0.7509805797378742, "grad_norm": 0.36623185873031616, "learning_rate": 0.0001488480863727712, "loss": 0.9933, "step": 23550 }, { "epoch": 0.7512994674575082, "grad_norm": 0.34604063630104065, "learning_rate": 0.00014827962471694999, "loss": 0.9802, "step": 23560 }, { "epoch": 0.7516183551771422, "grad_norm": 0.36921989917755127, "learning_rate": 0.00014771333405749166, "loss": 0.993, "step": 23570 }, { "epoch": 0.751937242896776, "grad_norm": 0.3590368330478668, "learning_rate": 0.00014714920610320336, "loss": 0.9773, "step": 23580 }, { "epoch": 0.75225613061641, "grad_norm": 0.36252063512802124, "learning_rate": 0.00014658723259455698, "loss": 0.9775, "step": 23590 }, { "epoch": 0.7525750183360439, "grad_norm": 0.36665475368499756, "learning_rate": 0.00014602740530356807, "loss": 0.9747, "step": 23600 }, { "epoch": 0.7528939060556777, "grad_norm": 0.36198699474334717, "learning_rate": 0.00014546971603367545, "loss": 0.9912, "step": 23610 }, { "epoch": 0.7532127937753117, "grad_norm": 0.3599792718887329, "learning_rate": 0.00014491415661962125, "loss": 0.9923, "step": 23620 }, { "epoch": 0.7535316814949456, "grad_norm": 0.36203503608703613, "learning_rate": 0.00014436071892733128, "loss": 0.9944, "step": 23630 }, { "epoch": 0.7538505692145795, "grad_norm": 0.34409478306770325, "learning_rate": 0.00014380939485379596, "loss": 0.9663, "step": 23640 }, { "epoch": 0.7541694569342134, "grad_norm": 0.38052651286125183, "learning_rate": 0.00014326017632695172, "loss": 0.9878, "step": 23650 }, { "epoch": 0.7544883446538474, "grad_norm": 0.3748937249183655, "learning_rate": 0.00014271305530556277, "loss": 0.9844, "step": 23660 }, { "epoch": 0.7548072323734814, "grad_norm": 0.3574012517929077, "learning_rate": 0.00014216802377910344, "loss": 0.9798, "step": 23670 }, { "epoch": 0.7551261200931152, "grad_norm": 0.3533320128917694, "learning_rate": 0.00014162507376764066, "loss": 0.9785, "step": 23680 }, { "epoch": 0.7554450078127491, "grad_norm": 0.3751009702682495, "learning_rate": 0.0001410841973217176, "loss": 0.9992, "step": 23690 }, { "epoch": 0.7557638955323831, "grad_norm": 0.35940220952033997, "learning_rate": 0.0001405453865222367, "loss": 0.9833, "step": 23700 }, { "epoch": 0.7560827832520169, "grad_norm": 0.350617378950119, "learning_rate": 0.0001400086334803442, "loss": 0.9887, "step": 23710 }, { "epoch": 0.7564016709716509, "grad_norm": 0.37829482555389404, "learning_rate": 0.0001394739303373143, "loss": 0.9735, "step": 23720 }, { "epoch": 0.7567205586912849, "grad_norm": 0.3666497468948364, "learning_rate": 0.0001389412692644344, "loss": 0.9778, "step": 23730 }, { "epoch": 0.7570394464109187, "grad_norm": 0.3569285273551941, "learning_rate": 0.00013841064246289018, "loss": 0.9804, "step": 23740 }, { "epoch": 0.7573583341305526, "grad_norm": 0.3598553240299225, "learning_rate": 0.00013788204216365162, "loss": 0.9758, "step": 23750 }, { "epoch": 0.7576772218501866, "grad_norm": 0.3719578683376312, "learning_rate": 0.00013735546062735915, "loss": 0.9679, "step": 23760 }, { "epoch": 0.7579961095698204, "grad_norm": 0.37967368960380554, "learning_rate": 0.00013683089014421051, "loss": 0.9899, "step": 23770 }, { "epoch": 0.7583149972894544, "grad_norm": 0.3560192584991455, "learning_rate": 0.0001363083230338476, "loss": 0.9694, "step": 23780 }, { "epoch": 0.7586338850090883, "grad_norm": 0.36281514167785645, "learning_rate": 0.00013578775164524418, "loss": 0.9864, "step": 23790 }, { "epoch": 0.7589527727287222, "grad_norm": 0.36002716422080994, "learning_rate": 0.00013526916835659387, "loss": 1.0041, "step": 23800 }, { "epoch": 0.7592716604483561, "grad_norm": 0.3768812119960785, "learning_rate": 0.00013475256557519852, "loss": 0.9687, "step": 23810 }, { "epoch": 0.7595905481679901, "grad_norm": 0.36057835817337036, "learning_rate": 0.00013423793573735703, "loss": 0.9824, "step": 23820 }, { "epoch": 0.7599094358876239, "grad_norm": 0.3561815619468689, "learning_rate": 0.00013372527130825463, "loss": 0.9826, "step": 23830 }, { "epoch": 0.7602283236072579, "grad_norm": 0.3604488968849182, "learning_rate": 0.00013321456478185254, "loss": 0.9808, "step": 23840 }, { "epoch": 0.7605472113268918, "grad_norm": 0.3659672141075134, "learning_rate": 0.00013270580868077813, "loss": 0.9812, "step": 23850 }, { "epoch": 0.7608660990465257, "grad_norm": 0.3699098527431488, "learning_rate": 0.00013219899555621538, "loss": 0.9706, "step": 23860 }, { "epoch": 0.7611849867661596, "grad_norm": 0.3599204123020172, "learning_rate": 0.00013169411798779585, "loss": 0.9992, "step": 23870 }, { "epoch": 0.7615038744857936, "grad_norm": 0.3598966896533966, "learning_rate": 0.00013119116858348995, "loss": 0.9848, "step": 23880 }, { "epoch": 0.7618227622054274, "grad_norm": 0.3556804358959198, "learning_rate": 0.0001306901399794989, "loss": 0.9872, "step": 23890 }, { "epoch": 0.7621416499250614, "grad_norm": 0.3712446689605713, "learning_rate": 0.0001301910248401467, "loss": 0.9711, "step": 23900 }, { "epoch": 0.7624605376446953, "grad_norm": 0.3657471835613251, "learning_rate": 0.0001296938158577729, "loss": 0.9854, "step": 23910 }, { "epoch": 0.7627794253643292, "grad_norm": 0.3565327227115631, "learning_rate": 0.0001291985057526255, "loss": 0.9698, "step": 23920 }, { "epoch": 0.7630983130839631, "grad_norm": 0.35236185789108276, "learning_rate": 0.00012870508727275446, "loss": 0.9737, "step": 23930 }, { "epoch": 0.7634172008035971, "grad_norm": 0.3755548894405365, "learning_rate": 0.0001282135531939054, "loss": 0.9882, "step": 23940 }, { "epoch": 0.7637360885232309, "grad_norm": 0.36770176887512207, "learning_rate": 0.00012772389631941392, "loss": 0.9807, "step": 23950 }, { "epoch": 0.7640549762428649, "grad_norm": 0.3637702763080597, "learning_rate": 0.00012723610948010017, "loss": 0.977, "step": 23960 }, { "epoch": 0.7643738639624988, "grad_norm": 0.36087551712989807, "learning_rate": 0.00012675018553416398, "loss": 0.9674, "step": 23970 }, { "epoch": 0.7646927516821327, "grad_norm": 0.3561502993106842, "learning_rate": 0.00012626611736708018, "loss": 0.9812, "step": 23980 }, { "epoch": 0.7650116394017666, "grad_norm": 0.37487906217575073, "learning_rate": 0.00012578389789149453, "loss": 0.9791, "step": 23990 }, { "epoch": 0.7653305271214006, "grad_norm": 0.3568199872970581, "learning_rate": 0.00012530352004711987, "loss": 0.9932, "step": 24000 }, { "epoch": 0.7656494148410344, "grad_norm": 0.3541073799133301, "learning_rate": 0.00012482497680063275, "loss": 0.9596, "step": 24010 }, { "epoch": 0.7659683025606684, "grad_norm": 0.3517587184906006, "learning_rate": 0.00012434826114557067, "loss": 0.9755, "step": 24020 }, { "epoch": 0.7662871902803023, "grad_norm": 0.37279677391052246, "learning_rate": 0.00012387336610222914, "loss": 0.9945, "step": 24030 }, { "epoch": 0.7666060779999362, "grad_norm": 0.34747180342674255, "learning_rate": 0.00012340028471755974, "loss": 0.9832, "step": 24040 }, { "epoch": 0.7669249657195701, "grad_norm": 0.36472681164741516, "learning_rate": 0.00012292901006506822, "loss": 0.9802, "step": 24050 }, { "epoch": 0.7672438534392041, "grad_norm": 0.3583838939666748, "learning_rate": 0.00012245953524471316, "loss": 0.9837, "step": 24060 }, { "epoch": 0.7675627411588379, "grad_norm": 0.3646237254142761, "learning_rate": 0.00012199185338280484, "loss": 1.0001, "step": 24070 }, { "epoch": 0.7678816288784719, "grad_norm": 0.3578408360481262, "learning_rate": 0.00012152595763190471, "loss": 0.9867, "step": 24080 }, { "epoch": 0.7682005165981058, "grad_norm": 0.358577162027359, "learning_rate": 0.00012106184117072507, "loss": 0.9757, "step": 24090 }, { "epoch": 0.7685194043177397, "grad_norm": 0.3546871840953827, "learning_rate": 0.00012059949720402918, "loss": 0.9805, "step": 24100 }, { "epoch": 0.7688382920373736, "grad_norm": 0.3737013638019562, "learning_rate": 0.00012013891896253189, "loss": 0.9851, "step": 24110 }, { "epoch": 0.7691571797570076, "grad_norm": 0.35779833793640137, "learning_rate": 0.00011968009970280033, "loss": 0.972, "step": 24120 }, { "epoch": 0.7694760674766414, "grad_norm": 0.3553289473056793, "learning_rate": 0.00011922303270715539, "loss": 0.9745, "step": 24130 }, { "epoch": 0.7697949551962754, "grad_norm": 0.3517867922782898, "learning_rate": 0.00011876771128357317, "loss": 0.9777, "step": 24140 }, { "epoch": 0.7701138429159093, "grad_norm": 0.36188703775405884, "learning_rate": 0.00011831412876558719, "loss": 0.9814, "step": 24150 }, { "epoch": 0.7704327306355432, "grad_norm": 0.3496958911418915, "learning_rate": 0.00011786227851219062, "loss": 0.9648, "step": 24160 }, { "epoch": 0.7707516183551771, "grad_norm": 0.3597332835197449, "learning_rate": 0.00011741215390773915, "loss": 0.9686, "step": 24170 }, { "epoch": 0.7710705060748111, "grad_norm": 0.35886675119400024, "learning_rate": 0.00011696374836185405, "loss": 0.9839, "step": 24180 }, { "epoch": 0.7713893937944449, "grad_norm": 0.3740817904472351, "learning_rate": 0.00011651705530932584, "loss": 0.9757, "step": 24190 }, { "epoch": 0.7717082815140789, "grad_norm": 0.35636112093925476, "learning_rate": 0.00011607206821001794, "loss": 0.9643, "step": 24200 }, { "epoch": 0.7720271692337128, "grad_norm": 0.34713831543922424, "learning_rate": 0.00011562878054877104, "loss": 0.9722, "step": 24210 }, { "epoch": 0.7723460569533467, "grad_norm": 0.3590894043445587, "learning_rate": 0.00011518718583530775, "loss": 0.9621, "step": 24220 }, { "epoch": 0.7726649446729806, "grad_norm": 0.3767780065536499, "learning_rate": 0.00011474727760413749, "loss": 0.9457, "step": 24230 }, { "epoch": 0.7729838323926146, "grad_norm": 0.35947349667549133, "learning_rate": 0.00011430904941446183, "loss": 0.9726, "step": 24240 }, { "epoch": 0.7733027201122484, "grad_norm": 0.3609798848628998, "learning_rate": 0.00011387249485008026, "loss": 0.9639, "step": 24250 }, { "epoch": 0.7736216078318824, "grad_norm": 0.3725574016571045, "learning_rate": 0.0001134376075192962, "loss": 0.9655, "step": 24260 }, { "epoch": 0.7739404955515163, "grad_norm": 0.35596516728401184, "learning_rate": 0.00011300438105482337, "loss": 0.9748, "step": 24270 }, { "epoch": 0.7742593832711502, "grad_norm": 0.36799517273902893, "learning_rate": 0.00011257280911369273, "loss": 0.983, "step": 24280 }, { "epoch": 0.7745782709907841, "grad_norm": 0.38049396872520447, "learning_rate": 0.00011214288537715942, "loss": 0.982, "step": 24290 }, { "epoch": 0.7748971587104181, "grad_norm": 0.35354548692703247, "learning_rate": 0.00011171460355061031, "loss": 0.9748, "step": 24300 }, { "epoch": 0.7752160464300519, "grad_norm": 0.36302658915519714, "learning_rate": 0.0001112879573634719, "loss": 0.978, "step": 24310 }, { "epoch": 0.7755349341496859, "grad_norm": 0.37108802795410156, "learning_rate": 0.00011086294056911845, "loss": 0.9826, "step": 24320 }, { "epoch": 0.7758538218693198, "grad_norm": 0.35210826992988586, "learning_rate": 0.00011043954694478053, "loss": 0.9664, "step": 24330 }, { "epoch": 0.7761727095889537, "grad_norm": 0.3648816645145416, "learning_rate": 0.00011001777029145393, "loss": 0.9785, "step": 24340 }, { "epoch": 0.7764915973085876, "grad_norm": 0.354042112827301, "learning_rate": 0.00010959760443380887, "loss": 0.9784, "step": 24350 }, { "epoch": 0.7768104850282216, "grad_norm": 0.3622848093509674, "learning_rate": 0.00010917904322009965, "loss": 0.9731, "step": 24360 }, { "epoch": 0.7771293727478554, "grad_norm": 0.3674158751964569, "learning_rate": 0.00010876208052207448, "loss": 0.9596, "step": 24370 }, { "epoch": 0.7774482604674894, "grad_norm": 0.35103335976600647, "learning_rate": 0.00010834671023488582, "loss": 0.9525, "step": 24380 }, { "epoch": 0.7777671481871233, "grad_norm": 0.3551388084888458, "learning_rate": 0.000107932926277001, "loss": 0.9719, "step": 24390 }, { "epoch": 0.7780860359067572, "grad_norm": 0.36495012044906616, "learning_rate": 0.00010752072259011318, "loss": 0.9728, "step": 24400 }, { "epoch": 0.7784049236263911, "grad_norm": 0.3599463999271393, "learning_rate": 0.00010711009313905259, "loss": 0.9761, "step": 24410 }, { "epoch": 0.7787238113460251, "grad_norm": 0.36352306604385376, "learning_rate": 0.00010670103191169825, "loss": 0.981, "step": 24420 }, { "epoch": 0.7790426990656589, "grad_norm": 0.3560295104980469, "learning_rate": 0.00010629353291888986, "loss": 0.9718, "step": 24430 }, { "epoch": 0.7793615867852929, "grad_norm": 0.3523396849632263, "learning_rate": 0.00010588759019434024, "loss": 0.9728, "step": 24440 }, { "epoch": 0.7796804745049268, "grad_norm": 0.35102060437202454, "learning_rate": 0.00010548319779454782, "loss": 0.961, "step": 24450 }, { "epoch": 0.7799993622245607, "grad_norm": 0.3619765639305115, "learning_rate": 0.00010508034979870972, "loss": 0.9715, "step": 24460 }, { "epoch": 0.7803182499441946, "grad_norm": 0.35871872305870056, "learning_rate": 0.00010467904030863507, "loss": 0.9693, "step": 24470 }, { "epoch": 0.7806371376638286, "grad_norm": 0.3562319278717041, "learning_rate": 0.00010427926344865853, "loss": 0.9565, "step": 24480 }, { "epoch": 0.7809560253834625, "grad_norm": 0.35921573638916016, "learning_rate": 0.00010388101336555442, "loss": 0.9703, "step": 24490 }, { "epoch": 0.7812749131030964, "grad_norm": 0.3715077340602875, "learning_rate": 0.00010348428422845097, "loss": 0.9746, "step": 24500 }, { "epoch": 0.7815938008227303, "grad_norm": 0.3662187457084656, "learning_rate": 0.00010308907022874489, "loss": 0.9526, "step": 24510 }, { "epoch": 0.7819126885423643, "grad_norm": 0.3650236427783966, "learning_rate": 0.00010269536558001635, "loss": 0.9763, "step": 24520 }, { "epoch": 0.7822315762619981, "grad_norm": 0.358048677444458, "learning_rate": 0.00010230316451794439, "loss": 0.9842, "step": 24530 }, { "epoch": 0.7825504639816321, "grad_norm": 0.3543560802936554, "learning_rate": 0.00010191246130022226, "loss": 0.959, "step": 24540 }, { "epoch": 0.782869351701266, "grad_norm": 0.36472126841545105, "learning_rate": 0.00010152325020647362, "loss": 0.9696, "step": 24550 }, { "epoch": 0.7831882394208999, "grad_norm": 0.3696049153804779, "learning_rate": 0.00010113552553816855, "loss": 0.9638, "step": 24560 }, { "epoch": 0.7835071271405338, "grad_norm": 0.36587175726890564, "learning_rate": 0.00010074928161854036, "loss": 0.9732, "step": 24570 }, { "epoch": 0.7838260148601678, "grad_norm": 0.3524242043495178, "learning_rate": 0.00010036451279250222, "loss": 0.9643, "step": 24580 }, { "epoch": 0.7841449025798016, "grad_norm": 0.3479771018028259, "learning_rate": 9.998121342656457e-05, "loss": 0.9707, "step": 24590 }, { "epoch": 0.7844637902994356, "grad_norm": 0.35497039556503296, "learning_rate": 9.959937790875249e-05, "loss": 0.9698, "step": 24600 }, { "epoch": 0.7847826780190695, "grad_norm": 0.35893678665161133, "learning_rate": 9.92190006485237e-05, "loss": 0.9627, "step": 24610 }, { "epoch": 0.7851015657387034, "grad_norm": 0.3627142906188965, "learning_rate": 9.884007607668652e-05, "loss": 0.9529, "step": 24620 }, { "epoch": 0.7854204534583373, "grad_norm": 0.3647702634334564, "learning_rate": 9.846259864531842e-05, "loss": 0.9648, "step": 24630 }, { "epoch": 0.7857393411779713, "grad_norm": 0.36695319414138794, "learning_rate": 9.808656282768486e-05, "loss": 0.9627, "step": 24640 }, { "epoch": 0.7860582288976051, "grad_norm": 0.36442694067955017, "learning_rate": 9.77119631181582e-05, "loss": 0.9624, "step": 24650 }, { "epoch": 0.7863771166172391, "grad_norm": 0.36140966415405273, "learning_rate": 9.733879403213728e-05, "loss": 0.9696, "step": 24660 }, { "epoch": 0.786696004336873, "grad_norm": 0.35129281878471375, "learning_rate": 9.696705010596698e-05, "loss": 0.9566, "step": 24670 }, { "epoch": 0.7870148920565069, "grad_norm": 0.36112743616104126, "learning_rate": 9.65967258968583e-05, "loss": 0.9759, "step": 24680 }, { "epoch": 0.7873337797761408, "grad_norm": 0.36856353282928467, "learning_rate": 9.622781598280861e-05, "loss": 0.9725, "step": 24690 }, { "epoch": 0.7876526674957748, "grad_norm": 0.3470911979675293, "learning_rate": 9.586031496252237e-05, "loss": 0.953, "step": 24700 }, { "epoch": 0.7879715552154086, "grad_norm": 0.3622485101222992, "learning_rate": 9.549421745533192e-05, "loss": 0.9723, "step": 24710 }, { "epoch": 0.7882904429350426, "grad_norm": 0.36138638854026794, "learning_rate": 9.512951810111879e-05, "loss": 0.9514, "step": 24720 }, { "epoch": 0.7886093306546765, "grad_norm": 0.36271795630455017, "learning_rate": 9.476621156023519e-05, "loss": 0.9726, "step": 24730 }, { "epoch": 0.7889282183743104, "grad_norm": 0.3721517324447632, "learning_rate": 9.44042925134258e-05, "loss": 0.9702, "step": 24740 }, { "epoch": 0.7892471060939443, "grad_norm": 0.3487117290496826, "learning_rate": 9.404375566174994e-05, "loss": 0.9836, "step": 24750 }, { "epoch": 0.7895659938135783, "grad_norm": 0.3537936210632324, "learning_rate": 9.368459572650401e-05, "loss": 0.9605, "step": 24760 }, { "epoch": 0.7898848815332121, "grad_norm": 0.359280526638031, "learning_rate": 9.33268074491441e-05, "loss": 0.9518, "step": 24770 }, { "epoch": 0.7902037692528461, "grad_norm": 0.34653404355049133, "learning_rate": 9.297038559120912e-05, "loss": 0.9743, "step": 24780 }, { "epoch": 0.79052265697248, "grad_norm": 0.360307902097702, "learning_rate": 9.2615324934244e-05, "loss": 0.9869, "step": 24790 }, { "epoch": 0.7908415446921139, "grad_norm": 0.3570634126663208, "learning_rate": 9.226162027972337e-05, "loss": 0.9536, "step": 24800 }, { "epoch": 0.7911604324117478, "grad_norm": 0.3584236204624176, "learning_rate": 9.190926644897531e-05, "loss": 0.9724, "step": 24810 }, { "epoch": 0.7914793201313818, "grad_norm": 0.36408424377441406, "learning_rate": 9.155825828310578e-05, "loss": 0.9641, "step": 24820 }, { "epoch": 0.7917982078510156, "grad_norm": 0.3730533719062805, "learning_rate": 9.120859064292278e-05, "loss": 0.9673, "step": 24830 }, { "epoch": 0.7921170955706496, "grad_norm": 0.35709601640701294, "learning_rate": 9.086025840886135e-05, "loss": 0.9588, "step": 24840 }, { "epoch": 0.7924359832902835, "grad_norm": 0.36663979291915894, "learning_rate": 9.05132564809085e-05, "loss": 0.9696, "step": 24850 }, { "epoch": 0.7927548710099174, "grad_norm": 0.3638729453086853, "learning_rate": 9.016757977852851e-05, "loss": 0.9577, "step": 24860 }, { "epoch": 0.7930737587295513, "grad_norm": 0.35785070061683655, "learning_rate": 8.982322324058868e-05, "loss": 0.9525, "step": 24870 }, { "epoch": 0.7933926464491853, "grad_norm": 0.36241087317466736, "learning_rate": 8.948018182528511e-05, "loss": 0.9621, "step": 24880 }, { "epoch": 0.7937115341688191, "grad_norm": 0.35988399386405945, "learning_rate": 8.913845051006889e-05, "loss": 0.9541, "step": 24890 }, { "epoch": 0.7940304218884531, "grad_norm": 0.3533669710159302, "learning_rate": 8.879802429157259e-05, "loss": 0.9546, "step": 24900 }, { "epoch": 0.794349309608087, "grad_norm": 0.36953115463256836, "learning_rate": 8.845889818553704e-05, "loss": 0.9684, "step": 24910 }, { "epoch": 0.7946681973277209, "grad_norm": 0.3601612150669098, "learning_rate": 8.81210672267383e-05, "loss": 0.9636, "step": 24920 }, { "epoch": 0.7949870850473548, "grad_norm": 0.3613516688346863, "learning_rate": 8.778452646891497e-05, "loss": 0.957, "step": 24930 }, { "epoch": 0.7953059727669888, "grad_norm": 0.3623802065849304, "learning_rate": 8.744927098469577e-05, "loss": 0.9593, "step": 24940 }, { "epoch": 0.7956248604866226, "grad_norm": 0.3671809732913971, "learning_rate": 8.711529586552748e-05, "loss": 0.9708, "step": 24950 }, { "epoch": 0.7959437482062566, "grad_norm": 0.3662140667438507, "learning_rate": 8.678259622160293e-05, "loss": 0.9586, "step": 24960 }, { "epoch": 0.7962626359258905, "grad_norm": 0.35915324091911316, "learning_rate": 8.645116718178947e-05, "loss": 0.9563, "step": 24970 }, { "epoch": 0.7965815236455244, "grad_norm": 0.3539397716522217, "learning_rate": 8.612100389355772e-05, "loss": 0.9631, "step": 24980 }, { "epoch": 0.7969004113651583, "grad_norm": 0.36351755261421204, "learning_rate": 8.579210152291042e-05, "loss": 0.9371, "step": 24990 }, { "epoch": 0.7972192990847923, "grad_norm": 0.36191827058792114, "learning_rate": 8.546445525431173e-05, "loss": 0.9566, "step": 25000 }, { "epoch": 0.7975381868044261, "grad_norm": 0.37281543016433716, "learning_rate": 8.513806029061663e-05, "loss": 0.975, "step": 25010 }, { "epoch": 0.7978570745240601, "grad_norm": 0.3713143467903137, "learning_rate": 8.481291185300078e-05, "loss": 0.956, "step": 25020 }, { "epoch": 0.798175962243694, "grad_norm": 0.37444189190864563, "learning_rate": 8.448900518089058e-05, "loss": 0.9518, "step": 25030 }, { "epoch": 0.7984948499633279, "grad_norm": 0.3919963538646698, "learning_rate": 8.416633553189332e-05, "loss": 0.9714, "step": 25040 }, { "epoch": 0.7988137376829618, "grad_norm": 0.3554207384586334, "learning_rate": 8.384489818172784e-05, "loss": 0.9544, "step": 25050 }, { "epoch": 0.7991326254025958, "grad_norm": 0.3600543439388275, "learning_rate": 8.35246884241554e-05, "loss": 0.9526, "step": 25060 }, { "epoch": 0.7994515131222296, "grad_norm": 0.36619463562965393, "learning_rate": 8.32057015709107e-05, "loss": 0.9734, "step": 25070 }, { "epoch": 0.7997704008418636, "grad_norm": 0.3708662986755371, "learning_rate": 8.288793295163325e-05, "loss": 0.9637, "step": 25080 }, { "epoch": 0.8000892885614975, "grad_norm": 0.35670143365859985, "learning_rate": 8.257137791379903e-05, "loss": 0.9712, "step": 25090 }, { "epoch": 0.8004081762811314, "grad_norm": 0.3521883189678192, "learning_rate": 8.225603182265234e-05, "loss": 0.9477, "step": 25100 }, { "epoch": 0.8007270640007653, "grad_norm": 0.352546364068985, "learning_rate": 8.194189006113793e-05, "loss": 0.9609, "step": 25110 }, { "epoch": 0.8010459517203993, "grad_norm": 0.36682891845703125, "learning_rate": 8.162894802983348e-05, "loss": 0.961, "step": 25120 }, { "epoch": 0.8013648394400331, "grad_norm": 0.37029045820236206, "learning_rate": 8.131720114688214e-05, "loss": 0.9521, "step": 25130 }, { "epoch": 0.8016837271596671, "grad_norm": 0.36519482731819153, "learning_rate": 8.100664484792551e-05, "loss": 0.9574, "step": 25140 }, { "epoch": 0.802002614879301, "grad_norm": 0.36591729521751404, "learning_rate": 8.069727458603683e-05, "loss": 0.9464, "step": 25150 }, { "epoch": 0.8023215025989349, "grad_norm": 0.3642500340938568, "learning_rate": 8.038908583165436e-05, "loss": 0.9746, "step": 25160 }, { "epoch": 0.8026403903185688, "grad_norm": 0.36521655321121216, "learning_rate": 8.008207407251511e-05, "loss": 0.9394, "step": 25170 }, { "epoch": 0.8029592780382028, "grad_norm": 0.3553067445755005, "learning_rate": 7.977623481358872e-05, "loss": 0.9494, "step": 25180 }, { "epoch": 0.8032781657578366, "grad_norm": 0.3684810400009155, "learning_rate": 7.947156357701168e-05, "loss": 0.9455, "step": 25190 }, { "epoch": 0.8035970534774706, "grad_norm": 0.3643917739391327, "learning_rate": 7.916805590202182e-05, "loss": 0.9567, "step": 25200 }, { "epoch": 0.8039159411971045, "grad_norm": 0.3606899380683899, "learning_rate": 7.886570734489289e-05, "loss": 0.9681, "step": 25210 }, { "epoch": 0.8042348289167384, "grad_norm": 0.36119991540908813, "learning_rate": 7.856451347886955e-05, "loss": 0.9555, "step": 25220 }, { "epoch": 0.8045537166363723, "grad_norm": 0.35547545552253723, "learning_rate": 7.82644698941026e-05, "loss": 0.944, "step": 25230 }, { "epoch": 0.8048726043560063, "grad_norm": 0.3734472990036011, "learning_rate": 7.796557219758433e-05, "loss": 0.97, "step": 25240 }, { "epoch": 0.8051914920756401, "grad_norm": 0.36550241708755493, "learning_rate": 7.766781601308432e-05, "loss": 0.9647, "step": 25250 }, { "epoch": 0.8055103797952741, "grad_norm": 0.3795914947986603, "learning_rate": 7.737119698108516e-05, "loss": 0.9377, "step": 25260 }, { "epoch": 0.805829267514908, "grad_norm": 0.34035786986351013, "learning_rate": 7.70757107587189e-05, "loss": 0.9327, "step": 25270 }, { "epoch": 0.8061481552345419, "grad_norm": 0.3551400601863861, "learning_rate": 7.678135301970321e-05, "loss": 0.9606, "step": 25280 }, { "epoch": 0.8064670429541758, "grad_norm": 0.35824182629585266, "learning_rate": 7.648811945427822e-05, "loss": 0.9599, "step": 25290 }, { "epoch": 0.8067859306738098, "grad_norm": 0.3525572419166565, "learning_rate": 7.61960057691433e-05, "loss": 0.9565, "step": 25300 }, { "epoch": 0.8071048183934437, "grad_norm": 0.36219167709350586, "learning_rate": 7.590500768739427e-05, "loss": 0.9413, "step": 25310 }, { "epoch": 0.8074237061130776, "grad_norm": 0.36099758744239807, "learning_rate": 7.561512094846075e-05, "loss": 0.9746, "step": 25320 }, { "epoch": 0.8077425938327115, "grad_norm": 0.35688111186027527, "learning_rate": 7.532634130804376e-05, "loss": 0.958, "step": 25330 }, { "epoch": 0.8080614815523455, "grad_norm": 0.36312273144721985, "learning_rate": 7.503866453805368e-05, "loss": 0.9557, "step": 25340 }, { "epoch": 0.8083803692719793, "grad_norm": 0.3614059090614319, "learning_rate": 7.475208642654819e-05, "loss": 0.9591, "step": 25350 }, { "epoch": 0.8086992569916133, "grad_norm": 0.3655224144458771, "learning_rate": 7.44666027776707e-05, "loss": 0.9518, "step": 25360 }, { "epoch": 0.8090181447112472, "grad_norm": 0.3513615131378174, "learning_rate": 7.418220941158897e-05, "loss": 0.9465, "step": 25370 }, { "epoch": 0.8093370324308811, "grad_norm": 0.3549514710903168, "learning_rate": 7.389890216443378e-05, "loss": 0.9628, "step": 25380 }, { "epoch": 0.809655920150515, "grad_norm": 0.3620496690273285, "learning_rate": 7.361667688823803e-05, "loss": 0.952, "step": 25390 }, { "epoch": 0.809974807870149, "grad_norm": 0.356606662273407, "learning_rate": 7.333552945087602e-05, "loss": 0.9508, "step": 25400 }, { "epoch": 0.8102936955897828, "grad_norm": 0.3640885055065155, "learning_rate": 7.305545573600293e-05, "loss": 0.9496, "step": 25410 }, { "epoch": 0.8106125833094168, "grad_norm": 0.3723457455635071, "learning_rate": 7.277645164299459e-05, "loss": 0.9541, "step": 25420 }, { "epoch": 0.8109314710290507, "grad_norm": 0.3665878474712372, "learning_rate": 7.249851308688739e-05, "loss": 0.9561, "step": 25430 }, { "epoch": 0.8112503587486846, "grad_norm": 0.3625575602054596, "learning_rate": 7.222163599831843e-05, "loss": 0.9649, "step": 25440 }, { "epoch": 0.8115692464683185, "grad_norm": 0.3609040677547455, "learning_rate": 7.194581632346617e-05, "loss": 0.9548, "step": 25450 }, { "epoch": 0.8118881341879525, "grad_norm": 0.34985458850860596, "learning_rate": 7.167105002399073e-05, "loss": 0.9395, "step": 25460 }, { "epoch": 0.8122070219075863, "grad_norm": 0.3752239942550659, "learning_rate": 7.139733307697503e-05, "loss": 0.9666, "step": 25470 }, { "epoch": 0.8125259096272203, "grad_norm": 0.34349602460861206, "learning_rate": 7.112466147486579e-05, "loss": 0.9695, "step": 25480 }, { "epoch": 0.8128447973468542, "grad_norm": 0.37290871143341064, "learning_rate": 7.08530312254149e-05, "loss": 0.9622, "step": 25490 }, { "epoch": 0.8131636850664881, "grad_norm": 0.3771199584007263, "learning_rate": 7.058243835162084e-05, "loss": 0.9633, "step": 25500 }, { "epoch": 0.813482572786122, "grad_norm": 0.3719727396965027, "learning_rate": 7.031287889167066e-05, "loss": 0.9571, "step": 25510 }, { "epoch": 0.813801460505756, "grad_norm": 0.37617650628089905, "learning_rate": 7.004434889888181e-05, "loss": 0.9532, "step": 25520 }, { "epoch": 0.8141203482253898, "grad_norm": 0.3691602349281311, "learning_rate": 6.977684444164437e-05, "loss": 0.9604, "step": 25530 }, { "epoch": 0.8144392359450238, "grad_norm": 0.35761821269989014, "learning_rate": 6.951036160336359e-05, "loss": 0.961, "step": 25540 }, { "epoch": 0.8147581236646577, "grad_norm": 0.3590810000896454, "learning_rate": 6.924489648240246e-05, "loss": 0.9575, "step": 25550 }, { "epoch": 0.8150770113842916, "grad_norm": 0.3555402159690857, "learning_rate": 6.89804451920246e-05, "loss": 0.9462, "step": 25560 }, { "epoch": 0.8153958991039255, "grad_norm": 0.35681602358818054, "learning_rate": 6.871700386033734e-05, "loss": 0.9583, "step": 25570 }, { "epoch": 0.8157147868235595, "grad_norm": 0.3650819957256317, "learning_rate": 6.845456863023507e-05, "loss": 0.9551, "step": 25580 }, { "epoch": 0.8160336745431933, "grad_norm": 0.36943039298057556, "learning_rate": 6.819313565934276e-05, "loss": 0.9673, "step": 25590 }, { "epoch": 0.8163525622628273, "grad_norm": 0.3585793673992157, "learning_rate": 6.793270111995963e-05, "loss": 0.9552, "step": 25600 }, { "epoch": 0.8166714499824612, "grad_norm": 0.3606432378292084, "learning_rate": 6.767326119900324e-05, "loss": 0.9538, "step": 25610 }, { "epoch": 0.8169903377020951, "grad_norm": 0.36778682470321655, "learning_rate": 6.741481209795356e-05, "loss": 0.9552, "step": 25620 }, { "epoch": 0.817309225421729, "grad_norm": 0.362591415643692, "learning_rate": 6.715735003279736e-05, "loss": 0.9656, "step": 25630 }, { "epoch": 0.817628113141363, "grad_norm": 0.3498934805393219, "learning_rate": 6.69008712339729e-05, "loss": 0.9598, "step": 25640 }, { "epoch": 0.8179470008609968, "grad_norm": 0.361508846282959, "learning_rate": 6.664537194631458e-05, "loss": 0.9403, "step": 25650 }, { "epoch": 0.8182658885806308, "grad_norm": 0.36598604917526245, "learning_rate": 6.639084842899812e-05, "loss": 0.9568, "step": 25660 }, { "epoch": 0.8185847763002647, "grad_norm": 0.3673774302005768, "learning_rate": 6.613729695548567e-05, "loss": 0.9531, "step": 25670 }, { "epoch": 0.8189036640198986, "grad_norm": 0.3787930905818939, "learning_rate": 6.588471381347133e-05, "loss": 0.9603, "step": 25680 }, { "epoch": 0.8192225517395325, "grad_norm": 0.36597883701324463, "learning_rate": 6.563309530482677e-05, "loss": 0.9596, "step": 25690 }, { "epoch": 0.8195414394591665, "grad_norm": 0.3917224705219269, "learning_rate": 6.538243774554706e-05, "loss": 0.9636, "step": 25700 }, { "epoch": 0.8198603271788003, "grad_norm": 0.3685980439186096, "learning_rate": 6.513273746569676e-05, "loss": 0.9542, "step": 25710 }, { "epoch": 0.8201792148984343, "grad_norm": 0.35182175040245056, "learning_rate": 6.48839908093562e-05, "loss": 0.9366, "step": 25720 }, { "epoch": 0.8204981026180682, "grad_norm": 0.36965203285217285, "learning_rate": 6.463619413456787e-05, "loss": 0.9558, "step": 25730 }, { "epoch": 0.8208169903377021, "grad_norm": 0.36861446499824524, "learning_rate": 6.438934381328326e-05, "loss": 0.9519, "step": 25740 }, { "epoch": 0.821135878057336, "grad_norm": 0.3652392625808716, "learning_rate": 6.414343623130956e-05, "loss": 0.9546, "step": 25750 }, { "epoch": 0.82145476577697, "grad_norm": 0.3519989550113678, "learning_rate": 6.389846778825685e-05, "loss": 0.928, "step": 25760 }, { "epoch": 0.8217736534966038, "grad_norm": 0.36737608909606934, "learning_rate": 6.365443489748536e-05, "loss": 0.9531, "step": 25770 }, { "epoch": 0.8220925412162378, "grad_norm": 0.36519476771354675, "learning_rate": 6.341133398605295e-05, "loss": 0.9467, "step": 25780 }, { "epoch": 0.8224114289358717, "grad_norm": 0.3782409727573395, "learning_rate": 6.316916149466283e-05, "loss": 0.9584, "step": 25790 }, { "epoch": 0.8227303166555056, "grad_norm": 0.36387908458709717, "learning_rate": 6.292791387761143e-05, "loss": 0.9313, "step": 25800 }, { "epoch": 0.8230492043751395, "grad_norm": 0.3558565080165863, "learning_rate": 6.268758760273642e-05, "loss": 0.9525, "step": 25810 }, { "epoch": 0.8233680920947735, "grad_norm": 0.3587815463542938, "learning_rate": 6.24481791513651e-05, "loss": 0.9593, "step": 25820 }, { "epoch": 0.8236869798144073, "grad_norm": 0.37237703800201416, "learning_rate": 6.220968501826283e-05, "loss": 0.9481, "step": 25830 }, { "epoch": 0.8240058675340413, "grad_norm": 0.35571229457855225, "learning_rate": 6.197210171158173e-05, "loss": 0.9452, "step": 25840 }, { "epoch": 0.8243247552536752, "grad_norm": 0.37391969561576843, "learning_rate": 6.173542575280949e-05, "loss": 0.9587, "step": 25850 }, { "epoch": 0.824643642973309, "grad_norm": 0.37451764941215515, "learning_rate": 6.149965367671856e-05, "loss": 0.9464, "step": 25860 }, { "epoch": 0.824962530692943, "grad_norm": 0.3647792339324951, "learning_rate": 6.126478203131529e-05, "loss": 0.9339, "step": 25870 }, { "epoch": 0.825281418412577, "grad_norm": 0.36442533135414124, "learning_rate": 6.1030807377789486e-05, "loss": 0.9413, "step": 25880 }, { "epoch": 0.8256003061322108, "grad_norm": 0.36027491092681885, "learning_rate": 6.0797726290463996e-05, "loss": 0.9562, "step": 25890 }, { "epoch": 0.8259191938518448, "grad_norm": 0.3716151714324951, "learning_rate": 6.056553535674458e-05, "loss": 0.9368, "step": 25900 }, { "epoch": 0.8262380815714787, "grad_norm": 0.382212370634079, "learning_rate": 6.033423117706994e-05, "loss": 0.9551, "step": 25910 }, { "epoch": 0.8265569692911126, "grad_norm": 0.3520452082157135, "learning_rate": 6.0103810364861955e-05, "loss": 0.9539, "step": 25920 }, { "epoch": 0.8268758570107465, "grad_norm": 0.36965930461883545, "learning_rate": 5.9874269546476105e-05, "loss": 0.9474, "step": 25930 }, { "epoch": 0.8271947447303805, "grad_norm": 0.35486385226249695, "learning_rate": 5.964560536115204e-05, "loss": 0.9547, "step": 25940 }, { "epoch": 0.8275136324500143, "grad_norm": 0.3544781506061554, "learning_rate": 5.941781446096441e-05, "loss": 0.9271, "step": 25950 }, { "epoch": 0.8278325201696483, "grad_norm": 0.3677733540534973, "learning_rate": 5.9190893510773834e-05, "loss": 0.955, "step": 25960 }, { "epoch": 0.8281514078892822, "grad_norm": 0.3709496855735779, "learning_rate": 5.896483918817807e-05, "loss": 0.9493, "step": 25970 }, { "epoch": 0.828470295608916, "grad_norm": 0.36802056431770325, "learning_rate": 5.873964818346338e-05, "loss": 0.9589, "step": 25980 }, { "epoch": 0.82878918332855, "grad_norm": 0.3739546239376068, "learning_rate": 5.8515317199556014e-05, "loss": 0.9445, "step": 25990 }, { "epoch": 0.829108071048184, "grad_norm": 0.3755469024181366, "learning_rate": 5.829184295197409e-05, "loss": 0.9434, "step": 26000 }, { "epoch": 0.8294269587678178, "grad_norm": 0.3417413532733917, "learning_rate": 5.806922216877932e-05, "loss": 0.9374, "step": 26010 }, { "epoch": 0.8297458464874518, "grad_norm": 0.36555731296539307, "learning_rate": 5.784745159052919e-05, "loss": 0.9373, "step": 26020 }, { "epoch": 0.8300647342070857, "grad_norm": 0.3578369617462158, "learning_rate": 5.7626527970229256e-05, "loss": 0.9585, "step": 26030 }, { "epoch": 0.8303836219267196, "grad_norm": 0.37149980664253235, "learning_rate": 5.7406448073285566e-05, "loss": 0.9487, "step": 26040 }, { "epoch": 0.8307025096463535, "grad_norm": 0.3658088147640228, "learning_rate": 5.718720867745734e-05, "loss": 0.9591, "step": 26050 }, { "epoch": 0.8310213973659875, "grad_norm": 0.3635800778865814, "learning_rate": 5.6968806572809736e-05, "loss": 0.946, "step": 26060 }, { "epoch": 0.8313402850856213, "grad_norm": 0.3664982318878174, "learning_rate": 5.675123856166692e-05, "loss": 0.9351, "step": 26070 }, { "epoch": 0.8316591728052553, "grad_norm": 0.36701416969299316, "learning_rate": 5.653450145856519e-05, "loss": 0.946, "step": 26080 }, { "epoch": 0.8319780605248892, "grad_norm": 0.3673885762691498, "learning_rate": 5.631859209020637e-05, "loss": 0.9445, "step": 26090 }, { "epoch": 0.832296948244523, "grad_norm": 0.3711278438568115, "learning_rate": 5.6103507295411355e-05, "loss": 0.9504, "step": 26100 }, { "epoch": 0.832615835964157, "grad_norm": 0.36567771434783936, "learning_rate": 5.5889243925073783e-05, "loss": 0.9828, "step": 26110 }, { "epoch": 0.832934723683791, "grad_norm": 0.3659859001636505, "learning_rate": 5.5675798842113984e-05, "loss": 0.9513, "step": 26120 }, { "epoch": 0.8332536114034248, "grad_norm": 0.36194849014282227, "learning_rate": 5.546316892143301e-05, "loss": 0.9443, "step": 26130 }, { "epoch": 0.8335724991230588, "grad_norm": 0.36451077461242676, "learning_rate": 5.525135104986689e-05, "loss": 0.938, "step": 26140 }, { "epoch": 0.8338913868426927, "grad_norm": 0.3581928610801697, "learning_rate": 5.5040342126141065e-05, "loss": 0.9473, "step": 26150 }, { "epoch": 0.8342102745623267, "grad_norm": 0.3767908811569214, "learning_rate": 5.483013906082494e-05, "loss": 0.9584, "step": 26160 }, { "epoch": 0.8345291622819605, "grad_norm": 0.3569718301296234, "learning_rate": 5.462073877628674e-05, "loss": 0.9194, "step": 26170 }, { "epoch": 0.8348480500015945, "grad_norm": 0.37712088227272034, "learning_rate": 5.4412138206648335e-05, "loss": 0.9696, "step": 26180 }, { "epoch": 0.8351669377212284, "grad_norm": 0.3611338436603546, "learning_rate": 5.420433429774042e-05, "loss": 0.9433, "step": 26190 }, { "epoch": 0.8354858254408623, "grad_norm": 0.365477055311203, "learning_rate": 5.3997324007057795e-05, "loss": 0.9307, "step": 26200 }, { "epoch": 0.8358047131604962, "grad_norm": 0.3720219135284424, "learning_rate": 5.379110430371478e-05, "loss": 0.9351, "step": 26210 }, { "epoch": 0.8361236008801302, "grad_norm": 0.35642099380493164, "learning_rate": 5.358567216840091e-05, "loss": 0.9441, "step": 26220 }, { "epoch": 0.836442488599764, "grad_norm": 0.3763499855995178, "learning_rate": 5.338102459333666e-05, "loss": 0.9426, "step": 26230 }, { "epoch": 0.836761376319398, "grad_norm": 0.3568282127380371, "learning_rate": 5.317715858222943e-05, "loss": 0.9431, "step": 26240 }, { "epoch": 0.8370802640390319, "grad_norm": 0.3555208444595337, "learning_rate": 5.297407115022969e-05, "loss": 0.9416, "step": 26250 }, { "epoch": 0.8373991517586657, "grad_norm": 0.3644401729106903, "learning_rate": 5.277175932388726e-05, "loss": 0.919, "step": 26260 }, { "epoch": 0.8377180394782997, "grad_norm": 0.35538750886917114, "learning_rate": 5.2570220141107796e-05, "loss": 0.9656, "step": 26270 }, { "epoch": 0.8380369271979337, "grad_norm": 0.36755502223968506, "learning_rate": 5.236945065110938e-05, "loss": 0.9645, "step": 26280 }, { "epoch": 0.8383558149175675, "grad_norm": 0.35435423254966736, "learning_rate": 5.216944791437939e-05, "loss": 0.9412, "step": 26290 }, { "epoch": 0.8386747026372015, "grad_norm": 0.3524496555328369, "learning_rate": 5.1970209002631356e-05, "loss": 0.9513, "step": 26300 }, { "epoch": 0.8389935903568354, "grad_norm": 0.36997079849243164, "learning_rate": 5.177173099876221e-05, "loss": 0.9451, "step": 26310 }, { "epoch": 0.8393124780764692, "grad_norm": 0.3610512912273407, "learning_rate": 5.157401099680946e-05, "loss": 0.94, "step": 26320 }, { "epoch": 0.8396313657961032, "grad_norm": 0.36900800466537476, "learning_rate": 5.13770461019087e-05, "loss": 0.9453, "step": 26330 }, { "epoch": 0.8399502535157372, "grad_norm": 0.3552963435649872, "learning_rate": 5.118083343025129e-05, "loss": 0.9421, "step": 26340 }, { "epoch": 0.840269141235371, "grad_norm": 0.3669191002845764, "learning_rate": 5.0985370109041986e-05, "loss": 0.9286, "step": 26350 }, { "epoch": 0.840588028955005, "grad_norm": 0.3605242371559143, "learning_rate": 5.079065327645699e-05, "loss": 0.9564, "step": 26360 }, { "epoch": 0.8409069166746389, "grad_norm": 0.3721179962158203, "learning_rate": 5.059668008160201e-05, "loss": 0.951, "step": 26370 }, { "epoch": 0.8412258043942727, "grad_norm": 0.37630388140678406, "learning_rate": 5.0403447684470534e-05, "loss": 0.9437, "step": 26380 }, { "epoch": 0.8415446921139067, "grad_norm": 0.3522168695926666, "learning_rate": 5.021095325590223e-05, "loss": 0.9353, "step": 26390 }, { "epoch": 0.8418635798335407, "grad_norm": 0.3843633532524109, "learning_rate": 5.001919397754154e-05, "loss": 0.961, "step": 26400 }, { "epoch": 0.8421824675531745, "grad_norm": 0.35673603415489197, "learning_rate": 4.9828167041796415e-05, "loss": 0.9534, "step": 26410 }, { "epoch": 0.8425013552728084, "grad_norm": 0.3700405955314636, "learning_rate": 4.9637869651797196e-05, "loss": 0.9264, "step": 26420 }, { "epoch": 0.8428202429924424, "grad_norm": 0.371220201253891, "learning_rate": 4.944829902135568e-05, "loss": 0.947, "step": 26430 }, { "epoch": 0.8431391307120762, "grad_norm": 0.35385560989379883, "learning_rate": 4.925945237492433e-05, "loss": 0.949, "step": 26440 }, { "epoch": 0.8434580184317102, "grad_norm": 0.36943143606185913, "learning_rate": 4.9071326947555595e-05, "loss": 0.9618, "step": 26450 }, { "epoch": 0.8437769061513442, "grad_norm": 0.3573361933231354, "learning_rate": 4.8883919984861484e-05, "loss": 0.9538, "step": 26460 }, { "epoch": 0.844095793870978, "grad_norm": 0.37482595443725586, "learning_rate": 4.86972287429732e-05, "loss": 0.933, "step": 26470 }, { "epoch": 0.844414681590612, "grad_norm": 0.3452557623386383, "learning_rate": 4.8511250488501005e-05, "loss": 0.9393, "step": 26480 }, { "epoch": 0.8447335693102459, "grad_norm": 0.35981285572052, "learning_rate": 4.832598249849414e-05, "loss": 0.9365, "step": 26490 }, { "epoch": 0.8450524570298797, "grad_norm": 0.34943288564682007, "learning_rate": 4.814142206040099e-05, "loss": 0.9419, "step": 26500 }, { "epoch": 0.8453713447495137, "grad_norm": 0.35167521238327026, "learning_rate": 4.795756647202945e-05, "loss": 0.9393, "step": 26510 }, { "epoch": 0.8456902324691477, "grad_norm": 0.35662704706192017, "learning_rate": 4.777441304150719e-05, "loss": 0.9325, "step": 26520 }, { "epoch": 0.8460091201887815, "grad_norm": 0.3618640601634979, "learning_rate": 4.759195908724238e-05, "loss": 0.9379, "step": 26530 }, { "epoch": 0.8463280079084154, "grad_norm": 0.3570464551448822, "learning_rate": 4.741020193788437e-05, "loss": 0.9363, "step": 26540 }, { "epoch": 0.8466468956280494, "grad_norm": 0.37565356492996216, "learning_rate": 4.72291389322846e-05, "loss": 0.9437, "step": 26550 }, { "epoch": 0.8469657833476832, "grad_norm": 0.36229774355888367, "learning_rate": 4.7048767419457626e-05, "loss": 0.9409, "step": 26560 }, { "epoch": 0.8472846710673172, "grad_norm": 0.3552214205265045, "learning_rate": 4.686908475854231e-05, "loss": 0.9561, "step": 26570 }, { "epoch": 0.8476035587869512, "grad_norm": 0.36420536041259766, "learning_rate": 4.669008831876315e-05, "loss": 0.9338, "step": 26580 }, { "epoch": 0.847922446506585, "grad_norm": 0.3625452518463135, "learning_rate": 4.651177547939179e-05, "loss": 0.9357, "step": 26590 }, { "epoch": 0.848241334226219, "grad_norm": 0.3741529881954193, "learning_rate": 4.633414362970859e-05, "loss": 0.9488, "step": 26600 }, { "epoch": 0.8485602219458529, "grad_norm": 0.3811874985694885, "learning_rate": 4.6157190168964464e-05, "loss": 0.9377, "step": 26610 }, { "epoch": 0.8488791096654867, "grad_norm": 0.3749126195907593, "learning_rate": 4.598091250634277e-05, "loss": 0.945, "step": 26620 }, { "epoch": 0.8491979973851207, "grad_norm": 0.3762841820716858, "learning_rate": 4.580530806092137e-05, "loss": 0.946, "step": 26630 }, { "epoch": 0.8495168851047546, "grad_norm": 0.3608279824256897, "learning_rate": 4.563037426163488e-05, "loss": 0.9599, "step": 26640 }, { "epoch": 0.8498357728243885, "grad_norm": 0.35772505402565, "learning_rate": 4.545610854723698e-05, "loss": 0.9429, "step": 26650 }, { "epoch": 0.8501546605440224, "grad_norm": 0.3615608811378479, "learning_rate": 4.528250836626295e-05, "loss": 0.9347, "step": 26660 }, { "epoch": 0.8504735482636564, "grad_norm": 0.35870596766471863, "learning_rate": 4.5109571176992264e-05, "loss": 0.9432, "step": 26670 }, { "epoch": 0.8507924359832902, "grad_norm": 0.3548199534416199, "learning_rate": 4.493729444741149e-05, "loss": 0.949, "step": 26680 }, { "epoch": 0.8511113237029242, "grad_norm": 0.34394973516464233, "learning_rate": 4.476567565517706e-05, "loss": 0.9346, "step": 26690 }, { "epoch": 0.8514302114225581, "grad_norm": 0.35724857449531555, "learning_rate": 4.459471228757844e-05, "loss": 0.9341, "step": 26700 }, { "epoch": 0.851749099142192, "grad_norm": 0.3644699454307556, "learning_rate": 4.442440184150135e-05, "loss": 0.9465, "step": 26710 }, { "epoch": 0.8520679868618259, "grad_norm": 0.36522284150123596, "learning_rate": 4.425474182339106e-05, "loss": 0.9398, "step": 26720 }, { "epoch": 0.8523868745814599, "grad_norm": 0.3725425899028778, "learning_rate": 4.40857297492159e-05, "loss": 0.9481, "step": 26730 }, { "epoch": 0.8527057623010937, "grad_norm": 0.36529541015625, "learning_rate": 4.391736314443091e-05, "loss": 0.9542, "step": 26740 }, { "epoch": 0.8530246500207277, "grad_norm": 0.3558250665664673, "learning_rate": 4.37496395439416e-05, "loss": 0.9518, "step": 26750 }, { "epoch": 0.8533435377403616, "grad_norm": 0.3752404749393463, "learning_rate": 4.3582556492067844e-05, "loss": 0.936, "step": 26760 }, { "epoch": 0.8536624254599955, "grad_norm": 0.37050309777259827, "learning_rate": 4.341611154250795e-05, "loss": 0.9322, "step": 26770 }, { "epoch": 0.8539813131796294, "grad_norm": 0.36518236994743347, "learning_rate": 4.325030225830281e-05, "loss": 0.9567, "step": 26780 }, { "epoch": 0.8543002008992634, "grad_norm": 0.3709019720554352, "learning_rate": 4.308512621180027e-05, "loss": 0.9464, "step": 26790 }, { "epoch": 0.8546190886188972, "grad_norm": 0.3707132637500763, "learning_rate": 4.2920580984619533e-05, "loss": 0.9245, "step": 26800 }, { "epoch": 0.8549379763385312, "grad_norm": 0.35442495346069336, "learning_rate": 4.275666416761577e-05, "loss": 0.9439, "step": 26810 }, { "epoch": 0.8552568640581651, "grad_norm": 0.3763255178928375, "learning_rate": 4.259337336084486e-05, "loss": 0.9557, "step": 26820 }, { "epoch": 0.855575751777799, "grad_norm": 0.3710927367210388, "learning_rate": 4.243070617352825e-05, "loss": 0.9561, "step": 26830 }, { "epoch": 0.8558946394974329, "grad_norm": 0.36720240116119385, "learning_rate": 4.226866022401794e-05, "loss": 0.9419, "step": 26840 }, { "epoch": 0.8562135272170669, "grad_norm": 0.37940865755081177, "learning_rate": 4.2107233139761615e-05, "loss": 0.9378, "step": 26850 }, { "epoch": 0.8565324149367007, "grad_norm": 0.35762712359428406, "learning_rate": 4.194642255726791e-05, "loss": 0.9367, "step": 26860 }, { "epoch": 0.8568513026563347, "grad_norm": 0.3704802095890045, "learning_rate": 4.1786226122071794e-05, "loss": 0.9501, "step": 26870 }, { "epoch": 0.8571701903759686, "grad_norm": 0.3747062385082245, "learning_rate": 4.162664148870013e-05, "loss": 0.9374, "step": 26880 }, { "epoch": 0.8574890780956025, "grad_norm": 0.3695331811904907, "learning_rate": 4.146766632063729e-05, "loss": 0.9485, "step": 26890 }, { "epoch": 0.8578079658152364, "grad_norm": 0.3804053068161011, "learning_rate": 4.1309298290290994e-05, "loss": 0.9504, "step": 26900 }, { "epoch": 0.8581268535348704, "grad_norm": 0.3694726228713989, "learning_rate": 4.1151535078958185e-05, "loss": 0.9515, "step": 26910 }, { "epoch": 0.8584457412545042, "grad_norm": 0.37324094772338867, "learning_rate": 4.099437437679111e-05, "loss": 0.9379, "step": 26920 }, { "epoch": 0.8587646289741382, "grad_norm": 0.35455772280693054, "learning_rate": 4.08378138827635e-05, "loss": 0.9565, "step": 26930 }, { "epoch": 0.8590835166937721, "grad_norm": 0.3658784329891205, "learning_rate": 4.0681851304636857e-05, "loss": 0.9395, "step": 26940 }, { "epoch": 0.859402404413406, "grad_norm": 0.36860978603363037, "learning_rate": 4.052648435892692e-05, "loss": 0.9333, "step": 26950 }, { "epoch": 0.8597212921330399, "grad_norm": 0.3664551377296448, "learning_rate": 4.037171077087022e-05, "loss": 0.9441, "step": 26960 }, { "epoch": 0.8600401798526739, "grad_norm": 0.36110377311706543, "learning_rate": 4.021752827439075e-05, "loss": 0.9171, "step": 26970 }, { "epoch": 0.8603590675723078, "grad_norm": 0.3666580617427826, "learning_rate": 4.0063934612066855e-05, "loss": 0.9306, "step": 26980 }, { "epoch": 0.8606779552919417, "grad_norm": 0.3739660978317261, "learning_rate": 3.991092753509812e-05, "loss": 0.9396, "step": 26990 }, { "epoch": 0.8609968430115756, "grad_norm": 0.36052122712135315, "learning_rate": 3.975850480327241e-05, "loss": 0.9236, "step": 27000 }, { "epoch": 0.8613157307312096, "grad_norm": 0.3642176389694214, "learning_rate": 3.960666418493324e-05, "loss": 0.9218, "step": 27010 }, { "epoch": 0.8616346184508434, "grad_norm": 0.36625078320503235, "learning_rate": 3.9455403456946875e-05, "loss": 0.9433, "step": 27020 }, { "epoch": 0.8619535061704774, "grad_norm": 0.3641470670700073, "learning_rate": 3.930472040466995e-05, "loss": 0.9443, "step": 27030 }, { "epoch": 0.8622723938901113, "grad_norm": 0.3567174971103668, "learning_rate": 3.915461282191693e-05, "loss": 0.9546, "step": 27040 }, { "epoch": 0.8625912816097452, "grad_norm": 0.3644573390483856, "learning_rate": 3.900507851092791e-05, "loss": 0.9586, "step": 27050 }, { "epoch": 0.8629101693293791, "grad_norm": 0.369090735912323, "learning_rate": 3.885611528233638e-05, "loss": 0.9501, "step": 27060 }, { "epoch": 0.8632290570490131, "grad_norm": 0.3611607849597931, "learning_rate": 3.870772095513717e-05, "loss": 0.9329, "step": 27070 }, { "epoch": 0.8635479447686469, "grad_norm": 0.3491160571575165, "learning_rate": 3.855989335665453e-05, "loss": 0.952, "step": 27080 }, { "epoch": 0.8638668324882809, "grad_norm": 0.368991881608963, "learning_rate": 3.841263032251032e-05, "loss": 0.95, "step": 27090 }, { "epoch": 0.8641857202079148, "grad_norm": 0.351935476064682, "learning_rate": 3.8265929696592315e-05, "loss": 0.9426, "step": 27100 }, { "epoch": 0.8645046079275487, "grad_norm": 0.3519881069660187, "learning_rate": 3.811978933102264e-05, "loss": 0.9313, "step": 27110 }, { "epoch": 0.8648234956471826, "grad_norm": 0.3692045509815216, "learning_rate": 3.797420708612632e-05, "loss": 0.9482, "step": 27120 }, { "epoch": 0.8651423833668166, "grad_norm": 0.36540305614471436, "learning_rate": 3.7829180830399963e-05, "loss": 0.9294, "step": 27130 }, { "epoch": 0.8654612710864504, "grad_norm": 0.35637855529785156, "learning_rate": 3.768470844048052e-05, "loss": 0.9244, "step": 27140 }, { "epoch": 0.8657801588060844, "grad_norm": 0.3536495268344879, "learning_rate": 3.7540787801114243e-05, "loss": 0.9418, "step": 27150 }, { "epoch": 0.8660990465257183, "grad_norm": 0.36629825830459595, "learning_rate": 3.739741680512569e-05, "loss": 0.9484, "step": 27160 }, { "epoch": 0.8664179342453522, "grad_norm": 0.3662286698818207, "learning_rate": 3.725459335338685e-05, "loss": 0.9328, "step": 27170 }, { "epoch": 0.8667368219649861, "grad_norm": 0.36647507548332214, "learning_rate": 3.711231535478648e-05, "loss": 0.9442, "step": 27180 }, { "epoch": 0.8670557096846201, "grad_norm": 0.3591477572917938, "learning_rate": 3.697058072619941e-05, "loss": 0.9401, "step": 27190 }, { "epoch": 0.8673745974042539, "grad_norm": 0.35763055086135864, "learning_rate": 3.6829387392456075e-05, "loss": 0.9369, "step": 27200 }, { "epoch": 0.8676934851238879, "grad_norm": 0.3626702129840851, "learning_rate": 3.668873328631214e-05, "loss": 0.937, "step": 27210 }, { "epoch": 0.8680123728435218, "grad_norm": 0.3550717234611511, "learning_rate": 3.6548616348418236e-05, "loss": 0.9322, "step": 27220 }, { "epoch": 0.8683312605631557, "grad_norm": 0.36245885491371155, "learning_rate": 3.640903452728978e-05, "loss": 0.9334, "step": 27230 }, { "epoch": 0.8686501482827896, "grad_norm": 0.36782872676849365, "learning_rate": 3.626998577927698e-05, "loss": 0.9322, "step": 27240 }, { "epoch": 0.8689690360024236, "grad_norm": 0.38292792439460754, "learning_rate": 3.6131468068534876e-05, "loss": 0.9441, "step": 27250 }, { "epoch": 0.8692879237220574, "grad_norm": 0.37048473954200745, "learning_rate": 3.599347936699354e-05, "loss": 0.9355, "step": 27260 }, { "epoch": 0.8696068114416914, "grad_norm": 0.3561244010925293, "learning_rate": 3.585601765432841e-05, "loss": 0.9362, "step": 27270 }, { "epoch": 0.8699256991613253, "grad_norm": 0.3700783848762512, "learning_rate": 3.571908091793068e-05, "loss": 0.9346, "step": 27280 }, { "epoch": 0.8702445868809592, "grad_norm": 0.358857661485672, "learning_rate": 3.558266715287785e-05, "loss": 0.9398, "step": 27290 }, { "epoch": 0.8705634746005931, "grad_norm": 0.3824344277381897, "learning_rate": 3.544677436190435e-05, "loss": 0.9489, "step": 27300 }, { "epoch": 0.8708823623202271, "grad_norm": 0.376953125, "learning_rate": 3.5311400555372326e-05, "loss": 0.9566, "step": 27310 }, { "epoch": 0.8712012500398609, "grad_norm": 0.35705527663230896, "learning_rate": 3.517654375124249e-05, "loss": 0.9313, "step": 27320 }, { "epoch": 0.8715201377594949, "grad_norm": 0.3527824878692627, "learning_rate": 3.5042201975045115e-05, "loss": 0.9491, "step": 27330 }, { "epoch": 0.8718390254791288, "grad_norm": 0.361565500497818, "learning_rate": 3.490837325985108e-05, "loss": 0.9515, "step": 27340 }, { "epoch": 0.8721579131987627, "grad_norm": 0.3611353039741516, "learning_rate": 3.4775055646243186e-05, "loss": 0.933, "step": 27350 }, { "epoch": 0.8724768009183966, "grad_norm": 0.36242130398750305, "learning_rate": 3.464224718228731e-05, "loss": 0.9474, "step": 27360 }, { "epoch": 0.8727956886380306, "grad_norm": 0.36209696531295776, "learning_rate": 3.450994592350395e-05, "loss": 0.9306, "step": 27370 }, { "epoch": 0.8731145763576644, "grad_norm": 0.3568836748600006, "learning_rate": 3.437814993283972e-05, "loss": 0.9371, "step": 27380 }, { "epoch": 0.8734334640772984, "grad_norm": 0.3655485212802887, "learning_rate": 3.424685728063894e-05, "loss": 0.9409, "step": 27390 }, { "epoch": 0.8737523517969323, "grad_norm": 0.36494147777557373, "learning_rate": 3.411606604461545e-05, "loss": 0.9339, "step": 27400 }, { "epoch": 0.8740712395165662, "grad_norm": 0.36795875430107117, "learning_rate": 3.398577430982446e-05, "loss": 0.9249, "step": 27410 }, { "epoch": 0.8743901272362001, "grad_norm": 0.36251020431518555, "learning_rate": 3.385598016863445e-05, "loss": 0.9301, "step": 27420 }, { "epoch": 0.8747090149558341, "grad_norm": 0.35069212317466736, "learning_rate": 3.372668172069933e-05, "loss": 0.9388, "step": 27430 }, { "epoch": 0.8750279026754679, "grad_norm": 0.3661258816719055, "learning_rate": 3.3597877072930536e-05, "loss": 0.9326, "step": 27440 }, { "epoch": 0.8753467903951019, "grad_norm": 0.37232705950737, "learning_rate": 3.3469564339469355e-05, "loss": 0.9388, "step": 27450 }, { "epoch": 0.8756656781147358, "grad_norm": 0.3602105379104614, "learning_rate": 3.334174164165931e-05, "loss": 0.9348, "step": 27460 }, { "epoch": 0.8759845658343697, "grad_norm": 0.3558805286884308, "learning_rate": 3.321440710801865e-05, "loss": 0.9433, "step": 27470 }, { "epoch": 0.8763034535540036, "grad_norm": 0.38075533509254456, "learning_rate": 3.308755887421296e-05, "loss": 0.9363, "step": 27480 }, { "epoch": 0.8766223412736376, "grad_norm": 0.35899147391319275, "learning_rate": 3.296119508302781e-05, "loss": 0.9301, "step": 27490 }, { "epoch": 0.8769412289932714, "grad_norm": 0.3670910894870758, "learning_rate": 3.2835313884341655e-05, "loss": 0.9487, "step": 27500 }, { "epoch": 0.8772601167129054, "grad_norm": 0.35804957151412964, "learning_rate": 3.2709913435098666e-05, "loss": 0.933, "step": 27510 }, { "epoch": 0.8775790044325393, "grad_norm": 0.37163132429122925, "learning_rate": 3.2584991899281825e-05, "loss": 0.9448, "step": 27520 }, { "epoch": 0.8778978921521732, "grad_norm": 0.35967984795570374, "learning_rate": 3.246054744788594e-05, "loss": 0.9144, "step": 27530 }, { "epoch": 0.8782167798718071, "grad_norm": 0.36492854356765747, "learning_rate": 3.233657825889095e-05, "loss": 0.9395, "step": 27540 }, { "epoch": 0.8785356675914411, "grad_norm": 0.3674900233745575, "learning_rate": 3.221308251723522e-05, "loss": 0.9434, "step": 27550 }, { "epoch": 0.8788545553110749, "grad_norm": 0.3588939607143402, "learning_rate": 3.2090058414788956e-05, "loss": 0.9357, "step": 27560 }, { "epoch": 0.8791734430307089, "grad_norm": 0.35960447788238525, "learning_rate": 3.196750415032777e-05, "loss": 0.9354, "step": 27570 }, { "epoch": 0.8794923307503428, "grad_norm": 0.3683167099952698, "learning_rate": 3.1845417929506246e-05, "loss": 0.9415, "step": 27580 }, { "epoch": 0.8798112184699767, "grad_norm": 0.36023595929145813, "learning_rate": 3.1723797964831725e-05, "loss": 0.9382, "step": 27590 }, { "epoch": 0.8801301061896106, "grad_norm": 0.36763185262680054, "learning_rate": 3.160264247563812e-05, "loss": 0.936, "step": 27600 }, { "epoch": 0.8804489939092446, "grad_norm": 0.362166166305542, "learning_rate": 3.1481949688059806e-05, "loss": 0.9333, "step": 27610 }, { "epoch": 0.8807678816288784, "grad_norm": 0.36778682470321655, "learning_rate": 3.1361717835005704e-05, "loss": 0.9392, "step": 27620 }, { "epoch": 0.8810867693485124, "grad_norm": 0.3676985502243042, "learning_rate": 3.1241945156133386e-05, "loss": 0.9399, "step": 27630 }, { "epoch": 0.8814056570681463, "grad_norm": 0.3742704391479492, "learning_rate": 3.1122629897823284e-05, "loss": 0.9337, "step": 27640 }, { "epoch": 0.8817245447877802, "grad_norm": 0.36281928420066833, "learning_rate": 3.100377031315304e-05, "loss": 0.9327, "step": 27650 }, { "epoch": 0.8820434325074141, "grad_norm": 0.3667699992656708, "learning_rate": 3.088536466187193e-05, "loss": 0.9452, "step": 27660 }, { "epoch": 0.8823623202270481, "grad_norm": 0.36892062425613403, "learning_rate": 3.076741121037534e-05, "loss": 0.9261, "step": 27670 }, { "epoch": 0.8826812079466819, "grad_norm": 0.3637031614780426, "learning_rate": 3.064990823167945e-05, "loss": 0.9376, "step": 27680 }, { "epoch": 0.8830000956663159, "grad_norm": 0.3612978458404541, "learning_rate": 3.053285400539591e-05, "loss": 0.9573, "step": 27690 }, { "epoch": 0.8833189833859498, "grad_norm": 0.3765890300273895, "learning_rate": 3.041624681770667e-05, "loss": 0.9367, "step": 27700 }, { "epoch": 0.8836378711055837, "grad_norm": 0.36441487073898315, "learning_rate": 3.030008496133884e-05, "loss": 0.9228, "step": 27710 }, { "epoch": 0.8839567588252176, "grad_norm": 0.361269474029541, "learning_rate": 3.0184366735539748e-05, "loss": 0.9288, "step": 27720 }, { "epoch": 0.8842756465448516, "grad_norm": 0.37258458137512207, "learning_rate": 3.006909044605203e-05, "loss": 0.9353, "step": 27730 }, { "epoch": 0.8845945342644854, "grad_norm": 0.364519864320755, "learning_rate": 2.995425440508881e-05, "loss": 0.9169, "step": 27740 }, { "epoch": 0.8849134219841194, "grad_norm": 0.35370734333992004, "learning_rate": 2.983985693130898e-05, "loss": 0.9495, "step": 27750 }, { "epoch": 0.8852323097037533, "grad_norm": 0.38416677713394165, "learning_rate": 2.9725896349792608e-05, "loss": 0.9371, "step": 27760 }, { "epoch": 0.8855511974233872, "grad_norm": 0.3630027174949646, "learning_rate": 2.9612370992016398e-05, "loss": 0.9366, "step": 27770 }, { "epoch": 0.8858700851430211, "grad_norm": 0.36047670245170593, "learning_rate": 2.9499279195829267e-05, "loss": 0.9219, "step": 27780 }, { "epoch": 0.8861889728626551, "grad_norm": 0.35632213950157166, "learning_rate": 2.9386619305428005e-05, "loss": 0.9305, "step": 27790 }, { "epoch": 0.886507860582289, "grad_norm": 0.3719460368156433, "learning_rate": 2.9274389671333044e-05, "loss": 0.9455, "step": 27800 }, { "epoch": 0.8868267483019229, "grad_norm": 0.37200984358787537, "learning_rate": 2.9162588650364285e-05, "loss": 0.9365, "step": 27810 }, { "epoch": 0.8871456360215568, "grad_norm": 0.3699634373188019, "learning_rate": 2.9051214605617067e-05, "loss": 0.9182, "step": 27820 }, { "epoch": 0.8874645237411908, "grad_norm": 0.36919641494750977, "learning_rate": 2.8940265906438172e-05, "loss": 0.9275, "step": 27830 }, { "epoch": 0.8877834114608246, "grad_norm": 0.3644123673439026, "learning_rate": 2.882974092840196e-05, "loss": 0.9334, "step": 27840 }, { "epoch": 0.8881022991804586, "grad_norm": 0.36178484559059143, "learning_rate": 2.871963805328664e-05, "loss": 0.9486, "step": 27850 }, { "epoch": 0.8884211869000925, "grad_norm": 0.36924728751182556, "learning_rate": 2.860995566905046e-05, "loss": 0.9361, "step": 27860 }, { "epoch": 0.8887400746197264, "grad_norm": 0.349809467792511, "learning_rate": 2.850069216980822e-05, "loss": 0.9321, "step": 27870 }, { "epoch": 0.8890589623393603, "grad_norm": 0.37572547793388367, "learning_rate": 2.8391845955807693e-05, "loss": 0.9389, "step": 27880 }, { "epoch": 0.8893778500589943, "grad_norm": 0.36704355478286743, "learning_rate": 2.8283415433406215e-05, "loss": 0.919, "step": 27890 }, { "epoch": 0.8896967377786281, "grad_norm": 0.35531923174858093, "learning_rate": 2.8175399015047376e-05, "loss": 0.9412, "step": 27900 }, { "epoch": 0.8900156254982621, "grad_norm": 0.35668015480041504, "learning_rate": 2.8067795119237755e-05, "loss": 0.9412, "step": 27910 }, { "epoch": 0.890334513217896, "grad_norm": 0.3683586120605469, "learning_rate": 2.796060217052376e-05, "loss": 0.9495, "step": 27920 }, { "epoch": 0.8906534009375299, "grad_norm": 0.3708488345146179, "learning_rate": 2.785381859946858e-05, "loss": 0.9303, "step": 27930 }, { "epoch": 0.8909722886571638, "grad_norm": 0.3647977113723755, "learning_rate": 2.7747442842629192e-05, "loss": 0.9326, "step": 27940 }, { "epoch": 0.8912911763767978, "grad_norm": 0.3660475015640259, "learning_rate": 2.7641473342533486e-05, "loss": 0.9385, "step": 27950 }, { "epoch": 0.8916100640964316, "grad_norm": 0.373983770608902, "learning_rate": 2.753590854765743e-05, "loss": 0.9424, "step": 27960 }, { "epoch": 0.8919289518160656, "grad_norm": 0.36659717559814453, "learning_rate": 2.7430746912402397e-05, "loss": 0.9463, "step": 27970 }, { "epoch": 0.8922478395356995, "grad_norm": 0.3667260706424713, "learning_rate": 2.73259868970725e-05, "loss": 0.9566, "step": 27980 }, { "epoch": 0.8925667272553334, "grad_norm": 0.3601289689540863, "learning_rate": 2.722162696785207e-05, "loss": 0.9342, "step": 27990 }, { "epoch": 0.8928856149749673, "grad_norm": 0.3438984453678131, "learning_rate": 2.711766559678319e-05, "loss": 0.9327, "step": 28000 }, { "epoch": 0.8932045026946013, "grad_norm": 0.37399184703826904, "learning_rate": 2.7014101261743303e-05, "loss": 0.9334, "step": 28010 }, { "epoch": 0.8935233904142351, "grad_norm": 0.35472747683525085, "learning_rate": 2.6910932446423003e-05, "loss": 0.9376, "step": 28020 }, { "epoch": 0.8938422781338691, "grad_norm": 0.3564119338989258, "learning_rate": 2.6808157640303726e-05, "loss": 0.9407, "step": 28030 }, { "epoch": 0.894161165853503, "grad_norm": 0.3564133644104004, "learning_rate": 2.670577533863571e-05, "loss": 0.9287, "step": 28040 }, { "epoch": 0.8944800535731369, "grad_norm": 0.3710532784461975, "learning_rate": 2.6603784042415948e-05, "loss": 0.9168, "step": 28050 }, { "epoch": 0.8947989412927708, "grad_norm": 0.37403547763824463, "learning_rate": 2.6502182258366217e-05, "loss": 0.9314, "step": 28060 }, { "epoch": 0.8951178290124048, "grad_norm": 0.38338151574134827, "learning_rate": 2.640096849891124e-05, "loss": 0.9309, "step": 28070 }, { "epoch": 0.8954367167320386, "grad_norm": 0.3532122075557709, "learning_rate": 2.630014128215691e-05, "loss": 0.9309, "step": 28080 }, { "epoch": 0.8957556044516726, "grad_norm": 0.37361615896224976, "learning_rate": 2.6199699131868562e-05, "loss": 0.9278, "step": 28090 }, { "epoch": 0.8960744921713065, "grad_norm": 0.36820292472839355, "learning_rate": 2.6099640577449387e-05, "loss": 0.9379, "step": 28100 }, { "epoch": 0.8963933798909404, "grad_norm": 0.3665463626384735, "learning_rate": 2.5999964153918897e-05, "loss": 0.9271, "step": 28110 }, { "epoch": 0.8967122676105743, "grad_norm": 0.3633195161819458, "learning_rate": 2.5900668401891458e-05, "loss": 0.9474, "step": 28120 }, { "epoch": 0.8970311553302083, "grad_norm": 0.36331549286842346, "learning_rate": 2.5801751867554953e-05, "loss": 0.9264, "step": 28130 }, { "epoch": 0.8973500430498421, "grad_norm": 0.3619482219219208, "learning_rate": 2.570321310264946e-05, "loss": 0.9533, "step": 28140 }, { "epoch": 0.8976689307694761, "grad_norm": 0.3781132996082306, "learning_rate": 2.5605050664446088e-05, "loss": 0.9373, "step": 28150 }, { "epoch": 0.89798781848911, "grad_norm": 0.36209985613822937, "learning_rate": 2.550726311572581e-05, "loss": 0.9373, "step": 28160 }, { "epoch": 0.8983067062087439, "grad_norm": 0.3673580288887024, "learning_rate": 2.540984902475846e-05, "loss": 0.9245, "step": 28170 }, { "epoch": 0.8986255939283778, "grad_norm": 0.3630748391151428, "learning_rate": 2.5312806965281734e-05, "loss": 0.9319, "step": 28180 }, { "epoch": 0.8989444816480118, "grad_norm": 0.36070698499679565, "learning_rate": 2.5216135516480365e-05, "loss": 0.9287, "step": 28190 }, { "epoch": 0.8992633693676456, "grad_norm": 0.3583659529685974, "learning_rate": 2.5119833262965248e-05, "loss": 0.9333, "step": 28200 }, { "epoch": 0.8995822570872796, "grad_norm": 0.36007121205329895, "learning_rate": 2.502389879475276e-05, "loss": 0.9368, "step": 28210 }, { "epoch": 0.8999011448069135, "grad_norm": 0.37289687991142273, "learning_rate": 2.4928330707244114e-05, "loss": 0.949, "step": 28220 }, { "epoch": 0.9002200325265474, "grad_norm": 0.3503265678882599, "learning_rate": 2.4833127601204777e-05, "loss": 0.9214, "step": 28230 }, { "epoch": 0.9005389202461813, "grad_norm": 0.35933589935302734, "learning_rate": 2.4738288082744006e-05, "loss": 0.9406, "step": 28240 }, { "epoch": 0.9008578079658153, "grad_norm": 0.36435064673423767, "learning_rate": 2.4643810763294427e-05, "loss": 0.94, "step": 28250 }, { "epoch": 0.9011766956854491, "grad_norm": 0.3533856272697449, "learning_rate": 2.4549694259591695e-05, "loss": 0.9161, "step": 28260 }, { "epoch": 0.9014955834050831, "grad_norm": 0.3634324073791504, "learning_rate": 2.4455937193654268e-05, "loss": 0.9391, "step": 28270 }, { "epoch": 0.901814471124717, "grad_norm": 0.3563332259654999, "learning_rate": 2.4362538192763198e-05, "loss": 0.9248, "step": 28280 }, { "epoch": 0.9021333588443509, "grad_norm": 0.3568084239959717, "learning_rate": 2.4269495889442062e-05, "loss": 0.9422, "step": 28290 }, { "epoch": 0.9024522465639848, "grad_norm": 0.36897915601730347, "learning_rate": 2.417680892143693e-05, "loss": 0.9609, "step": 28300 }, { "epoch": 0.9027711342836188, "grad_norm": 0.3564818799495697, "learning_rate": 2.4084475931696406e-05, "loss": 0.9402, "step": 28310 }, { "epoch": 0.9030900220032526, "grad_norm": 0.36032170057296753, "learning_rate": 2.399249556835179e-05, "loss": 0.928, "step": 28320 }, { "epoch": 0.9034089097228866, "grad_norm": 0.362189382314682, "learning_rate": 2.390086648469725e-05, "loss": 0.9331, "step": 28330 }, { "epoch": 0.9037277974425205, "grad_norm": 0.36130815744400024, "learning_rate": 2.3809587339170133e-05, "loss": 0.9158, "step": 28340 }, { "epoch": 0.9040466851621544, "grad_norm": 0.3681578040122986, "learning_rate": 2.3718656795331296e-05, "loss": 0.9383, "step": 28350 }, { "epoch": 0.9043655728817883, "grad_norm": 0.359544038772583, "learning_rate": 2.362807352184559e-05, "loss": 0.9365, "step": 28360 }, { "epoch": 0.9046844606014223, "grad_norm": 0.36564376950263977, "learning_rate": 2.3537836192462286e-05, "loss": 0.9365, "step": 28370 }, { "epoch": 0.9050033483210561, "grad_norm": 0.36053574085235596, "learning_rate": 2.344794348599573e-05, "loss": 0.9249, "step": 28380 }, { "epoch": 0.9053222360406901, "grad_norm": 0.3556235134601593, "learning_rate": 2.3358394086305966e-05, "loss": 0.9233, "step": 28390 }, { "epoch": 0.905641123760324, "grad_norm": 0.3614828884601593, "learning_rate": 2.3269186682279475e-05, "loss": 0.9318, "step": 28400 }, { "epoch": 0.9059600114799579, "grad_norm": 0.3779839277267456, "learning_rate": 2.3180319967809967e-05, "loss": 0.9378, "step": 28410 }, { "epoch": 0.9062788991995918, "grad_norm": 0.3644469678401947, "learning_rate": 2.3091792641779272e-05, "loss": 0.9291, "step": 28420 }, { "epoch": 0.9065977869192258, "grad_norm": 0.35818055272102356, "learning_rate": 2.300360340803829e-05, "loss": 0.9551, "step": 28430 }, { "epoch": 0.9069166746388596, "grad_norm": 0.3682006299495697, "learning_rate": 2.2915750975388005e-05, "loss": 0.9419, "step": 28440 }, { "epoch": 0.9072355623584936, "grad_norm": 0.3534643352031708, "learning_rate": 2.2828234057560574e-05, "loss": 0.9411, "step": 28450 }, { "epoch": 0.9075544500781275, "grad_norm": 0.37065860629081726, "learning_rate": 2.2741051373200522e-05, "loss": 0.9274, "step": 28460 }, { "epoch": 0.9078733377977614, "grad_norm": 0.3683619797229767, "learning_rate": 2.265420164584595e-05, "loss": 0.9381, "step": 28470 }, { "epoch": 0.9081922255173953, "grad_norm": 0.3642658293247223, "learning_rate": 2.2567683603909864e-05, "loss": 0.9379, "step": 28480 }, { "epoch": 0.9085111132370293, "grad_norm": 0.38277313113212585, "learning_rate": 2.2481495980661557e-05, "loss": 0.9378, "step": 28490 }, { "epoch": 0.9088300009566631, "grad_norm": 0.3668515682220459, "learning_rate": 2.239563751420805e-05, "loss": 0.9369, "step": 28500 }, { "epoch": 0.9091488886762971, "grad_norm": 0.36150866746902466, "learning_rate": 2.2310106947475637e-05, "loss": 0.9302, "step": 28510 }, { "epoch": 0.909467776395931, "grad_norm": 0.3532048463821411, "learning_rate": 2.2224903028191445e-05, "loss": 0.9422, "step": 28520 }, { "epoch": 0.9097866641155649, "grad_norm": 0.3641529083251953, "learning_rate": 2.2140024508865157e-05, "loss": 0.906, "step": 28530 }, { "epoch": 0.9101055518351988, "grad_norm": 0.3548017740249634, "learning_rate": 2.205547014677069e-05, "loss": 0.9347, "step": 28540 }, { "epoch": 0.9104244395548328, "grad_norm": 0.35412734746932983, "learning_rate": 2.197123870392802e-05, "loss": 0.9245, "step": 28550 }, { "epoch": 0.9107433272744666, "grad_norm": 0.36349058151245117, "learning_rate": 2.1887328947085065e-05, "loss": 0.9468, "step": 28560 }, { "epoch": 0.9110622149941006, "grad_norm": 0.3631904125213623, "learning_rate": 2.1803739647699623e-05, "loss": 0.9366, "step": 28570 }, { "epoch": 0.9113811027137345, "grad_norm": 0.35928815603256226, "learning_rate": 2.172046958192138e-05, "loss": 0.9385, "step": 28580 }, { "epoch": 0.9116999904333684, "grad_norm": 0.3604944944381714, "learning_rate": 2.1637517530574002e-05, "loss": 0.9448, "step": 28590 }, { "epoch": 0.9120188781530023, "grad_norm": 0.38209038972854614, "learning_rate": 2.155488227913727e-05, "loss": 0.9502, "step": 28600 }, { "epoch": 0.9123377658726363, "grad_norm": 0.3778879642486572, "learning_rate": 2.14725626177293e-05, "loss": 0.9441, "step": 28610 }, { "epoch": 0.9126566535922701, "grad_norm": 0.36064228415489197, "learning_rate": 2.139055734108886e-05, "loss": 0.9304, "step": 28620 }, { "epoch": 0.9129755413119041, "grad_norm": 0.3631671071052551, "learning_rate": 2.1308865248557674e-05, "loss": 0.9226, "step": 28630 }, { "epoch": 0.913294429031538, "grad_norm": 0.35460859537124634, "learning_rate": 2.122748514406288e-05, "loss": 0.9311, "step": 28640 }, { "epoch": 0.913613316751172, "grad_norm": 0.38259729743003845, "learning_rate": 2.1146415836099496e-05, "loss": 0.9268, "step": 28650 }, { "epoch": 0.9139322044708058, "grad_norm": 0.37809115648269653, "learning_rate": 2.1065656137712995e-05, "loss": 0.9334, "step": 28660 }, { "epoch": 0.9142510921904398, "grad_norm": 0.36083880066871643, "learning_rate": 2.0985204866481902e-05, "loss": 0.9367, "step": 28670 }, { "epoch": 0.9145699799100737, "grad_norm": 0.36416465044021606, "learning_rate": 2.0905060844500506e-05, "loss": 0.9435, "step": 28680 }, { "epoch": 0.9148888676297076, "grad_norm": 0.359147846698761, "learning_rate": 2.0825222898361594e-05, "loss": 0.9449, "step": 28690 }, { "epoch": 0.9152077553493415, "grad_norm": 0.36608290672302246, "learning_rate": 2.07456898591393e-05, "loss": 0.9321, "step": 28700 }, { "epoch": 0.9155266430689755, "grad_norm": 0.3677798807621002, "learning_rate": 2.0666460562371957e-05, "loss": 0.9344, "step": 28710 }, { "epoch": 0.9158455307886093, "grad_norm": 0.3638545572757721, "learning_rate": 2.0587533848045053e-05, "loss": 0.9397, "step": 28720 }, { "epoch": 0.9161644185082433, "grad_norm": 0.37101566791534424, "learning_rate": 2.050890856057427e-05, "loss": 0.9513, "step": 28730 }, { "epoch": 0.9164833062278772, "grad_norm": 0.3840157091617584, "learning_rate": 2.0430583548788544e-05, "loss": 0.9536, "step": 28740 }, { "epoch": 0.9168021939475111, "grad_norm": 0.3592657446861267, "learning_rate": 2.0352557665913218e-05, "loss": 0.9424, "step": 28750 }, { "epoch": 0.917121081667145, "grad_norm": 0.3559933602809906, "learning_rate": 2.027482976955325e-05, "loss": 0.9398, "step": 28760 }, { "epoch": 0.917439969386779, "grad_norm": 0.38258349895477295, "learning_rate": 2.0197398721676495e-05, "loss": 0.9341, "step": 28770 }, { "epoch": 0.9177588571064128, "grad_norm": 0.3753907084465027, "learning_rate": 2.0120263388597025e-05, "loss": 0.9365, "step": 28780 }, { "epoch": 0.9180777448260468, "grad_norm": 0.351815789937973, "learning_rate": 2.004342264095854e-05, "loss": 0.932, "step": 28790 }, { "epoch": 0.9183966325456807, "grad_norm": 0.35386383533477783, "learning_rate": 1.9966875353717854e-05, "loss": 0.9312, "step": 28800 }, { "epoch": 0.9187155202653146, "grad_norm": 0.36701905727386475, "learning_rate": 1.9890620406128368e-05, "loss": 0.9287, "step": 28810 }, { "epoch": 0.9190344079849485, "grad_norm": 0.369667112827301, "learning_rate": 1.981465668172373e-05, "loss": 0.9143, "step": 28820 }, { "epoch": 0.9193532957045825, "grad_norm": 0.36041995882987976, "learning_rate": 1.9738983068301432e-05, "loss": 0.928, "step": 28830 }, { "epoch": 0.9196721834242163, "grad_norm": 0.36003410816192627, "learning_rate": 1.966359845790656e-05, "loss": 0.935, "step": 28840 }, { "epoch": 0.9199910711438503, "grad_norm": 0.3556961715221405, "learning_rate": 1.9588501746815556e-05, "loss": 0.9373, "step": 28850 }, { "epoch": 0.9203099588634842, "grad_norm": 0.36700302362442017, "learning_rate": 1.9513691835520046e-05, "loss": 0.9397, "step": 28860 }, { "epoch": 0.920628846583118, "grad_norm": 0.36221426725387573, "learning_rate": 1.9439167628710803e-05, "loss": 0.9221, "step": 28870 }, { "epoch": 0.920947734302752, "grad_norm": 0.36436378955841064, "learning_rate": 1.936492803526162e-05, "loss": 0.9292, "step": 28880 }, { "epoch": 0.921266622022386, "grad_norm": 0.3724479079246521, "learning_rate": 1.9290971968213404e-05, "loss": 0.9406, "step": 28890 }, { "epoch": 0.9215855097420198, "grad_norm": 0.3676651418209076, "learning_rate": 1.9217298344758223e-05, "loss": 0.9428, "step": 28900 }, { "epoch": 0.9219043974616538, "grad_norm": 0.36604923009872437, "learning_rate": 1.9143906086223483e-05, "loss": 0.9308, "step": 28910 }, { "epoch": 0.9222232851812877, "grad_norm": 0.35990041494369507, "learning_rate": 1.907079411805611e-05, "loss": 0.9362, "step": 28920 }, { "epoch": 0.9225421729009216, "grad_norm": 0.3726346492767334, "learning_rate": 1.8997961369806826e-05, "loss": 0.9438, "step": 28930 }, { "epoch": 0.9228610606205555, "grad_norm": 0.3746441900730133, "learning_rate": 1.8925406775114476e-05, "loss": 0.9323, "step": 28940 }, { "epoch": 0.9231799483401895, "grad_norm": 0.3630741834640503, "learning_rate": 1.8853129271690422e-05, "loss": 0.9359, "step": 28950 }, { "epoch": 0.9234988360598233, "grad_norm": 0.3569645583629608, "learning_rate": 1.878112780130298e-05, "loss": 0.9336, "step": 28960 }, { "epoch": 0.9238177237794573, "grad_norm": 0.37686362862586975, "learning_rate": 1.8709401309761924e-05, "loss": 0.9282, "step": 28970 }, { "epoch": 0.9241366114990912, "grad_norm": 0.37148985266685486, "learning_rate": 1.863794874690307e-05, "loss": 0.9229, "step": 28980 }, { "epoch": 0.924455499218725, "grad_norm": 0.3649923503398895, "learning_rate": 1.8566769066572868e-05, "loss": 0.9283, "step": 28990 }, { "epoch": 0.924774386938359, "grad_norm": 0.3782605528831482, "learning_rate": 1.849586122661313e-05, "loss": 0.9235, "step": 29000 }, { "epoch": 0.925093274657993, "grad_norm": 0.3647046387195587, "learning_rate": 1.842522418884572e-05, "loss": 0.9334, "step": 29010 }, { "epoch": 0.9254121623776268, "grad_norm": 0.3661080300807953, "learning_rate": 1.8354856919057388e-05, "loss": 0.934, "step": 29020 }, { "epoch": 0.9257310500972608, "grad_norm": 0.36003896594047546, "learning_rate": 1.8284758386984637e-05, "loss": 0.9305, "step": 29030 }, { "epoch": 0.9260499378168947, "grad_norm": 0.3618946373462677, "learning_rate": 1.8214927566298603e-05, "loss": 0.9215, "step": 29040 }, { "epoch": 0.9263688255365286, "grad_norm": 0.35207855701446533, "learning_rate": 1.814536343459005e-05, "loss": 0.9259, "step": 29050 }, { "epoch": 0.9266877132561625, "grad_norm": 0.3590606153011322, "learning_rate": 1.8076064973354396e-05, "loss": 0.9411, "step": 29060 }, { "epoch": 0.9270066009757965, "grad_norm": 0.36371833086013794, "learning_rate": 1.80070311679768e-05, "loss": 0.9291, "step": 29070 }, { "epoch": 0.9273254886954303, "grad_norm": 0.35812294483184814, "learning_rate": 1.793826100771732e-05, "loss": 0.9124, "step": 29080 }, { "epoch": 0.9276443764150643, "grad_norm": 0.36259791254997253, "learning_rate": 1.7869753485696093e-05, "loss": 0.9206, "step": 29090 }, { "epoch": 0.9279632641346982, "grad_norm": 0.3575153648853302, "learning_rate": 1.7801507598878604e-05, "loss": 0.94, "step": 29100 }, { "epoch": 0.928282151854332, "grad_norm": 0.37916064262390137, "learning_rate": 1.7733522348061006e-05, "loss": 0.9586, "step": 29110 }, { "epoch": 0.928601039573966, "grad_norm": 0.3657781779766083, "learning_rate": 1.7665796737855474e-05, "loss": 0.9447, "step": 29120 }, { "epoch": 0.9289199272936, "grad_norm": 0.3646107614040375, "learning_rate": 1.7598329776675653e-05, "loss": 0.926, "step": 29130 }, { "epoch": 0.9292388150132338, "grad_norm": 0.3722059428691864, "learning_rate": 1.753112047672212e-05, "loss": 0.9316, "step": 29140 }, { "epoch": 0.9295577027328678, "grad_norm": 0.3507155776023865, "learning_rate": 1.7464167853967936e-05, "loss": 0.9156, "step": 29150 }, { "epoch": 0.9298765904525017, "grad_norm": 0.36297523975372314, "learning_rate": 1.739747092814422e-05, "loss": 0.9153, "step": 29160 }, { "epoch": 0.9301954781721355, "grad_norm": 0.37203118205070496, "learning_rate": 1.7331028722725825e-05, "loss": 0.9411, "step": 29170 }, { "epoch": 0.9305143658917695, "grad_norm": 0.3708671033382416, "learning_rate": 1.726484026491702e-05, "loss": 0.9293, "step": 29180 }, { "epoch": 0.9308332536114035, "grad_norm": 0.3578820824623108, "learning_rate": 1.7198904585637235e-05, "loss": 0.9084, "step": 29190 }, { "epoch": 0.9311521413310373, "grad_norm": 0.37341031432151794, "learning_rate": 1.713322071950692e-05, "loss": 0.9341, "step": 29200 }, { "epoch": 0.9314710290506713, "grad_norm": 0.3641612231731415, "learning_rate": 1.7067787704833355e-05, "loss": 0.9479, "step": 29210 }, { "epoch": 0.9317899167703052, "grad_norm": 0.3764784634113312, "learning_rate": 1.7002604583596607e-05, "loss": 0.9345, "step": 29220 }, { "epoch": 0.932108804489939, "grad_norm": 0.3550204336643219, "learning_rate": 1.6937670401435477e-05, "loss": 0.9257, "step": 29230 }, { "epoch": 0.932427692209573, "grad_norm": 0.36775845289230347, "learning_rate": 1.6872984207633558e-05, "loss": 0.9263, "step": 29240 }, { "epoch": 0.932746579929207, "grad_norm": 0.3655807673931122, "learning_rate": 1.6808545055105283e-05, "loss": 0.9296, "step": 29250 }, { "epoch": 0.9330654676488408, "grad_norm": 0.3733557164669037, "learning_rate": 1.6744352000382084e-05, "loss": 0.9325, "step": 29260 }, { "epoch": 0.9333843553684747, "grad_norm": 0.37416717410087585, "learning_rate": 1.6680404103598565e-05, "loss": 0.9246, "step": 29270 }, { "epoch": 0.9337032430881087, "grad_norm": 0.3751501739025116, "learning_rate": 1.6616700428478738e-05, "loss": 0.9244, "step": 29280 }, { "epoch": 0.9340221308077425, "grad_norm": 0.36550331115722656, "learning_rate": 1.6553240042322332e-05, "loss": 0.9258, "step": 29290 }, { "epoch": 0.9343410185273765, "grad_norm": 0.36365216970443726, "learning_rate": 1.6490022015991115e-05, "loss": 0.9206, "step": 29300 }, { "epoch": 0.9346599062470105, "grad_norm": 0.36747580766677856, "learning_rate": 1.6427045423895318e-05, "loss": 0.9354, "step": 29310 }, { "epoch": 0.9349787939666443, "grad_norm": 0.3531034588813782, "learning_rate": 1.636430934398004e-05, "loss": 0.9283, "step": 29320 }, { "epoch": 0.9352976816862782, "grad_norm": 0.36130332946777344, "learning_rate": 1.63018128577118e-05, "loss": 0.9481, "step": 29330 }, { "epoch": 0.9356165694059122, "grad_norm": 0.3671252727508545, "learning_rate": 1.623955505006505e-05, "loss": 0.9184, "step": 29340 }, { "epoch": 0.935935457125546, "grad_norm": 0.36904624104499817, "learning_rate": 1.61775350095088e-05, "loss": 0.9209, "step": 29350 }, { "epoch": 0.93625434484518, "grad_norm": 0.3691551685333252, "learning_rate": 1.611575182799324e-05, "loss": 0.9094, "step": 29360 }, { "epoch": 0.936573232564814, "grad_norm": 0.3752463161945343, "learning_rate": 1.605420460093651e-05, "loss": 0.9387, "step": 29370 }, { "epoch": 0.9368921202844478, "grad_norm": 0.36518752574920654, "learning_rate": 1.5992892427211376e-05, "loss": 0.9332, "step": 29380 }, { "epoch": 0.9372110080040817, "grad_norm": 0.37571388483047485, "learning_rate": 1.5931814409132093e-05, "loss": 0.9314, "step": 29390 }, { "epoch": 0.9375298957237157, "grad_norm": 0.3575666844844818, "learning_rate": 1.587096965244123e-05, "loss": 0.9375, "step": 29400 }, { "epoch": 0.9378487834433495, "grad_norm": 0.3595915138721466, "learning_rate": 1.5810357266296604e-05, "loss": 0.9316, "step": 29410 }, { "epoch": 0.9381676711629835, "grad_norm": 0.36832576990127563, "learning_rate": 1.5749976363258215e-05, "loss": 0.936, "step": 29420 }, { "epoch": 0.9384865588826174, "grad_norm": 0.38170093297958374, "learning_rate": 1.5689826059275262e-05, "loss": 0.9243, "step": 29430 }, { "epoch": 0.9388054466022513, "grad_norm": 0.36499080061912537, "learning_rate": 1.5629905473673202e-05, "loss": 0.9185, "step": 29440 }, { "epoch": 0.9391243343218852, "grad_norm": 0.3608885407447815, "learning_rate": 1.5570213729140846e-05, "loss": 0.9256, "step": 29450 }, { "epoch": 0.9394432220415192, "grad_norm": 0.3616710901260376, "learning_rate": 1.5510749951717523e-05, "loss": 0.9342, "step": 29460 }, { "epoch": 0.9397621097611532, "grad_norm": 0.3602309226989746, "learning_rate": 1.5451513270780287e-05, "loss": 0.9291, "step": 29470 }, { "epoch": 0.940080997480787, "grad_norm": 0.3594343662261963, "learning_rate": 1.539250281903115e-05, "loss": 0.9269, "step": 29480 }, { "epoch": 0.940399885200421, "grad_norm": 0.37072819471359253, "learning_rate": 1.5333717732484415e-05, "loss": 0.928, "step": 29490 }, { "epoch": 0.9407187729200549, "grad_norm": 0.3712194263935089, "learning_rate": 1.5275157150454e-05, "loss": 0.9367, "step": 29500 }, { "epoch": 0.9410376606396887, "grad_norm": 0.3576986789703369, "learning_rate": 1.5216820215540841e-05, "loss": 0.9242, "step": 29510 }, { "epoch": 0.9413565483593227, "grad_norm": 0.3759133517742157, "learning_rate": 1.5158706073620354e-05, "loss": 0.9216, "step": 29520 }, { "epoch": 0.9416754360789567, "grad_norm": 0.37520280480384827, "learning_rate": 1.5100813873829904e-05, "loss": 0.9113, "step": 29530 }, { "epoch": 0.9419943237985905, "grad_norm": 0.36566489934921265, "learning_rate": 1.5043142768556388e-05, "loss": 0.9398, "step": 29540 }, { "epoch": 0.9423132115182244, "grad_norm": 0.38121917843818665, "learning_rate": 1.4985691913423778e-05, "loss": 0.9336, "step": 29550 }, { "epoch": 0.9426320992378584, "grad_norm": 0.3673277795314789, "learning_rate": 1.4928460467280777e-05, "loss": 0.9235, "step": 29560 }, { "epoch": 0.9429509869574922, "grad_norm": 0.36724069714546204, "learning_rate": 1.4871447592188524e-05, "loss": 0.938, "step": 29570 }, { "epoch": 0.9432698746771262, "grad_norm": 0.3662523925304413, "learning_rate": 1.4814652453408288e-05, "loss": 0.9178, "step": 29580 }, { "epoch": 0.9435887623967602, "grad_norm": 0.37952548265457153, "learning_rate": 1.4758074219389278e-05, "loss": 0.9195, "step": 29590 }, { "epoch": 0.943907650116394, "grad_norm": 0.3726538121700287, "learning_rate": 1.4701712061756454e-05, "loss": 0.9359, "step": 29600 }, { "epoch": 0.944226537836028, "grad_norm": 0.36995553970336914, "learning_rate": 1.4645565155298395e-05, "loss": 0.9376, "step": 29610 }, { "epoch": 0.9445454255556619, "grad_norm": 0.3546082675457001, "learning_rate": 1.458963267795523e-05, "loss": 0.9383, "step": 29620 }, { "epoch": 0.9448643132752957, "grad_norm": 0.38009509444236755, "learning_rate": 1.4533913810806589e-05, "loss": 0.9405, "step": 29630 }, { "epoch": 0.9451832009949297, "grad_norm": 0.37401118874549866, "learning_rate": 1.4478407738059622e-05, "loss": 0.9234, "step": 29640 }, { "epoch": 0.9455020887145636, "grad_norm": 0.36730608344078064, "learning_rate": 1.4423113647037045e-05, "loss": 0.9182, "step": 29650 }, { "epoch": 0.9458209764341975, "grad_norm": 0.3604319393634796, "learning_rate": 1.4368030728165257e-05, "loss": 0.928, "step": 29660 }, { "epoch": 0.9461398641538314, "grad_norm": 0.3672143518924713, "learning_rate": 1.4313158174962467e-05, "loss": 0.9416, "step": 29670 }, { "epoch": 0.9464587518734654, "grad_norm": 0.36120569705963135, "learning_rate": 1.4258495184026909e-05, "loss": 0.9331, "step": 29680 }, { "epoch": 0.9467776395930992, "grad_norm": 0.3656829297542572, "learning_rate": 1.4204040955025053e-05, "loss": 0.9283, "step": 29690 }, { "epoch": 0.9470965273127332, "grad_norm": 0.3733628988265991, "learning_rate": 1.4149794690679904e-05, "loss": 0.9242, "step": 29700 }, { "epoch": 0.9474154150323671, "grad_norm": 0.35865920782089233, "learning_rate": 1.409575559675934e-05, "loss": 0.9078, "step": 29710 }, { "epoch": 0.947734302752001, "grad_norm": 0.3627816438674927, "learning_rate": 1.4041922882064455e-05, "loss": 0.9204, "step": 29720 }, { "epoch": 0.9480531904716349, "grad_norm": 0.37665456533432007, "learning_rate": 1.398829575841799e-05, "loss": 0.9425, "step": 29730 }, { "epoch": 0.9483720781912689, "grad_norm": 0.36541375517845154, "learning_rate": 1.3934873440652796e-05, "loss": 0.9157, "step": 29740 }, { "epoch": 0.9486909659109027, "grad_norm": 0.3650599420070648, "learning_rate": 1.3881655146600332e-05, "loss": 0.9322, "step": 29750 }, { "epoch": 0.9490098536305367, "grad_norm": 0.35816624760627747, "learning_rate": 1.3828640097079218e-05, "loss": 0.9298, "step": 29760 }, { "epoch": 0.9493287413501706, "grad_norm": 0.3787805140018463, "learning_rate": 1.377582751588382e-05, "loss": 0.926, "step": 29770 }, { "epoch": 0.9496476290698045, "grad_norm": 0.3747621476650238, "learning_rate": 1.3723216629772897e-05, "loss": 0.9266, "step": 29780 }, { "epoch": 0.9499665167894384, "grad_norm": 0.3703758716583252, "learning_rate": 1.3670806668458264e-05, "loss": 0.9331, "step": 29790 }, { "epoch": 0.9502854045090724, "grad_norm": 0.3748500347137451, "learning_rate": 1.3618596864593529e-05, "loss": 0.9279, "step": 29800 }, { "epoch": 0.9506042922287062, "grad_norm": 0.36413055658340454, "learning_rate": 1.3566586453762848e-05, "loss": 0.9146, "step": 29810 }, { "epoch": 0.9509231799483402, "grad_norm": 0.3669073283672333, "learning_rate": 1.3514774674469737e-05, "loss": 0.9376, "step": 29820 }, { "epoch": 0.9512420676679741, "grad_norm": 0.3571595847606659, "learning_rate": 1.3463160768125927e-05, "loss": 0.9411, "step": 29830 }, { "epoch": 0.951560955387608, "grad_norm": 0.3696027398109436, "learning_rate": 1.3411743979040244e-05, "loss": 0.9342, "step": 29840 }, { "epoch": 0.9518798431072419, "grad_norm": 0.3621768057346344, "learning_rate": 1.3360523554407562e-05, "loss": 0.9339, "step": 29850 }, { "epoch": 0.9521987308268759, "grad_norm": 0.3710097372531891, "learning_rate": 1.3309498744297768e-05, "loss": 0.922, "step": 29860 }, { "epoch": 0.9525176185465097, "grad_norm": 0.3726564347743988, "learning_rate": 1.3258668801644778e-05, "loss": 0.9394, "step": 29870 }, { "epoch": 0.9528365062661437, "grad_norm": 0.35227444767951965, "learning_rate": 1.3208032982235637e-05, "loss": 0.9212, "step": 29880 }, { "epoch": 0.9531553939857776, "grad_norm": 0.3657701313495636, "learning_rate": 1.315759054469956e-05, "loss": 0.9183, "step": 29890 }, { "epoch": 0.9534742817054115, "grad_norm": 0.36237528920173645, "learning_rate": 1.3107340750497128e-05, "loss": 0.9242, "step": 29900 }, { "epoch": 0.9537931694250454, "grad_norm": 0.35844841599464417, "learning_rate": 1.3057282863909452e-05, "loss": 0.9318, "step": 29910 }, { "epoch": 0.9541120571446794, "grad_norm": 0.36002638936042786, "learning_rate": 1.3007416152027412e-05, "loss": 0.9337, "step": 29920 }, { "epoch": 0.9544309448643132, "grad_norm": 0.3549569547176361, "learning_rate": 1.2957739884740917e-05, "loss": 0.9228, "step": 29930 }, { "epoch": 0.9547498325839472, "grad_norm": 0.3807508945465088, "learning_rate": 1.290825333472822e-05, "loss": 0.9276, "step": 29940 }, { "epoch": 0.9550687203035811, "grad_norm": 0.37096288800239563, "learning_rate": 1.2858955777445266e-05, "loss": 0.9555, "step": 29950 }, { "epoch": 0.955387608023215, "grad_norm": 0.36706507205963135, "learning_rate": 1.2809846491115096e-05, "loss": 0.9261, "step": 29960 }, { "epoch": 0.9557064957428489, "grad_norm": 0.3656713366508484, "learning_rate": 1.276092475671726e-05, "loss": 0.937, "step": 29970 }, { "epoch": 0.9560253834624829, "grad_norm": 0.36048075556755066, "learning_rate": 1.27121898579773e-05, "loss": 0.9251, "step": 29980 }, { "epoch": 0.9563442711821167, "grad_norm": 0.3719075620174408, "learning_rate": 1.266364108135627e-05, "loss": 0.9472, "step": 29990 }, { "epoch": 0.9566631589017507, "grad_norm": 0.3709539771080017, "learning_rate": 1.2615277716040274e-05, "loss": 0.9418, "step": 30000 }, { "epoch": 0.9569820466213846, "grad_norm": 0.3729771673679352, "learning_rate": 1.2567099053930065e-05, "loss": 0.943, "step": 30010 }, { "epoch": 0.9573009343410185, "grad_norm": 0.37757793068885803, "learning_rate": 1.2519104389630684e-05, "loss": 0.9169, "step": 30020 }, { "epoch": 0.9576198220606524, "grad_norm": 0.3567389249801636, "learning_rate": 1.2471293020441117e-05, "loss": 0.9265, "step": 30030 }, { "epoch": 0.9579387097802864, "grad_norm": 0.36267825961112976, "learning_rate": 1.2423664246344036e-05, "loss": 0.9259, "step": 30040 }, { "epoch": 0.9582575974999202, "grad_norm": 0.3645220398902893, "learning_rate": 1.2376217369995511e-05, "loss": 0.9184, "step": 30050 }, { "epoch": 0.9585764852195542, "grad_norm": 0.37258005142211914, "learning_rate": 1.2328951696714822e-05, "loss": 0.946, "step": 30060 }, { "epoch": 0.9588953729391881, "grad_norm": 0.3623313903808594, "learning_rate": 1.2281866534474292e-05, "loss": 0.9436, "step": 30070 }, { "epoch": 0.959214260658822, "grad_norm": 0.36983102560043335, "learning_rate": 1.2234961193889144e-05, "loss": 0.9276, "step": 30080 }, { "epoch": 0.9595331483784559, "grad_norm": 0.37520304322242737, "learning_rate": 1.218823498820741e-05, "loss": 0.9289, "step": 30090 }, { "epoch": 0.9598520360980899, "grad_norm": 0.37535175681114197, "learning_rate": 1.214168723329988e-05, "loss": 0.9358, "step": 30100 }, { "epoch": 0.9601709238177237, "grad_norm": 0.3650328516960144, "learning_rate": 1.2095317247650083e-05, "loss": 0.9339, "step": 30110 }, { "epoch": 0.9604898115373577, "grad_norm": 0.36983540654182434, "learning_rate": 1.204912435234431e-05, "loss": 0.9241, "step": 30120 }, { "epoch": 0.9608086992569916, "grad_norm": 0.3635700047016144, "learning_rate": 1.200310787106167e-05, "loss": 0.9177, "step": 30130 }, { "epoch": 0.9611275869766255, "grad_norm": 0.37635406851768494, "learning_rate": 1.195726713006419e-05, "loss": 0.9353, "step": 30140 }, { "epoch": 0.9614464746962594, "grad_norm": 0.36136430501937866, "learning_rate": 1.1911601458186958e-05, "loss": 0.9326, "step": 30150 }, { "epoch": 0.9617653624158934, "grad_norm": 0.36432045698165894, "learning_rate": 1.186611018682828e-05, "loss": 0.9207, "step": 30160 }, { "epoch": 0.9620842501355272, "grad_norm": 0.3685867488384247, "learning_rate": 1.1820792649939912e-05, "loss": 0.9199, "step": 30170 }, { "epoch": 0.9624031378551612, "grad_norm": 0.3692206144332886, "learning_rate": 1.1775648184017282e-05, "loss": 0.9319, "step": 30180 }, { "epoch": 0.9627220255747951, "grad_norm": 0.37184929847717285, "learning_rate": 1.1730676128089802e-05, "loss": 0.9388, "step": 30190 }, { "epoch": 0.963040913294429, "grad_norm": 0.3655369281768799, "learning_rate": 1.1685875823711168e-05, "loss": 0.9248, "step": 30200 }, { "epoch": 0.9633598010140629, "grad_norm": 0.3684496283531189, "learning_rate": 1.164124661494975e-05, "loss": 0.9341, "step": 30210 }, { "epoch": 0.9636786887336969, "grad_norm": 0.36519452929496765, "learning_rate": 1.1596787848378949e-05, "loss": 0.9407, "step": 30220 }, { "epoch": 0.9639975764533307, "grad_norm": 0.3807597756385803, "learning_rate": 1.1552498873067655e-05, "loss": 0.9397, "step": 30230 }, { "epoch": 0.9643164641729647, "grad_norm": 0.36671775579452515, "learning_rate": 1.1508379040570714e-05, "loss": 0.9339, "step": 30240 }, { "epoch": 0.9646353518925986, "grad_norm": 0.3577711582183838, "learning_rate": 1.146442770491943e-05, "loss": 0.9075, "step": 30250 }, { "epoch": 0.9649542396122325, "grad_norm": 0.3761603832244873, "learning_rate": 1.1420644222612106e-05, "loss": 0.9202, "step": 30260 }, { "epoch": 0.9652731273318664, "grad_norm": 0.36671191453933716, "learning_rate": 1.1377027952604628e-05, "loss": 0.9265, "step": 30270 }, { "epoch": 0.9655920150515004, "grad_norm": 0.3775249421596527, "learning_rate": 1.1333578256301075e-05, "loss": 0.9136, "step": 30280 }, { "epoch": 0.9659109027711343, "grad_norm": 0.3641510307788849, "learning_rate": 1.129029449754437e-05, "loss": 0.9238, "step": 30290 }, { "epoch": 0.9662297904907682, "grad_norm": 0.3587871491909027, "learning_rate": 1.1247176042606964e-05, "loss": 0.9374, "step": 30300 }, { "epoch": 0.9665486782104021, "grad_norm": 0.3657750189304352, "learning_rate": 1.1204222260181564e-05, "loss": 0.9234, "step": 30310 }, { "epoch": 0.9668675659300361, "grad_norm": 0.367485374212265, "learning_rate": 1.1161432521371883e-05, "loss": 0.9302, "step": 30320 }, { "epoch": 0.9671864536496699, "grad_norm": 0.3634987771511078, "learning_rate": 1.1118806199683434e-05, "loss": 0.9283, "step": 30330 }, { "epoch": 0.9675053413693039, "grad_norm": 0.3690277338027954, "learning_rate": 1.1076342671014357e-05, "loss": 0.9502, "step": 30340 }, { "epoch": 0.9678242290889378, "grad_norm": 0.39022156596183777, "learning_rate": 1.1034041313646285e-05, "loss": 0.944, "step": 30350 }, { "epoch": 0.9681431168085717, "grad_norm": 0.3651580214500427, "learning_rate": 1.099190150823524e-05, "loss": 0.9317, "step": 30360 }, { "epoch": 0.9684620045282056, "grad_norm": 0.37234464287757874, "learning_rate": 1.0949922637802553e-05, "loss": 0.9482, "step": 30370 }, { "epoch": 0.9687808922478396, "grad_norm": 0.36375316977500916, "learning_rate": 1.0908104087725861e-05, "loss": 0.9191, "step": 30380 }, { "epoch": 0.9690997799674734, "grad_norm": 0.36848706007003784, "learning_rate": 1.0866445245730072e-05, "loss": 0.927, "step": 30390 }, { "epoch": 0.9694186676871074, "grad_norm": 0.3552470803260803, "learning_rate": 1.0824945501878422e-05, "loss": 0.9389, "step": 30400 }, { "epoch": 0.9697375554067413, "grad_norm": 0.3650756776332855, "learning_rate": 1.0783604248563538e-05, "loss": 0.9215, "step": 30410 }, { "epoch": 0.9700564431263752, "grad_norm": 0.36942705512046814, "learning_rate": 1.0742420880498549e-05, "loss": 0.9352, "step": 30420 }, { "epoch": 0.9703753308460091, "grad_norm": 0.3720155656337738, "learning_rate": 1.0701394794708213e-05, "loss": 0.9347, "step": 30430 }, { "epoch": 0.9706942185656431, "grad_norm": 0.365630567073822, "learning_rate": 1.0660525390520096e-05, "loss": 0.9179, "step": 30440 }, { "epoch": 0.9710131062852769, "grad_norm": 0.3534587621688843, "learning_rate": 1.0619812069555778e-05, "loss": 0.933, "step": 30450 }, { "epoch": 0.9713319940049109, "grad_norm": 0.38552242517471313, "learning_rate": 1.0579254235722086e-05, "loss": 0.9374, "step": 30460 }, { "epoch": 0.9716508817245448, "grad_norm": 0.35881030559539795, "learning_rate": 1.0538851295202372e-05, "loss": 0.9409, "step": 30470 }, { "epoch": 0.9719697694441787, "grad_norm": 0.3738090693950653, "learning_rate": 1.0498602656447817e-05, "loss": 0.9397, "step": 30480 }, { "epoch": 0.9722886571638126, "grad_norm": 0.3673436939716339, "learning_rate": 1.0458507730168771e-05, "loss": 0.9377, "step": 30490 }, { "epoch": 0.9726075448834466, "grad_norm": 0.35112425684928894, "learning_rate": 1.0418565929326121e-05, "loss": 0.9298, "step": 30500 }, { "epoch": 0.9729264326030804, "grad_norm": 0.36641091108322144, "learning_rate": 1.0378776669122702e-05, "loss": 0.934, "step": 30510 }, { "epoch": 0.9732453203227144, "grad_norm": 0.36989322304725647, "learning_rate": 1.0339139366994728e-05, "loss": 0.9395, "step": 30520 }, { "epoch": 0.9735642080423483, "grad_norm": 0.36697059869766235, "learning_rate": 1.0299653442603272e-05, "loss": 0.9317, "step": 30530 }, { "epoch": 0.9738830957619822, "grad_norm": 0.36741960048675537, "learning_rate": 1.0260318317825752e-05, "loss": 0.9241, "step": 30540 }, { "epoch": 0.9742019834816161, "grad_norm": 0.36927348375320435, "learning_rate": 1.0221133416747503e-05, "loss": 0.9392, "step": 30550 }, { "epoch": 0.9745208712012501, "grad_norm": 0.37633517384529114, "learning_rate": 1.0182098165653291e-05, "loss": 0.9382, "step": 30560 }, { "epoch": 0.9748397589208839, "grad_norm": 0.3640388548374176, "learning_rate": 1.014321199301896e-05, "loss": 0.9333, "step": 30570 }, { "epoch": 0.9751586466405179, "grad_norm": 0.3635682165622711, "learning_rate": 1.0104474329503038e-05, "loss": 0.9291, "step": 30580 }, { "epoch": 0.9754775343601518, "grad_norm": 0.3714246153831482, "learning_rate": 1.0065884607938414e-05, "loss": 0.9402, "step": 30590 }, { "epoch": 0.9757964220797857, "grad_norm": 0.37632328271865845, "learning_rate": 1.0027442263324029e-05, "loss": 0.9568, "step": 30600 }, { "epoch": 0.9761153097994196, "grad_norm": 0.3554982542991638, "learning_rate": 9.989146732816599e-06, "loss": 0.9331, "step": 30610 }, { "epoch": 0.9764341975190536, "grad_norm": 0.36082297563552856, "learning_rate": 9.950997455722386e-06, "loss": 0.9289, "step": 30620 }, { "epoch": 0.9767530852386874, "grad_norm": 0.36781561374664307, "learning_rate": 9.912993873488982e-06, "loss": 0.9246, "step": 30630 }, { "epoch": 0.9770719729583214, "grad_norm": 0.376779705286026, "learning_rate": 9.875135429697123e-06, "loss": 0.9382, "step": 30640 }, { "epoch": 0.9773908606779553, "grad_norm": 0.3738979399204254, "learning_rate": 9.83742157005256e-06, "loss": 0.938, "step": 30650 }, { "epoch": 0.9777097483975892, "grad_norm": 0.3823069930076599, "learning_rate": 9.79985174237793e-06, "loss": 0.9239, "step": 30660 }, { "epoch": 0.9780286361172231, "grad_norm": 0.3665352165699005, "learning_rate": 9.762425396604675e-06, "loss": 0.9287, "step": 30670 }, { "epoch": 0.9783475238368571, "grad_norm": 0.367678165435791, "learning_rate": 9.72514198476499e-06, "loss": 0.9297, "step": 30680 }, { "epoch": 0.9786664115564909, "grad_norm": 0.3722160756587982, "learning_rate": 9.688000960983798e-06, "loss": 0.9356, "step": 30690 }, { "epoch": 0.9789852992761249, "grad_norm": 0.3580091595649719, "learning_rate": 9.65100178147076e-06, "loss": 0.9336, "step": 30700 }, { "epoch": 0.9793041869957588, "grad_norm": 0.36442312598228455, "learning_rate": 9.614143904512304e-06, "loss": 0.9423, "step": 30710 }, { "epoch": 0.9796230747153927, "grad_norm": 0.3623570501804352, "learning_rate": 9.577426790463718e-06, "loss": 0.9267, "step": 30720 }, { "epoch": 0.9799419624350266, "grad_norm": 0.3609250485897064, "learning_rate": 9.540849901741222e-06, "loss": 0.9132, "step": 30730 }, { "epoch": 0.9802608501546606, "grad_norm": 0.3623647093772888, "learning_rate": 9.504412702814105e-06, "loss": 0.9124, "step": 30740 }, { "epoch": 0.9805797378742944, "grad_norm": 0.354446142911911, "learning_rate": 9.468114660196888e-06, "loss": 0.9293, "step": 30750 }, { "epoch": 0.9808986255939284, "grad_norm": 0.3737424910068512, "learning_rate": 9.431955242441515e-06, "loss": 0.9363, "step": 30760 }, { "epoch": 0.9812175133135623, "grad_norm": 0.3702537715435028, "learning_rate": 9.395933920129571e-06, "loss": 0.9238, "step": 30770 }, { "epoch": 0.9815364010331962, "grad_norm": 0.3564508855342865, "learning_rate": 9.360050165864518e-06, "loss": 0.9313, "step": 30780 }, { "epoch": 0.9818552887528301, "grad_norm": 0.38061270117759705, "learning_rate": 9.324303454263998e-06, "loss": 0.9226, "step": 30790 }, { "epoch": 0.9821741764724641, "grad_norm": 0.3773099482059479, "learning_rate": 9.288693261952112e-06, "loss": 0.9213, "step": 30800 }, { "epoch": 0.9824930641920979, "grad_norm": 0.3710877597332001, "learning_rate": 9.253219067551782e-06, "loss": 0.9158, "step": 30810 }, { "epoch": 0.9828119519117319, "grad_norm": 0.35554859042167664, "learning_rate": 9.217880351677104e-06, "loss": 0.9177, "step": 30820 }, { "epoch": 0.9831308396313658, "grad_norm": 0.36319994926452637, "learning_rate": 9.182676596925743e-06, "loss": 0.9222, "step": 30830 }, { "epoch": 0.9834497273509997, "grad_norm": 0.3607344329357147, "learning_rate": 9.147607287871367e-06, "loss": 0.9144, "step": 30840 }, { "epoch": 0.9837686150706336, "grad_norm": 0.3698658049106598, "learning_rate": 9.112671911056089e-06, "loss": 0.9272, "step": 30850 }, { "epoch": 0.9840875027902676, "grad_norm": 0.36637309193611145, "learning_rate": 9.077869954982961e-06, "loss": 0.9225, "step": 30860 }, { "epoch": 0.9844063905099014, "grad_norm": 0.35287508368492126, "learning_rate": 9.043200910108472e-06, "loss": 0.9217, "step": 30870 }, { "epoch": 0.9847252782295354, "grad_norm": 0.3669984042644501, "learning_rate": 9.008664268835097e-06, "loss": 0.9206, "step": 30880 }, { "epoch": 0.9850441659491693, "grad_norm": 0.3705748915672302, "learning_rate": 8.97425952550387e-06, "loss": 0.9284, "step": 30890 }, { "epoch": 0.9853630536688032, "grad_norm": 0.36321109533309937, "learning_rate": 8.939986176386965e-06, "loss": 0.9291, "step": 30900 }, { "epoch": 0.9856819413884371, "grad_norm": 0.36526602506637573, "learning_rate": 8.90584371968033e-06, "loss": 0.9173, "step": 30910 }, { "epoch": 0.9860008291080711, "grad_norm": 0.3631839454174042, "learning_rate": 8.871831655496341e-06, "loss": 0.932, "step": 30920 }, { "epoch": 0.9863197168277049, "grad_norm": 0.3734815716743469, "learning_rate": 8.837949485856484e-06, "loss": 0.9255, "step": 30930 }, { "epoch": 0.9866386045473389, "grad_norm": 0.35923781991004944, "learning_rate": 8.804196714684051e-06, "loss": 0.9157, "step": 30940 }, { "epoch": 0.9869574922669728, "grad_norm": 0.375, "learning_rate": 8.770572847796902e-06, "loss": 0.9278, "step": 30950 }, { "epoch": 0.9872763799866067, "grad_norm": 0.3728904128074646, "learning_rate": 8.737077392900202e-06, "loss": 0.9347, "step": 30960 }, { "epoch": 0.9875952677062406, "grad_norm": 0.37896835803985596, "learning_rate": 8.703709859579232e-06, "loss": 0.9285, "step": 30970 }, { "epoch": 0.9879141554258746, "grad_norm": 0.3802531659603119, "learning_rate": 8.670469759292197e-06, "loss": 0.9317, "step": 30980 }, { "epoch": 0.9882330431455084, "grad_norm": 0.37327489256858826, "learning_rate": 8.637356605363086e-06, "loss": 0.9468, "step": 30990 }, { "epoch": 0.9885519308651424, "grad_norm": 0.364010751247406, "learning_rate": 8.604369912974537e-06, "loss": 0.9214, "step": 31000 }, { "epoch": 0.9888708185847763, "grad_norm": 0.3760271370410919, "learning_rate": 8.571509199160733e-06, "loss": 0.9575, "step": 31010 }, { "epoch": 0.9891897063044102, "grad_norm": 0.3785746097564697, "learning_rate": 8.53877398280035e-06, "loss": 0.9258, "step": 31020 }, { "epoch": 0.9895085940240441, "grad_norm": 0.3546142876148224, "learning_rate": 8.506163784609493e-06, "loss": 0.9279, "step": 31030 }, { "epoch": 0.9898274817436781, "grad_norm": 0.3674685060977936, "learning_rate": 8.47367812713469e-06, "loss": 0.9417, "step": 31040 }, { "epoch": 0.9901463694633119, "grad_norm": 0.3799147307872772, "learning_rate": 8.441316534745896e-06, "loss": 0.9308, "step": 31050 }, { "epoch": 0.9904652571829459, "grad_norm": 0.36110377311706543, "learning_rate": 8.409078533629542e-06, "loss": 0.9188, "step": 31060 }, { "epoch": 0.9907841449025798, "grad_norm": 0.3820250928401947, "learning_rate": 8.376963651781573e-06, "loss": 0.9259, "step": 31070 }, { "epoch": 0.9911030326222137, "grad_norm": 0.3705957531929016, "learning_rate": 8.34497141900056e-06, "loss": 0.9231, "step": 31080 }, { "epoch": 0.9914219203418476, "grad_norm": 0.3767108917236328, "learning_rate": 8.313101366880808e-06, "loss": 0.9255, "step": 31090 }, { "epoch": 0.9917408080614816, "grad_norm": 0.36577731370925903, "learning_rate": 8.28135302880549e-06, "loss": 0.926, "step": 31100 }, { "epoch": 0.9920596957811154, "grad_norm": 0.3568488359451294, "learning_rate": 8.24972593993983e-06, "loss": 0.908, "step": 31110 }, { "epoch": 0.9923785835007494, "grad_norm": 0.36216244101524353, "learning_rate": 8.21821963722429e-06, "loss": 0.9278, "step": 31120 }, { "epoch": 0.9926974712203833, "grad_norm": 0.37096574902534485, "learning_rate": 8.186833659367786e-06, "loss": 0.9247, "step": 31130 }, { "epoch": 0.9930163589400173, "grad_norm": 0.36498740315437317, "learning_rate": 8.155567546840937e-06, "loss": 0.9356, "step": 31140 }, { "epoch": 0.9933352466596511, "grad_norm": 0.3782957196235657, "learning_rate": 8.124420841869342e-06, "loss": 0.9269, "step": 31150 }, { "epoch": 0.9936541343792851, "grad_norm": 0.36367443203926086, "learning_rate": 8.093393088426874e-06, "loss": 0.9232, "step": 31160 }, { "epoch": 0.993973022098919, "grad_norm": 0.35848551988601685, "learning_rate": 8.062483832229001e-06, "loss": 0.9172, "step": 31170 }, { "epoch": 0.9942919098185529, "grad_norm": 0.3594017028808594, "learning_rate": 8.031692620726136e-06, "loss": 0.9167, "step": 31180 }, { "epoch": 0.9946107975381868, "grad_norm": 0.3806443214416504, "learning_rate": 8.001019003097014e-06, "loss": 0.9188, "step": 31190 }, { "epoch": 0.9949296852578208, "grad_norm": 0.3811398148536682, "learning_rate": 7.970462530242083e-06, "loss": 0.9247, "step": 31200 }, { "epoch": 0.9952485729774546, "grad_norm": 0.37172773480415344, "learning_rate": 7.940022754776945e-06, "loss": 0.9483, "step": 31210 }, { "epoch": 0.9955674606970886, "grad_norm": 0.3615531921386719, "learning_rate": 7.90969923102579e-06, "loss": 0.9141, "step": 31220 }, { "epoch": 0.9958863484167225, "grad_norm": 0.3517124354839325, "learning_rate": 7.879491515014875e-06, "loss": 0.9185, "step": 31230 }, { "epoch": 0.9962052361363564, "grad_norm": 0.37004709243774414, "learning_rate": 7.849399164466028e-06, "loss": 0.9141, "step": 31240 }, { "epoch": 0.9965241238559903, "grad_norm": 0.3684113919734955, "learning_rate": 7.819421738790164e-06, "loss": 0.934, "step": 31250 }, { "epoch": 0.9968430115756243, "grad_norm": 0.364093154668808, "learning_rate": 7.78955879908085e-06, "loss": 0.9143, "step": 31260 }, { "epoch": 0.9971618992952581, "grad_norm": 0.3692590892314911, "learning_rate": 7.759809908107856e-06, "loss": 0.9348, "step": 31270 }, { "epoch": 0.9974807870148921, "grad_norm": 0.37234070897102356, "learning_rate": 7.730174630310774e-06, "loss": 0.9188, "step": 31280 }, { "epoch": 0.997799674734526, "grad_norm": 0.3636535406112671, "learning_rate": 7.700652531792634e-06, "loss": 0.9163, "step": 31290 }, { "epoch": 0.9981185624541599, "grad_norm": 0.37809842824935913, "learning_rate": 7.671243180313544e-06, "loss": 0.9327, "step": 31300 }, { "epoch": 0.9984374501737938, "grad_norm": 0.36570221185684204, "learning_rate": 7.641946145284373e-06, "loss": 0.9096, "step": 31310 }, { "epoch": 0.9987563378934278, "grad_norm": 0.36101430654525757, "learning_rate": 7.612760997760436e-06, "loss": 0.927, "step": 31320 }, { "epoch": 0.9990752256130616, "grad_norm": 0.3641717731952667, "learning_rate": 7.583687310435224e-06, "loss": 0.9353, "step": 31330 }, { "epoch": 0.9993941133326956, "grad_norm": 0.36067697405815125, "learning_rate": 7.5547246576341395e-06, "loss": 0.911, "step": 31340 }, { "epoch": 0.9997130010523295, "grad_norm": 0.371115118265152, "learning_rate": 7.525872615308268e-06, "loss": 0.9357, "step": 31350 } ], "logging_steps": 10, "max_steps": 31359, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.46535369518422e+17, "train_batch_size": 512, "trial_name": null, "trial_params": null }