{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5413769559262596, "eval_steps": 500, "global_step": 134000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.040126536763131e-05, "grad_norm": 37380620.0, "learning_rate": 2e-08, "loss": 1265640.2, "step": 10 }, { "epoch": 8.080253073526263e-05, "grad_norm": 9634068.0, "learning_rate": 4e-08, "loss": 1485744.3, "step": 20 }, { "epoch": 0.00012120379610289395, "grad_norm": 14280229.0, "learning_rate": 6e-08, "loss": 1357241.2, "step": 30 }, { "epoch": 0.00016160506147052525, "grad_norm": 47433768.0, "learning_rate": 8e-08, "loss": 1486355.4, "step": 40 }, { "epoch": 0.00020200632683815657, "grad_norm": 36763048.0, "learning_rate": 1.0000000000000001e-07, "loss": 1302607.4, "step": 50 }, { "epoch": 0.0002424075922057879, "grad_norm": 58413324.0, "learning_rate": 1.2e-07, "loss": 1164601.1, "step": 60 }, { "epoch": 0.0002828088575734192, "grad_norm": 20137730.0, "learning_rate": 1.4e-07, "loss": 1635228.2, "step": 70 }, { "epoch": 0.0003232101229410505, "grad_norm": 110655184.0, "learning_rate": 1.6e-07, "loss": 1525543.8, "step": 80 }, { "epoch": 0.0003636113883086818, "grad_norm": 14134144.0, "learning_rate": 1.8e-07, "loss": 1310257.0, "step": 90 }, { "epoch": 0.00040401265367631315, "grad_norm": 19299640.0, "learning_rate": 2.0000000000000002e-07, "loss": 851063.5, "step": 100 }, { "epoch": 0.00044441391904394446, "grad_norm": 17030668.0, "learning_rate": 2.2e-07, "loss": 1405910.8, "step": 110 }, { "epoch": 0.0004848151844115758, "grad_norm": 5774159.0, "learning_rate": 2.4e-07, "loss": 1021530.2, "step": 120 }, { "epoch": 0.000525216449779207, "grad_norm": 39249404.0, "learning_rate": 2.6e-07, "loss": 1118213.8, "step": 130 }, { "epoch": 0.0005656177151468384, "grad_norm": 3952896.25, "learning_rate": 2.8e-07, "loss": 802054.8, "step": 140 }, { "epoch": 0.0006060189805144697, "grad_norm": 17492202.0, "learning_rate": 3.0000000000000004e-07, "loss": 782337.9, "step": 150 }, { "epoch": 0.000646420245882101, "grad_norm": 14565722.0, "learning_rate": 3.2e-07, "loss": 762118.95, "step": 160 }, { "epoch": 0.0006868215112497323, "grad_norm": 12661834.0, "learning_rate": 3.4e-07, "loss": 617497.6, "step": 170 }, { "epoch": 0.0007272227766173637, "grad_norm": 4945727.5, "learning_rate": 3.6e-07, "loss": 360572.95, "step": 180 }, { "epoch": 0.000767624041984995, "grad_norm": 3134309.5, "learning_rate": 3.8e-07, "loss": 247792.15, "step": 190 }, { "epoch": 0.0008080253073526263, "grad_norm": 23188752.0, "learning_rate": 4.0000000000000003e-07, "loss": 302759.75, "step": 200 }, { "epoch": 0.0008484265727202576, "grad_norm": 2640093.75, "learning_rate": 4.2e-07, "loss": 177911.875, "step": 210 }, { "epoch": 0.0008888278380878889, "grad_norm": 3475973.0, "learning_rate": 4.4e-07, "loss": 124657.5875, "step": 220 }, { "epoch": 0.0009292291034555202, "grad_norm": 7612417.5, "learning_rate": 4.6e-07, "loss": 133348.3, "step": 230 }, { "epoch": 0.0009696303688231516, "grad_norm": 3544448.5, "learning_rate": 4.8e-07, "loss": 108178.025, "step": 240 }, { "epoch": 0.0010100316341907828, "grad_norm": 1792657.375, "learning_rate": 5.000000000000001e-07, "loss": 147530.7625, "step": 250 }, { "epoch": 0.001050432899558414, "grad_norm": 807344.25, "learning_rate": 5.2e-07, "loss": 46811.725, "step": 260 }, { "epoch": 0.0010908341649260454, "grad_norm": 9978717.0, "learning_rate": 5.4e-07, "loss": 46902.9469, "step": 270 }, { "epoch": 0.0011312354302936767, "grad_norm": 1081468.125, "learning_rate": 5.6e-07, "loss": 61798.675, "step": 280 }, { "epoch": 0.001171636695661308, "grad_norm": 836054.125, "learning_rate": 5.8e-07, "loss": 18199.025, "step": 290 }, { "epoch": 0.0012120379610289394, "grad_norm": 713206.125, "learning_rate": 6.000000000000001e-07, "loss": 16179.9375, "step": 300 }, { "epoch": 0.0012524392263965707, "grad_norm": 736075.8125, "learning_rate": 6.2e-07, "loss": 18567.8047, "step": 310 }, { "epoch": 0.001292840491764202, "grad_norm": 524556.5, "learning_rate": 6.4e-07, "loss": 18508.9297, "step": 320 }, { "epoch": 0.0013332417571318333, "grad_norm": 74135.0703125, "learning_rate": 6.6e-07, "loss": 7819.6453, "step": 330 }, { "epoch": 0.0013736430224994647, "grad_norm": 94151.4921875, "learning_rate": 6.8e-07, "loss": 6291.5781, "step": 340 }, { "epoch": 0.001414044287867096, "grad_norm": 17262.64453125, "learning_rate": 7.000000000000001e-07, "loss": 2254.5939, "step": 350 }, { "epoch": 0.0014544455532347273, "grad_norm": 6977.82470703125, "learning_rate": 7.2e-07, "loss": 806.7898, "step": 360 }, { "epoch": 0.0014948468186023586, "grad_norm": 720.22705078125, "learning_rate": 7.400000000000001e-07, "loss": 1413.3216, "step": 370 }, { "epoch": 0.00153524808396999, "grad_norm": 12234.6259765625, "learning_rate": 7.6e-07, "loss": 569.8974, "step": 380 }, { "epoch": 0.0015756493493376213, "grad_norm": 12985.419921875, "learning_rate": 7.8e-07, "loss": 645.2221, "step": 390 }, { "epoch": 0.0016160506147052526, "grad_norm": 17890.517578125, "learning_rate": 8.000000000000001e-07, "loss": 556.4382, "step": 400 }, { "epoch": 0.001656451880072884, "grad_norm": 1073.23046875, "learning_rate": 8.200000000000001e-07, "loss": 579.9454, "step": 410 }, { "epoch": 0.0016968531454405152, "grad_norm": 946.015380859375, "learning_rate": 8.4e-07, "loss": 339.6989, "step": 420 }, { "epoch": 0.0017372544108081465, "grad_norm": 919.689697265625, "learning_rate": 8.6e-07, "loss": 544.2018, "step": 430 }, { "epoch": 0.0017776556761757779, "grad_norm": 1076.61962890625, "learning_rate": 8.8e-07, "loss": 477.4546, "step": 440 }, { "epoch": 0.0018180569415434092, "grad_norm": 6896.8623046875, "learning_rate": 9e-07, "loss": 551.458, "step": 450 }, { "epoch": 0.0018584582069110405, "grad_norm": 1263.240478515625, "learning_rate": 9.2e-07, "loss": 478.671, "step": 460 }, { "epoch": 0.0018988594722786718, "grad_norm": 6494.14501953125, "learning_rate": 9.400000000000001e-07, "loss": 551.3243, "step": 470 }, { "epoch": 0.0019392607376463031, "grad_norm": 1367.335205078125, "learning_rate": 9.6e-07, "loss": 566.3516, "step": 480 }, { "epoch": 0.0019796620030139342, "grad_norm": 2059.615478515625, "learning_rate": 9.8e-07, "loss": 440.9915, "step": 490 }, { "epoch": 0.0020200632683815656, "grad_norm": 1089.5028076171875, "learning_rate": 1.0000000000000002e-06, "loss": 450.5991, "step": 500 }, { "epoch": 0.002060464533749197, "grad_norm": 1123.5267333984375, "learning_rate": 1.0200000000000002e-06, "loss": 450.4043, "step": 510 }, { "epoch": 0.002100865799116828, "grad_norm": 1164.44677734375, "learning_rate": 1.04e-06, "loss": 401.3136, "step": 520 }, { "epoch": 0.0021412670644844595, "grad_norm": 1139.4189453125, "learning_rate": 1.06e-06, "loss": 510.3279, "step": 530 }, { "epoch": 0.002181668329852091, "grad_norm": 1584.498779296875, "learning_rate": 1.08e-06, "loss": 276.7529, "step": 540 }, { "epoch": 0.002222069595219722, "grad_norm": 13004.5224609375, "learning_rate": 1.1e-06, "loss": 517.8619, "step": 550 }, { "epoch": 0.0022624708605873535, "grad_norm": 6897.6220703125, "learning_rate": 1.12e-06, "loss": 371.9735, "step": 560 }, { "epoch": 0.002302872125954985, "grad_norm": 5952.30859375, "learning_rate": 1.14e-06, "loss": 321.4458, "step": 570 }, { "epoch": 0.002343273391322616, "grad_norm": 1368.4573974609375, "learning_rate": 1.16e-06, "loss": 444.6263, "step": 580 }, { "epoch": 0.0023836746566902474, "grad_norm": 6530.33154296875, "learning_rate": 1.18e-06, "loss": 576.2317, "step": 590 }, { "epoch": 0.0024240759220578788, "grad_norm": 918.2885131835938, "learning_rate": 1.2000000000000002e-06, "loss": 319.5849, "step": 600 }, { "epoch": 0.00246447718742551, "grad_norm": 5709.6923828125, "learning_rate": 1.2200000000000002e-06, "loss": 544.9418, "step": 610 }, { "epoch": 0.0025048784527931414, "grad_norm": 767.517822265625, "learning_rate": 1.24e-06, "loss": 475.3013, "step": 620 }, { "epoch": 0.0025452797181607727, "grad_norm": 1049.8690185546875, "learning_rate": 1.26e-06, "loss": 505.3646, "step": 630 }, { "epoch": 0.002585680983528404, "grad_norm": 6113.7568359375, "learning_rate": 1.28e-06, "loss": 371.3924, "step": 640 }, { "epoch": 0.0026260822488960354, "grad_norm": 961.7456665039062, "learning_rate": 1.3e-06, "loss": 434.1364, "step": 650 }, { "epoch": 0.0026664835142636667, "grad_norm": 827.18310546875, "learning_rate": 1.32e-06, "loss": 325.7226, "step": 660 }, { "epoch": 0.002706884779631298, "grad_norm": 800.2735595703125, "learning_rate": 1.34e-06, "loss": 362.5384, "step": 670 }, { "epoch": 0.0027472860449989293, "grad_norm": 2117.720703125, "learning_rate": 1.36e-06, "loss": 459.5714, "step": 680 }, { "epoch": 0.0027876873103665606, "grad_norm": 11787.2021484375, "learning_rate": 1.3800000000000001e-06, "loss": 452.4481, "step": 690 }, { "epoch": 0.002828088575734192, "grad_norm": 1027.6024169921875, "learning_rate": 1.4000000000000001e-06, "loss": 370.987, "step": 700 }, { "epoch": 0.0028684898411018233, "grad_norm": 2432.72802734375, "learning_rate": 1.4200000000000002e-06, "loss": 396.6002, "step": 710 }, { "epoch": 0.0029088911064694546, "grad_norm": 4263.763671875, "learning_rate": 1.44e-06, "loss": 514.6806, "step": 720 }, { "epoch": 0.002949292371837086, "grad_norm": 1061.5069580078125, "learning_rate": 1.46e-06, "loss": 367.6588, "step": 730 }, { "epoch": 0.0029896936372047172, "grad_norm": 7566.0791015625, "learning_rate": 1.4800000000000002e-06, "loss": 387.3527, "step": 740 }, { "epoch": 0.0030300949025723486, "grad_norm": 1594.1578369140625, "learning_rate": 1.5e-06, "loss": 286.798, "step": 750 }, { "epoch": 0.00307049616793998, "grad_norm": 1798.3486328125, "learning_rate": 1.52e-06, "loss": 265.2009, "step": 760 }, { "epoch": 0.003110897433307611, "grad_norm": 1887.8646240234375, "learning_rate": 1.54e-06, "loss": 511.3708, "step": 770 }, { "epoch": 0.0031512986986752425, "grad_norm": 1046.9554443359375, "learning_rate": 1.56e-06, "loss": 330.1199, "step": 780 }, { "epoch": 0.003191699964042874, "grad_norm": 14393.11328125, "learning_rate": 1.5800000000000003e-06, "loss": 459.7904, "step": 790 }, { "epoch": 0.003232101229410505, "grad_norm": 1148.2391357421875, "learning_rate": 1.6000000000000001e-06, "loss": 297.0352, "step": 800 }, { "epoch": 0.0032725024947781365, "grad_norm": 1655.5443115234375, "learning_rate": 1.62e-06, "loss": 380.9321, "step": 810 }, { "epoch": 0.003312903760145768, "grad_norm": 1723.20947265625, "learning_rate": 1.6400000000000002e-06, "loss": 356.5419, "step": 820 }, { "epoch": 0.003353305025513399, "grad_norm": 962.674560546875, "learning_rate": 1.6600000000000002e-06, "loss": 244.5568, "step": 830 }, { "epoch": 0.0033937062908810304, "grad_norm": 1860.3314208984375, "learning_rate": 1.68e-06, "loss": 314.3494, "step": 840 }, { "epoch": 0.0034341075562486618, "grad_norm": 677.5274047851562, "learning_rate": 1.7000000000000002e-06, "loss": 389.2205, "step": 850 }, { "epoch": 0.003474508821616293, "grad_norm": 847.7564697265625, "learning_rate": 1.72e-06, "loss": 421.029, "step": 860 }, { "epoch": 0.0035149100869839244, "grad_norm": 1153.310791015625, "learning_rate": 1.7399999999999999e-06, "loss": 338.7876, "step": 870 }, { "epoch": 0.0035553113523515557, "grad_norm": 702.7095947265625, "learning_rate": 1.76e-06, "loss": 375.8682, "step": 880 }, { "epoch": 0.003595712617719187, "grad_norm": 898.8328857421875, "learning_rate": 1.7800000000000001e-06, "loss": 437.3662, "step": 890 }, { "epoch": 0.0036361138830868184, "grad_norm": 1392.5733642578125, "learning_rate": 1.8e-06, "loss": 327.5625, "step": 900 }, { "epoch": 0.0036765151484544497, "grad_norm": 1176.8975830078125, "learning_rate": 1.8200000000000002e-06, "loss": 331.6111, "step": 910 }, { "epoch": 0.003716916413822081, "grad_norm": 844.1708374023438, "learning_rate": 1.84e-06, "loss": 462.9411, "step": 920 }, { "epoch": 0.0037573176791897123, "grad_norm": 1110.1351318359375, "learning_rate": 1.86e-06, "loss": 291.5723, "step": 930 }, { "epoch": 0.0037977189445573436, "grad_norm": 1204.96826171875, "learning_rate": 1.8800000000000002e-06, "loss": 453.21, "step": 940 }, { "epoch": 0.003838120209924975, "grad_norm": 788.923095703125, "learning_rate": 1.9e-06, "loss": 338.4679, "step": 950 }, { "epoch": 0.0038785214752926063, "grad_norm": 828.4852294921875, "learning_rate": 1.92e-06, "loss": 304.8583, "step": 960 }, { "epoch": 0.003918922740660237, "grad_norm": 3126.61181640625, "learning_rate": 1.94e-06, "loss": 368.0714, "step": 970 }, { "epoch": 0.0039593240060278685, "grad_norm": 3534.13232421875, "learning_rate": 1.96e-06, "loss": 321.7589, "step": 980 }, { "epoch": 0.0039997252713955, "grad_norm": 676.08251953125, "learning_rate": 1.98e-06, "loss": 336.8801, "step": 990 }, { "epoch": 0.004040126536763131, "grad_norm": 1422.3154296875, "learning_rate": 2.0000000000000003e-06, "loss": 310.1541, "step": 1000 }, { "epoch": 0.0040805278021307624, "grad_norm": 754.2792358398438, "learning_rate": 2.02e-06, "loss": 294.2774, "step": 1010 }, { "epoch": 0.004120929067498394, "grad_norm": 1239.5504150390625, "learning_rate": 2.0400000000000004e-06, "loss": 379.9203, "step": 1020 }, { "epoch": 0.004161330332866025, "grad_norm": 1107.9769287109375, "learning_rate": 2.06e-06, "loss": 394.3968, "step": 1030 }, { "epoch": 0.004201731598233656, "grad_norm": 747.5755615234375, "learning_rate": 2.08e-06, "loss": 357.4806, "step": 1040 }, { "epoch": 0.004242132863601288, "grad_norm": 897.203125, "learning_rate": 2.1000000000000002e-06, "loss": 376.0113, "step": 1050 }, { "epoch": 0.004282534128968919, "grad_norm": 3561.832763671875, "learning_rate": 2.12e-06, "loss": 385.5077, "step": 1060 }, { "epoch": 0.00432293539433655, "grad_norm": 2549.35205078125, "learning_rate": 2.14e-06, "loss": 448.231, "step": 1070 }, { "epoch": 0.004363336659704182, "grad_norm": 746.3321533203125, "learning_rate": 2.16e-06, "loss": 308.5054, "step": 1080 }, { "epoch": 0.004403737925071813, "grad_norm": 4975.05615234375, "learning_rate": 2.1800000000000003e-06, "loss": 604.6913, "step": 1090 }, { "epoch": 0.004444139190439444, "grad_norm": 865.9938354492188, "learning_rate": 2.2e-06, "loss": 239.4941, "step": 1100 }, { "epoch": 0.004484540455807076, "grad_norm": 1072.074462890625, "learning_rate": 2.2200000000000003e-06, "loss": 401.7543, "step": 1110 }, { "epoch": 0.004524941721174707, "grad_norm": 1182.0032958984375, "learning_rate": 2.24e-06, "loss": 330.9987, "step": 1120 }, { "epoch": 0.004565342986542338, "grad_norm": 814.4903564453125, "learning_rate": 2.26e-06, "loss": 379.418, "step": 1130 }, { "epoch": 0.00460574425190997, "grad_norm": 2526.64501953125, "learning_rate": 2.28e-06, "loss": 389.4854, "step": 1140 }, { "epoch": 0.004646145517277601, "grad_norm": 1358.009033203125, "learning_rate": 2.3e-06, "loss": 432.4065, "step": 1150 }, { "epoch": 0.004686546782645232, "grad_norm": 1503.1463623046875, "learning_rate": 2.32e-06, "loss": 417.3679, "step": 1160 }, { "epoch": 0.0047269480480128636, "grad_norm": 2745.59619140625, "learning_rate": 2.34e-06, "loss": 339.9085, "step": 1170 }, { "epoch": 0.004767349313380495, "grad_norm": 636.4331665039062, "learning_rate": 2.36e-06, "loss": 324.7576, "step": 1180 }, { "epoch": 0.004807750578748126, "grad_norm": 1743.3262939453125, "learning_rate": 2.38e-06, "loss": 337.6445, "step": 1190 }, { "epoch": 0.0048481518441157575, "grad_norm": 912.2630004882812, "learning_rate": 2.4000000000000003e-06, "loss": 342.137, "step": 1200 }, { "epoch": 0.004888553109483389, "grad_norm": 1142.1710205078125, "learning_rate": 2.42e-06, "loss": 280.1062, "step": 1210 }, { "epoch": 0.00492895437485102, "grad_norm": 981.51806640625, "learning_rate": 2.4400000000000004e-06, "loss": 291.3894, "step": 1220 }, { "epoch": 0.0049693556402186515, "grad_norm": 1795.322265625, "learning_rate": 2.46e-06, "loss": 349.2701, "step": 1230 }, { "epoch": 0.005009756905586283, "grad_norm": 1389.8359375, "learning_rate": 2.48e-06, "loss": 363.471, "step": 1240 }, { "epoch": 0.005050158170953914, "grad_norm": 1739.7503662109375, "learning_rate": 2.5e-06, "loss": 414.4333, "step": 1250 }, { "epoch": 0.0050905594363215454, "grad_norm": 1108.7142333984375, "learning_rate": 2.52e-06, "loss": 376.2317, "step": 1260 }, { "epoch": 0.005130960701689177, "grad_norm": 1234.8914794921875, "learning_rate": 2.54e-06, "loss": 432.4711, "step": 1270 }, { "epoch": 0.005171361967056808, "grad_norm": 1600.1077880859375, "learning_rate": 2.56e-06, "loss": 273.5047, "step": 1280 }, { "epoch": 0.005211763232424439, "grad_norm": 5998.88134765625, "learning_rate": 2.5800000000000003e-06, "loss": 417.7149, "step": 1290 }, { "epoch": 0.005252164497792071, "grad_norm": 1022.2550659179688, "learning_rate": 2.6e-06, "loss": 337.5613, "step": 1300 }, { "epoch": 0.005292565763159702, "grad_norm": 821.97998046875, "learning_rate": 2.6200000000000003e-06, "loss": 355.1617, "step": 1310 }, { "epoch": 0.005332967028527333, "grad_norm": 0.0, "learning_rate": 2.64e-06, "loss": 441.1887, "step": 1320 }, { "epoch": 0.005373368293894965, "grad_norm": 1180.22607421875, "learning_rate": 2.66e-06, "loss": 349.4191, "step": 1330 }, { "epoch": 0.005413769559262596, "grad_norm": 1288.874755859375, "learning_rate": 2.68e-06, "loss": 358.9353, "step": 1340 }, { "epoch": 0.005454170824630227, "grad_norm": 858.6513671875, "learning_rate": 2.7e-06, "loss": 298.1967, "step": 1350 }, { "epoch": 0.005494572089997859, "grad_norm": 826.8380737304688, "learning_rate": 2.72e-06, "loss": 266.4493, "step": 1360 }, { "epoch": 0.00553497335536549, "grad_norm": 1527.0496826171875, "learning_rate": 2.74e-06, "loss": 385.1624, "step": 1370 }, { "epoch": 0.005575374620733121, "grad_norm": 1727.4271240234375, "learning_rate": 2.7600000000000003e-06, "loss": 409.4935, "step": 1380 }, { "epoch": 0.005615775886100753, "grad_norm": 1242.2200927734375, "learning_rate": 2.78e-06, "loss": 423.3242, "step": 1390 }, { "epoch": 0.005656177151468384, "grad_norm": 784.5136108398438, "learning_rate": 2.8000000000000003e-06, "loss": 378.621, "step": 1400 }, { "epoch": 0.005696578416836015, "grad_norm": 686.701416015625, "learning_rate": 2.82e-06, "loss": 366.9333, "step": 1410 }, { "epoch": 0.0057369796822036466, "grad_norm": 2546.25390625, "learning_rate": 2.8400000000000003e-06, "loss": 372.8342, "step": 1420 }, { "epoch": 0.005777380947571278, "grad_norm": 1681.410400390625, "learning_rate": 2.86e-06, "loss": 383.352, "step": 1430 }, { "epoch": 0.005817782212938909, "grad_norm": 1604.5748291015625, "learning_rate": 2.88e-06, "loss": 390.5337, "step": 1440 }, { "epoch": 0.0058581834783065405, "grad_norm": 642.487060546875, "learning_rate": 2.9e-06, "loss": 304.9479, "step": 1450 }, { "epoch": 0.005898584743674172, "grad_norm": 2208.190185546875, "learning_rate": 2.92e-06, "loss": 419.7739, "step": 1460 }, { "epoch": 0.005938986009041803, "grad_norm": 558.7657470703125, "learning_rate": 2.9400000000000002e-06, "loss": 243.1836, "step": 1470 }, { "epoch": 0.0059793872744094345, "grad_norm": 866.2562866210938, "learning_rate": 2.9600000000000005e-06, "loss": 254.9255, "step": 1480 }, { "epoch": 0.006019788539777066, "grad_norm": 827.721923828125, "learning_rate": 2.9800000000000003e-06, "loss": 301.6956, "step": 1490 }, { "epoch": 0.006060189805144697, "grad_norm": 1020.5379638671875, "learning_rate": 3e-06, "loss": 236.8173, "step": 1500 }, { "epoch": 0.0061005910705123284, "grad_norm": 12217.869140625, "learning_rate": 3.0200000000000003e-06, "loss": 315.658, "step": 1510 }, { "epoch": 0.00614099233587996, "grad_norm": 1178.291259765625, "learning_rate": 3.04e-06, "loss": 300.3993, "step": 1520 }, { "epoch": 0.006181393601247591, "grad_norm": 2459.315185546875, "learning_rate": 3.06e-06, "loss": 315.2213, "step": 1530 }, { "epoch": 0.006221794866615222, "grad_norm": 0.0, "learning_rate": 3.08e-06, "loss": 271.6832, "step": 1540 }, { "epoch": 0.006262196131982854, "grad_norm": 1094.0899658203125, "learning_rate": 3.1e-06, "loss": 273.5612, "step": 1550 }, { "epoch": 0.006302597397350485, "grad_norm": 1611.595703125, "learning_rate": 3.12e-06, "loss": 291.6243, "step": 1560 }, { "epoch": 0.006342998662718116, "grad_norm": 1336.66259765625, "learning_rate": 3.14e-06, "loss": 322.2612, "step": 1570 }, { "epoch": 0.006383399928085748, "grad_norm": 2437.919189453125, "learning_rate": 3.1600000000000007e-06, "loss": 281.6811, "step": 1580 }, { "epoch": 0.006423801193453379, "grad_norm": 945.635498046875, "learning_rate": 3.1800000000000005e-06, "loss": 331.3091, "step": 1590 }, { "epoch": 0.00646420245882101, "grad_norm": 1270.96826171875, "learning_rate": 3.2000000000000003e-06, "loss": 337.9903, "step": 1600 }, { "epoch": 0.006504603724188642, "grad_norm": 1054.9232177734375, "learning_rate": 3.22e-06, "loss": 371.3227, "step": 1610 }, { "epoch": 0.006545004989556273, "grad_norm": 1814.677734375, "learning_rate": 3.24e-06, "loss": 390.2153, "step": 1620 }, { "epoch": 0.006585406254923904, "grad_norm": 648.9537963867188, "learning_rate": 3.2599999999999997e-06, "loss": 368.1877, "step": 1630 }, { "epoch": 0.006625807520291536, "grad_norm": 1473.499267578125, "learning_rate": 3.2800000000000004e-06, "loss": 386.7809, "step": 1640 }, { "epoch": 0.006666208785659167, "grad_norm": 2527.870849609375, "learning_rate": 3.3e-06, "loss": 414.1417, "step": 1650 }, { "epoch": 0.006706610051026798, "grad_norm": 1029.5302734375, "learning_rate": 3.3200000000000004e-06, "loss": 258.2289, "step": 1660 }, { "epoch": 0.0067470113163944296, "grad_norm": 748.2296142578125, "learning_rate": 3.34e-06, "loss": 348.1511, "step": 1670 }, { "epoch": 0.006787412581762061, "grad_norm": 2257.800537109375, "learning_rate": 3.36e-06, "loss": 326.2408, "step": 1680 }, { "epoch": 0.006827813847129692, "grad_norm": 987.0545654296875, "learning_rate": 3.38e-06, "loss": 285.0146, "step": 1690 }, { "epoch": 0.0068682151124973235, "grad_norm": 3093.51171875, "learning_rate": 3.4000000000000005e-06, "loss": 357.8721, "step": 1700 }, { "epoch": 0.006908616377864955, "grad_norm": 769.6875610351562, "learning_rate": 3.4200000000000003e-06, "loss": 312.937, "step": 1710 }, { "epoch": 0.006949017643232586, "grad_norm": 5268.0009765625, "learning_rate": 3.44e-06, "loss": 437.4973, "step": 1720 }, { "epoch": 0.0069894189086002175, "grad_norm": 875.3474731445312, "learning_rate": 3.46e-06, "loss": 303.2312, "step": 1730 }, { "epoch": 0.007029820173967849, "grad_norm": 988.2050170898438, "learning_rate": 3.4799999999999997e-06, "loss": 358.3378, "step": 1740 }, { "epoch": 0.00707022143933548, "grad_norm": 1848.81884765625, "learning_rate": 3.5000000000000004e-06, "loss": 354.324, "step": 1750 }, { "epoch": 0.0071106227047031114, "grad_norm": 985.3126220703125, "learning_rate": 3.52e-06, "loss": 387.196, "step": 1760 }, { "epoch": 0.007151023970070743, "grad_norm": 847.1213989257812, "learning_rate": 3.5400000000000004e-06, "loss": 298.6457, "step": 1770 }, { "epoch": 0.007191425235438374, "grad_norm": 1782.748779296875, "learning_rate": 3.5600000000000002e-06, "loss": 410.3063, "step": 1780 }, { "epoch": 0.007231826500806005, "grad_norm": 731.6522216796875, "learning_rate": 3.58e-06, "loss": 233.0396, "step": 1790 }, { "epoch": 0.007272227766173637, "grad_norm": 498.69366455078125, "learning_rate": 3.6e-06, "loss": 287.9018, "step": 1800 }, { "epoch": 0.007312629031541268, "grad_norm": 906.2838134765625, "learning_rate": 3.6200000000000005e-06, "loss": 345.1769, "step": 1810 }, { "epoch": 0.007353030296908899, "grad_norm": 713.8414916992188, "learning_rate": 3.6400000000000003e-06, "loss": 291.5412, "step": 1820 }, { "epoch": 0.007393431562276531, "grad_norm": 1315.886962890625, "learning_rate": 3.66e-06, "loss": 394.6411, "step": 1830 }, { "epoch": 0.007433832827644162, "grad_norm": 1429.893798828125, "learning_rate": 3.68e-06, "loss": 247.4847, "step": 1840 }, { "epoch": 0.007474234093011793, "grad_norm": 1034.032470703125, "learning_rate": 3.7e-06, "loss": 406.1072, "step": 1850 }, { "epoch": 0.007514635358379425, "grad_norm": 955.8413696289062, "learning_rate": 3.72e-06, "loss": 392.0067, "step": 1860 }, { "epoch": 0.007555036623747056, "grad_norm": 0.0, "learning_rate": 3.7400000000000006e-06, "loss": 235.0364, "step": 1870 }, { "epoch": 0.007595437889114687, "grad_norm": 13884.802734375, "learning_rate": 3.7600000000000004e-06, "loss": 336.9673, "step": 1880 }, { "epoch": 0.007635839154482319, "grad_norm": 3753.5439453125, "learning_rate": 3.7800000000000002e-06, "loss": 362.8436, "step": 1890 }, { "epoch": 0.00767624041984995, "grad_norm": 1046.0020751953125, "learning_rate": 3.8e-06, "loss": 370.7911, "step": 1900 }, { "epoch": 0.007716641685217581, "grad_norm": 2058.227783203125, "learning_rate": 3.82e-06, "loss": 356.5814, "step": 1910 }, { "epoch": 0.0077570429505852126, "grad_norm": 833.1560668945312, "learning_rate": 3.84e-06, "loss": 362.6392, "step": 1920 }, { "epoch": 0.007797444215952844, "grad_norm": 1860.0916748046875, "learning_rate": 3.86e-06, "loss": 327.3118, "step": 1930 }, { "epoch": 0.007837845481320474, "grad_norm": 2047.00146484375, "learning_rate": 3.88e-06, "loss": 431.6941, "step": 1940 }, { "epoch": 0.007878246746688106, "grad_norm": 1433.5374755859375, "learning_rate": 3.9e-06, "loss": 283.749, "step": 1950 }, { "epoch": 0.007918648012055737, "grad_norm": 1340.44287109375, "learning_rate": 3.92e-06, "loss": 323.1868, "step": 1960 }, { "epoch": 0.007959049277423368, "grad_norm": 5642.4560546875, "learning_rate": 3.9399999999999995e-06, "loss": 428.7537, "step": 1970 }, { "epoch": 0.007999450542791, "grad_norm": 519.6063232421875, "learning_rate": 3.96e-06, "loss": 304.1046, "step": 1980 }, { "epoch": 0.008039851808158631, "grad_norm": 1227.509765625, "learning_rate": 3.98e-06, "loss": 295.7083, "step": 1990 }, { "epoch": 0.008080253073526262, "grad_norm": 788.1338500976562, "learning_rate": 4.000000000000001e-06, "loss": 383.1164, "step": 2000 }, { "epoch": 0.008120654338893894, "grad_norm": 1502.9964599609375, "learning_rate": 4.0200000000000005e-06, "loss": 312.3206, "step": 2010 }, { "epoch": 0.008161055604261525, "grad_norm": 6165.92529296875, "learning_rate": 4.04e-06, "loss": 307.8958, "step": 2020 }, { "epoch": 0.008201456869629156, "grad_norm": 915.6934814453125, "learning_rate": 4.06e-06, "loss": 374.9606, "step": 2030 }, { "epoch": 0.008241858134996788, "grad_norm": 730.7386474609375, "learning_rate": 4.080000000000001e-06, "loss": 329.4409, "step": 2040 }, { "epoch": 0.008282259400364419, "grad_norm": 1635.5831298828125, "learning_rate": 4.1000000000000006e-06, "loss": 209.305, "step": 2050 }, { "epoch": 0.00832266066573205, "grad_norm": 4476.35888671875, "learning_rate": 4.12e-06, "loss": 415.3398, "step": 2060 }, { "epoch": 0.008363061931099681, "grad_norm": 1422.126220703125, "learning_rate": 4.14e-06, "loss": 360.3677, "step": 2070 }, { "epoch": 0.008403463196467313, "grad_norm": 620.626220703125, "learning_rate": 4.16e-06, "loss": 310.8916, "step": 2080 }, { "epoch": 0.008443864461834944, "grad_norm": 1371.109130859375, "learning_rate": 4.18e-06, "loss": 339.1394, "step": 2090 }, { "epoch": 0.008484265727202575, "grad_norm": 1084.7042236328125, "learning_rate": 4.2000000000000004e-06, "loss": 282.2104, "step": 2100 }, { "epoch": 0.008524666992570207, "grad_norm": 678.981689453125, "learning_rate": 4.22e-06, "loss": 248.9129, "step": 2110 }, { "epoch": 0.008565068257937838, "grad_norm": 1121.5936279296875, "learning_rate": 4.24e-06, "loss": 333.5619, "step": 2120 }, { "epoch": 0.00860546952330547, "grad_norm": 795.3826293945312, "learning_rate": 4.26e-06, "loss": 293.3469, "step": 2130 }, { "epoch": 0.0086458707886731, "grad_norm": 1109.0904541015625, "learning_rate": 4.28e-06, "loss": 365.5112, "step": 2140 }, { "epoch": 0.008686272054040732, "grad_norm": 778.438232421875, "learning_rate": 4.2999999999999995e-06, "loss": 211.6606, "step": 2150 }, { "epoch": 0.008726673319408363, "grad_norm": 1132.38818359375, "learning_rate": 4.32e-06, "loss": 386.3405, "step": 2160 }, { "epoch": 0.008767074584775995, "grad_norm": 986.1419677734375, "learning_rate": 4.34e-06, "loss": 326.4904, "step": 2170 }, { "epoch": 0.008807475850143626, "grad_norm": 880.376220703125, "learning_rate": 4.360000000000001e-06, "loss": 308.4621, "step": 2180 }, { "epoch": 0.008847877115511257, "grad_norm": 2747.5908203125, "learning_rate": 4.38e-06, "loss": 240.2334, "step": 2190 }, { "epoch": 0.008888278380878889, "grad_norm": 1022.6881103515625, "learning_rate": 4.4e-06, "loss": 348.5623, "step": 2200 }, { "epoch": 0.00892867964624652, "grad_norm": 701.6885986328125, "learning_rate": 4.420000000000001e-06, "loss": 239.2583, "step": 2210 }, { "epoch": 0.008969080911614151, "grad_norm": 812.2908325195312, "learning_rate": 4.440000000000001e-06, "loss": 305.8652, "step": 2220 }, { "epoch": 0.009009482176981783, "grad_norm": 936.9786376953125, "learning_rate": 4.4600000000000005e-06, "loss": 406.4304, "step": 2230 }, { "epoch": 0.009049883442349414, "grad_norm": 849.5304565429688, "learning_rate": 4.48e-06, "loss": 365.7475, "step": 2240 }, { "epoch": 0.009090284707717045, "grad_norm": 1223.7852783203125, "learning_rate": 4.5e-06, "loss": 295.9342, "step": 2250 }, { "epoch": 0.009130685973084677, "grad_norm": 2563.434814453125, "learning_rate": 4.52e-06, "loss": 282.5878, "step": 2260 }, { "epoch": 0.009171087238452308, "grad_norm": 3302.397705078125, "learning_rate": 4.540000000000001e-06, "loss": 292.655, "step": 2270 }, { "epoch": 0.00921148850381994, "grad_norm": 726.690185546875, "learning_rate": 4.56e-06, "loss": 278.9063, "step": 2280 }, { "epoch": 0.00925188976918757, "grad_norm": 611.144287109375, "learning_rate": 4.58e-06, "loss": 297.2235, "step": 2290 }, { "epoch": 0.009292291034555202, "grad_norm": 962.685302734375, "learning_rate": 4.6e-06, "loss": 224.3765, "step": 2300 }, { "epoch": 0.009332692299922833, "grad_norm": 792.4270629882812, "learning_rate": 4.62e-06, "loss": 339.5073, "step": 2310 }, { "epoch": 0.009373093565290464, "grad_norm": 993.0485229492188, "learning_rate": 4.64e-06, "loss": 268.8986, "step": 2320 }, { "epoch": 0.009413494830658096, "grad_norm": 10737.74609375, "learning_rate": 4.66e-06, "loss": 290.0178, "step": 2330 }, { "epoch": 0.009453896096025727, "grad_norm": 1019.1024780273438, "learning_rate": 4.68e-06, "loss": 290.3607, "step": 2340 }, { "epoch": 0.009494297361393358, "grad_norm": 582.261962890625, "learning_rate": 4.7e-06, "loss": 263.2255, "step": 2350 }, { "epoch": 0.00953469862676099, "grad_norm": 928.1985473632812, "learning_rate": 4.72e-06, "loss": 496.6325, "step": 2360 }, { "epoch": 0.009575099892128621, "grad_norm": 1354.3148193359375, "learning_rate": 4.74e-06, "loss": 317.678, "step": 2370 }, { "epoch": 0.009615501157496252, "grad_norm": 1359.7713623046875, "learning_rate": 4.76e-06, "loss": 263.0336, "step": 2380 }, { "epoch": 0.009655902422863884, "grad_norm": 759.7086791992188, "learning_rate": 4.780000000000001e-06, "loss": 328.0415, "step": 2390 }, { "epoch": 0.009696303688231515, "grad_norm": 593.6005249023438, "learning_rate": 4.800000000000001e-06, "loss": 368.25, "step": 2400 }, { "epoch": 0.009736704953599146, "grad_norm": 732.736328125, "learning_rate": 4.8200000000000004e-06, "loss": 175.4389, "step": 2410 }, { "epoch": 0.009777106218966778, "grad_norm": 1101.7640380859375, "learning_rate": 4.84e-06, "loss": 293.6888, "step": 2420 }, { "epoch": 0.009817507484334409, "grad_norm": 4335.19873046875, "learning_rate": 4.86e-06, "loss": 348.8911, "step": 2430 }, { "epoch": 0.00985790874970204, "grad_norm": 1072.8892822265625, "learning_rate": 4.880000000000001e-06, "loss": 231.7467, "step": 2440 }, { "epoch": 0.009898310015069672, "grad_norm": 608.339111328125, "learning_rate": 4.9000000000000005e-06, "loss": 241.5925, "step": 2450 }, { "epoch": 0.009938711280437303, "grad_norm": 1088.7662353515625, "learning_rate": 4.92e-06, "loss": 185.2763, "step": 2460 }, { "epoch": 0.009979112545804934, "grad_norm": 901.8641967773438, "learning_rate": 4.94e-06, "loss": 355.3011, "step": 2470 }, { "epoch": 0.010019513811172566, "grad_norm": 1227.393798828125, "learning_rate": 4.96e-06, "loss": 310.6582, "step": 2480 }, { "epoch": 0.010059915076540197, "grad_norm": 820.9786376953125, "learning_rate": 4.98e-06, "loss": 388.506, "step": 2490 }, { "epoch": 0.010100316341907828, "grad_norm": 631.5418701171875, "learning_rate": 5e-06, "loss": 186.973, "step": 2500 }, { "epoch": 0.01014071760727546, "grad_norm": 976.1464233398438, "learning_rate": 5.02e-06, "loss": 260.0689, "step": 2510 }, { "epoch": 0.010181118872643091, "grad_norm": 537.5474853515625, "learning_rate": 5.04e-06, "loss": 245.8573, "step": 2520 }, { "epoch": 0.010221520138010722, "grad_norm": 1130.83154296875, "learning_rate": 5.06e-06, "loss": 213.0081, "step": 2530 }, { "epoch": 0.010261921403378354, "grad_norm": 1045.5367431640625, "learning_rate": 5.08e-06, "loss": 273.8134, "step": 2540 }, { "epoch": 0.010302322668745985, "grad_norm": 934.2299194335938, "learning_rate": 5.1e-06, "loss": 351.3407, "step": 2550 }, { "epoch": 0.010342723934113616, "grad_norm": 603.625, "learning_rate": 5.12e-06, "loss": 189.3821, "step": 2560 }, { "epoch": 0.010383125199481247, "grad_norm": 2282.93115234375, "learning_rate": 5.140000000000001e-06, "loss": 306.1943, "step": 2570 }, { "epoch": 0.010423526464848879, "grad_norm": 1158.8792724609375, "learning_rate": 5.1600000000000006e-06, "loss": 293.8449, "step": 2580 }, { "epoch": 0.01046392773021651, "grad_norm": 1000.369140625, "learning_rate": 5.18e-06, "loss": 252.3016, "step": 2590 }, { "epoch": 0.010504328995584141, "grad_norm": 1298.2783203125, "learning_rate": 5.2e-06, "loss": 304.2064, "step": 2600 }, { "epoch": 0.010544730260951773, "grad_norm": 2631.5205078125, "learning_rate": 5.220000000000001e-06, "loss": 272.8798, "step": 2610 }, { "epoch": 0.010585131526319404, "grad_norm": 1061.5810546875, "learning_rate": 5.240000000000001e-06, "loss": 403.6962, "step": 2620 }, { "epoch": 0.010625532791687035, "grad_norm": 835.53369140625, "learning_rate": 5.2600000000000005e-06, "loss": 367.6394, "step": 2630 }, { "epoch": 0.010665934057054667, "grad_norm": 867.9429931640625, "learning_rate": 5.28e-06, "loss": 321.1476, "step": 2640 }, { "epoch": 0.010706335322422298, "grad_norm": 3146.14013671875, "learning_rate": 5.3e-06, "loss": 266.9892, "step": 2650 }, { "epoch": 0.01074673658778993, "grad_norm": 640.1483764648438, "learning_rate": 5.32e-06, "loss": 206.9268, "step": 2660 }, { "epoch": 0.01078713785315756, "grad_norm": 842.3226928710938, "learning_rate": 5.3400000000000005e-06, "loss": 325.724, "step": 2670 }, { "epoch": 0.010827539118525192, "grad_norm": 717.4343872070312, "learning_rate": 5.36e-06, "loss": 314.9767, "step": 2680 }, { "epoch": 0.010867940383892823, "grad_norm": 5089.775390625, "learning_rate": 5.38e-06, "loss": 329.5498, "step": 2690 }, { "epoch": 0.010908341649260455, "grad_norm": 1157.4693603515625, "learning_rate": 5.4e-06, "loss": 339.251, "step": 2700 }, { "epoch": 0.010948742914628086, "grad_norm": 1268.937744140625, "learning_rate": 5.42e-06, "loss": 361.0937, "step": 2710 }, { "epoch": 0.010989144179995717, "grad_norm": 855.2471313476562, "learning_rate": 5.44e-06, "loss": 285.6735, "step": 2720 }, { "epoch": 0.011029545445363349, "grad_norm": 777.139892578125, "learning_rate": 5.46e-06, "loss": 275.3022, "step": 2730 }, { "epoch": 0.01106994671073098, "grad_norm": 2281.784423828125, "learning_rate": 5.48e-06, "loss": 264.5153, "step": 2740 }, { "epoch": 0.011110347976098611, "grad_norm": 737.9111328125, "learning_rate": 5.500000000000001e-06, "loss": 200.1999, "step": 2750 }, { "epoch": 0.011150749241466243, "grad_norm": 7024.08251953125, "learning_rate": 5.5200000000000005e-06, "loss": 316.7326, "step": 2760 }, { "epoch": 0.011191150506833874, "grad_norm": 1147.371826171875, "learning_rate": 5.54e-06, "loss": 238.9098, "step": 2770 }, { "epoch": 0.011231551772201505, "grad_norm": 873.0858154296875, "learning_rate": 5.56e-06, "loss": 249.4478, "step": 2780 }, { "epoch": 0.011271953037569137, "grad_norm": 2017.5811767578125, "learning_rate": 5.580000000000001e-06, "loss": 280.6727, "step": 2790 }, { "epoch": 0.011312354302936768, "grad_norm": 689.9805908203125, "learning_rate": 5.600000000000001e-06, "loss": 238.7173, "step": 2800 }, { "epoch": 0.0113527555683044, "grad_norm": 1248.52490234375, "learning_rate": 5.62e-06, "loss": 243.3881, "step": 2810 }, { "epoch": 0.01139315683367203, "grad_norm": 756.8421020507812, "learning_rate": 5.64e-06, "loss": 192.3346, "step": 2820 }, { "epoch": 0.011433558099039662, "grad_norm": 1675.668701171875, "learning_rate": 5.66e-06, "loss": 323.5421, "step": 2830 }, { "epoch": 0.011473959364407293, "grad_norm": 730.6026000976562, "learning_rate": 5.680000000000001e-06, "loss": 235.7116, "step": 2840 }, { "epoch": 0.011514360629774924, "grad_norm": 962.4594116210938, "learning_rate": 5.7000000000000005e-06, "loss": 284.3505, "step": 2850 }, { "epoch": 0.011554761895142556, "grad_norm": 737.1028442382812, "learning_rate": 5.72e-06, "loss": 279.0431, "step": 2860 }, { "epoch": 0.011595163160510187, "grad_norm": 1873.3394775390625, "learning_rate": 5.74e-06, "loss": 293.4966, "step": 2870 }, { "epoch": 0.011635564425877818, "grad_norm": 699.8680419921875, "learning_rate": 5.76e-06, "loss": 201.6691, "step": 2880 }, { "epoch": 0.01167596569124545, "grad_norm": 1195.6563720703125, "learning_rate": 5.78e-06, "loss": 339.2745, "step": 2890 }, { "epoch": 0.011716366956613081, "grad_norm": 1244.9783935546875, "learning_rate": 5.8e-06, "loss": 295.2553, "step": 2900 }, { "epoch": 0.011756768221980712, "grad_norm": 1545.2462158203125, "learning_rate": 5.82e-06, "loss": 380.4514, "step": 2910 }, { "epoch": 0.011797169487348344, "grad_norm": 580.0228271484375, "learning_rate": 5.84e-06, "loss": 219.7412, "step": 2920 }, { "epoch": 0.011837570752715975, "grad_norm": 741.6790771484375, "learning_rate": 5.86e-06, "loss": 223.957, "step": 2930 }, { "epoch": 0.011877972018083606, "grad_norm": 633.643798828125, "learning_rate": 5.8800000000000005e-06, "loss": 292.0679, "step": 2940 }, { "epoch": 0.011918373283451238, "grad_norm": 11992.681640625, "learning_rate": 5.9e-06, "loss": 365.1848, "step": 2950 }, { "epoch": 0.011958774548818869, "grad_norm": 2407.924072265625, "learning_rate": 5.920000000000001e-06, "loss": 284.2829, "step": 2960 }, { "epoch": 0.0119991758141865, "grad_norm": 9947.2861328125, "learning_rate": 5.940000000000001e-06, "loss": 336.0969, "step": 2970 }, { "epoch": 0.012039577079554132, "grad_norm": 1071.607177734375, "learning_rate": 5.9600000000000005e-06, "loss": 288.0666, "step": 2980 }, { "epoch": 0.012079978344921763, "grad_norm": 1607.6715087890625, "learning_rate": 5.98e-06, "loss": 334.0282, "step": 2990 }, { "epoch": 0.012120379610289394, "grad_norm": 616.33642578125, "learning_rate": 6e-06, "loss": 176.2524, "step": 3000 }, { "epoch": 0.012160780875657026, "grad_norm": 1787.1187744140625, "learning_rate": 6.02e-06, "loss": 262.5472, "step": 3010 }, { "epoch": 0.012201182141024657, "grad_norm": 680.6809692382812, "learning_rate": 6.040000000000001e-06, "loss": 195.7003, "step": 3020 }, { "epoch": 0.012241583406392288, "grad_norm": 1115.2088623046875, "learning_rate": 6.0600000000000004e-06, "loss": 291.6268, "step": 3030 }, { "epoch": 0.01228198467175992, "grad_norm": 1072.4854736328125, "learning_rate": 6.08e-06, "loss": 399.983, "step": 3040 }, { "epoch": 0.01232238593712755, "grad_norm": 1406.6270751953125, "learning_rate": 6.1e-06, "loss": 332.8581, "step": 3050 }, { "epoch": 0.012362787202495182, "grad_norm": 915.8823852539062, "learning_rate": 6.12e-06, "loss": 242.4036, "step": 3060 }, { "epoch": 0.012403188467862813, "grad_norm": 1265.71826171875, "learning_rate": 6.1400000000000005e-06, "loss": 240.312, "step": 3070 }, { "epoch": 0.012443589733230445, "grad_norm": 1575.8834228515625, "learning_rate": 6.16e-06, "loss": 207.945, "step": 3080 }, { "epoch": 0.012483990998598076, "grad_norm": 1585.046142578125, "learning_rate": 6.18e-06, "loss": 328.1433, "step": 3090 }, { "epoch": 0.012524392263965707, "grad_norm": 3717.249755859375, "learning_rate": 6.2e-06, "loss": 368.012, "step": 3100 }, { "epoch": 0.012564793529333339, "grad_norm": 784.789306640625, "learning_rate": 6.22e-06, "loss": 339.9229, "step": 3110 }, { "epoch": 0.01260519479470097, "grad_norm": 1485.326171875, "learning_rate": 6.24e-06, "loss": 345.8715, "step": 3120 }, { "epoch": 0.012645596060068601, "grad_norm": 883.3239135742188, "learning_rate": 6.26e-06, "loss": 215.2944, "step": 3130 }, { "epoch": 0.012685997325436233, "grad_norm": 665.870849609375, "learning_rate": 6.28e-06, "loss": 247.7601, "step": 3140 }, { "epoch": 0.012726398590803864, "grad_norm": 4960.85546875, "learning_rate": 6.300000000000001e-06, "loss": 272.3599, "step": 3150 }, { "epoch": 0.012766799856171495, "grad_norm": 1986.215576171875, "learning_rate": 6.320000000000001e-06, "loss": 261.7445, "step": 3160 }, { "epoch": 0.012807201121539127, "grad_norm": 689.2445678710938, "learning_rate": 6.34e-06, "loss": 186.6741, "step": 3170 }, { "epoch": 0.012847602386906758, "grad_norm": 1272.3314208984375, "learning_rate": 6.360000000000001e-06, "loss": 362.3708, "step": 3180 }, { "epoch": 0.01288800365227439, "grad_norm": 1243.4554443359375, "learning_rate": 6.38e-06, "loss": 223.126, "step": 3190 }, { "epoch": 0.01292840491764202, "grad_norm": 0.0, "learning_rate": 6.4000000000000006e-06, "loss": 220.924, "step": 3200 }, { "epoch": 0.012968806183009652, "grad_norm": 1140.0751953125, "learning_rate": 6.4199999999999995e-06, "loss": 304.4165, "step": 3210 }, { "epoch": 0.013009207448377283, "grad_norm": 865.4572143554688, "learning_rate": 6.44e-06, "loss": 225.4173, "step": 3220 }, { "epoch": 0.013049608713744915, "grad_norm": 1003.0341796875, "learning_rate": 6.460000000000001e-06, "loss": 318.8464, "step": 3230 }, { "epoch": 0.013090009979112546, "grad_norm": 3230.90966796875, "learning_rate": 6.48e-06, "loss": 266.8362, "step": 3240 }, { "epoch": 0.013130411244480177, "grad_norm": 5314.744140625, "learning_rate": 6.5000000000000004e-06, "loss": 356.5823, "step": 3250 }, { "epoch": 0.013170812509847809, "grad_norm": 1545.9931640625, "learning_rate": 6.519999999999999e-06, "loss": 267.9298, "step": 3260 }, { "epoch": 0.01321121377521544, "grad_norm": 1124.9794921875, "learning_rate": 6.54e-06, "loss": 350.8778, "step": 3270 }, { "epoch": 0.013251615040583071, "grad_norm": 1584.3470458984375, "learning_rate": 6.560000000000001e-06, "loss": 268.6283, "step": 3280 }, { "epoch": 0.013292016305950703, "grad_norm": 1276.875244140625, "learning_rate": 6.58e-06, "loss": 401.9857, "step": 3290 }, { "epoch": 0.013332417571318334, "grad_norm": 1092.377197265625, "learning_rate": 6.6e-06, "loss": 331.368, "step": 3300 }, { "epoch": 0.013372818836685965, "grad_norm": 1179.2205810546875, "learning_rate": 6.62e-06, "loss": 177.4306, "step": 3310 }, { "epoch": 0.013413220102053596, "grad_norm": 1153.752685546875, "learning_rate": 6.640000000000001e-06, "loss": 229.9581, "step": 3320 }, { "epoch": 0.013453621367421228, "grad_norm": 998.432373046875, "learning_rate": 6.660000000000001e-06, "loss": 266.7427, "step": 3330 }, { "epoch": 0.013494022632788859, "grad_norm": 1151.6092529296875, "learning_rate": 6.68e-06, "loss": 331.803, "step": 3340 }, { "epoch": 0.01353442389815649, "grad_norm": 2472.0546875, "learning_rate": 6.700000000000001e-06, "loss": 236.3656, "step": 3350 }, { "epoch": 0.013574825163524122, "grad_norm": 1243.1534423828125, "learning_rate": 6.72e-06, "loss": 180.7836, "step": 3360 }, { "epoch": 0.013615226428891753, "grad_norm": 1034.3489990234375, "learning_rate": 6.740000000000001e-06, "loss": 343.4018, "step": 3370 }, { "epoch": 0.013655627694259384, "grad_norm": 905.5031127929688, "learning_rate": 6.76e-06, "loss": 140.6687, "step": 3380 }, { "epoch": 0.013696028959627016, "grad_norm": 1340.9151611328125, "learning_rate": 6.78e-06, "loss": 289.7602, "step": 3390 }, { "epoch": 0.013736430224994647, "grad_norm": 960.6358642578125, "learning_rate": 6.800000000000001e-06, "loss": 226.2994, "step": 3400 }, { "epoch": 0.013776831490362278, "grad_norm": 1423.4761962890625, "learning_rate": 6.82e-06, "loss": 430.8338, "step": 3410 }, { "epoch": 0.01381723275572991, "grad_norm": 1304.45263671875, "learning_rate": 6.840000000000001e-06, "loss": 328.4004, "step": 3420 }, { "epoch": 0.013857634021097541, "grad_norm": 9199.7373046875, "learning_rate": 6.8599999999999995e-06, "loss": 270.6068, "step": 3430 }, { "epoch": 0.013898035286465172, "grad_norm": 776.5551147460938, "learning_rate": 6.88e-06, "loss": 176.4451, "step": 3440 }, { "epoch": 0.013938436551832804, "grad_norm": 774.5919799804688, "learning_rate": 6.900000000000001e-06, "loss": 258.9075, "step": 3450 }, { "epoch": 0.013978837817200435, "grad_norm": 1491.8228759765625, "learning_rate": 6.92e-06, "loss": 295.3933, "step": 3460 }, { "epoch": 0.014019239082568066, "grad_norm": 1491.8526611328125, "learning_rate": 6.9400000000000005e-06, "loss": 239.9352, "step": 3470 }, { "epoch": 0.014059640347935698, "grad_norm": 1934.498779296875, "learning_rate": 6.9599999999999994e-06, "loss": 275.0346, "step": 3480 }, { "epoch": 0.014100041613303329, "grad_norm": 1181.4918212890625, "learning_rate": 6.98e-06, "loss": 238.241, "step": 3490 }, { "epoch": 0.01414044287867096, "grad_norm": 1785.65478515625, "learning_rate": 7.000000000000001e-06, "loss": 163.911, "step": 3500 }, { "epoch": 0.014180844144038592, "grad_norm": 601.7042846679688, "learning_rate": 7.0200000000000006e-06, "loss": 200.9242, "step": 3510 }, { "epoch": 0.014221245409406223, "grad_norm": 1390.438720703125, "learning_rate": 7.04e-06, "loss": 225.1192, "step": 3520 }, { "epoch": 0.014261646674773854, "grad_norm": 1239.8173828125, "learning_rate": 7.06e-06, "loss": 207.541, "step": 3530 }, { "epoch": 0.014302047940141486, "grad_norm": 5047.65478515625, "learning_rate": 7.080000000000001e-06, "loss": 200.282, "step": 3540 }, { "epoch": 0.014342449205509117, "grad_norm": 684.9512939453125, "learning_rate": 7.1e-06, "loss": 200.1313, "step": 3550 }, { "epoch": 0.014382850470876748, "grad_norm": 538.3458862304688, "learning_rate": 7.1200000000000004e-06, "loss": 271.5458, "step": 3560 }, { "epoch": 0.01442325173624438, "grad_norm": 1814.7567138671875, "learning_rate": 7.140000000000001e-06, "loss": 227.9855, "step": 3570 }, { "epoch": 0.01446365300161201, "grad_norm": 617.8508911132812, "learning_rate": 7.16e-06, "loss": 152.5538, "step": 3580 }, { "epoch": 0.014504054266979642, "grad_norm": 1195.5587158203125, "learning_rate": 7.180000000000001e-06, "loss": 220.7645, "step": 3590 }, { "epoch": 0.014544455532347273, "grad_norm": 963.2885131835938, "learning_rate": 7.2e-06, "loss": 237.8198, "step": 3600 }, { "epoch": 0.014584856797714905, "grad_norm": 1143.399658203125, "learning_rate": 7.22e-06, "loss": 246.5691, "step": 3610 }, { "epoch": 0.014625258063082536, "grad_norm": 936.9075927734375, "learning_rate": 7.240000000000001e-06, "loss": 225.2431, "step": 3620 }, { "epoch": 0.014665659328450167, "grad_norm": 505.3612365722656, "learning_rate": 7.26e-06, "loss": 209.9184, "step": 3630 }, { "epoch": 0.014706060593817799, "grad_norm": 4472.5458984375, "learning_rate": 7.280000000000001e-06, "loss": 251.8955, "step": 3640 }, { "epoch": 0.01474646185918543, "grad_norm": 1152.7425537109375, "learning_rate": 7.2999999999999996e-06, "loss": 267.1611, "step": 3650 }, { "epoch": 0.014786863124553061, "grad_norm": 1535.3359375, "learning_rate": 7.32e-06, "loss": 258.2474, "step": 3660 }, { "epoch": 0.014827264389920693, "grad_norm": 788.0407104492188, "learning_rate": 7.340000000000001e-06, "loss": 308.7459, "step": 3670 }, { "epoch": 0.014867665655288324, "grad_norm": 3632.613037109375, "learning_rate": 7.36e-06, "loss": 270.6488, "step": 3680 }, { "epoch": 0.014908066920655955, "grad_norm": 1338.3414306640625, "learning_rate": 7.3800000000000005e-06, "loss": 402.1751, "step": 3690 }, { "epoch": 0.014948468186023587, "grad_norm": 2017.5732421875, "learning_rate": 7.4e-06, "loss": 335.9066, "step": 3700 }, { "epoch": 0.014988869451391218, "grad_norm": 866.8572998046875, "learning_rate": 7.420000000000001e-06, "loss": 228.7708, "step": 3710 }, { "epoch": 0.01502927071675885, "grad_norm": 2627.818359375, "learning_rate": 7.44e-06, "loss": 237.4983, "step": 3720 }, { "epoch": 0.01506967198212648, "grad_norm": 1014.5845947265625, "learning_rate": 7.4600000000000006e-06, "loss": 289.3517, "step": 3730 }, { "epoch": 0.015110073247494112, "grad_norm": 1206.6943359375, "learning_rate": 7.480000000000001e-06, "loss": 253.7986, "step": 3740 }, { "epoch": 0.015150474512861743, "grad_norm": 1855.4755859375, "learning_rate": 7.5e-06, "loss": 384.5513, "step": 3750 }, { "epoch": 0.015190875778229375, "grad_norm": 745.6865844726562, "learning_rate": 7.520000000000001e-06, "loss": 246.6602, "step": 3760 }, { "epoch": 0.015231277043597006, "grad_norm": 1637.106201171875, "learning_rate": 7.54e-06, "loss": 291.9528, "step": 3770 }, { "epoch": 0.015271678308964637, "grad_norm": 2112.7109375, "learning_rate": 7.5600000000000005e-06, "loss": 228.7163, "step": 3780 }, { "epoch": 0.015312079574332269, "grad_norm": 1741.5140380859375, "learning_rate": 7.580000000000001e-06, "loss": 238.9058, "step": 3790 }, { "epoch": 0.0153524808396999, "grad_norm": 991.5599365234375, "learning_rate": 7.6e-06, "loss": 172.588, "step": 3800 }, { "epoch": 0.015392882105067531, "grad_norm": 1450.9693603515625, "learning_rate": 7.620000000000001e-06, "loss": 298.7596, "step": 3810 }, { "epoch": 0.015433283370435162, "grad_norm": 602.7521362304688, "learning_rate": 7.64e-06, "loss": 212.8531, "step": 3820 }, { "epoch": 0.015473684635802794, "grad_norm": 1867.60546875, "learning_rate": 7.660000000000001e-06, "loss": 314.0657, "step": 3830 }, { "epoch": 0.015514085901170425, "grad_norm": 6575.3671875, "learning_rate": 7.68e-06, "loss": 181.4101, "step": 3840 }, { "epoch": 0.015554487166538056, "grad_norm": 811.929443359375, "learning_rate": 7.7e-06, "loss": 203.4754, "step": 3850 }, { "epoch": 0.015594888431905688, "grad_norm": 1337.7786865234375, "learning_rate": 7.72e-06, "loss": 251.0148, "step": 3860 }, { "epoch": 0.01563528969727332, "grad_norm": 1872.110595703125, "learning_rate": 7.74e-06, "loss": 220.2635, "step": 3870 }, { "epoch": 0.01567569096264095, "grad_norm": 421.63671875, "learning_rate": 7.76e-06, "loss": 219.4246, "step": 3880 }, { "epoch": 0.01571609222800858, "grad_norm": 901.9820556640625, "learning_rate": 7.78e-06, "loss": 253.3245, "step": 3890 }, { "epoch": 0.01575649349337621, "grad_norm": 1378.5067138671875, "learning_rate": 7.8e-06, "loss": 272.5337, "step": 3900 }, { "epoch": 0.015796894758743844, "grad_norm": 899.2632446289062, "learning_rate": 7.820000000000001e-06, "loss": 221.5219, "step": 3910 }, { "epoch": 0.015837296024111474, "grad_norm": 2511.511474609375, "learning_rate": 7.84e-06, "loss": 183.7072, "step": 3920 }, { "epoch": 0.015877697289479107, "grad_norm": 1480.0848388671875, "learning_rate": 7.860000000000001e-06, "loss": 226.5134, "step": 3930 }, { "epoch": 0.015918098554846737, "grad_norm": 1251.0631103515625, "learning_rate": 7.879999999999999e-06, "loss": 220.8395, "step": 3940 }, { "epoch": 0.01595849982021437, "grad_norm": 1017.7593994140625, "learning_rate": 7.9e-06, "loss": 299.8441, "step": 3950 }, { "epoch": 0.015998901085582, "grad_norm": 750.101318359375, "learning_rate": 7.92e-06, "loss": 256.8377, "step": 3960 }, { "epoch": 0.016039302350949632, "grad_norm": 850.2648315429688, "learning_rate": 7.94e-06, "loss": 300.5525, "step": 3970 }, { "epoch": 0.016079703616317262, "grad_norm": 688.6962280273438, "learning_rate": 7.96e-06, "loss": 238.5739, "step": 3980 }, { "epoch": 0.016120104881684895, "grad_norm": 1160.3623046875, "learning_rate": 7.98e-06, "loss": 230.5895, "step": 3990 }, { "epoch": 0.016160506147052525, "grad_norm": 768.995849609375, "learning_rate": 8.000000000000001e-06, "loss": 273.8474, "step": 4000 }, { "epoch": 0.016200907412420158, "grad_norm": 733.1216430664062, "learning_rate": 8.02e-06, "loss": 213.2722, "step": 4010 }, { "epoch": 0.016241308677787787, "grad_norm": 0.0, "learning_rate": 8.040000000000001e-06, "loss": 222.9751, "step": 4020 }, { "epoch": 0.01628170994315542, "grad_norm": 800.3453979492188, "learning_rate": 8.06e-06, "loss": 268.3867, "step": 4030 }, { "epoch": 0.01632211120852305, "grad_norm": 1095.6239013671875, "learning_rate": 8.08e-06, "loss": 321.233, "step": 4040 }, { "epoch": 0.016362512473890683, "grad_norm": 2067.449462890625, "learning_rate": 8.1e-06, "loss": 218.0929, "step": 4050 }, { "epoch": 0.016402913739258312, "grad_norm": 1188.5040283203125, "learning_rate": 8.12e-06, "loss": 326.4176, "step": 4060 }, { "epoch": 0.016443315004625945, "grad_norm": 1143.6793212890625, "learning_rate": 8.14e-06, "loss": 244.4551, "step": 4070 }, { "epoch": 0.016483716269993575, "grad_norm": 878.0779418945312, "learning_rate": 8.160000000000001e-06, "loss": 235.9173, "step": 4080 }, { "epoch": 0.016524117535361208, "grad_norm": 737.4267578125, "learning_rate": 8.18e-06, "loss": 331.0821, "step": 4090 }, { "epoch": 0.016564518800728838, "grad_norm": 1495.6761474609375, "learning_rate": 8.200000000000001e-06, "loss": 248.3052, "step": 4100 }, { "epoch": 0.01660492006609647, "grad_norm": 814.0003051757812, "learning_rate": 8.22e-06, "loss": 199.0726, "step": 4110 }, { "epoch": 0.0166453213314641, "grad_norm": 634.8568725585938, "learning_rate": 8.24e-06, "loss": 313.8576, "step": 4120 }, { "epoch": 0.016685722596831733, "grad_norm": 714.36328125, "learning_rate": 8.26e-06, "loss": 215.2515, "step": 4130 }, { "epoch": 0.016726123862199363, "grad_norm": 1302.689697265625, "learning_rate": 8.28e-06, "loss": 262.0066, "step": 4140 }, { "epoch": 0.016766525127566996, "grad_norm": 1403.1646728515625, "learning_rate": 8.3e-06, "loss": 314.0872, "step": 4150 }, { "epoch": 0.016806926392934626, "grad_norm": 944.2760009765625, "learning_rate": 8.32e-06, "loss": 276.365, "step": 4160 }, { "epoch": 0.01684732765830226, "grad_norm": 2125.658447265625, "learning_rate": 8.34e-06, "loss": 337.9024, "step": 4170 }, { "epoch": 0.016887728923669888, "grad_norm": 2500.66455078125, "learning_rate": 8.36e-06, "loss": 241.9205, "step": 4180 }, { "epoch": 0.01692813018903752, "grad_norm": 637.8106689453125, "learning_rate": 8.380000000000001e-06, "loss": 239.1331, "step": 4190 }, { "epoch": 0.01696853145440515, "grad_norm": 1099.7269287109375, "learning_rate": 8.400000000000001e-06, "loss": 170.0579, "step": 4200 }, { "epoch": 0.017008932719772784, "grad_norm": 777.8555297851562, "learning_rate": 8.42e-06, "loss": 269.5526, "step": 4210 }, { "epoch": 0.017049333985140414, "grad_norm": 624.0740356445312, "learning_rate": 8.44e-06, "loss": 180.0498, "step": 4220 }, { "epoch": 0.017089735250508047, "grad_norm": 1288.8555908203125, "learning_rate": 8.46e-06, "loss": 287.3699, "step": 4230 }, { "epoch": 0.017130136515875676, "grad_norm": 689.3584594726562, "learning_rate": 8.48e-06, "loss": 308.6741, "step": 4240 }, { "epoch": 0.01717053778124331, "grad_norm": 1398.036376953125, "learning_rate": 8.500000000000002e-06, "loss": 266.2167, "step": 4250 }, { "epoch": 0.01721093904661094, "grad_norm": 789.7576293945312, "learning_rate": 8.52e-06, "loss": 255.249, "step": 4260 }, { "epoch": 0.017251340311978572, "grad_norm": 815.0007934570312, "learning_rate": 8.540000000000001e-06, "loss": 194.3588, "step": 4270 }, { "epoch": 0.0172917415773462, "grad_norm": 1327.666259765625, "learning_rate": 8.56e-06, "loss": 329.2384, "step": 4280 }, { "epoch": 0.017332142842713835, "grad_norm": 626.3024291992188, "learning_rate": 8.580000000000001e-06, "loss": 137.7933, "step": 4290 }, { "epoch": 0.017372544108081464, "grad_norm": 1358.869384765625, "learning_rate": 8.599999999999999e-06, "loss": 234.6338, "step": 4300 }, { "epoch": 0.017412945373449097, "grad_norm": 935.4063110351562, "learning_rate": 8.62e-06, "loss": 326.7994, "step": 4310 }, { "epoch": 0.017453346638816727, "grad_norm": 1433.051025390625, "learning_rate": 8.64e-06, "loss": 242.4297, "step": 4320 }, { "epoch": 0.01749374790418436, "grad_norm": 1025.8055419921875, "learning_rate": 8.66e-06, "loss": 168.3089, "step": 4330 }, { "epoch": 0.01753414916955199, "grad_norm": 2163.64794921875, "learning_rate": 8.68e-06, "loss": 246.4337, "step": 4340 }, { "epoch": 0.017574550434919622, "grad_norm": 1108.7593994140625, "learning_rate": 8.7e-06, "loss": 211.9252, "step": 4350 }, { "epoch": 0.017614951700287252, "grad_norm": 1170.3670654296875, "learning_rate": 8.720000000000001e-06, "loss": 196.4572, "step": 4360 }, { "epoch": 0.017655352965654885, "grad_norm": 822.1085815429688, "learning_rate": 8.740000000000001e-06, "loss": 236.9191, "step": 4370 }, { "epoch": 0.017695754231022515, "grad_norm": 551.1714477539062, "learning_rate": 8.76e-06, "loss": 262.1803, "step": 4380 }, { "epoch": 0.017736155496390148, "grad_norm": 941.1051635742188, "learning_rate": 8.78e-06, "loss": 197.1894, "step": 4390 }, { "epoch": 0.017776556761757777, "grad_norm": 812.2485961914062, "learning_rate": 8.8e-06, "loss": 304.7698, "step": 4400 }, { "epoch": 0.01781695802712541, "grad_norm": 740.1641845703125, "learning_rate": 8.82e-06, "loss": 207.9336, "step": 4410 }, { "epoch": 0.01785735929249304, "grad_norm": 1118.2490234375, "learning_rate": 8.840000000000002e-06, "loss": 201.0788, "step": 4420 }, { "epoch": 0.017897760557860673, "grad_norm": 2099.23583984375, "learning_rate": 8.86e-06, "loss": 222.6882, "step": 4430 }, { "epoch": 0.017938161823228303, "grad_norm": 1029.5682373046875, "learning_rate": 8.880000000000001e-06, "loss": 194.8024, "step": 4440 }, { "epoch": 0.017978563088595936, "grad_norm": 1188.818603515625, "learning_rate": 8.9e-06, "loss": 206.5788, "step": 4450 }, { "epoch": 0.018018964353963565, "grad_norm": 820.3424682617188, "learning_rate": 8.920000000000001e-06, "loss": 222.0988, "step": 4460 }, { "epoch": 0.0180593656193312, "grad_norm": 1431.5162353515625, "learning_rate": 8.939999999999999e-06, "loss": 210.3697, "step": 4470 }, { "epoch": 0.018099766884698828, "grad_norm": 1079.22314453125, "learning_rate": 8.96e-06, "loss": 236.7236, "step": 4480 }, { "epoch": 0.01814016815006646, "grad_norm": 1381.0302734375, "learning_rate": 8.98e-06, "loss": 200.5311, "step": 4490 }, { "epoch": 0.01818056941543409, "grad_norm": 799.3221435546875, "learning_rate": 9e-06, "loss": 229.802, "step": 4500 }, { "epoch": 0.018220970680801724, "grad_norm": 1207.725830078125, "learning_rate": 9.02e-06, "loss": 214.4318, "step": 4510 }, { "epoch": 0.018261371946169353, "grad_norm": 725.3284301757812, "learning_rate": 9.04e-06, "loss": 193.9023, "step": 4520 }, { "epoch": 0.018301773211536986, "grad_norm": 1104.83642578125, "learning_rate": 9.06e-06, "loss": 220.9035, "step": 4530 }, { "epoch": 0.018342174476904616, "grad_norm": 1509.99462890625, "learning_rate": 9.080000000000001e-06, "loss": 188.4938, "step": 4540 }, { "epoch": 0.01838257574227225, "grad_norm": 1129.591064453125, "learning_rate": 9.100000000000001e-06, "loss": 308.8094, "step": 4550 }, { "epoch": 0.01842297700763988, "grad_norm": 2700.010986328125, "learning_rate": 9.12e-06, "loss": 310.8748, "step": 4560 }, { "epoch": 0.01846337827300751, "grad_norm": 1198.5032958984375, "learning_rate": 9.14e-06, "loss": 219.3821, "step": 4570 }, { "epoch": 0.01850377953837514, "grad_norm": 907.4744873046875, "learning_rate": 9.16e-06, "loss": 272.8794, "step": 4580 }, { "epoch": 0.018544180803742774, "grad_norm": 480.4779968261719, "learning_rate": 9.180000000000002e-06, "loss": 303.4508, "step": 4590 }, { "epoch": 0.018584582069110404, "grad_norm": 5315.6416015625, "learning_rate": 9.2e-06, "loss": 298.2705, "step": 4600 }, { "epoch": 0.018624983334478037, "grad_norm": 872.8356323242188, "learning_rate": 9.220000000000002e-06, "loss": 203.8982, "step": 4610 }, { "epoch": 0.018665384599845666, "grad_norm": 593.9172973632812, "learning_rate": 9.24e-06, "loss": 175.9667, "step": 4620 }, { "epoch": 0.0187057858652133, "grad_norm": 869.2783203125, "learning_rate": 9.260000000000001e-06, "loss": 180.2353, "step": 4630 }, { "epoch": 0.01874618713058093, "grad_norm": 1448.7855224609375, "learning_rate": 9.28e-06, "loss": 180.4345, "step": 4640 }, { "epoch": 0.018786588395948562, "grad_norm": 2569.0322265625, "learning_rate": 9.3e-06, "loss": 263.0333, "step": 4650 }, { "epoch": 0.01882698966131619, "grad_norm": 1611.3876953125, "learning_rate": 9.32e-06, "loss": 286.5421, "step": 4660 }, { "epoch": 0.018867390926683825, "grad_norm": 762.2740478515625, "learning_rate": 9.34e-06, "loss": 272.6539, "step": 4670 }, { "epoch": 0.018907792192051454, "grad_norm": 1819.1978759765625, "learning_rate": 9.36e-06, "loss": 197.3641, "step": 4680 }, { "epoch": 0.018948193457419087, "grad_norm": 1281.1929931640625, "learning_rate": 9.38e-06, "loss": 199.3052, "step": 4690 }, { "epoch": 0.018988594722786717, "grad_norm": 4967.193359375, "learning_rate": 9.4e-06, "loss": 230.312, "step": 4700 }, { "epoch": 0.01902899598815435, "grad_norm": 1749.0260009765625, "learning_rate": 9.420000000000001e-06, "loss": 245.5051, "step": 4710 }, { "epoch": 0.01906939725352198, "grad_norm": 0.0, "learning_rate": 9.44e-06, "loss": 190.8588, "step": 4720 }, { "epoch": 0.019109798518889613, "grad_norm": 1861.452880859375, "learning_rate": 9.460000000000001e-06, "loss": 190.6377, "step": 4730 }, { "epoch": 0.019150199784257242, "grad_norm": 2230.98876953125, "learning_rate": 9.48e-06, "loss": 252.1078, "step": 4740 }, { "epoch": 0.019190601049624875, "grad_norm": 803.5484008789062, "learning_rate": 9.5e-06, "loss": 254.6172, "step": 4750 }, { "epoch": 0.019231002314992505, "grad_norm": 765.6219482421875, "learning_rate": 9.52e-06, "loss": 249.983, "step": 4760 }, { "epoch": 0.019271403580360138, "grad_norm": 932.5681762695312, "learning_rate": 9.54e-06, "loss": 235.5316, "step": 4770 }, { "epoch": 0.019311804845727767, "grad_norm": 981.9686889648438, "learning_rate": 9.560000000000002e-06, "loss": 229.3628, "step": 4780 }, { "epoch": 0.0193522061110954, "grad_norm": 436.2134704589844, "learning_rate": 9.58e-06, "loss": 158.716, "step": 4790 }, { "epoch": 0.01939260737646303, "grad_norm": 792.7994995117188, "learning_rate": 9.600000000000001e-06, "loss": 170.3208, "step": 4800 }, { "epoch": 0.019433008641830663, "grad_norm": 844.0726318359375, "learning_rate": 9.62e-06, "loss": 224.4318, "step": 4810 }, { "epoch": 0.019473409907198293, "grad_norm": 2132.043701171875, "learning_rate": 9.640000000000001e-06, "loss": 252.1749, "step": 4820 }, { "epoch": 0.019513811172565926, "grad_norm": 2546.78271484375, "learning_rate": 9.66e-06, "loss": 207.1322, "step": 4830 }, { "epoch": 0.019554212437933555, "grad_norm": 1447.4708251953125, "learning_rate": 9.68e-06, "loss": 163.8928, "step": 4840 }, { "epoch": 0.01959461370330119, "grad_norm": 1765.4603271484375, "learning_rate": 9.7e-06, "loss": 285.0885, "step": 4850 }, { "epoch": 0.019635014968668818, "grad_norm": 385.1683349609375, "learning_rate": 9.72e-06, "loss": 159.1181, "step": 4860 }, { "epoch": 0.01967541623403645, "grad_norm": 3586.742919921875, "learning_rate": 9.74e-06, "loss": 281.9318, "step": 4870 }, { "epoch": 0.01971581749940408, "grad_norm": 1368.7694091796875, "learning_rate": 9.760000000000001e-06, "loss": 191.7896, "step": 4880 }, { "epoch": 0.019756218764771714, "grad_norm": 826.7830810546875, "learning_rate": 9.78e-06, "loss": 174.5084, "step": 4890 }, { "epoch": 0.019796620030139343, "grad_norm": 890.3446044921875, "learning_rate": 9.800000000000001e-06, "loss": 290.2659, "step": 4900 }, { "epoch": 0.019837021295506976, "grad_norm": 1318.1915283203125, "learning_rate": 9.820000000000001e-06, "loss": 272.5934, "step": 4910 }, { "epoch": 0.019877422560874606, "grad_norm": 772.6763916015625, "learning_rate": 9.84e-06, "loss": 151.6077, "step": 4920 }, { "epoch": 0.01991782382624224, "grad_norm": 1587.762451171875, "learning_rate": 9.86e-06, "loss": 248.4316, "step": 4930 }, { "epoch": 0.01995822509160987, "grad_norm": 1252.715576171875, "learning_rate": 9.88e-06, "loss": 258.2403, "step": 4940 }, { "epoch": 0.0199986263569775, "grad_norm": 1054.4132080078125, "learning_rate": 9.900000000000002e-06, "loss": 212.0352, "step": 4950 }, { "epoch": 0.02003902762234513, "grad_norm": 1351.8643798828125, "learning_rate": 9.92e-06, "loss": 135.1369, "step": 4960 }, { "epoch": 0.020079428887712764, "grad_norm": 0.0, "learning_rate": 9.940000000000001e-06, "loss": 259.982, "step": 4970 }, { "epoch": 0.020119830153080394, "grad_norm": 1335.3504638671875, "learning_rate": 9.96e-06, "loss": 232.5153, "step": 4980 }, { "epoch": 0.020160231418448027, "grad_norm": 677.4559326171875, "learning_rate": 9.980000000000001e-06, "loss": 240.9019, "step": 4990 }, { "epoch": 0.020200632683815656, "grad_norm": 1134.4188232421875, "learning_rate": 1e-05, "loss": 172.6664, "step": 5000 }, { "epoch": 0.02024103394918329, "grad_norm": 1606.3121337890625, "learning_rate": 1.002e-05, "loss": 218.7504, "step": 5010 }, { "epoch": 0.02028143521455092, "grad_norm": 1248.30615234375, "learning_rate": 1.004e-05, "loss": 143.733, "step": 5020 }, { "epoch": 0.020321836479918552, "grad_norm": 1033.4849853515625, "learning_rate": 1.006e-05, "loss": 252.8728, "step": 5030 }, { "epoch": 0.020362237745286182, "grad_norm": 1231.0445556640625, "learning_rate": 1.008e-05, "loss": 243.1658, "step": 5040 }, { "epoch": 0.020402639010653815, "grad_norm": 1193.17236328125, "learning_rate": 1.0100000000000002e-05, "loss": 181.7209, "step": 5050 }, { "epoch": 0.020443040276021444, "grad_norm": 0.0, "learning_rate": 1.012e-05, "loss": 276.8536, "step": 5060 }, { "epoch": 0.020483441541389077, "grad_norm": 1306.9161376953125, "learning_rate": 1.0140000000000001e-05, "loss": 205.8765, "step": 5070 }, { "epoch": 0.020523842806756707, "grad_norm": 3135.527099609375, "learning_rate": 1.016e-05, "loss": 227.9693, "step": 5080 }, { "epoch": 0.02056424407212434, "grad_norm": 2155.43994140625, "learning_rate": 1.018e-05, "loss": 284.9991, "step": 5090 }, { "epoch": 0.02060464533749197, "grad_norm": 528.388427734375, "learning_rate": 1.02e-05, "loss": 214.0478, "step": 5100 }, { "epoch": 0.020645046602859603, "grad_norm": 924.0242309570312, "learning_rate": 1.022e-05, "loss": 253.1813, "step": 5110 }, { "epoch": 0.020685447868227232, "grad_norm": 1019.5430297851562, "learning_rate": 1.024e-05, "loss": 193.0214, "step": 5120 }, { "epoch": 0.020725849133594865, "grad_norm": 3559.9951171875, "learning_rate": 1.026e-05, "loss": 180.7234, "step": 5130 }, { "epoch": 0.020766250398962495, "grad_norm": 686.9107055664062, "learning_rate": 1.0280000000000002e-05, "loss": 177.0602, "step": 5140 }, { "epoch": 0.020806651664330128, "grad_norm": 616.1886596679688, "learning_rate": 1.03e-05, "loss": 150.3324, "step": 5150 }, { "epoch": 0.020847052929697758, "grad_norm": 768.7440185546875, "learning_rate": 1.0320000000000001e-05, "loss": 218.8612, "step": 5160 }, { "epoch": 0.02088745419506539, "grad_norm": 927.5042724609375, "learning_rate": 1.0340000000000001e-05, "loss": 247.3814, "step": 5170 }, { "epoch": 0.02092785546043302, "grad_norm": 1545.854736328125, "learning_rate": 1.036e-05, "loss": 202.6357, "step": 5180 }, { "epoch": 0.020968256725800653, "grad_norm": 950.5018310546875, "learning_rate": 1.038e-05, "loss": 245.9979, "step": 5190 }, { "epoch": 0.021008657991168283, "grad_norm": 829.0062866210938, "learning_rate": 1.04e-05, "loss": 244.4165, "step": 5200 }, { "epoch": 0.021049059256535916, "grad_norm": 1268.8380126953125, "learning_rate": 1.042e-05, "loss": 216.124, "step": 5210 }, { "epoch": 0.021089460521903546, "grad_norm": 1645.070068359375, "learning_rate": 1.0440000000000002e-05, "loss": 224.5982, "step": 5220 }, { "epoch": 0.02112986178727118, "grad_norm": 1425.95947265625, "learning_rate": 1.046e-05, "loss": 241.4969, "step": 5230 }, { "epoch": 0.021170263052638808, "grad_norm": 1239.9669189453125, "learning_rate": 1.0480000000000001e-05, "loss": 296.2492, "step": 5240 }, { "epoch": 0.02121066431800644, "grad_norm": 3276.483154296875, "learning_rate": 1.05e-05, "loss": 257.1036, "step": 5250 }, { "epoch": 0.02125106558337407, "grad_norm": 1606.2689208984375, "learning_rate": 1.0520000000000001e-05, "loss": 273.2055, "step": 5260 }, { "epoch": 0.021291466848741704, "grad_norm": 2068.524658203125, "learning_rate": 1.0539999999999999e-05, "loss": 209.8239, "step": 5270 }, { "epoch": 0.021331868114109333, "grad_norm": 1133.3856201171875, "learning_rate": 1.056e-05, "loss": 248.1236, "step": 5280 }, { "epoch": 0.021372269379476967, "grad_norm": 1275.0018310546875, "learning_rate": 1.058e-05, "loss": 238.0178, "step": 5290 }, { "epoch": 0.021412670644844596, "grad_norm": 759.3450927734375, "learning_rate": 1.06e-05, "loss": 269.6895, "step": 5300 }, { "epoch": 0.02145307191021223, "grad_norm": 4057.62744140625, "learning_rate": 1.062e-05, "loss": 268.4212, "step": 5310 }, { "epoch": 0.02149347317557986, "grad_norm": 933.5989379882812, "learning_rate": 1.064e-05, "loss": 200.5304, "step": 5320 }, { "epoch": 0.021533874440947492, "grad_norm": 709.9401245117188, "learning_rate": 1.0660000000000001e-05, "loss": 227.5934, "step": 5330 }, { "epoch": 0.02157427570631512, "grad_norm": 1363.613525390625, "learning_rate": 1.0680000000000001e-05, "loss": 195.4376, "step": 5340 }, { "epoch": 0.021614676971682754, "grad_norm": 860.6060180664062, "learning_rate": 1.0700000000000001e-05, "loss": 220.5302, "step": 5350 }, { "epoch": 0.021655078237050384, "grad_norm": 809.3516845703125, "learning_rate": 1.072e-05, "loss": 289.8672, "step": 5360 }, { "epoch": 0.021695479502418017, "grad_norm": 688.4315185546875, "learning_rate": 1.074e-05, "loss": 231.9644, "step": 5370 }, { "epoch": 0.021735880767785647, "grad_norm": 2667.476318359375, "learning_rate": 1.076e-05, "loss": 270.5944, "step": 5380 }, { "epoch": 0.02177628203315328, "grad_norm": 1891.450927734375, "learning_rate": 1.0780000000000002e-05, "loss": 249.0208, "step": 5390 }, { "epoch": 0.02181668329852091, "grad_norm": 3481.829345703125, "learning_rate": 1.08e-05, "loss": 301.6664, "step": 5400 }, { "epoch": 0.021857084563888542, "grad_norm": 956.6407470703125, "learning_rate": 1.0820000000000001e-05, "loss": 245.2214, "step": 5410 }, { "epoch": 0.021897485829256172, "grad_norm": 4194.12841796875, "learning_rate": 1.084e-05, "loss": 260.2813, "step": 5420 }, { "epoch": 0.021937887094623805, "grad_norm": 3680.693603515625, "learning_rate": 1.0860000000000001e-05, "loss": 249.7451, "step": 5430 }, { "epoch": 0.021978288359991435, "grad_norm": 1049.5306396484375, "learning_rate": 1.088e-05, "loss": 227.2931, "step": 5440 }, { "epoch": 0.022018689625359068, "grad_norm": 10489.931640625, "learning_rate": 1.09e-05, "loss": 334.8502, "step": 5450 }, { "epoch": 0.022059090890726697, "grad_norm": 1121.204833984375, "learning_rate": 1.092e-05, "loss": 220.4895, "step": 5460 }, { "epoch": 0.02209949215609433, "grad_norm": 1375.3389892578125, "learning_rate": 1.094e-05, "loss": 212.8332, "step": 5470 }, { "epoch": 0.02213989342146196, "grad_norm": 2596.01318359375, "learning_rate": 1.096e-05, "loss": 242.3982, "step": 5480 }, { "epoch": 0.022180294686829593, "grad_norm": 1790.5255126953125, "learning_rate": 1.098e-05, "loss": 228.799, "step": 5490 }, { "epoch": 0.022220695952197222, "grad_norm": 1782.9239501953125, "learning_rate": 1.1000000000000001e-05, "loss": 247.0614, "step": 5500 }, { "epoch": 0.022261097217564856, "grad_norm": 3071.753173828125, "learning_rate": 1.1020000000000001e-05, "loss": 190.9815, "step": 5510 }, { "epoch": 0.022301498482932485, "grad_norm": 778.1385498046875, "learning_rate": 1.1040000000000001e-05, "loss": 208.1198, "step": 5520 }, { "epoch": 0.022341899748300118, "grad_norm": 4282.1689453125, "learning_rate": 1.106e-05, "loss": 187.0153, "step": 5530 }, { "epoch": 0.022382301013667748, "grad_norm": 3060.544921875, "learning_rate": 1.108e-05, "loss": 243.2019, "step": 5540 }, { "epoch": 0.02242270227903538, "grad_norm": 1174.474853515625, "learning_rate": 1.11e-05, "loss": 240.466, "step": 5550 }, { "epoch": 0.02246310354440301, "grad_norm": 835.2548828125, "learning_rate": 1.112e-05, "loss": 195.4574, "step": 5560 }, { "epoch": 0.022503504809770643, "grad_norm": 679.6596069335938, "learning_rate": 1.114e-05, "loss": 116.5087, "step": 5570 }, { "epoch": 0.022543906075138273, "grad_norm": 4872.7197265625, "learning_rate": 1.1160000000000002e-05, "loss": 208.2604, "step": 5580 }, { "epoch": 0.022584307340505906, "grad_norm": 507.1872253417969, "learning_rate": 1.118e-05, "loss": 240.8744, "step": 5590 }, { "epoch": 0.022624708605873536, "grad_norm": 1005.294189453125, "learning_rate": 1.1200000000000001e-05, "loss": 184.084, "step": 5600 }, { "epoch": 0.02266510987124117, "grad_norm": 7254.05029296875, "learning_rate": 1.122e-05, "loss": 216.7616, "step": 5610 }, { "epoch": 0.0227055111366088, "grad_norm": 1527.6485595703125, "learning_rate": 1.124e-05, "loss": 197.5231, "step": 5620 }, { "epoch": 0.02274591240197643, "grad_norm": 1460.7916259765625, "learning_rate": 1.126e-05, "loss": 280.5447, "step": 5630 }, { "epoch": 0.02278631366734406, "grad_norm": 1024.780517578125, "learning_rate": 1.128e-05, "loss": 248.3504, "step": 5640 }, { "epoch": 0.022826714932711694, "grad_norm": 1925.320068359375, "learning_rate": 1.13e-05, "loss": 226.1158, "step": 5650 }, { "epoch": 0.022867116198079324, "grad_norm": 1352.9869384765625, "learning_rate": 1.132e-05, "loss": 243.9569, "step": 5660 }, { "epoch": 0.022907517463446957, "grad_norm": 1091.9189453125, "learning_rate": 1.134e-05, "loss": 186.5363, "step": 5670 }, { "epoch": 0.022947918728814586, "grad_norm": 544.18994140625, "learning_rate": 1.1360000000000001e-05, "loss": 178.7458, "step": 5680 }, { "epoch": 0.02298831999418222, "grad_norm": 0.0, "learning_rate": 1.1380000000000001e-05, "loss": 237.9985, "step": 5690 }, { "epoch": 0.02302872125954985, "grad_norm": 1084.4674072265625, "learning_rate": 1.1400000000000001e-05, "loss": 201.7748, "step": 5700 }, { "epoch": 0.023069122524917482, "grad_norm": 1035.4659423828125, "learning_rate": 1.142e-05, "loss": 207.2124, "step": 5710 }, { "epoch": 0.02310952379028511, "grad_norm": 733.1090087890625, "learning_rate": 1.144e-05, "loss": 178.1889, "step": 5720 }, { "epoch": 0.023149925055652745, "grad_norm": 876.4588012695312, "learning_rate": 1.146e-05, "loss": 158.9155, "step": 5730 }, { "epoch": 0.023190326321020374, "grad_norm": 657.6229248046875, "learning_rate": 1.148e-05, "loss": 194.4797, "step": 5740 }, { "epoch": 0.023230727586388007, "grad_norm": 1495.9139404296875, "learning_rate": 1.1500000000000002e-05, "loss": 222.9885, "step": 5750 }, { "epoch": 0.023271128851755637, "grad_norm": 2601.6259765625, "learning_rate": 1.152e-05, "loss": 304.1291, "step": 5760 }, { "epoch": 0.02331153011712327, "grad_norm": 1693.1273193359375, "learning_rate": 1.1540000000000001e-05, "loss": 199.6243, "step": 5770 }, { "epoch": 0.0233519313824909, "grad_norm": 945.2630615234375, "learning_rate": 1.156e-05, "loss": 126.1058, "step": 5780 }, { "epoch": 0.023392332647858533, "grad_norm": 616.036865234375, "learning_rate": 1.1580000000000001e-05, "loss": 265.3511, "step": 5790 }, { "epoch": 0.023432733913226162, "grad_norm": 908.6356811523438, "learning_rate": 1.16e-05, "loss": 215.007, "step": 5800 }, { "epoch": 0.023473135178593795, "grad_norm": 1166.1875, "learning_rate": 1.162e-05, "loss": 171.8394, "step": 5810 }, { "epoch": 0.023513536443961425, "grad_norm": 936.7083740234375, "learning_rate": 1.164e-05, "loss": 209.5779, "step": 5820 }, { "epoch": 0.023553937709329058, "grad_norm": 689.0086669921875, "learning_rate": 1.166e-05, "loss": 165.4548, "step": 5830 }, { "epoch": 0.023594338974696687, "grad_norm": 1105.93212890625, "learning_rate": 1.168e-05, "loss": 275.4574, "step": 5840 }, { "epoch": 0.02363474024006432, "grad_norm": 2241.417236328125, "learning_rate": 1.1700000000000001e-05, "loss": 359.449, "step": 5850 }, { "epoch": 0.02367514150543195, "grad_norm": 1302.2642822265625, "learning_rate": 1.172e-05, "loss": 259.1196, "step": 5860 }, { "epoch": 0.023715542770799583, "grad_norm": 1307.6990966796875, "learning_rate": 1.1740000000000001e-05, "loss": 215.3923, "step": 5870 }, { "epoch": 0.023755944036167213, "grad_norm": 523.6953735351562, "learning_rate": 1.1760000000000001e-05, "loss": 189.7961, "step": 5880 }, { "epoch": 0.023796345301534846, "grad_norm": 1645.471923828125, "learning_rate": 1.178e-05, "loss": 197.7059, "step": 5890 }, { "epoch": 0.023836746566902475, "grad_norm": 4609.2265625, "learning_rate": 1.18e-05, "loss": 312.7925, "step": 5900 }, { "epoch": 0.02387714783227011, "grad_norm": 886.41015625, "learning_rate": 1.182e-05, "loss": 227.9965, "step": 5910 }, { "epoch": 0.023917549097637738, "grad_norm": 1345.154052734375, "learning_rate": 1.1840000000000002e-05, "loss": 143.6501, "step": 5920 }, { "epoch": 0.02395795036300537, "grad_norm": 2758.603515625, "learning_rate": 1.186e-05, "loss": 189.3002, "step": 5930 }, { "epoch": 0.023998351628373, "grad_norm": 933.2481079101562, "learning_rate": 1.1880000000000001e-05, "loss": 170.3514, "step": 5940 }, { "epoch": 0.024038752893740634, "grad_norm": 1366.0477294921875, "learning_rate": 1.19e-05, "loss": 204.9711, "step": 5950 }, { "epoch": 0.024079154159108263, "grad_norm": 870.7377319335938, "learning_rate": 1.1920000000000001e-05, "loss": 182.9471, "step": 5960 }, { "epoch": 0.024119555424475896, "grad_norm": 1715.831298828125, "learning_rate": 1.1940000000000001e-05, "loss": 206.0364, "step": 5970 }, { "epoch": 0.024159956689843526, "grad_norm": 1668.3841552734375, "learning_rate": 1.196e-05, "loss": 226.4282, "step": 5980 }, { "epoch": 0.02420035795521116, "grad_norm": 1048.4296875, "learning_rate": 1.198e-05, "loss": 109.3125, "step": 5990 }, { "epoch": 0.02424075922057879, "grad_norm": 879.1497192382812, "learning_rate": 1.2e-05, "loss": 201.5203, "step": 6000 }, { "epoch": 0.02428116048594642, "grad_norm": 1213.1204833984375, "learning_rate": 1.202e-05, "loss": 163.2346, "step": 6010 }, { "epoch": 0.02432156175131405, "grad_norm": 1427.281494140625, "learning_rate": 1.204e-05, "loss": 174.4642, "step": 6020 }, { "epoch": 0.024361963016681684, "grad_norm": 1276.7529296875, "learning_rate": 1.206e-05, "loss": 204.9076, "step": 6030 }, { "epoch": 0.024402364282049314, "grad_norm": 2190.304443359375, "learning_rate": 1.2080000000000001e-05, "loss": 159.9595, "step": 6040 }, { "epoch": 0.024442765547416947, "grad_norm": 785.987060546875, "learning_rate": 1.2100000000000001e-05, "loss": 209.5406, "step": 6050 }, { "epoch": 0.024483166812784576, "grad_norm": 611.1929931640625, "learning_rate": 1.2120000000000001e-05, "loss": 221.088, "step": 6060 }, { "epoch": 0.02452356807815221, "grad_norm": 1893.1925048828125, "learning_rate": 1.214e-05, "loss": 215.4067, "step": 6070 }, { "epoch": 0.02456396934351984, "grad_norm": 4432.23095703125, "learning_rate": 1.216e-05, "loss": 221.237, "step": 6080 }, { "epoch": 0.02460437060888747, "grad_norm": 1176.1290283203125, "learning_rate": 1.2180000000000002e-05, "loss": 220.3091, "step": 6090 }, { "epoch": 0.0246447718742551, "grad_norm": 1318.6954345703125, "learning_rate": 1.22e-05, "loss": 250.0935, "step": 6100 }, { "epoch": 0.02468517313962273, "grad_norm": 1313.9632568359375, "learning_rate": 1.2220000000000002e-05, "loss": 226.1311, "step": 6110 }, { "epoch": 0.024725574404990364, "grad_norm": 1210.4168701171875, "learning_rate": 1.224e-05, "loss": 348.8165, "step": 6120 }, { "epoch": 0.024765975670357994, "grad_norm": 1387.2384033203125, "learning_rate": 1.2260000000000001e-05, "loss": 208.124, "step": 6130 }, { "epoch": 0.024806376935725627, "grad_norm": 1911.8060302734375, "learning_rate": 1.2280000000000001e-05, "loss": 208.8157, "step": 6140 }, { "epoch": 0.024846778201093257, "grad_norm": 770.5685424804688, "learning_rate": 1.23e-05, "loss": 205.734, "step": 6150 }, { "epoch": 0.02488717946646089, "grad_norm": 718.35498046875, "learning_rate": 1.232e-05, "loss": 200.1725, "step": 6160 }, { "epoch": 0.02492758073182852, "grad_norm": 1146.25244140625, "learning_rate": 1.234e-05, "loss": 197.4777, "step": 6170 }, { "epoch": 0.024967981997196152, "grad_norm": 773.2630004882812, "learning_rate": 1.236e-05, "loss": 217.971, "step": 6180 }, { "epoch": 0.025008383262563782, "grad_norm": 888.3221435546875, "learning_rate": 1.238e-05, "loss": 198.487, "step": 6190 }, { "epoch": 0.025048784527931415, "grad_norm": 971.0499267578125, "learning_rate": 1.24e-05, "loss": 239.6828, "step": 6200 }, { "epoch": 0.025089185793299044, "grad_norm": 10692.5146484375, "learning_rate": 1.2420000000000001e-05, "loss": 258.4363, "step": 6210 }, { "epoch": 0.025129587058666678, "grad_norm": 791.8817138671875, "learning_rate": 1.244e-05, "loss": 213.3801, "step": 6220 }, { "epoch": 0.025169988324034307, "grad_norm": 1000.6807861328125, "learning_rate": 1.2460000000000001e-05, "loss": 181.9501, "step": 6230 }, { "epoch": 0.02521038958940194, "grad_norm": 892.7822875976562, "learning_rate": 1.248e-05, "loss": 167.6201, "step": 6240 }, { "epoch": 0.02525079085476957, "grad_norm": 537.8553466796875, "learning_rate": 1.25e-05, "loss": 242.3135, "step": 6250 }, { "epoch": 0.025291192120137203, "grad_norm": 1086.2513427734375, "learning_rate": 1.252e-05, "loss": 222.7271, "step": 6260 }, { "epoch": 0.025331593385504832, "grad_norm": 4348.87353515625, "learning_rate": 1.2540000000000002e-05, "loss": 316.0617, "step": 6270 }, { "epoch": 0.025371994650872465, "grad_norm": 1642.019287109375, "learning_rate": 1.256e-05, "loss": 202.9976, "step": 6280 }, { "epoch": 0.025412395916240095, "grad_norm": 685.2844848632812, "learning_rate": 1.258e-05, "loss": 147.7885, "step": 6290 }, { "epoch": 0.025452797181607728, "grad_norm": 2664.384765625, "learning_rate": 1.2600000000000001e-05, "loss": 178.4338, "step": 6300 }, { "epoch": 0.025493198446975358, "grad_norm": 727.255126953125, "learning_rate": 1.2620000000000001e-05, "loss": 142.211, "step": 6310 }, { "epoch": 0.02553359971234299, "grad_norm": 2616.48486328125, "learning_rate": 1.2640000000000003e-05, "loss": 215.0363, "step": 6320 }, { "epoch": 0.02557400097771062, "grad_norm": 1724.4073486328125, "learning_rate": 1.2659999999999999e-05, "loss": 268.9592, "step": 6330 }, { "epoch": 0.025614402243078253, "grad_norm": 757.4139404296875, "learning_rate": 1.268e-05, "loss": 245.0375, "step": 6340 }, { "epoch": 0.025654803508445883, "grad_norm": 804.900634765625, "learning_rate": 1.27e-05, "loss": 258.7294, "step": 6350 }, { "epoch": 0.025695204773813516, "grad_norm": 1432.1217041015625, "learning_rate": 1.2720000000000002e-05, "loss": 226.9255, "step": 6360 }, { "epoch": 0.025735606039181146, "grad_norm": 1246.9285888671875, "learning_rate": 1.2740000000000002e-05, "loss": 190.6593, "step": 6370 }, { "epoch": 0.02577600730454878, "grad_norm": 1320.610107421875, "learning_rate": 1.276e-05, "loss": 254.1634, "step": 6380 }, { "epoch": 0.025816408569916408, "grad_norm": 5738.62939453125, "learning_rate": 1.278e-05, "loss": 214.6841, "step": 6390 }, { "epoch": 0.02585680983528404, "grad_norm": 649.080810546875, "learning_rate": 1.2800000000000001e-05, "loss": 170.7529, "step": 6400 }, { "epoch": 0.02589721110065167, "grad_norm": 756.0393676757812, "learning_rate": 1.2820000000000001e-05, "loss": 186.7894, "step": 6410 }, { "epoch": 0.025937612366019304, "grad_norm": 2048.561279296875, "learning_rate": 1.2839999999999999e-05, "loss": 182.1876, "step": 6420 }, { "epoch": 0.025978013631386934, "grad_norm": 805.5757446289062, "learning_rate": 1.286e-05, "loss": 196.0959, "step": 6430 }, { "epoch": 0.026018414896754567, "grad_norm": 5938.0439453125, "learning_rate": 1.288e-05, "loss": 254.1976, "step": 6440 }, { "epoch": 0.026058816162122196, "grad_norm": 1488.987060546875, "learning_rate": 1.29e-05, "loss": 250.4606, "step": 6450 }, { "epoch": 0.02609921742748983, "grad_norm": 1574.7130126953125, "learning_rate": 1.2920000000000002e-05, "loss": 157.1144, "step": 6460 }, { "epoch": 0.02613961869285746, "grad_norm": 736.7561645507812, "learning_rate": 1.294e-05, "loss": 220.315, "step": 6470 }, { "epoch": 0.026180019958225092, "grad_norm": 972.5496826171875, "learning_rate": 1.296e-05, "loss": 257.4698, "step": 6480 }, { "epoch": 0.02622042122359272, "grad_norm": 4659.64794921875, "learning_rate": 1.2980000000000001e-05, "loss": 201.6754, "step": 6490 }, { "epoch": 0.026260822488960354, "grad_norm": 803.3303833007812, "learning_rate": 1.3000000000000001e-05, "loss": 227.2734, "step": 6500 }, { "epoch": 0.026301223754327984, "grad_norm": 900.7424926757812, "learning_rate": 1.3020000000000002e-05, "loss": 204.3565, "step": 6510 }, { "epoch": 0.026341625019695617, "grad_norm": 880.3982543945312, "learning_rate": 1.3039999999999999e-05, "loss": 213.0109, "step": 6520 }, { "epoch": 0.026382026285063247, "grad_norm": 1765.91357421875, "learning_rate": 1.306e-05, "loss": 238.4321, "step": 6530 }, { "epoch": 0.02642242755043088, "grad_norm": 2665.921875, "learning_rate": 1.308e-05, "loss": 227.712, "step": 6540 }, { "epoch": 0.02646282881579851, "grad_norm": 2605.635009765625, "learning_rate": 1.3100000000000002e-05, "loss": 162.2536, "step": 6550 }, { "epoch": 0.026503230081166142, "grad_norm": 793.8833618164062, "learning_rate": 1.3120000000000001e-05, "loss": 192.5132, "step": 6560 }, { "epoch": 0.026543631346533772, "grad_norm": 2946.634765625, "learning_rate": 1.314e-05, "loss": 159.2729, "step": 6570 }, { "epoch": 0.026584032611901405, "grad_norm": 910.0969848632812, "learning_rate": 1.316e-05, "loss": 191.1152, "step": 6580 }, { "epoch": 0.026624433877269035, "grad_norm": 808.4738159179688, "learning_rate": 1.3180000000000001e-05, "loss": 117.279, "step": 6590 }, { "epoch": 0.026664835142636668, "grad_norm": 716.1993408203125, "learning_rate": 1.32e-05, "loss": 246.6991, "step": 6600 }, { "epoch": 0.026705236408004297, "grad_norm": 1362.393310546875, "learning_rate": 1.3220000000000002e-05, "loss": 233.1123, "step": 6610 }, { "epoch": 0.02674563767337193, "grad_norm": 1083.3197021484375, "learning_rate": 1.324e-05, "loss": 169.5999, "step": 6620 }, { "epoch": 0.02678603893873956, "grad_norm": 754.7849731445312, "learning_rate": 1.326e-05, "loss": 306.0971, "step": 6630 }, { "epoch": 0.026826440204107193, "grad_norm": 968.374755859375, "learning_rate": 1.3280000000000002e-05, "loss": 159.3224, "step": 6640 }, { "epoch": 0.026866841469474823, "grad_norm": 670.0838623046875, "learning_rate": 1.3300000000000001e-05, "loss": 155.3162, "step": 6650 }, { "epoch": 0.026907242734842456, "grad_norm": 2666.84326171875, "learning_rate": 1.3320000000000001e-05, "loss": 173.0536, "step": 6660 }, { "epoch": 0.026947644000210085, "grad_norm": 1368.1116943359375, "learning_rate": 1.334e-05, "loss": 197.1025, "step": 6670 }, { "epoch": 0.026988045265577718, "grad_norm": 1038.586181640625, "learning_rate": 1.336e-05, "loss": 156.9807, "step": 6680 }, { "epoch": 0.027028446530945348, "grad_norm": 1009.5262451171875, "learning_rate": 1.338e-05, "loss": 204.5245, "step": 6690 }, { "epoch": 0.02706884779631298, "grad_norm": 1839.5418701171875, "learning_rate": 1.3400000000000002e-05, "loss": 266.4659, "step": 6700 }, { "epoch": 0.02710924906168061, "grad_norm": 829.3477783203125, "learning_rate": 1.3420000000000002e-05, "loss": 240.5496, "step": 6710 }, { "epoch": 0.027149650327048244, "grad_norm": 2405.634033203125, "learning_rate": 1.344e-05, "loss": 190.1635, "step": 6720 }, { "epoch": 0.027190051592415873, "grad_norm": 3415.678466796875, "learning_rate": 1.346e-05, "loss": 256.6028, "step": 6730 }, { "epoch": 0.027230452857783506, "grad_norm": 1140.322998046875, "learning_rate": 1.3480000000000001e-05, "loss": 213.0321, "step": 6740 }, { "epoch": 0.027270854123151136, "grad_norm": 1814.612548828125, "learning_rate": 1.3500000000000001e-05, "loss": 289.2976, "step": 6750 }, { "epoch": 0.02731125538851877, "grad_norm": 5082.0244140625, "learning_rate": 1.352e-05, "loss": 213.1429, "step": 6760 }, { "epoch": 0.0273516566538864, "grad_norm": 791.2843627929688, "learning_rate": 1.3539999999999999e-05, "loss": 174.2937, "step": 6770 }, { "epoch": 0.02739205791925403, "grad_norm": 1486.5364990234375, "learning_rate": 1.356e-05, "loss": 211.7225, "step": 6780 }, { "epoch": 0.02743245918462166, "grad_norm": 2779.84765625, "learning_rate": 1.358e-05, "loss": 206.9393, "step": 6790 }, { "epoch": 0.027472860449989294, "grad_norm": 2755.901611328125, "learning_rate": 1.3600000000000002e-05, "loss": 244.6609, "step": 6800 }, { "epoch": 0.027513261715356924, "grad_norm": 1252.9466552734375, "learning_rate": 1.362e-05, "loss": 185.0354, "step": 6810 }, { "epoch": 0.027553662980724557, "grad_norm": 803.9773559570312, "learning_rate": 1.364e-05, "loss": 163.7829, "step": 6820 }, { "epoch": 0.027594064246092186, "grad_norm": 3479.50341796875, "learning_rate": 1.3660000000000001e-05, "loss": 199.5576, "step": 6830 }, { "epoch": 0.02763446551145982, "grad_norm": 4041.821533203125, "learning_rate": 1.3680000000000001e-05, "loss": 146.2625, "step": 6840 }, { "epoch": 0.02767486677682745, "grad_norm": 951.2683715820312, "learning_rate": 1.3700000000000001e-05, "loss": 242.1677, "step": 6850 }, { "epoch": 0.027715268042195082, "grad_norm": 2521.80224609375, "learning_rate": 1.3719999999999999e-05, "loss": 162.2531, "step": 6860 }, { "epoch": 0.02775566930756271, "grad_norm": 2120.54150390625, "learning_rate": 1.374e-05, "loss": 218.3948, "step": 6870 }, { "epoch": 0.027796070572930345, "grad_norm": 890.4383544921875, "learning_rate": 1.376e-05, "loss": 237.4137, "step": 6880 }, { "epoch": 0.027836471838297974, "grad_norm": 1014.8067626953125, "learning_rate": 1.3780000000000002e-05, "loss": 271.9613, "step": 6890 }, { "epoch": 0.027876873103665607, "grad_norm": 2228.795654296875, "learning_rate": 1.3800000000000002e-05, "loss": 160.2129, "step": 6900 }, { "epoch": 0.027917274369033237, "grad_norm": 1605.4246826171875, "learning_rate": 1.382e-05, "loss": 192.7938, "step": 6910 }, { "epoch": 0.02795767563440087, "grad_norm": 726.9761352539062, "learning_rate": 1.384e-05, "loss": 166.0171, "step": 6920 }, { "epoch": 0.0279980768997685, "grad_norm": 1300.53076171875, "learning_rate": 1.3860000000000001e-05, "loss": 155.9882, "step": 6930 }, { "epoch": 0.028038478165136133, "grad_norm": 1467.7305908203125, "learning_rate": 1.3880000000000001e-05, "loss": 175.5876, "step": 6940 }, { "epoch": 0.028078879430503762, "grad_norm": 653.6290283203125, "learning_rate": 1.3900000000000002e-05, "loss": 177.4526, "step": 6950 }, { "epoch": 0.028119280695871395, "grad_norm": 1209.3408203125, "learning_rate": 1.3919999999999999e-05, "loss": 139.5799, "step": 6960 }, { "epoch": 0.028159681961239025, "grad_norm": 0.0, "learning_rate": 1.394e-05, "loss": 114.6302, "step": 6970 }, { "epoch": 0.028200083226606658, "grad_norm": 2785.091796875, "learning_rate": 1.396e-05, "loss": 197.9756, "step": 6980 }, { "epoch": 0.028240484491974287, "grad_norm": 1010.1685180664062, "learning_rate": 1.3980000000000002e-05, "loss": 194.4374, "step": 6990 }, { "epoch": 0.02828088575734192, "grad_norm": 810.1452026367188, "learning_rate": 1.4000000000000001e-05, "loss": 185.4311, "step": 7000 }, { "epoch": 0.02832128702270955, "grad_norm": 793.3577270507812, "learning_rate": 1.402e-05, "loss": 186.9826, "step": 7010 }, { "epoch": 0.028361688288077183, "grad_norm": 1589.5145263671875, "learning_rate": 1.4040000000000001e-05, "loss": 224.4251, "step": 7020 }, { "epoch": 0.028402089553444813, "grad_norm": 730.669189453125, "learning_rate": 1.4060000000000001e-05, "loss": 129.3718, "step": 7030 }, { "epoch": 0.028442490818812446, "grad_norm": 2354.304443359375, "learning_rate": 1.408e-05, "loss": 176.9131, "step": 7040 }, { "epoch": 0.028482892084180075, "grad_norm": 1103.234619140625, "learning_rate": 1.4099999999999999e-05, "loss": 198.4438, "step": 7050 }, { "epoch": 0.02852329334954771, "grad_norm": 1036.2913818359375, "learning_rate": 1.412e-05, "loss": 228.0053, "step": 7060 }, { "epoch": 0.028563694614915338, "grad_norm": 3621.711181640625, "learning_rate": 1.414e-05, "loss": 140.6286, "step": 7070 }, { "epoch": 0.02860409588028297, "grad_norm": 1558.1011962890625, "learning_rate": 1.4160000000000002e-05, "loss": 187.4032, "step": 7080 }, { "epoch": 0.0286444971456506, "grad_norm": 2425.656494140625, "learning_rate": 1.4180000000000001e-05, "loss": 198.9401, "step": 7090 }, { "epoch": 0.028684898411018234, "grad_norm": 878.8297729492188, "learning_rate": 1.42e-05, "loss": 248.0995, "step": 7100 }, { "epoch": 0.028725299676385863, "grad_norm": 791.4844360351562, "learning_rate": 1.422e-05, "loss": 223.9328, "step": 7110 }, { "epoch": 0.028765700941753496, "grad_norm": 984.3999633789062, "learning_rate": 1.4240000000000001e-05, "loss": 207.1025, "step": 7120 }, { "epoch": 0.028806102207121126, "grad_norm": 1250.86669921875, "learning_rate": 1.426e-05, "loss": 168.4282, "step": 7130 }, { "epoch": 0.02884650347248876, "grad_norm": 993.6478881835938, "learning_rate": 1.4280000000000002e-05, "loss": 204.2866, "step": 7140 }, { "epoch": 0.02888690473785639, "grad_norm": 1394.8699951171875, "learning_rate": 1.43e-05, "loss": 321.6906, "step": 7150 }, { "epoch": 0.02892730600322402, "grad_norm": 1557.498291015625, "learning_rate": 1.432e-05, "loss": 240.2873, "step": 7160 }, { "epoch": 0.02896770726859165, "grad_norm": 3150.471435546875, "learning_rate": 1.434e-05, "loss": 173.3683, "step": 7170 }, { "epoch": 0.029008108533959284, "grad_norm": 40070.46875, "learning_rate": 1.4360000000000001e-05, "loss": 300.3157, "step": 7180 }, { "epoch": 0.029048509799326914, "grad_norm": 1962.33447265625, "learning_rate": 1.4380000000000001e-05, "loss": 240.5531, "step": 7190 }, { "epoch": 0.029088911064694547, "grad_norm": 9524.1953125, "learning_rate": 1.44e-05, "loss": 197.5709, "step": 7200 }, { "epoch": 0.029129312330062176, "grad_norm": 1291.194091796875, "learning_rate": 1.4420000000000001e-05, "loss": 173.9855, "step": 7210 }, { "epoch": 0.02916971359542981, "grad_norm": 508.72991943359375, "learning_rate": 1.444e-05, "loss": 198.4389, "step": 7220 }, { "epoch": 0.02921011486079744, "grad_norm": 3161.301513671875, "learning_rate": 1.4460000000000002e-05, "loss": 232.296, "step": 7230 }, { "epoch": 0.029250516126165072, "grad_norm": 888.337646484375, "learning_rate": 1.4480000000000002e-05, "loss": 165.0789, "step": 7240 }, { "epoch": 0.029290917391532702, "grad_norm": 1378.0032958984375, "learning_rate": 1.45e-05, "loss": 199.7313, "step": 7250 }, { "epoch": 0.029331318656900335, "grad_norm": 2439.7119140625, "learning_rate": 1.452e-05, "loss": 201.1323, "step": 7260 }, { "epoch": 0.029371719922267964, "grad_norm": 0.0, "learning_rate": 1.4540000000000001e-05, "loss": 139.8308, "step": 7270 }, { "epoch": 0.029412121187635597, "grad_norm": 697.4395751953125, "learning_rate": 1.4560000000000001e-05, "loss": 215.4804, "step": 7280 }, { "epoch": 0.029452522453003227, "grad_norm": 1335.80859375, "learning_rate": 1.4580000000000003e-05, "loss": 206.551, "step": 7290 }, { "epoch": 0.02949292371837086, "grad_norm": 3926.513427734375, "learning_rate": 1.4599999999999999e-05, "loss": 161.3456, "step": 7300 }, { "epoch": 0.02953332498373849, "grad_norm": 2071.3857421875, "learning_rate": 1.462e-05, "loss": 245.7546, "step": 7310 }, { "epoch": 0.029573726249106123, "grad_norm": 1162.638916015625, "learning_rate": 1.464e-05, "loss": 247.4758, "step": 7320 }, { "epoch": 0.029614127514473752, "grad_norm": 1904.095947265625, "learning_rate": 1.4660000000000002e-05, "loss": 209.4585, "step": 7330 }, { "epoch": 0.029654528779841385, "grad_norm": 2012.481689453125, "learning_rate": 1.4680000000000002e-05, "loss": 177.5473, "step": 7340 }, { "epoch": 0.029694930045209015, "grad_norm": 827.6367797851562, "learning_rate": 1.47e-05, "loss": 132.3128, "step": 7350 }, { "epoch": 0.029735331310576648, "grad_norm": 1051.8994140625, "learning_rate": 1.472e-05, "loss": 201.6083, "step": 7360 }, { "epoch": 0.029775732575944278, "grad_norm": 2179.918701171875, "learning_rate": 1.4740000000000001e-05, "loss": 142.6359, "step": 7370 }, { "epoch": 0.02981613384131191, "grad_norm": 1439.194091796875, "learning_rate": 1.4760000000000001e-05, "loss": 170.9291, "step": 7380 }, { "epoch": 0.02985653510667954, "grad_norm": 1795.2176513671875, "learning_rate": 1.4779999999999999e-05, "loss": 205.2162, "step": 7390 }, { "epoch": 0.029896936372047173, "grad_norm": 1061.8426513671875, "learning_rate": 1.48e-05, "loss": 291.1986, "step": 7400 }, { "epoch": 0.029937337637414803, "grad_norm": 1072.127685546875, "learning_rate": 1.482e-05, "loss": 190.8879, "step": 7410 }, { "epoch": 0.029977738902782436, "grad_norm": 1368.1405029296875, "learning_rate": 1.4840000000000002e-05, "loss": 196.2706, "step": 7420 }, { "epoch": 0.030018140168150065, "grad_norm": 1639.9654541015625, "learning_rate": 1.4860000000000002e-05, "loss": 129.1992, "step": 7430 }, { "epoch": 0.0300585414335177, "grad_norm": 1488.4791259765625, "learning_rate": 1.488e-05, "loss": 151.4941, "step": 7440 }, { "epoch": 0.030098942698885328, "grad_norm": 864.4747924804688, "learning_rate": 1.49e-05, "loss": 199.6265, "step": 7450 }, { "epoch": 0.03013934396425296, "grad_norm": 1255.5771484375, "learning_rate": 1.4920000000000001e-05, "loss": 226.9639, "step": 7460 }, { "epoch": 0.03017974522962059, "grad_norm": 1635.7647705078125, "learning_rate": 1.4940000000000001e-05, "loss": 174.848, "step": 7470 }, { "epoch": 0.030220146494988224, "grad_norm": 1959.9365234375, "learning_rate": 1.4960000000000002e-05, "loss": 215.0835, "step": 7480 }, { "epoch": 0.030260547760355853, "grad_norm": 1300.870361328125, "learning_rate": 1.4979999999999999e-05, "loss": 187.4061, "step": 7490 }, { "epoch": 0.030300949025723486, "grad_norm": 5733.919921875, "learning_rate": 1.5e-05, "loss": 203.0896, "step": 7500 }, { "epoch": 0.030341350291091116, "grad_norm": 6152.53173828125, "learning_rate": 1.502e-05, "loss": 292.1695, "step": 7510 }, { "epoch": 0.03038175155645875, "grad_norm": 1118.759033203125, "learning_rate": 1.5040000000000002e-05, "loss": 179.8201, "step": 7520 }, { "epoch": 0.03042215282182638, "grad_norm": 1003.1773681640625, "learning_rate": 1.5060000000000001e-05, "loss": 168.1782, "step": 7530 }, { "epoch": 0.030462554087194012, "grad_norm": 1641.7908935546875, "learning_rate": 1.508e-05, "loss": 234.2666, "step": 7540 }, { "epoch": 0.03050295535256164, "grad_norm": 6260.85791015625, "learning_rate": 1.51e-05, "loss": 151.5416, "step": 7550 }, { "epoch": 0.030543356617929274, "grad_norm": 1593.7576904296875, "learning_rate": 1.5120000000000001e-05, "loss": 159.2535, "step": 7560 }, { "epoch": 0.030583757883296904, "grad_norm": 1710.3023681640625, "learning_rate": 1.514e-05, "loss": 128.8493, "step": 7570 }, { "epoch": 0.030624159148664537, "grad_norm": 1299.740966796875, "learning_rate": 1.5160000000000002e-05, "loss": 291.4667, "step": 7580 }, { "epoch": 0.030664560414032167, "grad_norm": 772.3851318359375, "learning_rate": 1.518e-05, "loss": 254.7446, "step": 7590 }, { "epoch": 0.0307049616793998, "grad_norm": 4841.77294921875, "learning_rate": 1.52e-05, "loss": 261.962, "step": 7600 }, { "epoch": 0.03074536294476743, "grad_norm": 1306.54296875, "learning_rate": 1.5220000000000002e-05, "loss": 251.6979, "step": 7610 }, { "epoch": 0.030785764210135062, "grad_norm": 1031.349365234375, "learning_rate": 1.5240000000000001e-05, "loss": 180.2815, "step": 7620 }, { "epoch": 0.030826165475502692, "grad_norm": 1182.68994140625, "learning_rate": 1.5260000000000003e-05, "loss": 202.774, "step": 7630 }, { "epoch": 0.030866566740870325, "grad_norm": 524.015869140625, "learning_rate": 1.528e-05, "loss": 148.2353, "step": 7640 }, { "epoch": 0.030906968006237955, "grad_norm": 1932.3292236328125, "learning_rate": 1.53e-05, "loss": 169.3441, "step": 7650 }, { "epoch": 0.030947369271605588, "grad_norm": 840.219970703125, "learning_rate": 1.5320000000000002e-05, "loss": 129.2729, "step": 7660 }, { "epoch": 0.030987770536973217, "grad_norm": 1622.7508544921875, "learning_rate": 1.5340000000000002e-05, "loss": 160.2887, "step": 7670 }, { "epoch": 0.03102817180234085, "grad_norm": 2095.8623046875, "learning_rate": 1.536e-05, "loss": 245.4731, "step": 7680 }, { "epoch": 0.03106857306770848, "grad_norm": 3175.37646484375, "learning_rate": 1.538e-05, "loss": 208.5475, "step": 7690 }, { "epoch": 0.031108974333076113, "grad_norm": 924.9345092773438, "learning_rate": 1.54e-05, "loss": 170.3805, "step": 7700 }, { "epoch": 0.031149375598443742, "grad_norm": 1739.8531494140625, "learning_rate": 1.542e-05, "loss": 194.0813, "step": 7710 }, { "epoch": 0.031189776863811376, "grad_norm": 1644.390869140625, "learning_rate": 1.544e-05, "loss": 136.579, "step": 7720 }, { "epoch": 0.031230178129179005, "grad_norm": 789.648193359375, "learning_rate": 1.546e-05, "loss": 187.856, "step": 7730 }, { "epoch": 0.03127057939454664, "grad_norm": 1162.065185546875, "learning_rate": 1.548e-05, "loss": 114.7405, "step": 7740 }, { "epoch": 0.03131098065991427, "grad_norm": 1140.320556640625, "learning_rate": 1.55e-05, "loss": 181.2212, "step": 7750 }, { "epoch": 0.0313513819252819, "grad_norm": 2704.695556640625, "learning_rate": 1.552e-05, "loss": 241.535, "step": 7760 }, { "epoch": 0.03139178319064953, "grad_norm": 939.2614135742188, "learning_rate": 1.554e-05, "loss": 151.1776, "step": 7770 }, { "epoch": 0.03143218445601716, "grad_norm": 1043.3226318359375, "learning_rate": 1.556e-05, "loss": 200.2128, "step": 7780 }, { "epoch": 0.031472585721384796, "grad_norm": 1068.1412353515625, "learning_rate": 1.558e-05, "loss": 181.7895, "step": 7790 }, { "epoch": 0.03151298698675242, "grad_norm": 1479.6229248046875, "learning_rate": 1.56e-05, "loss": 196.9196, "step": 7800 }, { "epoch": 0.031553388252120056, "grad_norm": 701.58544921875, "learning_rate": 1.5620000000000003e-05, "loss": 137.6865, "step": 7810 }, { "epoch": 0.03159378951748769, "grad_norm": 1822.54345703125, "learning_rate": 1.5640000000000003e-05, "loss": 227.0767, "step": 7820 }, { "epoch": 0.03163419078285532, "grad_norm": 546.575439453125, "learning_rate": 1.566e-05, "loss": 146.3474, "step": 7830 }, { "epoch": 0.03167459204822295, "grad_norm": 1540.7342529296875, "learning_rate": 1.568e-05, "loss": 192.144, "step": 7840 }, { "epoch": 0.03171499331359058, "grad_norm": 1766.8616943359375, "learning_rate": 1.5700000000000002e-05, "loss": 194.4416, "step": 7850 }, { "epoch": 0.031755394578958214, "grad_norm": 1534.951416015625, "learning_rate": 1.5720000000000002e-05, "loss": 158.2836, "step": 7860 }, { "epoch": 0.03179579584432585, "grad_norm": 980.215576171875, "learning_rate": 1.5740000000000002e-05, "loss": 166.7142, "step": 7870 }, { "epoch": 0.03183619710969347, "grad_norm": 5774.56396484375, "learning_rate": 1.5759999999999998e-05, "loss": 305.0332, "step": 7880 }, { "epoch": 0.031876598375061106, "grad_norm": 1329.786865234375, "learning_rate": 1.578e-05, "loss": 137.7941, "step": 7890 }, { "epoch": 0.03191699964042874, "grad_norm": 1180.7379150390625, "learning_rate": 1.58e-05, "loss": 122.4153, "step": 7900 }, { "epoch": 0.03195740090579637, "grad_norm": 867.472900390625, "learning_rate": 1.582e-05, "loss": 210.3185, "step": 7910 }, { "epoch": 0.031997802171164, "grad_norm": 1057.9029541015625, "learning_rate": 1.584e-05, "loss": 209.8075, "step": 7920 }, { "epoch": 0.03203820343653163, "grad_norm": 1288.350341796875, "learning_rate": 1.586e-05, "loss": 198.4188, "step": 7930 }, { "epoch": 0.032078604701899265, "grad_norm": 0.0, "learning_rate": 1.588e-05, "loss": 145.5795, "step": 7940 }, { "epoch": 0.0321190059672669, "grad_norm": 983.6244506835938, "learning_rate": 1.59e-05, "loss": 227.0045, "step": 7950 }, { "epoch": 0.032159407232634524, "grad_norm": 2391.579345703125, "learning_rate": 1.592e-05, "loss": 261.5729, "step": 7960 }, { "epoch": 0.03219980849800216, "grad_norm": 2793.90234375, "learning_rate": 1.594e-05, "loss": 247.0734, "step": 7970 }, { "epoch": 0.03224020976336979, "grad_norm": 1532.1849365234375, "learning_rate": 1.596e-05, "loss": 199.9248, "step": 7980 }, { "epoch": 0.03228061102873742, "grad_norm": 1343.6947021484375, "learning_rate": 1.598e-05, "loss": 167.6554, "step": 7990 }, { "epoch": 0.03232101229410505, "grad_norm": 717.1405639648438, "learning_rate": 1.6000000000000003e-05, "loss": 218.2069, "step": 8000 }, { "epoch": 0.03236141355947268, "grad_norm": 2144.71337890625, "learning_rate": 1.6020000000000002e-05, "loss": 210.7316, "step": 8010 }, { "epoch": 0.032401814824840315, "grad_norm": 2510.060546875, "learning_rate": 1.604e-05, "loss": 300.955, "step": 8020 }, { "epoch": 0.03244221609020795, "grad_norm": 1254.329833984375, "learning_rate": 1.606e-05, "loss": 224.6631, "step": 8030 }, { "epoch": 0.032482617355575574, "grad_norm": 533.80517578125, "learning_rate": 1.6080000000000002e-05, "loss": 195.5154, "step": 8040 }, { "epoch": 0.03252301862094321, "grad_norm": 932.7647705078125, "learning_rate": 1.6100000000000002e-05, "loss": 173.2508, "step": 8050 }, { "epoch": 0.03256341988631084, "grad_norm": 1513.1566162109375, "learning_rate": 1.612e-05, "loss": 252.4677, "step": 8060 }, { "epoch": 0.03260382115167847, "grad_norm": 8973.974609375, "learning_rate": 1.6139999999999998e-05, "loss": 160.9031, "step": 8070 }, { "epoch": 0.0326442224170461, "grad_norm": 1346.521728515625, "learning_rate": 1.616e-05, "loss": 190.9977, "step": 8080 }, { "epoch": 0.03268462368241373, "grad_norm": 696.8346557617188, "learning_rate": 1.618e-05, "loss": 148.2339, "step": 8090 }, { "epoch": 0.032725024947781366, "grad_norm": 2406.046142578125, "learning_rate": 1.62e-05, "loss": 129.0067, "step": 8100 }, { "epoch": 0.032765426213149, "grad_norm": 1443.768310546875, "learning_rate": 1.622e-05, "loss": 206.6445, "step": 8110 }, { "epoch": 0.032805827478516625, "grad_norm": 2201.40087890625, "learning_rate": 1.624e-05, "loss": 188.9112, "step": 8120 }, { "epoch": 0.03284622874388426, "grad_norm": 699.2227783203125, "learning_rate": 1.626e-05, "loss": 193.334, "step": 8130 }, { "epoch": 0.03288663000925189, "grad_norm": 545.5186767578125, "learning_rate": 1.628e-05, "loss": 140.6468, "step": 8140 }, { "epoch": 0.032927031274619524, "grad_norm": 611.3945922851562, "learning_rate": 1.63e-05, "loss": 131.4079, "step": 8150 }, { "epoch": 0.03296743253998715, "grad_norm": 1427.6285400390625, "learning_rate": 1.6320000000000003e-05, "loss": 186.4774, "step": 8160 }, { "epoch": 0.03300783380535478, "grad_norm": 769.5264892578125, "learning_rate": 1.634e-05, "loss": 140.9033, "step": 8170 }, { "epoch": 0.033048235070722416, "grad_norm": 988.6732788085938, "learning_rate": 1.636e-05, "loss": 193.9264, "step": 8180 }, { "epoch": 0.03308863633609005, "grad_norm": 1023.501708984375, "learning_rate": 1.6380000000000002e-05, "loss": 173.0703, "step": 8190 }, { "epoch": 0.033129037601457675, "grad_norm": 1767.6136474609375, "learning_rate": 1.6400000000000002e-05, "loss": 235.5256, "step": 8200 }, { "epoch": 0.03316943886682531, "grad_norm": 1146.1134033203125, "learning_rate": 1.6420000000000002e-05, "loss": 174.4802, "step": 8210 }, { "epoch": 0.03320984013219294, "grad_norm": 723.232666015625, "learning_rate": 1.644e-05, "loss": 126.8088, "step": 8220 }, { "epoch": 0.033250241397560575, "grad_norm": 939.8434448242188, "learning_rate": 1.646e-05, "loss": 169.1382, "step": 8230 }, { "epoch": 0.0332906426629282, "grad_norm": 986.3238525390625, "learning_rate": 1.648e-05, "loss": 111.5822, "step": 8240 }, { "epoch": 0.033331043928295834, "grad_norm": 1208.2476806640625, "learning_rate": 1.65e-05, "loss": 191.2895, "step": 8250 }, { "epoch": 0.03337144519366347, "grad_norm": 827.257568359375, "learning_rate": 1.652e-05, "loss": 175.8226, "step": 8260 }, { "epoch": 0.0334118464590311, "grad_norm": 1213.089111328125, "learning_rate": 1.654e-05, "loss": 214.4393, "step": 8270 }, { "epoch": 0.033452247724398726, "grad_norm": 985.6383666992188, "learning_rate": 1.656e-05, "loss": 240.4976, "step": 8280 }, { "epoch": 0.03349264898976636, "grad_norm": 1016.156494140625, "learning_rate": 1.658e-05, "loss": 211.4707, "step": 8290 }, { "epoch": 0.03353305025513399, "grad_norm": 590.7755737304688, "learning_rate": 1.66e-05, "loss": 173.2719, "step": 8300 }, { "epoch": 0.033573451520501625, "grad_norm": 850.2467041015625, "learning_rate": 1.662e-05, "loss": 169.5091, "step": 8310 }, { "epoch": 0.03361385278586925, "grad_norm": 6493.62109375, "learning_rate": 1.664e-05, "loss": 206.8252, "step": 8320 }, { "epoch": 0.033654254051236884, "grad_norm": 721.207275390625, "learning_rate": 1.666e-05, "loss": 172.7568, "step": 8330 }, { "epoch": 0.03369465531660452, "grad_norm": 941.2609252929688, "learning_rate": 1.668e-05, "loss": 169.4456, "step": 8340 }, { "epoch": 0.03373505658197215, "grad_norm": 2292.378173828125, "learning_rate": 1.6700000000000003e-05, "loss": 217.5546, "step": 8350 }, { "epoch": 0.033775457847339777, "grad_norm": 752.173583984375, "learning_rate": 1.672e-05, "loss": 186.4127, "step": 8360 }, { "epoch": 0.03381585911270741, "grad_norm": 1046.0406494140625, "learning_rate": 1.674e-05, "loss": 156.5079, "step": 8370 }, { "epoch": 0.03385626037807504, "grad_norm": 926.9135131835938, "learning_rate": 1.6760000000000002e-05, "loss": 145.1286, "step": 8380 }, { "epoch": 0.033896661643442676, "grad_norm": 2465.574951171875, "learning_rate": 1.6780000000000002e-05, "loss": 230.1866, "step": 8390 }, { "epoch": 0.0339370629088103, "grad_norm": 1376.998779296875, "learning_rate": 1.6800000000000002e-05, "loss": 149.4985, "step": 8400 }, { "epoch": 0.033977464174177935, "grad_norm": 1141.4237060546875, "learning_rate": 1.6819999999999998e-05, "loss": 199.0205, "step": 8410 }, { "epoch": 0.03401786543954557, "grad_norm": 823.1524047851562, "learning_rate": 1.684e-05, "loss": 160.2142, "step": 8420 }, { "epoch": 0.0340582667049132, "grad_norm": 557.1250610351562, "learning_rate": 1.686e-05, "loss": 177.5694, "step": 8430 }, { "epoch": 0.03409866797028083, "grad_norm": 841.5819702148438, "learning_rate": 1.688e-05, "loss": 165.8316, "step": 8440 }, { "epoch": 0.03413906923564846, "grad_norm": 949.8247680664062, "learning_rate": 1.69e-05, "loss": 109.1915, "step": 8450 }, { "epoch": 0.03417947050101609, "grad_norm": 1175.8765869140625, "learning_rate": 1.692e-05, "loss": 186.9094, "step": 8460 }, { "epoch": 0.034219871766383726, "grad_norm": 601.2833862304688, "learning_rate": 1.694e-05, "loss": 173.7073, "step": 8470 }, { "epoch": 0.03426027303175135, "grad_norm": 1607.2193603515625, "learning_rate": 1.696e-05, "loss": 193.047, "step": 8480 }, { "epoch": 0.034300674297118985, "grad_norm": 794.0247802734375, "learning_rate": 1.698e-05, "loss": 230.634, "step": 8490 }, { "epoch": 0.03434107556248662, "grad_norm": 506.083740234375, "learning_rate": 1.7000000000000003e-05, "loss": 194.7215, "step": 8500 }, { "epoch": 0.03438147682785425, "grad_norm": 1354.748046875, "learning_rate": 1.702e-05, "loss": 237.6669, "step": 8510 }, { "epoch": 0.03442187809322188, "grad_norm": 903.7907104492188, "learning_rate": 1.704e-05, "loss": 176.5095, "step": 8520 }, { "epoch": 0.03446227935858951, "grad_norm": 1148.167236328125, "learning_rate": 1.706e-05, "loss": 178.5096, "step": 8530 }, { "epoch": 0.034502680623957144, "grad_norm": 2201.17919921875, "learning_rate": 1.7080000000000002e-05, "loss": 149.1055, "step": 8540 }, { "epoch": 0.03454308188932478, "grad_norm": 2470.737060546875, "learning_rate": 1.7100000000000002e-05, "loss": 239.2134, "step": 8550 }, { "epoch": 0.0345834831546924, "grad_norm": 1102.5830078125, "learning_rate": 1.712e-05, "loss": 116.4302, "step": 8560 }, { "epoch": 0.034623884420060036, "grad_norm": 1073.0867919921875, "learning_rate": 1.7140000000000002e-05, "loss": 208.7902, "step": 8570 }, { "epoch": 0.03466428568542767, "grad_norm": 1288.13818359375, "learning_rate": 1.7160000000000002e-05, "loss": 177.9251, "step": 8580 }, { "epoch": 0.0347046869507953, "grad_norm": 3063.13525390625, "learning_rate": 1.718e-05, "loss": 132.933, "step": 8590 }, { "epoch": 0.03474508821616293, "grad_norm": 817.6849365234375, "learning_rate": 1.7199999999999998e-05, "loss": 329.9039, "step": 8600 }, { "epoch": 0.03478548948153056, "grad_norm": 871.9563598632812, "learning_rate": 1.722e-05, "loss": 198.9892, "step": 8610 }, { "epoch": 0.034825890746898194, "grad_norm": 2624.832275390625, "learning_rate": 1.724e-05, "loss": 134.0866, "step": 8620 }, { "epoch": 0.03486629201226583, "grad_norm": 1550.62548828125, "learning_rate": 1.726e-05, "loss": 195.2643, "step": 8630 }, { "epoch": 0.034906693277633453, "grad_norm": 2074.622314453125, "learning_rate": 1.728e-05, "loss": 202.7179, "step": 8640 }, { "epoch": 0.03494709454300109, "grad_norm": 1240.3668212890625, "learning_rate": 1.73e-05, "loss": 164.9288, "step": 8650 }, { "epoch": 0.03498749580836872, "grad_norm": 1201.8397216796875, "learning_rate": 1.732e-05, "loss": 173.0605, "step": 8660 }, { "epoch": 0.03502789707373635, "grad_norm": 1204.3741455078125, "learning_rate": 1.734e-05, "loss": 161.6967, "step": 8670 }, { "epoch": 0.03506829833910398, "grad_norm": 1461.201904296875, "learning_rate": 1.736e-05, "loss": 91.053, "step": 8680 }, { "epoch": 0.03510869960447161, "grad_norm": 669.2667846679688, "learning_rate": 1.7380000000000003e-05, "loss": 125.9261, "step": 8690 }, { "epoch": 0.035149100869839245, "grad_norm": 902.3343505859375, "learning_rate": 1.74e-05, "loss": 250.4465, "step": 8700 }, { "epoch": 0.03518950213520688, "grad_norm": 2767.30224609375, "learning_rate": 1.742e-05, "loss": 194.7519, "step": 8710 }, { "epoch": 0.035229903400574504, "grad_norm": 778.5036010742188, "learning_rate": 1.7440000000000002e-05, "loss": 205.9209, "step": 8720 }, { "epoch": 0.03527030466594214, "grad_norm": 2534.48681640625, "learning_rate": 1.7460000000000002e-05, "loss": 201.9174, "step": 8730 }, { "epoch": 0.03531070593130977, "grad_norm": 3830.944580078125, "learning_rate": 1.7480000000000002e-05, "loss": 249.3413, "step": 8740 }, { "epoch": 0.0353511071966774, "grad_norm": 1058.1517333984375, "learning_rate": 1.75e-05, "loss": 156.2294, "step": 8750 }, { "epoch": 0.03539150846204503, "grad_norm": 1754.2557373046875, "learning_rate": 1.752e-05, "loss": 169.7632, "step": 8760 }, { "epoch": 0.03543190972741266, "grad_norm": 631.4237060546875, "learning_rate": 1.754e-05, "loss": 143.3636, "step": 8770 }, { "epoch": 0.035472310992780295, "grad_norm": 1334.2506103515625, "learning_rate": 1.756e-05, "loss": 172.2922, "step": 8780 }, { "epoch": 0.03551271225814793, "grad_norm": 1203.375, "learning_rate": 1.758e-05, "loss": 174.6774, "step": 8790 }, { "epoch": 0.035553113523515555, "grad_norm": 1072.609375, "learning_rate": 1.76e-05, "loss": 177.6043, "step": 8800 }, { "epoch": 0.03559351478888319, "grad_norm": 1734.830810546875, "learning_rate": 1.762e-05, "loss": 213.749, "step": 8810 }, { "epoch": 0.03563391605425082, "grad_norm": 1706.71875, "learning_rate": 1.764e-05, "loss": 141.3769, "step": 8820 }, { "epoch": 0.035674317319618454, "grad_norm": 823.6603393554688, "learning_rate": 1.766e-05, "loss": 211.0458, "step": 8830 }, { "epoch": 0.03571471858498608, "grad_norm": 1883.2462158203125, "learning_rate": 1.7680000000000004e-05, "loss": 147.5754, "step": 8840 }, { "epoch": 0.03575511985035371, "grad_norm": 1167.08935546875, "learning_rate": 1.77e-05, "loss": 158.7962, "step": 8850 }, { "epoch": 0.035795521115721346, "grad_norm": 653.7176513671875, "learning_rate": 1.772e-05, "loss": 162.9139, "step": 8860 }, { "epoch": 0.03583592238108898, "grad_norm": 1610.84912109375, "learning_rate": 1.774e-05, "loss": 147.6589, "step": 8870 }, { "epoch": 0.035876323646456605, "grad_norm": 764.0466918945312, "learning_rate": 1.7760000000000003e-05, "loss": 188.6739, "step": 8880 }, { "epoch": 0.03591672491182424, "grad_norm": 1810.3558349609375, "learning_rate": 1.7780000000000003e-05, "loss": 205.71, "step": 8890 }, { "epoch": 0.03595712617719187, "grad_norm": 968.5712280273438, "learning_rate": 1.78e-05, "loss": 164.1197, "step": 8900 }, { "epoch": 0.035997527442559504, "grad_norm": 2819.0068359375, "learning_rate": 1.7820000000000002e-05, "loss": 162.7462, "step": 8910 }, { "epoch": 0.03603792870792713, "grad_norm": 2304.89306640625, "learning_rate": 1.7840000000000002e-05, "loss": 166.9301, "step": 8920 }, { "epoch": 0.036078329973294763, "grad_norm": 1451.4320068359375, "learning_rate": 1.7860000000000002e-05, "loss": 199.7026, "step": 8930 }, { "epoch": 0.0361187312386624, "grad_norm": 1088.584228515625, "learning_rate": 1.7879999999999998e-05, "loss": 122.1127, "step": 8940 }, { "epoch": 0.03615913250403003, "grad_norm": 1242.6817626953125, "learning_rate": 1.79e-05, "loss": 205.5091, "step": 8950 }, { "epoch": 0.036199533769397656, "grad_norm": 3576.470458984375, "learning_rate": 1.792e-05, "loss": 222.864, "step": 8960 }, { "epoch": 0.03623993503476529, "grad_norm": 7665.30419921875, "learning_rate": 1.794e-05, "loss": 177.141, "step": 8970 }, { "epoch": 0.03628033630013292, "grad_norm": 874.79052734375, "learning_rate": 1.796e-05, "loss": 151.6607, "step": 8980 }, { "epoch": 0.036320737565500555, "grad_norm": 2215.158203125, "learning_rate": 1.798e-05, "loss": 187.3096, "step": 8990 }, { "epoch": 0.03636113883086818, "grad_norm": 2184.711669921875, "learning_rate": 1.8e-05, "loss": 192.0445, "step": 9000 }, { "epoch": 0.036401540096235814, "grad_norm": 1615.7005615234375, "learning_rate": 1.802e-05, "loss": 200.9116, "step": 9010 }, { "epoch": 0.03644194136160345, "grad_norm": 1396.12109375, "learning_rate": 1.804e-05, "loss": 116.5097, "step": 9020 }, { "epoch": 0.03648234262697108, "grad_norm": 2052.5712890625, "learning_rate": 1.8060000000000003e-05, "loss": 91.6937, "step": 9030 }, { "epoch": 0.036522743892338706, "grad_norm": 1198.88232421875, "learning_rate": 1.808e-05, "loss": 162.9016, "step": 9040 }, { "epoch": 0.03656314515770634, "grad_norm": 1004.2339477539062, "learning_rate": 1.81e-05, "loss": 165.1726, "step": 9050 }, { "epoch": 0.03660354642307397, "grad_norm": 2106.733642578125, "learning_rate": 1.812e-05, "loss": 293.9893, "step": 9060 }, { "epoch": 0.036643947688441605, "grad_norm": 1582.510986328125, "learning_rate": 1.8140000000000003e-05, "loss": 190.1606, "step": 9070 }, { "epoch": 0.03668434895380923, "grad_norm": 905.4036254882812, "learning_rate": 1.8160000000000002e-05, "loss": 141.9137, "step": 9080 }, { "epoch": 0.036724750219176865, "grad_norm": 411.4434509277344, "learning_rate": 1.818e-05, "loss": 135.4074, "step": 9090 }, { "epoch": 0.0367651514845445, "grad_norm": 2590.279541015625, "learning_rate": 1.8200000000000002e-05, "loss": 237.3702, "step": 9100 }, { "epoch": 0.03680555274991213, "grad_norm": 966.46435546875, "learning_rate": 1.8220000000000002e-05, "loss": 99.4366, "step": 9110 }, { "epoch": 0.03684595401527976, "grad_norm": 1426.09375, "learning_rate": 1.824e-05, "loss": 128.6301, "step": 9120 }, { "epoch": 0.03688635528064739, "grad_norm": 661.1119384765625, "learning_rate": 1.826e-05, "loss": 119.7455, "step": 9130 }, { "epoch": 0.03692675654601502, "grad_norm": 1419.5643310546875, "learning_rate": 1.828e-05, "loss": 157.1782, "step": 9140 }, { "epoch": 0.03696715781138265, "grad_norm": 875.4746704101562, "learning_rate": 1.83e-05, "loss": 167.5964, "step": 9150 }, { "epoch": 0.03700755907675028, "grad_norm": 914.225341796875, "learning_rate": 1.832e-05, "loss": 229.4271, "step": 9160 }, { "epoch": 0.037047960342117915, "grad_norm": 1139.4866943359375, "learning_rate": 1.834e-05, "loss": 176.9276, "step": 9170 }, { "epoch": 0.03708836160748555, "grad_norm": 1400.506103515625, "learning_rate": 1.8360000000000004e-05, "loss": 137.9502, "step": 9180 }, { "epoch": 0.037128762872853174, "grad_norm": 1892.8255615234375, "learning_rate": 1.838e-05, "loss": 177.446, "step": 9190 }, { "epoch": 0.03716916413822081, "grad_norm": 6680.482421875, "learning_rate": 1.84e-05, "loss": 177.2692, "step": 9200 }, { "epoch": 0.03720956540358844, "grad_norm": 5937.34521484375, "learning_rate": 1.842e-05, "loss": 209.457, "step": 9210 }, { "epoch": 0.037249966668956074, "grad_norm": 724.9736938476562, "learning_rate": 1.8440000000000003e-05, "loss": 144.7552, "step": 9220 }, { "epoch": 0.0372903679343237, "grad_norm": 1280.418212890625, "learning_rate": 1.846e-05, "loss": 177.4063, "step": 9230 }, { "epoch": 0.03733076919969133, "grad_norm": 4052.574462890625, "learning_rate": 1.848e-05, "loss": 192.6805, "step": 9240 }, { "epoch": 0.037371170465058966, "grad_norm": 1359.7706298828125, "learning_rate": 1.85e-05, "loss": 187.3014, "step": 9250 }, { "epoch": 0.0374115717304266, "grad_norm": 3640.478515625, "learning_rate": 1.8520000000000002e-05, "loss": 134.8755, "step": 9260 }, { "epoch": 0.037451972995794225, "grad_norm": 495.39373779296875, "learning_rate": 1.8540000000000002e-05, "loss": 199.3222, "step": 9270 }, { "epoch": 0.03749237426116186, "grad_norm": 614.9301147460938, "learning_rate": 1.856e-05, "loss": 172.9295, "step": 9280 }, { "epoch": 0.03753277552652949, "grad_norm": 1786.66552734375, "learning_rate": 1.858e-05, "loss": 170.4556, "step": 9290 }, { "epoch": 0.037573176791897124, "grad_norm": 1209.83740234375, "learning_rate": 1.86e-05, "loss": 179.2815, "step": 9300 }, { "epoch": 0.03761357805726475, "grad_norm": 935.8829345703125, "learning_rate": 1.862e-05, "loss": 153.5103, "step": 9310 }, { "epoch": 0.03765397932263238, "grad_norm": 1041.441162109375, "learning_rate": 1.864e-05, "loss": 220.6227, "step": 9320 }, { "epoch": 0.037694380588000016, "grad_norm": 1657.690673828125, "learning_rate": 1.866e-05, "loss": 238.1361, "step": 9330 }, { "epoch": 0.03773478185336765, "grad_norm": 1077.7347412109375, "learning_rate": 1.868e-05, "loss": 156.283, "step": 9340 }, { "epoch": 0.037775183118735275, "grad_norm": 834.681396484375, "learning_rate": 1.87e-05, "loss": 94.2577, "step": 9350 }, { "epoch": 0.03781558438410291, "grad_norm": 1160.058349609375, "learning_rate": 1.872e-05, "loss": 231.7988, "step": 9360 }, { "epoch": 0.03785598564947054, "grad_norm": 997.937255859375, "learning_rate": 1.8740000000000004e-05, "loss": 134.2654, "step": 9370 }, { "epoch": 0.037896386914838175, "grad_norm": 882.4503784179688, "learning_rate": 1.876e-05, "loss": 193.2273, "step": 9380 }, { "epoch": 0.0379367881802058, "grad_norm": 3181.698974609375, "learning_rate": 1.878e-05, "loss": 132.2644, "step": 9390 }, { "epoch": 0.037977189445573434, "grad_norm": 725.4539184570312, "learning_rate": 1.88e-05, "loss": 153.5085, "step": 9400 }, { "epoch": 0.03801759071094107, "grad_norm": 1430.8912353515625, "learning_rate": 1.8820000000000003e-05, "loss": 196.1231, "step": 9410 }, { "epoch": 0.0380579919763087, "grad_norm": 1368.989501953125, "learning_rate": 1.8840000000000003e-05, "loss": 139.1376, "step": 9420 }, { "epoch": 0.038098393241676326, "grad_norm": 838.9828491210938, "learning_rate": 1.886e-05, "loss": 131.1992, "step": 9430 }, { "epoch": 0.03813879450704396, "grad_norm": 3527.458251953125, "learning_rate": 1.888e-05, "loss": 187.7699, "step": 9440 }, { "epoch": 0.03817919577241159, "grad_norm": 2998.174072265625, "learning_rate": 1.8900000000000002e-05, "loss": 175.4584, "step": 9450 }, { "epoch": 0.038219597037779225, "grad_norm": 1297.8941650390625, "learning_rate": 1.8920000000000002e-05, "loss": 210.6279, "step": 9460 }, { "epoch": 0.03825999830314685, "grad_norm": 3371.50244140625, "learning_rate": 1.894e-05, "loss": 217.5112, "step": 9470 }, { "epoch": 0.038300399568514484, "grad_norm": 1407.463623046875, "learning_rate": 1.896e-05, "loss": 192.8969, "step": 9480 }, { "epoch": 0.03834080083388212, "grad_norm": 1015.240966796875, "learning_rate": 1.898e-05, "loss": 173.7605, "step": 9490 }, { "epoch": 0.03838120209924975, "grad_norm": 731.1430053710938, "learning_rate": 1.9e-05, "loss": 191.5719, "step": 9500 }, { "epoch": 0.03842160336461738, "grad_norm": 849.3814086914062, "learning_rate": 1.902e-05, "loss": 91.8381, "step": 9510 }, { "epoch": 0.03846200462998501, "grad_norm": 1715.9556884765625, "learning_rate": 1.904e-05, "loss": 175.3195, "step": 9520 }, { "epoch": 0.03850240589535264, "grad_norm": 7559.9091796875, "learning_rate": 1.906e-05, "loss": 175.7841, "step": 9530 }, { "epoch": 0.038542807160720276, "grad_norm": 689.8999633789062, "learning_rate": 1.908e-05, "loss": 116.3339, "step": 9540 }, { "epoch": 0.0385832084260879, "grad_norm": 873.10009765625, "learning_rate": 1.91e-05, "loss": 140.4007, "step": 9550 }, { "epoch": 0.038623609691455535, "grad_norm": 2358.655517578125, "learning_rate": 1.9120000000000003e-05, "loss": 168.361, "step": 9560 }, { "epoch": 0.03866401095682317, "grad_norm": 5055.4892578125, "learning_rate": 1.914e-05, "loss": 174.0588, "step": 9570 }, { "epoch": 0.0387044122221908, "grad_norm": 897.6202392578125, "learning_rate": 1.916e-05, "loss": 153.3665, "step": 9580 }, { "epoch": 0.03874481348755843, "grad_norm": 729.1337280273438, "learning_rate": 1.918e-05, "loss": 131.9806, "step": 9590 }, { "epoch": 0.03878521475292606, "grad_norm": 795.0892944335938, "learning_rate": 1.9200000000000003e-05, "loss": 166.5195, "step": 9600 }, { "epoch": 0.03882561601829369, "grad_norm": 644.804443359375, "learning_rate": 1.9220000000000002e-05, "loss": 129.34, "step": 9610 }, { "epoch": 0.038866017283661326, "grad_norm": 1547.818359375, "learning_rate": 1.924e-05, "loss": 165.855, "step": 9620 }, { "epoch": 0.03890641854902895, "grad_norm": 0.0, "learning_rate": 1.9260000000000002e-05, "loss": 181.7364, "step": 9630 }, { "epoch": 0.038946819814396585, "grad_norm": 2220.852294921875, "learning_rate": 1.9280000000000002e-05, "loss": 154.0527, "step": 9640 }, { "epoch": 0.03898722107976422, "grad_norm": 1048.51123046875, "learning_rate": 1.93e-05, "loss": 160.1465, "step": 9650 }, { "epoch": 0.03902762234513185, "grad_norm": 0.0, "learning_rate": 1.932e-05, "loss": 149.3159, "step": 9660 }, { "epoch": 0.03906802361049948, "grad_norm": 1180.42919921875, "learning_rate": 1.934e-05, "loss": 145.5787, "step": 9670 }, { "epoch": 0.03910842487586711, "grad_norm": 1122.5113525390625, "learning_rate": 1.936e-05, "loss": 124.5202, "step": 9680 }, { "epoch": 0.039148826141234744, "grad_norm": 1447.1502685546875, "learning_rate": 1.938e-05, "loss": 178.7178, "step": 9690 }, { "epoch": 0.03918922740660238, "grad_norm": 1735.78125, "learning_rate": 1.94e-05, "loss": 184.0263, "step": 9700 }, { "epoch": 0.03922962867197, "grad_norm": 660.9119873046875, "learning_rate": 1.942e-05, "loss": 127.7493, "step": 9710 }, { "epoch": 0.039270029937337636, "grad_norm": 1763.235107421875, "learning_rate": 1.944e-05, "loss": 176.0283, "step": 9720 }, { "epoch": 0.03931043120270527, "grad_norm": 2768.470947265625, "learning_rate": 1.946e-05, "loss": 107.1719, "step": 9730 }, { "epoch": 0.0393508324680729, "grad_norm": 600.2669067382812, "learning_rate": 1.948e-05, "loss": 162.8209, "step": 9740 }, { "epoch": 0.03939123373344053, "grad_norm": 3563.5947265625, "learning_rate": 1.9500000000000003e-05, "loss": 197.7512, "step": 9750 }, { "epoch": 0.03943163499880816, "grad_norm": 2130.047607421875, "learning_rate": 1.9520000000000003e-05, "loss": 160.2494, "step": 9760 }, { "epoch": 0.039472036264175794, "grad_norm": 2377.900146484375, "learning_rate": 1.954e-05, "loss": 171.3171, "step": 9770 }, { "epoch": 0.03951243752954343, "grad_norm": 1052.0291748046875, "learning_rate": 1.956e-05, "loss": 113.5263, "step": 9780 }, { "epoch": 0.039552838794911054, "grad_norm": 2078.976318359375, "learning_rate": 1.9580000000000002e-05, "loss": 191.9397, "step": 9790 }, { "epoch": 0.03959324006027869, "grad_norm": 773.3158569335938, "learning_rate": 1.9600000000000002e-05, "loss": 126.11, "step": 9800 }, { "epoch": 0.03963364132564632, "grad_norm": 1939.7193603515625, "learning_rate": 1.9620000000000002e-05, "loss": 210.1035, "step": 9810 }, { "epoch": 0.03967404259101395, "grad_norm": 531.8984985351562, "learning_rate": 1.9640000000000002e-05, "loss": 161.9592, "step": 9820 }, { "epoch": 0.03971444385638158, "grad_norm": 441.3695983886719, "learning_rate": 1.966e-05, "loss": 131.7898, "step": 9830 }, { "epoch": 0.03975484512174921, "grad_norm": 2530.7509765625, "learning_rate": 1.968e-05, "loss": 190.4342, "step": 9840 }, { "epoch": 0.039795246387116845, "grad_norm": 1077.1409912109375, "learning_rate": 1.97e-05, "loss": 141.0169, "step": 9850 }, { "epoch": 0.03983564765248448, "grad_norm": 1837.5992431640625, "learning_rate": 1.972e-05, "loss": 197.5706, "step": 9860 }, { "epoch": 0.039876048917852104, "grad_norm": 1183.7669677734375, "learning_rate": 1.974e-05, "loss": 186.1175, "step": 9870 }, { "epoch": 0.03991645018321974, "grad_norm": 584.4487915039062, "learning_rate": 1.976e-05, "loss": 128.3029, "step": 9880 }, { "epoch": 0.03995685144858737, "grad_norm": 731.9237060546875, "learning_rate": 1.978e-05, "loss": 97.8851, "step": 9890 }, { "epoch": 0.039997252713955, "grad_norm": 734.3873901367188, "learning_rate": 1.9800000000000004e-05, "loss": 170.2385, "step": 9900 }, { "epoch": 0.04003765397932263, "grad_norm": 530.0155029296875, "learning_rate": 1.982e-05, "loss": 140.1505, "step": 9910 }, { "epoch": 0.04007805524469026, "grad_norm": 1368.81298828125, "learning_rate": 1.984e-05, "loss": 188.7803, "step": 9920 }, { "epoch": 0.040118456510057895, "grad_norm": 2284.725341796875, "learning_rate": 1.986e-05, "loss": 213.4412, "step": 9930 }, { "epoch": 0.04015885777542553, "grad_norm": 953.0332641601562, "learning_rate": 1.9880000000000003e-05, "loss": 81.9998, "step": 9940 }, { "epoch": 0.040199259040793155, "grad_norm": 2036.2686767578125, "learning_rate": 1.9900000000000003e-05, "loss": 124.487, "step": 9950 }, { "epoch": 0.04023966030616079, "grad_norm": 1137.9842529296875, "learning_rate": 1.992e-05, "loss": 166.7126, "step": 9960 }, { "epoch": 0.04028006157152842, "grad_norm": 906.6007080078125, "learning_rate": 1.994e-05, "loss": 188.0849, "step": 9970 }, { "epoch": 0.040320462836896054, "grad_norm": 1181.5977783203125, "learning_rate": 1.9960000000000002e-05, "loss": 168.7636, "step": 9980 }, { "epoch": 0.04036086410226368, "grad_norm": 1022.9138793945312, "learning_rate": 1.9980000000000002e-05, "loss": 168.0031, "step": 9990 }, { "epoch": 0.04040126536763131, "grad_norm": 717.4224243164062, "learning_rate": 2e-05, "loss": 122.0701, "step": 10000 }, { "epoch": 0.040441666632998946, "grad_norm": 595.6052856445312, "learning_rate": 2.002e-05, "loss": 143.9377, "step": 10010 }, { "epoch": 0.04048206789836658, "grad_norm": 1366.42431640625, "learning_rate": 2.004e-05, "loss": 167.5448, "step": 10020 }, { "epoch": 0.040522469163734205, "grad_norm": 1101.7801513671875, "learning_rate": 2.006e-05, "loss": 192.1521, "step": 10030 }, { "epoch": 0.04056287042910184, "grad_norm": 1699.7630615234375, "learning_rate": 2.008e-05, "loss": 154.8807, "step": 10040 }, { "epoch": 0.04060327169446947, "grad_norm": 1256.3902587890625, "learning_rate": 2.01e-05, "loss": 134.1945, "step": 10050 }, { "epoch": 0.040643672959837104, "grad_norm": 0.0, "learning_rate": 2.012e-05, "loss": 131.3899, "step": 10060 }, { "epoch": 0.04068407422520473, "grad_norm": 1447.4774169921875, "learning_rate": 2.014e-05, "loss": 159.1337, "step": 10070 }, { "epoch": 0.040724475490572364, "grad_norm": 4435.76904296875, "learning_rate": 2.016e-05, "loss": 198.5379, "step": 10080 }, { "epoch": 0.04076487675594, "grad_norm": 852.9769287109375, "learning_rate": 2.0180000000000003e-05, "loss": 160.13, "step": 10090 }, { "epoch": 0.04080527802130763, "grad_norm": 1151.4747314453125, "learning_rate": 2.0200000000000003e-05, "loss": 162.3118, "step": 10100 }, { "epoch": 0.040845679286675256, "grad_norm": 1879.0950927734375, "learning_rate": 2.022e-05, "loss": 172.1667, "step": 10110 }, { "epoch": 0.04088608055204289, "grad_norm": 953.0318603515625, "learning_rate": 2.024e-05, "loss": 152.1001, "step": 10120 }, { "epoch": 0.04092648181741052, "grad_norm": 834.5413208007812, "learning_rate": 2.0260000000000003e-05, "loss": 166.7469, "step": 10130 }, { "epoch": 0.040966883082778155, "grad_norm": 1954.07080078125, "learning_rate": 2.0280000000000002e-05, "loss": 170.3608, "step": 10140 }, { "epoch": 0.04100728434814578, "grad_norm": 1222.4248046875, "learning_rate": 2.0300000000000002e-05, "loss": 168.8605, "step": 10150 }, { "epoch": 0.041047685613513414, "grad_norm": 2559.77099609375, "learning_rate": 2.032e-05, "loss": 169.7865, "step": 10160 }, { "epoch": 0.04108808687888105, "grad_norm": 2988.0048828125, "learning_rate": 2.0340000000000002e-05, "loss": 175.7436, "step": 10170 }, { "epoch": 0.04112848814424868, "grad_norm": 647.4199829101562, "learning_rate": 2.036e-05, "loss": 161.4738, "step": 10180 }, { "epoch": 0.041168889409616306, "grad_norm": 1833.9415283203125, "learning_rate": 2.038e-05, "loss": 198.7381, "step": 10190 }, { "epoch": 0.04120929067498394, "grad_norm": 734.6459350585938, "learning_rate": 2.04e-05, "loss": 145.9311, "step": 10200 }, { "epoch": 0.04124969194035157, "grad_norm": 1672.8916015625, "learning_rate": 2.042e-05, "loss": 149.8653, "step": 10210 }, { "epoch": 0.041290093205719205, "grad_norm": 1029.32421875, "learning_rate": 2.044e-05, "loss": 178.9316, "step": 10220 }, { "epoch": 0.04133049447108683, "grad_norm": 714.8513793945312, "learning_rate": 2.046e-05, "loss": 112.8458, "step": 10230 }, { "epoch": 0.041370895736454465, "grad_norm": 1445.0704345703125, "learning_rate": 2.048e-05, "loss": 164.2769, "step": 10240 }, { "epoch": 0.0414112970018221, "grad_norm": 2015.292236328125, "learning_rate": 2.05e-05, "loss": 258.4405, "step": 10250 }, { "epoch": 0.04145169826718973, "grad_norm": 820.0475463867188, "learning_rate": 2.052e-05, "loss": 139.5321, "step": 10260 }, { "epoch": 0.04149209953255736, "grad_norm": 886.0709838867188, "learning_rate": 2.054e-05, "loss": 148.4284, "step": 10270 }, { "epoch": 0.04153250079792499, "grad_norm": 1072.3577880859375, "learning_rate": 2.0560000000000003e-05, "loss": 192.8738, "step": 10280 }, { "epoch": 0.04157290206329262, "grad_norm": 747.4786987304688, "learning_rate": 2.0580000000000003e-05, "loss": 101.708, "step": 10290 }, { "epoch": 0.041613303328660256, "grad_norm": 5367.84521484375, "learning_rate": 2.06e-05, "loss": 194.0073, "step": 10300 }, { "epoch": 0.04165370459402788, "grad_norm": 1179.1737060546875, "learning_rate": 2.062e-05, "loss": 121.6645, "step": 10310 }, { "epoch": 0.041694105859395515, "grad_norm": 779.1863403320312, "learning_rate": 2.0640000000000002e-05, "loss": 156.9918, "step": 10320 }, { "epoch": 0.04173450712476315, "grad_norm": 609.2747192382812, "learning_rate": 2.0660000000000002e-05, "loss": 121.481, "step": 10330 }, { "epoch": 0.04177490839013078, "grad_norm": 856.9229736328125, "learning_rate": 2.0680000000000002e-05, "loss": 126.45, "step": 10340 }, { "epoch": 0.04181530965549841, "grad_norm": 3409.32568359375, "learning_rate": 2.07e-05, "loss": 135.8212, "step": 10350 }, { "epoch": 0.04185571092086604, "grad_norm": 2779.28369140625, "learning_rate": 2.072e-05, "loss": 180.5799, "step": 10360 }, { "epoch": 0.041896112186233674, "grad_norm": 2200.98583984375, "learning_rate": 2.074e-05, "loss": 228.0418, "step": 10370 }, { "epoch": 0.04193651345160131, "grad_norm": 864.7232055664062, "learning_rate": 2.076e-05, "loss": 200.8099, "step": 10380 }, { "epoch": 0.04197691471696893, "grad_norm": 373.90771484375, "learning_rate": 2.078e-05, "loss": 123.1325, "step": 10390 }, { "epoch": 0.042017315982336566, "grad_norm": 865.7789916992188, "learning_rate": 2.08e-05, "loss": 177.7623, "step": 10400 }, { "epoch": 0.0420577172477042, "grad_norm": 1141.6817626953125, "learning_rate": 2.082e-05, "loss": 150.2677, "step": 10410 }, { "epoch": 0.04209811851307183, "grad_norm": 720.0677490234375, "learning_rate": 2.084e-05, "loss": 146.5872, "step": 10420 }, { "epoch": 0.04213851977843946, "grad_norm": 1222.44775390625, "learning_rate": 2.086e-05, "loss": 162.8006, "step": 10430 }, { "epoch": 0.04217892104380709, "grad_norm": 909.986083984375, "learning_rate": 2.0880000000000003e-05, "loss": 193.938, "step": 10440 }, { "epoch": 0.042219322309174724, "grad_norm": 1559.7752685546875, "learning_rate": 2.09e-05, "loss": 167.0518, "step": 10450 }, { "epoch": 0.04225972357454236, "grad_norm": 2482.876953125, "learning_rate": 2.092e-05, "loss": 138.0267, "step": 10460 }, { "epoch": 0.04230012483990998, "grad_norm": 1091.695556640625, "learning_rate": 2.0940000000000003e-05, "loss": 103.3883, "step": 10470 }, { "epoch": 0.042340526105277616, "grad_norm": 806.2134399414062, "learning_rate": 2.0960000000000003e-05, "loss": 124.8749, "step": 10480 }, { "epoch": 0.04238092737064525, "grad_norm": 1276.008544921875, "learning_rate": 2.098e-05, "loss": 163.3479, "step": 10490 }, { "epoch": 0.04242132863601288, "grad_norm": 4489.3603515625, "learning_rate": 2.1e-05, "loss": 171.6303, "step": 10500 }, { "epoch": 0.04246172990138051, "grad_norm": 838.2408447265625, "learning_rate": 2.1020000000000002e-05, "loss": 150.1332, "step": 10510 }, { "epoch": 0.04250213116674814, "grad_norm": 1553.7894287109375, "learning_rate": 2.1040000000000002e-05, "loss": 161.144, "step": 10520 }, { "epoch": 0.042542532432115775, "grad_norm": 1126.568115234375, "learning_rate": 2.106e-05, "loss": 152.1566, "step": 10530 }, { "epoch": 0.04258293369748341, "grad_norm": 832.1701049804688, "learning_rate": 2.1079999999999998e-05, "loss": 139.9062, "step": 10540 }, { "epoch": 0.042623334962851034, "grad_norm": 519.9462890625, "learning_rate": 2.11e-05, "loss": 108.9907, "step": 10550 }, { "epoch": 0.04266373622821867, "grad_norm": 2303.817626953125, "learning_rate": 2.112e-05, "loss": 150.1262, "step": 10560 }, { "epoch": 0.0427041374935863, "grad_norm": 1118.6610107421875, "learning_rate": 2.114e-05, "loss": 127.0093, "step": 10570 }, { "epoch": 0.04274453875895393, "grad_norm": 1355.4498291015625, "learning_rate": 2.116e-05, "loss": 165.2866, "step": 10580 }, { "epoch": 0.04278494002432156, "grad_norm": 2492.5751953125, "learning_rate": 2.118e-05, "loss": 167.1242, "step": 10590 }, { "epoch": 0.04282534128968919, "grad_norm": 3182.501953125, "learning_rate": 2.12e-05, "loss": 146.6266, "step": 10600 }, { "epoch": 0.042865742555056825, "grad_norm": 1109.6849365234375, "learning_rate": 2.122e-05, "loss": 195.9611, "step": 10610 }, { "epoch": 0.04290614382042446, "grad_norm": 2110.990234375, "learning_rate": 2.124e-05, "loss": 199.2886, "step": 10620 }, { "epoch": 0.042946545085792084, "grad_norm": 1315.0323486328125, "learning_rate": 2.1260000000000003e-05, "loss": 136.6857, "step": 10630 }, { "epoch": 0.04298694635115972, "grad_norm": 1708.447998046875, "learning_rate": 2.128e-05, "loss": 173.377, "step": 10640 }, { "epoch": 0.04302734761652735, "grad_norm": 1813.0264892578125, "learning_rate": 2.13e-05, "loss": 166.6691, "step": 10650 }, { "epoch": 0.043067748881894984, "grad_norm": 1098.9163818359375, "learning_rate": 2.1320000000000003e-05, "loss": 187.8283, "step": 10660 }, { "epoch": 0.04310815014726261, "grad_norm": 608.4671630859375, "learning_rate": 2.1340000000000002e-05, "loss": 141.6912, "step": 10670 }, { "epoch": 0.04314855141263024, "grad_norm": 830.4765625, "learning_rate": 2.1360000000000002e-05, "loss": 142.6794, "step": 10680 }, { "epoch": 0.043188952677997876, "grad_norm": 600.3438110351562, "learning_rate": 2.138e-05, "loss": 130.3432, "step": 10690 }, { "epoch": 0.04322935394336551, "grad_norm": 1370.6168212890625, "learning_rate": 2.1400000000000002e-05, "loss": 209.9451, "step": 10700 }, { "epoch": 0.043269755208733135, "grad_norm": 622.1041870117188, "learning_rate": 2.142e-05, "loss": 169.206, "step": 10710 }, { "epoch": 0.04331015647410077, "grad_norm": 2445.648681640625, "learning_rate": 2.144e-05, "loss": 200.1026, "step": 10720 }, { "epoch": 0.0433505577394684, "grad_norm": 1902.2943115234375, "learning_rate": 2.146e-05, "loss": 187.5802, "step": 10730 }, { "epoch": 0.043390959004836034, "grad_norm": 1008.3157348632812, "learning_rate": 2.148e-05, "loss": 143.9581, "step": 10740 }, { "epoch": 0.04343136027020366, "grad_norm": 594.2481079101562, "learning_rate": 2.15e-05, "loss": 186.3464, "step": 10750 }, { "epoch": 0.04347176153557129, "grad_norm": 13106.4541015625, "learning_rate": 2.152e-05, "loss": 218.3369, "step": 10760 }, { "epoch": 0.043512162800938926, "grad_norm": 2128.98291015625, "learning_rate": 2.154e-05, "loss": 170.2695, "step": 10770 }, { "epoch": 0.04355256406630656, "grad_norm": 1214.33642578125, "learning_rate": 2.1560000000000004e-05, "loss": 147.123, "step": 10780 }, { "epoch": 0.043592965331674186, "grad_norm": 1065.6256103515625, "learning_rate": 2.158e-05, "loss": 168.7387, "step": 10790 }, { "epoch": 0.04363336659704182, "grad_norm": 1667.8955078125, "learning_rate": 2.16e-05, "loss": 152.4974, "step": 10800 }, { "epoch": 0.04367376786240945, "grad_norm": 1360.904052734375, "learning_rate": 2.162e-05, "loss": 254.4096, "step": 10810 }, { "epoch": 0.043714169127777085, "grad_norm": 1592.132080078125, "learning_rate": 2.1640000000000003e-05, "loss": 174.9611, "step": 10820 }, { "epoch": 0.04375457039314471, "grad_norm": 776.6605834960938, "learning_rate": 2.166e-05, "loss": 184.3046, "step": 10830 }, { "epoch": 0.043794971658512344, "grad_norm": 2525.784423828125, "learning_rate": 2.168e-05, "loss": 164.588, "step": 10840 }, { "epoch": 0.04383537292387998, "grad_norm": 1608.6549072265625, "learning_rate": 2.1700000000000002e-05, "loss": 196.8271, "step": 10850 }, { "epoch": 0.04387577418924761, "grad_norm": 892.496826171875, "learning_rate": 2.1720000000000002e-05, "loss": 148.6461, "step": 10860 }, { "epoch": 0.043916175454615236, "grad_norm": 1432.486572265625, "learning_rate": 2.1740000000000002e-05, "loss": 182.0831, "step": 10870 }, { "epoch": 0.04395657671998287, "grad_norm": 1320.6700439453125, "learning_rate": 2.176e-05, "loss": 180.9173, "step": 10880 }, { "epoch": 0.0439969779853505, "grad_norm": 1471.670166015625, "learning_rate": 2.178e-05, "loss": 178.3413, "step": 10890 }, { "epoch": 0.044037379250718135, "grad_norm": 1130.7198486328125, "learning_rate": 2.18e-05, "loss": 148.5332, "step": 10900 }, { "epoch": 0.04407778051608576, "grad_norm": 1092.3216552734375, "learning_rate": 2.182e-05, "loss": 114.7338, "step": 10910 }, { "epoch": 0.044118181781453394, "grad_norm": 1079.041259765625, "learning_rate": 2.184e-05, "loss": 132.0338, "step": 10920 }, { "epoch": 0.04415858304682103, "grad_norm": 2612.80517578125, "learning_rate": 2.186e-05, "loss": 138.605, "step": 10930 }, { "epoch": 0.04419898431218866, "grad_norm": 1243.3662109375, "learning_rate": 2.188e-05, "loss": 157.8075, "step": 10940 }, { "epoch": 0.04423938557755629, "grad_norm": 1165.291015625, "learning_rate": 2.19e-05, "loss": 123.9359, "step": 10950 }, { "epoch": 0.04427978684292392, "grad_norm": 970.86083984375, "learning_rate": 2.192e-05, "loss": 99.9449, "step": 10960 }, { "epoch": 0.04432018810829155, "grad_norm": 282.0602722167969, "learning_rate": 2.1940000000000003e-05, "loss": 126.6935, "step": 10970 }, { "epoch": 0.044360589373659186, "grad_norm": 2742.96044921875, "learning_rate": 2.196e-05, "loss": 216.6111, "step": 10980 }, { "epoch": 0.04440099063902681, "grad_norm": 725.5316772460938, "learning_rate": 2.198e-05, "loss": 283.1829, "step": 10990 }, { "epoch": 0.044441391904394445, "grad_norm": 1572.414794921875, "learning_rate": 2.2000000000000003e-05, "loss": 171.7036, "step": 11000 }, { "epoch": 0.04448179316976208, "grad_norm": 0.0, "learning_rate": 2.2020000000000003e-05, "loss": 158.3066, "step": 11010 }, { "epoch": 0.04452219443512971, "grad_norm": 1373.786376953125, "learning_rate": 2.2040000000000002e-05, "loss": 125.9849, "step": 11020 }, { "epoch": 0.04456259570049734, "grad_norm": 1483.981689453125, "learning_rate": 2.206e-05, "loss": 176.718, "step": 11030 }, { "epoch": 0.04460299696586497, "grad_norm": 997.575927734375, "learning_rate": 2.2080000000000002e-05, "loss": 164.0399, "step": 11040 }, { "epoch": 0.0446433982312326, "grad_norm": 550.5761108398438, "learning_rate": 2.2100000000000002e-05, "loss": 135.1882, "step": 11050 }, { "epoch": 0.044683799496600236, "grad_norm": 733.0974731445312, "learning_rate": 2.212e-05, "loss": 112.1112, "step": 11060 }, { "epoch": 0.04472420076196786, "grad_norm": 856.7572021484375, "learning_rate": 2.214e-05, "loss": 113.8155, "step": 11070 }, { "epoch": 0.044764602027335496, "grad_norm": 1171.519287109375, "learning_rate": 2.216e-05, "loss": 144.4537, "step": 11080 }, { "epoch": 0.04480500329270313, "grad_norm": 1287.0703125, "learning_rate": 2.218e-05, "loss": 122.8268, "step": 11090 }, { "epoch": 0.04484540455807076, "grad_norm": 1202.9896240234375, "learning_rate": 2.22e-05, "loss": 206.9014, "step": 11100 }, { "epoch": 0.04488580582343839, "grad_norm": 1963.7164306640625, "learning_rate": 2.222e-05, "loss": 128.2789, "step": 11110 }, { "epoch": 0.04492620708880602, "grad_norm": 2504.82958984375, "learning_rate": 2.224e-05, "loss": 124.5799, "step": 11120 }, { "epoch": 0.044966608354173654, "grad_norm": 1163.331298828125, "learning_rate": 2.226e-05, "loss": 91.1706, "step": 11130 }, { "epoch": 0.04500700961954129, "grad_norm": 10312.1435546875, "learning_rate": 2.228e-05, "loss": 200.2307, "step": 11140 }, { "epoch": 0.04504741088490891, "grad_norm": 1264.3104248046875, "learning_rate": 2.23e-05, "loss": 161.8118, "step": 11150 }, { "epoch": 0.045087812150276546, "grad_norm": 8854.8544921875, "learning_rate": 2.2320000000000003e-05, "loss": 162.5124, "step": 11160 }, { "epoch": 0.04512821341564418, "grad_norm": 4647.8427734375, "learning_rate": 2.234e-05, "loss": 137.7809, "step": 11170 }, { "epoch": 0.04516861468101181, "grad_norm": 1007.0010375976562, "learning_rate": 2.236e-05, "loss": 128.0276, "step": 11180 }, { "epoch": 0.04520901594637944, "grad_norm": 2929.648681640625, "learning_rate": 2.2380000000000003e-05, "loss": 132.0453, "step": 11190 }, { "epoch": 0.04524941721174707, "grad_norm": 2017.7265625, "learning_rate": 2.2400000000000002e-05, "loss": 242.607, "step": 11200 }, { "epoch": 0.045289818477114704, "grad_norm": 783.3958129882812, "learning_rate": 2.2420000000000002e-05, "loss": 146.9575, "step": 11210 }, { "epoch": 0.04533021974248234, "grad_norm": 1278.9561767578125, "learning_rate": 2.244e-05, "loss": 183.9778, "step": 11220 }, { "epoch": 0.045370621007849964, "grad_norm": 879.9774169921875, "learning_rate": 2.2460000000000002e-05, "loss": 139.0227, "step": 11230 }, { "epoch": 0.0454110222732176, "grad_norm": 1722.348876953125, "learning_rate": 2.248e-05, "loss": 229.9668, "step": 11240 }, { "epoch": 0.04545142353858523, "grad_norm": 652.4034423828125, "learning_rate": 2.25e-05, "loss": 129.4687, "step": 11250 }, { "epoch": 0.04549182480395286, "grad_norm": 1721.231689453125, "learning_rate": 2.252e-05, "loss": 168.8096, "step": 11260 }, { "epoch": 0.04553222606932049, "grad_norm": 1650.2803955078125, "learning_rate": 2.254e-05, "loss": 162.424, "step": 11270 }, { "epoch": 0.04557262733468812, "grad_norm": 954.37890625, "learning_rate": 2.256e-05, "loss": 197.3434, "step": 11280 }, { "epoch": 0.045613028600055755, "grad_norm": 4419.76123046875, "learning_rate": 2.258e-05, "loss": 210.9756, "step": 11290 }, { "epoch": 0.04565342986542339, "grad_norm": 1498.54296875, "learning_rate": 2.26e-05, "loss": 134.9217, "step": 11300 }, { "epoch": 0.045693831130791014, "grad_norm": 987.8052368164062, "learning_rate": 2.2620000000000004e-05, "loss": 163.9031, "step": 11310 }, { "epoch": 0.04573423239615865, "grad_norm": 1421.3912353515625, "learning_rate": 2.264e-05, "loss": 175.6454, "step": 11320 }, { "epoch": 0.04577463366152628, "grad_norm": 4952.677734375, "learning_rate": 2.266e-05, "loss": 191.8143, "step": 11330 }, { "epoch": 0.04581503492689391, "grad_norm": 1982.3878173828125, "learning_rate": 2.268e-05, "loss": 163.27, "step": 11340 }, { "epoch": 0.04585543619226154, "grad_norm": 1355.781005859375, "learning_rate": 2.2700000000000003e-05, "loss": 169.8846, "step": 11350 }, { "epoch": 0.04589583745762917, "grad_norm": 1866.7637939453125, "learning_rate": 2.2720000000000003e-05, "loss": 195.6275, "step": 11360 }, { "epoch": 0.045936238722996806, "grad_norm": 1269.4019775390625, "learning_rate": 2.274e-05, "loss": 136.8505, "step": 11370 }, { "epoch": 0.04597663998836444, "grad_norm": 712.604736328125, "learning_rate": 2.2760000000000002e-05, "loss": 142.4693, "step": 11380 }, { "epoch": 0.046017041253732065, "grad_norm": 730.568115234375, "learning_rate": 2.2780000000000002e-05, "loss": 123.7075, "step": 11390 }, { "epoch": 0.0460574425190997, "grad_norm": 748.4280395507812, "learning_rate": 2.2800000000000002e-05, "loss": 143.0049, "step": 11400 }, { "epoch": 0.04609784378446733, "grad_norm": 1697.1856689453125, "learning_rate": 2.282e-05, "loss": 126.3755, "step": 11410 }, { "epoch": 0.046138245049834964, "grad_norm": 2124.771240234375, "learning_rate": 2.284e-05, "loss": 172.3426, "step": 11420 }, { "epoch": 0.04617864631520259, "grad_norm": 1425.5914306640625, "learning_rate": 2.286e-05, "loss": 152.4624, "step": 11430 }, { "epoch": 0.04621904758057022, "grad_norm": 1039.5628662109375, "learning_rate": 2.288e-05, "loss": 168.2912, "step": 11440 }, { "epoch": 0.046259448845937856, "grad_norm": 956.6918334960938, "learning_rate": 2.29e-05, "loss": 153.5313, "step": 11450 }, { "epoch": 0.04629985011130549, "grad_norm": 1411.84423828125, "learning_rate": 2.292e-05, "loss": 180.7848, "step": 11460 }, { "epoch": 0.046340251376673115, "grad_norm": 1310.616455078125, "learning_rate": 2.294e-05, "loss": 156.0493, "step": 11470 }, { "epoch": 0.04638065264204075, "grad_norm": 1483.5418701171875, "learning_rate": 2.296e-05, "loss": 194.3713, "step": 11480 }, { "epoch": 0.04642105390740838, "grad_norm": 1165.32763671875, "learning_rate": 2.298e-05, "loss": 143.955, "step": 11490 }, { "epoch": 0.046461455172776014, "grad_norm": 1113.7464599609375, "learning_rate": 2.3000000000000003e-05, "loss": 151.1558, "step": 11500 }, { "epoch": 0.04650185643814364, "grad_norm": 591.7924194335938, "learning_rate": 2.302e-05, "loss": 147.8064, "step": 11510 }, { "epoch": 0.046542257703511274, "grad_norm": 375.6275329589844, "learning_rate": 2.304e-05, "loss": 147.5092, "step": 11520 }, { "epoch": 0.04658265896887891, "grad_norm": 1058.679443359375, "learning_rate": 2.306e-05, "loss": 129.6008, "step": 11530 }, { "epoch": 0.04662306023424654, "grad_norm": 2693.062255859375, "learning_rate": 2.3080000000000003e-05, "loss": 168.9607, "step": 11540 }, { "epoch": 0.046663461499614166, "grad_norm": 1006.8905029296875, "learning_rate": 2.3100000000000002e-05, "loss": 186.7646, "step": 11550 }, { "epoch": 0.0467038627649818, "grad_norm": 1659.6192626953125, "learning_rate": 2.312e-05, "loss": 245.8637, "step": 11560 }, { "epoch": 0.04674426403034943, "grad_norm": 1359.0103759765625, "learning_rate": 2.3140000000000002e-05, "loss": 150.9513, "step": 11570 }, { "epoch": 0.046784665295717065, "grad_norm": 627.9051513671875, "learning_rate": 2.3160000000000002e-05, "loss": 96.2488, "step": 11580 }, { "epoch": 0.04682506656108469, "grad_norm": 1331.7342529296875, "learning_rate": 2.318e-05, "loss": 139.2599, "step": 11590 }, { "epoch": 0.046865467826452324, "grad_norm": 1107.905517578125, "learning_rate": 2.32e-05, "loss": 127.3733, "step": 11600 }, { "epoch": 0.04690586909181996, "grad_norm": 1476.7293701171875, "learning_rate": 2.322e-05, "loss": 88.5844, "step": 11610 }, { "epoch": 0.04694627035718759, "grad_norm": 1438.66748046875, "learning_rate": 2.324e-05, "loss": 146.773, "step": 11620 }, { "epoch": 0.046986671622555216, "grad_norm": 1060.174560546875, "learning_rate": 2.326e-05, "loss": 185.9057, "step": 11630 }, { "epoch": 0.04702707288792285, "grad_norm": 1116.5933837890625, "learning_rate": 2.328e-05, "loss": 161.711, "step": 11640 }, { "epoch": 0.04706747415329048, "grad_norm": 1230.9957275390625, "learning_rate": 2.3300000000000004e-05, "loss": 161.603, "step": 11650 }, { "epoch": 0.047107875418658116, "grad_norm": 1050.322265625, "learning_rate": 2.332e-05, "loss": 87.1483, "step": 11660 }, { "epoch": 0.04714827668402574, "grad_norm": 561.3192138671875, "learning_rate": 2.334e-05, "loss": 72.6746, "step": 11670 }, { "epoch": 0.047188677949393375, "grad_norm": 768.3349609375, "learning_rate": 2.336e-05, "loss": 245.4826, "step": 11680 }, { "epoch": 0.04722907921476101, "grad_norm": 1956.8682861328125, "learning_rate": 2.3380000000000003e-05, "loss": 246.9039, "step": 11690 }, { "epoch": 0.04726948048012864, "grad_norm": 1342.6962890625, "learning_rate": 2.3400000000000003e-05, "loss": 143.455, "step": 11700 }, { "epoch": 0.04730988174549627, "grad_norm": 1271.6512451171875, "learning_rate": 2.342e-05, "loss": 119.4163, "step": 11710 }, { "epoch": 0.0473502830108639, "grad_norm": 973.38427734375, "learning_rate": 2.344e-05, "loss": 142.8418, "step": 11720 }, { "epoch": 0.04739068427623153, "grad_norm": 725.7868041992188, "learning_rate": 2.3460000000000002e-05, "loss": 151.6831, "step": 11730 }, { "epoch": 0.047431085541599166, "grad_norm": 1243.775146484375, "learning_rate": 2.3480000000000002e-05, "loss": 129.1701, "step": 11740 }, { "epoch": 0.04747148680696679, "grad_norm": 1194.3792724609375, "learning_rate": 2.35e-05, "loss": 179.6937, "step": 11750 }, { "epoch": 0.047511888072334425, "grad_norm": 836.063720703125, "learning_rate": 2.3520000000000002e-05, "loss": 96.7801, "step": 11760 }, { "epoch": 0.04755228933770206, "grad_norm": 6927.7421875, "learning_rate": 2.354e-05, "loss": 170.788, "step": 11770 }, { "epoch": 0.04759269060306969, "grad_norm": 1191.6163330078125, "learning_rate": 2.356e-05, "loss": 95.2363, "step": 11780 }, { "epoch": 0.04763309186843732, "grad_norm": 4237.19384765625, "learning_rate": 2.358e-05, "loss": 149.3932, "step": 11790 }, { "epoch": 0.04767349313380495, "grad_norm": 770.5933227539062, "learning_rate": 2.36e-05, "loss": 163.007, "step": 11800 }, { "epoch": 0.047713894399172584, "grad_norm": 393.3005065917969, "learning_rate": 2.362e-05, "loss": 169.8844, "step": 11810 }, { "epoch": 0.04775429566454022, "grad_norm": 1076.961181640625, "learning_rate": 2.364e-05, "loss": 144.8054, "step": 11820 }, { "epoch": 0.04779469692990784, "grad_norm": 881.7391967773438, "learning_rate": 2.366e-05, "loss": 157.4215, "step": 11830 }, { "epoch": 0.047835098195275476, "grad_norm": 1243.5374755859375, "learning_rate": 2.3680000000000004e-05, "loss": 159.1609, "step": 11840 }, { "epoch": 0.04787549946064311, "grad_norm": 1366.5406494140625, "learning_rate": 2.37e-05, "loss": 132.554, "step": 11850 }, { "epoch": 0.04791590072601074, "grad_norm": 1306.282470703125, "learning_rate": 2.372e-05, "loss": 133.127, "step": 11860 }, { "epoch": 0.04795630199137837, "grad_norm": 1843.0394287109375, "learning_rate": 2.374e-05, "loss": 167.0897, "step": 11870 }, { "epoch": 0.047996703256746, "grad_norm": 1241.4002685546875, "learning_rate": 2.3760000000000003e-05, "loss": 195.5516, "step": 11880 }, { "epoch": 0.048037104522113634, "grad_norm": 1342.593017578125, "learning_rate": 2.3780000000000003e-05, "loss": 188.283, "step": 11890 }, { "epoch": 0.04807750578748127, "grad_norm": 6172.35498046875, "learning_rate": 2.38e-05, "loss": 202.552, "step": 11900 }, { "epoch": 0.04811790705284889, "grad_norm": 979.6281127929688, "learning_rate": 2.3820000000000002e-05, "loss": 126.3583, "step": 11910 }, { "epoch": 0.048158308318216526, "grad_norm": 3780.92236328125, "learning_rate": 2.3840000000000002e-05, "loss": 125.3818, "step": 11920 }, { "epoch": 0.04819870958358416, "grad_norm": 1145.655029296875, "learning_rate": 2.3860000000000002e-05, "loss": 135.4183, "step": 11930 }, { "epoch": 0.04823911084895179, "grad_norm": 3658.722412109375, "learning_rate": 2.3880000000000002e-05, "loss": 180.5494, "step": 11940 }, { "epoch": 0.04827951211431942, "grad_norm": 1273.83740234375, "learning_rate": 2.39e-05, "loss": 208.1243, "step": 11950 }, { "epoch": 0.04831991337968705, "grad_norm": 701.6416015625, "learning_rate": 2.392e-05, "loss": 119.6717, "step": 11960 }, { "epoch": 0.048360314645054685, "grad_norm": 598.38818359375, "learning_rate": 2.394e-05, "loss": 106.7641, "step": 11970 }, { "epoch": 0.04840071591042232, "grad_norm": 697.3867797851562, "learning_rate": 2.396e-05, "loss": 157.0546, "step": 11980 }, { "epoch": 0.048441117175789944, "grad_norm": 2313.9013671875, "learning_rate": 2.398e-05, "loss": 199.3208, "step": 11990 }, { "epoch": 0.04848151844115758, "grad_norm": 712.7728881835938, "learning_rate": 2.4e-05, "loss": 163.3351, "step": 12000 }, { "epoch": 0.04852191970652521, "grad_norm": 2287.64794921875, "learning_rate": 2.402e-05, "loss": 156.7126, "step": 12010 }, { "epoch": 0.04856232097189284, "grad_norm": 1171.4212646484375, "learning_rate": 2.404e-05, "loss": 154.6671, "step": 12020 }, { "epoch": 0.04860272223726047, "grad_norm": 928.2921142578125, "learning_rate": 2.4060000000000003e-05, "loss": 189.5635, "step": 12030 }, { "epoch": 0.0486431235026281, "grad_norm": 2594.64794921875, "learning_rate": 2.408e-05, "loss": 122.9152, "step": 12040 }, { "epoch": 0.048683524767995735, "grad_norm": 625.3611450195312, "learning_rate": 2.41e-05, "loss": 157.0619, "step": 12050 }, { "epoch": 0.04872392603336337, "grad_norm": 1715.5015869140625, "learning_rate": 2.412e-05, "loss": 146.0929, "step": 12060 }, { "epoch": 0.048764327298730994, "grad_norm": 1481.7435302734375, "learning_rate": 2.4140000000000003e-05, "loss": 150.0942, "step": 12070 }, { "epoch": 0.04880472856409863, "grad_norm": 1766.0885009765625, "learning_rate": 2.4160000000000002e-05, "loss": 122.9469, "step": 12080 }, { "epoch": 0.04884512982946626, "grad_norm": 1821.820556640625, "learning_rate": 2.418e-05, "loss": 236.4261, "step": 12090 }, { "epoch": 0.048885531094833894, "grad_norm": 1121.892333984375, "learning_rate": 2.4200000000000002e-05, "loss": 194.8495, "step": 12100 }, { "epoch": 0.04892593236020152, "grad_norm": 2687.8505859375, "learning_rate": 2.4220000000000002e-05, "loss": 199.661, "step": 12110 }, { "epoch": 0.04896633362556915, "grad_norm": 1251.5821533203125, "learning_rate": 2.4240000000000002e-05, "loss": 188.0581, "step": 12120 }, { "epoch": 0.049006734890936786, "grad_norm": 1508.91455078125, "learning_rate": 2.426e-05, "loss": 155.5656, "step": 12130 }, { "epoch": 0.04904713615630442, "grad_norm": 1015.0963745117188, "learning_rate": 2.428e-05, "loss": 156.3022, "step": 12140 }, { "epoch": 0.049087537421672045, "grad_norm": 992.3895874023438, "learning_rate": 2.43e-05, "loss": 173.4455, "step": 12150 }, { "epoch": 0.04912793868703968, "grad_norm": 5113.12451171875, "learning_rate": 2.432e-05, "loss": 201.295, "step": 12160 }, { "epoch": 0.04916833995240731, "grad_norm": 1270.9464111328125, "learning_rate": 2.434e-05, "loss": 185.7967, "step": 12170 }, { "epoch": 0.04920874121777494, "grad_norm": 1565.5892333984375, "learning_rate": 2.4360000000000004e-05, "loss": 170.4479, "step": 12180 }, { "epoch": 0.04924914248314257, "grad_norm": 1304.619140625, "learning_rate": 2.438e-05, "loss": 173.9784, "step": 12190 }, { "epoch": 0.0492895437485102, "grad_norm": 1918.267822265625, "learning_rate": 2.44e-05, "loss": 100.1017, "step": 12200 }, { "epoch": 0.049329945013877836, "grad_norm": 3872.838134765625, "learning_rate": 2.442e-05, "loss": 128.8565, "step": 12210 }, { "epoch": 0.04937034627924546, "grad_norm": 1211.8031005859375, "learning_rate": 2.4440000000000003e-05, "loss": 130.2835, "step": 12220 }, { "epoch": 0.049410747544613096, "grad_norm": 1228.3138427734375, "learning_rate": 2.4460000000000003e-05, "loss": 191.7344, "step": 12230 }, { "epoch": 0.04945114880998073, "grad_norm": 931.3762817382812, "learning_rate": 2.448e-05, "loss": 188.0092, "step": 12240 }, { "epoch": 0.04949155007534836, "grad_norm": 3841.882568359375, "learning_rate": 2.45e-05, "loss": 176.4354, "step": 12250 }, { "epoch": 0.04953195134071599, "grad_norm": 868.33935546875, "learning_rate": 2.4520000000000002e-05, "loss": 211.7608, "step": 12260 }, { "epoch": 0.04957235260608362, "grad_norm": 1211.6044921875, "learning_rate": 2.4540000000000002e-05, "loss": 130.6806, "step": 12270 }, { "epoch": 0.049612753871451254, "grad_norm": 1216.9310302734375, "learning_rate": 2.4560000000000002e-05, "loss": 163.6193, "step": 12280 }, { "epoch": 0.04965315513681889, "grad_norm": 745.621337890625, "learning_rate": 2.4580000000000002e-05, "loss": 108.5664, "step": 12290 }, { "epoch": 0.04969355640218651, "grad_norm": 737.6925048828125, "learning_rate": 2.46e-05, "loss": 163.5928, "step": 12300 }, { "epoch": 0.049733957667554146, "grad_norm": 1154.443115234375, "learning_rate": 2.462e-05, "loss": 114.6066, "step": 12310 }, { "epoch": 0.04977435893292178, "grad_norm": 1484.0311279296875, "learning_rate": 2.464e-05, "loss": 141.8092, "step": 12320 }, { "epoch": 0.04981476019828941, "grad_norm": 1149.4248046875, "learning_rate": 2.466e-05, "loss": 142.7456, "step": 12330 }, { "epoch": 0.04985516146365704, "grad_norm": 1364.304931640625, "learning_rate": 2.468e-05, "loss": 207.4074, "step": 12340 }, { "epoch": 0.04989556272902467, "grad_norm": 802.7201538085938, "learning_rate": 2.47e-05, "loss": 162.2597, "step": 12350 }, { "epoch": 0.049935963994392304, "grad_norm": 1634.9937744140625, "learning_rate": 2.472e-05, "loss": 158.4684, "step": 12360 }, { "epoch": 0.04997636525975994, "grad_norm": 2859.273681640625, "learning_rate": 2.4740000000000004e-05, "loss": 171.345, "step": 12370 }, { "epoch": 0.050016766525127564, "grad_norm": 1695.544189453125, "learning_rate": 2.476e-05, "loss": 153.0457, "step": 12380 }, { "epoch": 0.0500571677904952, "grad_norm": 1252.4989013671875, "learning_rate": 2.478e-05, "loss": 223.5171, "step": 12390 }, { "epoch": 0.05009756905586283, "grad_norm": 0.0, "learning_rate": 2.48e-05, "loss": 124.2808, "step": 12400 }, { "epoch": 0.05013797032123046, "grad_norm": 1869.185546875, "learning_rate": 2.4820000000000003e-05, "loss": 176.1387, "step": 12410 }, { "epoch": 0.05017837158659809, "grad_norm": 2657.873291015625, "learning_rate": 2.4840000000000003e-05, "loss": 136.5703, "step": 12420 }, { "epoch": 0.05021877285196572, "grad_norm": 1434.21337890625, "learning_rate": 2.486e-05, "loss": 185.8033, "step": 12430 }, { "epoch": 0.050259174117333355, "grad_norm": 4622.36328125, "learning_rate": 2.488e-05, "loss": 238.0834, "step": 12440 }, { "epoch": 0.05029957538270099, "grad_norm": 2024.818359375, "learning_rate": 2.4900000000000002e-05, "loss": 134.9255, "step": 12450 }, { "epoch": 0.050339976648068614, "grad_norm": 2336.821044921875, "learning_rate": 2.4920000000000002e-05, "loss": 112.3334, "step": 12460 }, { "epoch": 0.05038037791343625, "grad_norm": 617.8527221679688, "learning_rate": 2.4940000000000002e-05, "loss": 69.1087, "step": 12470 }, { "epoch": 0.05042077917880388, "grad_norm": 871.863037109375, "learning_rate": 2.496e-05, "loss": 201.0841, "step": 12480 }, { "epoch": 0.05046118044417151, "grad_norm": 1000.557373046875, "learning_rate": 2.498e-05, "loss": 167.9705, "step": 12490 }, { "epoch": 0.05050158170953914, "grad_norm": 1009.2303466796875, "learning_rate": 2.5e-05, "loss": 119.5033, "step": 12500 }, { "epoch": 0.05054198297490677, "grad_norm": 2307.47607421875, "learning_rate": 2.5019999999999998e-05, "loss": 138.7849, "step": 12510 }, { "epoch": 0.050582384240274406, "grad_norm": 2979.96533203125, "learning_rate": 2.504e-05, "loss": 132.3597, "step": 12520 }, { "epoch": 0.05062278550564204, "grad_norm": 1333.7869873046875, "learning_rate": 2.506e-05, "loss": 114.2453, "step": 12530 }, { "epoch": 0.050663186771009665, "grad_norm": 2323.83935546875, "learning_rate": 2.5080000000000004e-05, "loss": 199.8365, "step": 12540 }, { "epoch": 0.0507035880363773, "grad_norm": 988.7455444335938, "learning_rate": 2.51e-05, "loss": 158.2298, "step": 12550 }, { "epoch": 0.05074398930174493, "grad_norm": 1229.513427734375, "learning_rate": 2.512e-05, "loss": 177.6321, "step": 12560 }, { "epoch": 0.050784390567112564, "grad_norm": 598.7608032226562, "learning_rate": 2.5140000000000003e-05, "loss": 205.6968, "step": 12570 }, { "epoch": 0.05082479183248019, "grad_norm": 476.6854553222656, "learning_rate": 2.516e-05, "loss": 135.5016, "step": 12580 }, { "epoch": 0.05086519309784782, "grad_norm": 441.1506652832031, "learning_rate": 2.5180000000000003e-05, "loss": 183.0943, "step": 12590 }, { "epoch": 0.050905594363215456, "grad_norm": 861.925537109375, "learning_rate": 2.5200000000000003e-05, "loss": 110.7362, "step": 12600 }, { "epoch": 0.05094599562858309, "grad_norm": 1135.808837890625, "learning_rate": 2.522e-05, "loss": 157.3768, "step": 12610 }, { "epoch": 0.050986396893950715, "grad_norm": 764.6640014648438, "learning_rate": 2.5240000000000002e-05, "loss": 197.2682, "step": 12620 }, { "epoch": 0.05102679815931835, "grad_norm": 1185.298583984375, "learning_rate": 2.526e-05, "loss": 154.5542, "step": 12630 }, { "epoch": 0.05106719942468598, "grad_norm": 572.9112548828125, "learning_rate": 2.5280000000000005e-05, "loss": 185.382, "step": 12640 }, { "epoch": 0.051107600690053615, "grad_norm": 0.0, "learning_rate": 2.5300000000000002e-05, "loss": 155.8, "step": 12650 }, { "epoch": 0.05114800195542124, "grad_norm": 1676.220703125, "learning_rate": 2.5319999999999998e-05, "loss": 153.2075, "step": 12660 }, { "epoch": 0.051188403220788874, "grad_norm": 3056.052001953125, "learning_rate": 2.534e-05, "loss": 160.0424, "step": 12670 }, { "epoch": 0.05122880448615651, "grad_norm": 3853.407958984375, "learning_rate": 2.536e-05, "loss": 142.0041, "step": 12680 }, { "epoch": 0.05126920575152414, "grad_norm": 1022.783447265625, "learning_rate": 2.5380000000000004e-05, "loss": 173.1114, "step": 12690 }, { "epoch": 0.051309607016891766, "grad_norm": 609.7467041015625, "learning_rate": 2.54e-05, "loss": 124.7601, "step": 12700 }, { "epoch": 0.0513500082822594, "grad_norm": 871.4917602539062, "learning_rate": 2.542e-05, "loss": 107.3807, "step": 12710 }, { "epoch": 0.05139040954762703, "grad_norm": 1300.36279296875, "learning_rate": 2.5440000000000004e-05, "loss": 128.8927, "step": 12720 }, { "epoch": 0.051430810812994665, "grad_norm": 1378.396484375, "learning_rate": 2.546e-05, "loss": 179.4028, "step": 12730 }, { "epoch": 0.05147121207836229, "grad_norm": 5043.6845703125, "learning_rate": 2.5480000000000003e-05, "loss": 145.2417, "step": 12740 }, { "epoch": 0.051511613343729924, "grad_norm": 956.9037475585938, "learning_rate": 2.5500000000000003e-05, "loss": 122.089, "step": 12750 }, { "epoch": 0.05155201460909756, "grad_norm": 655.8726806640625, "learning_rate": 2.552e-05, "loss": 172.3547, "step": 12760 }, { "epoch": 0.05159241587446519, "grad_norm": 6090.908203125, "learning_rate": 2.5540000000000003e-05, "loss": 200.5886, "step": 12770 }, { "epoch": 0.051632817139832816, "grad_norm": 760.5556640625, "learning_rate": 2.556e-05, "loss": 168.6275, "step": 12780 }, { "epoch": 0.05167321840520045, "grad_norm": 1935.444091796875, "learning_rate": 2.5580000000000002e-05, "loss": 173.0487, "step": 12790 }, { "epoch": 0.05171361967056808, "grad_norm": 1355.29931640625, "learning_rate": 2.5600000000000002e-05, "loss": 169.9729, "step": 12800 }, { "epoch": 0.051754020935935716, "grad_norm": 1155.65380859375, "learning_rate": 2.562e-05, "loss": 230.3313, "step": 12810 }, { "epoch": 0.05179442220130334, "grad_norm": 1020.41064453125, "learning_rate": 2.5640000000000002e-05, "loss": 151.9028, "step": 12820 }, { "epoch": 0.051834823466670975, "grad_norm": 1026.9921875, "learning_rate": 2.566e-05, "loss": 194.6138, "step": 12830 }, { "epoch": 0.05187522473203861, "grad_norm": 859.4483032226562, "learning_rate": 2.5679999999999998e-05, "loss": 137.587, "step": 12840 }, { "epoch": 0.05191562599740624, "grad_norm": 1088.9954833984375, "learning_rate": 2.57e-05, "loss": 137.7595, "step": 12850 }, { "epoch": 0.05195602726277387, "grad_norm": 576.4170532226562, "learning_rate": 2.572e-05, "loss": 112.6303, "step": 12860 }, { "epoch": 0.0519964285281415, "grad_norm": 1509.91943359375, "learning_rate": 2.5740000000000004e-05, "loss": 152.7388, "step": 12870 }, { "epoch": 0.05203682979350913, "grad_norm": 1924.8671875, "learning_rate": 2.576e-05, "loss": 150.297, "step": 12880 }, { "epoch": 0.052077231058876766, "grad_norm": 4206.75048828125, "learning_rate": 2.5779999999999997e-05, "loss": 153.3789, "step": 12890 }, { "epoch": 0.05211763232424439, "grad_norm": 732.2725219726562, "learning_rate": 2.58e-05, "loss": 146.1245, "step": 12900 }, { "epoch": 0.052158033589612025, "grad_norm": 804.0054321289062, "learning_rate": 2.582e-05, "loss": 155.7175, "step": 12910 }, { "epoch": 0.05219843485497966, "grad_norm": 1622.7066650390625, "learning_rate": 2.5840000000000003e-05, "loss": 181.8288, "step": 12920 }, { "epoch": 0.05223883612034729, "grad_norm": 1736.3221435546875, "learning_rate": 2.586e-05, "loss": 156.0094, "step": 12930 }, { "epoch": 0.05227923738571492, "grad_norm": 1899.74072265625, "learning_rate": 2.588e-05, "loss": 161.6015, "step": 12940 }, { "epoch": 0.05231963865108255, "grad_norm": 2343.95556640625, "learning_rate": 2.5900000000000003e-05, "loss": 170.2185, "step": 12950 }, { "epoch": 0.052360039916450184, "grad_norm": 1888.6707763671875, "learning_rate": 2.592e-05, "loss": 204.9056, "step": 12960 }, { "epoch": 0.05240044118181782, "grad_norm": 498.68780517578125, "learning_rate": 2.5940000000000002e-05, "loss": 142.0052, "step": 12970 }, { "epoch": 0.05244084244718544, "grad_norm": 838.1019287109375, "learning_rate": 2.5960000000000002e-05, "loss": 113.003, "step": 12980 }, { "epoch": 0.052481243712553076, "grad_norm": 636.7835083007812, "learning_rate": 2.598e-05, "loss": 96.236, "step": 12990 }, { "epoch": 0.05252164497792071, "grad_norm": 1207.0146484375, "learning_rate": 2.6000000000000002e-05, "loss": 128.6797, "step": 13000 }, { "epoch": 0.05256204624328834, "grad_norm": 2183.191650390625, "learning_rate": 2.602e-05, "loss": 135.0814, "step": 13010 }, { "epoch": 0.05260244750865597, "grad_norm": 1018.5341796875, "learning_rate": 2.6040000000000005e-05, "loss": 83.9305, "step": 13020 }, { "epoch": 0.0526428487740236, "grad_norm": 437.8675537109375, "learning_rate": 2.606e-05, "loss": 141.287, "step": 13030 }, { "epoch": 0.052683250039391234, "grad_norm": 2055.59228515625, "learning_rate": 2.6079999999999998e-05, "loss": 131.6824, "step": 13040 }, { "epoch": 0.05272365130475887, "grad_norm": 1526.861572265625, "learning_rate": 2.61e-05, "loss": 104.1964, "step": 13050 }, { "epoch": 0.05276405257012649, "grad_norm": 1107.270263671875, "learning_rate": 2.612e-05, "loss": 119.3528, "step": 13060 }, { "epoch": 0.052804453835494126, "grad_norm": 0.0, "learning_rate": 2.6140000000000004e-05, "loss": 209.7389, "step": 13070 }, { "epoch": 0.05284485510086176, "grad_norm": 887.7672729492188, "learning_rate": 2.616e-05, "loss": 134.3163, "step": 13080 }, { "epoch": 0.05288525636622939, "grad_norm": 600.7372436523438, "learning_rate": 2.618e-05, "loss": 120.8824, "step": 13090 }, { "epoch": 0.05292565763159702, "grad_norm": 2771.222900390625, "learning_rate": 2.6200000000000003e-05, "loss": 116.1366, "step": 13100 }, { "epoch": 0.05296605889696465, "grad_norm": 1079.399658203125, "learning_rate": 2.622e-05, "loss": 120.0783, "step": 13110 }, { "epoch": 0.053006460162332285, "grad_norm": 3635.591796875, "learning_rate": 2.6240000000000003e-05, "loss": 114.0672, "step": 13120 }, { "epoch": 0.05304686142769992, "grad_norm": 967.0631713867188, "learning_rate": 2.6260000000000003e-05, "loss": 197.697, "step": 13130 }, { "epoch": 0.053087262693067544, "grad_norm": 540.1041259765625, "learning_rate": 2.628e-05, "loss": 169.7453, "step": 13140 }, { "epoch": 0.05312766395843518, "grad_norm": 1378.034912109375, "learning_rate": 2.6300000000000002e-05, "loss": 152.1042, "step": 13150 }, { "epoch": 0.05316806522380281, "grad_norm": 2139.15234375, "learning_rate": 2.632e-05, "loss": 219.7844, "step": 13160 }, { "epoch": 0.05320846648917044, "grad_norm": 2143.552734375, "learning_rate": 2.6340000000000002e-05, "loss": 152.3353, "step": 13170 }, { "epoch": 0.05324886775453807, "grad_norm": 1372.249755859375, "learning_rate": 2.6360000000000002e-05, "loss": 216.3558, "step": 13180 }, { "epoch": 0.0532892690199057, "grad_norm": 723.7531127929688, "learning_rate": 2.6379999999999998e-05, "loss": 106.8604, "step": 13190 }, { "epoch": 0.053329670285273335, "grad_norm": 1488.490234375, "learning_rate": 2.64e-05, "loss": 128.0357, "step": 13200 }, { "epoch": 0.05337007155064097, "grad_norm": 962.85498046875, "learning_rate": 2.642e-05, "loss": 214.9135, "step": 13210 }, { "epoch": 0.053410472816008595, "grad_norm": 916.577392578125, "learning_rate": 2.6440000000000004e-05, "loss": 100.3119, "step": 13220 }, { "epoch": 0.05345087408137623, "grad_norm": 520.3284912109375, "learning_rate": 2.646e-05, "loss": 159.4224, "step": 13230 }, { "epoch": 0.05349127534674386, "grad_norm": 1644.9588623046875, "learning_rate": 2.648e-05, "loss": 153.6036, "step": 13240 }, { "epoch": 0.053531676612111494, "grad_norm": 834.4176025390625, "learning_rate": 2.6500000000000004e-05, "loss": 179.9941, "step": 13250 }, { "epoch": 0.05357207787747912, "grad_norm": 753.1478881835938, "learning_rate": 2.652e-05, "loss": 114.1454, "step": 13260 }, { "epoch": 0.05361247914284675, "grad_norm": 1375.6973876953125, "learning_rate": 2.6540000000000003e-05, "loss": 121.5828, "step": 13270 }, { "epoch": 0.053652880408214386, "grad_norm": 837.9177856445312, "learning_rate": 2.6560000000000003e-05, "loss": 162.0316, "step": 13280 }, { "epoch": 0.05369328167358202, "grad_norm": 1957.6124267578125, "learning_rate": 2.658e-05, "loss": 123.4925, "step": 13290 }, { "epoch": 0.053733682938949645, "grad_norm": 2115.5908203125, "learning_rate": 2.6600000000000003e-05, "loss": 163.0786, "step": 13300 }, { "epoch": 0.05377408420431728, "grad_norm": 767.2696533203125, "learning_rate": 2.662e-05, "loss": 99.3506, "step": 13310 }, { "epoch": 0.05381448546968491, "grad_norm": 1096.5877685546875, "learning_rate": 2.6640000000000002e-05, "loss": 130.1019, "step": 13320 }, { "epoch": 0.053854886735052544, "grad_norm": 990.4303588867188, "learning_rate": 2.6660000000000002e-05, "loss": 218.8272, "step": 13330 }, { "epoch": 0.05389528800042017, "grad_norm": 1289.1722412109375, "learning_rate": 2.668e-05, "loss": 181.8528, "step": 13340 }, { "epoch": 0.0539356892657878, "grad_norm": 779.4696655273438, "learning_rate": 2.6700000000000002e-05, "loss": 129.5975, "step": 13350 }, { "epoch": 0.053976090531155436, "grad_norm": 928.5990600585938, "learning_rate": 2.672e-05, "loss": 186.8758, "step": 13360 }, { "epoch": 0.05401649179652307, "grad_norm": 3074.220947265625, "learning_rate": 2.6740000000000005e-05, "loss": 182.7136, "step": 13370 }, { "epoch": 0.054056893061890696, "grad_norm": 1353.44970703125, "learning_rate": 2.676e-05, "loss": 115.3092, "step": 13380 }, { "epoch": 0.05409729432725833, "grad_norm": 1833.181396484375, "learning_rate": 2.678e-05, "loss": 136.1055, "step": 13390 }, { "epoch": 0.05413769559262596, "grad_norm": 671.771484375, "learning_rate": 2.6800000000000004e-05, "loss": 101.5414, "step": 13400 }, { "epoch": 0.054178096857993595, "grad_norm": 1407.6787109375, "learning_rate": 2.682e-05, "loss": 140.1277, "step": 13410 }, { "epoch": 0.05421849812336122, "grad_norm": 999.6378784179688, "learning_rate": 2.6840000000000004e-05, "loss": 173.1913, "step": 13420 }, { "epoch": 0.054258899388728854, "grad_norm": 1126.044189453125, "learning_rate": 2.686e-05, "loss": 100.9075, "step": 13430 }, { "epoch": 0.05429930065409649, "grad_norm": 995.6368408203125, "learning_rate": 2.688e-05, "loss": 152.3883, "step": 13440 }, { "epoch": 0.05433970191946412, "grad_norm": 1857.345947265625, "learning_rate": 2.6900000000000003e-05, "loss": 164.2901, "step": 13450 }, { "epoch": 0.054380103184831746, "grad_norm": 1917.1810302734375, "learning_rate": 2.692e-05, "loss": 117.7432, "step": 13460 }, { "epoch": 0.05442050445019938, "grad_norm": 1812.2779541015625, "learning_rate": 2.694e-05, "loss": 158.6927, "step": 13470 }, { "epoch": 0.05446090571556701, "grad_norm": 823.9198608398438, "learning_rate": 2.6960000000000003e-05, "loss": 147.1588, "step": 13480 }, { "epoch": 0.054501306980934645, "grad_norm": 1157.1685791015625, "learning_rate": 2.698e-05, "loss": 167.8175, "step": 13490 }, { "epoch": 0.05454170824630227, "grad_norm": 1123.951416015625, "learning_rate": 2.7000000000000002e-05, "loss": 194.7915, "step": 13500 }, { "epoch": 0.054582109511669905, "grad_norm": 1402.1549072265625, "learning_rate": 2.7020000000000002e-05, "loss": 137.6079, "step": 13510 }, { "epoch": 0.05462251077703754, "grad_norm": 786.1826782226562, "learning_rate": 2.704e-05, "loss": 113.2719, "step": 13520 }, { "epoch": 0.05466291204240517, "grad_norm": 1365.093017578125, "learning_rate": 2.7060000000000002e-05, "loss": 104.7612, "step": 13530 }, { "epoch": 0.0547033133077728, "grad_norm": 1461.1785888671875, "learning_rate": 2.7079999999999998e-05, "loss": 168.2114, "step": 13540 }, { "epoch": 0.05474371457314043, "grad_norm": 1377.26806640625, "learning_rate": 2.7100000000000005e-05, "loss": 184.7103, "step": 13550 }, { "epoch": 0.05478411583850806, "grad_norm": 1345.474853515625, "learning_rate": 2.712e-05, "loss": 137.9565, "step": 13560 }, { "epoch": 0.054824517103875696, "grad_norm": 1087.7122802734375, "learning_rate": 2.7139999999999998e-05, "loss": 152.1091, "step": 13570 }, { "epoch": 0.05486491836924332, "grad_norm": 0.0, "learning_rate": 2.716e-05, "loss": 82.2364, "step": 13580 }, { "epoch": 0.054905319634610955, "grad_norm": 730.9200439453125, "learning_rate": 2.718e-05, "loss": 165.8833, "step": 13590 }, { "epoch": 0.05494572089997859, "grad_norm": 1368.674072265625, "learning_rate": 2.7200000000000004e-05, "loss": 121.1773, "step": 13600 }, { "epoch": 0.05498612216534622, "grad_norm": 1869.849853515625, "learning_rate": 2.722e-05, "loss": 231.0817, "step": 13610 }, { "epoch": 0.05502652343071385, "grad_norm": 976.0435180664062, "learning_rate": 2.724e-05, "loss": 111.1706, "step": 13620 }, { "epoch": 0.05506692469608148, "grad_norm": 1996.230224609375, "learning_rate": 2.7260000000000003e-05, "loss": 189.7502, "step": 13630 }, { "epoch": 0.05510732596144911, "grad_norm": 971.2109985351562, "learning_rate": 2.728e-05, "loss": 117.1644, "step": 13640 }, { "epoch": 0.055147727226816746, "grad_norm": 1315.8665771484375, "learning_rate": 2.7300000000000003e-05, "loss": 185.0801, "step": 13650 }, { "epoch": 0.05518812849218437, "grad_norm": 600.6631469726562, "learning_rate": 2.7320000000000003e-05, "loss": 157.1564, "step": 13660 }, { "epoch": 0.055228529757552006, "grad_norm": 888.3346557617188, "learning_rate": 2.734e-05, "loss": 117.6915, "step": 13670 }, { "epoch": 0.05526893102291964, "grad_norm": 808.2348022460938, "learning_rate": 2.7360000000000002e-05, "loss": 99.7357, "step": 13680 }, { "epoch": 0.05530933228828727, "grad_norm": 1063.2569580078125, "learning_rate": 2.738e-05, "loss": 143.7845, "step": 13690 }, { "epoch": 0.0553497335536549, "grad_norm": 1069.0965576171875, "learning_rate": 2.7400000000000002e-05, "loss": 155.6282, "step": 13700 }, { "epoch": 0.05539013481902253, "grad_norm": 1055.5029296875, "learning_rate": 2.7420000000000002e-05, "loss": 173.6117, "step": 13710 }, { "epoch": 0.055430536084390164, "grad_norm": 940.4775390625, "learning_rate": 2.7439999999999998e-05, "loss": 120.3455, "step": 13720 }, { "epoch": 0.0554709373497578, "grad_norm": 1010.4285888671875, "learning_rate": 2.746e-05, "loss": 142.2922, "step": 13730 }, { "epoch": 0.05551133861512542, "grad_norm": 1600.6968994140625, "learning_rate": 2.748e-05, "loss": 157.9479, "step": 13740 }, { "epoch": 0.055551739880493056, "grad_norm": 1101.0772705078125, "learning_rate": 2.7500000000000004e-05, "loss": 137.5087, "step": 13750 }, { "epoch": 0.05559214114586069, "grad_norm": 2070.27978515625, "learning_rate": 2.752e-05, "loss": 125.7709, "step": 13760 }, { "epoch": 0.05563254241122832, "grad_norm": 869.279052734375, "learning_rate": 2.754e-05, "loss": 135.6122, "step": 13770 }, { "epoch": 0.05567294367659595, "grad_norm": 635.1331787109375, "learning_rate": 2.7560000000000004e-05, "loss": 120.2631, "step": 13780 }, { "epoch": 0.05571334494196358, "grad_norm": 712.9791870117188, "learning_rate": 2.758e-05, "loss": 132.9366, "step": 13790 }, { "epoch": 0.055753746207331215, "grad_norm": 1173.3760986328125, "learning_rate": 2.7600000000000003e-05, "loss": 174.3566, "step": 13800 }, { "epoch": 0.05579414747269885, "grad_norm": 1058.1973876953125, "learning_rate": 2.762e-05, "loss": 101.473, "step": 13810 }, { "epoch": 0.055834548738066474, "grad_norm": 1767.0400390625, "learning_rate": 2.764e-05, "loss": 149.575, "step": 13820 }, { "epoch": 0.05587495000343411, "grad_norm": 438.71697998046875, "learning_rate": 2.7660000000000003e-05, "loss": 150.794, "step": 13830 }, { "epoch": 0.05591535126880174, "grad_norm": 1495.567626953125, "learning_rate": 2.768e-05, "loss": 116.9854, "step": 13840 }, { "epoch": 0.05595575253416937, "grad_norm": 1169.4012451171875, "learning_rate": 2.7700000000000002e-05, "loss": 158.6589, "step": 13850 }, { "epoch": 0.055996153799537, "grad_norm": 1633.1995849609375, "learning_rate": 2.7720000000000002e-05, "loss": 126.1669, "step": 13860 }, { "epoch": 0.05603655506490463, "grad_norm": 1052.2532958984375, "learning_rate": 2.774e-05, "loss": 180.1025, "step": 13870 }, { "epoch": 0.056076956330272265, "grad_norm": 1021.2620239257812, "learning_rate": 2.7760000000000002e-05, "loss": 136.4397, "step": 13880 }, { "epoch": 0.0561173575956399, "grad_norm": 886.8902587890625, "learning_rate": 2.778e-05, "loss": 91.0113, "step": 13890 }, { "epoch": 0.056157758861007524, "grad_norm": 1001.5513305664062, "learning_rate": 2.7800000000000005e-05, "loss": 110.4852, "step": 13900 }, { "epoch": 0.05619816012637516, "grad_norm": 668.9522094726562, "learning_rate": 2.782e-05, "loss": 126.04, "step": 13910 }, { "epoch": 0.05623856139174279, "grad_norm": 3181.50146484375, "learning_rate": 2.7839999999999998e-05, "loss": 175.8362, "step": 13920 }, { "epoch": 0.05627896265711042, "grad_norm": 1067.25732421875, "learning_rate": 2.7860000000000004e-05, "loss": 124.6091, "step": 13930 }, { "epoch": 0.05631936392247805, "grad_norm": 758.8274536132812, "learning_rate": 2.788e-05, "loss": 129.1508, "step": 13940 }, { "epoch": 0.05635976518784568, "grad_norm": 845.1571044921875, "learning_rate": 2.7900000000000004e-05, "loss": 164.5396, "step": 13950 }, { "epoch": 0.056400166453213316, "grad_norm": 812.546875, "learning_rate": 2.792e-05, "loss": 144.2103, "step": 13960 }, { "epoch": 0.05644056771858095, "grad_norm": 1005.88720703125, "learning_rate": 2.794e-05, "loss": 131.6613, "step": 13970 }, { "epoch": 0.056480968983948575, "grad_norm": 2054.029541015625, "learning_rate": 2.7960000000000003e-05, "loss": 127.4749, "step": 13980 }, { "epoch": 0.05652137024931621, "grad_norm": 466.14068603515625, "learning_rate": 2.798e-05, "loss": 163.5435, "step": 13990 }, { "epoch": 0.05656177151468384, "grad_norm": 599.2301025390625, "learning_rate": 2.8000000000000003e-05, "loss": 107.817, "step": 14000 }, { "epoch": 0.056602172780051474, "grad_norm": 2105.31640625, "learning_rate": 2.8020000000000003e-05, "loss": 197.3428, "step": 14010 }, { "epoch": 0.0566425740454191, "grad_norm": 953.76025390625, "learning_rate": 2.804e-05, "loss": 147.2875, "step": 14020 }, { "epoch": 0.05668297531078673, "grad_norm": 495.1427001953125, "learning_rate": 2.8060000000000002e-05, "loss": 132.651, "step": 14030 }, { "epoch": 0.056723376576154366, "grad_norm": 1009.431884765625, "learning_rate": 2.8080000000000002e-05, "loss": 145.7106, "step": 14040 }, { "epoch": 0.056763777841522, "grad_norm": 939.3583374023438, "learning_rate": 2.8100000000000005e-05, "loss": 139.3796, "step": 14050 }, { "epoch": 0.056804179106889625, "grad_norm": 1285.1646728515625, "learning_rate": 2.8120000000000002e-05, "loss": 142.4715, "step": 14060 }, { "epoch": 0.05684458037225726, "grad_norm": 1945.31494140625, "learning_rate": 2.8139999999999998e-05, "loss": 181.1451, "step": 14070 }, { "epoch": 0.05688498163762489, "grad_norm": 811.0607299804688, "learning_rate": 2.816e-05, "loss": 126.362, "step": 14080 }, { "epoch": 0.056925382902992525, "grad_norm": 4073.4345703125, "learning_rate": 2.818e-05, "loss": 116.4024, "step": 14090 }, { "epoch": 0.05696578416836015, "grad_norm": 3792.585205078125, "learning_rate": 2.8199999999999998e-05, "loss": 192.2604, "step": 14100 }, { "epoch": 0.057006185433727784, "grad_norm": 2066.12060546875, "learning_rate": 2.822e-05, "loss": 119.7598, "step": 14110 }, { "epoch": 0.05704658669909542, "grad_norm": 938.1848754882812, "learning_rate": 2.824e-05, "loss": 127.1674, "step": 14120 }, { "epoch": 0.05708698796446305, "grad_norm": 1010.5850830078125, "learning_rate": 2.8260000000000004e-05, "loss": 144.3951, "step": 14130 }, { "epoch": 0.057127389229830676, "grad_norm": 2251.332275390625, "learning_rate": 2.828e-05, "loss": 202.1318, "step": 14140 }, { "epoch": 0.05716779049519831, "grad_norm": 701.1483154296875, "learning_rate": 2.83e-05, "loss": 138.087, "step": 14150 }, { "epoch": 0.05720819176056594, "grad_norm": 646.7645263671875, "learning_rate": 2.8320000000000003e-05, "loss": 114.6259, "step": 14160 }, { "epoch": 0.057248593025933575, "grad_norm": 1258.2159423828125, "learning_rate": 2.834e-05, "loss": 134.9526, "step": 14170 }, { "epoch": 0.0572889942913012, "grad_norm": 994.9586181640625, "learning_rate": 2.8360000000000003e-05, "loss": 116.3534, "step": 14180 }, { "epoch": 0.057329395556668834, "grad_norm": 1028.46240234375, "learning_rate": 2.8380000000000003e-05, "loss": 140.937, "step": 14190 }, { "epoch": 0.05736979682203647, "grad_norm": 814.48681640625, "learning_rate": 2.84e-05, "loss": 117.722, "step": 14200 }, { "epoch": 0.0574101980874041, "grad_norm": 1542.7169189453125, "learning_rate": 2.8420000000000002e-05, "loss": 115.8539, "step": 14210 }, { "epoch": 0.057450599352771727, "grad_norm": 576.6505737304688, "learning_rate": 2.844e-05, "loss": 111.9559, "step": 14220 }, { "epoch": 0.05749100061813936, "grad_norm": 1681.9095458984375, "learning_rate": 2.8460000000000002e-05, "loss": 145.2461, "step": 14230 }, { "epoch": 0.05753140188350699, "grad_norm": 1178.3514404296875, "learning_rate": 2.8480000000000002e-05, "loss": 178.6759, "step": 14240 }, { "epoch": 0.057571803148874626, "grad_norm": 1500.439208984375, "learning_rate": 2.8499999999999998e-05, "loss": 131.998, "step": 14250 }, { "epoch": 0.05761220441424225, "grad_norm": 1533.505126953125, "learning_rate": 2.852e-05, "loss": 147.4367, "step": 14260 }, { "epoch": 0.057652605679609885, "grad_norm": 495.7720947265625, "learning_rate": 2.854e-05, "loss": 115.4109, "step": 14270 }, { "epoch": 0.05769300694497752, "grad_norm": 561.2579956054688, "learning_rate": 2.8560000000000004e-05, "loss": 147.6145, "step": 14280 }, { "epoch": 0.05773340821034515, "grad_norm": 571.2040405273438, "learning_rate": 2.858e-05, "loss": 197.6198, "step": 14290 }, { "epoch": 0.05777380947571278, "grad_norm": 2524.6904296875, "learning_rate": 2.86e-05, "loss": 171.4188, "step": 14300 }, { "epoch": 0.05781421074108041, "grad_norm": 2208.06396484375, "learning_rate": 2.8620000000000004e-05, "loss": 124.7192, "step": 14310 }, { "epoch": 0.05785461200644804, "grad_norm": 1544.6385498046875, "learning_rate": 2.864e-05, "loss": 145.8727, "step": 14320 }, { "epoch": 0.057895013271815676, "grad_norm": 953.2141723632812, "learning_rate": 2.8660000000000003e-05, "loss": 98.7387, "step": 14330 }, { "epoch": 0.0579354145371833, "grad_norm": 497.53021240234375, "learning_rate": 2.868e-05, "loss": 167.9052, "step": 14340 }, { "epoch": 0.057975815802550935, "grad_norm": 441.49951171875, "learning_rate": 2.87e-05, "loss": 166.814, "step": 14350 }, { "epoch": 0.05801621706791857, "grad_norm": 2393.28271484375, "learning_rate": 2.8720000000000003e-05, "loss": 155.3981, "step": 14360 }, { "epoch": 0.0580566183332862, "grad_norm": 5085.328125, "learning_rate": 2.874e-05, "loss": 126.6876, "step": 14370 }, { "epoch": 0.05809701959865383, "grad_norm": 1251.988525390625, "learning_rate": 2.8760000000000002e-05, "loss": 161.9475, "step": 14380 }, { "epoch": 0.05813742086402146, "grad_norm": 2122.52392578125, "learning_rate": 2.8780000000000002e-05, "loss": 177.3616, "step": 14390 }, { "epoch": 0.058177822129389094, "grad_norm": 886.3876342773438, "learning_rate": 2.88e-05, "loss": 102.448, "step": 14400 }, { "epoch": 0.05821822339475673, "grad_norm": 1496.39697265625, "learning_rate": 2.8820000000000002e-05, "loss": 177.4485, "step": 14410 }, { "epoch": 0.05825862466012435, "grad_norm": 1387.0819091796875, "learning_rate": 2.8840000000000002e-05, "loss": 134.0211, "step": 14420 }, { "epoch": 0.058299025925491986, "grad_norm": 571.4561157226562, "learning_rate": 2.8860000000000005e-05, "loss": 109.2302, "step": 14430 }, { "epoch": 0.05833942719085962, "grad_norm": 679.628173828125, "learning_rate": 2.888e-05, "loss": 124.9287, "step": 14440 }, { "epoch": 0.05837982845622725, "grad_norm": 1030.914306640625, "learning_rate": 2.8899999999999998e-05, "loss": 141.9723, "step": 14450 }, { "epoch": 0.05842022972159488, "grad_norm": 1213.34033203125, "learning_rate": 2.8920000000000004e-05, "loss": 133.645, "step": 14460 }, { "epoch": 0.05846063098696251, "grad_norm": 867.3865356445312, "learning_rate": 2.894e-05, "loss": 109.0037, "step": 14470 }, { "epoch": 0.058501032252330144, "grad_norm": 1019.241943359375, "learning_rate": 2.8960000000000004e-05, "loss": 179.6844, "step": 14480 }, { "epoch": 0.05854143351769778, "grad_norm": 3133.170166015625, "learning_rate": 2.898e-05, "loss": 141.274, "step": 14490 }, { "epoch": 0.058581834783065403, "grad_norm": 675.6682739257812, "learning_rate": 2.9e-05, "loss": 116.3539, "step": 14500 }, { "epoch": 0.05862223604843304, "grad_norm": 915.9588012695312, "learning_rate": 2.9020000000000003e-05, "loss": 95.1154, "step": 14510 }, { "epoch": 0.05866263731380067, "grad_norm": 1290.10546875, "learning_rate": 2.904e-05, "loss": 179.5605, "step": 14520 }, { "epoch": 0.0587030385791683, "grad_norm": 712.0923461914062, "learning_rate": 2.9060000000000003e-05, "loss": 146.6162, "step": 14530 }, { "epoch": 0.05874343984453593, "grad_norm": 893.6064453125, "learning_rate": 2.9080000000000003e-05, "loss": 104.5347, "step": 14540 }, { "epoch": 0.05878384110990356, "grad_norm": 1025.2301025390625, "learning_rate": 2.91e-05, "loss": 104.5477, "step": 14550 }, { "epoch": 0.058824242375271195, "grad_norm": 2338.9072265625, "learning_rate": 2.9120000000000002e-05, "loss": 142.9934, "step": 14560 }, { "epoch": 0.05886464364063883, "grad_norm": 633.3606567382812, "learning_rate": 2.9140000000000002e-05, "loss": 98.545, "step": 14570 }, { "epoch": 0.058905044906006454, "grad_norm": 1361.783935546875, "learning_rate": 2.9160000000000005e-05, "loss": 113.2945, "step": 14580 }, { "epoch": 0.05894544617137409, "grad_norm": 1828.3387451171875, "learning_rate": 2.9180000000000002e-05, "loss": 119.5862, "step": 14590 }, { "epoch": 0.05898584743674172, "grad_norm": 2555.387939453125, "learning_rate": 2.9199999999999998e-05, "loss": 177.2158, "step": 14600 }, { "epoch": 0.05902624870210935, "grad_norm": 2425.751220703125, "learning_rate": 2.922e-05, "loss": 118.8401, "step": 14610 }, { "epoch": 0.05906664996747698, "grad_norm": 758.7974243164062, "learning_rate": 2.924e-05, "loss": 136.7834, "step": 14620 }, { "epoch": 0.05910705123284461, "grad_norm": 2093.38720703125, "learning_rate": 2.9260000000000004e-05, "loss": 91.4101, "step": 14630 }, { "epoch": 0.059147452498212245, "grad_norm": 1140.317138671875, "learning_rate": 2.928e-05, "loss": 142.2483, "step": 14640 }, { "epoch": 0.05918785376357988, "grad_norm": 764.8568115234375, "learning_rate": 2.93e-05, "loss": 105.0373, "step": 14650 }, { "epoch": 0.059228255028947505, "grad_norm": 562.0804443359375, "learning_rate": 2.9320000000000004e-05, "loss": 140.6483, "step": 14660 }, { "epoch": 0.05926865629431514, "grad_norm": 1717.2381591796875, "learning_rate": 2.934e-05, "loss": 120.7989, "step": 14670 }, { "epoch": 0.05930905755968277, "grad_norm": 1295.689208984375, "learning_rate": 2.9360000000000003e-05, "loss": 145.4503, "step": 14680 }, { "epoch": 0.059349458825050404, "grad_norm": 1297.196533203125, "learning_rate": 2.9380000000000003e-05, "loss": 127.0184, "step": 14690 }, { "epoch": 0.05938986009041803, "grad_norm": 483.24420166015625, "learning_rate": 2.94e-05, "loss": 153.2117, "step": 14700 }, { "epoch": 0.05943026135578566, "grad_norm": 777.18408203125, "learning_rate": 2.9420000000000003e-05, "loss": 126.1312, "step": 14710 }, { "epoch": 0.059470662621153296, "grad_norm": 1387.6092529296875, "learning_rate": 2.944e-05, "loss": 158.3849, "step": 14720 }, { "epoch": 0.05951106388652093, "grad_norm": 3654.914306640625, "learning_rate": 2.946e-05, "loss": 177.4717, "step": 14730 }, { "epoch": 0.059551465151888555, "grad_norm": 2103.89599609375, "learning_rate": 2.9480000000000002e-05, "loss": 133.8275, "step": 14740 }, { "epoch": 0.05959186641725619, "grad_norm": 506.49200439453125, "learning_rate": 2.95e-05, "loss": 121.0761, "step": 14750 }, { "epoch": 0.05963226768262382, "grad_norm": 1055.931396484375, "learning_rate": 2.9520000000000002e-05, "loss": 143.0577, "step": 14760 }, { "epoch": 0.059672668947991454, "grad_norm": 745.9114379882812, "learning_rate": 2.9540000000000002e-05, "loss": 140.8658, "step": 14770 }, { "epoch": 0.05971307021335908, "grad_norm": 809.5819702148438, "learning_rate": 2.9559999999999998e-05, "loss": 104.4415, "step": 14780 }, { "epoch": 0.059753471478726713, "grad_norm": 1688.5380859375, "learning_rate": 2.958e-05, "loss": 152.457, "step": 14790 }, { "epoch": 0.05979387274409435, "grad_norm": 910.4462280273438, "learning_rate": 2.96e-05, "loss": 149.1814, "step": 14800 }, { "epoch": 0.05983427400946198, "grad_norm": 753.3027954101562, "learning_rate": 2.9620000000000004e-05, "loss": 142.1045, "step": 14810 }, { "epoch": 0.059874675274829606, "grad_norm": 545.16552734375, "learning_rate": 2.964e-05, "loss": 120.4504, "step": 14820 }, { "epoch": 0.05991507654019724, "grad_norm": 1327.6309814453125, "learning_rate": 2.9659999999999997e-05, "loss": 109.4865, "step": 14830 }, { "epoch": 0.05995547780556487, "grad_norm": 867.6209106445312, "learning_rate": 2.9680000000000004e-05, "loss": 128.9373, "step": 14840 }, { "epoch": 0.059995879070932505, "grad_norm": 552.6983032226562, "learning_rate": 2.97e-05, "loss": 148.3395, "step": 14850 }, { "epoch": 0.06003628033630013, "grad_norm": 1947.0716552734375, "learning_rate": 2.9720000000000003e-05, "loss": 115.2734, "step": 14860 }, { "epoch": 0.060076681601667764, "grad_norm": 793.457763671875, "learning_rate": 2.974e-05, "loss": 112.6054, "step": 14870 }, { "epoch": 0.0601170828670354, "grad_norm": 849.2798461914062, "learning_rate": 2.976e-05, "loss": 126.2524, "step": 14880 }, { "epoch": 0.06015748413240303, "grad_norm": 1418.7293701171875, "learning_rate": 2.9780000000000003e-05, "loss": 135.3141, "step": 14890 }, { "epoch": 0.060197885397770656, "grad_norm": 787.8895263671875, "learning_rate": 2.98e-05, "loss": 150.2098, "step": 14900 }, { "epoch": 0.06023828666313829, "grad_norm": 1818.129150390625, "learning_rate": 2.9820000000000002e-05, "loss": 142.2445, "step": 14910 }, { "epoch": 0.06027868792850592, "grad_norm": 910.6742553710938, "learning_rate": 2.9840000000000002e-05, "loss": 178.141, "step": 14920 }, { "epoch": 0.060319089193873555, "grad_norm": 2399.4091796875, "learning_rate": 2.986e-05, "loss": 175.9365, "step": 14930 }, { "epoch": 0.06035949045924118, "grad_norm": 1148.7041015625, "learning_rate": 2.9880000000000002e-05, "loss": 145.5652, "step": 14940 }, { "epoch": 0.060399891724608815, "grad_norm": 1392.08740234375, "learning_rate": 2.9900000000000002e-05, "loss": 139.3894, "step": 14950 }, { "epoch": 0.06044029298997645, "grad_norm": 878.8516235351562, "learning_rate": 2.9920000000000005e-05, "loss": 65.7636, "step": 14960 }, { "epoch": 0.06048069425534408, "grad_norm": 2386.329833984375, "learning_rate": 2.994e-05, "loss": 186.3342, "step": 14970 }, { "epoch": 0.06052109552071171, "grad_norm": 365.4548645019531, "learning_rate": 2.9959999999999998e-05, "loss": 117.0936, "step": 14980 }, { "epoch": 0.06056149678607934, "grad_norm": 557.1009521484375, "learning_rate": 2.998e-05, "loss": 98.8324, "step": 14990 }, { "epoch": 0.06060189805144697, "grad_norm": 1404.343017578125, "learning_rate": 3e-05, "loss": 129.4244, "step": 15000 }, { "epoch": 0.060642299316814606, "grad_norm": 1246.394775390625, "learning_rate": 3.0020000000000004e-05, "loss": 126.248, "step": 15010 }, { "epoch": 0.06068270058218223, "grad_norm": 1066.50048828125, "learning_rate": 3.004e-05, "loss": 124.5184, "step": 15020 }, { "epoch": 0.060723101847549865, "grad_norm": 705.3773803710938, "learning_rate": 3.006e-05, "loss": 133.4564, "step": 15030 }, { "epoch": 0.0607635031129175, "grad_norm": 874.49072265625, "learning_rate": 3.0080000000000003e-05, "loss": 111.9202, "step": 15040 }, { "epoch": 0.06080390437828513, "grad_norm": 725.5115356445312, "learning_rate": 3.01e-05, "loss": 116.5958, "step": 15050 }, { "epoch": 0.06084430564365276, "grad_norm": 1046.1822509765625, "learning_rate": 3.0120000000000003e-05, "loss": 108.4798, "step": 15060 }, { "epoch": 0.06088470690902039, "grad_norm": 1189.979248046875, "learning_rate": 3.0140000000000003e-05, "loss": 141.0663, "step": 15070 }, { "epoch": 0.060925108174388024, "grad_norm": 3112.551513671875, "learning_rate": 3.016e-05, "loss": 172.9219, "step": 15080 }, { "epoch": 0.06096550943975566, "grad_norm": 2147.462890625, "learning_rate": 3.0180000000000002e-05, "loss": 146.7627, "step": 15090 }, { "epoch": 0.06100591070512328, "grad_norm": 1260.1737060546875, "learning_rate": 3.02e-05, "loss": 151.8984, "step": 15100 }, { "epoch": 0.061046311970490916, "grad_norm": 1158.15771484375, "learning_rate": 3.0220000000000005e-05, "loss": 116.2214, "step": 15110 }, { "epoch": 0.06108671323585855, "grad_norm": 3242.16259765625, "learning_rate": 3.0240000000000002e-05, "loss": 153.6299, "step": 15120 }, { "epoch": 0.06112711450122618, "grad_norm": 837.7076416015625, "learning_rate": 3.0259999999999998e-05, "loss": 84.5595, "step": 15130 }, { "epoch": 0.06116751576659381, "grad_norm": 2217.368408203125, "learning_rate": 3.028e-05, "loss": 154.1678, "step": 15140 }, { "epoch": 0.06120791703196144, "grad_norm": 753.1808471679688, "learning_rate": 3.03e-05, "loss": 130.1928, "step": 15150 }, { "epoch": 0.061248318297329074, "grad_norm": 1219.47607421875, "learning_rate": 3.0320000000000004e-05, "loss": 169.753, "step": 15160 }, { "epoch": 0.06128871956269671, "grad_norm": 6474.40478515625, "learning_rate": 3.034e-05, "loss": 149.4557, "step": 15170 }, { "epoch": 0.06132912082806433, "grad_norm": 5858.99658203125, "learning_rate": 3.036e-05, "loss": 191.529, "step": 15180 }, { "epoch": 0.061369522093431966, "grad_norm": 1600.056396484375, "learning_rate": 3.0380000000000004e-05, "loss": 130.8423, "step": 15190 }, { "epoch": 0.0614099233587996, "grad_norm": 814.158935546875, "learning_rate": 3.04e-05, "loss": 148.7461, "step": 15200 }, { "epoch": 0.061450324624167225, "grad_norm": 844.9783935546875, "learning_rate": 3.0420000000000004e-05, "loss": 148.264, "step": 15210 }, { "epoch": 0.06149072588953486, "grad_norm": 1373.4136962890625, "learning_rate": 3.0440000000000003e-05, "loss": 172.1749, "step": 15220 }, { "epoch": 0.06153112715490249, "grad_norm": 897.79248046875, "learning_rate": 3.046e-05, "loss": 113.2348, "step": 15230 }, { "epoch": 0.061571528420270125, "grad_norm": 1343.7520751953125, "learning_rate": 3.0480000000000003e-05, "loss": 98.1409, "step": 15240 }, { "epoch": 0.06161192968563775, "grad_norm": 828.697998046875, "learning_rate": 3.05e-05, "loss": 162.0212, "step": 15250 }, { "epoch": 0.061652330951005384, "grad_norm": 366.743896484375, "learning_rate": 3.0520000000000006e-05, "loss": 75.6308, "step": 15260 }, { "epoch": 0.06169273221637302, "grad_norm": 2019.9920654296875, "learning_rate": 3.054e-05, "loss": 237.3679, "step": 15270 }, { "epoch": 0.06173313348174065, "grad_norm": 1361.7977294921875, "learning_rate": 3.056e-05, "loss": 108.2089, "step": 15280 }, { "epoch": 0.061773534747108276, "grad_norm": 1101.6763916015625, "learning_rate": 3.058e-05, "loss": 90.1339, "step": 15290 }, { "epoch": 0.06181393601247591, "grad_norm": 1785.1817626953125, "learning_rate": 3.06e-05, "loss": 142.6872, "step": 15300 }, { "epoch": 0.06185433727784354, "grad_norm": 1439.0386962890625, "learning_rate": 3.062e-05, "loss": 135.7678, "step": 15310 }, { "epoch": 0.061894738543211175, "grad_norm": 1499.1324462890625, "learning_rate": 3.0640000000000005e-05, "loss": 123.4259, "step": 15320 }, { "epoch": 0.0619351398085788, "grad_norm": 1074.73681640625, "learning_rate": 3.066e-05, "loss": 98.1504, "step": 15330 }, { "epoch": 0.061975541073946434, "grad_norm": 1967.539306640625, "learning_rate": 3.0680000000000004e-05, "loss": 151.5911, "step": 15340 }, { "epoch": 0.06201594233931407, "grad_norm": 485.7192077636719, "learning_rate": 3.07e-05, "loss": 137.3458, "step": 15350 }, { "epoch": 0.0620563436046817, "grad_norm": 753.7666015625, "learning_rate": 3.072e-05, "loss": 103.7652, "step": 15360 }, { "epoch": 0.06209674487004933, "grad_norm": 1353.95556640625, "learning_rate": 3.074e-05, "loss": 117.3775, "step": 15370 }, { "epoch": 0.06213714613541696, "grad_norm": 438.0009765625, "learning_rate": 3.076e-05, "loss": 115.8443, "step": 15380 }, { "epoch": 0.06217754740078459, "grad_norm": 1873.119140625, "learning_rate": 3.078e-05, "loss": 134.3721, "step": 15390 }, { "epoch": 0.062217948666152226, "grad_norm": 1559.0321044921875, "learning_rate": 3.08e-05, "loss": 146.9015, "step": 15400 }, { "epoch": 0.06225834993151985, "grad_norm": 953.1947021484375, "learning_rate": 3.082e-05, "loss": 146.0465, "step": 15410 }, { "epoch": 0.062298751196887485, "grad_norm": 1465.51806640625, "learning_rate": 3.084e-05, "loss": 127.4744, "step": 15420 }, { "epoch": 0.06233915246225512, "grad_norm": 871.5877685546875, "learning_rate": 3.086e-05, "loss": 110.5899, "step": 15430 }, { "epoch": 0.06237955372762275, "grad_norm": 2501.28564453125, "learning_rate": 3.088e-05, "loss": 202.4409, "step": 15440 }, { "epoch": 0.06241995499299038, "grad_norm": 1591.2879638671875, "learning_rate": 3.09e-05, "loss": 129.12, "step": 15450 }, { "epoch": 0.06246035625835801, "grad_norm": 838.3685302734375, "learning_rate": 3.092e-05, "loss": 102.4406, "step": 15460 }, { "epoch": 0.06250075752372564, "grad_norm": 1001.7224731445312, "learning_rate": 3.0940000000000005e-05, "loss": 108.1954, "step": 15470 }, { "epoch": 0.06254115878909328, "grad_norm": 1513.2293701171875, "learning_rate": 3.096e-05, "loss": 103.5236, "step": 15480 }, { "epoch": 0.0625815600544609, "grad_norm": 1613.0614013671875, "learning_rate": 3.0980000000000005e-05, "loss": 159.1191, "step": 15490 }, { "epoch": 0.06262196131982854, "grad_norm": 1024.2308349609375, "learning_rate": 3.1e-05, "loss": 131.2479, "step": 15500 }, { "epoch": 0.06266236258519617, "grad_norm": 2692.8251953125, "learning_rate": 3.102e-05, "loss": 125.2973, "step": 15510 }, { "epoch": 0.0627027638505638, "grad_norm": 1083.9881591796875, "learning_rate": 3.104e-05, "loss": 121.1962, "step": 15520 }, { "epoch": 0.06274316511593143, "grad_norm": 1520.9947509765625, "learning_rate": 3.106e-05, "loss": 127.8408, "step": 15530 }, { "epoch": 0.06278356638129906, "grad_norm": 871.9234008789062, "learning_rate": 3.108e-05, "loss": 107.3415, "step": 15540 }, { "epoch": 0.06282396764666669, "grad_norm": 1313.199462890625, "learning_rate": 3.1100000000000004e-05, "loss": 124.2825, "step": 15550 }, { "epoch": 0.06286436891203433, "grad_norm": 997.6826782226562, "learning_rate": 3.112e-05, "loss": 116.8165, "step": 15560 }, { "epoch": 0.06290477017740195, "grad_norm": 713.2454833984375, "learning_rate": 3.1140000000000003e-05, "loss": 142.1327, "step": 15570 }, { "epoch": 0.06294517144276959, "grad_norm": 841.623779296875, "learning_rate": 3.116e-05, "loss": 136.2796, "step": 15580 }, { "epoch": 0.06298557270813722, "grad_norm": 1948.3192138671875, "learning_rate": 3.118e-05, "loss": 192.9212, "step": 15590 }, { "epoch": 0.06302597397350485, "grad_norm": 1726.7506103515625, "learning_rate": 3.12e-05, "loss": 172.2863, "step": 15600 }, { "epoch": 0.06306637523887249, "grad_norm": 1170.4307861328125, "learning_rate": 3.122e-05, "loss": 130.6975, "step": 15610 }, { "epoch": 0.06310677650424011, "grad_norm": 1774.3511962890625, "learning_rate": 3.1240000000000006e-05, "loss": 137.2435, "step": 15620 }, { "epoch": 0.06314717776960774, "grad_norm": 4738.48193359375, "learning_rate": 3.126e-05, "loss": 140.0143, "step": 15630 }, { "epoch": 0.06318757903497538, "grad_norm": 793.2022094726562, "learning_rate": 3.1280000000000005e-05, "loss": 163.8472, "step": 15640 }, { "epoch": 0.063227980300343, "grad_norm": 0.0, "learning_rate": 3.13e-05, "loss": 113.3165, "step": 15650 }, { "epoch": 0.06326838156571064, "grad_norm": 1195.0482177734375, "learning_rate": 3.132e-05, "loss": 151.147, "step": 15660 }, { "epoch": 0.06330878283107827, "grad_norm": 1180.14453125, "learning_rate": 3.134e-05, "loss": 100.6629, "step": 15670 }, { "epoch": 0.0633491840964459, "grad_norm": 926.8880615234375, "learning_rate": 3.136e-05, "loss": 161.5127, "step": 15680 }, { "epoch": 0.06338958536181354, "grad_norm": 1517.244873046875, "learning_rate": 3.138e-05, "loss": 164.7377, "step": 15690 }, { "epoch": 0.06342998662718116, "grad_norm": 942.4938354492188, "learning_rate": 3.1400000000000004e-05, "loss": 127.0913, "step": 15700 }, { "epoch": 0.06347038789254879, "grad_norm": 519.3255004882812, "learning_rate": 3.142e-05, "loss": 110.9436, "step": 15710 }, { "epoch": 0.06351078915791643, "grad_norm": 931.0354614257812, "learning_rate": 3.1440000000000004e-05, "loss": 155.0716, "step": 15720 }, { "epoch": 0.06355119042328405, "grad_norm": 2942.558837890625, "learning_rate": 3.146e-05, "loss": 106.2107, "step": 15730 }, { "epoch": 0.0635915916886517, "grad_norm": 1383.4794921875, "learning_rate": 3.1480000000000004e-05, "loss": 146.2436, "step": 15740 }, { "epoch": 0.06363199295401932, "grad_norm": 3386.990966796875, "learning_rate": 3.15e-05, "loss": 148.2886, "step": 15750 }, { "epoch": 0.06367239421938695, "grad_norm": 1014.3907470703125, "learning_rate": 3.1519999999999996e-05, "loss": 120.5053, "step": 15760 }, { "epoch": 0.06371279548475459, "grad_norm": 762.3274536132812, "learning_rate": 3.154e-05, "loss": 139.9023, "step": 15770 }, { "epoch": 0.06375319675012221, "grad_norm": 1421.413330078125, "learning_rate": 3.156e-05, "loss": 146.9265, "step": 15780 }, { "epoch": 0.06379359801548984, "grad_norm": 1463.6063232421875, "learning_rate": 3.1580000000000006e-05, "loss": 134.0383, "step": 15790 }, { "epoch": 0.06383399928085748, "grad_norm": 1137.2342529296875, "learning_rate": 3.16e-05, "loss": 161.4233, "step": 15800 }, { "epoch": 0.0638744005462251, "grad_norm": 1729.1817626953125, "learning_rate": 3.162e-05, "loss": 109.8035, "step": 15810 }, { "epoch": 0.06391480181159274, "grad_norm": 757.51025390625, "learning_rate": 3.164e-05, "loss": 83.0763, "step": 15820 }, { "epoch": 0.06395520307696037, "grad_norm": 5324.74169921875, "learning_rate": 3.166e-05, "loss": 132.4072, "step": 15830 }, { "epoch": 0.063995604342328, "grad_norm": 1629.523681640625, "learning_rate": 3.168e-05, "loss": 84.4823, "step": 15840 }, { "epoch": 0.06403600560769564, "grad_norm": 1089.279296875, "learning_rate": 3.1700000000000005e-05, "loss": 130.4876, "step": 15850 }, { "epoch": 0.06407640687306326, "grad_norm": 1170.0994873046875, "learning_rate": 3.172e-05, "loss": 144.4939, "step": 15860 }, { "epoch": 0.06411680813843089, "grad_norm": 1094.1689453125, "learning_rate": 3.1740000000000004e-05, "loss": 124.1748, "step": 15870 }, { "epoch": 0.06415720940379853, "grad_norm": 742.95703125, "learning_rate": 3.176e-05, "loss": 85.3714, "step": 15880 }, { "epoch": 0.06419761066916616, "grad_norm": 1239.1507568359375, "learning_rate": 3.1780000000000004e-05, "loss": 215.5378, "step": 15890 }, { "epoch": 0.0642380119345338, "grad_norm": 734.71484375, "learning_rate": 3.18e-05, "loss": 109.2592, "step": 15900 }, { "epoch": 0.06427841319990142, "grad_norm": 1894.3482666015625, "learning_rate": 3.182e-05, "loss": 115.1967, "step": 15910 }, { "epoch": 0.06431881446526905, "grad_norm": 626.6444091796875, "learning_rate": 3.184e-05, "loss": 85.9778, "step": 15920 }, { "epoch": 0.06435921573063669, "grad_norm": 1511.507568359375, "learning_rate": 3.186e-05, "loss": 182.5304, "step": 15930 }, { "epoch": 0.06439961699600431, "grad_norm": 693.2582397460938, "learning_rate": 3.188e-05, "loss": 90.9457, "step": 15940 }, { "epoch": 0.06444001826137194, "grad_norm": 683.79150390625, "learning_rate": 3.19e-05, "loss": 98.913, "step": 15950 }, { "epoch": 0.06448041952673958, "grad_norm": 1719.1845703125, "learning_rate": 3.192e-05, "loss": 129.3339, "step": 15960 }, { "epoch": 0.0645208207921072, "grad_norm": 1166.0206298828125, "learning_rate": 3.194e-05, "loss": 153.576, "step": 15970 }, { "epoch": 0.06456122205747485, "grad_norm": 1580.4215087890625, "learning_rate": 3.196e-05, "loss": 170.2588, "step": 15980 }, { "epoch": 0.06460162332284247, "grad_norm": 984.2314453125, "learning_rate": 3.198e-05, "loss": 156.6156, "step": 15990 }, { "epoch": 0.0646420245882101, "grad_norm": 1595.7958984375, "learning_rate": 3.2000000000000005e-05, "loss": 174.3226, "step": 16000 }, { "epoch": 0.06468242585357774, "grad_norm": 12306.052734375, "learning_rate": 3.202e-05, "loss": 203.855, "step": 16010 }, { "epoch": 0.06472282711894536, "grad_norm": 2370.59765625, "learning_rate": 3.2040000000000005e-05, "loss": 144.5134, "step": 16020 }, { "epoch": 0.06476322838431299, "grad_norm": 1784.508056640625, "learning_rate": 3.206e-05, "loss": 159.992, "step": 16030 }, { "epoch": 0.06480362964968063, "grad_norm": 894.4352416992188, "learning_rate": 3.208e-05, "loss": 162.522, "step": 16040 }, { "epoch": 0.06484403091504826, "grad_norm": 1397.84814453125, "learning_rate": 3.21e-05, "loss": 113.1159, "step": 16050 }, { "epoch": 0.0648844321804159, "grad_norm": 2406.892578125, "learning_rate": 3.212e-05, "loss": 104.4846, "step": 16060 }, { "epoch": 0.06492483344578352, "grad_norm": 1434.818115234375, "learning_rate": 3.214e-05, "loss": 142.6461, "step": 16070 }, { "epoch": 0.06496523471115115, "grad_norm": 3077.646240234375, "learning_rate": 3.2160000000000004e-05, "loss": 101.5211, "step": 16080 }, { "epoch": 0.06500563597651879, "grad_norm": 1358.7418212890625, "learning_rate": 3.218e-05, "loss": 129.6278, "step": 16090 }, { "epoch": 0.06504603724188641, "grad_norm": 11825.310546875, "learning_rate": 3.2200000000000003e-05, "loss": 145.8424, "step": 16100 }, { "epoch": 0.06508643850725404, "grad_norm": 1621.6112060546875, "learning_rate": 3.222e-05, "loss": 117.4145, "step": 16110 }, { "epoch": 0.06512683977262168, "grad_norm": 1896.356689453125, "learning_rate": 3.224e-05, "loss": 103.7541, "step": 16120 }, { "epoch": 0.0651672410379893, "grad_norm": 8822.8671875, "learning_rate": 3.226e-05, "loss": 174.6907, "step": 16130 }, { "epoch": 0.06520764230335695, "grad_norm": 1795.896728515625, "learning_rate": 3.2279999999999996e-05, "loss": 187.9359, "step": 16140 }, { "epoch": 0.06524804356872457, "grad_norm": 870.3344116210938, "learning_rate": 3.2300000000000006e-05, "loss": 113.2782, "step": 16150 }, { "epoch": 0.0652884448340922, "grad_norm": 1710.718505859375, "learning_rate": 3.232e-05, "loss": 114.0318, "step": 16160 }, { "epoch": 0.06532884609945984, "grad_norm": 879.309326171875, "learning_rate": 3.2340000000000005e-05, "loss": 159.0129, "step": 16170 }, { "epoch": 0.06536924736482747, "grad_norm": 1179.8787841796875, "learning_rate": 3.236e-05, "loss": 108.0792, "step": 16180 }, { "epoch": 0.06540964863019509, "grad_norm": 1053.493896484375, "learning_rate": 3.238e-05, "loss": 162.0999, "step": 16190 }, { "epoch": 0.06545004989556273, "grad_norm": 1169.240478515625, "learning_rate": 3.24e-05, "loss": 117.8543, "step": 16200 }, { "epoch": 0.06549045116093036, "grad_norm": 1077.4576416015625, "learning_rate": 3.242e-05, "loss": 161.1472, "step": 16210 }, { "epoch": 0.065530852426298, "grad_norm": 827.2643432617188, "learning_rate": 3.244e-05, "loss": 122.5282, "step": 16220 }, { "epoch": 0.06557125369166562, "grad_norm": 1683.487060546875, "learning_rate": 3.2460000000000004e-05, "loss": 163.201, "step": 16230 }, { "epoch": 0.06561165495703325, "grad_norm": 1506.306884765625, "learning_rate": 3.248e-05, "loss": 123.3369, "step": 16240 }, { "epoch": 0.06565205622240089, "grad_norm": 410.9195861816406, "learning_rate": 3.2500000000000004e-05, "loss": 102.7201, "step": 16250 }, { "epoch": 0.06569245748776852, "grad_norm": 484.0295104980469, "learning_rate": 3.252e-05, "loss": 191.6438, "step": 16260 }, { "epoch": 0.06573285875313614, "grad_norm": 649.029541015625, "learning_rate": 3.2540000000000004e-05, "loss": 139.1572, "step": 16270 }, { "epoch": 0.06577326001850378, "grad_norm": 2727.947509765625, "learning_rate": 3.256e-05, "loss": 198.9751, "step": 16280 }, { "epoch": 0.06581366128387141, "grad_norm": 879.7364501953125, "learning_rate": 3.2579999999999996e-05, "loss": 119.9532, "step": 16290 }, { "epoch": 0.06585406254923905, "grad_norm": 2567.528564453125, "learning_rate": 3.26e-05, "loss": 182.2825, "step": 16300 }, { "epoch": 0.06589446381460667, "grad_norm": 1746.7139892578125, "learning_rate": 3.262e-05, "loss": 109.7434, "step": 16310 }, { "epoch": 0.0659348650799743, "grad_norm": 1511.91845703125, "learning_rate": 3.2640000000000006e-05, "loss": 117.7299, "step": 16320 }, { "epoch": 0.06597526634534194, "grad_norm": 626.0679321289062, "learning_rate": 3.266e-05, "loss": 154.5593, "step": 16330 }, { "epoch": 0.06601566761070957, "grad_norm": 1950.873779296875, "learning_rate": 3.268e-05, "loss": 134.659, "step": 16340 }, { "epoch": 0.06605606887607719, "grad_norm": 2815.42919921875, "learning_rate": 3.27e-05, "loss": 189.5417, "step": 16350 }, { "epoch": 0.06609647014144483, "grad_norm": 791.9197387695312, "learning_rate": 3.272e-05, "loss": 104.6571, "step": 16360 }, { "epoch": 0.06613687140681246, "grad_norm": 961.1111450195312, "learning_rate": 3.274e-05, "loss": 110.3847, "step": 16370 }, { "epoch": 0.0661772726721801, "grad_norm": 1224.700927734375, "learning_rate": 3.2760000000000005e-05, "loss": 129.7902, "step": 16380 }, { "epoch": 0.06621767393754772, "grad_norm": 892.2734985351562, "learning_rate": 3.278e-05, "loss": 131.0276, "step": 16390 }, { "epoch": 0.06625807520291535, "grad_norm": 2211.9521484375, "learning_rate": 3.2800000000000004e-05, "loss": 102.1123, "step": 16400 }, { "epoch": 0.06629847646828299, "grad_norm": 2223.035888671875, "learning_rate": 3.282e-05, "loss": 130.2351, "step": 16410 }, { "epoch": 0.06633887773365062, "grad_norm": 896.9553833007812, "learning_rate": 3.2840000000000004e-05, "loss": 113.7288, "step": 16420 }, { "epoch": 0.06637927899901824, "grad_norm": 0.0, "learning_rate": 3.286e-05, "loss": 70.063, "step": 16430 }, { "epoch": 0.06641968026438588, "grad_norm": 665.3837890625, "learning_rate": 3.288e-05, "loss": 102.8864, "step": 16440 }, { "epoch": 0.06646008152975351, "grad_norm": 529.5450439453125, "learning_rate": 3.29e-05, "loss": 107.0171, "step": 16450 }, { "epoch": 0.06650048279512115, "grad_norm": 445.30950927734375, "learning_rate": 3.292e-05, "loss": 118.4387, "step": 16460 }, { "epoch": 0.06654088406048878, "grad_norm": 1200.6650390625, "learning_rate": 3.2940000000000006e-05, "loss": 115.6011, "step": 16470 }, { "epoch": 0.0665812853258564, "grad_norm": 997.8995971679688, "learning_rate": 3.296e-05, "loss": 101.1121, "step": 16480 }, { "epoch": 0.06662168659122404, "grad_norm": 1181.9188232421875, "learning_rate": 3.298e-05, "loss": 94.8608, "step": 16490 }, { "epoch": 0.06666208785659167, "grad_norm": 816.2405395507812, "learning_rate": 3.3e-05, "loss": 125.1284, "step": 16500 }, { "epoch": 0.0667024891219593, "grad_norm": 703.4663696289062, "learning_rate": 3.302e-05, "loss": 123.1175, "step": 16510 }, { "epoch": 0.06674289038732693, "grad_norm": 671.1204833984375, "learning_rate": 3.304e-05, "loss": 133.0177, "step": 16520 }, { "epoch": 0.06678329165269456, "grad_norm": 635.96728515625, "learning_rate": 3.3060000000000005e-05, "loss": 117.4494, "step": 16530 }, { "epoch": 0.0668236929180622, "grad_norm": 0.0, "learning_rate": 3.308e-05, "loss": 84.0806, "step": 16540 }, { "epoch": 0.06686409418342983, "grad_norm": 1110.686279296875, "learning_rate": 3.3100000000000005e-05, "loss": 149.0371, "step": 16550 }, { "epoch": 0.06690449544879745, "grad_norm": 1048.002197265625, "learning_rate": 3.312e-05, "loss": 118.8765, "step": 16560 }, { "epoch": 0.06694489671416509, "grad_norm": 532.6653442382812, "learning_rate": 3.314e-05, "loss": 115.488, "step": 16570 }, { "epoch": 0.06698529797953272, "grad_norm": 1357.1689453125, "learning_rate": 3.316e-05, "loss": 121.7834, "step": 16580 }, { "epoch": 0.06702569924490034, "grad_norm": 3220.8486328125, "learning_rate": 3.318e-05, "loss": 146.6197, "step": 16590 }, { "epoch": 0.06706610051026798, "grad_norm": 809.4137573242188, "learning_rate": 3.32e-05, "loss": 157.1161, "step": 16600 }, { "epoch": 0.06710650177563561, "grad_norm": 479.6476135253906, "learning_rate": 3.3220000000000004e-05, "loss": 123.665, "step": 16610 }, { "epoch": 0.06714690304100325, "grad_norm": 1147.013916015625, "learning_rate": 3.324e-05, "loss": 151.3696, "step": 16620 }, { "epoch": 0.06718730430637088, "grad_norm": 1325.5657958984375, "learning_rate": 3.3260000000000003e-05, "loss": 228.0247, "step": 16630 }, { "epoch": 0.0672277055717385, "grad_norm": 1421.17578125, "learning_rate": 3.328e-05, "loss": 174.1318, "step": 16640 }, { "epoch": 0.06726810683710614, "grad_norm": 872.6542358398438, "learning_rate": 3.33e-05, "loss": 118.2037, "step": 16650 }, { "epoch": 0.06730850810247377, "grad_norm": 936.040283203125, "learning_rate": 3.332e-05, "loss": 113.6235, "step": 16660 }, { "epoch": 0.0673489093678414, "grad_norm": 829.9392700195312, "learning_rate": 3.3339999999999996e-05, "loss": 124.7012, "step": 16670 }, { "epoch": 0.06738931063320903, "grad_norm": 829.8602294921875, "learning_rate": 3.336e-05, "loss": 122.1225, "step": 16680 }, { "epoch": 0.06742971189857666, "grad_norm": 591.7499389648438, "learning_rate": 3.338e-05, "loss": 117.5648, "step": 16690 }, { "epoch": 0.0674701131639443, "grad_norm": 585.739013671875, "learning_rate": 3.3400000000000005e-05, "loss": 120.4557, "step": 16700 }, { "epoch": 0.06751051442931193, "grad_norm": 1151.9459228515625, "learning_rate": 3.342e-05, "loss": 122.6646, "step": 16710 }, { "epoch": 0.06755091569467955, "grad_norm": 1100.1302490234375, "learning_rate": 3.344e-05, "loss": 124.8112, "step": 16720 }, { "epoch": 0.06759131696004719, "grad_norm": 1049.39501953125, "learning_rate": 3.346e-05, "loss": 131.1703, "step": 16730 }, { "epoch": 0.06763171822541482, "grad_norm": 976.2811889648438, "learning_rate": 3.348e-05, "loss": 104.992, "step": 16740 }, { "epoch": 0.06767211949078245, "grad_norm": 1059.683837890625, "learning_rate": 3.35e-05, "loss": 109.1284, "step": 16750 }, { "epoch": 0.06771252075615009, "grad_norm": 1005.8837280273438, "learning_rate": 3.3520000000000004e-05, "loss": 107.3349, "step": 16760 }, { "epoch": 0.06775292202151771, "grad_norm": 850.0352783203125, "learning_rate": 3.354e-05, "loss": 126.588, "step": 16770 }, { "epoch": 0.06779332328688535, "grad_norm": 1083.05712890625, "learning_rate": 3.3560000000000004e-05, "loss": 139.6427, "step": 16780 }, { "epoch": 0.06783372455225298, "grad_norm": 1554.07080078125, "learning_rate": 3.358e-05, "loss": 145.7689, "step": 16790 }, { "epoch": 0.0678741258176206, "grad_norm": 3450.517333984375, "learning_rate": 3.3600000000000004e-05, "loss": 140.9184, "step": 16800 }, { "epoch": 0.06791452708298824, "grad_norm": 1706.325439453125, "learning_rate": 3.362e-05, "loss": 199.2792, "step": 16810 }, { "epoch": 0.06795492834835587, "grad_norm": 1185.1922607421875, "learning_rate": 3.3639999999999996e-05, "loss": 140.3882, "step": 16820 }, { "epoch": 0.0679953296137235, "grad_norm": 1169.992919921875, "learning_rate": 3.366e-05, "loss": 123.5538, "step": 16830 }, { "epoch": 0.06803573087909114, "grad_norm": 1750.4180908203125, "learning_rate": 3.368e-05, "loss": 159.0448, "step": 16840 }, { "epoch": 0.06807613214445876, "grad_norm": 1771.9908447265625, "learning_rate": 3.3700000000000006e-05, "loss": 153.4333, "step": 16850 }, { "epoch": 0.0681165334098264, "grad_norm": 2022.061279296875, "learning_rate": 3.372e-05, "loss": 139.1972, "step": 16860 }, { "epoch": 0.06815693467519403, "grad_norm": 1393.0201416015625, "learning_rate": 3.374e-05, "loss": 152.1366, "step": 16870 }, { "epoch": 0.06819733594056165, "grad_norm": 2371.81494140625, "learning_rate": 3.376e-05, "loss": 142.447, "step": 16880 }, { "epoch": 0.0682377372059293, "grad_norm": 1049.164794921875, "learning_rate": 3.378e-05, "loss": 115.6586, "step": 16890 }, { "epoch": 0.06827813847129692, "grad_norm": 1043.5615234375, "learning_rate": 3.38e-05, "loss": 174.1537, "step": 16900 }, { "epoch": 0.06831853973666455, "grad_norm": 1559.866455078125, "learning_rate": 3.3820000000000005e-05, "loss": 108.5037, "step": 16910 }, { "epoch": 0.06835894100203219, "grad_norm": 2484.740478515625, "learning_rate": 3.384e-05, "loss": 184.837, "step": 16920 }, { "epoch": 0.06839934226739981, "grad_norm": 775.7838745117188, "learning_rate": 3.3860000000000004e-05, "loss": 126.9209, "step": 16930 }, { "epoch": 0.06843974353276745, "grad_norm": 471.7488708496094, "learning_rate": 3.388e-05, "loss": 135.7176, "step": 16940 }, { "epoch": 0.06848014479813508, "grad_norm": 969.9213256835938, "learning_rate": 3.3900000000000004e-05, "loss": 95.2846, "step": 16950 }, { "epoch": 0.0685205460635027, "grad_norm": 1296.1826171875, "learning_rate": 3.392e-05, "loss": 159.3641, "step": 16960 }, { "epoch": 0.06856094732887034, "grad_norm": 2220.822021484375, "learning_rate": 3.394e-05, "loss": 171.076, "step": 16970 }, { "epoch": 0.06860134859423797, "grad_norm": 764.9531860351562, "learning_rate": 3.396e-05, "loss": 139.6084, "step": 16980 }, { "epoch": 0.0686417498596056, "grad_norm": 2453.2900390625, "learning_rate": 3.398e-05, "loss": 162.632, "step": 16990 }, { "epoch": 0.06868215112497324, "grad_norm": 1016.1632690429688, "learning_rate": 3.4000000000000007e-05, "loss": 236.2789, "step": 17000 }, { "epoch": 0.06872255239034086, "grad_norm": 1268.531494140625, "learning_rate": 3.402e-05, "loss": 119.6548, "step": 17010 }, { "epoch": 0.0687629536557085, "grad_norm": 0.0, "learning_rate": 3.404e-05, "loss": 115.3437, "step": 17020 }, { "epoch": 0.06880335492107613, "grad_norm": 867.4444580078125, "learning_rate": 3.406e-05, "loss": 173.6102, "step": 17030 }, { "epoch": 0.06884375618644376, "grad_norm": 1306.5975341796875, "learning_rate": 3.408e-05, "loss": 147.4384, "step": 17040 }, { "epoch": 0.0688841574518114, "grad_norm": 701.0490112304688, "learning_rate": 3.41e-05, "loss": 138.4343, "step": 17050 }, { "epoch": 0.06892455871717902, "grad_norm": 1092.9249267578125, "learning_rate": 3.412e-05, "loss": 112.9486, "step": 17060 }, { "epoch": 0.06896495998254665, "grad_norm": 1403.239990234375, "learning_rate": 3.414e-05, "loss": 106.8123, "step": 17070 }, { "epoch": 0.06900536124791429, "grad_norm": 2368.7109375, "learning_rate": 3.4160000000000005e-05, "loss": 90.9559, "step": 17080 }, { "epoch": 0.06904576251328191, "grad_norm": 723.0379028320312, "learning_rate": 3.418e-05, "loss": 92.075, "step": 17090 }, { "epoch": 0.06908616377864955, "grad_norm": 1454.345703125, "learning_rate": 3.4200000000000005e-05, "loss": 122.138, "step": 17100 }, { "epoch": 0.06912656504401718, "grad_norm": 849.4160766601562, "learning_rate": 3.422e-05, "loss": 132.3787, "step": 17110 }, { "epoch": 0.0691669663093848, "grad_norm": 1275.969482421875, "learning_rate": 3.424e-05, "loss": 80.5995, "step": 17120 }, { "epoch": 0.06920736757475245, "grad_norm": 977.9779052734375, "learning_rate": 3.426e-05, "loss": 126.0984, "step": 17130 }, { "epoch": 0.06924776884012007, "grad_norm": 1691.71826171875, "learning_rate": 3.4280000000000004e-05, "loss": 123.841, "step": 17140 }, { "epoch": 0.0692881701054877, "grad_norm": 3872.140869140625, "learning_rate": 3.430000000000001e-05, "loss": 148.4945, "step": 17150 }, { "epoch": 0.06932857137085534, "grad_norm": 3100.976318359375, "learning_rate": 3.4320000000000003e-05, "loss": 117.4309, "step": 17160 }, { "epoch": 0.06936897263622296, "grad_norm": 1637.591552734375, "learning_rate": 3.434e-05, "loss": 119.0649, "step": 17170 }, { "epoch": 0.0694093739015906, "grad_norm": 8878.0693359375, "learning_rate": 3.436e-05, "loss": 140.0768, "step": 17180 }, { "epoch": 0.06944977516695823, "grad_norm": 959.3743286132812, "learning_rate": 3.438e-05, "loss": 172.569, "step": 17190 }, { "epoch": 0.06949017643232586, "grad_norm": 520.9686889648438, "learning_rate": 3.4399999999999996e-05, "loss": 93.1335, "step": 17200 }, { "epoch": 0.0695305776976935, "grad_norm": 1169.4296875, "learning_rate": 3.442e-05, "loss": 109.4803, "step": 17210 }, { "epoch": 0.06957097896306112, "grad_norm": 2471.82568359375, "learning_rate": 3.444e-05, "loss": 149.8675, "step": 17220 }, { "epoch": 0.06961138022842875, "grad_norm": 1193.904541015625, "learning_rate": 3.4460000000000005e-05, "loss": 137.5065, "step": 17230 }, { "epoch": 0.06965178149379639, "grad_norm": 796.9132080078125, "learning_rate": 3.448e-05, "loss": 89.8475, "step": 17240 }, { "epoch": 0.06969218275916401, "grad_norm": 688.8414916992188, "learning_rate": 3.45e-05, "loss": 125.9576, "step": 17250 }, { "epoch": 0.06973258402453165, "grad_norm": 480.16009521484375, "learning_rate": 3.452e-05, "loss": 78.7464, "step": 17260 }, { "epoch": 0.06977298528989928, "grad_norm": 783.6867065429688, "learning_rate": 3.454e-05, "loss": 108.8815, "step": 17270 }, { "epoch": 0.06981338655526691, "grad_norm": 0.0, "learning_rate": 3.456e-05, "loss": 116.7318, "step": 17280 }, { "epoch": 0.06985378782063455, "grad_norm": 972.9898681640625, "learning_rate": 3.4580000000000004e-05, "loss": 163.7325, "step": 17290 }, { "epoch": 0.06989418908600217, "grad_norm": 2305.32470703125, "learning_rate": 3.46e-05, "loss": 102.6796, "step": 17300 }, { "epoch": 0.0699345903513698, "grad_norm": 985.5175170898438, "learning_rate": 3.4620000000000004e-05, "loss": 133.6453, "step": 17310 }, { "epoch": 0.06997499161673744, "grad_norm": 966.200439453125, "learning_rate": 3.464e-05, "loss": 133.9981, "step": 17320 }, { "epoch": 0.07001539288210507, "grad_norm": 582.17041015625, "learning_rate": 3.4660000000000004e-05, "loss": 119.5202, "step": 17330 }, { "epoch": 0.0700557941474727, "grad_norm": 841.1046752929688, "learning_rate": 3.468e-05, "loss": 133.1583, "step": 17340 }, { "epoch": 0.07009619541284033, "grad_norm": 675.8413696289062, "learning_rate": 3.4699999999999996e-05, "loss": 139.8855, "step": 17350 }, { "epoch": 0.07013659667820796, "grad_norm": 957.8898315429688, "learning_rate": 3.472e-05, "loss": 137.3781, "step": 17360 }, { "epoch": 0.0701769979435756, "grad_norm": 3106.9208984375, "learning_rate": 3.474e-05, "loss": 167.9274, "step": 17370 }, { "epoch": 0.07021739920894322, "grad_norm": 1493.3551025390625, "learning_rate": 3.4760000000000006e-05, "loss": 163.2431, "step": 17380 }, { "epoch": 0.07025780047431085, "grad_norm": 993.3938598632812, "learning_rate": 3.478e-05, "loss": 114.7312, "step": 17390 }, { "epoch": 0.07029820173967849, "grad_norm": 1034.116943359375, "learning_rate": 3.48e-05, "loss": 70.717, "step": 17400 }, { "epoch": 0.07033860300504612, "grad_norm": 921.6234741210938, "learning_rate": 3.482e-05, "loss": 185.619, "step": 17410 }, { "epoch": 0.07037900427041376, "grad_norm": 1703.0343017578125, "learning_rate": 3.484e-05, "loss": 142.7865, "step": 17420 }, { "epoch": 0.07041940553578138, "grad_norm": 1368.336181640625, "learning_rate": 3.486e-05, "loss": 116.2519, "step": 17430 }, { "epoch": 0.07045980680114901, "grad_norm": 1559.697265625, "learning_rate": 3.4880000000000005e-05, "loss": 110.2219, "step": 17440 }, { "epoch": 0.07050020806651665, "grad_norm": 3143.30517578125, "learning_rate": 3.49e-05, "loss": 91.7808, "step": 17450 }, { "epoch": 0.07054060933188427, "grad_norm": 1277.235595703125, "learning_rate": 3.4920000000000004e-05, "loss": 143.4617, "step": 17460 }, { "epoch": 0.0705810105972519, "grad_norm": 2107.630615234375, "learning_rate": 3.494e-05, "loss": 141.62, "step": 17470 }, { "epoch": 0.07062141186261954, "grad_norm": 916.9269409179688, "learning_rate": 3.4960000000000004e-05, "loss": 124.4125, "step": 17480 }, { "epoch": 0.07066181312798717, "grad_norm": 495.5371398925781, "learning_rate": 3.498e-05, "loss": 113.1787, "step": 17490 }, { "epoch": 0.0707022143933548, "grad_norm": 722.7784423828125, "learning_rate": 3.5e-05, "loss": 113.6147, "step": 17500 }, { "epoch": 0.07074261565872243, "grad_norm": 8005.68017578125, "learning_rate": 3.502e-05, "loss": 160.4929, "step": 17510 }, { "epoch": 0.07078301692409006, "grad_norm": 836.4596557617188, "learning_rate": 3.504e-05, "loss": 93.0578, "step": 17520 }, { "epoch": 0.0708234181894577, "grad_norm": 1631.076171875, "learning_rate": 3.5060000000000007e-05, "loss": 126.6171, "step": 17530 }, { "epoch": 0.07086381945482532, "grad_norm": 639.418212890625, "learning_rate": 3.508e-05, "loss": 105.7042, "step": 17540 }, { "epoch": 0.07090422072019295, "grad_norm": 1241.2578125, "learning_rate": 3.51e-05, "loss": 201.5257, "step": 17550 }, { "epoch": 0.07094462198556059, "grad_norm": 835.3104248046875, "learning_rate": 3.512e-05, "loss": 159.556, "step": 17560 }, { "epoch": 0.07098502325092822, "grad_norm": 565.4751586914062, "learning_rate": 3.514e-05, "loss": 93.872, "step": 17570 }, { "epoch": 0.07102542451629586, "grad_norm": 599.105712890625, "learning_rate": 3.516e-05, "loss": 132.7931, "step": 17580 }, { "epoch": 0.07106582578166348, "grad_norm": 1342.3175048828125, "learning_rate": 3.518e-05, "loss": 121.4404, "step": 17590 }, { "epoch": 0.07110622704703111, "grad_norm": 853.9281616210938, "learning_rate": 3.52e-05, "loss": 185.0158, "step": 17600 }, { "epoch": 0.07114662831239875, "grad_norm": 1526.38427734375, "learning_rate": 3.5220000000000005e-05, "loss": 130.5014, "step": 17610 }, { "epoch": 0.07118702957776638, "grad_norm": 0.0, "learning_rate": 3.524e-05, "loss": 155.2073, "step": 17620 }, { "epoch": 0.071227430843134, "grad_norm": 722.3214111328125, "learning_rate": 3.5260000000000005e-05, "loss": 98.7197, "step": 17630 }, { "epoch": 0.07126783210850164, "grad_norm": 769.6561889648438, "learning_rate": 3.528e-05, "loss": 98.4344, "step": 17640 }, { "epoch": 0.07130823337386927, "grad_norm": 1916.5062255859375, "learning_rate": 3.53e-05, "loss": 169.392, "step": 17650 }, { "epoch": 0.07134863463923691, "grad_norm": 790.8156127929688, "learning_rate": 3.532e-05, "loss": 112.2699, "step": 17660 }, { "epoch": 0.07138903590460453, "grad_norm": 0.0, "learning_rate": 3.5340000000000004e-05, "loss": 153.1781, "step": 17670 }, { "epoch": 0.07142943716997216, "grad_norm": 1370.72216796875, "learning_rate": 3.536000000000001e-05, "loss": 170.2639, "step": 17680 }, { "epoch": 0.0714698384353398, "grad_norm": 466.6986999511719, "learning_rate": 3.5380000000000003e-05, "loss": 140.3446, "step": 17690 }, { "epoch": 0.07151023970070743, "grad_norm": 1512.586181640625, "learning_rate": 3.54e-05, "loss": 123.2379, "step": 17700 }, { "epoch": 0.07155064096607505, "grad_norm": 1187.423828125, "learning_rate": 3.542e-05, "loss": 121.9325, "step": 17710 }, { "epoch": 0.07159104223144269, "grad_norm": 1491.8526611328125, "learning_rate": 3.544e-05, "loss": 97.88, "step": 17720 }, { "epoch": 0.07163144349681032, "grad_norm": 819.60400390625, "learning_rate": 3.546e-05, "loss": 136.5916, "step": 17730 }, { "epoch": 0.07167184476217796, "grad_norm": 650.4725952148438, "learning_rate": 3.548e-05, "loss": 100.4026, "step": 17740 }, { "epoch": 0.07171224602754558, "grad_norm": 822.773681640625, "learning_rate": 3.55e-05, "loss": 103.9474, "step": 17750 }, { "epoch": 0.07175264729291321, "grad_norm": 879.6776123046875, "learning_rate": 3.5520000000000006e-05, "loss": 90.2404, "step": 17760 }, { "epoch": 0.07179304855828085, "grad_norm": 1487.169921875, "learning_rate": 3.554e-05, "loss": 172.1061, "step": 17770 }, { "epoch": 0.07183344982364848, "grad_norm": 953.589111328125, "learning_rate": 3.5560000000000005e-05, "loss": 150.0098, "step": 17780 }, { "epoch": 0.0718738510890161, "grad_norm": 688.1337890625, "learning_rate": 3.558e-05, "loss": 78.3569, "step": 17790 }, { "epoch": 0.07191425235438374, "grad_norm": 789.90283203125, "learning_rate": 3.56e-05, "loss": 106.5888, "step": 17800 }, { "epoch": 0.07195465361975137, "grad_norm": 934.8272094726562, "learning_rate": 3.562e-05, "loss": 111.0842, "step": 17810 }, { "epoch": 0.07199505488511901, "grad_norm": 336.4629211425781, "learning_rate": 3.5640000000000004e-05, "loss": 99.3697, "step": 17820 }, { "epoch": 0.07203545615048663, "grad_norm": 3090.107666015625, "learning_rate": 3.566e-05, "loss": 97.4285, "step": 17830 }, { "epoch": 0.07207585741585426, "grad_norm": 1428.7044677734375, "learning_rate": 3.5680000000000004e-05, "loss": 118.1064, "step": 17840 }, { "epoch": 0.0721162586812219, "grad_norm": 2355.272705078125, "learning_rate": 3.57e-05, "loss": 151.7737, "step": 17850 }, { "epoch": 0.07215665994658953, "grad_norm": 1052.816650390625, "learning_rate": 3.5720000000000004e-05, "loss": 86.3124, "step": 17860 }, { "epoch": 0.07219706121195715, "grad_norm": 683.0614624023438, "learning_rate": 3.574e-05, "loss": 120.8411, "step": 17870 }, { "epoch": 0.0722374624773248, "grad_norm": 1161.9776611328125, "learning_rate": 3.5759999999999996e-05, "loss": 145.4579, "step": 17880 }, { "epoch": 0.07227786374269242, "grad_norm": 1213.32861328125, "learning_rate": 3.578e-05, "loss": 160.1318, "step": 17890 }, { "epoch": 0.07231826500806006, "grad_norm": 1079.72119140625, "learning_rate": 3.58e-05, "loss": 125.1568, "step": 17900 }, { "epoch": 0.07235866627342769, "grad_norm": 960.3634643554688, "learning_rate": 3.5820000000000006e-05, "loss": 116.5516, "step": 17910 }, { "epoch": 0.07239906753879531, "grad_norm": 1030.158447265625, "learning_rate": 3.584e-05, "loss": 78.2271, "step": 17920 }, { "epoch": 0.07243946880416295, "grad_norm": 812.413818359375, "learning_rate": 3.586e-05, "loss": 122.26, "step": 17930 }, { "epoch": 0.07247987006953058, "grad_norm": 2242.89794921875, "learning_rate": 3.588e-05, "loss": 150.4362, "step": 17940 }, { "epoch": 0.0725202713348982, "grad_norm": 1707.64306640625, "learning_rate": 3.59e-05, "loss": 107.4575, "step": 17950 }, { "epoch": 0.07256067260026584, "grad_norm": 1091.1968994140625, "learning_rate": 3.592e-05, "loss": 176.9033, "step": 17960 }, { "epoch": 0.07260107386563347, "grad_norm": 660.7951049804688, "learning_rate": 3.594e-05, "loss": 120.4594, "step": 17970 }, { "epoch": 0.07264147513100111, "grad_norm": 1081.6932373046875, "learning_rate": 3.596e-05, "loss": 101.9581, "step": 17980 }, { "epoch": 0.07268187639636874, "grad_norm": 1247.1197509765625, "learning_rate": 3.5980000000000004e-05, "loss": 90.3047, "step": 17990 }, { "epoch": 0.07272227766173636, "grad_norm": 2352.89208984375, "learning_rate": 3.6e-05, "loss": 121.5598, "step": 18000 }, { "epoch": 0.072762678927104, "grad_norm": 774.28466796875, "learning_rate": 3.6020000000000004e-05, "loss": 126.1789, "step": 18010 }, { "epoch": 0.07280308019247163, "grad_norm": 821.19384765625, "learning_rate": 3.604e-05, "loss": 100.4537, "step": 18020 }, { "epoch": 0.07284348145783925, "grad_norm": 919.6599731445312, "learning_rate": 3.606e-05, "loss": 85.4199, "step": 18030 }, { "epoch": 0.0728838827232069, "grad_norm": 805.6433715820312, "learning_rate": 3.608e-05, "loss": 76.5743, "step": 18040 }, { "epoch": 0.07292428398857452, "grad_norm": 1283.3468017578125, "learning_rate": 3.61e-05, "loss": 128.92, "step": 18050 }, { "epoch": 0.07296468525394216, "grad_norm": 2695.972900390625, "learning_rate": 3.6120000000000007e-05, "loss": 131.2778, "step": 18060 }, { "epoch": 0.07300508651930979, "grad_norm": 2971.26904296875, "learning_rate": 3.614e-05, "loss": 103.6507, "step": 18070 }, { "epoch": 0.07304548778467741, "grad_norm": 1631.7698974609375, "learning_rate": 3.616e-05, "loss": 101.8744, "step": 18080 }, { "epoch": 0.07308588905004505, "grad_norm": 707.8176879882812, "learning_rate": 3.618e-05, "loss": 106.0584, "step": 18090 }, { "epoch": 0.07312629031541268, "grad_norm": 497.0185546875, "learning_rate": 3.62e-05, "loss": 74.8409, "step": 18100 }, { "epoch": 0.0731666915807803, "grad_norm": 1669.726806640625, "learning_rate": 3.622e-05, "loss": 105.9762, "step": 18110 }, { "epoch": 0.07320709284614794, "grad_norm": 557.0554809570312, "learning_rate": 3.624e-05, "loss": 116.4344, "step": 18120 }, { "epoch": 0.07324749411151557, "grad_norm": 2284.02099609375, "learning_rate": 3.626e-05, "loss": 122.7274, "step": 18130 }, { "epoch": 0.07328789537688321, "grad_norm": 1483.209228515625, "learning_rate": 3.6280000000000005e-05, "loss": 100.2296, "step": 18140 }, { "epoch": 0.07332829664225084, "grad_norm": 2553.381591796875, "learning_rate": 3.63e-05, "loss": 134.3823, "step": 18150 }, { "epoch": 0.07336869790761846, "grad_norm": 465.40289306640625, "learning_rate": 3.6320000000000005e-05, "loss": 104.473, "step": 18160 }, { "epoch": 0.0734090991729861, "grad_norm": 2143.05419921875, "learning_rate": 3.634e-05, "loss": 133.1543, "step": 18170 }, { "epoch": 0.07344950043835373, "grad_norm": 854.1031494140625, "learning_rate": 3.636e-05, "loss": 139.8475, "step": 18180 }, { "epoch": 0.07348990170372136, "grad_norm": 812.4703979492188, "learning_rate": 3.638e-05, "loss": 124.2388, "step": 18190 }, { "epoch": 0.073530302969089, "grad_norm": 693.6560668945312, "learning_rate": 3.6400000000000004e-05, "loss": 125.4126, "step": 18200 }, { "epoch": 0.07357070423445662, "grad_norm": 1367.3717041015625, "learning_rate": 3.642000000000001e-05, "loss": 141.6495, "step": 18210 }, { "epoch": 0.07361110549982426, "grad_norm": 1077.287353515625, "learning_rate": 3.6440000000000003e-05, "loss": 130.6979, "step": 18220 }, { "epoch": 0.07365150676519189, "grad_norm": 2508.4072265625, "learning_rate": 3.646e-05, "loss": 125.5967, "step": 18230 }, { "epoch": 0.07369190803055951, "grad_norm": 2781.412841796875, "learning_rate": 3.648e-05, "loss": 149.0504, "step": 18240 }, { "epoch": 0.07373230929592715, "grad_norm": 1022.107177734375, "learning_rate": 3.65e-05, "loss": 128.4223, "step": 18250 }, { "epoch": 0.07377271056129478, "grad_norm": 700.0909423828125, "learning_rate": 3.652e-05, "loss": 95.3364, "step": 18260 }, { "epoch": 0.0738131118266624, "grad_norm": 992.6243896484375, "learning_rate": 3.654e-05, "loss": 130.7981, "step": 18270 }, { "epoch": 0.07385351309203005, "grad_norm": 1268.7449951171875, "learning_rate": 3.656e-05, "loss": 116.5302, "step": 18280 }, { "epoch": 0.07389391435739767, "grad_norm": 915.7352905273438, "learning_rate": 3.6580000000000006e-05, "loss": 141.5618, "step": 18290 }, { "epoch": 0.0739343156227653, "grad_norm": 3412.534912109375, "learning_rate": 3.66e-05, "loss": 188.1361, "step": 18300 }, { "epoch": 0.07397471688813294, "grad_norm": 1475.6259765625, "learning_rate": 3.6620000000000005e-05, "loss": 125.9999, "step": 18310 }, { "epoch": 0.07401511815350056, "grad_norm": 838.94140625, "learning_rate": 3.664e-05, "loss": 142.8819, "step": 18320 }, { "epoch": 0.0740555194188682, "grad_norm": 2300.423583984375, "learning_rate": 3.666e-05, "loss": 168.7538, "step": 18330 }, { "epoch": 0.07409592068423583, "grad_norm": 2023.9940185546875, "learning_rate": 3.668e-05, "loss": 126.4141, "step": 18340 }, { "epoch": 0.07413632194960346, "grad_norm": 884.4683227539062, "learning_rate": 3.6700000000000004e-05, "loss": 64.7707, "step": 18350 }, { "epoch": 0.0741767232149711, "grad_norm": 598.0726928710938, "learning_rate": 3.672000000000001e-05, "loss": 90.2243, "step": 18360 }, { "epoch": 0.07421712448033872, "grad_norm": 852.4734497070312, "learning_rate": 3.6740000000000004e-05, "loss": 193.19, "step": 18370 }, { "epoch": 0.07425752574570635, "grad_norm": 926.2811889648438, "learning_rate": 3.676e-05, "loss": 113.1817, "step": 18380 }, { "epoch": 0.07429792701107399, "grad_norm": 840.8629760742188, "learning_rate": 3.6780000000000004e-05, "loss": 128.7378, "step": 18390 }, { "epoch": 0.07433832827644161, "grad_norm": 1284.182373046875, "learning_rate": 3.68e-05, "loss": 119.9407, "step": 18400 }, { "epoch": 0.07437872954180925, "grad_norm": 3427.597900390625, "learning_rate": 3.682e-05, "loss": 168.6104, "step": 18410 }, { "epoch": 0.07441913080717688, "grad_norm": 1045.6478271484375, "learning_rate": 3.684e-05, "loss": 92.3087, "step": 18420 }, { "epoch": 0.07445953207254451, "grad_norm": 1453.0216064453125, "learning_rate": 3.686e-05, "loss": 128.912, "step": 18430 }, { "epoch": 0.07449993333791215, "grad_norm": 1257.60986328125, "learning_rate": 3.6880000000000006e-05, "loss": 67.0141, "step": 18440 }, { "epoch": 0.07454033460327977, "grad_norm": 900.5206298828125, "learning_rate": 3.69e-05, "loss": 94.6233, "step": 18450 }, { "epoch": 0.0745807358686474, "grad_norm": 769.1461181640625, "learning_rate": 3.692e-05, "loss": 128.7582, "step": 18460 }, { "epoch": 0.07462113713401504, "grad_norm": 1010.8888549804688, "learning_rate": 3.694e-05, "loss": 115.287, "step": 18470 }, { "epoch": 0.07466153839938267, "grad_norm": 974.634765625, "learning_rate": 3.696e-05, "loss": 97.2538, "step": 18480 }, { "epoch": 0.0747019396647503, "grad_norm": 998.1504516601562, "learning_rate": 3.698e-05, "loss": 123.2764, "step": 18490 }, { "epoch": 0.07474234093011793, "grad_norm": 1043.8447265625, "learning_rate": 3.7e-05, "loss": 105.7509, "step": 18500 }, { "epoch": 0.07478274219548556, "grad_norm": 551.1446533203125, "learning_rate": 3.702e-05, "loss": 92.3631, "step": 18510 }, { "epoch": 0.0748231434608532, "grad_norm": 677.5746459960938, "learning_rate": 3.7040000000000005e-05, "loss": 92.7502, "step": 18520 }, { "epoch": 0.07486354472622082, "grad_norm": 935.2184448242188, "learning_rate": 3.706e-05, "loss": 94.4942, "step": 18530 }, { "epoch": 0.07490394599158845, "grad_norm": 4195.58837890625, "learning_rate": 3.7080000000000004e-05, "loss": 180.4082, "step": 18540 }, { "epoch": 0.07494434725695609, "grad_norm": 553.2954711914062, "learning_rate": 3.71e-05, "loss": 136.8525, "step": 18550 }, { "epoch": 0.07498474852232372, "grad_norm": 1196.5914306640625, "learning_rate": 3.712e-05, "loss": 87.8535, "step": 18560 }, { "epoch": 0.07502514978769136, "grad_norm": 925.3016967773438, "learning_rate": 3.714e-05, "loss": 99.2028, "step": 18570 }, { "epoch": 0.07506555105305898, "grad_norm": 743.458984375, "learning_rate": 3.716e-05, "loss": 115.926, "step": 18580 }, { "epoch": 0.07510595231842661, "grad_norm": 452.0498962402344, "learning_rate": 3.7180000000000007e-05, "loss": 72.2173, "step": 18590 }, { "epoch": 0.07514635358379425, "grad_norm": 1277.8033447265625, "learning_rate": 3.72e-05, "loss": 196.1923, "step": 18600 }, { "epoch": 0.07518675484916187, "grad_norm": 613.0692138671875, "learning_rate": 3.722e-05, "loss": 165.0734, "step": 18610 }, { "epoch": 0.0752271561145295, "grad_norm": 707.7522583007812, "learning_rate": 3.724e-05, "loss": 146.3568, "step": 18620 }, { "epoch": 0.07526755737989714, "grad_norm": 892.3648681640625, "learning_rate": 3.726e-05, "loss": 119.203, "step": 18630 }, { "epoch": 0.07530795864526477, "grad_norm": 763.5734252929688, "learning_rate": 3.728e-05, "loss": 154.0482, "step": 18640 }, { "epoch": 0.0753483599106324, "grad_norm": 779.2640991210938, "learning_rate": 3.73e-05, "loss": 73.6996, "step": 18650 }, { "epoch": 0.07538876117600003, "grad_norm": 2393.424560546875, "learning_rate": 3.732e-05, "loss": 129.6301, "step": 18660 }, { "epoch": 0.07542916244136766, "grad_norm": 2221.367919921875, "learning_rate": 3.7340000000000005e-05, "loss": 132.7365, "step": 18670 }, { "epoch": 0.0754695637067353, "grad_norm": 1009.402587890625, "learning_rate": 3.736e-05, "loss": 110.5086, "step": 18680 }, { "epoch": 0.07550996497210292, "grad_norm": 5788.82275390625, "learning_rate": 3.7380000000000005e-05, "loss": 93.0837, "step": 18690 }, { "epoch": 0.07555036623747055, "grad_norm": 989.4839477539062, "learning_rate": 3.74e-05, "loss": 120.5923, "step": 18700 }, { "epoch": 0.07559076750283819, "grad_norm": 1187.691650390625, "learning_rate": 3.742e-05, "loss": 191.3571, "step": 18710 }, { "epoch": 0.07563116876820582, "grad_norm": 1042.4420166015625, "learning_rate": 3.744e-05, "loss": 123.3321, "step": 18720 }, { "epoch": 0.07567157003357346, "grad_norm": 593.2315063476562, "learning_rate": 3.7460000000000004e-05, "loss": 124.6118, "step": 18730 }, { "epoch": 0.07571197129894108, "grad_norm": 1148.8316650390625, "learning_rate": 3.748000000000001e-05, "loss": 129.3611, "step": 18740 }, { "epoch": 0.07575237256430871, "grad_norm": 721.3247680664062, "learning_rate": 3.7500000000000003e-05, "loss": 115.121, "step": 18750 }, { "epoch": 0.07579277382967635, "grad_norm": 1153.5718994140625, "learning_rate": 3.752e-05, "loss": 96.7754, "step": 18760 }, { "epoch": 0.07583317509504398, "grad_norm": 559.9420166015625, "learning_rate": 3.754e-05, "loss": 117.4526, "step": 18770 }, { "epoch": 0.0758735763604116, "grad_norm": 1740.1900634765625, "learning_rate": 3.756e-05, "loss": 103.682, "step": 18780 }, { "epoch": 0.07591397762577924, "grad_norm": 0.0, "learning_rate": 3.758e-05, "loss": 74.3376, "step": 18790 }, { "epoch": 0.07595437889114687, "grad_norm": 1305.6412353515625, "learning_rate": 3.76e-05, "loss": 114.3259, "step": 18800 }, { "epoch": 0.07599478015651451, "grad_norm": 1425.0380859375, "learning_rate": 3.762e-05, "loss": 171.961, "step": 18810 }, { "epoch": 0.07603518142188213, "grad_norm": 1266.49072265625, "learning_rate": 3.7640000000000006e-05, "loss": 166.5889, "step": 18820 }, { "epoch": 0.07607558268724976, "grad_norm": 1901.879638671875, "learning_rate": 3.766e-05, "loss": 132.9882, "step": 18830 }, { "epoch": 0.0761159839526174, "grad_norm": 690.49951171875, "learning_rate": 3.7680000000000005e-05, "loss": 84.5623, "step": 18840 }, { "epoch": 0.07615638521798503, "grad_norm": 650.4954833984375, "learning_rate": 3.77e-05, "loss": 135.0686, "step": 18850 }, { "epoch": 0.07619678648335265, "grad_norm": 2350.57373046875, "learning_rate": 3.772e-05, "loss": 133.9776, "step": 18860 }, { "epoch": 0.07623718774872029, "grad_norm": 1563.426513671875, "learning_rate": 3.774e-05, "loss": 101.9517, "step": 18870 }, { "epoch": 0.07627758901408792, "grad_norm": 797.4249267578125, "learning_rate": 3.776e-05, "loss": 120.9917, "step": 18880 }, { "epoch": 0.07631799027945556, "grad_norm": 1120.25732421875, "learning_rate": 3.778000000000001e-05, "loss": 115.9363, "step": 18890 }, { "epoch": 0.07635839154482318, "grad_norm": 1334.6461181640625, "learning_rate": 3.7800000000000004e-05, "loss": 147.4801, "step": 18900 }, { "epoch": 0.07639879281019081, "grad_norm": 841.9830322265625, "learning_rate": 3.782e-05, "loss": 127.5146, "step": 18910 }, { "epoch": 0.07643919407555845, "grad_norm": 951.7286987304688, "learning_rate": 3.7840000000000004e-05, "loss": 222.0843, "step": 18920 }, { "epoch": 0.07647959534092608, "grad_norm": 1344.3798828125, "learning_rate": 3.786e-05, "loss": 118.1746, "step": 18930 }, { "epoch": 0.0765199966062937, "grad_norm": 763.527587890625, "learning_rate": 3.788e-05, "loss": 74.0978, "step": 18940 }, { "epoch": 0.07656039787166134, "grad_norm": 800.6107177734375, "learning_rate": 3.79e-05, "loss": 105.7953, "step": 18950 }, { "epoch": 0.07660079913702897, "grad_norm": 1480.68505859375, "learning_rate": 3.792e-05, "loss": 127.3082, "step": 18960 }, { "epoch": 0.07664120040239661, "grad_norm": 334.6526184082031, "learning_rate": 3.7940000000000006e-05, "loss": 85.6625, "step": 18970 }, { "epoch": 0.07668160166776423, "grad_norm": 1548.9031982421875, "learning_rate": 3.796e-05, "loss": 131.035, "step": 18980 }, { "epoch": 0.07672200293313186, "grad_norm": 1739.5743408203125, "learning_rate": 3.7980000000000006e-05, "loss": 172.2465, "step": 18990 }, { "epoch": 0.0767624041984995, "grad_norm": 3091.291015625, "learning_rate": 3.8e-05, "loss": 160.8068, "step": 19000 }, { "epoch": 0.07680280546386713, "grad_norm": 1500.0257568359375, "learning_rate": 3.802e-05, "loss": 76.5043, "step": 19010 }, { "epoch": 0.07684320672923475, "grad_norm": 1217.782958984375, "learning_rate": 3.804e-05, "loss": 102.2421, "step": 19020 }, { "epoch": 0.0768836079946024, "grad_norm": 1387.4803466796875, "learning_rate": 3.806e-05, "loss": 94.5826, "step": 19030 }, { "epoch": 0.07692400925997002, "grad_norm": 1300.2745361328125, "learning_rate": 3.808e-05, "loss": 130.5162, "step": 19040 }, { "epoch": 0.07696441052533766, "grad_norm": 1954.7742919921875, "learning_rate": 3.8100000000000005e-05, "loss": 157.192, "step": 19050 }, { "epoch": 0.07700481179070529, "grad_norm": 2402.571533203125, "learning_rate": 3.812e-05, "loss": 181.6468, "step": 19060 }, { "epoch": 0.07704521305607291, "grad_norm": 1889.8359375, "learning_rate": 3.8140000000000004e-05, "loss": 112.0516, "step": 19070 }, { "epoch": 0.07708561432144055, "grad_norm": 834.8114624023438, "learning_rate": 3.816e-05, "loss": 104.1592, "step": 19080 }, { "epoch": 0.07712601558680818, "grad_norm": 1373.436279296875, "learning_rate": 3.818e-05, "loss": 121.8585, "step": 19090 }, { "epoch": 0.0771664168521758, "grad_norm": 1009.5123901367188, "learning_rate": 3.82e-05, "loss": 156.3473, "step": 19100 }, { "epoch": 0.07720681811754344, "grad_norm": 986.4491577148438, "learning_rate": 3.822e-05, "loss": 123.8396, "step": 19110 }, { "epoch": 0.07724721938291107, "grad_norm": 690.7348022460938, "learning_rate": 3.8240000000000007e-05, "loss": 81.2586, "step": 19120 }, { "epoch": 0.07728762064827871, "grad_norm": 603.4257202148438, "learning_rate": 3.826e-05, "loss": 148.1277, "step": 19130 }, { "epoch": 0.07732802191364634, "grad_norm": 610.265869140625, "learning_rate": 3.828e-05, "loss": 107.6773, "step": 19140 }, { "epoch": 0.07736842317901396, "grad_norm": 596.2022094726562, "learning_rate": 3.83e-05, "loss": 81.1043, "step": 19150 }, { "epoch": 0.0774088244443816, "grad_norm": 630.5447387695312, "learning_rate": 3.832e-05, "loss": 75.0851, "step": 19160 }, { "epoch": 0.07744922570974923, "grad_norm": 967.669921875, "learning_rate": 3.834e-05, "loss": 68.293, "step": 19170 }, { "epoch": 0.07748962697511685, "grad_norm": 7127.17236328125, "learning_rate": 3.836e-05, "loss": 117.3339, "step": 19180 }, { "epoch": 0.0775300282404845, "grad_norm": 978.7310180664062, "learning_rate": 3.838e-05, "loss": 169.6352, "step": 19190 }, { "epoch": 0.07757042950585212, "grad_norm": 1465.735595703125, "learning_rate": 3.8400000000000005e-05, "loss": 133.1634, "step": 19200 }, { "epoch": 0.07761083077121976, "grad_norm": 1282.474609375, "learning_rate": 3.842e-05, "loss": 142.1115, "step": 19210 }, { "epoch": 0.07765123203658739, "grad_norm": 576.253173828125, "learning_rate": 3.8440000000000005e-05, "loss": 167.8867, "step": 19220 }, { "epoch": 0.07769163330195501, "grad_norm": 911.6886596679688, "learning_rate": 3.846e-05, "loss": 106.331, "step": 19230 }, { "epoch": 0.07773203456732265, "grad_norm": 1108.574462890625, "learning_rate": 3.848e-05, "loss": 159.7625, "step": 19240 }, { "epoch": 0.07777243583269028, "grad_norm": 1084.5352783203125, "learning_rate": 3.85e-05, "loss": 102.5427, "step": 19250 }, { "epoch": 0.0778128370980579, "grad_norm": 2015.353515625, "learning_rate": 3.8520000000000004e-05, "loss": 152.4607, "step": 19260 }, { "epoch": 0.07785323836342554, "grad_norm": 2406.21484375, "learning_rate": 3.854000000000001e-05, "loss": 115.7677, "step": 19270 }, { "epoch": 0.07789363962879317, "grad_norm": 1180.568115234375, "learning_rate": 3.8560000000000004e-05, "loss": 87.3702, "step": 19280 }, { "epoch": 0.07793404089416081, "grad_norm": 546.580810546875, "learning_rate": 3.858e-05, "loss": 71.9833, "step": 19290 }, { "epoch": 0.07797444215952844, "grad_norm": 1000.6842651367188, "learning_rate": 3.86e-05, "loss": 127.7328, "step": 19300 }, { "epoch": 0.07801484342489606, "grad_norm": 717.3246459960938, "learning_rate": 3.862e-05, "loss": 95.6857, "step": 19310 }, { "epoch": 0.0780552446902637, "grad_norm": 4112.51220703125, "learning_rate": 3.864e-05, "loss": 199.442, "step": 19320 }, { "epoch": 0.07809564595563133, "grad_norm": 972.7607421875, "learning_rate": 3.866e-05, "loss": 168.3066, "step": 19330 }, { "epoch": 0.07813604722099896, "grad_norm": 1725.4652099609375, "learning_rate": 3.868e-05, "loss": 125.3838, "step": 19340 }, { "epoch": 0.0781764484863666, "grad_norm": 991.9950561523438, "learning_rate": 3.8700000000000006e-05, "loss": 110.0063, "step": 19350 }, { "epoch": 0.07821684975173422, "grad_norm": 570.45751953125, "learning_rate": 3.872e-05, "loss": 148.7821, "step": 19360 }, { "epoch": 0.07825725101710186, "grad_norm": 998.0093994140625, "learning_rate": 3.8740000000000005e-05, "loss": 106.5317, "step": 19370 }, { "epoch": 0.07829765228246949, "grad_norm": 2785.006103515625, "learning_rate": 3.876e-05, "loss": 174.5811, "step": 19380 }, { "epoch": 0.07833805354783711, "grad_norm": 1203.6185302734375, "learning_rate": 3.878e-05, "loss": 159.747, "step": 19390 }, { "epoch": 0.07837845481320475, "grad_norm": 1574.8548583984375, "learning_rate": 3.88e-05, "loss": 161.0951, "step": 19400 }, { "epoch": 0.07841885607857238, "grad_norm": 3679.686279296875, "learning_rate": 3.882e-05, "loss": 140.7081, "step": 19410 }, { "epoch": 0.07845925734394, "grad_norm": 3366.86669921875, "learning_rate": 3.884e-05, "loss": 129.2223, "step": 19420 }, { "epoch": 0.07849965860930765, "grad_norm": 979.6845703125, "learning_rate": 3.8860000000000004e-05, "loss": 123.001, "step": 19430 }, { "epoch": 0.07854005987467527, "grad_norm": 706.135498046875, "learning_rate": 3.888e-05, "loss": 107.5821, "step": 19440 }, { "epoch": 0.07858046114004291, "grad_norm": 3030.436279296875, "learning_rate": 3.8900000000000004e-05, "loss": 136.2499, "step": 19450 }, { "epoch": 0.07862086240541054, "grad_norm": 341.5601501464844, "learning_rate": 3.892e-05, "loss": 87.6859, "step": 19460 }, { "epoch": 0.07866126367077816, "grad_norm": 890.729248046875, "learning_rate": 3.894e-05, "loss": 130.7491, "step": 19470 }, { "epoch": 0.0787016649361458, "grad_norm": 2620.10400390625, "learning_rate": 3.896e-05, "loss": 100.2728, "step": 19480 }, { "epoch": 0.07874206620151343, "grad_norm": 2374.006103515625, "learning_rate": 3.898e-05, "loss": 102.7446, "step": 19490 }, { "epoch": 0.07878246746688106, "grad_norm": 661.5973510742188, "learning_rate": 3.9000000000000006e-05, "loss": 103.1776, "step": 19500 }, { "epoch": 0.0788228687322487, "grad_norm": 496.9710693359375, "learning_rate": 3.902e-05, "loss": 104.6827, "step": 19510 }, { "epoch": 0.07886326999761632, "grad_norm": 950.9691772460938, "learning_rate": 3.9040000000000006e-05, "loss": 104.4576, "step": 19520 }, { "epoch": 0.07890367126298396, "grad_norm": 1480.8165283203125, "learning_rate": 3.906e-05, "loss": 123.9244, "step": 19530 }, { "epoch": 0.07894407252835159, "grad_norm": 1668.803955078125, "learning_rate": 3.908e-05, "loss": 114.0788, "step": 19540 }, { "epoch": 0.07898447379371921, "grad_norm": 998.9266357421875, "learning_rate": 3.91e-05, "loss": 116.9195, "step": 19550 }, { "epoch": 0.07902487505908685, "grad_norm": 685.7608032226562, "learning_rate": 3.912e-05, "loss": 120.8969, "step": 19560 }, { "epoch": 0.07906527632445448, "grad_norm": 1155.7894287109375, "learning_rate": 3.914e-05, "loss": 121.0664, "step": 19570 }, { "epoch": 0.07910567758982211, "grad_norm": 959.234130859375, "learning_rate": 3.9160000000000005e-05, "loss": 96.3062, "step": 19580 }, { "epoch": 0.07914607885518975, "grad_norm": 1889.412109375, "learning_rate": 3.918e-05, "loss": 81.9331, "step": 19590 }, { "epoch": 0.07918648012055737, "grad_norm": 966.55078125, "learning_rate": 3.9200000000000004e-05, "loss": 164.067, "step": 19600 }, { "epoch": 0.07922688138592501, "grad_norm": 645.4921875, "learning_rate": 3.922e-05, "loss": 126.4012, "step": 19610 }, { "epoch": 0.07926728265129264, "grad_norm": 1248.7474365234375, "learning_rate": 3.9240000000000004e-05, "loss": 109.2031, "step": 19620 }, { "epoch": 0.07930768391666027, "grad_norm": 2941.3408203125, "learning_rate": 3.926e-05, "loss": 100.5554, "step": 19630 }, { "epoch": 0.0793480851820279, "grad_norm": 1487.9503173828125, "learning_rate": 3.9280000000000003e-05, "loss": 120.6536, "step": 19640 }, { "epoch": 0.07938848644739553, "grad_norm": 956.5473022460938, "learning_rate": 3.9300000000000007e-05, "loss": 112.047, "step": 19650 }, { "epoch": 0.07942888771276316, "grad_norm": 675.1141967773438, "learning_rate": 3.932e-05, "loss": 121.3393, "step": 19660 }, { "epoch": 0.0794692889781308, "grad_norm": 1062.9857177734375, "learning_rate": 3.9340000000000006e-05, "loss": 118.6263, "step": 19670 }, { "epoch": 0.07950969024349842, "grad_norm": 796.38525390625, "learning_rate": 3.936e-05, "loss": 93.5364, "step": 19680 }, { "epoch": 0.07955009150886606, "grad_norm": 712.4339599609375, "learning_rate": 3.938e-05, "loss": 169.9139, "step": 19690 }, { "epoch": 0.07959049277423369, "grad_norm": 348.9084777832031, "learning_rate": 3.94e-05, "loss": 98.5714, "step": 19700 }, { "epoch": 0.07963089403960132, "grad_norm": 1002.7638549804688, "learning_rate": 3.942e-05, "loss": 101.6811, "step": 19710 }, { "epoch": 0.07967129530496896, "grad_norm": 1141.4864501953125, "learning_rate": 3.944e-05, "loss": 99.0602, "step": 19720 }, { "epoch": 0.07971169657033658, "grad_norm": 1789.052001953125, "learning_rate": 3.9460000000000005e-05, "loss": 146.9852, "step": 19730 }, { "epoch": 0.07975209783570421, "grad_norm": 1004.76611328125, "learning_rate": 3.948e-05, "loss": 108.5757, "step": 19740 }, { "epoch": 0.07979249910107185, "grad_norm": 1779.8487548828125, "learning_rate": 3.9500000000000005e-05, "loss": 92.2654, "step": 19750 }, { "epoch": 0.07983290036643947, "grad_norm": 1891.206787109375, "learning_rate": 3.952e-05, "loss": 130.2949, "step": 19760 }, { "epoch": 0.07987330163180711, "grad_norm": 1210.8375244140625, "learning_rate": 3.954e-05, "loss": 149.1875, "step": 19770 }, { "epoch": 0.07991370289717474, "grad_norm": 1035.046630859375, "learning_rate": 3.956e-05, "loss": 90.7728, "step": 19780 }, { "epoch": 0.07995410416254237, "grad_norm": 1166.8427734375, "learning_rate": 3.958e-05, "loss": 113.0428, "step": 19790 }, { "epoch": 0.07999450542791, "grad_norm": 1342.7369384765625, "learning_rate": 3.960000000000001e-05, "loss": 84.0289, "step": 19800 }, { "epoch": 0.08003490669327763, "grad_norm": 930.572509765625, "learning_rate": 3.9620000000000004e-05, "loss": 137.8586, "step": 19810 }, { "epoch": 0.08007530795864526, "grad_norm": 537.0020141601562, "learning_rate": 3.964e-05, "loss": 136.4454, "step": 19820 }, { "epoch": 0.0801157092240129, "grad_norm": 8671.7001953125, "learning_rate": 3.966e-05, "loss": 194.5568, "step": 19830 }, { "epoch": 0.08015611048938052, "grad_norm": 1121.135986328125, "learning_rate": 3.968e-05, "loss": 171.3159, "step": 19840 }, { "epoch": 0.08019651175474816, "grad_norm": 875.9639282226562, "learning_rate": 3.97e-05, "loss": 138.4204, "step": 19850 }, { "epoch": 0.08023691302011579, "grad_norm": 1031.24462890625, "learning_rate": 3.972e-05, "loss": 70.7438, "step": 19860 }, { "epoch": 0.08027731428548342, "grad_norm": 844.333251953125, "learning_rate": 3.974e-05, "loss": 86.4556, "step": 19870 }, { "epoch": 0.08031771555085106, "grad_norm": 775.6649780273438, "learning_rate": 3.9760000000000006e-05, "loss": 141.9916, "step": 19880 }, { "epoch": 0.08035811681621868, "grad_norm": 579.8970947265625, "learning_rate": 3.978e-05, "loss": 131.3396, "step": 19890 }, { "epoch": 0.08039851808158631, "grad_norm": 0.0, "learning_rate": 3.9800000000000005e-05, "loss": 98.0002, "step": 19900 }, { "epoch": 0.08043891934695395, "grad_norm": 1350.4500732421875, "learning_rate": 3.982e-05, "loss": 221.3071, "step": 19910 }, { "epoch": 0.08047932061232158, "grad_norm": 809.1064453125, "learning_rate": 3.984e-05, "loss": 82.145, "step": 19920 }, { "epoch": 0.08051972187768922, "grad_norm": 1032.8472900390625, "learning_rate": 3.986e-05, "loss": 90.5167, "step": 19930 }, { "epoch": 0.08056012314305684, "grad_norm": 548.9085693359375, "learning_rate": 3.988e-05, "loss": 82.506, "step": 19940 }, { "epoch": 0.08060052440842447, "grad_norm": 828.6427001953125, "learning_rate": 3.99e-05, "loss": 103.581, "step": 19950 }, { "epoch": 0.08064092567379211, "grad_norm": 1131.4522705078125, "learning_rate": 3.9920000000000004e-05, "loss": 124.3697, "step": 19960 }, { "epoch": 0.08068132693915973, "grad_norm": 3095.481689453125, "learning_rate": 3.994e-05, "loss": 180.1047, "step": 19970 }, { "epoch": 0.08072172820452736, "grad_norm": 1326.9815673828125, "learning_rate": 3.9960000000000004e-05, "loss": 111.703, "step": 19980 }, { "epoch": 0.080762129469895, "grad_norm": 563.6680297851562, "learning_rate": 3.998e-05, "loss": 86.8286, "step": 19990 }, { "epoch": 0.08080253073526263, "grad_norm": 867.2445068359375, "learning_rate": 4e-05, "loss": 120.7213, "step": 20000 }, { "epoch": 0.08084293200063027, "grad_norm": 3826.01513671875, "learning_rate": 4.002e-05, "loss": 188.3878, "step": 20010 }, { "epoch": 0.08088333326599789, "grad_norm": 626.7747192382812, "learning_rate": 4.004e-05, "loss": 102.346, "step": 20020 }, { "epoch": 0.08092373453136552, "grad_norm": 447.5019836425781, "learning_rate": 4.0060000000000006e-05, "loss": 112.2855, "step": 20030 }, { "epoch": 0.08096413579673316, "grad_norm": 1387.30029296875, "learning_rate": 4.008e-05, "loss": 144.4808, "step": 20040 }, { "epoch": 0.08100453706210078, "grad_norm": 711.4352416992188, "learning_rate": 4.0100000000000006e-05, "loss": 123.1025, "step": 20050 }, { "epoch": 0.08104493832746841, "grad_norm": 696.3189086914062, "learning_rate": 4.012e-05, "loss": 125.8491, "step": 20060 }, { "epoch": 0.08108533959283605, "grad_norm": 1703.4642333984375, "learning_rate": 4.014e-05, "loss": 180.9456, "step": 20070 }, { "epoch": 0.08112574085820368, "grad_norm": 931.850341796875, "learning_rate": 4.016e-05, "loss": 157.8773, "step": 20080 }, { "epoch": 0.08116614212357132, "grad_norm": 1737.6793212890625, "learning_rate": 4.018e-05, "loss": 108.9433, "step": 20090 }, { "epoch": 0.08120654338893894, "grad_norm": 1597.1063232421875, "learning_rate": 4.02e-05, "loss": 116.3655, "step": 20100 }, { "epoch": 0.08124694465430657, "grad_norm": 1292.120849609375, "learning_rate": 4.0220000000000005e-05, "loss": 128.3085, "step": 20110 }, { "epoch": 0.08128734591967421, "grad_norm": 784.8877563476562, "learning_rate": 4.024e-05, "loss": 142.1472, "step": 20120 }, { "epoch": 0.08132774718504183, "grad_norm": 518.2711181640625, "learning_rate": 4.0260000000000004e-05, "loss": 138.0721, "step": 20130 }, { "epoch": 0.08136814845040946, "grad_norm": 3041.567138671875, "learning_rate": 4.028e-05, "loss": 143.0653, "step": 20140 }, { "epoch": 0.0814085497157771, "grad_norm": 393.4328918457031, "learning_rate": 4.0300000000000004e-05, "loss": 123.1964, "step": 20150 }, { "epoch": 0.08144895098114473, "grad_norm": 433.4530334472656, "learning_rate": 4.032e-05, "loss": 108.3037, "step": 20160 }, { "epoch": 0.08148935224651237, "grad_norm": 1056.2037353515625, "learning_rate": 4.034e-05, "loss": 104.124, "step": 20170 }, { "epoch": 0.08152975351188, "grad_norm": 1283.22607421875, "learning_rate": 4.0360000000000007e-05, "loss": 146.2523, "step": 20180 }, { "epoch": 0.08157015477724762, "grad_norm": 1303.0574951171875, "learning_rate": 4.038e-05, "loss": 108.7274, "step": 20190 }, { "epoch": 0.08161055604261526, "grad_norm": 748.68115234375, "learning_rate": 4.0400000000000006e-05, "loss": 118.8553, "step": 20200 }, { "epoch": 0.08165095730798289, "grad_norm": 559.082763671875, "learning_rate": 4.042e-05, "loss": 104.5537, "step": 20210 }, { "epoch": 0.08169135857335051, "grad_norm": 883.0214233398438, "learning_rate": 4.044e-05, "loss": 119.6957, "step": 20220 }, { "epoch": 0.08173175983871815, "grad_norm": 3064.196533203125, "learning_rate": 4.046e-05, "loss": 167.4422, "step": 20230 }, { "epoch": 0.08177216110408578, "grad_norm": 0.0, "learning_rate": 4.048e-05, "loss": 128.549, "step": 20240 }, { "epoch": 0.08181256236945342, "grad_norm": 755.3316040039062, "learning_rate": 4.05e-05, "loss": 108.4741, "step": 20250 }, { "epoch": 0.08185296363482104, "grad_norm": 1562.85498046875, "learning_rate": 4.0520000000000005e-05, "loss": 111.5061, "step": 20260 }, { "epoch": 0.08189336490018867, "grad_norm": 871.8521118164062, "learning_rate": 4.054e-05, "loss": 110.9914, "step": 20270 }, { "epoch": 0.08193376616555631, "grad_norm": 772.6279907226562, "learning_rate": 4.0560000000000005e-05, "loss": 145.3599, "step": 20280 }, { "epoch": 0.08197416743092394, "grad_norm": 529.8480224609375, "learning_rate": 4.058e-05, "loss": 101.3806, "step": 20290 }, { "epoch": 0.08201456869629156, "grad_norm": 2998.20556640625, "learning_rate": 4.0600000000000004e-05, "loss": 111.4782, "step": 20300 }, { "epoch": 0.0820549699616592, "grad_norm": 1405.43505859375, "learning_rate": 4.062e-05, "loss": 163.5577, "step": 20310 }, { "epoch": 0.08209537122702683, "grad_norm": 1239.15087890625, "learning_rate": 4.064e-05, "loss": 94.2629, "step": 20320 }, { "epoch": 0.08213577249239447, "grad_norm": 1255.966552734375, "learning_rate": 4.066e-05, "loss": 126.8316, "step": 20330 }, { "epoch": 0.0821761737577621, "grad_norm": 737.30126953125, "learning_rate": 4.0680000000000004e-05, "loss": 113.6555, "step": 20340 }, { "epoch": 0.08221657502312972, "grad_norm": 2296.812744140625, "learning_rate": 4.07e-05, "loss": 109.0417, "step": 20350 }, { "epoch": 0.08225697628849736, "grad_norm": 1186.671875, "learning_rate": 4.072e-05, "loss": 86.3001, "step": 20360 }, { "epoch": 0.08229737755386499, "grad_norm": 1619.2152099609375, "learning_rate": 4.074e-05, "loss": 148.3378, "step": 20370 }, { "epoch": 0.08233777881923261, "grad_norm": 3074.595703125, "learning_rate": 4.076e-05, "loss": 112.1198, "step": 20380 }, { "epoch": 0.08237818008460025, "grad_norm": 2610.1181640625, "learning_rate": 4.078e-05, "loss": 118.642, "step": 20390 }, { "epoch": 0.08241858134996788, "grad_norm": 944.0482788085938, "learning_rate": 4.08e-05, "loss": 118.4644, "step": 20400 }, { "epoch": 0.08245898261533552, "grad_norm": 2997.534423828125, "learning_rate": 4.0820000000000006e-05, "loss": 95.5434, "step": 20410 }, { "epoch": 0.08249938388070314, "grad_norm": 1543.2197265625, "learning_rate": 4.084e-05, "loss": 155.0003, "step": 20420 }, { "epoch": 0.08253978514607077, "grad_norm": 749.4830932617188, "learning_rate": 4.0860000000000005e-05, "loss": 98.9934, "step": 20430 }, { "epoch": 0.08258018641143841, "grad_norm": 2779.212646484375, "learning_rate": 4.088e-05, "loss": 115.3504, "step": 20440 }, { "epoch": 0.08262058767680604, "grad_norm": 2938.130615234375, "learning_rate": 4.09e-05, "loss": 121.3548, "step": 20450 }, { "epoch": 0.08266098894217366, "grad_norm": 364.253173828125, "learning_rate": 4.092e-05, "loss": 118.8423, "step": 20460 }, { "epoch": 0.0827013902075413, "grad_norm": 956.0880737304688, "learning_rate": 4.094e-05, "loss": 162.3844, "step": 20470 }, { "epoch": 0.08274179147290893, "grad_norm": 651.1348266601562, "learning_rate": 4.096e-05, "loss": 101.0112, "step": 20480 }, { "epoch": 0.08278219273827657, "grad_norm": 1472.0135498046875, "learning_rate": 4.0980000000000004e-05, "loss": 140.2291, "step": 20490 }, { "epoch": 0.0828225940036442, "grad_norm": 1709.4990234375, "learning_rate": 4.1e-05, "loss": 156.1177, "step": 20500 }, { "epoch": 0.08286299526901182, "grad_norm": 1078.7275390625, "learning_rate": 4.1020000000000004e-05, "loss": 149.2032, "step": 20510 }, { "epoch": 0.08290339653437946, "grad_norm": 1183.4559326171875, "learning_rate": 4.104e-05, "loss": 85.2116, "step": 20520 }, { "epoch": 0.08294379779974709, "grad_norm": 787.4295043945312, "learning_rate": 4.106e-05, "loss": 143.7207, "step": 20530 }, { "epoch": 0.08298419906511471, "grad_norm": 497.1183776855469, "learning_rate": 4.108e-05, "loss": 89.7213, "step": 20540 }, { "epoch": 0.08302460033048235, "grad_norm": 1972.8258056640625, "learning_rate": 4.11e-05, "loss": 115.1941, "step": 20550 }, { "epoch": 0.08306500159584998, "grad_norm": 969.597900390625, "learning_rate": 4.1120000000000006e-05, "loss": 102.9652, "step": 20560 }, { "epoch": 0.08310540286121762, "grad_norm": 698.786376953125, "learning_rate": 4.114e-05, "loss": 63.844, "step": 20570 }, { "epoch": 0.08314580412658525, "grad_norm": 1212.814697265625, "learning_rate": 4.1160000000000006e-05, "loss": 149.4642, "step": 20580 }, { "epoch": 0.08318620539195287, "grad_norm": 552.6875610351562, "learning_rate": 4.118e-05, "loss": 140.7715, "step": 20590 }, { "epoch": 0.08322660665732051, "grad_norm": 1048.22607421875, "learning_rate": 4.12e-05, "loss": 154.9678, "step": 20600 }, { "epoch": 0.08326700792268814, "grad_norm": 1216.959716796875, "learning_rate": 4.122e-05, "loss": 167.41, "step": 20610 }, { "epoch": 0.08330740918805576, "grad_norm": 2889.177490234375, "learning_rate": 4.124e-05, "loss": 142.2961, "step": 20620 }, { "epoch": 0.0833478104534234, "grad_norm": 1590.9716796875, "learning_rate": 4.126e-05, "loss": 158.046, "step": 20630 }, { "epoch": 0.08338821171879103, "grad_norm": 1097.50146484375, "learning_rate": 4.1280000000000005e-05, "loss": 132.7002, "step": 20640 }, { "epoch": 0.08342861298415867, "grad_norm": 2981.930908203125, "learning_rate": 4.13e-05, "loss": 95.1602, "step": 20650 }, { "epoch": 0.0834690142495263, "grad_norm": 2784.6552734375, "learning_rate": 4.1320000000000004e-05, "loss": 173.26, "step": 20660 }, { "epoch": 0.08350941551489392, "grad_norm": 1329.2828369140625, "learning_rate": 4.134e-05, "loss": 162.6604, "step": 20670 }, { "epoch": 0.08354981678026156, "grad_norm": 808.767822265625, "learning_rate": 4.1360000000000004e-05, "loss": 112.3144, "step": 20680 }, { "epoch": 0.08359021804562919, "grad_norm": 1342.0172119140625, "learning_rate": 4.138e-05, "loss": 131.2333, "step": 20690 }, { "epoch": 0.08363061931099681, "grad_norm": 848.967529296875, "learning_rate": 4.14e-05, "loss": 147.4289, "step": 20700 }, { "epoch": 0.08367102057636445, "grad_norm": 566.413818359375, "learning_rate": 4.142000000000001e-05, "loss": 150.0216, "step": 20710 }, { "epoch": 0.08371142184173208, "grad_norm": 707.4857788085938, "learning_rate": 4.144e-05, "loss": 113.268, "step": 20720 }, { "epoch": 0.08375182310709972, "grad_norm": 851.73779296875, "learning_rate": 4.1460000000000006e-05, "loss": 128.7113, "step": 20730 }, { "epoch": 0.08379222437246735, "grad_norm": 28336.75390625, "learning_rate": 4.148e-05, "loss": 211.0017, "step": 20740 }, { "epoch": 0.08383262563783497, "grad_norm": 1380.7489013671875, "learning_rate": 4.15e-05, "loss": 72.6195, "step": 20750 }, { "epoch": 0.08387302690320261, "grad_norm": 1030.243408203125, "learning_rate": 4.152e-05, "loss": 127.0354, "step": 20760 }, { "epoch": 0.08391342816857024, "grad_norm": 7085.2041015625, "learning_rate": 4.154e-05, "loss": 119.6303, "step": 20770 }, { "epoch": 0.08395382943393787, "grad_norm": 783.239013671875, "learning_rate": 4.156e-05, "loss": 123.3007, "step": 20780 }, { "epoch": 0.0839942306993055, "grad_norm": 1146.0455322265625, "learning_rate": 4.1580000000000005e-05, "loss": 111.6334, "step": 20790 }, { "epoch": 0.08403463196467313, "grad_norm": 3808.298583984375, "learning_rate": 4.16e-05, "loss": 155.187, "step": 20800 }, { "epoch": 0.08407503323004077, "grad_norm": 2816.82275390625, "learning_rate": 4.1620000000000005e-05, "loss": 146.4871, "step": 20810 }, { "epoch": 0.0841154344954084, "grad_norm": 1327.615966796875, "learning_rate": 4.164e-05, "loss": 99.5535, "step": 20820 }, { "epoch": 0.08415583576077602, "grad_norm": 759.77490234375, "learning_rate": 4.1660000000000004e-05, "loss": 135.0864, "step": 20830 }, { "epoch": 0.08419623702614366, "grad_norm": 1578.577392578125, "learning_rate": 4.168e-05, "loss": 134.005, "step": 20840 }, { "epoch": 0.08423663829151129, "grad_norm": 845.4844970703125, "learning_rate": 4.17e-05, "loss": 72.3538, "step": 20850 }, { "epoch": 0.08427703955687892, "grad_norm": 681.817626953125, "learning_rate": 4.172e-05, "loss": 89.2945, "step": 20860 }, { "epoch": 0.08431744082224656, "grad_norm": 610.7493896484375, "learning_rate": 4.1740000000000004e-05, "loss": 88.0488, "step": 20870 }, { "epoch": 0.08435784208761418, "grad_norm": 1551.756103515625, "learning_rate": 4.176000000000001e-05, "loss": 144.3311, "step": 20880 }, { "epoch": 0.08439824335298182, "grad_norm": 941.2823486328125, "learning_rate": 4.178e-05, "loss": 108.5786, "step": 20890 }, { "epoch": 0.08443864461834945, "grad_norm": 2806.956787109375, "learning_rate": 4.18e-05, "loss": 139.1996, "step": 20900 }, { "epoch": 0.08447904588371707, "grad_norm": 2764.781982421875, "learning_rate": 4.182e-05, "loss": 202.5234, "step": 20910 }, { "epoch": 0.08451944714908471, "grad_norm": 4884.42431640625, "learning_rate": 4.184e-05, "loss": 113.7894, "step": 20920 }, { "epoch": 0.08455984841445234, "grad_norm": 752.3662719726562, "learning_rate": 4.186e-05, "loss": 108.6684, "step": 20930 }, { "epoch": 0.08460024967981997, "grad_norm": 997.4862060546875, "learning_rate": 4.1880000000000006e-05, "loss": 119.868, "step": 20940 }, { "epoch": 0.0846406509451876, "grad_norm": 1489.1739501953125, "learning_rate": 4.19e-05, "loss": 118.152, "step": 20950 }, { "epoch": 0.08468105221055523, "grad_norm": 509.3271789550781, "learning_rate": 4.1920000000000005e-05, "loss": 95.0759, "step": 20960 }, { "epoch": 0.08472145347592287, "grad_norm": 1403.126708984375, "learning_rate": 4.194e-05, "loss": 130.0431, "step": 20970 }, { "epoch": 0.0847618547412905, "grad_norm": 2763.188232421875, "learning_rate": 4.196e-05, "loss": 82.064, "step": 20980 }, { "epoch": 0.08480225600665812, "grad_norm": 643.8973388671875, "learning_rate": 4.198e-05, "loss": 120.0969, "step": 20990 }, { "epoch": 0.08484265727202576, "grad_norm": 953.9916381835938, "learning_rate": 4.2e-05, "loss": 99.3082, "step": 21000 }, { "epoch": 0.08488305853739339, "grad_norm": 7278.77392578125, "learning_rate": 4.202e-05, "loss": 185.1893, "step": 21010 }, { "epoch": 0.08492345980276102, "grad_norm": 4063.332763671875, "learning_rate": 4.2040000000000004e-05, "loss": 172.6903, "step": 21020 }, { "epoch": 0.08496386106812866, "grad_norm": 355.11962890625, "learning_rate": 4.206e-05, "loss": 97.8671, "step": 21030 }, { "epoch": 0.08500426233349628, "grad_norm": 803.3870239257812, "learning_rate": 4.2080000000000004e-05, "loss": 90.9247, "step": 21040 }, { "epoch": 0.08504466359886392, "grad_norm": 1197.3392333984375, "learning_rate": 4.21e-05, "loss": 111.2428, "step": 21050 }, { "epoch": 0.08508506486423155, "grad_norm": 1060.21728515625, "learning_rate": 4.212e-05, "loss": 80.9763, "step": 21060 }, { "epoch": 0.08512546612959918, "grad_norm": 553.6648559570312, "learning_rate": 4.214e-05, "loss": 164.6878, "step": 21070 }, { "epoch": 0.08516586739496682, "grad_norm": 1732.870361328125, "learning_rate": 4.2159999999999996e-05, "loss": 127.5546, "step": 21080 }, { "epoch": 0.08520626866033444, "grad_norm": 3130.48828125, "learning_rate": 4.2180000000000006e-05, "loss": 125.8488, "step": 21090 }, { "epoch": 0.08524666992570207, "grad_norm": 1232.9683837890625, "learning_rate": 4.22e-05, "loss": 110.2355, "step": 21100 }, { "epoch": 0.08528707119106971, "grad_norm": 1068.6566162109375, "learning_rate": 4.2220000000000006e-05, "loss": 116.9769, "step": 21110 }, { "epoch": 0.08532747245643733, "grad_norm": 687.2569580078125, "learning_rate": 4.224e-05, "loss": 124.6508, "step": 21120 }, { "epoch": 0.08536787372180497, "grad_norm": 680.8721313476562, "learning_rate": 4.226e-05, "loss": 123.4505, "step": 21130 }, { "epoch": 0.0854082749871726, "grad_norm": 1629.7750244140625, "learning_rate": 4.228e-05, "loss": 165.3711, "step": 21140 }, { "epoch": 0.08544867625254023, "grad_norm": 1123.4427490234375, "learning_rate": 4.23e-05, "loss": 104.645, "step": 21150 }, { "epoch": 0.08548907751790787, "grad_norm": 684.88134765625, "learning_rate": 4.232e-05, "loss": 87.5831, "step": 21160 }, { "epoch": 0.08552947878327549, "grad_norm": 1038.5843505859375, "learning_rate": 4.2340000000000005e-05, "loss": 111.1774, "step": 21170 }, { "epoch": 0.08556988004864312, "grad_norm": 441.3775634765625, "learning_rate": 4.236e-05, "loss": 97.0562, "step": 21180 }, { "epoch": 0.08561028131401076, "grad_norm": 1054.5010986328125, "learning_rate": 4.2380000000000004e-05, "loss": 137.0658, "step": 21190 }, { "epoch": 0.08565068257937838, "grad_norm": 2301.26708984375, "learning_rate": 4.24e-05, "loss": 121.703, "step": 21200 }, { "epoch": 0.08569108384474602, "grad_norm": 1787.3651123046875, "learning_rate": 4.2420000000000004e-05, "loss": 168.9554, "step": 21210 }, { "epoch": 0.08573148511011365, "grad_norm": 3590.007568359375, "learning_rate": 4.244e-05, "loss": 162.3379, "step": 21220 }, { "epoch": 0.08577188637548128, "grad_norm": 1609.46142578125, "learning_rate": 4.246e-05, "loss": 124.5303, "step": 21230 }, { "epoch": 0.08581228764084892, "grad_norm": 558.2980346679688, "learning_rate": 4.248e-05, "loss": 95.647, "step": 21240 }, { "epoch": 0.08585268890621654, "grad_norm": 1266.0831298828125, "learning_rate": 4.25e-05, "loss": 95.554, "step": 21250 }, { "epoch": 0.08589309017158417, "grad_norm": 1427.5911865234375, "learning_rate": 4.2520000000000006e-05, "loss": 136.6243, "step": 21260 }, { "epoch": 0.08593349143695181, "grad_norm": 1910.5550537109375, "learning_rate": 4.254e-05, "loss": 104.6323, "step": 21270 }, { "epoch": 0.08597389270231943, "grad_norm": 2682.699951171875, "learning_rate": 4.256e-05, "loss": 123.6776, "step": 21280 }, { "epoch": 0.08601429396768706, "grad_norm": 7804.3984375, "learning_rate": 4.258e-05, "loss": 165.5531, "step": 21290 }, { "epoch": 0.0860546952330547, "grad_norm": 790.6614990234375, "learning_rate": 4.26e-05, "loss": 114.8768, "step": 21300 }, { "epoch": 0.08609509649842233, "grad_norm": 1748.3701171875, "learning_rate": 4.262e-05, "loss": 120.4578, "step": 21310 }, { "epoch": 0.08613549776378997, "grad_norm": 948.3206176757812, "learning_rate": 4.2640000000000005e-05, "loss": 100.5346, "step": 21320 }, { "epoch": 0.0861758990291576, "grad_norm": 695.2459106445312, "learning_rate": 4.266e-05, "loss": 97.4773, "step": 21330 }, { "epoch": 0.08621630029452522, "grad_norm": 2172.08837890625, "learning_rate": 4.2680000000000005e-05, "loss": 132.5773, "step": 21340 }, { "epoch": 0.08625670155989286, "grad_norm": 867.505859375, "learning_rate": 4.27e-05, "loss": 104.199, "step": 21350 }, { "epoch": 0.08629710282526049, "grad_norm": 1258.5723876953125, "learning_rate": 4.2720000000000004e-05, "loss": 99.1206, "step": 21360 }, { "epoch": 0.08633750409062811, "grad_norm": 974.3964233398438, "learning_rate": 4.274e-05, "loss": 98.0185, "step": 21370 }, { "epoch": 0.08637790535599575, "grad_norm": 1084.6614990234375, "learning_rate": 4.276e-05, "loss": 95.9101, "step": 21380 }, { "epoch": 0.08641830662136338, "grad_norm": 1883.6488037109375, "learning_rate": 4.278e-05, "loss": 125.3519, "step": 21390 }, { "epoch": 0.08645870788673102, "grad_norm": 3283.783935546875, "learning_rate": 4.2800000000000004e-05, "loss": 166.7042, "step": 21400 }, { "epoch": 0.08649910915209864, "grad_norm": 942.7132568359375, "learning_rate": 4.282000000000001e-05, "loss": 154.2377, "step": 21410 }, { "epoch": 0.08653951041746627, "grad_norm": 1186.5281982421875, "learning_rate": 4.284e-05, "loss": 104.8457, "step": 21420 }, { "epoch": 0.08657991168283391, "grad_norm": 1021.5708618164062, "learning_rate": 4.286e-05, "loss": 100.3719, "step": 21430 }, { "epoch": 0.08662031294820154, "grad_norm": 1118.336669921875, "learning_rate": 4.288e-05, "loss": 145.7167, "step": 21440 }, { "epoch": 0.08666071421356916, "grad_norm": 686.393310546875, "learning_rate": 4.29e-05, "loss": 107.4845, "step": 21450 }, { "epoch": 0.0867011154789368, "grad_norm": 10137.349609375, "learning_rate": 4.292e-05, "loss": 131.5519, "step": 21460 }, { "epoch": 0.08674151674430443, "grad_norm": 4428.59521484375, "learning_rate": 4.2940000000000006e-05, "loss": 163.3146, "step": 21470 }, { "epoch": 0.08678191800967207, "grad_norm": 816.3525390625, "learning_rate": 4.296e-05, "loss": 69.2457, "step": 21480 }, { "epoch": 0.0868223192750397, "grad_norm": 1754.2156982421875, "learning_rate": 4.2980000000000005e-05, "loss": 163.8832, "step": 21490 }, { "epoch": 0.08686272054040732, "grad_norm": 1116.2850341796875, "learning_rate": 4.3e-05, "loss": 86.4771, "step": 21500 }, { "epoch": 0.08690312180577496, "grad_norm": 542.83984375, "learning_rate": 4.3020000000000005e-05, "loss": 121.8374, "step": 21510 }, { "epoch": 0.08694352307114259, "grad_norm": 1956.363037109375, "learning_rate": 4.304e-05, "loss": 108.1872, "step": 21520 }, { "epoch": 0.08698392433651021, "grad_norm": 1243.8492431640625, "learning_rate": 4.306e-05, "loss": 115.3768, "step": 21530 }, { "epoch": 0.08702432560187785, "grad_norm": 1239.7283935546875, "learning_rate": 4.308e-05, "loss": 81.8431, "step": 21540 }, { "epoch": 0.08706472686724548, "grad_norm": 759.3988037109375, "learning_rate": 4.3100000000000004e-05, "loss": 128.8443, "step": 21550 }, { "epoch": 0.08710512813261312, "grad_norm": 1724.4061279296875, "learning_rate": 4.312000000000001e-05, "loss": 132.2185, "step": 21560 }, { "epoch": 0.08714552939798074, "grad_norm": 815.0044555664062, "learning_rate": 4.3140000000000004e-05, "loss": 76.6797, "step": 21570 }, { "epoch": 0.08718593066334837, "grad_norm": 458.9404602050781, "learning_rate": 4.316e-05, "loss": 115.9924, "step": 21580 }, { "epoch": 0.08722633192871601, "grad_norm": 553.5025024414062, "learning_rate": 4.318e-05, "loss": 121.1555, "step": 21590 }, { "epoch": 0.08726673319408364, "grad_norm": 1283.981689453125, "learning_rate": 4.32e-05, "loss": 134.989, "step": 21600 }, { "epoch": 0.08730713445945126, "grad_norm": 438.9365234375, "learning_rate": 4.3219999999999996e-05, "loss": 96.8193, "step": 21610 }, { "epoch": 0.0873475357248189, "grad_norm": 963.8663940429688, "learning_rate": 4.324e-05, "loss": 109.2355, "step": 21620 }, { "epoch": 0.08738793699018653, "grad_norm": 389.078369140625, "learning_rate": 4.326e-05, "loss": 179.7937, "step": 21630 }, { "epoch": 0.08742833825555417, "grad_norm": 1283.707763671875, "learning_rate": 4.3280000000000006e-05, "loss": 93.5443, "step": 21640 }, { "epoch": 0.0874687395209218, "grad_norm": 1297.939208984375, "learning_rate": 4.33e-05, "loss": 119.5012, "step": 21650 }, { "epoch": 0.08750914078628942, "grad_norm": 1461.7098388671875, "learning_rate": 4.332e-05, "loss": 112.2618, "step": 21660 }, { "epoch": 0.08754954205165706, "grad_norm": 370.33056640625, "learning_rate": 4.334e-05, "loss": 118.6215, "step": 21670 }, { "epoch": 0.08758994331702469, "grad_norm": 1230.4083251953125, "learning_rate": 4.336e-05, "loss": 83.1036, "step": 21680 }, { "epoch": 0.08763034458239231, "grad_norm": 713.216796875, "learning_rate": 4.338e-05, "loss": 116.6274, "step": 21690 }, { "epoch": 0.08767074584775995, "grad_norm": 333.86663818359375, "learning_rate": 4.3400000000000005e-05, "loss": 138.7234, "step": 21700 }, { "epoch": 0.08771114711312758, "grad_norm": 1813.9105224609375, "learning_rate": 4.342e-05, "loss": 124.1437, "step": 21710 }, { "epoch": 0.08775154837849522, "grad_norm": 1551.122802734375, "learning_rate": 4.3440000000000004e-05, "loss": 117.1614, "step": 21720 }, { "epoch": 0.08779194964386285, "grad_norm": 556.9833984375, "learning_rate": 4.346e-05, "loss": 85.8124, "step": 21730 }, { "epoch": 0.08783235090923047, "grad_norm": 1249.522705078125, "learning_rate": 4.3480000000000004e-05, "loss": 82.4492, "step": 21740 }, { "epoch": 0.08787275217459811, "grad_norm": 644.7138671875, "learning_rate": 4.35e-05, "loss": 103.3423, "step": 21750 }, { "epoch": 0.08791315343996574, "grad_norm": 893.0758056640625, "learning_rate": 4.352e-05, "loss": 140.0829, "step": 21760 }, { "epoch": 0.08795355470533336, "grad_norm": 527.2681274414062, "learning_rate": 4.354e-05, "loss": 99.5734, "step": 21770 }, { "epoch": 0.087993955970701, "grad_norm": 623.2860717773438, "learning_rate": 4.356e-05, "loss": 75.2396, "step": 21780 }, { "epoch": 0.08803435723606863, "grad_norm": 590.4302978515625, "learning_rate": 4.3580000000000006e-05, "loss": 117.1983, "step": 21790 }, { "epoch": 0.08807475850143627, "grad_norm": 945.4894409179688, "learning_rate": 4.36e-05, "loss": 156.6693, "step": 21800 }, { "epoch": 0.0881151597668039, "grad_norm": 892.6826782226562, "learning_rate": 4.362e-05, "loss": 128.0761, "step": 21810 }, { "epoch": 0.08815556103217152, "grad_norm": 1329.8853759765625, "learning_rate": 4.364e-05, "loss": 81.0603, "step": 21820 }, { "epoch": 0.08819596229753916, "grad_norm": 3241.863525390625, "learning_rate": 4.366e-05, "loss": 114.7735, "step": 21830 }, { "epoch": 0.08823636356290679, "grad_norm": 621.0053100585938, "learning_rate": 4.368e-05, "loss": 99.9302, "step": 21840 }, { "epoch": 0.08827676482827441, "grad_norm": 571.9194946289062, "learning_rate": 4.3700000000000005e-05, "loss": 60.8999, "step": 21850 }, { "epoch": 0.08831716609364205, "grad_norm": 1223.1873779296875, "learning_rate": 4.372e-05, "loss": 115.3964, "step": 21860 }, { "epoch": 0.08835756735900968, "grad_norm": 1025.819580078125, "learning_rate": 4.3740000000000005e-05, "loss": 115.1883, "step": 21870 }, { "epoch": 0.08839796862437732, "grad_norm": 1899.594970703125, "learning_rate": 4.376e-05, "loss": 118.3364, "step": 21880 }, { "epoch": 0.08843836988974495, "grad_norm": 1690.5460205078125, "learning_rate": 4.3780000000000004e-05, "loss": 88.396, "step": 21890 }, { "epoch": 0.08847877115511257, "grad_norm": 1255.8304443359375, "learning_rate": 4.38e-05, "loss": 95.6884, "step": 21900 }, { "epoch": 0.08851917242048021, "grad_norm": 2578.5478515625, "learning_rate": 4.382e-05, "loss": 143.0776, "step": 21910 }, { "epoch": 0.08855957368584784, "grad_norm": 4696.71826171875, "learning_rate": 4.384e-05, "loss": 95.2831, "step": 21920 }, { "epoch": 0.08859997495121547, "grad_norm": 3200.905517578125, "learning_rate": 4.3860000000000004e-05, "loss": 98.7196, "step": 21930 }, { "epoch": 0.0886403762165831, "grad_norm": 771.8877563476562, "learning_rate": 4.388000000000001e-05, "loss": 109.9122, "step": 21940 }, { "epoch": 0.08868077748195073, "grad_norm": 2375.28515625, "learning_rate": 4.39e-05, "loss": 97.5147, "step": 21950 }, { "epoch": 0.08872117874731837, "grad_norm": 1200.1995849609375, "learning_rate": 4.392e-05, "loss": 163.8434, "step": 21960 }, { "epoch": 0.088761580012686, "grad_norm": 743.7747192382812, "learning_rate": 4.394e-05, "loss": 152.2877, "step": 21970 }, { "epoch": 0.08880198127805362, "grad_norm": 748.3530883789062, "learning_rate": 4.396e-05, "loss": 102.9261, "step": 21980 }, { "epoch": 0.08884238254342126, "grad_norm": 0.0, "learning_rate": 4.398e-05, "loss": 86.7398, "step": 21990 }, { "epoch": 0.08888278380878889, "grad_norm": 641.658935546875, "learning_rate": 4.4000000000000006e-05, "loss": 115.4044, "step": 22000 }, { "epoch": 0.08892318507415652, "grad_norm": 757.2327880859375, "learning_rate": 4.402e-05, "loss": 106.9586, "step": 22010 }, { "epoch": 0.08896358633952416, "grad_norm": 1741.6318359375, "learning_rate": 4.4040000000000005e-05, "loss": 61.3795, "step": 22020 }, { "epoch": 0.08900398760489178, "grad_norm": 3420.192138671875, "learning_rate": 4.406e-05, "loss": 150.3777, "step": 22030 }, { "epoch": 0.08904438887025942, "grad_norm": 785.1651000976562, "learning_rate": 4.4080000000000005e-05, "loss": 112.2344, "step": 22040 }, { "epoch": 0.08908479013562705, "grad_norm": 2430.542236328125, "learning_rate": 4.41e-05, "loss": 84.5093, "step": 22050 }, { "epoch": 0.08912519140099467, "grad_norm": 1095.4105224609375, "learning_rate": 4.412e-05, "loss": 129.3121, "step": 22060 }, { "epoch": 0.08916559266636231, "grad_norm": 654.0252075195312, "learning_rate": 4.414e-05, "loss": 105.0435, "step": 22070 }, { "epoch": 0.08920599393172994, "grad_norm": 6289.986328125, "learning_rate": 4.4160000000000004e-05, "loss": 141.9109, "step": 22080 }, { "epoch": 0.08924639519709757, "grad_norm": 3365.2255859375, "learning_rate": 4.418000000000001e-05, "loss": 164.1694, "step": 22090 }, { "epoch": 0.0892867964624652, "grad_norm": 1369.172607421875, "learning_rate": 4.4200000000000004e-05, "loss": 133.5227, "step": 22100 }, { "epoch": 0.08932719772783283, "grad_norm": 2439.97265625, "learning_rate": 4.422e-05, "loss": 126.4849, "step": 22110 }, { "epoch": 0.08936759899320047, "grad_norm": 1391.3780517578125, "learning_rate": 4.424e-05, "loss": 103.1816, "step": 22120 }, { "epoch": 0.0894080002585681, "grad_norm": 2001.207275390625, "learning_rate": 4.426e-05, "loss": 124.9317, "step": 22130 }, { "epoch": 0.08944840152393572, "grad_norm": 1090.6702880859375, "learning_rate": 4.428e-05, "loss": 86.8185, "step": 22140 }, { "epoch": 0.08948880278930336, "grad_norm": 1352.2353515625, "learning_rate": 4.43e-05, "loss": 123.4582, "step": 22150 }, { "epoch": 0.08952920405467099, "grad_norm": 1216.326171875, "learning_rate": 4.432e-05, "loss": 77.8633, "step": 22160 }, { "epoch": 0.08956960532003862, "grad_norm": 805.3572998046875, "learning_rate": 4.4340000000000006e-05, "loss": 112.9714, "step": 22170 }, { "epoch": 0.08961000658540626, "grad_norm": 558.133544921875, "learning_rate": 4.436e-05, "loss": 89.9262, "step": 22180 }, { "epoch": 0.08965040785077388, "grad_norm": 2130.873046875, "learning_rate": 4.438e-05, "loss": 112.3018, "step": 22190 }, { "epoch": 0.08969080911614152, "grad_norm": 1761.7283935546875, "learning_rate": 4.44e-05, "loss": 111.0985, "step": 22200 }, { "epoch": 0.08973121038150915, "grad_norm": 915.045166015625, "learning_rate": 4.442e-05, "loss": 107.6649, "step": 22210 }, { "epoch": 0.08977161164687678, "grad_norm": 1191.688232421875, "learning_rate": 4.444e-05, "loss": 165.1702, "step": 22220 }, { "epoch": 0.08981201291224442, "grad_norm": 496.5788879394531, "learning_rate": 4.4460000000000005e-05, "loss": 82.1613, "step": 22230 }, { "epoch": 0.08985241417761204, "grad_norm": 500.4427185058594, "learning_rate": 4.448e-05, "loss": 154.052, "step": 22240 }, { "epoch": 0.08989281544297967, "grad_norm": 1123.475341796875, "learning_rate": 4.4500000000000004e-05, "loss": 103.6876, "step": 22250 }, { "epoch": 0.08993321670834731, "grad_norm": 763.8755493164062, "learning_rate": 4.452e-05, "loss": 105.7942, "step": 22260 }, { "epoch": 0.08997361797371493, "grad_norm": 659.6030883789062, "learning_rate": 4.4540000000000004e-05, "loss": 111.4345, "step": 22270 }, { "epoch": 0.09001401923908257, "grad_norm": 649.8712158203125, "learning_rate": 4.456e-05, "loss": 78.0137, "step": 22280 }, { "epoch": 0.0900544205044502, "grad_norm": 510.55853271484375, "learning_rate": 4.458e-05, "loss": 142.013, "step": 22290 }, { "epoch": 0.09009482176981783, "grad_norm": 3399.17236328125, "learning_rate": 4.46e-05, "loss": 159.2714, "step": 22300 }, { "epoch": 0.09013522303518547, "grad_norm": 998.1200561523438, "learning_rate": 4.462e-05, "loss": 100.7152, "step": 22310 }, { "epoch": 0.09017562430055309, "grad_norm": 531.780517578125, "learning_rate": 4.4640000000000006e-05, "loss": 168.3419, "step": 22320 }, { "epoch": 0.09021602556592072, "grad_norm": 910.6287841796875, "learning_rate": 4.466e-05, "loss": 98.8065, "step": 22330 }, { "epoch": 0.09025642683128836, "grad_norm": 1744.2012939453125, "learning_rate": 4.468e-05, "loss": 105.1678, "step": 22340 }, { "epoch": 0.09029682809665598, "grad_norm": 597.9017944335938, "learning_rate": 4.47e-05, "loss": 112.0376, "step": 22350 }, { "epoch": 0.09033722936202362, "grad_norm": 769.6776733398438, "learning_rate": 4.472e-05, "loss": 94.9958, "step": 22360 }, { "epoch": 0.09037763062739125, "grad_norm": 1181.265625, "learning_rate": 4.474e-05, "loss": 110.0878, "step": 22370 }, { "epoch": 0.09041803189275888, "grad_norm": 757.1105346679688, "learning_rate": 4.4760000000000005e-05, "loss": 100.3439, "step": 22380 }, { "epoch": 0.09045843315812652, "grad_norm": 717.3568115234375, "learning_rate": 4.478e-05, "loss": 134.2481, "step": 22390 }, { "epoch": 0.09049883442349414, "grad_norm": 892.8013305664062, "learning_rate": 4.4800000000000005e-05, "loss": 101.8901, "step": 22400 }, { "epoch": 0.09053923568886177, "grad_norm": 796.65380859375, "learning_rate": 4.482e-05, "loss": 107.8212, "step": 22410 }, { "epoch": 0.09057963695422941, "grad_norm": 599.0886840820312, "learning_rate": 4.4840000000000004e-05, "loss": 112.4707, "step": 22420 }, { "epoch": 0.09062003821959703, "grad_norm": 384.7815246582031, "learning_rate": 4.486e-05, "loss": 103.7676, "step": 22430 }, { "epoch": 0.09066043948496467, "grad_norm": 0.0, "learning_rate": 4.488e-05, "loss": 83.413, "step": 22440 }, { "epoch": 0.0907008407503323, "grad_norm": 1596.82373046875, "learning_rate": 4.49e-05, "loss": 125.9112, "step": 22450 }, { "epoch": 0.09074124201569993, "grad_norm": 8902.1123046875, "learning_rate": 4.4920000000000004e-05, "loss": 158.18, "step": 22460 }, { "epoch": 0.09078164328106757, "grad_norm": 809.1474609375, "learning_rate": 4.494000000000001e-05, "loss": 129.158, "step": 22470 }, { "epoch": 0.0908220445464352, "grad_norm": 657.6831665039062, "learning_rate": 4.496e-05, "loss": 121.9886, "step": 22480 }, { "epoch": 0.09086244581180282, "grad_norm": 866.2117309570312, "learning_rate": 4.498e-05, "loss": 123.8151, "step": 22490 }, { "epoch": 0.09090284707717046, "grad_norm": 585.983642578125, "learning_rate": 4.5e-05, "loss": 74.8665, "step": 22500 }, { "epoch": 0.09094324834253809, "grad_norm": 1273.0323486328125, "learning_rate": 4.502e-05, "loss": 96.7427, "step": 22510 }, { "epoch": 0.09098364960790573, "grad_norm": 1038.0701904296875, "learning_rate": 4.504e-05, "loss": 85.5748, "step": 22520 }, { "epoch": 0.09102405087327335, "grad_norm": 1610.0634765625, "learning_rate": 4.506e-05, "loss": 92.2955, "step": 22530 }, { "epoch": 0.09106445213864098, "grad_norm": 2549.61572265625, "learning_rate": 4.508e-05, "loss": 147.1345, "step": 22540 }, { "epoch": 0.09110485340400862, "grad_norm": 931.28173828125, "learning_rate": 4.5100000000000005e-05, "loss": 82.7087, "step": 22550 }, { "epoch": 0.09114525466937624, "grad_norm": 765.2784423828125, "learning_rate": 4.512e-05, "loss": 135.216, "step": 22560 }, { "epoch": 0.09118565593474387, "grad_norm": 597.312255859375, "learning_rate": 4.5140000000000005e-05, "loss": 147.5036, "step": 22570 }, { "epoch": 0.09122605720011151, "grad_norm": 756.5013427734375, "learning_rate": 4.516e-05, "loss": 128.549, "step": 22580 }, { "epoch": 0.09126645846547914, "grad_norm": 719.6322631835938, "learning_rate": 4.518e-05, "loss": 80.9833, "step": 22590 }, { "epoch": 0.09130685973084678, "grad_norm": 721.7781982421875, "learning_rate": 4.52e-05, "loss": 144.421, "step": 22600 }, { "epoch": 0.0913472609962144, "grad_norm": 1193.63232421875, "learning_rate": 4.5220000000000004e-05, "loss": 99.2124, "step": 22610 }, { "epoch": 0.09138766226158203, "grad_norm": 886.3824462890625, "learning_rate": 4.524000000000001e-05, "loss": 117.6559, "step": 22620 }, { "epoch": 0.09142806352694967, "grad_norm": 869.9031372070312, "learning_rate": 4.5260000000000004e-05, "loss": 128.7948, "step": 22630 }, { "epoch": 0.0914684647923173, "grad_norm": 323.6915283203125, "learning_rate": 4.528e-05, "loss": 103.8211, "step": 22640 }, { "epoch": 0.09150886605768492, "grad_norm": 648.2799072265625, "learning_rate": 4.53e-05, "loss": 124.0231, "step": 22650 }, { "epoch": 0.09154926732305256, "grad_norm": 3163.934814453125, "learning_rate": 4.532e-05, "loss": 133.5165, "step": 22660 }, { "epoch": 0.09158966858842019, "grad_norm": 694.5257568359375, "learning_rate": 4.534e-05, "loss": 117.9862, "step": 22670 }, { "epoch": 0.09163006985378783, "grad_norm": 1048.819091796875, "learning_rate": 4.536e-05, "loss": 110.8099, "step": 22680 }, { "epoch": 0.09167047111915545, "grad_norm": 760.2977905273438, "learning_rate": 4.538e-05, "loss": 93.8862, "step": 22690 }, { "epoch": 0.09171087238452308, "grad_norm": 1749.94580078125, "learning_rate": 4.5400000000000006e-05, "loss": 141.0014, "step": 22700 }, { "epoch": 0.09175127364989072, "grad_norm": 0.0, "learning_rate": 4.542e-05, "loss": 100.4336, "step": 22710 }, { "epoch": 0.09179167491525834, "grad_norm": 520.8677978515625, "learning_rate": 4.5440000000000005e-05, "loss": 85.8024, "step": 22720 }, { "epoch": 0.09183207618062597, "grad_norm": 1674.249755859375, "learning_rate": 4.546e-05, "loss": 138.2154, "step": 22730 }, { "epoch": 0.09187247744599361, "grad_norm": 917.3933715820312, "learning_rate": 4.548e-05, "loss": 103.3672, "step": 22740 }, { "epoch": 0.09191287871136124, "grad_norm": 1019.2123413085938, "learning_rate": 4.55e-05, "loss": 56.5289, "step": 22750 }, { "epoch": 0.09195327997672888, "grad_norm": 2317.489501953125, "learning_rate": 4.5520000000000005e-05, "loss": 155.0266, "step": 22760 }, { "epoch": 0.0919936812420965, "grad_norm": 819.0970458984375, "learning_rate": 4.554000000000001e-05, "loss": 116.7189, "step": 22770 }, { "epoch": 0.09203408250746413, "grad_norm": 1225.271240234375, "learning_rate": 4.5560000000000004e-05, "loss": 111.6631, "step": 22780 }, { "epoch": 0.09207448377283177, "grad_norm": 1159.3927001953125, "learning_rate": 4.558e-05, "loss": 66.4148, "step": 22790 }, { "epoch": 0.0921148850381994, "grad_norm": 1292.975341796875, "learning_rate": 4.5600000000000004e-05, "loss": 105.9983, "step": 22800 }, { "epoch": 0.09215528630356702, "grad_norm": 1175.0252685546875, "learning_rate": 4.562e-05, "loss": 143.92, "step": 22810 }, { "epoch": 0.09219568756893466, "grad_norm": 759.6773681640625, "learning_rate": 4.564e-05, "loss": 90.7138, "step": 22820 }, { "epoch": 0.09223608883430229, "grad_norm": 857.7040405273438, "learning_rate": 4.566e-05, "loss": 92.1247, "step": 22830 }, { "epoch": 0.09227649009966993, "grad_norm": 1580.8248291015625, "learning_rate": 4.568e-05, "loss": 128.0727, "step": 22840 }, { "epoch": 0.09231689136503755, "grad_norm": 567.8139038085938, "learning_rate": 4.5700000000000006e-05, "loss": 97.4495, "step": 22850 }, { "epoch": 0.09235729263040518, "grad_norm": 1576.912841796875, "learning_rate": 4.572e-05, "loss": 128.2519, "step": 22860 }, { "epoch": 0.09239769389577282, "grad_norm": 716.1073608398438, "learning_rate": 4.574e-05, "loss": 94.8568, "step": 22870 }, { "epoch": 0.09243809516114045, "grad_norm": 638.1775512695312, "learning_rate": 4.576e-05, "loss": 66.2251, "step": 22880 }, { "epoch": 0.09247849642650807, "grad_norm": 2102.469482421875, "learning_rate": 4.578e-05, "loss": 128.2732, "step": 22890 }, { "epoch": 0.09251889769187571, "grad_norm": 682.6776123046875, "learning_rate": 4.58e-05, "loss": 102.548, "step": 22900 }, { "epoch": 0.09255929895724334, "grad_norm": 586.2388916015625, "learning_rate": 4.5820000000000005e-05, "loss": 148.1805, "step": 22910 }, { "epoch": 0.09259970022261098, "grad_norm": 1975.264404296875, "learning_rate": 4.584e-05, "loss": 143.5229, "step": 22920 }, { "epoch": 0.0926401014879786, "grad_norm": 758.1207885742188, "learning_rate": 4.5860000000000005e-05, "loss": 128.5342, "step": 22930 }, { "epoch": 0.09268050275334623, "grad_norm": 2084.8974609375, "learning_rate": 4.588e-05, "loss": 94.417, "step": 22940 }, { "epoch": 0.09272090401871387, "grad_norm": 830.3482055664062, "learning_rate": 4.5900000000000004e-05, "loss": 97.3431, "step": 22950 }, { "epoch": 0.0927613052840815, "grad_norm": 562.5604858398438, "learning_rate": 4.592e-05, "loss": 115.6859, "step": 22960 }, { "epoch": 0.09280170654944912, "grad_norm": 1079.0665283203125, "learning_rate": 4.594e-05, "loss": 118.9365, "step": 22970 }, { "epoch": 0.09284210781481676, "grad_norm": 1732.04638671875, "learning_rate": 4.596e-05, "loss": 86.5984, "step": 22980 }, { "epoch": 0.09288250908018439, "grad_norm": 1780.00537109375, "learning_rate": 4.5980000000000004e-05, "loss": 113.6839, "step": 22990 }, { "epoch": 0.09292291034555203, "grad_norm": 1192.3056640625, "learning_rate": 4.600000000000001e-05, "loss": 89.4596, "step": 23000 }, { "epoch": 0.09296331161091966, "grad_norm": 1134.75830078125, "learning_rate": 4.602e-05, "loss": 87.713, "step": 23010 }, { "epoch": 0.09300371287628728, "grad_norm": 1336.7357177734375, "learning_rate": 4.604e-05, "loss": 65.8475, "step": 23020 }, { "epoch": 0.09304411414165492, "grad_norm": 815.8203125, "learning_rate": 4.606e-05, "loss": 148.7902, "step": 23030 }, { "epoch": 0.09308451540702255, "grad_norm": 1082.975830078125, "learning_rate": 4.608e-05, "loss": 114.2601, "step": 23040 }, { "epoch": 0.09312491667239017, "grad_norm": 778.52587890625, "learning_rate": 4.61e-05, "loss": 284.198, "step": 23050 }, { "epoch": 0.09316531793775781, "grad_norm": 591.0042114257812, "learning_rate": 4.612e-05, "loss": 106.8146, "step": 23060 }, { "epoch": 0.09320571920312544, "grad_norm": 2606.027099609375, "learning_rate": 4.614e-05, "loss": 109.8302, "step": 23070 }, { "epoch": 0.09324612046849308, "grad_norm": 1184.9754638671875, "learning_rate": 4.6160000000000005e-05, "loss": 118.0777, "step": 23080 }, { "epoch": 0.0932865217338607, "grad_norm": 1047.3983154296875, "learning_rate": 4.618e-05, "loss": 119.2703, "step": 23090 }, { "epoch": 0.09332692299922833, "grad_norm": 522.4287719726562, "learning_rate": 4.6200000000000005e-05, "loss": 115.0015, "step": 23100 }, { "epoch": 0.09336732426459597, "grad_norm": 387.2696838378906, "learning_rate": 4.622e-05, "loss": 112.657, "step": 23110 }, { "epoch": 0.0934077255299636, "grad_norm": 680.420166015625, "learning_rate": 4.624e-05, "loss": 81.0697, "step": 23120 }, { "epoch": 0.09344812679533122, "grad_norm": 1640.8831787109375, "learning_rate": 4.626e-05, "loss": 128.7615, "step": 23130 }, { "epoch": 0.09348852806069886, "grad_norm": 666.759765625, "learning_rate": 4.6280000000000004e-05, "loss": 86.7895, "step": 23140 }, { "epoch": 0.09352892932606649, "grad_norm": 2630.477783203125, "learning_rate": 4.630000000000001e-05, "loss": 174.0495, "step": 23150 }, { "epoch": 0.09356933059143413, "grad_norm": 2171.40185546875, "learning_rate": 4.6320000000000004e-05, "loss": 126.4433, "step": 23160 }, { "epoch": 0.09360973185680176, "grad_norm": 952.8081665039062, "learning_rate": 4.634e-05, "loss": 60.8379, "step": 23170 }, { "epoch": 0.09365013312216938, "grad_norm": 1052.6181640625, "learning_rate": 4.636e-05, "loss": 155.2496, "step": 23180 }, { "epoch": 0.09369053438753702, "grad_norm": 1012.393310546875, "learning_rate": 4.638e-05, "loss": 137.2208, "step": 23190 }, { "epoch": 0.09373093565290465, "grad_norm": 1177.2757568359375, "learning_rate": 4.64e-05, "loss": 123.9954, "step": 23200 }, { "epoch": 0.09377133691827227, "grad_norm": 1205.6624755859375, "learning_rate": 4.642e-05, "loss": 133.0356, "step": 23210 }, { "epoch": 0.09381173818363991, "grad_norm": 790.0210571289062, "learning_rate": 4.644e-05, "loss": 134.6905, "step": 23220 }, { "epoch": 0.09385213944900754, "grad_norm": 1255.626953125, "learning_rate": 4.6460000000000006e-05, "loss": 182.1293, "step": 23230 }, { "epoch": 0.09389254071437518, "grad_norm": 705.6256103515625, "learning_rate": 4.648e-05, "loss": 80.2685, "step": 23240 }, { "epoch": 0.0939329419797428, "grad_norm": 1400.9781494140625, "learning_rate": 4.6500000000000005e-05, "loss": 65.442, "step": 23250 }, { "epoch": 0.09397334324511043, "grad_norm": 475.5975646972656, "learning_rate": 4.652e-05, "loss": 104.4814, "step": 23260 }, { "epoch": 0.09401374451047807, "grad_norm": 656.1465454101562, "learning_rate": 4.654e-05, "loss": 64.6497, "step": 23270 }, { "epoch": 0.0940541457758457, "grad_norm": 870.1063232421875, "learning_rate": 4.656e-05, "loss": 106.1441, "step": 23280 }, { "epoch": 0.09409454704121333, "grad_norm": 461.961181640625, "learning_rate": 4.6580000000000005e-05, "loss": 77.1417, "step": 23290 }, { "epoch": 0.09413494830658097, "grad_norm": 677.691162109375, "learning_rate": 4.660000000000001e-05, "loss": 99.122, "step": 23300 }, { "epoch": 0.09417534957194859, "grad_norm": 651.6929931640625, "learning_rate": 4.6620000000000004e-05, "loss": 80.5311, "step": 23310 }, { "epoch": 0.09421575083731623, "grad_norm": 4944.45703125, "learning_rate": 4.664e-05, "loss": 122.0651, "step": 23320 }, { "epoch": 0.09425615210268386, "grad_norm": 585.9901733398438, "learning_rate": 4.6660000000000004e-05, "loss": 105.5626, "step": 23330 }, { "epoch": 0.09429655336805148, "grad_norm": 869.3453979492188, "learning_rate": 4.668e-05, "loss": 107.5253, "step": 23340 }, { "epoch": 0.09433695463341912, "grad_norm": 1403.1729736328125, "learning_rate": 4.6700000000000003e-05, "loss": 82.5326, "step": 23350 }, { "epoch": 0.09437735589878675, "grad_norm": 1053.485595703125, "learning_rate": 4.672e-05, "loss": 110.7292, "step": 23360 }, { "epoch": 0.09441775716415438, "grad_norm": 495.44793701171875, "learning_rate": 4.674e-05, "loss": 85.2537, "step": 23370 }, { "epoch": 0.09445815842952202, "grad_norm": 1976.839599609375, "learning_rate": 4.6760000000000006e-05, "loss": 122.28, "step": 23380 }, { "epoch": 0.09449855969488964, "grad_norm": 840.1267700195312, "learning_rate": 4.678e-05, "loss": 61.5691, "step": 23390 }, { "epoch": 0.09453896096025728, "grad_norm": 1114.9774169921875, "learning_rate": 4.6800000000000006e-05, "loss": 179.376, "step": 23400 }, { "epoch": 0.09457936222562491, "grad_norm": 2026.411376953125, "learning_rate": 4.682e-05, "loss": 117.7227, "step": 23410 }, { "epoch": 0.09461976349099253, "grad_norm": 1031.648193359375, "learning_rate": 4.684e-05, "loss": 95.3115, "step": 23420 }, { "epoch": 0.09466016475636017, "grad_norm": 774.4593505859375, "learning_rate": 4.686e-05, "loss": 74.8853, "step": 23430 }, { "epoch": 0.0947005660217278, "grad_norm": 799.5897216796875, "learning_rate": 4.688e-05, "loss": 106.8804, "step": 23440 }, { "epoch": 0.09474096728709543, "grad_norm": 801.3352661132812, "learning_rate": 4.69e-05, "loss": 117.8924, "step": 23450 }, { "epoch": 0.09478136855246307, "grad_norm": 562.31494140625, "learning_rate": 4.6920000000000005e-05, "loss": 70.9862, "step": 23460 }, { "epoch": 0.09482176981783069, "grad_norm": 1088.81103515625, "learning_rate": 4.694e-05, "loss": 105.8252, "step": 23470 }, { "epoch": 0.09486217108319833, "grad_norm": 773.8255004882812, "learning_rate": 4.6960000000000004e-05, "loss": 74.9372, "step": 23480 }, { "epoch": 0.09490257234856596, "grad_norm": 2185.90771484375, "learning_rate": 4.698e-05, "loss": 137.0428, "step": 23490 }, { "epoch": 0.09494297361393358, "grad_norm": 1338.7786865234375, "learning_rate": 4.7e-05, "loss": 138.3532, "step": 23500 }, { "epoch": 0.09498337487930122, "grad_norm": 828.383544921875, "learning_rate": 4.702e-05, "loss": 103.3043, "step": 23510 }, { "epoch": 0.09502377614466885, "grad_norm": 1320.6363525390625, "learning_rate": 4.7040000000000004e-05, "loss": 112.5485, "step": 23520 }, { "epoch": 0.09506417741003648, "grad_norm": 1088.3404541015625, "learning_rate": 4.706000000000001e-05, "loss": 107.0843, "step": 23530 }, { "epoch": 0.09510457867540412, "grad_norm": 697.0968627929688, "learning_rate": 4.708e-05, "loss": 86.7037, "step": 23540 }, { "epoch": 0.09514497994077174, "grad_norm": 1503.7210693359375, "learning_rate": 4.71e-05, "loss": 113.7888, "step": 23550 }, { "epoch": 0.09518538120613938, "grad_norm": 840.5352783203125, "learning_rate": 4.712e-05, "loss": 107.902, "step": 23560 }, { "epoch": 0.09522578247150701, "grad_norm": 1638.9781494140625, "learning_rate": 4.714e-05, "loss": 102.9792, "step": 23570 }, { "epoch": 0.09526618373687464, "grad_norm": 518.2499389648438, "learning_rate": 4.716e-05, "loss": 78.3873, "step": 23580 }, { "epoch": 0.09530658500224228, "grad_norm": 837.2974853515625, "learning_rate": 4.718e-05, "loss": 71.646, "step": 23590 }, { "epoch": 0.0953469862676099, "grad_norm": 560.1998291015625, "learning_rate": 4.72e-05, "loss": 131.5562, "step": 23600 }, { "epoch": 0.09538738753297753, "grad_norm": 789.7793579101562, "learning_rate": 4.7220000000000005e-05, "loss": 95.4459, "step": 23610 }, { "epoch": 0.09542778879834517, "grad_norm": 836.2322998046875, "learning_rate": 4.724e-05, "loss": 118.2009, "step": 23620 }, { "epoch": 0.0954681900637128, "grad_norm": 1441.4818115234375, "learning_rate": 4.7260000000000005e-05, "loss": 164.6346, "step": 23630 }, { "epoch": 0.09550859132908043, "grad_norm": 965.8220825195312, "learning_rate": 4.728e-05, "loss": 109.1539, "step": 23640 }, { "epoch": 0.09554899259444806, "grad_norm": 1859.1063232421875, "learning_rate": 4.73e-05, "loss": 180.398, "step": 23650 }, { "epoch": 0.09558939385981569, "grad_norm": 823.5313110351562, "learning_rate": 4.732e-05, "loss": 95.1897, "step": 23660 }, { "epoch": 0.09562979512518333, "grad_norm": 880.113037109375, "learning_rate": 4.7340000000000004e-05, "loss": 107.1814, "step": 23670 }, { "epoch": 0.09567019639055095, "grad_norm": 1045.581298828125, "learning_rate": 4.736000000000001e-05, "loss": 151.5835, "step": 23680 }, { "epoch": 0.09571059765591858, "grad_norm": 1099.427978515625, "learning_rate": 4.7380000000000004e-05, "loss": 93.5232, "step": 23690 }, { "epoch": 0.09575099892128622, "grad_norm": 640.9761962890625, "learning_rate": 4.74e-05, "loss": 117.26, "step": 23700 }, { "epoch": 0.09579140018665384, "grad_norm": 337.58795166015625, "learning_rate": 4.742e-05, "loss": 114.4822, "step": 23710 }, { "epoch": 0.09583180145202148, "grad_norm": 1089.921875, "learning_rate": 4.744e-05, "loss": 104.9418, "step": 23720 }, { "epoch": 0.09587220271738911, "grad_norm": 2395.615234375, "learning_rate": 4.746e-05, "loss": 99.8505, "step": 23730 }, { "epoch": 0.09591260398275674, "grad_norm": 1333.4490966796875, "learning_rate": 4.748e-05, "loss": 136.7725, "step": 23740 }, { "epoch": 0.09595300524812438, "grad_norm": 1200.8509521484375, "learning_rate": 4.75e-05, "loss": 108.3065, "step": 23750 }, { "epoch": 0.095993406513492, "grad_norm": 674.5408325195312, "learning_rate": 4.7520000000000006e-05, "loss": 104.4726, "step": 23760 }, { "epoch": 0.09603380777885963, "grad_norm": 789.4915161132812, "learning_rate": 4.754e-05, "loss": 145.3513, "step": 23770 }, { "epoch": 0.09607420904422727, "grad_norm": 797.5672607421875, "learning_rate": 4.7560000000000005e-05, "loss": 99.0135, "step": 23780 }, { "epoch": 0.0961146103095949, "grad_norm": 1080.6739501953125, "learning_rate": 4.758e-05, "loss": 128.2187, "step": 23790 }, { "epoch": 0.09615501157496253, "grad_norm": 1180.974365234375, "learning_rate": 4.76e-05, "loss": 112.952, "step": 23800 }, { "epoch": 0.09619541284033016, "grad_norm": 782.664794921875, "learning_rate": 4.762e-05, "loss": 88.9206, "step": 23810 }, { "epoch": 0.09623581410569779, "grad_norm": 731.3271484375, "learning_rate": 4.7640000000000005e-05, "loss": 109.8621, "step": 23820 }, { "epoch": 0.09627621537106543, "grad_norm": 1363.64501953125, "learning_rate": 4.766000000000001e-05, "loss": 151.2704, "step": 23830 }, { "epoch": 0.09631661663643305, "grad_norm": 802.5873413085938, "learning_rate": 4.7680000000000004e-05, "loss": 126.6234, "step": 23840 }, { "epoch": 0.09635701790180068, "grad_norm": 1359.4644775390625, "learning_rate": 4.77e-05, "loss": 73.3318, "step": 23850 }, { "epoch": 0.09639741916716832, "grad_norm": 733.4736328125, "learning_rate": 4.7720000000000004e-05, "loss": 115.1329, "step": 23860 }, { "epoch": 0.09643782043253595, "grad_norm": 620.9826049804688, "learning_rate": 4.774e-05, "loss": 86.756, "step": 23870 }, { "epoch": 0.09647822169790359, "grad_norm": 508.6841125488281, "learning_rate": 4.7760000000000004e-05, "loss": 90.5046, "step": 23880 }, { "epoch": 0.09651862296327121, "grad_norm": 756.0390014648438, "learning_rate": 4.778e-05, "loss": 125.9883, "step": 23890 }, { "epoch": 0.09655902422863884, "grad_norm": 531.0929565429688, "learning_rate": 4.78e-05, "loss": 152.4438, "step": 23900 }, { "epoch": 0.09659942549400648, "grad_norm": 2243.71630859375, "learning_rate": 4.7820000000000006e-05, "loss": 155.722, "step": 23910 }, { "epoch": 0.0966398267593741, "grad_norm": 1023.2913818359375, "learning_rate": 4.784e-05, "loss": 76.3717, "step": 23920 }, { "epoch": 0.09668022802474173, "grad_norm": 896.178955078125, "learning_rate": 4.7860000000000006e-05, "loss": 114.2663, "step": 23930 }, { "epoch": 0.09672062929010937, "grad_norm": 656.4266967773438, "learning_rate": 4.788e-05, "loss": 122.8116, "step": 23940 }, { "epoch": 0.096761030555477, "grad_norm": 1333.90234375, "learning_rate": 4.79e-05, "loss": 119.1918, "step": 23950 }, { "epoch": 0.09680143182084464, "grad_norm": 929.0885620117188, "learning_rate": 4.792e-05, "loss": 101.0765, "step": 23960 }, { "epoch": 0.09684183308621226, "grad_norm": 783.88134765625, "learning_rate": 4.794e-05, "loss": 123.106, "step": 23970 }, { "epoch": 0.09688223435157989, "grad_norm": 495.6070861816406, "learning_rate": 4.796e-05, "loss": 77.067, "step": 23980 }, { "epoch": 0.09692263561694753, "grad_norm": 757.9783325195312, "learning_rate": 4.7980000000000005e-05, "loss": 108.6014, "step": 23990 }, { "epoch": 0.09696303688231515, "grad_norm": 1557.5858154296875, "learning_rate": 4.8e-05, "loss": 132.5803, "step": 24000 }, { "epoch": 0.09700343814768278, "grad_norm": 1372.1710205078125, "learning_rate": 4.8020000000000004e-05, "loss": 140.0741, "step": 24010 }, { "epoch": 0.09704383941305042, "grad_norm": 708.5230712890625, "learning_rate": 4.804e-05, "loss": 136.9571, "step": 24020 }, { "epoch": 0.09708424067841805, "grad_norm": 949.0543823242188, "learning_rate": 4.8060000000000004e-05, "loss": 119.4232, "step": 24030 }, { "epoch": 0.09712464194378569, "grad_norm": 779.013916015625, "learning_rate": 4.808e-05, "loss": 132.5883, "step": 24040 }, { "epoch": 0.09716504320915331, "grad_norm": 1494.10595703125, "learning_rate": 4.8100000000000004e-05, "loss": 101.6414, "step": 24050 }, { "epoch": 0.09720544447452094, "grad_norm": 628.3251342773438, "learning_rate": 4.812000000000001e-05, "loss": 100.0648, "step": 24060 }, { "epoch": 0.09724584573988858, "grad_norm": 518.7921752929688, "learning_rate": 4.814e-05, "loss": 55.6692, "step": 24070 }, { "epoch": 0.0972862470052562, "grad_norm": 1588.790771484375, "learning_rate": 4.816e-05, "loss": 133.6212, "step": 24080 }, { "epoch": 0.09732664827062383, "grad_norm": 843.473388671875, "learning_rate": 4.818e-05, "loss": 87.3461, "step": 24090 }, { "epoch": 0.09736704953599147, "grad_norm": 659.4796752929688, "learning_rate": 4.82e-05, "loss": 117.1845, "step": 24100 }, { "epoch": 0.0974074508013591, "grad_norm": 584.1058959960938, "learning_rate": 4.822e-05, "loss": 103.56, "step": 24110 }, { "epoch": 0.09744785206672674, "grad_norm": 1724.538330078125, "learning_rate": 4.824e-05, "loss": 89.6239, "step": 24120 }, { "epoch": 0.09748825333209436, "grad_norm": 693.462646484375, "learning_rate": 4.826e-05, "loss": 93.3504, "step": 24130 }, { "epoch": 0.09752865459746199, "grad_norm": 1428.2559814453125, "learning_rate": 4.8280000000000005e-05, "loss": 112.4596, "step": 24140 }, { "epoch": 0.09756905586282963, "grad_norm": 1738.0125732421875, "learning_rate": 4.83e-05, "loss": 107.2036, "step": 24150 }, { "epoch": 0.09760945712819726, "grad_norm": 590.5738525390625, "learning_rate": 4.8320000000000005e-05, "loss": 95.526, "step": 24160 }, { "epoch": 0.09764985839356488, "grad_norm": 1145.53515625, "learning_rate": 4.834e-05, "loss": 82.7261, "step": 24170 }, { "epoch": 0.09769025965893252, "grad_norm": 1240.5450439453125, "learning_rate": 4.836e-05, "loss": 138.7817, "step": 24180 }, { "epoch": 0.09773066092430015, "grad_norm": 1606.5479736328125, "learning_rate": 4.838e-05, "loss": 101.9048, "step": 24190 }, { "epoch": 0.09777106218966779, "grad_norm": 1542.466796875, "learning_rate": 4.8400000000000004e-05, "loss": 106.2202, "step": 24200 }, { "epoch": 0.09781146345503541, "grad_norm": 889.3588256835938, "learning_rate": 4.842000000000001e-05, "loss": 144.4749, "step": 24210 }, { "epoch": 0.09785186472040304, "grad_norm": 639.1673583984375, "learning_rate": 4.8440000000000004e-05, "loss": 103.4213, "step": 24220 }, { "epoch": 0.09789226598577068, "grad_norm": 3171.987060546875, "learning_rate": 4.846e-05, "loss": 156.6668, "step": 24230 }, { "epoch": 0.0979326672511383, "grad_norm": 709.1254272460938, "learning_rate": 4.8480000000000003e-05, "loss": 80.9064, "step": 24240 }, { "epoch": 0.09797306851650593, "grad_norm": 561.7057495117188, "learning_rate": 4.85e-05, "loss": 171.3163, "step": 24250 }, { "epoch": 0.09801346978187357, "grad_norm": 929.6841430664062, "learning_rate": 4.852e-05, "loss": 157.7511, "step": 24260 }, { "epoch": 0.0980538710472412, "grad_norm": 1814.5821533203125, "learning_rate": 4.854e-05, "loss": 91.1438, "step": 24270 }, { "epoch": 0.09809427231260884, "grad_norm": 4481.90673828125, "learning_rate": 4.856e-05, "loss": 116.3378, "step": 24280 }, { "epoch": 0.09813467357797646, "grad_norm": 1093.2393798828125, "learning_rate": 4.8580000000000006e-05, "loss": 95.4637, "step": 24290 }, { "epoch": 0.09817507484334409, "grad_norm": 559.2593383789062, "learning_rate": 4.86e-05, "loss": 80.5582, "step": 24300 }, { "epoch": 0.09821547610871173, "grad_norm": 538.036865234375, "learning_rate": 4.8620000000000005e-05, "loss": 91.296, "step": 24310 }, { "epoch": 0.09825587737407936, "grad_norm": 1021.3819580078125, "learning_rate": 4.864e-05, "loss": 96.4481, "step": 24320 }, { "epoch": 0.09829627863944698, "grad_norm": 905.1778564453125, "learning_rate": 4.866e-05, "loss": 93.2619, "step": 24330 }, { "epoch": 0.09833667990481462, "grad_norm": 604.1958618164062, "learning_rate": 4.868e-05, "loss": 62.2251, "step": 24340 }, { "epoch": 0.09837708117018225, "grad_norm": 707.2637939453125, "learning_rate": 4.87e-05, "loss": 81.4563, "step": 24350 }, { "epoch": 0.09841748243554987, "grad_norm": 1935.6922607421875, "learning_rate": 4.872000000000001e-05, "loss": 150.9556, "step": 24360 }, { "epoch": 0.09845788370091751, "grad_norm": 529.8268432617188, "learning_rate": 4.8740000000000004e-05, "loss": 95.0914, "step": 24370 }, { "epoch": 0.09849828496628514, "grad_norm": 1129.89599609375, "learning_rate": 4.876e-05, "loss": 128.5505, "step": 24380 }, { "epoch": 0.09853868623165278, "grad_norm": 2489.7001953125, "learning_rate": 4.8780000000000004e-05, "loss": 138.6563, "step": 24390 }, { "epoch": 0.0985790874970204, "grad_norm": 1026.3582763671875, "learning_rate": 4.88e-05, "loss": 126.4132, "step": 24400 }, { "epoch": 0.09861948876238803, "grad_norm": 2506.63916015625, "learning_rate": 4.8820000000000004e-05, "loss": 151.2192, "step": 24410 }, { "epoch": 0.09865989002775567, "grad_norm": 1200.955810546875, "learning_rate": 4.884e-05, "loss": 131.9323, "step": 24420 }, { "epoch": 0.0987002912931233, "grad_norm": 888.474365234375, "learning_rate": 4.886e-05, "loss": 80.7957, "step": 24430 }, { "epoch": 0.09874069255849093, "grad_norm": 853.1484375, "learning_rate": 4.8880000000000006e-05, "loss": 152.3066, "step": 24440 }, { "epoch": 0.09878109382385857, "grad_norm": 931.940673828125, "learning_rate": 4.89e-05, "loss": 124.5411, "step": 24450 }, { "epoch": 0.09882149508922619, "grad_norm": 755.48828125, "learning_rate": 4.8920000000000006e-05, "loss": 113.8801, "step": 24460 }, { "epoch": 0.09886189635459383, "grad_norm": 1995.7860107421875, "learning_rate": 4.894e-05, "loss": 101.5088, "step": 24470 }, { "epoch": 0.09890229761996146, "grad_norm": 645.9288940429688, "learning_rate": 4.896e-05, "loss": 75.8798, "step": 24480 }, { "epoch": 0.09894269888532908, "grad_norm": 1073.67626953125, "learning_rate": 4.898e-05, "loss": 161.3777, "step": 24490 }, { "epoch": 0.09898310015069672, "grad_norm": 1047.078125, "learning_rate": 4.9e-05, "loss": 136.8098, "step": 24500 }, { "epoch": 0.09902350141606435, "grad_norm": 1193.5416259765625, "learning_rate": 4.902e-05, "loss": 112.7242, "step": 24510 }, { "epoch": 0.09906390268143198, "grad_norm": 2082.607421875, "learning_rate": 4.9040000000000005e-05, "loss": 104.1718, "step": 24520 }, { "epoch": 0.09910430394679962, "grad_norm": 657.3272705078125, "learning_rate": 4.906e-05, "loss": 94.1763, "step": 24530 }, { "epoch": 0.09914470521216724, "grad_norm": 822.24462890625, "learning_rate": 4.9080000000000004e-05, "loss": 120.4415, "step": 24540 }, { "epoch": 0.09918510647753488, "grad_norm": 1214.71142578125, "learning_rate": 4.91e-05, "loss": 95.7002, "step": 24550 }, { "epoch": 0.09922550774290251, "grad_norm": 974.8594360351562, "learning_rate": 4.9120000000000004e-05, "loss": 122.8078, "step": 24560 }, { "epoch": 0.09926590900827013, "grad_norm": 2204.4921875, "learning_rate": 4.914e-05, "loss": 181.9215, "step": 24570 }, { "epoch": 0.09930631027363777, "grad_norm": 667.4011840820312, "learning_rate": 4.9160000000000004e-05, "loss": 80.917, "step": 24580 }, { "epoch": 0.0993467115390054, "grad_norm": 896.2610473632812, "learning_rate": 4.918000000000001e-05, "loss": 144.0085, "step": 24590 }, { "epoch": 0.09938711280437303, "grad_norm": 0.0, "learning_rate": 4.92e-05, "loss": 132.3478, "step": 24600 }, { "epoch": 0.09942751406974067, "grad_norm": 941.837646484375, "learning_rate": 4.9220000000000006e-05, "loss": 98.3554, "step": 24610 }, { "epoch": 0.09946791533510829, "grad_norm": 975.3572387695312, "learning_rate": 4.924e-05, "loss": 71.2832, "step": 24620 }, { "epoch": 0.09950831660047593, "grad_norm": 467.11871337890625, "learning_rate": 4.926e-05, "loss": 84.4188, "step": 24630 }, { "epoch": 0.09954871786584356, "grad_norm": 1425.18603515625, "learning_rate": 4.928e-05, "loss": 117.7631, "step": 24640 }, { "epoch": 0.09958911913121118, "grad_norm": 992.929443359375, "learning_rate": 4.93e-05, "loss": 74.3671, "step": 24650 }, { "epoch": 0.09962952039657882, "grad_norm": 1589.87646484375, "learning_rate": 4.932e-05, "loss": 127.9026, "step": 24660 }, { "epoch": 0.09966992166194645, "grad_norm": 726.9409790039062, "learning_rate": 4.9340000000000005e-05, "loss": 127.6771, "step": 24670 }, { "epoch": 0.09971032292731408, "grad_norm": 1687.093017578125, "learning_rate": 4.936e-05, "loss": 112.6813, "step": 24680 }, { "epoch": 0.09975072419268172, "grad_norm": 891.7119140625, "learning_rate": 4.9380000000000005e-05, "loss": 131.5728, "step": 24690 }, { "epoch": 0.09979112545804934, "grad_norm": 601.4475708007812, "learning_rate": 4.94e-05, "loss": 80.0845, "step": 24700 }, { "epoch": 0.09983152672341698, "grad_norm": 1217.7177734375, "learning_rate": 4.942e-05, "loss": 108.1615, "step": 24710 }, { "epoch": 0.09987192798878461, "grad_norm": 1115.55517578125, "learning_rate": 4.944e-05, "loss": 86.1, "step": 24720 }, { "epoch": 0.09991232925415224, "grad_norm": 623.4889526367188, "learning_rate": 4.946e-05, "loss": 99.014, "step": 24730 }, { "epoch": 0.09995273051951988, "grad_norm": 3404.5966796875, "learning_rate": 4.948000000000001e-05, "loss": 111.524, "step": 24740 }, { "epoch": 0.0999931317848875, "grad_norm": 1838.208251953125, "learning_rate": 4.9500000000000004e-05, "loss": 162.1484, "step": 24750 }, { "epoch": 0.10003353305025513, "grad_norm": 371.3464660644531, "learning_rate": 4.952e-05, "loss": 85.2928, "step": 24760 }, { "epoch": 0.10007393431562277, "grad_norm": 718.6541748046875, "learning_rate": 4.9540000000000003e-05, "loss": 86.9755, "step": 24770 }, { "epoch": 0.1001143355809904, "grad_norm": 1240.49169921875, "learning_rate": 4.956e-05, "loss": 72.236, "step": 24780 }, { "epoch": 0.10015473684635803, "grad_norm": 2078.4365234375, "learning_rate": 4.958e-05, "loss": 130.2423, "step": 24790 }, { "epoch": 0.10019513811172566, "grad_norm": 1719.6787109375, "learning_rate": 4.96e-05, "loss": 147.8617, "step": 24800 }, { "epoch": 0.10023553937709329, "grad_norm": 1234.83203125, "learning_rate": 4.962e-05, "loss": 116.7895, "step": 24810 }, { "epoch": 0.10027594064246093, "grad_norm": 1845.6007080078125, "learning_rate": 4.9640000000000006e-05, "loss": 123.7702, "step": 24820 }, { "epoch": 0.10031634190782855, "grad_norm": 1477.3031005859375, "learning_rate": 4.966e-05, "loss": 175.7533, "step": 24830 }, { "epoch": 0.10035674317319618, "grad_norm": 753.8565063476562, "learning_rate": 4.9680000000000005e-05, "loss": 137.9543, "step": 24840 }, { "epoch": 0.10039714443856382, "grad_norm": 458.0389709472656, "learning_rate": 4.97e-05, "loss": 99.559, "step": 24850 }, { "epoch": 0.10043754570393144, "grad_norm": 1369.2882080078125, "learning_rate": 4.972e-05, "loss": 151.6343, "step": 24860 }, { "epoch": 0.10047794696929908, "grad_norm": 2486.626220703125, "learning_rate": 4.974e-05, "loss": 118.1052, "step": 24870 }, { "epoch": 0.10051834823466671, "grad_norm": 1189.0513916015625, "learning_rate": 4.976e-05, "loss": 115.5461, "step": 24880 }, { "epoch": 0.10055874950003434, "grad_norm": 3800.11083984375, "learning_rate": 4.978e-05, "loss": 113.4657, "step": 24890 }, { "epoch": 0.10059915076540198, "grad_norm": 588.5828857421875, "learning_rate": 4.9800000000000004e-05, "loss": 116.0871, "step": 24900 }, { "epoch": 0.1006395520307696, "grad_norm": 748.7584228515625, "learning_rate": 4.982e-05, "loss": 96.397, "step": 24910 }, { "epoch": 0.10067995329613723, "grad_norm": 883.9682006835938, "learning_rate": 4.9840000000000004e-05, "loss": 73.1233, "step": 24920 }, { "epoch": 0.10072035456150487, "grad_norm": 747.5608520507812, "learning_rate": 4.986e-05, "loss": 101.5735, "step": 24930 }, { "epoch": 0.1007607558268725, "grad_norm": 1152.760009765625, "learning_rate": 4.9880000000000004e-05, "loss": 106.2813, "step": 24940 }, { "epoch": 0.10080115709224013, "grad_norm": 1556.455322265625, "learning_rate": 4.99e-05, "loss": 92.2315, "step": 24950 }, { "epoch": 0.10084155835760776, "grad_norm": 564.4570922851562, "learning_rate": 4.992e-05, "loss": 105.7234, "step": 24960 }, { "epoch": 0.10088195962297539, "grad_norm": 582.0642700195312, "learning_rate": 4.9940000000000006e-05, "loss": 85.4253, "step": 24970 }, { "epoch": 0.10092236088834303, "grad_norm": 2132.204833984375, "learning_rate": 4.996e-05, "loss": 145.5456, "step": 24980 }, { "epoch": 0.10096276215371065, "grad_norm": 1085.25048828125, "learning_rate": 4.9980000000000006e-05, "loss": 105.3973, "step": 24990 }, { "epoch": 0.10100316341907828, "grad_norm": 482.8312072753906, "learning_rate": 5e-05, "loss": 107.6187, "step": 25000 }, { "epoch": 0.10104356468444592, "grad_norm": 12273.1533203125, "learning_rate": 4.999999975630607e-05, "loss": 166.6157, "step": 25010 }, { "epoch": 0.10108396594981355, "grad_norm": 1100.8514404296875, "learning_rate": 4.999999902522426e-05, "loss": 170.2824, "step": 25020 }, { "epoch": 0.10112436721518119, "grad_norm": 1492.0712890625, "learning_rate": 4.9999997806754614e-05, "loss": 105.2635, "step": 25030 }, { "epoch": 0.10116476848054881, "grad_norm": 2506.78173828125, "learning_rate": 4.9999996100897126e-05, "loss": 76.4485, "step": 25040 }, { "epoch": 0.10120516974591644, "grad_norm": 2876.141845703125, "learning_rate": 4.999999390765185e-05, "loss": 115.6973, "step": 25050 }, { "epoch": 0.10124557101128408, "grad_norm": 1318.9537353515625, "learning_rate": 4.999999122701883e-05, "loss": 120.0407, "step": 25060 }, { "epoch": 0.1012859722766517, "grad_norm": 598.3362426757812, "learning_rate": 4.99999880589981e-05, "loss": 99.9749, "step": 25070 }, { "epoch": 0.10132637354201933, "grad_norm": 680.303466796875, "learning_rate": 4.999998440358973e-05, "loss": 86.1489, "step": 25080 }, { "epoch": 0.10136677480738697, "grad_norm": 1665.12548828125, "learning_rate": 4.99999802607938e-05, "loss": 125.203, "step": 25090 }, { "epoch": 0.1014071760727546, "grad_norm": 4897.3798828125, "learning_rate": 4.999997563061038e-05, "loss": 99.004, "step": 25100 }, { "epoch": 0.10144757733812224, "grad_norm": 561.159423828125, "learning_rate": 4.999997051303956e-05, "loss": 128.6623, "step": 25110 }, { "epoch": 0.10148797860348986, "grad_norm": 407.36114501953125, "learning_rate": 4.9999964908081455e-05, "loss": 107.5606, "step": 25120 }, { "epoch": 0.10152837986885749, "grad_norm": 602.0693359375, "learning_rate": 4.999995881573616e-05, "loss": 99.1402, "step": 25130 }, { "epoch": 0.10156878113422513, "grad_norm": 428.0013427734375, "learning_rate": 4.999995223600379e-05, "loss": 176.0661, "step": 25140 }, { "epoch": 0.10160918239959275, "grad_norm": 1730.8555908203125, "learning_rate": 4.999994516888449e-05, "loss": 143.4497, "step": 25150 }, { "epoch": 0.10164958366496038, "grad_norm": 1302.3773193359375, "learning_rate": 4.999993761437838e-05, "loss": 137.5996, "step": 25160 }, { "epoch": 0.10168998493032802, "grad_norm": 3554.90771484375, "learning_rate": 4.9999929572485616e-05, "loss": 139.0566, "step": 25170 }, { "epoch": 0.10173038619569565, "grad_norm": 1487.5889892578125, "learning_rate": 4.999992104320636e-05, "loss": 108.4844, "step": 25180 }, { "epoch": 0.10177078746106329, "grad_norm": 1060.8238525390625, "learning_rate": 4.999991202654076e-05, "loss": 110.4729, "step": 25190 }, { "epoch": 0.10181118872643091, "grad_norm": 899.1284790039062, "learning_rate": 4.9999902522489015e-05, "loss": 117.7502, "step": 25200 }, { "epoch": 0.10185158999179854, "grad_norm": 824.8314819335938, "learning_rate": 4.99998925310513e-05, "loss": 96.4199, "step": 25210 }, { "epoch": 0.10189199125716618, "grad_norm": 729.9413452148438, "learning_rate": 4.999988205222781e-05, "loss": 58.8153, "step": 25220 }, { "epoch": 0.1019323925225338, "grad_norm": 1475.010498046875, "learning_rate": 4.999987108601874e-05, "loss": 118.004, "step": 25230 }, { "epoch": 0.10197279378790143, "grad_norm": 3924.0517578125, "learning_rate": 4.999985963242432e-05, "loss": 161.4323, "step": 25240 }, { "epoch": 0.10201319505326907, "grad_norm": 684.8850708007812, "learning_rate": 4.999984769144476e-05, "loss": 93.1785, "step": 25250 }, { "epoch": 0.1020535963186367, "grad_norm": 617.2882690429688, "learning_rate": 4.99998352630803e-05, "loss": 108.2454, "step": 25260 }, { "epoch": 0.10209399758400434, "grad_norm": 713.0025634765625, "learning_rate": 4.999982234733118e-05, "loss": 128.2708, "step": 25270 }, { "epoch": 0.10213439884937196, "grad_norm": 830.2299194335938, "learning_rate": 4.9999808944197666e-05, "loss": 93.2193, "step": 25280 }, { "epoch": 0.10217480011473959, "grad_norm": 635.0062255859375, "learning_rate": 4.999979505367999e-05, "loss": 95.8657, "step": 25290 }, { "epoch": 0.10221520138010723, "grad_norm": 681.8064575195312, "learning_rate": 4.999978067577844e-05, "loss": 81.1595, "step": 25300 }, { "epoch": 0.10225560264547486, "grad_norm": 1729.1885986328125, "learning_rate": 4.999976581049331e-05, "loss": 104.3257, "step": 25310 }, { "epoch": 0.10229600391084248, "grad_norm": 499.2225341796875, "learning_rate": 4.999975045782486e-05, "loss": 150.2334, "step": 25320 }, { "epoch": 0.10233640517621012, "grad_norm": 1919.6650390625, "learning_rate": 4.9999734617773405e-05, "loss": 93.5073, "step": 25330 }, { "epoch": 0.10237680644157775, "grad_norm": 628.6290283203125, "learning_rate": 4.9999718290339256e-05, "loss": 138.5961, "step": 25340 }, { "epoch": 0.10241720770694539, "grad_norm": 877.5479125976562, "learning_rate": 4.999970147552272e-05, "loss": 106.7546, "step": 25350 }, { "epoch": 0.10245760897231301, "grad_norm": 426.84027099609375, "learning_rate": 4.999968417332415e-05, "loss": 102.9967, "step": 25360 }, { "epoch": 0.10249801023768064, "grad_norm": 1005.803466796875, "learning_rate": 4.9999666383743854e-05, "loss": 133.2389, "step": 25370 }, { "epoch": 0.10253841150304828, "grad_norm": 973.8045654296875, "learning_rate": 4.999964810678219e-05, "loss": 127.7309, "step": 25380 }, { "epoch": 0.1025788127684159, "grad_norm": 1207.312255859375, "learning_rate": 4.9999629342439524e-05, "loss": 91.1231, "step": 25390 }, { "epoch": 0.10261921403378353, "grad_norm": 2072.83837890625, "learning_rate": 4.999961009071621e-05, "loss": 154.6031, "step": 25400 }, { "epoch": 0.10265961529915117, "grad_norm": 547.6760864257812, "learning_rate": 4.999959035161263e-05, "loss": 112.7586, "step": 25410 }, { "epoch": 0.1027000165645188, "grad_norm": 784.879150390625, "learning_rate": 4.999957012512916e-05, "loss": 104.8253, "step": 25420 }, { "epoch": 0.10274041782988644, "grad_norm": 625.8154296875, "learning_rate": 4.99995494112662e-05, "loss": 80.9909, "step": 25430 }, { "epoch": 0.10278081909525406, "grad_norm": 976.3950805664062, "learning_rate": 4.999952821002415e-05, "loss": 121.5834, "step": 25440 }, { "epoch": 0.10282122036062169, "grad_norm": 658.44140625, "learning_rate": 4.999950652140343e-05, "loss": 152.7022, "step": 25450 }, { "epoch": 0.10286162162598933, "grad_norm": 948.6815795898438, "learning_rate": 4.999948434540446e-05, "loss": 119.2311, "step": 25460 }, { "epoch": 0.10290202289135696, "grad_norm": 2785.1484375, "learning_rate": 4.999946168202767e-05, "loss": 123.5097, "step": 25470 }, { "epoch": 0.10294242415672458, "grad_norm": 2336.2509765625, "learning_rate": 4.999943853127351e-05, "loss": 92.8932, "step": 25480 }, { "epoch": 0.10298282542209222, "grad_norm": 324.7106018066406, "learning_rate": 4.9999414893142425e-05, "loss": 84.0734, "step": 25490 }, { "epoch": 0.10302322668745985, "grad_norm": 1529.5069580078125, "learning_rate": 4.999939076763487e-05, "loss": 151.1412, "step": 25500 }, { "epoch": 0.10306362795282749, "grad_norm": 3243.043701171875, "learning_rate": 4.999936615475133e-05, "loss": 98.3063, "step": 25510 }, { "epoch": 0.10310402921819511, "grad_norm": 656.1243286132812, "learning_rate": 4.9999341054492265e-05, "loss": 132.48, "step": 25520 }, { "epoch": 0.10314443048356274, "grad_norm": 358.87744140625, "learning_rate": 4.999931546685819e-05, "loss": 160.6029, "step": 25530 }, { "epoch": 0.10318483174893038, "grad_norm": 1219.5975341796875, "learning_rate": 4.999928939184958e-05, "loss": 122.454, "step": 25540 }, { "epoch": 0.103225233014298, "grad_norm": 1476.14208984375, "learning_rate": 4.999926282946695e-05, "loss": 121.1642, "step": 25550 }, { "epoch": 0.10326563427966563, "grad_norm": 737.3023071289062, "learning_rate": 4.9999235779710826e-05, "loss": 68.9884, "step": 25560 }, { "epoch": 0.10330603554503327, "grad_norm": 0.0, "learning_rate": 4.999920824258173e-05, "loss": 70.6594, "step": 25570 }, { "epoch": 0.1033464368104009, "grad_norm": 585.1388549804688, "learning_rate": 4.999918021808019e-05, "loss": 82.4569, "step": 25580 }, { "epoch": 0.10338683807576854, "grad_norm": 1436.7415771484375, "learning_rate": 4.999915170620677e-05, "loss": 135.1959, "step": 25590 }, { "epoch": 0.10342723934113617, "grad_norm": 1880.6636962890625, "learning_rate": 4.999912270696202e-05, "loss": 110.9196, "step": 25600 }, { "epoch": 0.10346764060650379, "grad_norm": 435.46234130859375, "learning_rate": 4.9999093220346495e-05, "loss": 101.5219, "step": 25610 }, { "epoch": 0.10350804187187143, "grad_norm": 1327.9521484375, "learning_rate": 4.9999063246360786e-05, "loss": 92.5202, "step": 25620 }, { "epoch": 0.10354844313723906, "grad_norm": 1072.19775390625, "learning_rate": 4.9999032785005464e-05, "loss": 104.1919, "step": 25630 }, { "epoch": 0.10358884440260668, "grad_norm": 1694.939697265625, "learning_rate": 4.999900183628112e-05, "loss": 165.591, "step": 25640 }, { "epoch": 0.10362924566797432, "grad_norm": 773.9560546875, "learning_rate": 4.999897040018837e-05, "loss": 104.817, "step": 25650 }, { "epoch": 0.10366964693334195, "grad_norm": 1010.9199829101562, "learning_rate": 4.9998938476727826e-05, "loss": 114.004, "step": 25660 }, { "epoch": 0.10371004819870959, "grad_norm": 2661.3837890625, "learning_rate": 4.99989060659001e-05, "loss": 99.0087, "step": 25670 }, { "epoch": 0.10375044946407722, "grad_norm": 553.9918823242188, "learning_rate": 4.999887316770584e-05, "loss": 72.9581, "step": 25680 }, { "epoch": 0.10379085072944484, "grad_norm": 580.9917602539062, "learning_rate": 4.999883978214567e-05, "loss": 103.5152, "step": 25690 }, { "epoch": 0.10383125199481248, "grad_norm": 4983.98974609375, "learning_rate": 4.999880590922025e-05, "loss": 135.9269, "step": 25700 }, { "epoch": 0.10387165326018011, "grad_norm": 1303.5921630859375, "learning_rate": 4.999877154893023e-05, "loss": 125.694, "step": 25710 }, { "epoch": 0.10391205452554773, "grad_norm": 416.0110778808594, "learning_rate": 4.9998736701276295e-05, "loss": 61.3954, "step": 25720 }, { "epoch": 0.10395245579091537, "grad_norm": 1457.8369140625, "learning_rate": 4.999870136625912e-05, "loss": 148.0761, "step": 25730 }, { "epoch": 0.103992857056283, "grad_norm": 758.3458251953125, "learning_rate": 4.999866554387939e-05, "loss": 94.0594, "step": 25740 }, { "epoch": 0.10403325832165064, "grad_norm": 1217.02978515625, "learning_rate": 4.999862923413781e-05, "loss": 101.6771, "step": 25750 }, { "epoch": 0.10407365958701827, "grad_norm": 1469.3179931640625, "learning_rate": 4.9998592437035076e-05, "loss": 119.159, "step": 25760 }, { "epoch": 0.10411406085238589, "grad_norm": 2354.025390625, "learning_rate": 4.9998555152571914e-05, "loss": 115.2768, "step": 25770 }, { "epoch": 0.10415446211775353, "grad_norm": 789.3027954101562, "learning_rate": 4.999851738074904e-05, "loss": 73.4068, "step": 25780 }, { "epoch": 0.10419486338312116, "grad_norm": 841.7200317382812, "learning_rate": 4.9998479121567214e-05, "loss": 134.448, "step": 25790 }, { "epoch": 0.10423526464848878, "grad_norm": 431.4704895019531, "learning_rate": 4.9998440375027166e-05, "loss": 150.1897, "step": 25800 }, { "epoch": 0.10427566591385642, "grad_norm": 508.6786804199219, "learning_rate": 4.999840114112965e-05, "loss": 82.7051, "step": 25810 }, { "epoch": 0.10431606717922405, "grad_norm": 818.539794921875, "learning_rate": 4.999836141987543e-05, "loss": 138.6107, "step": 25820 }, { "epoch": 0.10435646844459169, "grad_norm": 503.4788513183594, "learning_rate": 4.999832121126529e-05, "loss": 119.86, "step": 25830 }, { "epoch": 0.10439686970995932, "grad_norm": 838.92041015625, "learning_rate": 4.99982805153e-05, "loss": 121.616, "step": 25840 }, { "epoch": 0.10443727097532694, "grad_norm": 631.1499633789062, "learning_rate": 4.9998239331980366e-05, "loss": 85.7237, "step": 25850 }, { "epoch": 0.10447767224069458, "grad_norm": 566.1675415039062, "learning_rate": 4.999819766130719e-05, "loss": 91.9205, "step": 25860 }, { "epoch": 0.10451807350606221, "grad_norm": 0.0, "learning_rate": 4.999815550328128e-05, "loss": 88.1384, "step": 25870 }, { "epoch": 0.10455847477142984, "grad_norm": 735.2154541015625, "learning_rate": 4.9998112857903454e-05, "loss": 114.0636, "step": 25880 }, { "epoch": 0.10459887603679748, "grad_norm": 1965.4488525390625, "learning_rate": 4.9998069725174546e-05, "loss": 117.463, "step": 25890 }, { "epoch": 0.1046392773021651, "grad_norm": 954.0660400390625, "learning_rate": 4.9998026105095405e-05, "loss": 110.6166, "step": 25900 }, { "epoch": 0.10467967856753274, "grad_norm": 2468.442626953125, "learning_rate": 4.9997981997666874e-05, "loss": 118.2137, "step": 25910 }, { "epoch": 0.10472007983290037, "grad_norm": 1254.0142822265625, "learning_rate": 4.999793740288982e-05, "loss": 74.752, "step": 25920 }, { "epoch": 0.104760481098268, "grad_norm": 625.595458984375, "learning_rate": 4.999789232076509e-05, "loss": 89.1264, "step": 25930 }, { "epoch": 0.10480088236363563, "grad_norm": 579.2570190429688, "learning_rate": 4.999784675129359e-05, "loss": 128.8163, "step": 25940 }, { "epoch": 0.10484128362900326, "grad_norm": 4238.0439453125, "learning_rate": 4.999780069447619e-05, "loss": 135.1213, "step": 25950 }, { "epoch": 0.10488168489437089, "grad_norm": 2267.60693359375, "learning_rate": 4.9997754150313815e-05, "loss": 106.0803, "step": 25960 }, { "epoch": 0.10492208615973853, "grad_norm": 1275.7972412109375, "learning_rate": 4.999770711880734e-05, "loss": 114.9827, "step": 25970 }, { "epoch": 0.10496248742510615, "grad_norm": 1259.9520263671875, "learning_rate": 4.999765959995769e-05, "loss": 122.4034, "step": 25980 }, { "epoch": 0.10500288869047379, "grad_norm": 1249.4486083984375, "learning_rate": 4.99976115937658e-05, "loss": 113.4047, "step": 25990 }, { "epoch": 0.10504328995584142, "grad_norm": 829.5191650390625, "learning_rate": 4.999756310023261e-05, "loss": 100.0737, "step": 26000 }, { "epoch": 0.10508369122120904, "grad_norm": 1288.53515625, "learning_rate": 4.999751411935905e-05, "loss": 86.3596, "step": 26010 }, { "epoch": 0.10512409248657668, "grad_norm": 1140.1302490234375, "learning_rate": 4.999746465114609e-05, "loss": 62.8034, "step": 26020 }, { "epoch": 0.10516449375194431, "grad_norm": 694.5878295898438, "learning_rate": 4.999741469559468e-05, "loss": 92.5841, "step": 26030 }, { "epoch": 0.10520489501731194, "grad_norm": 1500.5892333984375, "learning_rate": 4.99973642527058e-05, "loss": 110.6927, "step": 26040 }, { "epoch": 0.10524529628267958, "grad_norm": 874.5784301757812, "learning_rate": 4.999731332248044e-05, "loss": 63.3448, "step": 26050 }, { "epoch": 0.1052856975480472, "grad_norm": 932.64501953125, "learning_rate": 4.999726190491958e-05, "loss": 107.8189, "step": 26060 }, { "epoch": 0.10532609881341484, "grad_norm": 736.0410766601562, "learning_rate": 4.9997210000024236e-05, "loss": 99.0618, "step": 26070 }, { "epoch": 0.10536650007878247, "grad_norm": 1410.8321533203125, "learning_rate": 4.999715760779541e-05, "loss": 126.3873, "step": 26080 }, { "epoch": 0.1054069013441501, "grad_norm": 1724.4503173828125, "learning_rate": 4.999710472823414e-05, "loss": 104.2871, "step": 26090 }, { "epoch": 0.10544730260951773, "grad_norm": 881.2642822265625, "learning_rate": 4.9997051361341425e-05, "loss": 67.8849, "step": 26100 }, { "epoch": 0.10548770387488536, "grad_norm": 1165.669921875, "learning_rate": 4.999699750711833e-05, "loss": 99.132, "step": 26110 }, { "epoch": 0.10552810514025299, "grad_norm": 994.9390869140625, "learning_rate": 4.9996943165565905e-05, "loss": 62.6341, "step": 26120 }, { "epoch": 0.10556850640562063, "grad_norm": 1422.3941650390625, "learning_rate": 4.99968883366852e-05, "loss": 85.9586, "step": 26130 }, { "epoch": 0.10560890767098825, "grad_norm": 791.3880615234375, "learning_rate": 4.9996833020477285e-05, "loss": 76.5841, "step": 26140 }, { "epoch": 0.10564930893635589, "grad_norm": 1224.0389404296875, "learning_rate": 4.999677721694325e-05, "loss": 161.3394, "step": 26150 }, { "epoch": 0.10568971020172352, "grad_norm": 562.48486328125, "learning_rate": 4.9996720926084164e-05, "loss": 117.8859, "step": 26160 }, { "epoch": 0.10573011146709115, "grad_norm": 1088.6715087890625, "learning_rate": 4.999666414790113e-05, "loss": 119.7692, "step": 26170 }, { "epoch": 0.10577051273245879, "grad_norm": 716.9673461914062, "learning_rate": 4.999660688239527e-05, "loss": 85.0265, "step": 26180 }, { "epoch": 0.10581091399782641, "grad_norm": 469.45477294921875, "learning_rate": 4.999654912956769e-05, "loss": 75.7188, "step": 26190 }, { "epoch": 0.10585131526319404, "grad_norm": 434.962890625, "learning_rate": 4.9996490889419514e-05, "loss": 92.6741, "step": 26200 }, { "epoch": 0.10589171652856168, "grad_norm": 1787.8798828125, "learning_rate": 4.9996432161951875e-05, "loss": 101.0907, "step": 26210 }, { "epoch": 0.1059321177939293, "grad_norm": 619.8673706054688, "learning_rate": 4.999637294716593e-05, "loss": 144.3787, "step": 26220 }, { "epoch": 0.10597251905929694, "grad_norm": 1517.8831787109375, "learning_rate": 4.9996313245062823e-05, "loss": 106.3331, "step": 26230 }, { "epoch": 0.10601292032466457, "grad_norm": 7921.58203125, "learning_rate": 4.999625305564371e-05, "loss": 83.1559, "step": 26240 }, { "epoch": 0.1060533215900322, "grad_norm": 839.67822265625, "learning_rate": 4.9996192378909786e-05, "loss": 57.181, "step": 26250 }, { "epoch": 0.10609372285539984, "grad_norm": 696.2799682617188, "learning_rate": 4.999613121486222e-05, "loss": 49.9272, "step": 26260 }, { "epoch": 0.10613412412076746, "grad_norm": 1080.623291015625, "learning_rate": 4.99960695635022e-05, "loss": 120.3179, "step": 26270 }, { "epoch": 0.10617452538613509, "grad_norm": 499.45166015625, "learning_rate": 4.999600742483094e-05, "loss": 85.3591, "step": 26280 }, { "epoch": 0.10621492665150273, "grad_norm": 938.3174438476562, "learning_rate": 4.999594479884965e-05, "loss": 136.4883, "step": 26290 }, { "epoch": 0.10625532791687035, "grad_norm": 1555.7998046875, "learning_rate": 4.999588168555954e-05, "loss": 122.8322, "step": 26300 }, { "epoch": 0.106295729182238, "grad_norm": 4008.9033203125, "learning_rate": 4.999581808496185e-05, "loss": 91.5676, "step": 26310 }, { "epoch": 0.10633613044760562, "grad_norm": 2325.766845703125, "learning_rate": 4.999575399705783e-05, "loss": 85.4827, "step": 26320 }, { "epoch": 0.10637653171297325, "grad_norm": 954.0802612304688, "learning_rate": 4.999568942184871e-05, "loss": 161.0085, "step": 26330 }, { "epoch": 0.10641693297834089, "grad_norm": 1845.4427490234375, "learning_rate": 4.999562435933575e-05, "loss": 106.469, "step": 26340 }, { "epoch": 0.10645733424370851, "grad_norm": 1453.1446533203125, "learning_rate": 4.999555880952023e-05, "loss": 142.5144, "step": 26350 }, { "epoch": 0.10649773550907614, "grad_norm": 824.1065673828125, "learning_rate": 4.999549277240342e-05, "loss": 84.602, "step": 26360 }, { "epoch": 0.10653813677444378, "grad_norm": 3219.528564453125, "learning_rate": 4.999542624798661e-05, "loss": 79.4629, "step": 26370 }, { "epoch": 0.1065785380398114, "grad_norm": 655.1124267578125, "learning_rate": 4.999535923627109e-05, "loss": 103.1578, "step": 26380 }, { "epoch": 0.10661893930517904, "grad_norm": 840.5703125, "learning_rate": 4.999529173725819e-05, "loss": 146.742, "step": 26390 }, { "epoch": 0.10665934057054667, "grad_norm": 1991.2275390625, "learning_rate": 4.999522375094919e-05, "loss": 88.6063, "step": 26400 }, { "epoch": 0.1066997418359143, "grad_norm": 1580.2342529296875, "learning_rate": 4.999515527734545e-05, "loss": 113.3611, "step": 26410 }, { "epoch": 0.10674014310128194, "grad_norm": 1060.3555908203125, "learning_rate": 4.9995086316448284e-05, "loss": 176.4278, "step": 26420 }, { "epoch": 0.10678054436664956, "grad_norm": 2521.33447265625, "learning_rate": 4.999501686825904e-05, "loss": 104.2664, "step": 26430 }, { "epoch": 0.10682094563201719, "grad_norm": 1635.3087158203125, "learning_rate": 4.999494693277907e-05, "loss": 129.5747, "step": 26440 }, { "epoch": 0.10686134689738483, "grad_norm": 0.0, "learning_rate": 4.999487651000975e-05, "loss": 83.0078, "step": 26450 }, { "epoch": 0.10690174816275246, "grad_norm": 1445.0679931640625, "learning_rate": 4.9994805599952445e-05, "loss": 115.5746, "step": 26460 }, { "epoch": 0.1069421494281201, "grad_norm": 861.307861328125, "learning_rate": 4.999473420260853e-05, "loss": 63.1311, "step": 26470 }, { "epoch": 0.10698255069348772, "grad_norm": 2009.19189453125, "learning_rate": 4.999466231797941e-05, "loss": 119.9433, "step": 26480 }, { "epoch": 0.10702295195885535, "grad_norm": 1041.7818603515625, "learning_rate": 4.9994589946066475e-05, "loss": 134.1364, "step": 26490 }, { "epoch": 0.10706335322422299, "grad_norm": 717.532470703125, "learning_rate": 4.999451708687114e-05, "loss": 99.0042, "step": 26500 }, { "epoch": 0.10710375448959061, "grad_norm": 1365.530517578125, "learning_rate": 4.999444374039483e-05, "loss": 126.3179, "step": 26510 }, { "epoch": 0.10714415575495824, "grad_norm": 570.315673828125, "learning_rate": 4.999436990663897e-05, "loss": 75.446, "step": 26520 }, { "epoch": 0.10718455702032588, "grad_norm": 1172.0423583984375, "learning_rate": 4.9994295585605e-05, "loss": 117.2075, "step": 26530 }, { "epoch": 0.1072249582856935, "grad_norm": 773.590087890625, "learning_rate": 4.9994220777294364e-05, "loss": 90.5786, "step": 26540 }, { "epoch": 0.10726535955106115, "grad_norm": 983.9734497070312, "learning_rate": 4.999414548170853e-05, "loss": 89.9236, "step": 26550 }, { "epoch": 0.10730576081642877, "grad_norm": 1194.6666259765625, "learning_rate": 4.999406969884897e-05, "loss": 64.2603, "step": 26560 }, { "epoch": 0.1073461620817964, "grad_norm": 1597.0323486328125, "learning_rate": 4.9993993428717144e-05, "loss": 130.4407, "step": 26570 }, { "epoch": 0.10738656334716404, "grad_norm": 1673.6080322265625, "learning_rate": 4.999391667131455e-05, "loss": 163.3066, "step": 26580 }, { "epoch": 0.10742696461253166, "grad_norm": 663.078857421875, "learning_rate": 4.9993839426642685e-05, "loss": 130.6437, "step": 26590 }, { "epoch": 0.10746736587789929, "grad_norm": 1640.7069091796875, "learning_rate": 4.999376169470306e-05, "loss": 131.2924, "step": 26600 }, { "epoch": 0.10750776714326693, "grad_norm": 986.1610717773438, "learning_rate": 4.9993683475497174e-05, "loss": 115.7689, "step": 26610 }, { "epoch": 0.10754816840863456, "grad_norm": 322.355224609375, "learning_rate": 4.999360476902656e-05, "loss": 75.3167, "step": 26620 }, { "epoch": 0.1075885696740022, "grad_norm": 507.4998779296875, "learning_rate": 4.999352557529275e-05, "loss": 96.3537, "step": 26630 }, { "epoch": 0.10762897093936982, "grad_norm": 1944.9493408203125, "learning_rate": 4.99934458942973e-05, "loss": 145.2764, "step": 26640 }, { "epoch": 0.10766937220473745, "grad_norm": 2324.57470703125, "learning_rate": 4.999336572604175e-05, "loss": 90.6306, "step": 26650 }, { "epoch": 0.10770977347010509, "grad_norm": 1668.3519287109375, "learning_rate": 4.999328507052768e-05, "loss": 104.2174, "step": 26660 }, { "epoch": 0.10775017473547271, "grad_norm": 3942.664794921875, "learning_rate": 4.999320392775663e-05, "loss": 126.4141, "step": 26670 }, { "epoch": 0.10779057600084034, "grad_norm": 1391.0018310546875, "learning_rate": 4.999312229773022e-05, "loss": 83.7744, "step": 26680 }, { "epoch": 0.10783097726620798, "grad_norm": 1609.7559814453125, "learning_rate": 4.999304018045001e-05, "loss": 100.6681, "step": 26690 }, { "epoch": 0.1078713785315756, "grad_norm": 1184.3446044921875, "learning_rate": 4.999295757591762e-05, "loss": 95.7704, "step": 26700 }, { "epoch": 0.10791177979694325, "grad_norm": 2358.531494140625, "learning_rate": 4.9992874484134653e-05, "loss": 74.604, "step": 26710 }, { "epoch": 0.10795218106231087, "grad_norm": 576.525146484375, "learning_rate": 4.9992790905102734e-05, "loss": 114.726, "step": 26720 }, { "epoch": 0.1079925823276785, "grad_norm": 3263.18701171875, "learning_rate": 4.999270683882349e-05, "loss": 147.2892, "step": 26730 }, { "epoch": 0.10803298359304614, "grad_norm": 882.455322265625, "learning_rate": 4.999262228529855e-05, "loss": 127.5431, "step": 26740 }, { "epoch": 0.10807338485841377, "grad_norm": 1100.7166748046875, "learning_rate": 4.999253724452958e-05, "loss": 106.7672, "step": 26750 }, { "epoch": 0.10811378612378139, "grad_norm": 687.6595458984375, "learning_rate": 4.999245171651823e-05, "loss": 89.8419, "step": 26760 }, { "epoch": 0.10815418738914903, "grad_norm": 443.3103942871094, "learning_rate": 4.999236570126616e-05, "loss": 125.1742, "step": 26770 }, { "epoch": 0.10819458865451666, "grad_norm": 1187.34912109375, "learning_rate": 4.999227919877506e-05, "loss": 127.4198, "step": 26780 }, { "epoch": 0.1082349899198843, "grad_norm": 496.15240478515625, "learning_rate": 4.9992192209046603e-05, "loss": 117.4117, "step": 26790 }, { "epoch": 0.10827539118525192, "grad_norm": 1453.9019775390625, "learning_rate": 4.99921047320825e-05, "loss": 93.6351, "step": 26800 }, { "epoch": 0.10831579245061955, "grad_norm": 773.209228515625, "learning_rate": 4.999201676788445e-05, "loss": 105.4083, "step": 26810 }, { "epoch": 0.10835619371598719, "grad_norm": 1077.73193359375, "learning_rate": 4.999192831645416e-05, "loss": 113.7033, "step": 26820 }, { "epoch": 0.10839659498135482, "grad_norm": 3824.103515625, "learning_rate": 4.999183937779336e-05, "loss": 128.3056, "step": 26830 }, { "epoch": 0.10843699624672244, "grad_norm": 1111.69189453125, "learning_rate": 4.999174995190379e-05, "loss": 69.169, "step": 26840 }, { "epoch": 0.10847739751209008, "grad_norm": 355.9375, "learning_rate": 4.999166003878718e-05, "loss": 95.577, "step": 26850 }, { "epoch": 0.10851779877745771, "grad_norm": 566.212158203125, "learning_rate": 4.99915696384453e-05, "loss": 92.3288, "step": 26860 }, { "epoch": 0.10855820004282535, "grad_norm": 433.31170654296875, "learning_rate": 4.99914787508799e-05, "loss": 77.2623, "step": 26870 }, { "epoch": 0.10859860130819297, "grad_norm": 1091.27734375, "learning_rate": 4.999138737609276e-05, "loss": 97.2431, "step": 26880 }, { "epoch": 0.1086390025735606, "grad_norm": 779.114501953125, "learning_rate": 4.9991295514085644e-05, "loss": 90.7876, "step": 26890 }, { "epoch": 0.10867940383892824, "grad_norm": 1826.2213134765625, "learning_rate": 4.9991203164860365e-05, "loss": 164.2726, "step": 26900 }, { "epoch": 0.10871980510429587, "grad_norm": 1381.0306396484375, "learning_rate": 4.999111032841871e-05, "loss": 163.8954, "step": 26910 }, { "epoch": 0.10876020636966349, "grad_norm": 535.5016479492188, "learning_rate": 4.9991017004762496e-05, "loss": 85.5764, "step": 26920 }, { "epoch": 0.10880060763503113, "grad_norm": 822.9321899414062, "learning_rate": 4.999092319389354e-05, "loss": 117.8981, "step": 26930 }, { "epoch": 0.10884100890039876, "grad_norm": 800.758544921875, "learning_rate": 4.999082889581367e-05, "loss": 76.8381, "step": 26940 }, { "epoch": 0.1088814101657664, "grad_norm": 557.4238891601562, "learning_rate": 4.999073411052472e-05, "loss": 130.8072, "step": 26950 }, { "epoch": 0.10892181143113402, "grad_norm": 1034.1680908203125, "learning_rate": 4.9990638838028546e-05, "loss": 75.0051, "step": 26960 }, { "epoch": 0.10896221269650165, "grad_norm": 460.87469482421875, "learning_rate": 4.9990543078327e-05, "loss": 97.5543, "step": 26970 }, { "epoch": 0.10900261396186929, "grad_norm": 726.0296630859375, "learning_rate": 4.9990446831421955e-05, "loss": 97.844, "step": 26980 }, { "epoch": 0.10904301522723692, "grad_norm": 857.0488891601562, "learning_rate": 4.9990350097315275e-05, "loss": 110.277, "step": 26990 }, { "epoch": 0.10908341649260454, "grad_norm": 494.9609375, "learning_rate": 4.999025287600886e-05, "loss": 78.5458, "step": 27000 }, { "epoch": 0.10912381775797218, "grad_norm": 893.1571655273438, "learning_rate": 4.99901551675046e-05, "loss": 109.9289, "step": 27010 }, { "epoch": 0.10916421902333981, "grad_norm": 970.8618774414062, "learning_rate": 4.99900569718044e-05, "loss": 116.1883, "step": 27020 }, { "epoch": 0.10920462028870745, "grad_norm": 745.3575439453125, "learning_rate": 4.9989958288910164e-05, "loss": 161.3541, "step": 27030 }, { "epoch": 0.10924502155407508, "grad_norm": 804.6139526367188, "learning_rate": 4.998985911882384e-05, "loss": 105.6759, "step": 27040 }, { "epoch": 0.1092854228194427, "grad_norm": 988.0171508789062, "learning_rate": 4.998975946154734e-05, "loss": 167.7433, "step": 27050 }, { "epoch": 0.10932582408481034, "grad_norm": 570.0489501953125, "learning_rate": 4.998965931708261e-05, "loss": 102.1875, "step": 27060 }, { "epoch": 0.10936622535017797, "grad_norm": 677.1341552734375, "learning_rate": 4.998955868543161e-05, "loss": 128.7201, "step": 27070 }, { "epoch": 0.1094066266155456, "grad_norm": 905.8904418945312, "learning_rate": 4.99894575665963e-05, "loss": 107.9805, "step": 27080 }, { "epoch": 0.10944702788091323, "grad_norm": 737.1869506835938, "learning_rate": 4.9989355960578645e-05, "loss": 89.4114, "step": 27090 }, { "epoch": 0.10948742914628086, "grad_norm": 670.5479125976562, "learning_rate": 4.998925386738063e-05, "loss": 175.9301, "step": 27100 }, { "epoch": 0.1095278304116485, "grad_norm": 1188.797607421875, "learning_rate": 4.9989151287004244e-05, "loss": 90.327, "step": 27110 }, { "epoch": 0.10956823167701613, "grad_norm": 1030.21533203125, "learning_rate": 4.9989048219451495e-05, "loss": 63.2848, "step": 27120 }, { "epoch": 0.10960863294238375, "grad_norm": 539.4580688476562, "learning_rate": 4.998894466472438e-05, "loss": 104.4276, "step": 27130 }, { "epoch": 0.10964903420775139, "grad_norm": 826.8709106445312, "learning_rate": 4.998884062282492e-05, "loss": 92.5154, "step": 27140 }, { "epoch": 0.10968943547311902, "grad_norm": 916.9140014648438, "learning_rate": 4.998873609375516e-05, "loss": 104.0753, "step": 27150 }, { "epoch": 0.10972983673848664, "grad_norm": 478.9193115234375, "learning_rate": 4.998863107751711e-05, "loss": 117.0698, "step": 27160 }, { "epoch": 0.10977023800385428, "grad_norm": 944.5336303710938, "learning_rate": 4.9988525574112846e-05, "loss": 151.4209, "step": 27170 }, { "epoch": 0.10981063926922191, "grad_norm": 552.6795654296875, "learning_rate": 4.99884195835444e-05, "loss": 88.4628, "step": 27180 }, { "epoch": 0.10985104053458955, "grad_norm": 954.2455444335938, "learning_rate": 4.9988313105813856e-05, "loss": 112.3373, "step": 27190 }, { "epoch": 0.10989144179995718, "grad_norm": 936.270751953125, "learning_rate": 4.998820614092328e-05, "loss": 75.6448, "step": 27200 }, { "epoch": 0.1099318430653248, "grad_norm": 777.1414184570312, "learning_rate": 4.9988098688874763e-05, "loss": 96.6067, "step": 27210 }, { "epoch": 0.10997224433069244, "grad_norm": 1599.469482421875, "learning_rate": 4.9987990749670395e-05, "loss": 126.3036, "step": 27220 }, { "epoch": 0.11001264559606007, "grad_norm": 398.20196533203125, "learning_rate": 4.9987882323312287e-05, "loss": 86.396, "step": 27230 }, { "epoch": 0.1100530468614277, "grad_norm": 1596.5191650390625, "learning_rate": 4.998777340980254e-05, "loss": 93.4679, "step": 27240 }, { "epoch": 0.11009344812679533, "grad_norm": 807.2232055664062, "learning_rate": 4.998766400914329e-05, "loss": 75.585, "step": 27250 }, { "epoch": 0.11013384939216296, "grad_norm": 1436.22216796875, "learning_rate": 4.9987554121336666e-05, "loss": 102.0856, "step": 27260 }, { "epoch": 0.1101742506575306, "grad_norm": 1310.2745361328125, "learning_rate": 4.998744374638481e-05, "loss": 102.2359, "step": 27270 }, { "epoch": 0.11021465192289823, "grad_norm": 1259.0927734375, "learning_rate": 4.998733288428987e-05, "loss": 82.0602, "step": 27280 }, { "epoch": 0.11025505318826585, "grad_norm": 2409.748046875, "learning_rate": 4.998722153505402e-05, "loss": 95.3449, "step": 27290 }, { "epoch": 0.11029545445363349, "grad_norm": 1046.286865234375, "learning_rate": 4.998710969867942e-05, "loss": 113.0733, "step": 27300 }, { "epoch": 0.11033585571900112, "grad_norm": 630.7343139648438, "learning_rate": 4.9986997375168246e-05, "loss": 68.2742, "step": 27310 }, { "epoch": 0.11037625698436875, "grad_norm": 553.9161987304688, "learning_rate": 4.9986884564522696e-05, "loss": 145.594, "step": 27320 }, { "epoch": 0.11041665824973639, "grad_norm": 823.818603515625, "learning_rate": 4.998677126674497e-05, "loss": 106.8122, "step": 27330 }, { "epoch": 0.11045705951510401, "grad_norm": 1561.4769287109375, "learning_rate": 4.9986657481837277e-05, "loss": 154.897, "step": 27340 }, { "epoch": 0.11049746078047164, "grad_norm": 1459.7176513671875, "learning_rate": 4.9986543209801825e-05, "loss": 140.0131, "step": 27350 }, { "epoch": 0.11053786204583928, "grad_norm": 797.2266235351562, "learning_rate": 4.998642845064086e-05, "loss": 117.8721, "step": 27360 }, { "epoch": 0.1105782633112069, "grad_norm": 902.2604370117188, "learning_rate": 4.9986313204356594e-05, "loss": 169.9797, "step": 27370 }, { "epoch": 0.11061866457657454, "grad_norm": 504.2903137207031, "learning_rate": 4.998619747095129e-05, "loss": 89.4714, "step": 27380 }, { "epoch": 0.11065906584194217, "grad_norm": 713.7076416015625, "learning_rate": 4.998608125042721e-05, "loss": 97.6478, "step": 27390 }, { "epoch": 0.1106994671073098, "grad_norm": 1151.92822265625, "learning_rate": 4.9985964542786614e-05, "loss": 115.0625, "step": 27400 }, { "epoch": 0.11073986837267744, "grad_norm": 1444.137451171875, "learning_rate": 4.9985847348031764e-05, "loss": 119.7467, "step": 27410 }, { "epoch": 0.11078026963804506, "grad_norm": 3067.8701171875, "learning_rate": 4.998572966616496e-05, "loss": 119.108, "step": 27420 }, { "epoch": 0.11082067090341269, "grad_norm": 759.7105712890625, "learning_rate": 4.99856114971885e-05, "loss": 99.5433, "step": 27430 }, { "epoch": 0.11086107216878033, "grad_norm": 1036.834716796875, "learning_rate": 4.998549284110468e-05, "loss": 85.0693, "step": 27440 }, { "epoch": 0.11090147343414795, "grad_norm": 1072.163818359375, "learning_rate": 4.998537369791581e-05, "loss": 122.0357, "step": 27450 }, { "epoch": 0.1109418746995156, "grad_norm": 1213.7109375, "learning_rate": 4.9985254067624215e-05, "loss": 81.71, "step": 27460 }, { "epoch": 0.11098227596488322, "grad_norm": 641.5134887695312, "learning_rate": 4.998513395023223e-05, "loss": 111.3763, "step": 27470 }, { "epoch": 0.11102267723025085, "grad_norm": 1474.4072265625, "learning_rate": 4.99850133457422e-05, "loss": 103.8446, "step": 27480 }, { "epoch": 0.11106307849561849, "grad_norm": 837.8353271484375, "learning_rate": 4.9984892254156465e-05, "loss": 85.3186, "step": 27490 }, { "epoch": 0.11110347976098611, "grad_norm": 714.3938598632812, "learning_rate": 4.99847706754774e-05, "loss": 116.3188, "step": 27500 }, { "epoch": 0.11114388102635374, "grad_norm": 1514.875, "learning_rate": 4.998464860970736e-05, "loss": 61.2618, "step": 27510 }, { "epoch": 0.11118428229172138, "grad_norm": 122.57615661621094, "learning_rate": 4.998452605684874e-05, "loss": 103.8598, "step": 27520 }, { "epoch": 0.111224683557089, "grad_norm": 700.09521484375, "learning_rate": 4.9984403016903915e-05, "loss": 96.256, "step": 27530 }, { "epoch": 0.11126508482245664, "grad_norm": 2113.217041015625, "learning_rate": 4.998427948987528e-05, "loss": 94.1343, "step": 27540 }, { "epoch": 0.11130548608782427, "grad_norm": 710.7130126953125, "learning_rate": 4.998415547576527e-05, "loss": 134.276, "step": 27550 }, { "epoch": 0.1113458873531919, "grad_norm": 974.3263549804688, "learning_rate": 4.9984030974576285e-05, "loss": 70.3129, "step": 27560 }, { "epoch": 0.11138628861855954, "grad_norm": 1219.5396728515625, "learning_rate": 4.998390598631075e-05, "loss": 119.3183, "step": 27570 }, { "epoch": 0.11142668988392716, "grad_norm": 2306.385498046875, "learning_rate": 4.998378051097111e-05, "loss": 96.3586, "step": 27580 }, { "epoch": 0.11146709114929479, "grad_norm": 676.72998046875, "learning_rate": 4.99836545485598e-05, "loss": 87.2172, "step": 27590 }, { "epoch": 0.11150749241466243, "grad_norm": 1688.67578125, "learning_rate": 4.998352809907928e-05, "loss": 158.3624, "step": 27600 }, { "epoch": 0.11154789368003006, "grad_norm": 3076.226806640625, "learning_rate": 4.9983401162532025e-05, "loss": 105.7579, "step": 27610 }, { "epoch": 0.1115882949453977, "grad_norm": 789.8070678710938, "learning_rate": 4.99832737389205e-05, "loss": 114.1961, "step": 27620 }, { "epoch": 0.11162869621076532, "grad_norm": 1299.20166015625, "learning_rate": 4.998314582824719e-05, "loss": 85.1794, "step": 27630 }, { "epoch": 0.11166909747613295, "grad_norm": 611.26123046875, "learning_rate": 4.998301743051459e-05, "loss": 63.9051, "step": 27640 }, { "epoch": 0.11170949874150059, "grad_norm": 967.321044921875, "learning_rate": 4.99828885457252e-05, "loss": 127.1219, "step": 27650 }, { "epoch": 0.11174990000686821, "grad_norm": 1000.7772216796875, "learning_rate": 4.998275917388154e-05, "loss": 103.4082, "step": 27660 }, { "epoch": 0.11179030127223584, "grad_norm": 627.480224609375, "learning_rate": 4.9982629314986126e-05, "loss": 96.7553, "step": 27670 }, { "epoch": 0.11183070253760348, "grad_norm": 1001.6890869140625, "learning_rate": 4.998249896904149e-05, "loss": 92.7391, "step": 27680 }, { "epoch": 0.1118711038029711, "grad_norm": 781.9347534179688, "learning_rate": 4.998236813605017e-05, "loss": 86.0648, "step": 27690 }, { "epoch": 0.11191150506833875, "grad_norm": 2122.2900390625, "learning_rate": 4.998223681601473e-05, "loss": 106.2797, "step": 27700 }, { "epoch": 0.11195190633370637, "grad_norm": 2083.68798828125, "learning_rate": 4.9982105008937726e-05, "loss": 91.295, "step": 27710 }, { "epoch": 0.111992307599074, "grad_norm": 1193.2694091796875, "learning_rate": 4.998197271482171e-05, "loss": 69.5858, "step": 27720 }, { "epoch": 0.11203270886444164, "grad_norm": 2355.148193359375, "learning_rate": 4.998183993366928e-05, "loss": 133.6512, "step": 27730 }, { "epoch": 0.11207311012980926, "grad_norm": 745.9656372070312, "learning_rate": 4.998170666548302e-05, "loss": 99.9143, "step": 27740 }, { "epoch": 0.11211351139517689, "grad_norm": 559.490478515625, "learning_rate": 4.998157291026553e-05, "loss": 116.1021, "step": 27750 }, { "epoch": 0.11215391266054453, "grad_norm": 613.9931030273438, "learning_rate": 4.998143866801942e-05, "loss": 79.9714, "step": 27760 }, { "epoch": 0.11219431392591216, "grad_norm": 838.7278442382812, "learning_rate": 4.9981303938747286e-05, "loss": 74.1118, "step": 27770 }, { "epoch": 0.1122347151912798, "grad_norm": 731.7330322265625, "learning_rate": 4.9981168722451776e-05, "loss": 85.4063, "step": 27780 }, { "epoch": 0.11227511645664742, "grad_norm": 826.3333129882812, "learning_rate": 4.998103301913552e-05, "loss": 83.0917, "step": 27790 }, { "epoch": 0.11231551772201505, "grad_norm": 1546.168701171875, "learning_rate": 4.998089682880117e-05, "loss": 128.3176, "step": 27800 }, { "epoch": 0.11235591898738269, "grad_norm": 1162.286865234375, "learning_rate": 4.998076015145138e-05, "loss": 122.4255, "step": 27810 }, { "epoch": 0.11239632025275031, "grad_norm": 1132.9892578125, "learning_rate": 4.9980622987088795e-05, "loss": 98.7629, "step": 27820 }, { "epoch": 0.11243672151811794, "grad_norm": 1079.5322265625, "learning_rate": 4.9980485335716114e-05, "loss": 119.6111, "step": 27830 }, { "epoch": 0.11247712278348558, "grad_norm": 1292.2762451171875, "learning_rate": 4.9980347197336005e-05, "loss": 72.3516, "step": 27840 }, { "epoch": 0.1125175240488532, "grad_norm": 767.2022094726562, "learning_rate": 4.998020857195117e-05, "loss": 144.7648, "step": 27850 }, { "epoch": 0.11255792531422085, "grad_norm": 679.2119750976562, "learning_rate": 4.998006945956431e-05, "loss": 117.3672, "step": 27860 }, { "epoch": 0.11259832657958847, "grad_norm": 1033.4124755859375, "learning_rate": 4.997992986017813e-05, "loss": 91.8273, "step": 27870 }, { "epoch": 0.1126387278449561, "grad_norm": 1781.468505859375, "learning_rate": 4.997978977379536e-05, "loss": 108.8957, "step": 27880 }, { "epoch": 0.11267912911032374, "grad_norm": 875.4254150390625, "learning_rate": 4.9979649200418735e-05, "loss": 65.4817, "step": 27890 }, { "epoch": 0.11271953037569137, "grad_norm": 760.1170654296875, "learning_rate": 4.997950814005098e-05, "loss": 105.1915, "step": 27900 }, { "epoch": 0.11275993164105899, "grad_norm": 816.7872924804688, "learning_rate": 4.997936659269486e-05, "loss": 72.9768, "step": 27910 }, { "epoch": 0.11280033290642663, "grad_norm": 1037.8052978515625, "learning_rate": 4.997922455835311e-05, "loss": 84.1713, "step": 27920 }, { "epoch": 0.11284073417179426, "grad_norm": 599.27197265625, "learning_rate": 4.9979082037028535e-05, "loss": 89.3494, "step": 27930 }, { "epoch": 0.1128811354371619, "grad_norm": 2486.75830078125, "learning_rate": 4.9978939028723894e-05, "loss": 96.0437, "step": 27940 }, { "epoch": 0.11292153670252952, "grad_norm": 933.3982543945312, "learning_rate": 4.9978795533441966e-05, "loss": 159.8028, "step": 27950 }, { "epoch": 0.11296193796789715, "grad_norm": 783.9051513671875, "learning_rate": 4.997865155118557e-05, "loss": 80.7255, "step": 27960 }, { "epoch": 0.11300233923326479, "grad_norm": 1356.3843994140625, "learning_rate": 4.9978507081957494e-05, "loss": 135.2147, "step": 27970 }, { "epoch": 0.11304274049863242, "grad_norm": 2129.216796875, "learning_rate": 4.997836212576057e-05, "loss": 73.888, "step": 27980 }, { "epoch": 0.11308314176400004, "grad_norm": 959.6722412109375, "learning_rate": 4.9978216682597614e-05, "loss": 159.4311, "step": 27990 }, { "epoch": 0.11312354302936768, "grad_norm": 2303.733642578125, "learning_rate": 4.997807075247146e-05, "loss": 98.653, "step": 28000 }, { "epoch": 0.11316394429473531, "grad_norm": 906.4110717773438, "learning_rate": 4.997792433538496e-05, "loss": 97.6867, "step": 28010 }, { "epoch": 0.11320434556010295, "grad_norm": 814.7039184570312, "learning_rate": 4.997777743134097e-05, "loss": 91.2434, "step": 28020 }, { "epoch": 0.11324474682547057, "grad_norm": 496.1783752441406, "learning_rate": 4.9977630040342346e-05, "loss": 109.5629, "step": 28030 }, { "epoch": 0.1132851480908382, "grad_norm": 1374.5599365234375, "learning_rate": 4.997748216239196e-05, "loss": 115.9625, "step": 28040 }, { "epoch": 0.11332554935620584, "grad_norm": 1610.67529296875, "learning_rate": 4.997733379749271e-05, "loss": 138.7809, "step": 28050 }, { "epoch": 0.11336595062157347, "grad_norm": 655.5691528320312, "learning_rate": 4.9977184945647473e-05, "loss": 72.344, "step": 28060 }, { "epoch": 0.11340635188694109, "grad_norm": 1008.762939453125, "learning_rate": 4.9977035606859156e-05, "loss": 170.6503, "step": 28070 }, { "epoch": 0.11344675315230873, "grad_norm": 692.5789184570312, "learning_rate": 4.9976885781130665e-05, "loss": 79.409, "step": 28080 }, { "epoch": 0.11348715441767636, "grad_norm": 2432.585693359375, "learning_rate": 4.9976735468464935e-05, "loss": 181.1055, "step": 28090 }, { "epoch": 0.113527555683044, "grad_norm": 1090.251220703125, "learning_rate": 4.997658466886489e-05, "loss": 114.0751, "step": 28100 }, { "epoch": 0.11356795694841162, "grad_norm": 861.4591674804688, "learning_rate": 4.997643338233346e-05, "loss": 97.7165, "step": 28110 }, { "epoch": 0.11360835821377925, "grad_norm": 1879.217529296875, "learning_rate": 4.997628160887361e-05, "loss": 99.4693, "step": 28120 }, { "epoch": 0.11364875947914689, "grad_norm": 617.5592651367188, "learning_rate": 4.997612934848829e-05, "loss": 98.8888, "step": 28130 }, { "epoch": 0.11368916074451452, "grad_norm": 671.50341796875, "learning_rate": 4.997597660118046e-05, "loss": 96.949, "step": 28140 }, { "epoch": 0.11372956200988214, "grad_norm": 800.4517211914062, "learning_rate": 4.9975823366953124e-05, "loss": 71.7921, "step": 28150 }, { "epoch": 0.11376996327524978, "grad_norm": 889.1356201171875, "learning_rate": 4.9975669645809244e-05, "loss": 124.3523, "step": 28160 }, { "epoch": 0.11381036454061741, "grad_norm": 1013.8363037109375, "learning_rate": 4.997551543775182e-05, "loss": 111.264, "step": 28170 }, { "epoch": 0.11385076580598505, "grad_norm": 1395.8243408203125, "learning_rate": 4.997536074278387e-05, "loss": 112.4192, "step": 28180 }, { "epoch": 0.11389116707135268, "grad_norm": 1430.334228515625, "learning_rate": 4.997520556090841e-05, "loss": 121.1184, "step": 28190 }, { "epoch": 0.1139315683367203, "grad_norm": 1214.85400390625, "learning_rate": 4.9975049892128455e-05, "loss": 87.6032, "step": 28200 }, { "epoch": 0.11397196960208794, "grad_norm": 867.681396484375, "learning_rate": 4.9974893736447045e-05, "loss": 90.5741, "step": 28210 }, { "epoch": 0.11401237086745557, "grad_norm": 759.8204345703125, "learning_rate": 4.997473709386722e-05, "loss": 79.1436, "step": 28220 }, { "epoch": 0.1140527721328232, "grad_norm": 1119.670654296875, "learning_rate": 4.997457996439204e-05, "loss": 89.3735, "step": 28230 }, { "epoch": 0.11409317339819083, "grad_norm": 843.7247924804688, "learning_rate": 4.997442234802456e-05, "loss": 111.084, "step": 28240 }, { "epoch": 0.11413357466355846, "grad_norm": 775.5316772460938, "learning_rate": 4.997426424476787e-05, "loss": 96.1073, "step": 28250 }, { "epoch": 0.1141739759289261, "grad_norm": 1531.5697021484375, "learning_rate": 4.9974105654625036e-05, "loss": 82.1885, "step": 28260 }, { "epoch": 0.11421437719429373, "grad_norm": 693.6162719726562, "learning_rate": 4.997394657759915e-05, "loss": 119.7303, "step": 28270 }, { "epoch": 0.11425477845966135, "grad_norm": 4390.083984375, "learning_rate": 4.997378701369332e-05, "loss": 156.4863, "step": 28280 }, { "epoch": 0.11429517972502899, "grad_norm": 781.9625854492188, "learning_rate": 4.9973626962910656e-05, "loss": 90.4073, "step": 28290 }, { "epoch": 0.11433558099039662, "grad_norm": 1085.052978515625, "learning_rate": 4.9973466425254286e-05, "loss": 74.8492, "step": 28300 }, { "epoch": 0.11437598225576424, "grad_norm": 975.8035278320312, "learning_rate": 4.997330540072732e-05, "loss": 108.0216, "step": 28310 }, { "epoch": 0.11441638352113188, "grad_norm": 664.2997436523438, "learning_rate": 4.997314388933291e-05, "loss": 96.9389, "step": 28320 }, { "epoch": 0.11445678478649951, "grad_norm": 395.10052490234375, "learning_rate": 4.997298189107421e-05, "loss": 114.4039, "step": 28330 }, { "epoch": 0.11449718605186715, "grad_norm": 894.0714721679688, "learning_rate": 4.9972819405954366e-05, "loss": 66.7973, "step": 28340 }, { "epoch": 0.11453758731723478, "grad_norm": 3636.150390625, "learning_rate": 4.997265643397654e-05, "loss": 137.2116, "step": 28350 }, { "epoch": 0.1145779885826024, "grad_norm": 543.2451782226562, "learning_rate": 4.997249297514394e-05, "loss": 72.1923, "step": 28360 }, { "epoch": 0.11461838984797004, "grad_norm": 1202.521484375, "learning_rate": 4.997232902945971e-05, "loss": 101.5618, "step": 28370 }, { "epoch": 0.11465879111333767, "grad_norm": 1286.06494140625, "learning_rate": 4.997216459692709e-05, "loss": 99.6231, "step": 28380 }, { "epoch": 0.1146991923787053, "grad_norm": 747.28271484375, "learning_rate": 4.997199967754925e-05, "loss": 101.1043, "step": 28390 }, { "epoch": 0.11473959364407293, "grad_norm": 397.4785461425781, "learning_rate": 4.997183427132943e-05, "loss": 99.9764, "step": 28400 }, { "epoch": 0.11477999490944056, "grad_norm": 1810.4814453125, "learning_rate": 4.997166837827084e-05, "loss": 107.5269, "step": 28410 }, { "epoch": 0.1148203961748082, "grad_norm": 1796.510009765625, "learning_rate": 4.997150199837671e-05, "loss": 98.7808, "step": 28420 }, { "epoch": 0.11486079744017583, "grad_norm": 997.3446044921875, "learning_rate": 4.997133513165031e-05, "loss": 101.5585, "step": 28430 }, { "epoch": 0.11490119870554345, "grad_norm": 575.0126342773438, "learning_rate": 4.9971167778094863e-05, "loss": 115.6277, "step": 28440 }, { "epoch": 0.11494159997091109, "grad_norm": 1271.26953125, "learning_rate": 4.997099993771365e-05, "loss": 60.656, "step": 28450 }, { "epoch": 0.11498200123627872, "grad_norm": 1329.1561279296875, "learning_rate": 4.997083161050994e-05, "loss": 83.6766, "step": 28460 }, { "epoch": 0.11502240250164635, "grad_norm": 527.1378784179688, "learning_rate": 4.9970662796487e-05, "loss": 87.6013, "step": 28470 }, { "epoch": 0.11506280376701399, "grad_norm": 888.7230224609375, "learning_rate": 4.997049349564814e-05, "loss": 86.2665, "step": 28480 }, { "epoch": 0.11510320503238161, "grad_norm": 338.1375732421875, "learning_rate": 4.997032370799666e-05, "loss": 130.7292, "step": 28490 }, { "epoch": 0.11514360629774925, "grad_norm": 2081.712158203125, "learning_rate": 4.997015343353585e-05, "loss": 128.4502, "step": 28500 }, { "epoch": 0.11518400756311688, "grad_norm": 1699.38623046875, "learning_rate": 4.996998267226905e-05, "loss": 115.8591, "step": 28510 }, { "epoch": 0.1152244088284845, "grad_norm": 467.2063293457031, "learning_rate": 4.996981142419959e-05, "loss": 73.2754, "step": 28520 }, { "epoch": 0.11526481009385214, "grad_norm": 804.551025390625, "learning_rate": 4.996963968933079e-05, "loss": 67.7496, "step": 28530 }, { "epoch": 0.11530521135921977, "grad_norm": 1599.342529296875, "learning_rate": 4.996946746766601e-05, "loss": 91.9091, "step": 28540 }, { "epoch": 0.1153456126245874, "grad_norm": 1292.8143310546875, "learning_rate": 4.996929475920862e-05, "loss": 113.546, "step": 28550 }, { "epoch": 0.11538601388995504, "grad_norm": 1679.7252197265625, "learning_rate": 4.9969121563961956e-05, "loss": 83.8834, "step": 28560 }, { "epoch": 0.11542641515532266, "grad_norm": 1275.5130615234375, "learning_rate": 4.9968947881929414e-05, "loss": 115.3006, "step": 28570 }, { "epoch": 0.1154668164206903, "grad_norm": 959.7946166992188, "learning_rate": 4.996877371311439e-05, "loss": 87.6582, "step": 28580 }, { "epoch": 0.11550721768605793, "grad_norm": 1284.703369140625, "learning_rate": 4.996859905752026e-05, "loss": 132.2889, "step": 28590 }, { "epoch": 0.11554761895142555, "grad_norm": 0.0, "learning_rate": 4.996842391515044e-05, "loss": 91.88, "step": 28600 }, { "epoch": 0.1155880202167932, "grad_norm": 1002.761962890625, "learning_rate": 4.996824828600834e-05, "loss": 125.265, "step": 28610 }, { "epoch": 0.11562842148216082, "grad_norm": 665.4733276367188, "learning_rate": 4.996807217009738e-05, "loss": 99.0348, "step": 28620 }, { "epoch": 0.11566882274752845, "grad_norm": 1002.7938232421875, "learning_rate": 4.996789556742101e-05, "loss": 75.1195, "step": 28630 }, { "epoch": 0.11570922401289609, "grad_norm": 858.1141357421875, "learning_rate": 4.996771847798265e-05, "loss": 73.2454, "step": 28640 }, { "epoch": 0.11574962527826371, "grad_norm": 1226.3555908203125, "learning_rate": 4.996754090178577e-05, "loss": 146.9862, "step": 28650 }, { "epoch": 0.11579002654363135, "grad_norm": 1391.651123046875, "learning_rate": 4.996736283883382e-05, "loss": 142.1579, "step": 28660 }, { "epoch": 0.11583042780899898, "grad_norm": 1489.2674560546875, "learning_rate": 4.9967184289130286e-05, "loss": 115.9638, "step": 28670 }, { "epoch": 0.1158708290743666, "grad_norm": 1078.4761962890625, "learning_rate": 4.9967005252678634e-05, "loss": 118.9085, "step": 28680 }, { "epoch": 0.11591123033973424, "grad_norm": 2236.838623046875, "learning_rate": 4.9966825729482364e-05, "loss": 130.9481, "step": 28690 }, { "epoch": 0.11595163160510187, "grad_norm": 862.959716796875, "learning_rate": 4.996664571954497e-05, "loss": 111.0184, "step": 28700 }, { "epoch": 0.1159920328704695, "grad_norm": 1136.712646484375, "learning_rate": 4.996646522286997e-05, "loss": 103.9295, "step": 28710 }, { "epoch": 0.11603243413583714, "grad_norm": 1675.60791015625, "learning_rate": 4.996628423946087e-05, "loss": 109.6175, "step": 28720 }, { "epoch": 0.11607283540120476, "grad_norm": 1874.5843505859375, "learning_rate": 4.996610276932121e-05, "loss": 85.4252, "step": 28730 }, { "epoch": 0.1161132366665724, "grad_norm": 1308.438232421875, "learning_rate": 4.996592081245451e-05, "loss": 115.684, "step": 28740 }, { "epoch": 0.11615363793194003, "grad_norm": 428.2576904296875, "learning_rate": 4.996573836886435e-05, "loss": 57.5053, "step": 28750 }, { "epoch": 0.11619403919730766, "grad_norm": 1469.609619140625, "learning_rate": 4.9965555438554254e-05, "loss": 77.9164, "step": 28760 }, { "epoch": 0.1162344404626753, "grad_norm": 2238.658447265625, "learning_rate": 4.9965372021527814e-05, "loss": 129.5649, "step": 28770 }, { "epoch": 0.11627484172804292, "grad_norm": 1731.619140625, "learning_rate": 4.996518811778858e-05, "loss": 92.2746, "step": 28780 }, { "epoch": 0.11631524299341055, "grad_norm": 2262.8583984375, "learning_rate": 4.996500372734015e-05, "loss": 126.2437, "step": 28790 }, { "epoch": 0.11635564425877819, "grad_norm": 1330.55908203125, "learning_rate": 4.9964818850186135e-05, "loss": 118.3177, "step": 28800 }, { "epoch": 0.11639604552414581, "grad_norm": 1293.2772216796875, "learning_rate": 4.9964633486330116e-05, "loss": 131.9247, "step": 28810 }, { "epoch": 0.11643644678951345, "grad_norm": 626.7706298828125, "learning_rate": 4.9964447635775714e-05, "loss": 80.6821, "step": 28820 }, { "epoch": 0.11647684805488108, "grad_norm": 680.4353637695312, "learning_rate": 4.996426129852655e-05, "loss": 79.4926, "step": 28830 }, { "epoch": 0.1165172493202487, "grad_norm": 1769.197998046875, "learning_rate": 4.996407447458626e-05, "loss": 105.4986, "step": 28840 }, { "epoch": 0.11655765058561635, "grad_norm": 1792.2196044921875, "learning_rate": 4.996388716395848e-05, "loss": 108.3764, "step": 28850 }, { "epoch": 0.11659805185098397, "grad_norm": 582.6719970703125, "learning_rate": 4.996369936664688e-05, "loss": 79.2789, "step": 28860 }, { "epoch": 0.1166384531163516, "grad_norm": 1305.8157958984375, "learning_rate": 4.99635110826551e-05, "loss": 112.6347, "step": 28870 }, { "epoch": 0.11667885438171924, "grad_norm": 1331.4168701171875, "learning_rate": 4.996332231198683e-05, "loss": 76.5, "step": 28880 }, { "epoch": 0.11671925564708686, "grad_norm": 1037.36767578125, "learning_rate": 4.996313305464573e-05, "loss": 159.2613, "step": 28890 }, { "epoch": 0.1167596569124545, "grad_norm": 1378.345458984375, "learning_rate": 4.99629433106355e-05, "loss": 157.8604, "step": 28900 }, { "epoch": 0.11680005817782213, "grad_norm": 846.559814453125, "learning_rate": 4.9962753079959836e-05, "loss": 112.2688, "step": 28910 }, { "epoch": 0.11684045944318976, "grad_norm": 1050.1434326171875, "learning_rate": 4.996256236262245e-05, "loss": 88.0946, "step": 28920 }, { "epoch": 0.1168808607085574, "grad_norm": 410.0029296875, "learning_rate": 4.996237115862706e-05, "loss": 59.4956, "step": 28930 }, { "epoch": 0.11692126197392502, "grad_norm": 573.1680908203125, "learning_rate": 4.99621794679774e-05, "loss": 111.6136, "step": 28940 }, { "epoch": 0.11696166323929265, "grad_norm": 869.5213012695312, "learning_rate": 4.996198729067719e-05, "loss": 146.8108, "step": 28950 }, { "epoch": 0.11700206450466029, "grad_norm": 634.8200073242188, "learning_rate": 4.99617946267302e-05, "loss": 119.8773, "step": 28960 }, { "epoch": 0.11704246577002791, "grad_norm": 1196.7987060546875, "learning_rate": 4.996160147614016e-05, "loss": 71.1213, "step": 28970 }, { "epoch": 0.11708286703539555, "grad_norm": 1217.557861328125, "learning_rate": 4.996140783891085e-05, "loss": 121.3136, "step": 28980 }, { "epoch": 0.11712326830076318, "grad_norm": 1255.81396484375, "learning_rate": 4.9961213715046045e-05, "loss": 116.6792, "step": 28990 }, { "epoch": 0.11716366956613081, "grad_norm": 627.504150390625, "learning_rate": 4.996101910454953e-05, "loss": 75.7639, "step": 29000 }, { "epoch": 0.11720407083149845, "grad_norm": 529.0521240234375, "learning_rate": 4.996082400742509e-05, "loss": 139.7255, "step": 29010 }, { "epoch": 0.11724447209686607, "grad_norm": 1349.41357421875, "learning_rate": 4.996062842367654e-05, "loss": 138.5896, "step": 29020 }, { "epoch": 0.1172848733622337, "grad_norm": 1360.37255859375, "learning_rate": 4.9960432353307686e-05, "loss": 80.0865, "step": 29030 }, { "epoch": 0.11732527462760134, "grad_norm": 1388.444091796875, "learning_rate": 4.996023579632236e-05, "loss": 112.3245, "step": 29040 }, { "epoch": 0.11736567589296897, "grad_norm": 1930.4471435546875, "learning_rate": 4.996003875272438e-05, "loss": 114.5048, "step": 29050 }, { "epoch": 0.1174060771583366, "grad_norm": 569.8203125, "learning_rate": 4.9959841222517596e-05, "loss": 62.5382, "step": 29060 }, { "epoch": 0.11744647842370423, "grad_norm": 455.6988830566406, "learning_rate": 4.9959643205705854e-05, "loss": 100.6463, "step": 29070 }, { "epoch": 0.11748687968907186, "grad_norm": 908.3660888671875, "learning_rate": 4.995944470229302e-05, "loss": 114.3609, "step": 29080 }, { "epoch": 0.1175272809544395, "grad_norm": 755.6206665039062, "learning_rate": 4.9959245712282966e-05, "loss": 95.5638, "step": 29090 }, { "epoch": 0.11756768221980712, "grad_norm": 0.0, "learning_rate": 4.9959046235679565e-05, "loss": 64.4007, "step": 29100 }, { "epoch": 0.11760808348517475, "grad_norm": 808.4530639648438, "learning_rate": 4.9958846272486704e-05, "loss": 112.1131, "step": 29110 }, { "epoch": 0.11764848475054239, "grad_norm": 1141.5194091796875, "learning_rate": 4.9958645822708285e-05, "loss": 87.7617, "step": 29120 }, { "epoch": 0.11768888601591002, "grad_norm": 653.6248779296875, "learning_rate": 4.995844488634822e-05, "loss": 70.7442, "step": 29130 }, { "epoch": 0.11772928728127766, "grad_norm": 1141.1375732421875, "learning_rate": 4.9958243463410414e-05, "loss": 100.08, "step": 29140 }, { "epoch": 0.11776968854664528, "grad_norm": 675.2451171875, "learning_rate": 4.995804155389881e-05, "loss": 113.5737, "step": 29150 }, { "epoch": 0.11781008981201291, "grad_norm": 822.73486328125, "learning_rate": 4.995783915781734e-05, "loss": 78.6838, "step": 29160 }, { "epoch": 0.11785049107738055, "grad_norm": 1152.279541015625, "learning_rate": 4.995763627516994e-05, "loss": 124.2494, "step": 29170 }, { "epoch": 0.11789089234274817, "grad_norm": 696.2450561523438, "learning_rate": 4.995743290596057e-05, "loss": 85.5477, "step": 29180 }, { "epoch": 0.1179312936081158, "grad_norm": 669.2838745117188, "learning_rate": 4.9957229050193197e-05, "loss": 158.3468, "step": 29190 }, { "epoch": 0.11797169487348344, "grad_norm": 629.5454711914062, "learning_rate": 4.9957024707871806e-05, "loss": 72.3909, "step": 29200 }, { "epoch": 0.11801209613885107, "grad_norm": 2218.03564453125, "learning_rate": 4.995681987900036e-05, "loss": 131.4396, "step": 29210 }, { "epoch": 0.1180524974042187, "grad_norm": 1187.0064697265625, "learning_rate": 4.995661456358286e-05, "loss": 91.9417, "step": 29220 }, { "epoch": 0.11809289866958633, "grad_norm": 539.1826171875, "learning_rate": 4.995640876162332e-05, "loss": 119.5583, "step": 29230 }, { "epoch": 0.11813329993495396, "grad_norm": 1318.2589111328125, "learning_rate": 4.9956202473125736e-05, "loss": 115.7666, "step": 29240 }, { "epoch": 0.1181737012003216, "grad_norm": 466.1606750488281, "learning_rate": 4.995599569809414e-05, "loss": 118.0604, "step": 29250 }, { "epoch": 0.11821410246568922, "grad_norm": 797.7738647460938, "learning_rate": 4.9955788436532545e-05, "loss": 72.8938, "step": 29260 }, { "epoch": 0.11825450373105685, "grad_norm": 1671.8890380859375, "learning_rate": 4.995558068844503e-05, "loss": 96.0493, "step": 29270 }, { "epoch": 0.11829490499642449, "grad_norm": 934.203125, "learning_rate": 4.99553724538356e-05, "loss": 97.1457, "step": 29280 }, { "epoch": 0.11833530626179212, "grad_norm": 1040.1793212890625, "learning_rate": 4.9955163732708346e-05, "loss": 138.6697, "step": 29290 }, { "epoch": 0.11837570752715976, "grad_norm": 2738.3046875, "learning_rate": 4.9954954525067334e-05, "loss": 120.2477, "step": 29300 }, { "epoch": 0.11841610879252738, "grad_norm": 543.4052124023438, "learning_rate": 4.995474483091662e-05, "loss": 129.9302, "step": 29310 }, { "epoch": 0.11845651005789501, "grad_norm": 534.146240234375, "learning_rate": 4.995453465026032e-05, "loss": 67.6277, "step": 29320 }, { "epoch": 0.11849691132326265, "grad_norm": 1281.582275390625, "learning_rate": 4.995432398310252e-05, "loss": 117.349, "step": 29330 }, { "epoch": 0.11853731258863028, "grad_norm": 3242.07568359375, "learning_rate": 4.995411282944732e-05, "loss": 136.7699, "step": 29340 }, { "epoch": 0.1185777138539979, "grad_norm": 1517.0467529296875, "learning_rate": 4.9953901189298845e-05, "loss": 127.2586, "step": 29350 }, { "epoch": 0.11861811511936554, "grad_norm": 689.41015625, "learning_rate": 4.9953689062661226e-05, "loss": 108.8368, "step": 29360 }, { "epoch": 0.11865851638473317, "grad_norm": 663.7586669921875, "learning_rate": 4.995347644953858e-05, "loss": 86.3188, "step": 29370 }, { "epoch": 0.11869891765010081, "grad_norm": 1621.5384521484375, "learning_rate": 4.9953263349935074e-05, "loss": 100.3996, "step": 29380 }, { "epoch": 0.11873931891546843, "grad_norm": 1026.15576171875, "learning_rate": 4.995304976385484e-05, "loss": 124.3401, "step": 29390 }, { "epoch": 0.11877972018083606, "grad_norm": 840.7376098632812, "learning_rate": 4.995283569130207e-05, "loss": 99.3957, "step": 29400 }, { "epoch": 0.1188201214462037, "grad_norm": 1305.7696533203125, "learning_rate": 4.995262113228091e-05, "loss": 87.9966, "step": 29410 }, { "epoch": 0.11886052271157133, "grad_norm": 1312.4971923828125, "learning_rate": 4.9952406086795564e-05, "loss": 114.1831, "step": 29420 }, { "epoch": 0.11890092397693895, "grad_norm": 1050.6826171875, "learning_rate": 4.995219055485021e-05, "loss": 117.2113, "step": 29430 }, { "epoch": 0.11894132524230659, "grad_norm": 733.8092041015625, "learning_rate": 4.995197453644905e-05, "loss": 120.497, "step": 29440 }, { "epoch": 0.11898172650767422, "grad_norm": 980.0513305664062, "learning_rate": 4.9951758031596304e-05, "loss": 97.3482, "step": 29450 }, { "epoch": 0.11902212777304186, "grad_norm": 646.7477416992188, "learning_rate": 4.995154104029619e-05, "loss": 103.4561, "step": 29460 }, { "epoch": 0.11906252903840948, "grad_norm": 2361.02978515625, "learning_rate": 4.9951323562552934e-05, "loss": 104.5779, "step": 29470 }, { "epoch": 0.11910293030377711, "grad_norm": 844.0553588867188, "learning_rate": 4.995110559837078e-05, "loss": 91.3419, "step": 29480 }, { "epoch": 0.11914333156914475, "grad_norm": 2193.67431640625, "learning_rate": 4.995088714775398e-05, "loss": 127.1109, "step": 29490 }, { "epoch": 0.11918373283451238, "grad_norm": 732.5401611328125, "learning_rate": 4.995066821070679e-05, "loss": 120.7417, "step": 29500 }, { "epoch": 0.11922413409988, "grad_norm": 1001.8763427734375, "learning_rate": 4.995044878723348e-05, "loss": 114.4594, "step": 29510 }, { "epoch": 0.11926453536524764, "grad_norm": 928.3828735351562, "learning_rate": 4.995022887733832e-05, "loss": 83.6775, "step": 29520 }, { "epoch": 0.11930493663061527, "grad_norm": 858.733154296875, "learning_rate": 4.99500084810256e-05, "loss": 144.456, "step": 29530 }, { "epoch": 0.11934533789598291, "grad_norm": 2473.096435546875, "learning_rate": 4.994978759829963e-05, "loss": 117.7391, "step": 29540 }, { "epoch": 0.11938573916135053, "grad_norm": 1232.745361328125, "learning_rate": 4.9949566229164704e-05, "loss": 87.8614, "step": 29550 }, { "epoch": 0.11942614042671816, "grad_norm": 730.4893188476562, "learning_rate": 4.994934437362513e-05, "loss": 94.9959, "step": 29560 }, { "epoch": 0.1194665416920858, "grad_norm": 1123.4429931640625, "learning_rate": 4.9949122031685245e-05, "loss": 116.839, "step": 29570 }, { "epoch": 0.11950694295745343, "grad_norm": 723.6360473632812, "learning_rate": 4.9948899203349384e-05, "loss": 86.0155, "step": 29580 }, { "epoch": 0.11954734422282105, "grad_norm": 480.5324401855469, "learning_rate": 4.994867588862189e-05, "loss": 94.0604, "step": 29590 }, { "epoch": 0.1195877454881887, "grad_norm": 1117.021484375, "learning_rate": 4.9948452087507116e-05, "loss": 69.5577, "step": 29600 }, { "epoch": 0.11962814675355632, "grad_norm": 1889.9290771484375, "learning_rate": 4.9948227800009416e-05, "loss": 130.7532, "step": 29610 }, { "epoch": 0.11966854801892396, "grad_norm": 811.6279907226562, "learning_rate": 4.994800302613318e-05, "loss": 93.5169, "step": 29620 }, { "epoch": 0.11970894928429159, "grad_norm": 339.779052734375, "learning_rate": 4.994777776588278e-05, "loss": 112.6328, "step": 29630 }, { "epoch": 0.11974935054965921, "grad_norm": 794.0516967773438, "learning_rate": 4.9947552019262605e-05, "loss": 61.7838, "step": 29640 }, { "epoch": 0.11978975181502685, "grad_norm": 1287.187255859375, "learning_rate": 4.994732578627706e-05, "loss": 156.2145, "step": 29650 }, { "epoch": 0.11983015308039448, "grad_norm": 921.3040161132812, "learning_rate": 4.994709906693056e-05, "loss": 77.835, "step": 29660 }, { "epoch": 0.1198705543457621, "grad_norm": 985.0899047851562, "learning_rate": 4.9946871861227514e-05, "loss": 101.6619, "step": 29670 }, { "epoch": 0.11991095561112974, "grad_norm": 1570.173583984375, "learning_rate": 4.9946644169172355e-05, "loss": 132.0515, "step": 29680 }, { "epoch": 0.11995135687649737, "grad_norm": 1566.288818359375, "learning_rate": 4.9946415990769534e-05, "loss": 109.4148, "step": 29690 }, { "epoch": 0.11999175814186501, "grad_norm": 654.2642822265625, "learning_rate": 4.994618732602349e-05, "loss": 92.2775, "step": 29700 }, { "epoch": 0.12003215940723264, "grad_norm": 5245.0458984375, "learning_rate": 4.994595817493867e-05, "loss": 144.156, "step": 29710 }, { "epoch": 0.12007256067260026, "grad_norm": 4157.51708984375, "learning_rate": 4.9945728537519555e-05, "loss": 124.7138, "step": 29720 }, { "epoch": 0.1201129619379679, "grad_norm": 1033.809814453125, "learning_rate": 4.994549841377063e-05, "loss": 108.5572, "step": 29730 }, { "epoch": 0.12015336320333553, "grad_norm": 681.5181884765625, "learning_rate": 4.9945267803696364e-05, "loss": 99.638, "step": 29740 }, { "epoch": 0.12019376446870315, "grad_norm": 2804.89501953125, "learning_rate": 4.994503670730125e-05, "loss": 99.5536, "step": 29750 }, { "epoch": 0.1202341657340708, "grad_norm": 346.3814392089844, "learning_rate": 4.994480512458981e-05, "loss": 108.1357, "step": 29760 }, { "epoch": 0.12027456699943842, "grad_norm": 1421.3365478515625, "learning_rate": 4.9944573055566556e-05, "loss": 103.8683, "step": 29770 }, { "epoch": 0.12031496826480606, "grad_norm": 1197.5367431640625, "learning_rate": 4.994434050023601e-05, "loss": 107.7595, "step": 29780 }, { "epoch": 0.12035536953017369, "grad_norm": 256.7560729980469, "learning_rate": 4.9944107458602693e-05, "loss": 81.2983, "step": 29790 }, { "epoch": 0.12039577079554131, "grad_norm": 1710.7000732421875, "learning_rate": 4.994387393067117e-05, "loss": 98.1409, "step": 29800 }, { "epoch": 0.12043617206090895, "grad_norm": 583.5015258789062, "learning_rate": 4.994363991644597e-05, "loss": 67.0093, "step": 29810 }, { "epoch": 0.12047657332627658, "grad_norm": 1057.0147705078125, "learning_rate": 4.9943405415931674e-05, "loss": 116.4649, "step": 29820 }, { "epoch": 0.1205169745916442, "grad_norm": 671.4649658203125, "learning_rate": 4.9943170429132855e-05, "loss": 95.8829, "step": 29830 }, { "epoch": 0.12055737585701184, "grad_norm": 1301.7279052734375, "learning_rate": 4.9942934956054076e-05, "loss": 90.3948, "step": 29840 }, { "epoch": 0.12059777712237947, "grad_norm": 1199.511474609375, "learning_rate": 4.994269899669994e-05, "loss": 85.8095, "step": 29850 }, { "epoch": 0.12063817838774711, "grad_norm": 648.86376953125, "learning_rate": 4.9942462551075056e-05, "loss": 77.0479, "step": 29860 }, { "epoch": 0.12067857965311474, "grad_norm": 788.9052124023438, "learning_rate": 4.994222561918401e-05, "loss": 81.1479, "step": 29870 }, { "epoch": 0.12071898091848236, "grad_norm": 931.2744750976562, "learning_rate": 4.994198820103145e-05, "loss": 85.522, "step": 29880 }, { "epoch": 0.12075938218385, "grad_norm": 1237.2655029296875, "learning_rate": 4.994175029662198e-05, "loss": 95.5039, "step": 29890 }, { "epoch": 0.12079978344921763, "grad_norm": 704.9238891601562, "learning_rate": 4.994151190596025e-05, "loss": 85.7873, "step": 29900 }, { "epoch": 0.12084018471458526, "grad_norm": 826.0241088867188, "learning_rate": 4.9941273029050894e-05, "loss": 129.6459, "step": 29910 }, { "epoch": 0.1208805859799529, "grad_norm": 479.6162109375, "learning_rate": 4.994103366589859e-05, "loss": 117.3531, "step": 29920 }, { "epoch": 0.12092098724532052, "grad_norm": 1103.497314453125, "learning_rate": 4.994079381650799e-05, "loss": 183.9052, "step": 29930 }, { "epoch": 0.12096138851068816, "grad_norm": 710.986572265625, "learning_rate": 4.994055348088378e-05, "loss": 105.431, "step": 29940 }, { "epoch": 0.12100178977605579, "grad_norm": 1297.952880859375, "learning_rate": 4.994031265903063e-05, "loss": 128.99, "step": 29950 }, { "epoch": 0.12104219104142341, "grad_norm": 1707.3641357421875, "learning_rate": 4.9940071350953255e-05, "loss": 127.0656, "step": 29960 }, { "epoch": 0.12108259230679105, "grad_norm": 787.633544921875, "learning_rate": 4.993982955665634e-05, "loss": 106.8927, "step": 29970 }, { "epoch": 0.12112299357215868, "grad_norm": 1233.0062255859375, "learning_rate": 4.9939587276144616e-05, "loss": 104.3319, "step": 29980 }, { "epoch": 0.1211633948375263, "grad_norm": 1296.74658203125, "learning_rate": 4.993934450942279e-05, "loss": 77.4372, "step": 29990 }, { "epoch": 0.12120379610289395, "grad_norm": 735.8043212890625, "learning_rate": 4.993910125649561e-05, "loss": 147.1198, "step": 30000 }, { "epoch": 0.12124419736826157, "grad_norm": 364.9338684082031, "learning_rate": 4.993885751736781e-05, "loss": 84.0477, "step": 30010 }, { "epoch": 0.12128459863362921, "grad_norm": 719.6060180664062, "learning_rate": 4.993861329204414e-05, "loss": 115.0903, "step": 30020 }, { "epoch": 0.12132499989899684, "grad_norm": 1294.3851318359375, "learning_rate": 4.993836858052937e-05, "loss": 151.7286, "step": 30030 }, { "epoch": 0.12136540116436446, "grad_norm": 711.005126953125, "learning_rate": 4.993812338282826e-05, "loss": 86.3682, "step": 30040 }, { "epoch": 0.1214058024297321, "grad_norm": 723.4884643554688, "learning_rate": 4.993787769894559e-05, "loss": 120.8179, "step": 30050 }, { "epoch": 0.12144620369509973, "grad_norm": 1060.3338623046875, "learning_rate": 4.993763152888617e-05, "loss": 134.1443, "step": 30060 }, { "epoch": 0.12148660496046736, "grad_norm": 829.1416625976562, "learning_rate": 4.993738487265478e-05, "loss": 119.1408, "step": 30070 }, { "epoch": 0.121527006225835, "grad_norm": 267.49755859375, "learning_rate": 4.993713773025623e-05, "loss": 112.0577, "step": 30080 }, { "epoch": 0.12156740749120262, "grad_norm": 2977.7763671875, "learning_rate": 4.993689010169534e-05, "loss": 115.676, "step": 30090 }, { "epoch": 0.12160780875657026, "grad_norm": 927.7879638671875, "learning_rate": 4.993664198697694e-05, "loss": 80.4086, "step": 30100 }, { "epoch": 0.12164821002193789, "grad_norm": 1482.4476318359375, "learning_rate": 4.993639338610587e-05, "loss": 90.1057, "step": 30110 }, { "epoch": 0.12168861128730551, "grad_norm": 1078.0257568359375, "learning_rate": 4.993614429908697e-05, "loss": 112.5462, "step": 30120 }, { "epoch": 0.12172901255267315, "grad_norm": 1431.45947265625, "learning_rate": 4.99358947259251e-05, "loss": 83.491, "step": 30130 }, { "epoch": 0.12176941381804078, "grad_norm": 792.9324340820312, "learning_rate": 4.9935644666625125e-05, "loss": 82.8931, "step": 30140 }, { "epoch": 0.12180981508340841, "grad_norm": 1509.551025390625, "learning_rate": 4.9935394121191915e-05, "loss": 103.0119, "step": 30150 }, { "epoch": 0.12185021634877605, "grad_norm": 1039.8441162109375, "learning_rate": 4.993514308963036e-05, "loss": 119.8, "step": 30160 }, { "epoch": 0.12189061761414367, "grad_norm": 399.6064758300781, "learning_rate": 4.993489157194536e-05, "loss": 115.0279, "step": 30170 }, { "epoch": 0.12193101887951131, "grad_norm": 794.4744873046875, "learning_rate": 4.993463956814181e-05, "loss": 101.3931, "step": 30180 }, { "epoch": 0.12197142014487894, "grad_norm": 2071.046142578125, "learning_rate": 4.993438707822462e-05, "loss": 114.3679, "step": 30190 }, { "epoch": 0.12201182141024657, "grad_norm": 996.6055297851562, "learning_rate": 4.993413410219871e-05, "loss": 84.8786, "step": 30200 }, { "epoch": 0.1220522226756142, "grad_norm": 1139.685302734375, "learning_rate": 4.993388064006903e-05, "loss": 78.988, "step": 30210 }, { "epoch": 0.12209262394098183, "grad_norm": 622.7205200195312, "learning_rate": 4.993362669184051e-05, "loss": 88.1808, "step": 30220 }, { "epoch": 0.12213302520634946, "grad_norm": 7026.68359375, "learning_rate": 4.9933372257518096e-05, "loss": 126.5632, "step": 30230 }, { "epoch": 0.1221734264717171, "grad_norm": 945.2219848632812, "learning_rate": 4.993311733710676e-05, "loss": 78.7985, "step": 30240 }, { "epoch": 0.12221382773708472, "grad_norm": 826.4041137695312, "learning_rate": 4.9932861930611454e-05, "loss": 72.6881, "step": 30250 }, { "epoch": 0.12225422900245236, "grad_norm": 714.5296630859375, "learning_rate": 4.993260603803718e-05, "loss": 94.0831, "step": 30260 }, { "epoch": 0.12229463026781999, "grad_norm": 692.105224609375, "learning_rate": 4.99323496593889e-05, "loss": 60.1789, "step": 30270 }, { "epoch": 0.12233503153318762, "grad_norm": 534.4913330078125, "learning_rate": 4.993209279467164e-05, "loss": 69.5706, "step": 30280 }, { "epoch": 0.12237543279855526, "grad_norm": 261.96044921875, "learning_rate": 4.99318354438904e-05, "loss": 77.032, "step": 30290 }, { "epoch": 0.12241583406392288, "grad_norm": 1099.048095703125, "learning_rate": 4.9931577607050175e-05, "loss": 92.2519, "step": 30300 }, { "epoch": 0.12245623532929051, "grad_norm": 1755.97705078125, "learning_rate": 4.993131928415602e-05, "loss": 159.9417, "step": 30310 }, { "epoch": 0.12249663659465815, "grad_norm": 1634.8353271484375, "learning_rate": 4.993106047521296e-05, "loss": 91.0221, "step": 30320 }, { "epoch": 0.12253703786002577, "grad_norm": 980.1278686523438, "learning_rate": 4.993080118022604e-05, "loss": 97.5692, "step": 30330 }, { "epoch": 0.12257743912539341, "grad_norm": 2488.35009765625, "learning_rate": 4.993054139920032e-05, "loss": 114.5058, "step": 30340 }, { "epoch": 0.12261784039076104, "grad_norm": 1487.991455078125, "learning_rate": 4.9930281132140846e-05, "loss": 82.5714, "step": 30350 }, { "epoch": 0.12265824165612867, "grad_norm": 513.70654296875, "learning_rate": 4.993002037905272e-05, "loss": 127.0465, "step": 30360 }, { "epoch": 0.1226986429214963, "grad_norm": 2360.14208984375, "learning_rate": 4.9929759139941e-05, "loss": 128.6292, "step": 30370 }, { "epoch": 0.12273904418686393, "grad_norm": 537.433349609375, "learning_rate": 4.99294974148108e-05, "loss": 84.4188, "step": 30380 }, { "epoch": 0.12277944545223156, "grad_norm": 339.3077697753906, "learning_rate": 4.9929235203667214e-05, "loss": 84.647, "step": 30390 }, { "epoch": 0.1228198467175992, "grad_norm": 697.3407592773438, "learning_rate": 4.992897250651535e-05, "loss": 96.315, "step": 30400 }, { "epoch": 0.12286024798296682, "grad_norm": 637.2626342773438, "learning_rate": 4.9928709323360337e-05, "loss": 87.7648, "step": 30410 }, { "epoch": 0.12290064924833445, "grad_norm": 591.4133911132812, "learning_rate": 4.99284456542073e-05, "loss": 57.4863, "step": 30420 }, { "epoch": 0.12294105051370209, "grad_norm": 1242.639404296875, "learning_rate": 4.992818149906138e-05, "loss": 105.4673, "step": 30430 }, { "epoch": 0.12298145177906972, "grad_norm": 946.1543579101562, "learning_rate": 4.992791685792772e-05, "loss": 101.4292, "step": 30440 }, { "epoch": 0.12302185304443736, "grad_norm": 554.2642211914062, "learning_rate": 4.9927651730811495e-05, "loss": 55.5201, "step": 30450 }, { "epoch": 0.12306225430980498, "grad_norm": 1658.3990478515625, "learning_rate": 4.992738611771787e-05, "loss": 90.6332, "step": 30460 }, { "epoch": 0.12310265557517261, "grad_norm": 1110.182373046875, "learning_rate": 4.9927120018652004e-05, "loss": 112.0799, "step": 30470 }, { "epoch": 0.12314305684054025, "grad_norm": 2135.968505859375, "learning_rate": 4.992685343361911e-05, "loss": 124.2809, "step": 30480 }, { "epoch": 0.12318345810590788, "grad_norm": 0.0, "learning_rate": 4.992658636262438e-05, "loss": 130.0038, "step": 30490 }, { "epoch": 0.1232238593712755, "grad_norm": 897.2399291992188, "learning_rate": 4.992631880567301e-05, "loss": 110.8611, "step": 30500 }, { "epoch": 0.12326426063664314, "grad_norm": 1721.460693359375, "learning_rate": 4.9926050762770224e-05, "loss": 110.5094, "step": 30510 }, { "epoch": 0.12330466190201077, "grad_norm": 994.7217407226562, "learning_rate": 4.992578223392124e-05, "loss": 95.7169, "step": 30520 }, { "epoch": 0.12334506316737841, "grad_norm": 1169.87939453125, "learning_rate": 4.9925513219131303e-05, "loss": 82.8276, "step": 30530 }, { "epoch": 0.12338546443274603, "grad_norm": 677.4993286132812, "learning_rate": 4.992524371840566e-05, "loss": 105.6593, "step": 30540 }, { "epoch": 0.12342586569811366, "grad_norm": 885.8995361328125, "learning_rate": 4.992497373174955e-05, "loss": 114.9273, "step": 30550 }, { "epoch": 0.1234662669634813, "grad_norm": 1765.774658203125, "learning_rate": 4.9924703259168244e-05, "loss": 73.1132, "step": 30560 }, { "epoch": 0.12350666822884893, "grad_norm": 1306.00244140625, "learning_rate": 4.992443230066701e-05, "loss": 96.6844, "step": 30570 }, { "epoch": 0.12354706949421655, "grad_norm": 2164.46044921875, "learning_rate": 4.992416085625115e-05, "loss": 101.1072, "step": 30580 }, { "epoch": 0.12358747075958419, "grad_norm": 1472.5419921875, "learning_rate": 4.992388892592594e-05, "loss": 93.9604, "step": 30590 }, { "epoch": 0.12362787202495182, "grad_norm": 376.5589294433594, "learning_rate": 4.9923616509696683e-05, "loss": 89.4311, "step": 30600 }, { "epoch": 0.12366827329031946, "grad_norm": 809.4692993164062, "learning_rate": 4.9923343607568684e-05, "loss": 90.6958, "step": 30610 }, { "epoch": 0.12370867455568708, "grad_norm": 1123.7208251953125, "learning_rate": 4.9923070219547275e-05, "loss": 113.3677, "step": 30620 }, { "epoch": 0.12374907582105471, "grad_norm": 1361.58056640625, "learning_rate": 4.9922796345637776e-05, "loss": 109.5225, "step": 30630 }, { "epoch": 0.12378947708642235, "grad_norm": 1546.5401611328125, "learning_rate": 4.992252198584554e-05, "loss": 156.2372, "step": 30640 }, { "epoch": 0.12382987835178998, "grad_norm": 1252.5113525390625, "learning_rate": 4.99222471401759e-05, "loss": 98.325, "step": 30650 }, { "epoch": 0.1238702796171576, "grad_norm": 587.108642578125, "learning_rate": 4.992197180863422e-05, "loss": 83.3731, "step": 30660 }, { "epoch": 0.12391068088252524, "grad_norm": 546.9779663085938, "learning_rate": 4.992169599122587e-05, "loss": 69.5896, "step": 30670 }, { "epoch": 0.12395108214789287, "grad_norm": 533.7169189453125, "learning_rate": 4.992141968795623e-05, "loss": 59.3518, "step": 30680 }, { "epoch": 0.12399148341326051, "grad_norm": 609.4190673828125, "learning_rate": 4.992114289883068e-05, "loss": 132.897, "step": 30690 }, { "epoch": 0.12403188467862813, "grad_norm": 1489.8643798828125, "learning_rate": 4.9920865623854615e-05, "loss": 84.5661, "step": 30700 }, { "epoch": 0.12407228594399576, "grad_norm": 1392.722900390625, "learning_rate": 4.992058786303345e-05, "loss": 80.1483, "step": 30710 }, { "epoch": 0.1241126872093634, "grad_norm": 1709.162841796875, "learning_rate": 4.9920309616372596e-05, "loss": 78.6077, "step": 30720 }, { "epoch": 0.12415308847473103, "grad_norm": 319.7249450683594, "learning_rate": 4.9920030883877476e-05, "loss": 49.0658, "step": 30730 }, { "epoch": 0.12419348974009865, "grad_norm": 2077.73095703125, "learning_rate": 4.9919751665553525e-05, "loss": 119.2495, "step": 30740 }, { "epoch": 0.1242338910054663, "grad_norm": 1575.6136474609375, "learning_rate": 4.991947196140618e-05, "loss": 65.2858, "step": 30750 }, { "epoch": 0.12427429227083392, "grad_norm": 891.6358032226562, "learning_rate": 4.9919191771440905e-05, "loss": 76.9454, "step": 30760 }, { "epoch": 0.12431469353620156, "grad_norm": 836.199951171875, "learning_rate": 4.991891109566316e-05, "loss": 86.9683, "step": 30770 }, { "epoch": 0.12435509480156919, "grad_norm": 1238.918701171875, "learning_rate": 4.99186299340784e-05, "loss": 95.3734, "step": 30780 }, { "epoch": 0.12439549606693681, "grad_norm": 1337.448974609375, "learning_rate": 4.991834828669213e-05, "loss": 103.7555, "step": 30790 }, { "epoch": 0.12443589733230445, "grad_norm": 2682.815673828125, "learning_rate": 4.9918066153509834e-05, "loss": 157.3261, "step": 30800 }, { "epoch": 0.12447629859767208, "grad_norm": 2427.59033203125, "learning_rate": 4.9917783534537006e-05, "loss": 79.6278, "step": 30810 }, { "epoch": 0.1245166998630397, "grad_norm": 668.7774658203125, "learning_rate": 4.991750042977916e-05, "loss": 77.9551, "step": 30820 }, { "epoch": 0.12455710112840734, "grad_norm": 1772.7213134765625, "learning_rate": 4.991721683924182e-05, "loss": 109.0242, "step": 30830 }, { "epoch": 0.12459750239377497, "grad_norm": 896.2188110351562, "learning_rate": 4.99169327629305e-05, "loss": 103.7983, "step": 30840 }, { "epoch": 0.12463790365914261, "grad_norm": 1777.919677734375, "learning_rate": 4.991664820085074e-05, "loss": 69.7507, "step": 30850 }, { "epoch": 0.12467830492451024, "grad_norm": 414.7098693847656, "learning_rate": 4.9916363153008114e-05, "loss": 137.439, "step": 30860 }, { "epoch": 0.12471870618987786, "grad_norm": 598.8345947265625, "learning_rate": 4.9916077619408155e-05, "loss": 120.2765, "step": 30870 }, { "epoch": 0.1247591074552455, "grad_norm": 1064.3018798828125, "learning_rate": 4.991579160005644e-05, "loss": 84.4768, "step": 30880 }, { "epoch": 0.12479950872061313, "grad_norm": 666.4903564453125, "learning_rate": 4.9915505094958526e-05, "loss": 136.433, "step": 30890 }, { "epoch": 0.12483990998598075, "grad_norm": 1181.445556640625, "learning_rate": 4.991521810412002e-05, "loss": 63.556, "step": 30900 }, { "epoch": 0.1248803112513484, "grad_norm": 605.0240478515625, "learning_rate": 4.991493062754651e-05, "loss": 94.3815, "step": 30910 }, { "epoch": 0.12492071251671602, "grad_norm": 794.223388671875, "learning_rate": 4.99146426652436e-05, "loss": 76.5061, "step": 30920 }, { "epoch": 0.12496111378208366, "grad_norm": 1127.827392578125, "learning_rate": 4.991435421721691e-05, "loss": 80.6668, "step": 30930 }, { "epoch": 0.12500151504745127, "grad_norm": 494.918212890625, "learning_rate": 4.991406528347206e-05, "loss": 137.6075, "step": 30940 }, { "epoch": 0.1250419163128189, "grad_norm": 688.9254760742188, "learning_rate": 4.9913775864014665e-05, "loss": 77.563, "step": 30950 }, { "epoch": 0.12508231757818655, "grad_norm": 1798.6043701171875, "learning_rate": 4.991348595885039e-05, "loss": 172.5805, "step": 30960 }, { "epoch": 0.1251227188435542, "grad_norm": 629.1405639648438, "learning_rate": 4.991319556798488e-05, "loss": 94.1099, "step": 30970 }, { "epoch": 0.1251631201089218, "grad_norm": 831.2258911132812, "learning_rate": 4.99129046914238e-05, "loss": 82.6184, "step": 30980 }, { "epoch": 0.12520352137428944, "grad_norm": 899.6831665039062, "learning_rate": 4.991261332917282e-05, "loss": 89.8317, "step": 30990 }, { "epoch": 0.12524392263965708, "grad_norm": 1243.12255859375, "learning_rate": 4.991232148123761e-05, "loss": 106.8137, "step": 31000 }, { "epoch": 0.1252843239050247, "grad_norm": 708.5358276367188, "learning_rate": 4.9912029147623875e-05, "loss": 71.765, "step": 31010 }, { "epoch": 0.12532472517039234, "grad_norm": 1078.65185546875, "learning_rate": 4.9911736328337296e-05, "loss": 98.1934, "step": 31020 }, { "epoch": 0.12536512643575998, "grad_norm": 863.23193359375, "learning_rate": 4.99114430233836e-05, "loss": 110.7916, "step": 31030 }, { "epoch": 0.1254055277011276, "grad_norm": 948.326416015625, "learning_rate": 4.991114923276849e-05, "loss": 128.7791, "step": 31040 }, { "epoch": 0.12544592896649523, "grad_norm": 1923.48681640625, "learning_rate": 4.9910854956497696e-05, "loss": 81.3787, "step": 31050 }, { "epoch": 0.12548633023186287, "grad_norm": 1134.7197265625, "learning_rate": 4.991056019457697e-05, "loss": 97.8454, "step": 31060 }, { "epoch": 0.12552673149723048, "grad_norm": 1076.4554443359375, "learning_rate": 4.991026494701205e-05, "loss": 93.0418, "step": 31070 }, { "epoch": 0.12556713276259812, "grad_norm": 1976.7174072265625, "learning_rate": 4.9909969213808683e-05, "loss": 87.8301, "step": 31080 }, { "epoch": 0.12560753402796576, "grad_norm": 1298.41162109375, "learning_rate": 4.990967299497264e-05, "loss": 125.1484, "step": 31090 }, { "epoch": 0.12564793529333337, "grad_norm": 532.9257202148438, "learning_rate": 4.990937629050971e-05, "loss": 101.9732, "step": 31100 }, { "epoch": 0.125688336558701, "grad_norm": 1158.4649658203125, "learning_rate": 4.990907910042566e-05, "loss": 88.4633, "step": 31110 }, { "epoch": 0.12572873782406865, "grad_norm": 1732.9954833984375, "learning_rate": 4.990878142472628e-05, "loss": 80.0504, "step": 31120 }, { "epoch": 0.1257691390894363, "grad_norm": 432.7400207519531, "learning_rate": 4.990848326341739e-05, "loss": 92.5247, "step": 31130 }, { "epoch": 0.1258095403548039, "grad_norm": 985.176025390625, "learning_rate": 4.990818461650479e-05, "loss": 100.5391, "step": 31140 }, { "epoch": 0.12584994162017155, "grad_norm": 789.0088500976562, "learning_rate": 4.990788548399432e-05, "loss": 88.8649, "step": 31150 }, { "epoch": 0.12589034288553919, "grad_norm": 1790.52978515625, "learning_rate": 4.990758586589178e-05, "loss": 108.7384, "step": 31160 }, { "epoch": 0.1259307441509068, "grad_norm": 1069.7830810546875, "learning_rate": 4.9907285762203046e-05, "loss": 104.5124, "step": 31170 }, { "epoch": 0.12597114541627444, "grad_norm": 766.6657104492188, "learning_rate": 4.990698517293395e-05, "loss": 59.5578, "step": 31180 }, { "epoch": 0.12601154668164208, "grad_norm": 1233.1490478515625, "learning_rate": 4.990668409809034e-05, "loss": 63.7087, "step": 31190 }, { "epoch": 0.1260519479470097, "grad_norm": 1039.3233642578125, "learning_rate": 4.990638253767812e-05, "loss": 110.4396, "step": 31200 }, { "epoch": 0.12609234921237733, "grad_norm": 1719.74267578125, "learning_rate": 4.9906080491703146e-05, "loss": 84.7901, "step": 31210 }, { "epoch": 0.12613275047774497, "grad_norm": 2833.946533203125, "learning_rate": 4.9905777960171304e-05, "loss": 146.1511, "step": 31220 }, { "epoch": 0.12617315174311258, "grad_norm": 820.8349609375, "learning_rate": 4.99054749430885e-05, "loss": 120.1305, "step": 31230 }, { "epoch": 0.12621355300848022, "grad_norm": 782.0814208984375, "learning_rate": 4.990517144046064e-05, "loss": 68.8798, "step": 31240 }, { "epoch": 0.12625395427384786, "grad_norm": 1526.1461181640625, "learning_rate": 4.990486745229364e-05, "loss": 109.5388, "step": 31250 }, { "epoch": 0.12629435553921547, "grad_norm": 2700.19677734375, "learning_rate": 4.9904562978593426e-05, "loss": 141.1139, "step": 31260 }, { "epoch": 0.12633475680458311, "grad_norm": 578.1785278320312, "learning_rate": 4.990425801936594e-05, "loss": 63.9474, "step": 31270 }, { "epoch": 0.12637515806995075, "grad_norm": 1284.421630859375, "learning_rate": 4.990395257461712e-05, "loss": 64.6959, "step": 31280 }, { "epoch": 0.1264155593353184, "grad_norm": 722.0364379882812, "learning_rate": 4.9903646644352925e-05, "loss": 136.6368, "step": 31290 }, { "epoch": 0.126455960600686, "grad_norm": 639.962646484375, "learning_rate": 4.990334022857932e-05, "loss": 85.277, "step": 31300 }, { "epoch": 0.12649636186605365, "grad_norm": 1369.47265625, "learning_rate": 4.990303332730226e-05, "loss": 95.2965, "step": 31310 }, { "epoch": 0.1265367631314213, "grad_norm": 710.7479858398438, "learning_rate": 4.990272594052776e-05, "loss": 103.4021, "step": 31320 }, { "epoch": 0.1265771643967889, "grad_norm": 967.9599609375, "learning_rate": 4.990241806826179e-05, "loss": 55.237, "step": 31330 }, { "epoch": 0.12661756566215654, "grad_norm": 1026.2791748046875, "learning_rate": 4.990210971051037e-05, "loss": 76.5986, "step": 31340 }, { "epoch": 0.12665796692752418, "grad_norm": 1351.9713134765625, "learning_rate": 4.990180086727949e-05, "loss": 78.9581, "step": 31350 }, { "epoch": 0.1266983681928918, "grad_norm": 1424.3463134765625, "learning_rate": 4.9901491538575185e-05, "loss": 72.768, "step": 31360 }, { "epoch": 0.12673876945825943, "grad_norm": 251.47360229492188, "learning_rate": 4.990118172440348e-05, "loss": 97.3149, "step": 31370 }, { "epoch": 0.12677917072362707, "grad_norm": 1101.8797607421875, "learning_rate": 4.9900871424770424e-05, "loss": 113.3635, "step": 31380 }, { "epoch": 0.12681957198899468, "grad_norm": 3568.4501953125, "learning_rate": 4.9900560639682045e-05, "loss": 123.9855, "step": 31390 }, { "epoch": 0.12685997325436232, "grad_norm": 1042.5748291015625, "learning_rate": 4.9900249369144434e-05, "loss": 74.6795, "step": 31400 }, { "epoch": 0.12690037451972996, "grad_norm": 2803.200439453125, "learning_rate": 4.9899937613163635e-05, "loss": 122.4246, "step": 31410 }, { "epoch": 0.12694077578509758, "grad_norm": 999.986083984375, "learning_rate": 4.9899625371745726e-05, "loss": 103.829, "step": 31420 }, { "epoch": 0.12698117705046522, "grad_norm": 420.03607177734375, "learning_rate": 4.989931264489681e-05, "loss": 109.0738, "step": 31430 }, { "epoch": 0.12702157831583286, "grad_norm": 679.0191040039062, "learning_rate": 4.9898999432622974e-05, "loss": 63.1434, "step": 31440 }, { "epoch": 0.1270619795812005, "grad_norm": 1536.908203125, "learning_rate": 4.989868573493032e-05, "loss": 114.7305, "step": 31450 }, { "epoch": 0.1271023808465681, "grad_norm": 2571.055419921875, "learning_rate": 4.9898371551824974e-05, "loss": 91.3618, "step": 31460 }, { "epoch": 0.12714278211193575, "grad_norm": 1286.792724609375, "learning_rate": 4.989805688331306e-05, "loss": 96.6507, "step": 31470 }, { "epoch": 0.1271831833773034, "grad_norm": 1014.9740600585938, "learning_rate": 4.9897741729400705e-05, "loss": 82.7385, "step": 31480 }, { "epoch": 0.127223584642671, "grad_norm": 3557.023681640625, "learning_rate": 4.989742609009405e-05, "loss": 105.2578, "step": 31490 }, { "epoch": 0.12726398590803864, "grad_norm": 1261.7489013671875, "learning_rate": 4.989710996539926e-05, "loss": 110.2257, "step": 31500 }, { "epoch": 0.12730438717340628, "grad_norm": 620.3264770507812, "learning_rate": 4.9896793355322495e-05, "loss": 130.9313, "step": 31510 }, { "epoch": 0.1273447884387739, "grad_norm": 1063.13623046875, "learning_rate": 4.989647625986993e-05, "loss": 84.6815, "step": 31520 }, { "epoch": 0.12738518970414153, "grad_norm": 1258.741943359375, "learning_rate": 4.989615867904773e-05, "loss": 101.0329, "step": 31530 }, { "epoch": 0.12742559096950917, "grad_norm": 483.7554931640625, "learning_rate": 4.989584061286211e-05, "loss": 115.0386, "step": 31540 }, { "epoch": 0.12746599223487678, "grad_norm": 1390.3809814453125, "learning_rate": 4.989552206131925e-05, "loss": 95.0824, "step": 31550 }, { "epoch": 0.12750639350024442, "grad_norm": 474.9691162109375, "learning_rate": 4.9895203024425385e-05, "loss": 123.0268, "step": 31560 }, { "epoch": 0.12754679476561206, "grad_norm": 953.542236328125, "learning_rate": 4.989488350218671e-05, "loss": 132.4043, "step": 31570 }, { "epoch": 0.12758719603097968, "grad_norm": 602.45751953125, "learning_rate": 4.989456349460947e-05, "loss": 81.3055, "step": 31580 }, { "epoch": 0.12762759729634732, "grad_norm": 1298.2872314453125, "learning_rate": 4.989424300169989e-05, "loss": 85.7651, "step": 31590 }, { "epoch": 0.12766799856171496, "grad_norm": 539.2799072265625, "learning_rate": 4.9893922023464236e-05, "loss": 73.1652, "step": 31600 }, { "epoch": 0.1277083998270826, "grad_norm": 506.7164001464844, "learning_rate": 4.989360055990875e-05, "loss": 85.522, "step": 31610 }, { "epoch": 0.1277488010924502, "grad_norm": 1420.2366943359375, "learning_rate": 4.98932786110397e-05, "loss": 101.5808, "step": 31620 }, { "epoch": 0.12778920235781785, "grad_norm": 931.4535522460938, "learning_rate": 4.989295617686337e-05, "loss": 104.2542, "step": 31630 }, { "epoch": 0.1278296036231855, "grad_norm": 1208.561767578125, "learning_rate": 4.989263325738605e-05, "loss": 118.4887, "step": 31640 }, { "epoch": 0.1278700048885531, "grad_norm": 1164.8492431640625, "learning_rate": 4.989230985261403e-05, "loss": 133.7977, "step": 31650 }, { "epoch": 0.12791040615392074, "grad_norm": 2086.97900390625, "learning_rate": 4.9891985962553606e-05, "loss": 138.2947, "step": 31660 }, { "epoch": 0.12795080741928838, "grad_norm": 896.4519653320312, "learning_rate": 4.98916615872111e-05, "loss": 108.6655, "step": 31670 }, { "epoch": 0.127991208684656, "grad_norm": 921.4371337890625, "learning_rate": 4.9891336726592844e-05, "loss": 72.2636, "step": 31680 }, { "epoch": 0.12803160995002363, "grad_norm": 2175.284423828125, "learning_rate": 4.989101138070516e-05, "loss": 108.8024, "step": 31690 }, { "epoch": 0.12807201121539127, "grad_norm": 456.76068115234375, "learning_rate": 4.989068554955439e-05, "loss": 124.3368, "step": 31700 }, { "epoch": 0.12811241248075889, "grad_norm": 2323.61962890625, "learning_rate": 4.9890359233146897e-05, "loss": 83.3551, "step": 31710 }, { "epoch": 0.12815281374612653, "grad_norm": 630.8578491210938, "learning_rate": 4.989003243148904e-05, "loss": 112.8626, "step": 31720 }, { "epoch": 0.12819321501149417, "grad_norm": 1055.7919921875, "learning_rate": 4.988970514458718e-05, "loss": 85.2178, "step": 31730 }, { "epoch": 0.12823361627686178, "grad_norm": 4026.397705078125, "learning_rate": 4.9889377372447706e-05, "loss": 136.7049, "step": 31740 }, { "epoch": 0.12827401754222942, "grad_norm": 1399.34130859375, "learning_rate": 4.9889049115077005e-05, "loss": 90.4039, "step": 31750 }, { "epoch": 0.12831441880759706, "grad_norm": 718.3490600585938, "learning_rate": 4.988872037248148e-05, "loss": 122.6609, "step": 31760 }, { "epoch": 0.1283548200729647, "grad_norm": 1984.33740234375, "learning_rate": 4.988839114466753e-05, "loss": 118.7678, "step": 31770 }, { "epoch": 0.1283952213383323, "grad_norm": 732.53662109375, "learning_rate": 4.988806143164159e-05, "loss": 59.4425, "step": 31780 }, { "epoch": 0.12843562260369995, "grad_norm": 559.675537109375, "learning_rate": 4.988773123341007e-05, "loss": 67.2548, "step": 31790 }, { "epoch": 0.1284760238690676, "grad_norm": 1089.072998046875, "learning_rate": 4.988740054997943e-05, "loss": 96.6428, "step": 31800 }, { "epoch": 0.1285164251344352, "grad_norm": 2197.211181640625, "learning_rate": 4.9887069381356094e-05, "loss": 116.156, "step": 31810 }, { "epoch": 0.12855682639980284, "grad_norm": 459.337158203125, "learning_rate": 4.988673772754653e-05, "loss": 105.2984, "step": 31820 }, { "epoch": 0.12859722766517048, "grad_norm": 1142.6385498046875, "learning_rate": 4.98864055885572e-05, "loss": 97.5586, "step": 31830 }, { "epoch": 0.1286376289305381, "grad_norm": 952.4442749023438, "learning_rate": 4.988607296439458e-05, "loss": 112.1516, "step": 31840 }, { "epoch": 0.12867803019590573, "grad_norm": 633.4367065429688, "learning_rate": 4.988573985506516e-05, "loss": 82.8759, "step": 31850 }, { "epoch": 0.12871843146127337, "grad_norm": 341.5099792480469, "learning_rate": 4.988540626057543e-05, "loss": 79.2266, "step": 31860 }, { "epoch": 0.128758832726641, "grad_norm": 521.6610717773438, "learning_rate": 4.988507218093189e-05, "loss": 74.551, "step": 31870 }, { "epoch": 0.12879923399200863, "grad_norm": 848.8890991210938, "learning_rate": 4.988473761614105e-05, "loss": 68.6997, "step": 31880 }, { "epoch": 0.12883963525737627, "grad_norm": 1142.2823486328125, "learning_rate": 4.9884402566209445e-05, "loss": 84.3612, "step": 31890 }, { "epoch": 0.12888003652274388, "grad_norm": 1104.7357177734375, "learning_rate": 4.98840670311436e-05, "loss": 114.3409, "step": 31900 }, { "epoch": 0.12892043778811152, "grad_norm": 1430.170654296875, "learning_rate": 4.988373101095006e-05, "loss": 66.6711, "step": 31910 }, { "epoch": 0.12896083905347916, "grad_norm": 962.3909301757812, "learning_rate": 4.9883394505635364e-05, "loss": 103.3977, "step": 31920 }, { "epoch": 0.1290012403188468, "grad_norm": 1590.525634765625, "learning_rate": 4.988305751520609e-05, "loss": 107.3967, "step": 31930 }, { "epoch": 0.1290416415842144, "grad_norm": 373.5509033203125, "learning_rate": 4.988272003966879e-05, "loss": 61.0093, "step": 31940 }, { "epoch": 0.12908204284958205, "grad_norm": 845.4537353515625, "learning_rate": 4.9882382079030064e-05, "loss": 87.1438, "step": 31950 }, { "epoch": 0.1291224441149497, "grad_norm": 945.7670288085938, "learning_rate": 4.988204363329648e-05, "loss": 67.9447, "step": 31960 }, { "epoch": 0.1291628453803173, "grad_norm": 398.878173828125, "learning_rate": 4.988170470247465e-05, "loss": 81.1569, "step": 31970 }, { "epoch": 0.12920324664568494, "grad_norm": 329.99859619140625, "learning_rate": 4.988136528657118e-05, "loss": 59.9264, "step": 31980 }, { "epoch": 0.12924364791105258, "grad_norm": 875.9232177734375, "learning_rate": 4.988102538559268e-05, "loss": 118.8401, "step": 31990 }, { "epoch": 0.1292840491764202, "grad_norm": 1466.2398681640625, "learning_rate": 4.988068499954578e-05, "loss": 106.1032, "step": 32000 }, { "epoch": 0.12932445044178784, "grad_norm": 1423.832763671875, "learning_rate": 4.9880344128437115e-05, "loss": 138.5585, "step": 32010 }, { "epoch": 0.12936485170715548, "grad_norm": 2598.307373046875, "learning_rate": 4.988000277227334e-05, "loss": 115.3402, "step": 32020 }, { "epoch": 0.1294052529725231, "grad_norm": 1134.399169921875, "learning_rate": 4.987966093106109e-05, "loss": 90.1487, "step": 32030 }, { "epoch": 0.12944565423789073, "grad_norm": 765.7633056640625, "learning_rate": 4.987931860480705e-05, "loss": 100.8343, "step": 32040 }, { "epoch": 0.12948605550325837, "grad_norm": 1252.3284912109375, "learning_rate": 4.987897579351788e-05, "loss": 78.8595, "step": 32050 }, { "epoch": 0.12952645676862598, "grad_norm": 4336.3466796875, "learning_rate": 4.987863249720027e-05, "loss": 95.2753, "step": 32060 }, { "epoch": 0.12956685803399362, "grad_norm": 1328.06396484375, "learning_rate": 4.987828871586091e-05, "loss": 101.3385, "step": 32070 }, { "epoch": 0.12960725929936126, "grad_norm": 520.3338012695312, "learning_rate": 4.987794444950651e-05, "loss": 153.1254, "step": 32080 }, { "epoch": 0.1296476605647289, "grad_norm": 694.16015625, "learning_rate": 4.987759969814377e-05, "loss": 93.5572, "step": 32090 }, { "epoch": 0.1296880618300965, "grad_norm": 1401.1295166015625, "learning_rate": 4.987725446177941e-05, "loss": 93.8664, "step": 32100 }, { "epoch": 0.12972846309546415, "grad_norm": 928.5399780273438, "learning_rate": 4.9876908740420175e-05, "loss": 74.3067, "step": 32110 }, { "epoch": 0.1297688643608318, "grad_norm": 1556.6954345703125, "learning_rate": 4.98765625340728e-05, "loss": 85.9339, "step": 32120 }, { "epoch": 0.1298092656261994, "grad_norm": 783.3317260742188, "learning_rate": 4.987621584274402e-05, "loss": 85.9349, "step": 32130 }, { "epoch": 0.12984966689156704, "grad_norm": 423.84014892578125, "learning_rate": 4.9875868666440604e-05, "loss": 49.8796, "step": 32140 }, { "epoch": 0.12989006815693468, "grad_norm": 806.3920288085938, "learning_rate": 4.987552100516933e-05, "loss": 66.9737, "step": 32150 }, { "epoch": 0.1299304694223023, "grad_norm": 974.2014770507812, "learning_rate": 4.987517285893697e-05, "loss": 131.7362, "step": 32160 }, { "epoch": 0.12997087068766994, "grad_norm": 3982.545654296875, "learning_rate": 4.9874824227750305e-05, "loss": 130.7934, "step": 32170 }, { "epoch": 0.13001127195303758, "grad_norm": 936.3251953125, "learning_rate": 4.987447511161612e-05, "loss": 86.1245, "step": 32180 }, { "epoch": 0.1300516732184052, "grad_norm": 1825.6453857421875, "learning_rate": 4.987412551054126e-05, "loss": 95.4473, "step": 32190 }, { "epoch": 0.13009207448377283, "grad_norm": 1493.9964599609375, "learning_rate": 4.987377542453251e-05, "loss": 78.5434, "step": 32200 }, { "epoch": 0.13013247574914047, "grad_norm": 748.4346923828125, "learning_rate": 4.9873424853596695e-05, "loss": 87.2075, "step": 32210 }, { "epoch": 0.13017287701450808, "grad_norm": 1414.2127685546875, "learning_rate": 4.987307379774066e-05, "loss": 95.8589, "step": 32220 }, { "epoch": 0.13021327827987572, "grad_norm": 799.97705078125, "learning_rate": 4.987272225697125e-05, "loss": 94.7114, "step": 32230 }, { "epoch": 0.13025367954524336, "grad_norm": 2368.305419921875, "learning_rate": 4.987237023129531e-05, "loss": 93.2615, "step": 32240 }, { "epoch": 0.130294080810611, "grad_norm": 525.53759765625, "learning_rate": 4.987201772071971e-05, "loss": 100.8188, "step": 32250 }, { "epoch": 0.1303344820759786, "grad_norm": 1076.550048828125, "learning_rate": 4.9871664725251314e-05, "loss": 59.2903, "step": 32260 }, { "epoch": 0.13037488334134625, "grad_norm": 1305.9718017578125, "learning_rate": 4.987131124489701e-05, "loss": 62.7737, "step": 32270 }, { "epoch": 0.1304152846067139, "grad_norm": 1377.45458984375, "learning_rate": 4.98709572796637e-05, "loss": 98.2218, "step": 32280 }, { "epoch": 0.1304556858720815, "grad_norm": 1250.3414306640625, "learning_rate": 4.987060282955826e-05, "loss": 141.3949, "step": 32290 }, { "epoch": 0.13049608713744915, "grad_norm": 362.1710205078125, "learning_rate": 4.987024789458762e-05, "loss": 56.7362, "step": 32300 }, { "epoch": 0.13053648840281679, "grad_norm": 1179.3179931640625, "learning_rate": 4.9869892474758694e-05, "loss": 110.4925, "step": 32310 }, { "epoch": 0.1305768896681844, "grad_norm": 885.8596801757812, "learning_rate": 4.986953657007841e-05, "loss": 103.9362, "step": 32320 }, { "epoch": 0.13061729093355204, "grad_norm": 2053.674072265625, "learning_rate": 4.98691801805537e-05, "loss": 108.4903, "step": 32330 }, { "epoch": 0.13065769219891968, "grad_norm": 628.6939086914062, "learning_rate": 4.986882330619152e-05, "loss": 66.5057, "step": 32340 }, { "epoch": 0.1306980934642873, "grad_norm": 9528.09765625, "learning_rate": 4.986846594699883e-05, "loss": 103.5715, "step": 32350 }, { "epoch": 0.13073849472965493, "grad_norm": 0.0, "learning_rate": 4.9868108102982604e-05, "loss": 121.9924, "step": 32360 }, { "epoch": 0.13077889599502257, "grad_norm": 710.1858520507812, "learning_rate": 4.986774977414979e-05, "loss": 65.1207, "step": 32370 }, { "epoch": 0.13081929726039018, "grad_norm": 474.6985778808594, "learning_rate": 4.98673909605074e-05, "loss": 102.3303, "step": 32380 }, { "epoch": 0.13085969852575782, "grad_norm": 0.0, "learning_rate": 4.986703166206242e-05, "loss": 75.2703, "step": 32390 }, { "epoch": 0.13090009979112546, "grad_norm": 781.9612426757812, "learning_rate": 4.986667187882186e-05, "loss": 115.7585, "step": 32400 }, { "epoch": 0.1309405010564931, "grad_norm": 1456.1153564453125, "learning_rate": 4.986631161079272e-05, "loss": 119.8207, "step": 32410 }, { "epoch": 0.13098090232186071, "grad_norm": 767.140625, "learning_rate": 4.986595085798204e-05, "loss": 79.3004, "step": 32420 }, { "epoch": 0.13102130358722835, "grad_norm": 1090.2476806640625, "learning_rate": 4.9865589620396837e-05, "loss": 104.849, "step": 32430 }, { "epoch": 0.131061704852596, "grad_norm": 618.4625854492188, "learning_rate": 4.986522789804417e-05, "loss": 88.606, "step": 32440 }, { "epoch": 0.1311021061179636, "grad_norm": 1400.4354248046875, "learning_rate": 4.9864865690931086e-05, "loss": 112.4108, "step": 32450 }, { "epoch": 0.13114250738333125, "grad_norm": 1226.9744873046875, "learning_rate": 4.986450299906464e-05, "loss": 110.0555, "step": 32460 }, { "epoch": 0.1311829086486989, "grad_norm": 1217.69970703125, "learning_rate": 4.9864139822451905e-05, "loss": 105.7761, "step": 32470 }, { "epoch": 0.1312233099140665, "grad_norm": 944.0647583007812, "learning_rate": 4.9863776161099964e-05, "loss": 68.9442, "step": 32480 }, { "epoch": 0.13126371117943414, "grad_norm": 1241.845458984375, "learning_rate": 4.986341201501591e-05, "loss": 95.3951, "step": 32490 }, { "epoch": 0.13130411244480178, "grad_norm": 1810.86572265625, "learning_rate": 4.9863047384206835e-05, "loss": 132.1521, "step": 32500 }, { "epoch": 0.1313445137101694, "grad_norm": 1479.045166015625, "learning_rate": 4.986268226867985e-05, "loss": 109.1159, "step": 32510 }, { "epoch": 0.13138491497553703, "grad_norm": 958.012451171875, "learning_rate": 4.986231666844208e-05, "loss": 84.0408, "step": 32520 }, { "epoch": 0.13142531624090467, "grad_norm": 1029.1376953125, "learning_rate": 4.9861950583500636e-05, "loss": 161.0161, "step": 32530 }, { "epoch": 0.13146571750627228, "grad_norm": 2178.518798828125, "learning_rate": 4.986158401386268e-05, "loss": 125.5231, "step": 32540 }, { "epoch": 0.13150611877163992, "grad_norm": 873.2677612304688, "learning_rate": 4.9861216959535335e-05, "loss": 82.9297, "step": 32550 }, { "epoch": 0.13154652003700756, "grad_norm": 3118.9599609375, "learning_rate": 4.9860849420525766e-05, "loss": 133.3087, "step": 32560 }, { "epoch": 0.1315869213023752, "grad_norm": 342.8658752441406, "learning_rate": 4.986048139684114e-05, "loss": 77.4298, "step": 32570 }, { "epoch": 0.13162732256774282, "grad_norm": 1096.45458984375, "learning_rate": 4.986011288848863e-05, "loss": 64.3582, "step": 32580 }, { "epoch": 0.13166772383311046, "grad_norm": 802.4067993164062, "learning_rate": 4.9859743895475416e-05, "loss": 106.3808, "step": 32590 }, { "epoch": 0.1317081250984781, "grad_norm": 1518.53173828125, "learning_rate": 4.98593744178087e-05, "loss": 86.2783, "step": 32600 }, { "epoch": 0.1317485263638457, "grad_norm": 662.360107421875, "learning_rate": 4.985900445549568e-05, "loss": 127.7222, "step": 32610 }, { "epoch": 0.13178892762921335, "grad_norm": 1433.412109375, "learning_rate": 4.985863400854358e-05, "loss": 109.9828, "step": 32620 }, { "epoch": 0.131829328894581, "grad_norm": 524.9939575195312, "learning_rate": 4.98582630769596e-05, "loss": 114.5297, "step": 32630 }, { "epoch": 0.1318697301599486, "grad_norm": 432.99395751953125, "learning_rate": 4.9857891660750986e-05, "loss": 74.3833, "step": 32640 }, { "epoch": 0.13191013142531624, "grad_norm": 1059.305908203125, "learning_rate": 4.9857519759924974e-05, "loss": 103.7896, "step": 32650 }, { "epoch": 0.13195053269068388, "grad_norm": 1521.5408935546875, "learning_rate": 4.985714737448882e-05, "loss": 110.8204, "step": 32660 }, { "epoch": 0.1319909339560515, "grad_norm": 712.7562255859375, "learning_rate": 4.9856774504449776e-05, "loss": 77.0656, "step": 32670 }, { "epoch": 0.13203133522141913, "grad_norm": 738.4374389648438, "learning_rate": 4.9856401149815126e-05, "loss": 76.575, "step": 32680 }, { "epoch": 0.13207173648678677, "grad_norm": 521.0355834960938, "learning_rate": 4.9856027310592134e-05, "loss": 79.7847, "step": 32690 }, { "epoch": 0.13211213775215438, "grad_norm": 738.5291137695312, "learning_rate": 4.985565298678809e-05, "loss": 74.041, "step": 32700 }, { "epoch": 0.13215253901752202, "grad_norm": 1271.053955078125, "learning_rate": 4.985527817841029e-05, "loss": 87.5718, "step": 32710 }, { "epoch": 0.13219294028288966, "grad_norm": 1135.2099609375, "learning_rate": 4.985490288546606e-05, "loss": 121.2673, "step": 32720 }, { "epoch": 0.1322333415482573, "grad_norm": 866.3362426757812, "learning_rate": 4.9854527107962686e-05, "loss": 107.6865, "step": 32730 }, { "epoch": 0.13227374281362492, "grad_norm": 1969.1063232421875, "learning_rate": 4.985415084590752e-05, "loss": 79.8334, "step": 32740 }, { "epoch": 0.13231414407899256, "grad_norm": 1149.1275634765625, "learning_rate": 4.985377409930789e-05, "loss": 109.8118, "step": 32750 }, { "epoch": 0.1323545453443602, "grad_norm": 756.9971313476562, "learning_rate": 4.985339686817113e-05, "loss": 118.7679, "step": 32760 }, { "epoch": 0.1323949466097278, "grad_norm": 795.5819702148438, "learning_rate": 4.9853019152504607e-05, "loss": 91.6352, "step": 32770 }, { "epoch": 0.13243534787509545, "grad_norm": 1227.4793701171875, "learning_rate": 4.9852640952315674e-05, "loss": 60.6576, "step": 32780 }, { "epoch": 0.1324757491404631, "grad_norm": 1278.319091796875, "learning_rate": 4.985226226761172e-05, "loss": 113.7857, "step": 32790 }, { "epoch": 0.1325161504058307, "grad_norm": 610.24560546875, "learning_rate": 4.985188309840012e-05, "loss": 78.6529, "step": 32800 }, { "epoch": 0.13255655167119834, "grad_norm": 1682.79736328125, "learning_rate": 4.9851503444688255e-05, "loss": 107.8541, "step": 32810 }, { "epoch": 0.13259695293656598, "grad_norm": 493.7384033203125, "learning_rate": 4.985112330648354e-05, "loss": 79.7233, "step": 32820 }, { "epoch": 0.1326373542019336, "grad_norm": 790.5695190429688, "learning_rate": 4.985074268379338e-05, "loss": 91.9399, "step": 32830 }, { "epoch": 0.13267775546730123, "grad_norm": 1110.0252685546875, "learning_rate": 4.985036157662521e-05, "loss": 160.5353, "step": 32840 }, { "epoch": 0.13271815673266887, "grad_norm": 1280.8482666015625, "learning_rate": 4.9849979984986426e-05, "loss": 139.881, "step": 32850 }, { "epoch": 0.13275855799803649, "grad_norm": 973.560302734375, "learning_rate": 4.98495979088845e-05, "loss": 124.0516, "step": 32860 }, { "epoch": 0.13279895926340413, "grad_norm": 496.84893798828125, "learning_rate": 4.9849215348326875e-05, "loss": 90.3565, "step": 32870 }, { "epoch": 0.13283936052877177, "grad_norm": 1748.6497802734375, "learning_rate": 4.984883230332099e-05, "loss": 84.9742, "step": 32880 }, { "epoch": 0.1328797617941394, "grad_norm": 1350.8636474609375, "learning_rate": 4.984844877387433e-05, "loss": 115.242, "step": 32890 }, { "epoch": 0.13292016305950702, "grad_norm": 776.8285522460938, "learning_rate": 4.984806475999437e-05, "loss": 77.2372, "step": 32900 }, { "epoch": 0.13296056432487466, "grad_norm": 863.73583984375, "learning_rate": 4.98476802616886e-05, "loss": 74.2184, "step": 32910 }, { "epoch": 0.1330009655902423, "grad_norm": 793.2598876953125, "learning_rate": 4.9847295278964514e-05, "loss": 67.6055, "step": 32920 }, { "epoch": 0.1330413668556099, "grad_norm": 1486.9317626953125, "learning_rate": 4.9846909811829604e-05, "loss": 127.2083, "step": 32930 }, { "epoch": 0.13308176812097755, "grad_norm": 773.4334716796875, "learning_rate": 4.984652386029139e-05, "loss": 116.8202, "step": 32940 }, { "epoch": 0.1331221693863452, "grad_norm": 1226.421630859375, "learning_rate": 4.984613742435742e-05, "loss": 112.299, "step": 32950 }, { "epoch": 0.1331625706517128, "grad_norm": 445.5863342285156, "learning_rate": 4.9845750504035195e-05, "loss": 87.6496, "step": 32960 }, { "epoch": 0.13320297191708044, "grad_norm": 894.6140747070312, "learning_rate": 4.984536309933227e-05, "loss": 95.4575, "step": 32970 }, { "epoch": 0.13324337318244808, "grad_norm": 823.8974609375, "learning_rate": 4.9844975210256217e-05, "loss": 93.921, "step": 32980 }, { "epoch": 0.1332837744478157, "grad_norm": 755.107177734375, "learning_rate": 4.984458683681457e-05, "loss": 84.3962, "step": 32990 }, { "epoch": 0.13332417571318333, "grad_norm": 885.6873779296875, "learning_rate": 4.984419797901491e-05, "loss": 105.6254, "step": 33000 }, { "epoch": 0.13336457697855097, "grad_norm": 411.86053466796875, "learning_rate": 4.984380863686482e-05, "loss": 97.5172, "step": 33010 }, { "epoch": 0.1334049782439186, "grad_norm": 563.2962646484375, "learning_rate": 4.98434188103719e-05, "loss": 80.259, "step": 33020 }, { "epoch": 0.13344537950928623, "grad_norm": 1379.732666015625, "learning_rate": 4.984302849954373e-05, "loss": 73.214, "step": 33030 }, { "epoch": 0.13348578077465387, "grad_norm": 750.8110961914062, "learning_rate": 4.984263770438793e-05, "loss": 106.9937, "step": 33040 }, { "epoch": 0.1335261820400215, "grad_norm": 1013.02392578125, "learning_rate": 4.984224642491212e-05, "loss": 75.5326, "step": 33050 }, { "epoch": 0.13356658330538912, "grad_norm": 1022.1004028320312, "learning_rate": 4.9841854661123936e-05, "loss": 108.553, "step": 33060 }, { "epoch": 0.13360698457075676, "grad_norm": 809.585205078125, "learning_rate": 4.9841462413030995e-05, "loss": 84.0129, "step": 33070 }, { "epoch": 0.1336473858361244, "grad_norm": 842.181884765625, "learning_rate": 4.984106968064095e-05, "loss": 112.834, "step": 33080 }, { "epoch": 0.133687787101492, "grad_norm": 467.4346618652344, "learning_rate": 4.984067646396147e-05, "loss": 100.528, "step": 33090 }, { "epoch": 0.13372818836685965, "grad_norm": 714.903076171875, "learning_rate": 4.984028276300021e-05, "loss": 129.6565, "step": 33100 }, { "epoch": 0.1337685896322273, "grad_norm": 709.3721923828125, "learning_rate": 4.983988857776486e-05, "loss": 101.3018, "step": 33110 }, { "epoch": 0.1338089908975949, "grad_norm": 988.5148315429688, "learning_rate": 4.983949390826308e-05, "loss": 86.796, "step": 33120 }, { "epoch": 0.13384939216296254, "grad_norm": 1472.410400390625, "learning_rate": 4.983909875450258e-05, "loss": 94.508, "step": 33130 }, { "epoch": 0.13388979342833018, "grad_norm": 900.3340454101562, "learning_rate": 4.983870311649107e-05, "loss": 95.947, "step": 33140 }, { "epoch": 0.1339301946936978, "grad_norm": 1466.88134765625, "learning_rate": 4.983830699423625e-05, "loss": 74.9536, "step": 33150 }, { "epoch": 0.13397059595906544, "grad_norm": 576.2639770507812, "learning_rate": 4.9837910387745845e-05, "loss": 115.5023, "step": 33160 }, { "epoch": 0.13401099722443308, "grad_norm": 1318.8853759765625, "learning_rate": 4.9837513297027594e-05, "loss": 84.5853, "step": 33170 }, { "epoch": 0.1340513984898007, "grad_norm": 934.3829345703125, "learning_rate": 4.983711572208924e-05, "loss": 75.3381, "step": 33180 }, { "epoch": 0.13409179975516833, "grad_norm": 1154.966796875, "learning_rate": 4.983671766293851e-05, "loss": 83.0238, "step": 33190 }, { "epoch": 0.13413220102053597, "grad_norm": 1403.0806884765625, "learning_rate": 4.983631911958319e-05, "loss": 82.2895, "step": 33200 }, { "epoch": 0.1341726022859036, "grad_norm": 1025.5306396484375, "learning_rate": 4.983592009203105e-05, "loss": 64.9099, "step": 33210 }, { "epoch": 0.13421300355127122, "grad_norm": 570.6565551757812, "learning_rate": 4.9835520580289854e-05, "loss": 60.6472, "step": 33220 }, { "epoch": 0.13425340481663886, "grad_norm": 241.88272094726562, "learning_rate": 4.98351205843674e-05, "loss": 101.3504, "step": 33230 }, { "epoch": 0.1342938060820065, "grad_norm": 786.84912109375, "learning_rate": 4.9834720104271484e-05, "loss": 80.2996, "step": 33240 }, { "epoch": 0.1343342073473741, "grad_norm": 1382.0052490234375, "learning_rate": 4.983431914000991e-05, "loss": 90.2512, "step": 33250 }, { "epoch": 0.13437460861274175, "grad_norm": 1485.1060791015625, "learning_rate": 4.9833917691590506e-05, "loss": 80.0044, "step": 33260 }, { "epoch": 0.1344150098781094, "grad_norm": 698.7577514648438, "learning_rate": 4.9833515759021085e-05, "loss": 102.9575, "step": 33270 }, { "epoch": 0.134455411143477, "grad_norm": 693.4671020507812, "learning_rate": 4.98331133423095e-05, "loss": 94.7776, "step": 33280 }, { "epoch": 0.13449581240884464, "grad_norm": 664.5126342773438, "learning_rate": 4.983271044146357e-05, "loss": 85.3929, "step": 33290 }, { "epoch": 0.13453621367421228, "grad_norm": 841.61962890625, "learning_rate": 4.983230705649118e-05, "loss": 92.7831, "step": 33300 }, { "epoch": 0.1345766149395799, "grad_norm": 1524.328125, "learning_rate": 4.9831903187400166e-05, "loss": 85.1477, "step": 33310 }, { "epoch": 0.13461701620494754, "grad_norm": 551.5435180664062, "learning_rate": 4.983149883419842e-05, "loss": 128.3945, "step": 33320 }, { "epoch": 0.13465741747031518, "grad_norm": 387.754638671875, "learning_rate": 4.983109399689382e-05, "loss": 75.982, "step": 33330 }, { "epoch": 0.1346978187356828, "grad_norm": 2052.11669921875, "learning_rate": 4.9830688675494265e-05, "loss": 166.9485, "step": 33340 }, { "epoch": 0.13473822000105043, "grad_norm": 656.3280639648438, "learning_rate": 4.9830282870007646e-05, "loss": 98.0935, "step": 33350 }, { "epoch": 0.13477862126641807, "grad_norm": 960.6358032226562, "learning_rate": 4.982987658044188e-05, "loss": 88.8502, "step": 33360 }, { "epoch": 0.1348190225317857, "grad_norm": 303.2107238769531, "learning_rate": 4.982946980680488e-05, "loss": 82.7864, "step": 33370 }, { "epoch": 0.13485942379715332, "grad_norm": 786.0392456054688, "learning_rate": 4.982906254910459e-05, "loss": 75.8299, "step": 33380 }, { "epoch": 0.13489982506252096, "grad_norm": 554.2486572265625, "learning_rate": 4.982865480734894e-05, "loss": 71.6516, "step": 33390 }, { "epoch": 0.1349402263278886, "grad_norm": 901.5972900390625, "learning_rate": 4.982824658154589e-05, "loss": 106.4935, "step": 33400 }, { "epoch": 0.1349806275932562, "grad_norm": 363.12115478515625, "learning_rate": 4.982783787170338e-05, "loss": 76.4278, "step": 33410 }, { "epoch": 0.13502102885862385, "grad_norm": 3618.558349609375, "learning_rate": 4.982742867782939e-05, "loss": 130.8146, "step": 33420 }, { "epoch": 0.1350614301239915, "grad_norm": 810.9340209960938, "learning_rate": 4.982701899993189e-05, "loss": 109.7431, "step": 33430 }, { "epoch": 0.1351018313893591, "grad_norm": 1115.0909423828125, "learning_rate": 4.982660883801889e-05, "loss": 97.727, "step": 33440 }, { "epoch": 0.13514223265472675, "grad_norm": 726.3258056640625, "learning_rate": 4.982619819209836e-05, "loss": 129.2327, "step": 33450 }, { "epoch": 0.13518263392009439, "grad_norm": 0.0, "learning_rate": 4.9825787062178315e-05, "loss": 62.2287, "step": 33460 }, { "epoch": 0.135223035185462, "grad_norm": 2895.0556640625, "learning_rate": 4.982537544826677e-05, "loss": 99.4745, "step": 33470 }, { "epoch": 0.13526343645082964, "grad_norm": 496.2646789550781, "learning_rate": 4.982496335037175e-05, "loss": 76.2467, "step": 33480 }, { "epoch": 0.13530383771619728, "grad_norm": 1723.678955078125, "learning_rate": 4.982455076850129e-05, "loss": 87.5003, "step": 33490 }, { "epoch": 0.1353442389815649, "grad_norm": 622.0546875, "learning_rate": 4.982413770266342e-05, "loss": 60.5188, "step": 33500 }, { "epoch": 0.13538464024693253, "grad_norm": 579.060302734375, "learning_rate": 4.9823724152866226e-05, "loss": 121.2273, "step": 33510 }, { "epoch": 0.13542504151230017, "grad_norm": 638.8704833984375, "learning_rate": 4.982331011911774e-05, "loss": 139.4159, "step": 33520 }, { "epoch": 0.13546544277766778, "grad_norm": 684.1176147460938, "learning_rate": 4.9822895601426034e-05, "loss": 107.1239, "step": 33530 }, { "epoch": 0.13550584404303542, "grad_norm": 626.8060302734375, "learning_rate": 4.982248059979921e-05, "loss": 86.9168, "step": 33540 }, { "epoch": 0.13554624530840306, "grad_norm": 539.0343017578125, "learning_rate": 4.982206511424534e-05, "loss": 69.0915, "step": 33550 }, { "epoch": 0.1355866465737707, "grad_norm": 2398.862548828125, "learning_rate": 4.9821649144772545e-05, "loss": 70.2374, "step": 33560 }, { "epoch": 0.13562704783913831, "grad_norm": 586.1361694335938, "learning_rate": 4.9821232691388906e-05, "loss": 70.2269, "step": 33570 }, { "epoch": 0.13566744910450595, "grad_norm": 850.7914428710938, "learning_rate": 4.982081575410256e-05, "loss": 113.3011, "step": 33580 }, { "epoch": 0.1357078503698736, "grad_norm": 642.4421997070312, "learning_rate": 4.9820398332921634e-05, "loss": 95.5486, "step": 33590 }, { "epoch": 0.1357482516352412, "grad_norm": 706.2947998046875, "learning_rate": 4.981998042785427e-05, "loss": 97.3085, "step": 33600 }, { "epoch": 0.13578865290060885, "grad_norm": 1023.1542358398438, "learning_rate": 4.98195620389086e-05, "loss": 117.8821, "step": 33610 }, { "epoch": 0.1358290541659765, "grad_norm": 480.221923828125, "learning_rate": 4.9819143166092796e-05, "loss": 93.8683, "step": 33620 }, { "epoch": 0.1358694554313441, "grad_norm": 520.1914672851562, "learning_rate": 4.9818723809415016e-05, "loss": 83.5975, "step": 33630 }, { "epoch": 0.13590985669671174, "grad_norm": 2167.59619140625, "learning_rate": 4.981830396888344e-05, "loss": 103.9532, "step": 33640 }, { "epoch": 0.13595025796207938, "grad_norm": 801.8353271484375, "learning_rate": 4.981788364450625e-05, "loss": 146.1724, "step": 33650 }, { "epoch": 0.135990659227447, "grad_norm": 862.556884765625, "learning_rate": 4.981746283629164e-05, "loss": 92.8243, "step": 33660 }, { "epoch": 0.13603106049281463, "grad_norm": 1226.8758544921875, "learning_rate": 4.981704154424781e-05, "loss": 107.6284, "step": 33670 }, { "epoch": 0.13607146175818227, "grad_norm": 1550.146484375, "learning_rate": 4.981661976838299e-05, "loss": 109.7946, "step": 33680 }, { "epoch": 0.13611186302354988, "grad_norm": 619.8020629882812, "learning_rate": 4.981619750870537e-05, "loss": 108.0941, "step": 33690 }, { "epoch": 0.13615226428891752, "grad_norm": 551.2567749023438, "learning_rate": 4.9815774765223226e-05, "loss": 75.5076, "step": 33700 }, { "epoch": 0.13619266555428516, "grad_norm": 1045.827880859375, "learning_rate": 4.9815351537944774e-05, "loss": 64.202, "step": 33710 }, { "epoch": 0.1362330668196528, "grad_norm": 435.85296630859375, "learning_rate": 4.9814927826878256e-05, "loss": 123.6437, "step": 33720 }, { "epoch": 0.13627346808502042, "grad_norm": 1007.7003784179688, "learning_rate": 4.9814503632031954e-05, "loss": 133.8719, "step": 33730 }, { "epoch": 0.13631386935038806, "grad_norm": 1092.2225341796875, "learning_rate": 4.981407895341412e-05, "loss": 91.8184, "step": 33740 }, { "epoch": 0.1363542706157557, "grad_norm": 656.14990234375, "learning_rate": 4.9813653791033057e-05, "loss": 68.1897, "step": 33750 }, { "epoch": 0.1363946718811233, "grad_norm": 1275.6177978515625, "learning_rate": 4.981322814489703e-05, "loss": 91.9896, "step": 33760 }, { "epoch": 0.13643507314649095, "grad_norm": 900.5950927734375, "learning_rate": 4.9812802015014334e-05, "loss": 90.7882, "step": 33770 }, { "epoch": 0.1364754744118586, "grad_norm": 0.0, "learning_rate": 4.981237540139331e-05, "loss": 62.0134, "step": 33780 }, { "epoch": 0.1365158756772262, "grad_norm": 1441.5318603515625, "learning_rate": 4.9811948304042234e-05, "loss": 75.5356, "step": 33790 }, { "epoch": 0.13655627694259384, "grad_norm": 783.706787109375, "learning_rate": 4.9811520722969465e-05, "loss": 109.1127, "step": 33800 }, { "epoch": 0.13659667820796148, "grad_norm": 1493.0281982421875, "learning_rate": 4.981109265818332e-05, "loss": 150.1899, "step": 33810 }, { "epoch": 0.1366370794733291, "grad_norm": 728.7564086914062, "learning_rate": 4.981066410969215e-05, "loss": 64.3308, "step": 33820 }, { "epoch": 0.13667748073869673, "grad_norm": 1312.0137939453125, "learning_rate": 4.981023507750431e-05, "loss": 119.9241, "step": 33830 }, { "epoch": 0.13671788200406437, "grad_norm": 805.4586181640625, "learning_rate": 4.980980556162816e-05, "loss": 107.8721, "step": 33840 }, { "epoch": 0.13675828326943198, "grad_norm": 609.840087890625, "learning_rate": 4.980937556207208e-05, "loss": 89.4216, "step": 33850 }, { "epoch": 0.13679868453479962, "grad_norm": 973.7212524414062, "learning_rate": 4.9808945078844456e-05, "loss": 81.5434, "step": 33860 }, { "epoch": 0.13683908580016726, "grad_norm": 982.0657348632812, "learning_rate": 4.9808514111953674e-05, "loss": 67.6982, "step": 33870 }, { "epoch": 0.1368794870655349, "grad_norm": 524.6974487304688, "learning_rate": 4.980808266140813e-05, "loss": 150.7908, "step": 33880 }, { "epoch": 0.13691988833090252, "grad_norm": 600.6156005859375, "learning_rate": 4.980765072721625e-05, "loss": 107.8641, "step": 33890 }, { "epoch": 0.13696028959627016, "grad_norm": 464.95550537109375, "learning_rate": 4.9807218309386444e-05, "loss": 81.4719, "step": 33900 }, { "epoch": 0.1370006908616378, "grad_norm": 810.7647094726562, "learning_rate": 4.980678540792715e-05, "loss": 91.2513, "step": 33910 }, { "epoch": 0.1370410921270054, "grad_norm": 904.2017211914062, "learning_rate": 4.980635202284679e-05, "loss": 128.9432, "step": 33920 }, { "epoch": 0.13708149339237305, "grad_norm": 751.0636596679688, "learning_rate": 4.980591815415384e-05, "loss": 67.3943, "step": 33930 }, { "epoch": 0.1371218946577407, "grad_norm": 999.1458740234375, "learning_rate": 4.980548380185674e-05, "loss": 122.1365, "step": 33940 }, { "epoch": 0.1371622959231083, "grad_norm": 1589.1033935546875, "learning_rate": 4.980504896596396e-05, "loss": 77.5396, "step": 33950 }, { "epoch": 0.13720269718847594, "grad_norm": 1546.7073974609375, "learning_rate": 4.980461364648398e-05, "loss": 117.3774, "step": 33960 }, { "epoch": 0.13724309845384358, "grad_norm": 920.711669921875, "learning_rate": 4.9804177843425295e-05, "loss": 112.9565, "step": 33970 }, { "epoch": 0.1372834997192112, "grad_norm": 1206.650146484375, "learning_rate": 4.980374155679639e-05, "loss": 111.5966, "step": 33980 }, { "epoch": 0.13732390098457883, "grad_norm": 1469.6834716796875, "learning_rate": 4.980330478660576e-05, "loss": 107.332, "step": 33990 }, { "epoch": 0.13736430224994647, "grad_norm": 1957.3115234375, "learning_rate": 4.980286753286195e-05, "loss": 128.1132, "step": 34000 }, { "epoch": 0.13740470351531409, "grad_norm": 337.71295166015625, "learning_rate": 4.9802429795573455e-05, "loss": 70.1459, "step": 34010 }, { "epoch": 0.13744510478068173, "grad_norm": 714.4182739257812, "learning_rate": 4.980199157474884e-05, "loss": 116.3985, "step": 34020 }, { "epoch": 0.13748550604604937, "grad_norm": 691.6640014648438, "learning_rate": 4.980155287039662e-05, "loss": 54.4459, "step": 34030 }, { "epoch": 0.137525907311417, "grad_norm": 1381.4683837890625, "learning_rate": 4.980111368252535e-05, "loss": 59.3446, "step": 34040 }, { "epoch": 0.13756630857678462, "grad_norm": 1003.1581420898438, "learning_rate": 4.9800674011143614e-05, "loss": 79.539, "step": 34050 }, { "epoch": 0.13760670984215226, "grad_norm": 705.7440795898438, "learning_rate": 4.980023385625996e-05, "loss": 104.7091, "step": 34060 }, { "epoch": 0.1376471111075199, "grad_norm": 655.6250610351562, "learning_rate": 4.979979321788298e-05, "loss": 65.6317, "step": 34070 }, { "epoch": 0.1376875123728875, "grad_norm": 741.8604125976562, "learning_rate": 4.9799352096021266e-05, "loss": 70.4658, "step": 34080 }, { "epoch": 0.13772791363825515, "grad_norm": 2940.106689453125, "learning_rate": 4.979891049068342e-05, "loss": 108.38, "step": 34090 }, { "epoch": 0.1377683149036228, "grad_norm": 924.1039428710938, "learning_rate": 4.979846840187804e-05, "loss": 69.7543, "step": 34100 }, { "epoch": 0.1378087161689904, "grad_norm": 937.28515625, "learning_rate": 4.979802582961375e-05, "loss": 97.4457, "step": 34110 }, { "epoch": 0.13784911743435804, "grad_norm": 1431.635498046875, "learning_rate": 4.979758277389919e-05, "loss": 91.3253, "step": 34120 }, { "epoch": 0.13788951869972568, "grad_norm": 953.5215454101562, "learning_rate": 4.9797139234742975e-05, "loss": 81.4589, "step": 34130 }, { "epoch": 0.1379299199650933, "grad_norm": 4141.5361328125, "learning_rate": 4.9796695212153764e-05, "loss": 154.3838, "step": 34140 }, { "epoch": 0.13797032123046093, "grad_norm": 779.7581176757812, "learning_rate": 4.9796250706140224e-05, "loss": 69.3488, "step": 34150 }, { "epoch": 0.13801072249582857, "grad_norm": 778.5997314453125, "learning_rate": 4.9795805716711e-05, "loss": 135.2858, "step": 34160 }, { "epoch": 0.1380511237611962, "grad_norm": 990.5801391601562, "learning_rate": 4.979536024387479e-05, "loss": 108.3737, "step": 34170 }, { "epoch": 0.13809152502656383, "grad_norm": 715.4221801757812, "learning_rate": 4.979491428764026e-05, "loss": 95.1732, "step": 34180 }, { "epoch": 0.13813192629193147, "grad_norm": 2391.443115234375, "learning_rate": 4.979446784801611e-05, "loss": 151.664, "step": 34190 }, { "epoch": 0.1381723275572991, "grad_norm": 1992.6207275390625, "learning_rate": 4.9794020925011044e-05, "loss": 82.8294, "step": 34200 }, { "epoch": 0.13821272882266672, "grad_norm": 1085.1295166015625, "learning_rate": 4.979357351863377e-05, "loss": 116.4222, "step": 34210 }, { "epoch": 0.13825313008803436, "grad_norm": 607.43603515625, "learning_rate": 4.979312562889302e-05, "loss": 71.7245, "step": 34220 }, { "epoch": 0.138293531353402, "grad_norm": 782.8214111328125, "learning_rate": 4.9792677255797525e-05, "loss": 93.9537, "step": 34230 }, { "epoch": 0.1383339326187696, "grad_norm": 735.2789306640625, "learning_rate": 4.979222839935602e-05, "loss": 71.7033, "step": 34240 }, { "epoch": 0.13837433388413725, "grad_norm": 1233.6981201171875, "learning_rate": 4.979177905957726e-05, "loss": 90.7622, "step": 34250 }, { "epoch": 0.1384147351495049, "grad_norm": 567.0103149414062, "learning_rate": 4.979132923647001e-05, "loss": 135.1103, "step": 34260 }, { "epoch": 0.1384551364148725, "grad_norm": 594.86669921875, "learning_rate": 4.979087893004302e-05, "loss": 82.0568, "step": 34270 }, { "epoch": 0.13849553768024014, "grad_norm": 900.0689086914062, "learning_rate": 4.979042814030509e-05, "loss": 91.8436, "step": 34280 }, { "epoch": 0.13853593894560778, "grad_norm": 0.0, "learning_rate": 4.9789976867265e-05, "loss": 67.3411, "step": 34290 }, { "epoch": 0.1385763402109754, "grad_norm": 1077.3468017578125, "learning_rate": 4.9789525110931545e-05, "loss": 119.6317, "step": 34300 }, { "epoch": 0.13861674147634304, "grad_norm": 2667.85400390625, "learning_rate": 4.978907287131354e-05, "loss": 95.113, "step": 34310 }, { "epoch": 0.13865714274171068, "grad_norm": 1062.1884765625, "learning_rate": 4.978862014841979e-05, "loss": 77.8131, "step": 34320 }, { "epoch": 0.1386975440070783, "grad_norm": 3490.418701171875, "learning_rate": 4.9788166942259135e-05, "loss": 124.19, "step": 34330 }, { "epoch": 0.13873794527244593, "grad_norm": 351.10784912109375, "learning_rate": 4.97877132528404e-05, "loss": 100.0667, "step": 34340 }, { "epoch": 0.13877834653781357, "grad_norm": 901.481201171875, "learning_rate": 4.978725908017243e-05, "loss": 80.7147, "step": 34350 }, { "epoch": 0.1388187478031812, "grad_norm": 575.747802734375, "learning_rate": 4.9786804424264085e-05, "loss": 107.6961, "step": 34360 }, { "epoch": 0.13885914906854882, "grad_norm": 816.9763793945312, "learning_rate": 4.9786349285124235e-05, "loss": 104.7212, "step": 34370 }, { "epoch": 0.13889955033391646, "grad_norm": 672.7227783203125, "learning_rate": 4.978589366276174e-05, "loss": 95.8187, "step": 34380 }, { "epoch": 0.1389399515992841, "grad_norm": 1123.2491455078125, "learning_rate": 4.978543755718549e-05, "loss": 97.5948, "step": 34390 }, { "epoch": 0.1389803528646517, "grad_norm": 741.401611328125, "learning_rate": 4.978498096840436e-05, "loss": 77.3318, "step": 34400 }, { "epoch": 0.13902075413001935, "grad_norm": 2102.46923828125, "learning_rate": 4.978452389642728e-05, "loss": 80.4057, "step": 34410 }, { "epoch": 0.139061155395387, "grad_norm": 1185.681396484375, "learning_rate": 4.978406634126315e-05, "loss": 105.0425, "step": 34420 }, { "epoch": 0.1391015566607546, "grad_norm": 1234.7724609375, "learning_rate": 4.9783608302920873e-05, "loss": 94.0603, "step": 34430 }, { "epoch": 0.13914195792612225, "grad_norm": 1568.1796875, "learning_rate": 4.9783149781409404e-05, "loss": 123.3275, "step": 34440 }, { "epoch": 0.13918235919148988, "grad_norm": 1667.6737060546875, "learning_rate": 4.978269077673767e-05, "loss": 62.5816, "step": 34450 }, { "epoch": 0.1392227604568575, "grad_norm": 880.3914794921875, "learning_rate": 4.9782231288914614e-05, "loss": 80.6274, "step": 34460 }, { "epoch": 0.13926316172222514, "grad_norm": 473.5460510253906, "learning_rate": 4.97817713179492e-05, "loss": 136.4202, "step": 34470 }, { "epoch": 0.13930356298759278, "grad_norm": 1051.316162109375, "learning_rate": 4.9781310863850405e-05, "loss": 79.2705, "step": 34480 }, { "epoch": 0.1393439642529604, "grad_norm": 942.3121948242188, "learning_rate": 4.978084992662719e-05, "loss": 87.1078, "step": 34490 }, { "epoch": 0.13938436551832803, "grad_norm": 663.4690551757812, "learning_rate": 4.978038850628854e-05, "loss": 87.4095, "step": 34500 }, { "epoch": 0.13942476678369567, "grad_norm": 469.06475830078125, "learning_rate": 4.977992660284347e-05, "loss": 92.2786, "step": 34510 }, { "epoch": 0.1394651680490633, "grad_norm": 452.9181213378906, "learning_rate": 4.977946421630098e-05, "loss": 109.0747, "step": 34520 }, { "epoch": 0.13950556931443092, "grad_norm": 3418.622314453125, "learning_rate": 4.977900134667006e-05, "loss": 92.9968, "step": 34530 }, { "epoch": 0.13954597057979856, "grad_norm": 934.139892578125, "learning_rate": 4.977853799395976e-05, "loss": 86.7001, "step": 34540 }, { "epoch": 0.1395863718451662, "grad_norm": 541.4644165039062, "learning_rate": 4.97780741581791e-05, "loss": 125.5777, "step": 34550 }, { "epoch": 0.13962677311053381, "grad_norm": 1703.493896484375, "learning_rate": 4.977760983933714e-05, "loss": 113.32, "step": 34560 }, { "epoch": 0.13966717437590145, "grad_norm": 469.1068115234375, "learning_rate": 4.9777145037442906e-05, "loss": 73.509, "step": 34570 }, { "epoch": 0.1397075756412691, "grad_norm": 445.8856201171875, "learning_rate": 4.9776679752505476e-05, "loss": 72.2943, "step": 34580 }, { "epoch": 0.1397479769066367, "grad_norm": 636.2007446289062, "learning_rate": 4.977621398453393e-05, "loss": 91.6618, "step": 34590 }, { "epoch": 0.13978837817200435, "grad_norm": 727.9812622070312, "learning_rate": 4.977574773353732e-05, "loss": 101.3671, "step": 34600 }, { "epoch": 0.13982877943737199, "grad_norm": 1291.48974609375, "learning_rate": 4.9775280999524766e-05, "loss": 75.3459, "step": 34610 }, { "epoch": 0.1398691807027396, "grad_norm": 975.12109375, "learning_rate": 4.9774813782505346e-05, "loss": 108.9374, "step": 34620 }, { "epoch": 0.13990958196810724, "grad_norm": 1339.5286865234375, "learning_rate": 4.9774346082488176e-05, "loss": 62.3226, "step": 34630 }, { "epoch": 0.13994998323347488, "grad_norm": 1278.2696533203125, "learning_rate": 4.977387789948238e-05, "loss": 111.346, "step": 34640 }, { "epoch": 0.1399903844988425, "grad_norm": 582.800537109375, "learning_rate": 4.977340923349707e-05, "loss": 59.1344, "step": 34650 }, { "epoch": 0.14003078576421013, "grad_norm": 980.9688110351562, "learning_rate": 4.9772940084541405e-05, "loss": 116.2872, "step": 34660 }, { "epoch": 0.14007118702957777, "grad_norm": 600.323486328125, "learning_rate": 4.9772470452624506e-05, "loss": 98.4954, "step": 34670 }, { "epoch": 0.1401115882949454, "grad_norm": 1239.6783447265625, "learning_rate": 4.977200033775555e-05, "loss": 108.7115, "step": 34680 }, { "epoch": 0.14015198956031302, "grad_norm": 1736.9547119140625, "learning_rate": 4.97715297399437e-05, "loss": 134.5554, "step": 34690 }, { "epoch": 0.14019239082568066, "grad_norm": 2366.77490234375, "learning_rate": 4.977105865919812e-05, "loss": 127.1646, "step": 34700 }, { "epoch": 0.1402327920910483, "grad_norm": 1988.361083984375, "learning_rate": 4.9770587095527995e-05, "loss": 107.3173, "step": 34710 }, { "epoch": 0.14027319335641592, "grad_norm": 632.1370239257812, "learning_rate": 4.977011504894252e-05, "loss": 84.2821, "step": 34720 }, { "epoch": 0.14031359462178356, "grad_norm": 1778.7666015625, "learning_rate": 4.9769642519450904e-05, "loss": 83.9897, "step": 34730 }, { "epoch": 0.1403539958871512, "grad_norm": 652.8768310546875, "learning_rate": 4.9769169507062355e-05, "loss": 102.6689, "step": 34740 }, { "epoch": 0.1403943971525188, "grad_norm": 1475.0391845703125, "learning_rate": 4.976869601178609e-05, "loss": 117.3591, "step": 34750 }, { "epoch": 0.14043479841788645, "grad_norm": 1975.065673828125, "learning_rate": 4.976822203363135e-05, "loss": 98.2487, "step": 34760 }, { "epoch": 0.1404751996832541, "grad_norm": 1050.9573974609375, "learning_rate": 4.976774757260737e-05, "loss": 94.4713, "step": 34770 }, { "epoch": 0.1405156009486217, "grad_norm": 1176.8922119140625, "learning_rate": 4.9767272628723396e-05, "loss": 99.8589, "step": 34780 }, { "epoch": 0.14055600221398934, "grad_norm": 1319.9659423828125, "learning_rate": 4.976679720198869e-05, "loss": 100.7659, "step": 34790 }, { "epoch": 0.14059640347935698, "grad_norm": 515.001220703125, "learning_rate": 4.976632129241252e-05, "loss": 77.551, "step": 34800 }, { "epoch": 0.1406368047447246, "grad_norm": 822.0165405273438, "learning_rate": 4.9765844900004176e-05, "loss": 82.4817, "step": 34810 }, { "epoch": 0.14067720601009223, "grad_norm": 1176.460693359375, "learning_rate": 4.976536802477293e-05, "loss": 116.3207, "step": 34820 }, { "epoch": 0.14071760727545987, "grad_norm": 1214.6893310546875, "learning_rate": 4.976489066672808e-05, "loss": 90.3449, "step": 34830 }, { "epoch": 0.1407580085408275, "grad_norm": 610.4821166992188, "learning_rate": 4.9764412825878943e-05, "loss": 104.2971, "step": 34840 }, { "epoch": 0.14079840980619512, "grad_norm": 1629.2425537109375, "learning_rate": 4.976393450223482e-05, "loss": 100.4276, "step": 34850 }, { "epoch": 0.14083881107156276, "grad_norm": 1354.5982666015625, "learning_rate": 4.9763455695805056e-05, "loss": 87.7344, "step": 34860 }, { "epoch": 0.1408792123369304, "grad_norm": 569.3548583984375, "learning_rate": 4.976297640659897e-05, "loss": 72.0556, "step": 34870 }, { "epoch": 0.14091961360229802, "grad_norm": 1153.3983154296875, "learning_rate": 4.97624966346259e-05, "loss": 76.03, "step": 34880 }, { "epoch": 0.14096001486766566, "grad_norm": 550.9409790039062, "learning_rate": 4.9762016379895225e-05, "loss": 106.2172, "step": 34890 }, { "epoch": 0.1410004161330333, "grad_norm": 621.2599487304688, "learning_rate": 4.976153564241628e-05, "loss": 85.7488, "step": 34900 }, { "epoch": 0.1410408173984009, "grad_norm": 1340.081787109375, "learning_rate": 4.976105442219846e-05, "loss": 108.7937, "step": 34910 }, { "epoch": 0.14108121866376855, "grad_norm": 2728.06201171875, "learning_rate": 4.976057271925113e-05, "loss": 80.4239, "step": 34920 }, { "epoch": 0.1411216199291362, "grad_norm": 571.938720703125, "learning_rate": 4.9760090533583686e-05, "loss": 61.1496, "step": 34930 }, { "epoch": 0.1411620211945038, "grad_norm": 421.5497131347656, "learning_rate": 4.9759607865205534e-05, "loss": 151.0666, "step": 34940 }, { "epoch": 0.14120242245987144, "grad_norm": 1543.9168701171875, "learning_rate": 4.975912471412607e-05, "loss": 121.0911, "step": 34950 }, { "epoch": 0.14124282372523908, "grad_norm": 1396.1658935546875, "learning_rate": 4.975864108035474e-05, "loss": 94.3398, "step": 34960 }, { "epoch": 0.1412832249906067, "grad_norm": 785.8390502929688, "learning_rate": 4.975815696390094e-05, "loss": 89.429, "step": 34970 }, { "epoch": 0.14132362625597433, "grad_norm": 719.430908203125, "learning_rate": 4.975767236477413e-05, "loss": 116.3824, "step": 34980 }, { "epoch": 0.14136402752134197, "grad_norm": 662.4757080078125, "learning_rate": 4.975718728298375e-05, "loss": 87.5541, "step": 34990 }, { "epoch": 0.1414044287867096, "grad_norm": 1017.3776245117188, "learning_rate": 4.975670171853926e-05, "loss": 230.2885, "step": 35000 }, { "epoch": 0.14144483005207723, "grad_norm": 391.976806640625, "learning_rate": 4.975621567145012e-05, "loss": 81.4888, "step": 35010 }, { "epoch": 0.14148523131744487, "grad_norm": 1794.4031982421875, "learning_rate": 4.975572914172582e-05, "loss": 102.6515, "step": 35020 }, { "epoch": 0.1415256325828125, "grad_norm": 937.76123046875, "learning_rate": 4.975524212937582e-05, "loss": 108.7728, "step": 35030 }, { "epoch": 0.14156603384818012, "grad_norm": 1814.7794189453125, "learning_rate": 4.975475463440964e-05, "loss": 93.3038, "step": 35040 }, { "epoch": 0.14160643511354776, "grad_norm": 1418.080322265625, "learning_rate": 4.975426665683678e-05, "loss": 92.7187, "step": 35050 }, { "epoch": 0.1416468363789154, "grad_norm": 1647.2652587890625, "learning_rate": 4.9753778196666737e-05, "loss": 99.5951, "step": 35060 }, { "epoch": 0.141687237644283, "grad_norm": 728.3031005859375, "learning_rate": 4.975328925390904e-05, "loss": 78.1284, "step": 35070 }, { "epoch": 0.14172763890965065, "grad_norm": 1265.145263671875, "learning_rate": 4.975279982857324e-05, "loss": 141.2944, "step": 35080 }, { "epoch": 0.1417680401750183, "grad_norm": 1588.732666015625, "learning_rate": 4.975230992066885e-05, "loss": 80.3437, "step": 35090 }, { "epoch": 0.1418084414403859, "grad_norm": 0.0, "learning_rate": 4.975181953020544e-05, "loss": 101.6614, "step": 35100 }, { "epoch": 0.14184884270575354, "grad_norm": 2776.19140625, "learning_rate": 4.9751328657192565e-05, "loss": 95.6715, "step": 35110 }, { "epoch": 0.14188924397112118, "grad_norm": 2080.83837890625, "learning_rate": 4.9750837301639796e-05, "loss": 153.1414, "step": 35120 }, { "epoch": 0.1419296452364888, "grad_norm": 912.7608642578125, "learning_rate": 4.975034546355671e-05, "loss": 106.0554, "step": 35130 }, { "epoch": 0.14197004650185643, "grad_norm": 358.8157043457031, "learning_rate": 4.97498531429529e-05, "loss": 95.7549, "step": 35140 }, { "epoch": 0.14201044776722407, "grad_norm": 387.9512634277344, "learning_rate": 4.974936033983795e-05, "loss": 88.8157, "step": 35150 }, { "epoch": 0.14205084903259171, "grad_norm": 1014.5821533203125, "learning_rate": 4.974886705422149e-05, "loss": 103.6284, "step": 35160 }, { "epoch": 0.14209125029795933, "grad_norm": 1092.1754150390625, "learning_rate": 4.974837328611312e-05, "loss": 107.7396, "step": 35170 }, { "epoch": 0.14213165156332697, "grad_norm": 926.5281372070312, "learning_rate": 4.974787903552247e-05, "loss": 78.8734, "step": 35180 }, { "epoch": 0.1421720528286946, "grad_norm": 546.8348999023438, "learning_rate": 4.974738430245918e-05, "loss": 189.5167, "step": 35190 }, { "epoch": 0.14221245409406222, "grad_norm": 581.217041015625, "learning_rate": 4.9746889086932895e-05, "loss": 82.9471, "step": 35200 }, { "epoch": 0.14225285535942986, "grad_norm": 414.46807861328125, "learning_rate": 4.974639338895326e-05, "loss": 61.2459, "step": 35210 }, { "epoch": 0.1422932566247975, "grad_norm": 1268.71826171875, "learning_rate": 4.9745897208529956e-05, "loss": 102.3453, "step": 35220 }, { "epoch": 0.1423336578901651, "grad_norm": 1139.93115234375, "learning_rate": 4.974540054567264e-05, "loss": 82.09, "step": 35230 }, { "epoch": 0.14237405915553275, "grad_norm": 1343.3614501953125, "learning_rate": 4.9744903400391e-05, "loss": 98.7603, "step": 35240 }, { "epoch": 0.1424144604209004, "grad_norm": 1799.45263671875, "learning_rate": 4.9744405772694725e-05, "loss": 90.476, "step": 35250 }, { "epoch": 0.142454861686268, "grad_norm": 740.581787109375, "learning_rate": 4.9743907662593524e-05, "loss": 80.5426, "step": 35260 }, { "epoch": 0.14249526295163564, "grad_norm": 689.4889526367188, "learning_rate": 4.97434090700971e-05, "loss": 109.3305, "step": 35270 }, { "epoch": 0.14253566421700328, "grad_norm": 1570.7303466796875, "learning_rate": 4.974290999521519e-05, "loss": 115.1295, "step": 35280 }, { "epoch": 0.1425760654823709, "grad_norm": 1887.6724853515625, "learning_rate": 4.97424104379575e-05, "loss": 78.6015, "step": 35290 }, { "epoch": 0.14261646674773854, "grad_norm": 906.8424072265625, "learning_rate": 4.974191039833378e-05, "loss": 62.9002, "step": 35300 }, { "epoch": 0.14265686801310618, "grad_norm": 1138.534423828125, "learning_rate": 4.974140987635378e-05, "loss": 114.5066, "step": 35310 }, { "epoch": 0.14269726927847382, "grad_norm": 5530.13232421875, "learning_rate": 4.974090887202726e-05, "loss": 101.5712, "step": 35320 }, { "epoch": 0.14273767054384143, "grad_norm": 571.6856079101562, "learning_rate": 4.9740407385363983e-05, "loss": 87.484, "step": 35330 }, { "epoch": 0.14277807180920907, "grad_norm": 742.740478515625, "learning_rate": 4.973990541637373e-05, "loss": 116.8295, "step": 35340 }, { "epoch": 0.1428184730745767, "grad_norm": 1002.74072265625, "learning_rate": 4.9739402965066276e-05, "loss": 61.5279, "step": 35350 }, { "epoch": 0.14285887433994432, "grad_norm": 782.5518188476562, "learning_rate": 4.973890003145143e-05, "loss": 104.612, "step": 35360 }, { "epoch": 0.14289927560531196, "grad_norm": 3795.69970703125, "learning_rate": 4.973839661553899e-05, "loss": 90.4234, "step": 35370 }, { "epoch": 0.1429396768706796, "grad_norm": 679.3167724609375, "learning_rate": 4.9737892717338774e-05, "loss": 101.094, "step": 35380 }, { "epoch": 0.1429800781360472, "grad_norm": 1362.4091796875, "learning_rate": 4.97373883368606e-05, "loss": 121.7002, "step": 35390 }, { "epoch": 0.14302047940141485, "grad_norm": 692.154296875, "learning_rate": 4.973688347411431e-05, "loss": 99.3686, "step": 35400 }, { "epoch": 0.1430608806667825, "grad_norm": 1434.7564697265625, "learning_rate": 4.973637812910973e-05, "loss": 98.6788, "step": 35410 }, { "epoch": 0.1431012819321501, "grad_norm": 993.9842529296875, "learning_rate": 4.9735872301856734e-05, "loss": 98.5837, "step": 35420 }, { "epoch": 0.14314168319751774, "grad_norm": 772.5789794921875, "learning_rate": 4.973536599236517e-05, "loss": 114.3075, "step": 35430 }, { "epoch": 0.14318208446288538, "grad_norm": 1945.123046875, "learning_rate": 4.9734859200644905e-05, "loss": 80.5528, "step": 35440 }, { "epoch": 0.143222485728253, "grad_norm": 1043.6229248046875, "learning_rate": 4.973435192670584e-05, "loss": 104.7841, "step": 35450 }, { "epoch": 0.14326288699362064, "grad_norm": 0.0, "learning_rate": 4.973384417055784e-05, "loss": 83.0118, "step": 35460 }, { "epoch": 0.14330328825898828, "grad_norm": 1075.596435546875, "learning_rate": 4.9733335932210814e-05, "loss": 87.0017, "step": 35470 }, { "epoch": 0.14334368952435592, "grad_norm": 1106.34765625, "learning_rate": 4.973282721167467e-05, "loss": 84.2105, "step": 35480 }, { "epoch": 0.14338409078972353, "grad_norm": 680.8936157226562, "learning_rate": 4.973231800895932e-05, "loss": 102.21, "step": 35490 }, { "epoch": 0.14342449205509117, "grad_norm": 971.0728759765625, "learning_rate": 4.9731808324074717e-05, "loss": 89.0123, "step": 35500 }, { "epoch": 0.1434648933204588, "grad_norm": 1773.6971435546875, "learning_rate": 4.973129815703076e-05, "loss": 60.7568, "step": 35510 }, { "epoch": 0.14350529458582642, "grad_norm": 1183.9600830078125, "learning_rate": 4.973078750783742e-05, "loss": 91.8321, "step": 35520 }, { "epoch": 0.14354569585119406, "grad_norm": 540.8861694335938, "learning_rate": 4.973027637650464e-05, "loss": 82.3345, "step": 35530 }, { "epoch": 0.1435860971165617, "grad_norm": 2457.4091796875, "learning_rate": 4.9729764763042394e-05, "loss": 98.1432, "step": 35540 }, { "epoch": 0.1436264983819293, "grad_norm": 778.1422729492188, "learning_rate": 4.9729252667460655e-05, "loss": 85.1171, "step": 35550 }, { "epoch": 0.14366689964729695, "grad_norm": 501.1910095214844, "learning_rate": 4.97287400897694e-05, "loss": 138.8153, "step": 35560 }, { "epoch": 0.1437073009126646, "grad_norm": 1607.3275146484375, "learning_rate": 4.972822702997863e-05, "loss": 80.1325, "step": 35570 }, { "epoch": 0.1437477021780322, "grad_norm": 796.763427734375, "learning_rate": 4.9727713488098335e-05, "loss": 79.0268, "step": 35580 }, { "epoch": 0.14378810344339985, "grad_norm": 661.5986938476562, "learning_rate": 4.972719946413854e-05, "loss": 88.3111, "step": 35590 }, { "epoch": 0.14382850470876749, "grad_norm": 1435.8564453125, "learning_rate": 4.9726684958109266e-05, "loss": 107.5516, "step": 35600 }, { "epoch": 0.1438689059741351, "grad_norm": 679.6939697265625, "learning_rate": 4.972616997002053e-05, "loss": 75.3328, "step": 35610 }, { "epoch": 0.14390930723950274, "grad_norm": 1368.540283203125, "learning_rate": 4.972565449988239e-05, "loss": 83.6638, "step": 35620 }, { "epoch": 0.14394970850487038, "grad_norm": 503.572998046875, "learning_rate": 4.972513854770487e-05, "loss": 82.9908, "step": 35630 }, { "epoch": 0.14399010977023802, "grad_norm": 1151.1427001953125, "learning_rate": 4.972462211349806e-05, "loss": 94.219, "step": 35640 }, { "epoch": 0.14403051103560563, "grad_norm": 2658.9482421875, "learning_rate": 4.972410519727201e-05, "loss": 91.6265, "step": 35650 }, { "epoch": 0.14407091230097327, "grad_norm": 953.5581665039062, "learning_rate": 4.97235877990368e-05, "loss": 96.4948, "step": 35660 }, { "epoch": 0.1441113135663409, "grad_norm": 898.052490234375, "learning_rate": 4.972306991880251e-05, "loss": 144.139, "step": 35670 }, { "epoch": 0.14415171483170852, "grad_norm": 338.9049072265625, "learning_rate": 4.972255155657925e-05, "loss": 81.2305, "step": 35680 }, { "epoch": 0.14419211609707616, "grad_norm": 662.849365234375, "learning_rate": 4.972203271237712e-05, "loss": 82.9598, "step": 35690 }, { "epoch": 0.1442325173624438, "grad_norm": 488.6361999511719, "learning_rate": 4.972151338620623e-05, "loss": 64.3388, "step": 35700 }, { "epoch": 0.14427291862781141, "grad_norm": 1745.017578125, "learning_rate": 4.972099357807671e-05, "loss": 146.5352, "step": 35710 }, { "epoch": 0.14431331989317905, "grad_norm": 717.8941650390625, "learning_rate": 4.9720473287998695e-05, "loss": 82.1567, "step": 35720 }, { "epoch": 0.1443537211585467, "grad_norm": 3314.039794921875, "learning_rate": 4.9719952515982324e-05, "loss": 81.9564, "step": 35730 }, { "epoch": 0.1443941224239143, "grad_norm": 716.9427490234375, "learning_rate": 4.9719431262037755e-05, "loss": 94.6004, "step": 35740 }, { "epoch": 0.14443452368928195, "grad_norm": 591.388427734375, "learning_rate": 4.971890952617515e-05, "loss": 107.062, "step": 35750 }, { "epoch": 0.1444749249546496, "grad_norm": 1554.075927734375, "learning_rate": 4.9718387308404675e-05, "loss": 119.4833, "step": 35760 }, { "epoch": 0.1445153262200172, "grad_norm": 440.0912780761719, "learning_rate": 4.9717864608736506e-05, "loss": 104.854, "step": 35770 }, { "epoch": 0.14455572748538484, "grad_norm": 692.7317504882812, "learning_rate": 4.971734142718085e-05, "loss": 116.6411, "step": 35780 }, { "epoch": 0.14459612875075248, "grad_norm": 520.4461059570312, "learning_rate": 4.971681776374789e-05, "loss": 111.4582, "step": 35790 }, { "epoch": 0.14463653001612012, "grad_norm": 762.310546875, "learning_rate": 4.971629361844785e-05, "loss": 102.7609, "step": 35800 }, { "epoch": 0.14467693128148773, "grad_norm": 852.2344970703125, "learning_rate": 4.971576899129094e-05, "loss": 121.1159, "step": 35810 }, { "epoch": 0.14471733254685537, "grad_norm": 445.6109924316406, "learning_rate": 4.9715243882287386e-05, "loss": 89.8152, "step": 35820 }, { "epoch": 0.144757733812223, "grad_norm": 950.6737060546875, "learning_rate": 4.971471829144743e-05, "loss": 75.9464, "step": 35830 }, { "epoch": 0.14479813507759062, "grad_norm": 726.3805541992188, "learning_rate": 4.9714192218781316e-05, "loss": 81.0347, "step": 35840 }, { "epoch": 0.14483853634295826, "grad_norm": 1368.5889892578125, "learning_rate": 4.97136656642993e-05, "loss": 94.3249, "step": 35850 }, { "epoch": 0.1448789376083259, "grad_norm": 2891.8837890625, "learning_rate": 4.9713138628011654e-05, "loss": 88.5745, "step": 35860 }, { "epoch": 0.14491933887369352, "grad_norm": 883.2448120117188, "learning_rate": 4.971261110992864e-05, "loss": 86.1088, "step": 35870 }, { "epoch": 0.14495974013906116, "grad_norm": 750.9356689453125, "learning_rate": 4.9712083110060556e-05, "loss": 157.1846, "step": 35880 }, { "epoch": 0.1450001414044288, "grad_norm": 1293.0548095703125, "learning_rate": 4.971155462841769e-05, "loss": 97.4993, "step": 35890 }, { "epoch": 0.1450405426697964, "grad_norm": 1317.3045654296875, "learning_rate": 4.971102566501034e-05, "loss": 77.7684, "step": 35900 }, { "epoch": 0.14508094393516405, "grad_norm": 698.4030151367188, "learning_rate": 4.971049621984882e-05, "loss": 68.1086, "step": 35910 }, { "epoch": 0.1451213452005317, "grad_norm": 491.6965637207031, "learning_rate": 4.9709966292943455e-05, "loss": 85.3335, "step": 35920 }, { "epoch": 0.1451617464658993, "grad_norm": 3422.90380859375, "learning_rate": 4.970943588430458e-05, "loss": 122.6019, "step": 35930 }, { "epoch": 0.14520214773126694, "grad_norm": 933.2683715820312, "learning_rate": 4.970890499394253e-05, "loss": 116.233, "step": 35940 }, { "epoch": 0.14524254899663458, "grad_norm": 1370.2513427734375, "learning_rate": 4.9708373621867656e-05, "loss": 68.2386, "step": 35950 }, { "epoch": 0.14528295026200222, "grad_norm": 2012.014892578125, "learning_rate": 4.9707841768090314e-05, "loss": 73.6452, "step": 35960 }, { "epoch": 0.14532335152736983, "grad_norm": 1204.0938720703125, "learning_rate": 4.9707309432620874e-05, "loss": 82.8698, "step": 35970 }, { "epoch": 0.14536375279273747, "grad_norm": 1037.1630859375, "learning_rate": 4.9706776615469716e-05, "loss": 80.2174, "step": 35980 }, { "epoch": 0.1454041540581051, "grad_norm": 566.7116088867188, "learning_rate": 4.970624331664724e-05, "loss": 80.2169, "step": 35990 }, { "epoch": 0.14544455532347272, "grad_norm": 2502.911865234375, "learning_rate": 4.9705709536163824e-05, "loss": 148.008, "step": 36000 }, { "epoch": 0.14548495658884036, "grad_norm": 569.8108520507812, "learning_rate": 4.970517527402988e-05, "loss": 70.3217, "step": 36010 }, { "epoch": 0.145525357854208, "grad_norm": 635.7393188476562, "learning_rate": 4.9704640530255826e-05, "loss": 84.3072, "step": 36020 }, { "epoch": 0.14556575911957562, "grad_norm": 365.2387390136719, "learning_rate": 4.970410530485209e-05, "loss": 60.3312, "step": 36030 }, { "epoch": 0.14560616038494326, "grad_norm": 529.880126953125, "learning_rate": 4.970356959782909e-05, "loss": 125.5716, "step": 36040 }, { "epoch": 0.1456465616503109, "grad_norm": 928.6548461914062, "learning_rate": 4.97030334091973e-05, "loss": 98.0715, "step": 36050 }, { "epoch": 0.1456869629156785, "grad_norm": 937.4443359375, "learning_rate": 4.970249673896714e-05, "loss": 123.7811, "step": 36060 }, { "epoch": 0.14572736418104615, "grad_norm": 664.5932006835938, "learning_rate": 4.970195958714909e-05, "loss": 103.4508, "step": 36070 }, { "epoch": 0.1457677654464138, "grad_norm": 600.3163452148438, "learning_rate": 4.970142195375363e-05, "loss": 72.4148, "step": 36080 }, { "epoch": 0.1458081667117814, "grad_norm": 602.3806762695312, "learning_rate": 4.970088383879123e-05, "loss": 103.1301, "step": 36090 }, { "epoch": 0.14584856797714904, "grad_norm": 737.54443359375, "learning_rate": 4.970034524227238e-05, "loss": 80.4645, "step": 36100 }, { "epoch": 0.14588896924251668, "grad_norm": 1445.3460693359375, "learning_rate": 4.969980616420759e-05, "loss": 75.5717, "step": 36110 }, { "epoch": 0.14592937050788432, "grad_norm": 385.9198913574219, "learning_rate": 4.9699266604607355e-05, "loss": 137.0707, "step": 36120 }, { "epoch": 0.14596977177325193, "grad_norm": 0.0, "learning_rate": 4.96987265634822e-05, "loss": 88.1988, "step": 36130 }, { "epoch": 0.14601017303861957, "grad_norm": 793.6890869140625, "learning_rate": 4.9698186040842654e-05, "loss": 93.8744, "step": 36140 }, { "epoch": 0.1460505743039872, "grad_norm": 1040.031494140625, "learning_rate": 4.969764503669926e-05, "loss": 67.5155, "step": 36150 }, { "epoch": 0.14609097556935483, "grad_norm": 1021.9496459960938, "learning_rate": 4.9697103551062556e-05, "loss": 69.8323, "step": 36160 }, { "epoch": 0.14613137683472247, "grad_norm": 1474.1693115234375, "learning_rate": 4.9696561583943106e-05, "loss": 100.1957, "step": 36170 }, { "epoch": 0.1461717781000901, "grad_norm": 1472.6767578125, "learning_rate": 4.969601913535148e-05, "loss": 91.6, "step": 36180 }, { "epoch": 0.14621217936545772, "grad_norm": 689.4755249023438, "learning_rate": 4.9695476205298235e-05, "loss": 146.9316, "step": 36190 }, { "epoch": 0.14625258063082536, "grad_norm": 1301.0394287109375, "learning_rate": 4.969493279379398e-05, "loss": 75.4633, "step": 36200 }, { "epoch": 0.146292981896193, "grad_norm": 708.2933349609375, "learning_rate": 4.9694388900849284e-05, "loss": 73.6771, "step": 36210 }, { "epoch": 0.1463333831615606, "grad_norm": 0.0, "learning_rate": 4.969384452647477e-05, "loss": 81.5898, "step": 36220 }, { "epoch": 0.14637378442692825, "grad_norm": 1018.5614013671875, "learning_rate": 4.969329967068104e-05, "loss": 98.2319, "step": 36230 }, { "epoch": 0.1464141856922959, "grad_norm": 1839.9981689453125, "learning_rate": 4.969275433347872e-05, "loss": 88.1999, "step": 36240 }, { "epoch": 0.1464545869576635, "grad_norm": 549.9813842773438, "learning_rate": 4.9692208514878444e-05, "loss": 79.6468, "step": 36250 }, { "epoch": 0.14649498822303114, "grad_norm": 615.52099609375, "learning_rate": 4.9691662214890856e-05, "loss": 93.557, "step": 36260 }, { "epoch": 0.14653538948839878, "grad_norm": 0.0, "learning_rate": 4.969111543352659e-05, "loss": 159.444, "step": 36270 }, { "epoch": 0.14657579075376642, "grad_norm": 1021.6466674804688, "learning_rate": 4.969056817079633e-05, "loss": 93.9579, "step": 36280 }, { "epoch": 0.14661619201913403, "grad_norm": 1050.8099365234375, "learning_rate": 4.969002042671072e-05, "loss": 92.0323, "step": 36290 }, { "epoch": 0.14665659328450167, "grad_norm": 933.8421020507812, "learning_rate": 4.968947220128045e-05, "loss": 74.9989, "step": 36300 }, { "epoch": 0.14669699454986931, "grad_norm": 775.214111328125, "learning_rate": 4.968892349451621e-05, "loss": 109.3062, "step": 36310 }, { "epoch": 0.14673739581523693, "grad_norm": 275.0602111816406, "learning_rate": 4.9688374306428696e-05, "loss": 79.9519, "step": 36320 }, { "epoch": 0.14677779708060457, "grad_norm": 1294.7794189453125, "learning_rate": 4.9687824637028625e-05, "loss": 113.0867, "step": 36330 }, { "epoch": 0.1468181983459722, "grad_norm": 745.861572265625, "learning_rate": 4.968727448632669e-05, "loss": 76.9137, "step": 36340 }, { "epoch": 0.14685859961133982, "grad_norm": 776.559326171875, "learning_rate": 4.968672385433364e-05, "loss": 86.1525, "step": 36350 }, { "epoch": 0.14689900087670746, "grad_norm": 1309.9783935546875, "learning_rate": 4.968617274106019e-05, "loss": 79.5693, "step": 36360 }, { "epoch": 0.1469394021420751, "grad_norm": 1547.9395751953125, "learning_rate": 4.968562114651709e-05, "loss": 116.9439, "step": 36370 }, { "epoch": 0.1469798034074427, "grad_norm": 453.9913024902344, "learning_rate": 4.9685069070715106e-05, "loss": 74.6536, "step": 36380 }, { "epoch": 0.14702020467281035, "grad_norm": 346.8396911621094, "learning_rate": 4.968451651366498e-05, "loss": 95.8461, "step": 36390 }, { "epoch": 0.147060605938178, "grad_norm": 537.4244995117188, "learning_rate": 4.968396347537751e-05, "loss": 74.5145, "step": 36400 }, { "epoch": 0.1471010072035456, "grad_norm": 897.6705932617188, "learning_rate": 4.968340995586346e-05, "loss": 109.4366, "step": 36410 }, { "epoch": 0.14714140846891324, "grad_norm": 712.1412963867188, "learning_rate": 4.9682855955133625e-05, "loss": 67.6998, "step": 36420 }, { "epoch": 0.14718180973428088, "grad_norm": 0.0, "learning_rate": 4.96823014731988e-05, "loss": 102.4061, "step": 36430 }, { "epoch": 0.14722221099964852, "grad_norm": 1074.514404296875, "learning_rate": 4.9681746510069805e-05, "loss": 79.5955, "step": 36440 }, { "epoch": 0.14726261226501614, "grad_norm": 1605.676025390625, "learning_rate": 4.9681191065757455e-05, "loss": 89.505, "step": 36450 }, { "epoch": 0.14730301353038378, "grad_norm": 981.2547607421875, "learning_rate": 4.9680635140272575e-05, "loss": 99.4992, "step": 36460 }, { "epoch": 0.14734341479575142, "grad_norm": 937.9696655273438, "learning_rate": 4.9680078733626015e-05, "loss": 75.9712, "step": 36470 }, { "epoch": 0.14738381606111903, "grad_norm": 501.3292236328125, "learning_rate": 4.9679521845828604e-05, "loss": 75.7125, "step": 36480 }, { "epoch": 0.14742421732648667, "grad_norm": 1447.997802734375, "learning_rate": 4.967896447689121e-05, "loss": 83.492, "step": 36490 }, { "epoch": 0.1474646185918543, "grad_norm": 555.2802734375, "learning_rate": 4.96784066268247e-05, "loss": 63.0523, "step": 36500 }, { "epoch": 0.14750501985722192, "grad_norm": 784.3593139648438, "learning_rate": 4.967784829563995e-05, "loss": 93.0994, "step": 36510 }, { "epoch": 0.14754542112258956, "grad_norm": 1216.3717041015625, "learning_rate": 4.967728948334784e-05, "loss": 99.9734, "step": 36520 }, { "epoch": 0.1475858223879572, "grad_norm": 717.1402587890625, "learning_rate": 4.967673018995926e-05, "loss": 80.4648, "step": 36530 }, { "epoch": 0.1476262236533248, "grad_norm": 468.6872253417969, "learning_rate": 4.967617041548513e-05, "loss": 96.3743, "step": 36540 }, { "epoch": 0.14766662491869245, "grad_norm": 709.4956665039062, "learning_rate": 4.967561015993635e-05, "loss": 96.8744, "step": 36550 }, { "epoch": 0.1477070261840601, "grad_norm": 691.0542602539062, "learning_rate": 4.967504942332385e-05, "loss": 86.1126, "step": 36560 }, { "epoch": 0.1477474274494277, "grad_norm": 1028.8739013671875, "learning_rate": 4.967448820565856e-05, "loss": 125.4938, "step": 36570 }, { "epoch": 0.14778782871479534, "grad_norm": 599.9513549804688, "learning_rate": 4.9673926506951404e-05, "loss": 117.7221, "step": 36580 }, { "epoch": 0.14782822998016298, "grad_norm": 609.7288818359375, "learning_rate": 4.967336432721337e-05, "loss": 80.1608, "step": 36590 }, { "epoch": 0.1478686312455306, "grad_norm": 1172.2431640625, "learning_rate": 4.967280166645538e-05, "loss": 97.0124, "step": 36600 }, { "epoch": 0.14790903251089824, "grad_norm": 774.5439453125, "learning_rate": 4.967223852468842e-05, "loss": 106.1079, "step": 36610 }, { "epoch": 0.14794943377626588, "grad_norm": 414.7299499511719, "learning_rate": 4.967167490192347e-05, "loss": 95.8307, "step": 36620 }, { "epoch": 0.14798983504163352, "grad_norm": 1053.8829345703125, "learning_rate": 4.967111079817151e-05, "loss": 102.4389, "step": 36630 }, { "epoch": 0.14803023630700113, "grad_norm": 730.1495971679688, "learning_rate": 4.967054621344356e-05, "loss": 87.2563, "step": 36640 }, { "epoch": 0.14807063757236877, "grad_norm": 2739.23583984375, "learning_rate": 4.96699811477506e-05, "loss": 126.1713, "step": 36650 }, { "epoch": 0.1481110388377364, "grad_norm": 3278.61767578125, "learning_rate": 4.966941560110366e-05, "loss": 115.4281, "step": 36660 }, { "epoch": 0.14815144010310402, "grad_norm": 1437.7108154296875, "learning_rate": 4.966884957351375e-05, "loss": 98.0438, "step": 36670 }, { "epoch": 0.14819184136847166, "grad_norm": 582.1204833984375, "learning_rate": 4.966828306499193e-05, "loss": 85.8503, "step": 36680 }, { "epoch": 0.1482322426338393, "grad_norm": 790.8889770507812, "learning_rate": 4.966771607554923e-05, "loss": 75.0409, "step": 36690 }, { "epoch": 0.1482726438992069, "grad_norm": 370.1599426269531, "learning_rate": 4.96671486051967e-05, "loss": 94.7309, "step": 36700 }, { "epoch": 0.14831304516457455, "grad_norm": 2078.58837890625, "learning_rate": 4.966658065394542e-05, "loss": 133.5383, "step": 36710 }, { "epoch": 0.1483534464299422, "grad_norm": 374.68475341796875, "learning_rate": 4.9666012221806434e-05, "loss": 102.0996, "step": 36720 }, { "epoch": 0.1483938476953098, "grad_norm": 1200.5596923828125, "learning_rate": 4.966544330879085e-05, "loss": 89.5257, "step": 36730 }, { "epoch": 0.14843424896067745, "grad_norm": 430.6545104980469, "learning_rate": 4.9664873914909755e-05, "loss": 73.1253, "step": 36740 }, { "epoch": 0.14847465022604509, "grad_norm": 1570.5799560546875, "learning_rate": 4.966430404017424e-05, "loss": 72.1421, "step": 36750 }, { "epoch": 0.1485150514914127, "grad_norm": 422.2970886230469, "learning_rate": 4.966373368459541e-05, "loss": 147.6407, "step": 36760 }, { "epoch": 0.14855545275678034, "grad_norm": 567.98046875, "learning_rate": 4.966316284818441e-05, "loss": 78.6361, "step": 36770 }, { "epoch": 0.14859585402214798, "grad_norm": 0.0, "learning_rate": 4.966259153095235e-05, "loss": 72.8352, "step": 36780 }, { "epoch": 0.14863625528751562, "grad_norm": 871.2714233398438, "learning_rate": 4.966201973291036e-05, "loss": 104.7993, "step": 36790 }, { "epoch": 0.14867665655288323, "grad_norm": 762.0390625, "learning_rate": 4.966144745406961e-05, "loss": 63.3974, "step": 36800 }, { "epoch": 0.14871705781825087, "grad_norm": 1256.638427734375, "learning_rate": 4.966087469444124e-05, "loss": 82.0591, "step": 36810 }, { "epoch": 0.1487574590836185, "grad_norm": 1347.9171142578125, "learning_rate": 4.966030145403642e-05, "loss": 97.7492, "step": 36820 }, { "epoch": 0.14879786034898612, "grad_norm": 2305.42236328125, "learning_rate": 4.965972773286633e-05, "loss": 98.8773, "step": 36830 }, { "epoch": 0.14883826161435376, "grad_norm": 759.54931640625, "learning_rate": 4.965915353094215e-05, "loss": 104.5514, "step": 36840 }, { "epoch": 0.1488786628797214, "grad_norm": 409.0998840332031, "learning_rate": 4.9658578848275076e-05, "loss": 72.8223, "step": 36850 }, { "epoch": 0.14891906414508901, "grad_norm": 464.468017578125, "learning_rate": 4.965800368487632e-05, "loss": 112.4646, "step": 36860 }, { "epoch": 0.14895946541045665, "grad_norm": 923.630859375, "learning_rate": 4.9657428040757084e-05, "loss": 107.7895, "step": 36870 }, { "epoch": 0.1489998666758243, "grad_norm": 1179.0242919921875, "learning_rate": 4.965685191592859e-05, "loss": 86.0383, "step": 36880 }, { "epoch": 0.1490402679411919, "grad_norm": 636.7333374023438, "learning_rate": 4.9656275310402074e-05, "loss": 114.6134, "step": 36890 }, { "epoch": 0.14908066920655955, "grad_norm": 2465.29296875, "learning_rate": 4.965569822418877e-05, "loss": 58.8899, "step": 36900 }, { "epoch": 0.1491210704719272, "grad_norm": 757.9246215820312, "learning_rate": 4.9655120657299945e-05, "loss": 66.7647, "step": 36910 }, { "epoch": 0.1491614717372948, "grad_norm": 671.7369995117188, "learning_rate": 4.965454260974685e-05, "loss": 87.831, "step": 36920 }, { "epoch": 0.14920187300266244, "grad_norm": 409.745849609375, "learning_rate": 4.9653964081540756e-05, "loss": 107.4229, "step": 36930 }, { "epoch": 0.14924227426803008, "grad_norm": 1466.3577880859375, "learning_rate": 4.965338507269294e-05, "loss": 93.4886, "step": 36940 }, { "epoch": 0.14928267553339772, "grad_norm": 554.5855712890625, "learning_rate": 4.965280558321468e-05, "loss": 97.7594, "step": 36950 }, { "epoch": 0.14932307679876533, "grad_norm": 1820.7589111328125, "learning_rate": 4.9652225613117284e-05, "loss": 122.0696, "step": 36960 }, { "epoch": 0.14936347806413297, "grad_norm": 1617.1182861328125, "learning_rate": 4.965164516241206e-05, "loss": 123.7657, "step": 36970 }, { "epoch": 0.1494038793295006, "grad_norm": 669.3226928710938, "learning_rate": 4.965106423111033e-05, "loss": 103.5812, "step": 36980 }, { "epoch": 0.14944428059486822, "grad_norm": 2139.23876953125, "learning_rate": 4.9650482819223405e-05, "loss": 119.0735, "step": 36990 }, { "epoch": 0.14948468186023586, "grad_norm": 532.9093017578125, "learning_rate": 4.964990092676263e-05, "loss": 82.2448, "step": 37000 }, { "epoch": 0.1495250831256035, "grad_norm": 804.912109375, "learning_rate": 4.964931855373934e-05, "loss": 80.909, "step": 37010 }, { "epoch": 0.14956548439097112, "grad_norm": 847.0060424804688, "learning_rate": 4.9648735700164895e-05, "loss": 92.2945, "step": 37020 }, { "epoch": 0.14960588565633876, "grad_norm": 1183.10302734375, "learning_rate": 4.964815236605066e-05, "loss": 94.6774, "step": 37030 }, { "epoch": 0.1496462869217064, "grad_norm": 885.68896484375, "learning_rate": 4.964756855140801e-05, "loss": 75.244, "step": 37040 }, { "epoch": 0.149686688187074, "grad_norm": 843.5673828125, "learning_rate": 4.964698425624831e-05, "loss": 82.0223, "step": 37050 }, { "epoch": 0.14972708945244165, "grad_norm": 817.964599609375, "learning_rate": 4.964639948058297e-05, "loss": 131.6297, "step": 37060 }, { "epoch": 0.1497674907178093, "grad_norm": 413.62335205078125, "learning_rate": 4.964581422442338e-05, "loss": 93.1363, "step": 37070 }, { "epoch": 0.1498078919831769, "grad_norm": 569.75146484375, "learning_rate": 4.964522848778096e-05, "loss": 70.2936, "step": 37080 }, { "epoch": 0.14984829324854454, "grad_norm": 507.4297180175781, "learning_rate": 4.964464227066712e-05, "loss": 53.4872, "step": 37090 }, { "epoch": 0.14988869451391218, "grad_norm": 1582.9383544921875, "learning_rate": 4.964405557309328e-05, "loss": 128.3349, "step": 37100 }, { "epoch": 0.14992909577927982, "grad_norm": 871.0317993164062, "learning_rate": 4.9643468395070904e-05, "loss": 106.9411, "step": 37110 }, { "epoch": 0.14996949704464743, "grad_norm": 4390.7822265625, "learning_rate": 4.964288073661142e-05, "loss": 98.8651, "step": 37120 }, { "epoch": 0.15000989831001507, "grad_norm": 1010.7752075195312, "learning_rate": 4.9642292597726284e-05, "loss": 134.204, "step": 37130 }, { "epoch": 0.1500502995753827, "grad_norm": 3052.423583984375, "learning_rate": 4.964170397842697e-05, "loss": 129.5846, "step": 37140 }, { "epoch": 0.15009070084075032, "grad_norm": 363.93865966796875, "learning_rate": 4.9641114878724956e-05, "loss": 101.122, "step": 37150 }, { "epoch": 0.15013110210611796, "grad_norm": 963.4545288085938, "learning_rate": 4.964052529863171e-05, "loss": 98.0533, "step": 37160 }, { "epoch": 0.1501715033714856, "grad_norm": 939.9786987304688, "learning_rate": 4.9639935238158744e-05, "loss": 89.1619, "step": 37170 }, { "epoch": 0.15021190463685322, "grad_norm": 1083.3187255859375, "learning_rate": 4.963934469731756e-05, "loss": 77.3688, "step": 37180 }, { "epoch": 0.15025230590222086, "grad_norm": 818.65478515625, "learning_rate": 4.963875367611966e-05, "loss": 96.2921, "step": 37190 }, { "epoch": 0.1502927071675885, "grad_norm": 723.4608154296875, "learning_rate": 4.963816217457657e-05, "loss": 128.7513, "step": 37200 }, { "epoch": 0.1503331084329561, "grad_norm": 737.654541015625, "learning_rate": 4.963757019269983e-05, "loss": 107.9872, "step": 37210 }, { "epoch": 0.15037350969832375, "grad_norm": 771.156005859375, "learning_rate": 4.963697773050097e-05, "loss": 109.9101, "step": 37220 }, { "epoch": 0.1504139109636914, "grad_norm": 748.536376953125, "learning_rate": 4.9636384787991547e-05, "loss": 65.0804, "step": 37230 }, { "epoch": 0.150454312229059, "grad_norm": 430.6962890625, "learning_rate": 4.963579136518312e-05, "loss": 59.6868, "step": 37240 }, { "epoch": 0.15049471349442664, "grad_norm": 703.9127807617188, "learning_rate": 4.963519746208726e-05, "loss": 111.1007, "step": 37250 }, { "epoch": 0.15053511475979428, "grad_norm": 813.0443115234375, "learning_rate": 4.963460307871553e-05, "loss": 102.864, "step": 37260 }, { "epoch": 0.15057551602516192, "grad_norm": 1400.834716796875, "learning_rate": 4.963400821507954e-05, "loss": 116.5066, "step": 37270 }, { "epoch": 0.15061591729052953, "grad_norm": 941.7633666992188, "learning_rate": 4.9633412871190873e-05, "loss": 97.3467, "step": 37280 }, { "epoch": 0.15065631855589717, "grad_norm": 895.1846923828125, "learning_rate": 4.963281704706115e-05, "loss": 88.1651, "step": 37290 }, { "epoch": 0.1506967198212648, "grad_norm": 1722.0013427734375, "learning_rate": 4.9632220742701965e-05, "loss": 64.2686, "step": 37300 }, { "epoch": 0.15073712108663243, "grad_norm": 499.2967224121094, "learning_rate": 4.963162395812496e-05, "loss": 57.7127, "step": 37310 }, { "epoch": 0.15077752235200007, "grad_norm": 642.2814331054688, "learning_rate": 4.9631026693341764e-05, "loss": 47.7118, "step": 37320 }, { "epoch": 0.1508179236173677, "grad_norm": 985.0557250976562, "learning_rate": 4.963042894836403e-05, "loss": 113.7733, "step": 37330 }, { "epoch": 0.15085832488273532, "grad_norm": 558.4752197265625, "learning_rate": 4.9629830723203384e-05, "loss": 55.6255, "step": 37340 }, { "epoch": 0.15089872614810296, "grad_norm": 366.1856689453125, "learning_rate": 4.9629232017871524e-05, "loss": 98.4935, "step": 37350 }, { "epoch": 0.1509391274134706, "grad_norm": 817.6262817382812, "learning_rate": 4.96286328323801e-05, "loss": 69.1073, "step": 37360 }, { "epoch": 0.1509795286788382, "grad_norm": 724.3482055664062, "learning_rate": 4.96280331667408e-05, "loss": 101.978, "step": 37370 }, { "epoch": 0.15101992994420585, "grad_norm": 2478.86962890625, "learning_rate": 4.9627433020965314e-05, "loss": 104.1645, "step": 37380 }, { "epoch": 0.1510603312095735, "grad_norm": 663.1130981445312, "learning_rate": 4.962683239506534e-05, "loss": 80.2843, "step": 37390 }, { "epoch": 0.1511007324749411, "grad_norm": 757.1906127929688, "learning_rate": 4.9626231289052596e-05, "loss": 87.6918, "step": 37400 }, { "epoch": 0.15114113374030874, "grad_norm": 427.3059997558594, "learning_rate": 4.962562970293879e-05, "loss": 79.3416, "step": 37410 }, { "epoch": 0.15118153500567638, "grad_norm": 821.1558837890625, "learning_rate": 4.962502763673565e-05, "loss": 116.5157, "step": 37420 }, { "epoch": 0.15122193627104402, "grad_norm": 1046.214111328125, "learning_rate": 4.962442509045493e-05, "loss": 53.0469, "step": 37430 }, { "epoch": 0.15126233753641163, "grad_norm": 1114.373291015625, "learning_rate": 4.9623822064108364e-05, "loss": 75.2076, "step": 37440 }, { "epoch": 0.15130273880177927, "grad_norm": 1147.0633544921875, "learning_rate": 4.9623218557707694e-05, "loss": 80.5338, "step": 37450 }, { "epoch": 0.15134314006714691, "grad_norm": 1153.3177490234375, "learning_rate": 4.9622614571264715e-05, "loss": 115.0096, "step": 37460 }, { "epoch": 0.15138354133251453, "grad_norm": 560.9694213867188, "learning_rate": 4.962201010479119e-05, "loss": 86.4352, "step": 37470 }, { "epoch": 0.15142394259788217, "grad_norm": 584.9893188476562, "learning_rate": 4.96214051582989e-05, "loss": 63.2892, "step": 37480 }, { "epoch": 0.1514643438632498, "grad_norm": 573.4566650390625, "learning_rate": 4.962079973179963e-05, "loss": 71.3577, "step": 37490 }, { "epoch": 0.15150474512861742, "grad_norm": 696.969482421875, "learning_rate": 4.962019382530521e-05, "loss": 111.7284, "step": 37500 }, { "epoch": 0.15154514639398506, "grad_norm": 674.3920288085938, "learning_rate": 4.961958743882742e-05, "loss": 82.3515, "step": 37510 }, { "epoch": 0.1515855476593527, "grad_norm": 835.2359008789062, "learning_rate": 4.96189805723781e-05, "loss": 81.7163, "step": 37520 }, { "epoch": 0.1516259489247203, "grad_norm": 1307.296142578125, "learning_rate": 4.96183732259691e-05, "loss": 92.0704, "step": 37530 }, { "epoch": 0.15166635019008795, "grad_norm": 808.1261596679688, "learning_rate": 4.961776539961222e-05, "loss": 82.1407, "step": 37540 }, { "epoch": 0.1517067514554556, "grad_norm": 1993.1292724609375, "learning_rate": 4.9617157093319326e-05, "loss": 85.8768, "step": 37550 }, { "epoch": 0.1517471527208232, "grad_norm": 784.2838134765625, "learning_rate": 4.961654830710229e-05, "loss": 92.2036, "step": 37560 }, { "epoch": 0.15178755398619084, "grad_norm": 412.1575622558594, "learning_rate": 4.961593904097297e-05, "loss": 82.7349, "step": 37570 }, { "epoch": 0.15182795525155848, "grad_norm": 726.8958129882812, "learning_rate": 4.961532929494325e-05, "loss": 71.485, "step": 37580 }, { "epoch": 0.15186835651692612, "grad_norm": 1616.0050048828125, "learning_rate": 4.9614719069025e-05, "loss": 65.669, "step": 37590 }, { "epoch": 0.15190875778229374, "grad_norm": 1195.6494140625, "learning_rate": 4.9614108363230135e-05, "loss": 120.262, "step": 37600 }, { "epoch": 0.15194915904766138, "grad_norm": 785.0847778320312, "learning_rate": 4.961349717757056e-05, "loss": 55.0058, "step": 37610 }, { "epoch": 0.15198956031302902, "grad_norm": 367.303466796875, "learning_rate": 4.961288551205818e-05, "loss": 85.8025, "step": 37620 }, { "epoch": 0.15202996157839663, "grad_norm": 1064.3392333984375, "learning_rate": 4.961227336670493e-05, "loss": 120.8695, "step": 37630 }, { "epoch": 0.15207036284376427, "grad_norm": 963.6973876953125, "learning_rate": 4.961166074152274e-05, "loss": 119.0628, "step": 37640 }, { "epoch": 0.1521107641091319, "grad_norm": 1672.767333984375, "learning_rate": 4.961104763652355e-05, "loss": 100.2525, "step": 37650 }, { "epoch": 0.15215116537449952, "grad_norm": 850.4930419921875, "learning_rate": 4.961043405171931e-05, "loss": 63.3998, "step": 37660 }, { "epoch": 0.15219156663986716, "grad_norm": 1047.08935546875, "learning_rate": 4.9609819987122e-05, "loss": 78.7605, "step": 37670 }, { "epoch": 0.1522319679052348, "grad_norm": 689.37744140625, "learning_rate": 4.9609205442743566e-05, "loss": 100.5812, "step": 37680 }, { "epoch": 0.1522723691706024, "grad_norm": 686.3281860351562, "learning_rate": 4.9608590418596016e-05, "loss": 58.7595, "step": 37690 }, { "epoch": 0.15231277043597005, "grad_norm": 992.6712036132812, "learning_rate": 4.9607974914691316e-05, "loss": 117.9191, "step": 37700 }, { "epoch": 0.1523531717013377, "grad_norm": 518.0152587890625, "learning_rate": 4.960735893104148e-05, "loss": 74.092, "step": 37710 }, { "epoch": 0.1523935729667053, "grad_norm": 0.0, "learning_rate": 4.960674246765851e-05, "loss": 74.5977, "step": 37720 }, { "epoch": 0.15243397423207294, "grad_norm": 524.2943115234375, "learning_rate": 4.9606125524554434e-05, "loss": 47.5876, "step": 37730 }, { "epoch": 0.15247437549744058, "grad_norm": 1206.4256591796875, "learning_rate": 4.960550810174126e-05, "loss": 84.7391, "step": 37740 }, { "epoch": 0.15251477676280822, "grad_norm": 762.7168579101562, "learning_rate": 4.960489019923105e-05, "loss": 67.8448, "step": 37750 }, { "epoch": 0.15255517802817584, "grad_norm": 1341.15869140625, "learning_rate": 4.9604271817035834e-05, "loss": 112.8694, "step": 37760 }, { "epoch": 0.15259557929354348, "grad_norm": 603.6417236328125, "learning_rate": 4.960365295516767e-05, "loss": 123.1844, "step": 37770 }, { "epoch": 0.15263598055891112, "grad_norm": 868.97998046875, "learning_rate": 4.9603033613638626e-05, "loss": 75.7884, "step": 37780 }, { "epoch": 0.15267638182427873, "grad_norm": 696.1629638671875, "learning_rate": 4.9602413792460776e-05, "loss": 82.7375, "step": 37790 }, { "epoch": 0.15271678308964637, "grad_norm": 713.6837768554688, "learning_rate": 4.960179349164621e-05, "loss": 60.8608, "step": 37800 }, { "epoch": 0.152757184355014, "grad_norm": 903.1224365234375, "learning_rate": 4.9601172711207005e-05, "loss": 111.0102, "step": 37810 }, { "epoch": 0.15279758562038162, "grad_norm": 1082.6353759765625, "learning_rate": 4.9600551451155274e-05, "loss": 102.1816, "step": 37820 }, { "epoch": 0.15283798688574926, "grad_norm": 1227.940673828125, "learning_rate": 4.959992971150313e-05, "loss": 98.8093, "step": 37830 }, { "epoch": 0.1528783881511169, "grad_norm": 1349.7529296875, "learning_rate": 4.959930749226269e-05, "loss": 99.6163, "step": 37840 }, { "epoch": 0.1529187894164845, "grad_norm": 1902.737548828125, "learning_rate": 4.9598684793446085e-05, "loss": 108.2246, "step": 37850 }, { "epoch": 0.15295919068185215, "grad_norm": 888.9658203125, "learning_rate": 4.959806161506545e-05, "loss": 79.167, "step": 37860 }, { "epoch": 0.1529995919472198, "grad_norm": 984.05322265625, "learning_rate": 4.9597437957132955e-05, "loss": 76.7845, "step": 37870 }, { "epoch": 0.1530399932125874, "grad_norm": 849.2679443359375, "learning_rate": 4.959681381966073e-05, "loss": 122.3138, "step": 37880 }, { "epoch": 0.15308039447795505, "grad_norm": 725.888916015625, "learning_rate": 4.959618920266096e-05, "loss": 73.6324, "step": 37890 }, { "epoch": 0.15312079574332269, "grad_norm": 792.6710205078125, "learning_rate": 4.959556410614582e-05, "loss": 54.1202, "step": 37900 }, { "epoch": 0.15316119700869033, "grad_norm": 583.5054931640625, "learning_rate": 4.959493853012749e-05, "loss": 81.214, "step": 37910 }, { "epoch": 0.15320159827405794, "grad_norm": 793.6455688476562, "learning_rate": 4.9594312474618175e-05, "loss": 70.9016, "step": 37920 }, { "epoch": 0.15324199953942558, "grad_norm": 1003.0465087890625, "learning_rate": 4.959368593963007e-05, "loss": 117.324, "step": 37930 }, { "epoch": 0.15328240080479322, "grad_norm": 450.6336364746094, "learning_rate": 4.9593058925175406e-05, "loss": 95.6128, "step": 37940 }, { "epoch": 0.15332280207016083, "grad_norm": 674.2539672851562, "learning_rate": 4.959243143126639e-05, "loss": 86.3896, "step": 37950 }, { "epoch": 0.15336320333552847, "grad_norm": 396.0630798339844, "learning_rate": 4.959180345791528e-05, "loss": 80.5643, "step": 37960 }, { "epoch": 0.1534036046008961, "grad_norm": 601.1489868164062, "learning_rate": 4.9591175005134286e-05, "loss": 105.7729, "step": 37970 }, { "epoch": 0.15344400586626372, "grad_norm": 884.35693359375, "learning_rate": 4.959054607293567e-05, "loss": 100.5772, "step": 37980 }, { "epoch": 0.15348440713163136, "grad_norm": 670.9600830078125, "learning_rate": 4.95899166613317e-05, "loss": 75.9151, "step": 37990 }, { "epoch": 0.153524808396999, "grad_norm": 1574.9869384765625, "learning_rate": 4.9589286770334654e-05, "loss": 74.2453, "step": 38000 }, { "epoch": 0.15356520966236661, "grad_norm": 1199.295654296875, "learning_rate": 4.958865639995679e-05, "loss": 89.8758, "step": 38010 }, { "epoch": 0.15360561092773425, "grad_norm": 605.9471435546875, "learning_rate": 4.958802555021042e-05, "loss": 96.6403, "step": 38020 }, { "epoch": 0.1536460121931019, "grad_norm": 1894.856201171875, "learning_rate": 4.958739422110783e-05, "loss": 87.7068, "step": 38030 }, { "epoch": 0.1536864134584695, "grad_norm": 1081.8231201171875, "learning_rate": 4.9586762412661333e-05, "loss": 88.5522, "step": 38040 }, { "epoch": 0.15372681472383715, "grad_norm": 452.8377685546875, "learning_rate": 4.958613012488324e-05, "loss": 75.0825, "step": 38050 }, { "epoch": 0.1537672159892048, "grad_norm": 855.8710327148438, "learning_rate": 4.958549735778589e-05, "loss": 106.2082, "step": 38060 }, { "epoch": 0.15380761725457243, "grad_norm": 0.0, "learning_rate": 4.958486411138161e-05, "loss": 50.9362, "step": 38070 }, { "epoch": 0.15384801851994004, "grad_norm": 677.1991577148438, "learning_rate": 4.958423038568274e-05, "loss": 95.3129, "step": 38080 }, { "epoch": 0.15388841978530768, "grad_norm": 470.16778564453125, "learning_rate": 4.958359618070165e-05, "loss": 92.6209, "step": 38090 }, { "epoch": 0.15392882105067532, "grad_norm": 839.3685302734375, "learning_rate": 4.958296149645069e-05, "loss": 96.7531, "step": 38100 }, { "epoch": 0.15396922231604293, "grad_norm": 1347.764892578125, "learning_rate": 4.9582326332942244e-05, "loss": 85.9113, "step": 38110 }, { "epoch": 0.15400962358141057, "grad_norm": 452.386962890625, "learning_rate": 4.958169069018869e-05, "loss": 104.578, "step": 38120 }, { "epoch": 0.1540500248467782, "grad_norm": 1746.528076171875, "learning_rate": 4.958105456820242e-05, "loss": 81.6607, "step": 38130 }, { "epoch": 0.15409042611214582, "grad_norm": 862.51708984375, "learning_rate": 4.958041796699583e-05, "loss": 92.2541, "step": 38140 }, { "epoch": 0.15413082737751346, "grad_norm": 1134.7611083984375, "learning_rate": 4.957978088658134e-05, "loss": 85.8542, "step": 38150 }, { "epoch": 0.1541712286428811, "grad_norm": 737.6195068359375, "learning_rate": 4.957914332697137e-05, "loss": 84.3832, "step": 38160 }, { "epoch": 0.15421162990824872, "grad_norm": 614.6725463867188, "learning_rate": 4.957850528817834e-05, "loss": 83.8153, "step": 38170 }, { "epoch": 0.15425203117361636, "grad_norm": 848.0565795898438, "learning_rate": 4.957786677021471e-05, "loss": 72.8551, "step": 38180 }, { "epoch": 0.154292432438984, "grad_norm": 1637.4676513671875, "learning_rate": 4.9577227773092904e-05, "loss": 69.5406, "step": 38190 }, { "epoch": 0.1543328337043516, "grad_norm": 1447.67431640625, "learning_rate": 4.9576588296825386e-05, "loss": 60.8636, "step": 38200 }, { "epoch": 0.15437323496971925, "grad_norm": 815.7072143554688, "learning_rate": 4.9575948341424634e-05, "loss": 86.5249, "step": 38210 }, { "epoch": 0.1544136362350869, "grad_norm": 455.055419921875, "learning_rate": 4.957530790690311e-05, "loss": 87.8417, "step": 38220 }, { "epoch": 0.15445403750045453, "grad_norm": 1008.4957275390625, "learning_rate": 4.957466699327331e-05, "loss": 111.3395, "step": 38230 }, { "epoch": 0.15449443876582214, "grad_norm": 0.0, "learning_rate": 4.957402560054773e-05, "loss": 127.6305, "step": 38240 }, { "epoch": 0.15453484003118978, "grad_norm": 2538.266357421875, "learning_rate": 4.957338372873886e-05, "loss": 126.8811, "step": 38250 }, { "epoch": 0.15457524129655742, "grad_norm": 765.6140747070312, "learning_rate": 4.957274137785922e-05, "loss": 98.6367, "step": 38260 }, { "epoch": 0.15461564256192503, "grad_norm": 1128.1064453125, "learning_rate": 4.957209854792135e-05, "loss": 115.5567, "step": 38270 }, { "epoch": 0.15465604382729267, "grad_norm": 1656.940673828125, "learning_rate": 4.957145523893776e-05, "loss": 96.0133, "step": 38280 }, { "epoch": 0.1546964450926603, "grad_norm": 776.5398559570312, "learning_rate": 4.9570811450921e-05, "loss": 96.1438, "step": 38290 }, { "epoch": 0.15473684635802792, "grad_norm": 541.0315551757812, "learning_rate": 4.957016718388362e-05, "loss": 94.2679, "step": 38300 }, { "epoch": 0.15477724762339556, "grad_norm": 1425.260498046875, "learning_rate": 4.956952243783818e-05, "loss": 85.9477, "step": 38310 }, { "epoch": 0.1548176488887632, "grad_norm": 495.0509948730469, "learning_rate": 4.956887721279726e-05, "loss": 128.2712, "step": 38320 }, { "epoch": 0.15485805015413082, "grad_norm": 1667.0662841796875, "learning_rate": 4.956823150877342e-05, "loss": 81.26, "step": 38330 }, { "epoch": 0.15489845141949846, "grad_norm": 483.2498474121094, "learning_rate": 4.956758532577926e-05, "loss": 76.7887, "step": 38340 }, { "epoch": 0.1549388526848661, "grad_norm": 940.1998901367188, "learning_rate": 4.9566938663827377e-05, "loss": 62.8369, "step": 38350 }, { "epoch": 0.1549792539502337, "grad_norm": 826.6288452148438, "learning_rate": 4.9566291522930375e-05, "loss": 82.23, "step": 38360 }, { "epoch": 0.15501965521560135, "grad_norm": 596.3945922851562, "learning_rate": 4.956564390310088e-05, "loss": 68.8034, "step": 38370 }, { "epoch": 0.155060056480969, "grad_norm": 843.1058349609375, "learning_rate": 4.95649958043515e-05, "loss": 92.2614, "step": 38380 }, { "epoch": 0.15510045774633663, "grad_norm": 368.0083923339844, "learning_rate": 4.956434722669489e-05, "loss": 80.4238, "step": 38390 }, { "epoch": 0.15514085901170424, "grad_norm": 527.663330078125, "learning_rate": 4.9563698170143666e-05, "loss": 98.6083, "step": 38400 }, { "epoch": 0.15518126027707188, "grad_norm": 847.8034057617188, "learning_rate": 4.9563048634710516e-05, "loss": 154.9312, "step": 38410 }, { "epoch": 0.15522166154243952, "grad_norm": 872.8273315429688, "learning_rate": 4.956239862040808e-05, "loss": 89.4222, "step": 38420 }, { "epoch": 0.15526206280780713, "grad_norm": 2756.822509765625, "learning_rate": 4.956174812724904e-05, "loss": 77.4832, "step": 38430 }, { "epoch": 0.15530246407317477, "grad_norm": 405.55841064453125, "learning_rate": 4.956109715524608e-05, "loss": 104.6995, "step": 38440 }, { "epoch": 0.1553428653385424, "grad_norm": 514.676513671875, "learning_rate": 4.956044570441188e-05, "loss": 112.402, "step": 38450 }, { "epoch": 0.15538326660391003, "grad_norm": 867.9771118164062, "learning_rate": 4.955979377475915e-05, "loss": 72.6036, "step": 38460 }, { "epoch": 0.15542366786927767, "grad_norm": 407.26898193359375, "learning_rate": 4.9559141366300594e-05, "loss": 89.936, "step": 38470 }, { "epoch": 0.1554640691346453, "grad_norm": 906.7838745117188, "learning_rate": 4.955848847904894e-05, "loss": 111.0247, "step": 38480 }, { "epoch": 0.15550447040001292, "grad_norm": 907.695068359375, "learning_rate": 4.955783511301689e-05, "loss": 87.2485, "step": 38490 }, { "epoch": 0.15554487166538056, "grad_norm": 1016.2330932617188, "learning_rate": 4.9557181268217227e-05, "loss": 130.9673, "step": 38500 }, { "epoch": 0.1555852729307482, "grad_norm": 1297.313232421875, "learning_rate": 4.955652694466265e-05, "loss": 104.8052, "step": 38510 }, { "epoch": 0.1556256741961158, "grad_norm": 704.80126953125, "learning_rate": 4.9555872142365945e-05, "loss": 97.9365, "step": 38520 }, { "epoch": 0.15566607546148345, "grad_norm": 1771.6796875, "learning_rate": 4.9555216861339876e-05, "loss": 74.8663, "step": 38530 }, { "epoch": 0.1557064767268511, "grad_norm": 527.6610717773438, "learning_rate": 4.9554561101597206e-05, "loss": 99.9333, "step": 38540 }, { "epoch": 0.15574687799221873, "grad_norm": 380.7573547363281, "learning_rate": 4.955390486315073e-05, "loss": 72.2091, "step": 38550 }, { "epoch": 0.15578727925758634, "grad_norm": 1313.071044921875, "learning_rate": 4.955324814601324e-05, "loss": 84.0544, "step": 38560 }, { "epoch": 0.15582768052295398, "grad_norm": 879.2240600585938, "learning_rate": 4.955259095019753e-05, "loss": 100.0556, "step": 38570 }, { "epoch": 0.15586808178832162, "grad_norm": 1069.8026123046875, "learning_rate": 4.955193327571642e-05, "loss": 60.2459, "step": 38580 }, { "epoch": 0.15590848305368923, "grad_norm": 1302.9310302734375, "learning_rate": 4.955127512258273e-05, "loss": 92.8039, "step": 38590 }, { "epoch": 0.15594888431905687, "grad_norm": 1001.041259765625, "learning_rate": 4.95506164908093e-05, "loss": 110.6155, "step": 38600 }, { "epoch": 0.15598928558442451, "grad_norm": 895.0310668945312, "learning_rate": 4.954995738040895e-05, "loss": 107.8179, "step": 38610 }, { "epoch": 0.15602968684979213, "grad_norm": 823.3916625976562, "learning_rate": 4.954929779139455e-05, "loss": 78.2662, "step": 38620 }, { "epoch": 0.15607008811515977, "grad_norm": 409.09527587890625, "learning_rate": 4.954863772377894e-05, "loss": 123.7662, "step": 38630 }, { "epoch": 0.1561104893805274, "grad_norm": 569.7661743164062, "learning_rate": 4.9547977177575014e-05, "loss": 89.6015, "step": 38640 }, { "epoch": 0.15615089064589502, "grad_norm": 616.7923583984375, "learning_rate": 4.954731615279563e-05, "loss": 94.4896, "step": 38650 }, { "epoch": 0.15619129191126266, "grad_norm": 479.69647216796875, "learning_rate": 4.9546654649453675e-05, "loss": 66.053, "step": 38660 }, { "epoch": 0.1562316931766303, "grad_norm": 2162.659912109375, "learning_rate": 4.954599266756205e-05, "loss": 138.2849, "step": 38670 }, { "epoch": 0.1562720944419979, "grad_norm": 729.6651000976562, "learning_rate": 4.9545330207133664e-05, "loss": 107.9969, "step": 38680 }, { "epoch": 0.15631249570736555, "grad_norm": 1174.257568359375, "learning_rate": 4.9544667268181436e-05, "loss": 94.6718, "step": 38690 }, { "epoch": 0.1563528969727332, "grad_norm": 638.7692260742188, "learning_rate": 4.9544003850718266e-05, "loss": 100.2124, "step": 38700 }, { "epoch": 0.15639329823810083, "grad_norm": 991.5917358398438, "learning_rate": 4.954333995475712e-05, "loss": 58.1896, "step": 38710 }, { "epoch": 0.15643369950346844, "grad_norm": 2954.35302734375, "learning_rate": 4.954267558031092e-05, "loss": 65.0267, "step": 38720 }, { "epoch": 0.15647410076883608, "grad_norm": 1842.9337158203125, "learning_rate": 4.954201072739262e-05, "loss": 92.5574, "step": 38730 }, { "epoch": 0.15651450203420372, "grad_norm": 1125.166748046875, "learning_rate": 4.9541345396015193e-05, "loss": 89.0116, "step": 38740 }, { "epoch": 0.15655490329957134, "grad_norm": 420.5506896972656, "learning_rate": 4.9540679586191605e-05, "loss": 91.9055, "step": 38750 }, { "epoch": 0.15659530456493898, "grad_norm": 1417.18115234375, "learning_rate": 4.9540013297934826e-05, "loss": 72.1004, "step": 38760 }, { "epoch": 0.15663570583030662, "grad_norm": 711.8256225585938, "learning_rate": 4.953934653125786e-05, "loss": 110.1404, "step": 38770 }, { "epoch": 0.15667610709567423, "grad_norm": 936.8480834960938, "learning_rate": 4.9538679286173696e-05, "loss": 63.1972, "step": 38780 }, { "epoch": 0.15671650836104187, "grad_norm": 965.4445190429688, "learning_rate": 4.953801156269534e-05, "loss": 96.635, "step": 38790 }, { "epoch": 0.1567569096264095, "grad_norm": 388.2055969238281, "learning_rate": 4.953734336083583e-05, "loss": 69.8237, "step": 38800 }, { "epoch": 0.15679731089177712, "grad_norm": 1032.505126953125, "learning_rate": 4.953667468060816e-05, "loss": 80.3915, "step": 38810 }, { "epoch": 0.15683771215714476, "grad_norm": 2164.00341796875, "learning_rate": 4.95360055220254e-05, "loss": 117.7873, "step": 38820 }, { "epoch": 0.1568781134225124, "grad_norm": 820.93310546875, "learning_rate": 4.9535335885100575e-05, "loss": 106.5048, "step": 38830 }, { "epoch": 0.15691851468788, "grad_norm": 513.0703735351562, "learning_rate": 4.953466576984675e-05, "loss": 67.942, "step": 38840 }, { "epoch": 0.15695891595324765, "grad_norm": 479.7278137207031, "learning_rate": 4.953399517627698e-05, "loss": 92.5827, "step": 38850 }, { "epoch": 0.1569993172186153, "grad_norm": 3824.49951171875, "learning_rate": 4.953332410440435e-05, "loss": 76.3334, "step": 38860 }, { "epoch": 0.15703971848398293, "grad_norm": 492.0223693847656, "learning_rate": 4.953265255424192e-05, "loss": 90.9314, "step": 38870 }, { "epoch": 0.15708011974935054, "grad_norm": 487.1667175292969, "learning_rate": 4.953198052580281e-05, "loss": 83.6485, "step": 38880 }, { "epoch": 0.15712052101471818, "grad_norm": 821.9052734375, "learning_rate": 4.953130801910011e-05, "loss": 66.1471, "step": 38890 }, { "epoch": 0.15716092228008582, "grad_norm": 501.9080810546875, "learning_rate": 4.953063503414692e-05, "loss": 59.9543, "step": 38900 }, { "epoch": 0.15720132354545344, "grad_norm": 1479.7249755859375, "learning_rate": 4.9529961570956383e-05, "loss": 83.3378, "step": 38910 }, { "epoch": 0.15724172481082108, "grad_norm": 636.5172119140625, "learning_rate": 4.952928762954161e-05, "loss": 94.2658, "step": 38920 }, { "epoch": 0.15728212607618872, "grad_norm": 1404.88134765625, "learning_rate": 4.952861320991575e-05, "loss": 86.6847, "step": 38930 }, { "epoch": 0.15732252734155633, "grad_norm": 1033.673828125, "learning_rate": 4.952793831209195e-05, "loss": 109.079, "step": 38940 }, { "epoch": 0.15736292860692397, "grad_norm": 990.5618896484375, "learning_rate": 4.952726293608335e-05, "loss": 80.7069, "step": 38950 }, { "epoch": 0.1574033298722916, "grad_norm": 675.2631225585938, "learning_rate": 4.9526587081903145e-05, "loss": 81.1813, "step": 38960 }, { "epoch": 0.15744373113765922, "grad_norm": 911.697509765625, "learning_rate": 4.9525910749564494e-05, "loss": 107.4708, "step": 38970 }, { "epoch": 0.15748413240302686, "grad_norm": 506.453369140625, "learning_rate": 4.952523393908059e-05, "loss": 94.5604, "step": 38980 }, { "epoch": 0.1575245336683945, "grad_norm": 1586.1180419921875, "learning_rate": 4.9524556650464616e-05, "loss": 97.9683, "step": 38990 }, { "epoch": 0.1575649349337621, "grad_norm": 525.472900390625, "learning_rate": 4.952387888372979e-05, "loss": 126.3809, "step": 39000 }, { "epoch": 0.15760533619912975, "grad_norm": 2915.437255859375, "learning_rate": 4.952320063888932e-05, "loss": 100.4383, "step": 39010 }, { "epoch": 0.1576457374644974, "grad_norm": 656.8854370117188, "learning_rate": 4.952252191595643e-05, "loss": 41.7866, "step": 39020 }, { "epoch": 0.15768613872986503, "grad_norm": 1504.2880859375, "learning_rate": 4.9521842714944345e-05, "loss": 95.3066, "step": 39030 }, { "epoch": 0.15772653999523265, "grad_norm": 425.61871337890625, "learning_rate": 4.952116303586631e-05, "loss": 89.983, "step": 39040 }, { "epoch": 0.15776694126060029, "grad_norm": 1021.040771484375, "learning_rate": 4.952048287873558e-05, "loss": 103.6552, "step": 39050 }, { "epoch": 0.15780734252596793, "grad_norm": 770.2539672851562, "learning_rate": 4.9519802243565414e-05, "loss": 88.245, "step": 39060 }, { "epoch": 0.15784774379133554, "grad_norm": 509.4902038574219, "learning_rate": 4.951912113036908e-05, "loss": 125.9709, "step": 39070 }, { "epoch": 0.15788814505670318, "grad_norm": 577.9376831054688, "learning_rate": 4.951843953915985e-05, "loss": 56.5803, "step": 39080 }, { "epoch": 0.15792854632207082, "grad_norm": 1800.318115234375, "learning_rate": 4.951775746995102e-05, "loss": 126.1482, "step": 39090 }, { "epoch": 0.15796894758743843, "grad_norm": 644.34521484375, "learning_rate": 4.951707492275589e-05, "loss": 62.9948, "step": 39100 }, { "epoch": 0.15800934885280607, "grad_norm": 2003.1749267578125, "learning_rate": 4.9516391897587764e-05, "loss": 83.1591, "step": 39110 }, { "epoch": 0.1580497501181737, "grad_norm": 663.0642700195312, "learning_rate": 4.951570839445995e-05, "loss": 86.2755, "step": 39120 }, { "epoch": 0.15809015138354132, "grad_norm": 2192.492919921875, "learning_rate": 4.951502441338578e-05, "loss": 69.6609, "step": 39130 }, { "epoch": 0.15813055264890896, "grad_norm": 437.15411376953125, "learning_rate": 4.951433995437859e-05, "loss": 98.7007, "step": 39140 }, { "epoch": 0.1581709539142766, "grad_norm": 643.024658203125, "learning_rate": 4.951365501745172e-05, "loss": 69.265, "step": 39150 }, { "epoch": 0.15821135517964421, "grad_norm": 818.115234375, "learning_rate": 4.951296960261853e-05, "loss": 82.4836, "step": 39160 }, { "epoch": 0.15825175644501185, "grad_norm": 0.0, "learning_rate": 4.9512283709892374e-05, "loss": 64.5836, "step": 39170 }, { "epoch": 0.1582921577103795, "grad_norm": 476.3262634277344, "learning_rate": 4.951159733928663e-05, "loss": 113.6856, "step": 39180 }, { "epoch": 0.15833255897574713, "grad_norm": 617.4471435546875, "learning_rate": 4.9510910490814666e-05, "loss": 53.7248, "step": 39190 }, { "epoch": 0.15837296024111475, "grad_norm": 1832.762939453125, "learning_rate": 4.95102231644899e-05, "loss": 80.7921, "step": 39200 }, { "epoch": 0.1584133615064824, "grad_norm": 736.6915283203125, "learning_rate": 4.95095353603257e-05, "loss": 81.6178, "step": 39210 }, { "epoch": 0.15845376277185003, "grad_norm": 881.178466796875, "learning_rate": 4.9508847078335495e-05, "loss": 78.0822, "step": 39220 }, { "epoch": 0.15849416403721764, "grad_norm": 482.23944091796875, "learning_rate": 4.9508158318532696e-05, "loss": 83.8018, "step": 39230 }, { "epoch": 0.15853456530258528, "grad_norm": 717.8798217773438, "learning_rate": 4.9507469080930734e-05, "loss": 69.0619, "step": 39240 }, { "epoch": 0.15857496656795292, "grad_norm": 502.1875305175781, "learning_rate": 4.9506779365543046e-05, "loss": 53.7278, "step": 39250 }, { "epoch": 0.15861536783332053, "grad_norm": 437.0827331542969, "learning_rate": 4.950608917238308e-05, "loss": 79.8096, "step": 39260 }, { "epoch": 0.15865576909868817, "grad_norm": 1342.6405029296875, "learning_rate": 4.9505398501464284e-05, "loss": 78.4081, "step": 39270 }, { "epoch": 0.1586961703640558, "grad_norm": 849.6737670898438, "learning_rate": 4.9504707352800125e-05, "loss": 79.9384, "step": 39280 }, { "epoch": 0.15873657162942342, "grad_norm": 925.02392578125, "learning_rate": 4.95040157264041e-05, "loss": 59.0614, "step": 39290 }, { "epoch": 0.15877697289479106, "grad_norm": 1974.9837646484375, "learning_rate": 4.9503323622289655e-05, "loss": 91.3175, "step": 39300 }, { "epoch": 0.1588173741601587, "grad_norm": 1192.585693359375, "learning_rate": 4.950263104047031e-05, "loss": 62.4365, "step": 39310 }, { "epoch": 0.15885777542552632, "grad_norm": 1465.087158203125, "learning_rate": 4.9501937980959545e-05, "loss": 75.6416, "step": 39320 }, { "epoch": 0.15889817669089396, "grad_norm": 669.2102661132812, "learning_rate": 4.950124444377089e-05, "loss": 101.0025, "step": 39330 }, { "epoch": 0.1589385779562616, "grad_norm": 1937.0089111328125, "learning_rate": 4.950055042891786e-05, "loss": 82.6265, "step": 39340 }, { "epoch": 0.15897897922162924, "grad_norm": 1510.9599609375, "learning_rate": 4.949985593641399e-05, "loss": 117.1577, "step": 39350 }, { "epoch": 0.15901938048699685, "grad_norm": 1171.20703125, "learning_rate": 4.949916096627282e-05, "loss": 106.9967, "step": 39360 }, { "epoch": 0.1590597817523645, "grad_norm": 1075.4632568359375, "learning_rate": 4.949846551850788e-05, "loss": 110.2698, "step": 39370 }, { "epoch": 0.15910018301773213, "grad_norm": 1426.9444580078125, "learning_rate": 4.949776959313275e-05, "loss": 68.2341, "step": 39380 }, { "epoch": 0.15914058428309974, "grad_norm": 734.63525390625, "learning_rate": 4.9497073190160994e-05, "loss": 97.0607, "step": 39390 }, { "epoch": 0.15918098554846738, "grad_norm": 682.6970825195312, "learning_rate": 4.949637630960617e-05, "loss": 83.067, "step": 39400 }, { "epoch": 0.15922138681383502, "grad_norm": 239.862060546875, "learning_rate": 4.9495678951481896e-05, "loss": 93.8866, "step": 39410 }, { "epoch": 0.15926178807920263, "grad_norm": 1805.31591796875, "learning_rate": 4.949498111580174e-05, "loss": 81.097, "step": 39420 }, { "epoch": 0.15930218934457027, "grad_norm": 1094.3526611328125, "learning_rate": 4.949428280257932e-05, "loss": 122.2046, "step": 39430 }, { "epoch": 0.1593425906099379, "grad_norm": 395.03936767578125, "learning_rate": 4.949358401182824e-05, "loss": 80.3976, "step": 39440 }, { "epoch": 0.15938299187530552, "grad_norm": 896.0783081054688, "learning_rate": 4.949288474356213e-05, "loss": 100.0894, "step": 39450 }, { "epoch": 0.15942339314067316, "grad_norm": 1049.211669921875, "learning_rate": 4.9492184997794624e-05, "loss": 115.2995, "step": 39460 }, { "epoch": 0.1594637944060408, "grad_norm": 1161.3402099609375, "learning_rate": 4.949148477453936e-05, "loss": 81.019, "step": 39470 }, { "epoch": 0.15950419567140842, "grad_norm": 2834.7724609375, "learning_rate": 4.949078407381e-05, "loss": 96.1574, "step": 39480 }, { "epoch": 0.15954459693677606, "grad_norm": 808.9170532226562, "learning_rate": 4.949008289562019e-05, "loss": 79.7376, "step": 39490 }, { "epoch": 0.1595849982021437, "grad_norm": 530.290771484375, "learning_rate": 4.94893812399836e-05, "loss": 69.7185, "step": 39500 }, { "epoch": 0.1596253994675113, "grad_norm": 1201.916015625, "learning_rate": 4.9488679106913924e-05, "loss": 76.6368, "step": 39510 }, { "epoch": 0.15966580073287895, "grad_norm": 1412.6275634765625, "learning_rate": 4.948797649642484e-05, "loss": 64.3421, "step": 39520 }, { "epoch": 0.1597062019982466, "grad_norm": 927.9067993164062, "learning_rate": 4.9487273408530044e-05, "loss": 103.6948, "step": 39530 }, { "epoch": 0.15974660326361423, "grad_norm": 722.8312377929688, "learning_rate": 4.9486569843243244e-05, "loss": 76.603, "step": 39540 }, { "epoch": 0.15978700452898184, "grad_norm": 606.1805419921875, "learning_rate": 4.948586580057816e-05, "loss": 96.2999, "step": 39550 }, { "epoch": 0.15982740579434948, "grad_norm": 787.3512573242188, "learning_rate": 4.948516128054852e-05, "loss": 91.6384, "step": 39560 }, { "epoch": 0.15986780705971712, "grad_norm": 477.5809631347656, "learning_rate": 4.948445628316805e-05, "loss": 84.977, "step": 39570 }, { "epoch": 0.15990820832508473, "grad_norm": 2748.46630859375, "learning_rate": 4.94837508084505e-05, "loss": 113.5827, "step": 39580 }, { "epoch": 0.15994860959045237, "grad_norm": 1089.883544921875, "learning_rate": 4.948304485640963e-05, "loss": 110.2332, "step": 39590 }, { "epoch": 0.15998901085582, "grad_norm": 2496.34326171875, "learning_rate": 4.948233842705919e-05, "loss": 62.8154, "step": 39600 }, { "epoch": 0.16002941212118763, "grad_norm": 1067.39501953125, "learning_rate": 4.948163152041295e-05, "loss": 92.6578, "step": 39610 }, { "epoch": 0.16006981338655527, "grad_norm": 401.0006408691406, "learning_rate": 4.948092413648471e-05, "loss": 94.2235, "step": 39620 }, { "epoch": 0.1601102146519229, "grad_norm": 599.7701416015625, "learning_rate": 4.948021627528825e-05, "loss": 119.0192, "step": 39630 }, { "epoch": 0.16015061591729052, "grad_norm": 5756.70068359375, "learning_rate": 4.9479507936837364e-05, "loss": 117.7867, "step": 39640 }, { "epoch": 0.16019101718265816, "grad_norm": 1125.0499267578125, "learning_rate": 4.947879912114588e-05, "loss": 90.0595, "step": 39650 }, { "epoch": 0.1602314184480258, "grad_norm": 5096.72216796875, "learning_rate": 4.947808982822759e-05, "loss": 109.1604, "step": 39660 }, { "epoch": 0.1602718197133934, "grad_norm": 750.5372314453125, "learning_rate": 4.9477380058096343e-05, "loss": 75.6724, "step": 39670 }, { "epoch": 0.16031222097876105, "grad_norm": 721.302734375, "learning_rate": 4.947666981076597e-05, "loss": 76.214, "step": 39680 }, { "epoch": 0.1603526222441287, "grad_norm": 430.94671630859375, "learning_rate": 4.947595908625032e-05, "loss": 78.1926, "step": 39690 }, { "epoch": 0.16039302350949633, "grad_norm": 871.6290893554688, "learning_rate": 4.947524788456325e-05, "loss": 56.1409, "step": 39700 }, { "epoch": 0.16043342477486394, "grad_norm": 1097.3226318359375, "learning_rate": 4.9474536205718615e-05, "loss": 70.7656, "step": 39710 }, { "epoch": 0.16047382604023158, "grad_norm": 821.6055908203125, "learning_rate": 4.94738240497303e-05, "loss": 116.6263, "step": 39720 }, { "epoch": 0.16051422730559922, "grad_norm": 988.2919311523438, "learning_rate": 4.947311141661218e-05, "loss": 75.0355, "step": 39730 }, { "epoch": 0.16055462857096683, "grad_norm": 1874.0450439453125, "learning_rate": 4.947239830637815e-05, "loss": 93.3599, "step": 39740 }, { "epoch": 0.16059502983633447, "grad_norm": 1665.5904541015625, "learning_rate": 4.947168471904213e-05, "loss": 110.6867, "step": 39750 }, { "epoch": 0.16063543110170211, "grad_norm": 768.0989990234375, "learning_rate": 4.947097065461801e-05, "loss": 74.4268, "step": 39760 }, { "epoch": 0.16067583236706973, "grad_norm": 1051.240234375, "learning_rate": 4.947025611311972e-05, "loss": 114.7893, "step": 39770 }, { "epoch": 0.16071623363243737, "grad_norm": 4345.51953125, "learning_rate": 4.946954109456118e-05, "loss": 108.7796, "step": 39780 }, { "epoch": 0.160756634897805, "grad_norm": 1246.1304931640625, "learning_rate": 4.946882559895635e-05, "loss": 58.0561, "step": 39790 }, { "epoch": 0.16079703616317262, "grad_norm": 3426.0712890625, "learning_rate": 4.946810962631916e-05, "loss": 86.2152, "step": 39800 }, { "epoch": 0.16083743742854026, "grad_norm": 971.04052734375, "learning_rate": 4.9467393176663576e-05, "loss": 80.0207, "step": 39810 }, { "epoch": 0.1608778386939079, "grad_norm": 791.5706787109375, "learning_rate": 4.9466676250003576e-05, "loss": 83.3073, "step": 39820 }, { "epoch": 0.1609182399592755, "grad_norm": 1009.4277954101562, "learning_rate": 4.9465958846353114e-05, "loss": 81.5609, "step": 39830 }, { "epoch": 0.16095864122464315, "grad_norm": 645.2051391601562, "learning_rate": 4.9465240965726195e-05, "loss": 110.7899, "step": 39840 }, { "epoch": 0.1609990424900108, "grad_norm": 2333.77294921875, "learning_rate": 4.9464522608136805e-05, "loss": 67.2439, "step": 39850 }, { "epoch": 0.16103944375537843, "grad_norm": 657.8132934570312, "learning_rate": 4.946380377359895e-05, "loss": 60.2396, "step": 39860 }, { "epoch": 0.16107984502074604, "grad_norm": 1080.5504150390625, "learning_rate": 4.9463084462126655e-05, "loss": 101.5745, "step": 39870 }, { "epoch": 0.16112024628611368, "grad_norm": 550.1509399414062, "learning_rate": 4.946236467373392e-05, "loss": 76.9606, "step": 39880 }, { "epoch": 0.16116064755148132, "grad_norm": 686.2342529296875, "learning_rate": 4.94616444084348e-05, "loss": 87.1771, "step": 39890 }, { "epoch": 0.16120104881684894, "grad_norm": 563.68017578125, "learning_rate": 4.946092366624333e-05, "loss": 90.5833, "step": 39900 }, { "epoch": 0.16124145008221658, "grad_norm": 508.9067687988281, "learning_rate": 4.946020244717355e-05, "loss": 110.8761, "step": 39910 }, { "epoch": 0.16128185134758422, "grad_norm": 447.7995910644531, "learning_rate": 4.945948075123954e-05, "loss": 84.4401, "step": 39920 }, { "epoch": 0.16132225261295183, "grad_norm": 4074.44482421875, "learning_rate": 4.9458758578455354e-05, "loss": 130.2105, "step": 39930 }, { "epoch": 0.16136265387831947, "grad_norm": 434.82470703125, "learning_rate": 4.945803592883509e-05, "loss": 76.4253, "step": 39940 }, { "epoch": 0.1614030551436871, "grad_norm": 761.01123046875, "learning_rate": 4.945731280239281e-05, "loss": 106.6753, "step": 39950 }, { "epoch": 0.16144345640905472, "grad_norm": 1182.587646484375, "learning_rate": 4.9456589199142637e-05, "loss": 101.2267, "step": 39960 }, { "epoch": 0.16148385767442236, "grad_norm": 1377.45947265625, "learning_rate": 4.945586511909865e-05, "loss": 135.843, "step": 39970 }, { "epoch": 0.16152425893979, "grad_norm": 1164.2666015625, "learning_rate": 4.9455140562274995e-05, "loss": 104.891, "step": 39980 }, { "epoch": 0.1615646602051576, "grad_norm": 602.760498046875, "learning_rate": 4.9454415528685785e-05, "loss": 117.2372, "step": 39990 }, { "epoch": 0.16160506147052525, "grad_norm": 1094.737548828125, "learning_rate": 4.9453690018345144e-05, "loss": 85.9165, "step": 40000 }, { "epoch": 0.1616454627358929, "grad_norm": 599.16015625, "learning_rate": 4.9452964031267236e-05, "loss": 69.1269, "step": 40010 }, { "epoch": 0.16168586400126053, "grad_norm": 613.2879028320312, "learning_rate": 4.9452237567466194e-05, "loss": 88.8299, "step": 40020 }, { "epoch": 0.16172626526662814, "grad_norm": 735.5689086914062, "learning_rate": 4.9451510626956196e-05, "loss": 76.922, "step": 40030 }, { "epoch": 0.16176666653199578, "grad_norm": 696.985595703125, "learning_rate": 4.945078320975142e-05, "loss": 79.6425, "step": 40040 }, { "epoch": 0.16180706779736342, "grad_norm": 1030.8795166015625, "learning_rate": 4.9450055315866026e-05, "loss": 104.0844, "step": 40050 }, { "epoch": 0.16184746906273104, "grad_norm": 3068.77294921875, "learning_rate": 4.944932694531422e-05, "loss": 85.7902, "step": 40060 }, { "epoch": 0.16188787032809868, "grad_norm": 483.2486267089844, "learning_rate": 4.94485980981102e-05, "loss": 63.3755, "step": 40070 }, { "epoch": 0.16192827159346632, "grad_norm": 2090.39208984375, "learning_rate": 4.9447868774268166e-05, "loss": 83.095, "step": 40080 }, { "epoch": 0.16196867285883393, "grad_norm": 837.720947265625, "learning_rate": 4.944713897380235e-05, "loss": 68.0856, "step": 40090 }, { "epoch": 0.16200907412420157, "grad_norm": 3449.74267578125, "learning_rate": 4.9446408696726974e-05, "loss": 80.644, "step": 40100 }, { "epoch": 0.1620494753895692, "grad_norm": 532.7826538085938, "learning_rate": 4.944567794305627e-05, "loss": 72.2116, "step": 40110 }, { "epoch": 0.16208987665493682, "grad_norm": 518.95556640625, "learning_rate": 4.9444946712804494e-05, "loss": 95.1311, "step": 40120 }, { "epoch": 0.16213027792030446, "grad_norm": 2341.940673828125, "learning_rate": 4.944421500598589e-05, "loss": 101.21, "step": 40130 }, { "epoch": 0.1621706791856721, "grad_norm": 694.1212158203125, "learning_rate": 4.944348282261474e-05, "loss": 124.4218, "step": 40140 }, { "epoch": 0.1622110804510397, "grad_norm": 900.7708129882812, "learning_rate": 4.9442750162705295e-05, "loss": 89.3225, "step": 40150 }, { "epoch": 0.16225148171640735, "grad_norm": 1264.3707275390625, "learning_rate": 4.9442017026271864e-05, "loss": 116.5199, "step": 40160 }, { "epoch": 0.162291882981775, "grad_norm": 853.1660766601562, "learning_rate": 4.944128341332872e-05, "loss": 101.1801, "step": 40170 }, { "epoch": 0.16233228424714263, "grad_norm": 696.7931518554688, "learning_rate": 4.9440549323890176e-05, "loss": 81.5634, "step": 40180 }, { "epoch": 0.16237268551251025, "grad_norm": 1033.1129150390625, "learning_rate": 4.9439814757970535e-05, "loss": 79.6033, "step": 40190 }, { "epoch": 0.16241308677787789, "grad_norm": 605.3699951171875, "learning_rate": 4.9439079715584135e-05, "loss": 107.7314, "step": 40200 }, { "epoch": 0.16245348804324553, "grad_norm": 3663.65380859375, "learning_rate": 4.943834419674529e-05, "loss": 97.9108, "step": 40210 }, { "epoch": 0.16249388930861314, "grad_norm": 504.4712829589844, "learning_rate": 4.9437608201468336e-05, "loss": 79.6529, "step": 40220 }, { "epoch": 0.16253429057398078, "grad_norm": 759.597412109375, "learning_rate": 4.9436871729767634e-05, "loss": 65.1314, "step": 40230 }, { "epoch": 0.16257469183934842, "grad_norm": 810.0010986328125, "learning_rate": 4.943613478165753e-05, "loss": 76.6986, "step": 40240 }, { "epoch": 0.16261509310471603, "grad_norm": 701.2088623046875, "learning_rate": 4.94353973571524e-05, "loss": 59.842, "step": 40250 }, { "epoch": 0.16265549437008367, "grad_norm": 460.05841064453125, "learning_rate": 4.943465945626662e-05, "loss": 74.4412, "step": 40260 }, { "epoch": 0.1626958956354513, "grad_norm": 1123.1805419921875, "learning_rate": 4.943392107901458e-05, "loss": 141.1575, "step": 40270 }, { "epoch": 0.16273629690081892, "grad_norm": 524.522705078125, "learning_rate": 4.943318222541066e-05, "loss": 117.3683, "step": 40280 }, { "epoch": 0.16277669816618656, "grad_norm": 406.4377136230469, "learning_rate": 4.943244289546928e-05, "loss": 68.4648, "step": 40290 }, { "epoch": 0.1628170994315542, "grad_norm": 752.1702880859375, "learning_rate": 4.943170308920484e-05, "loss": 74.2303, "step": 40300 }, { "epoch": 0.16285750069692181, "grad_norm": 1674.6246337890625, "learning_rate": 4.943096280663178e-05, "loss": 123.305, "step": 40310 }, { "epoch": 0.16289790196228945, "grad_norm": 1532.0059814453125, "learning_rate": 4.9430222047764506e-05, "loss": 82.7036, "step": 40320 }, { "epoch": 0.1629383032276571, "grad_norm": 723.3096923828125, "learning_rate": 4.942948081261749e-05, "loss": 131.268, "step": 40330 }, { "epoch": 0.16297870449302473, "grad_norm": 1614.462646484375, "learning_rate": 4.942873910120516e-05, "loss": 109.0338, "step": 40340 }, { "epoch": 0.16301910575839235, "grad_norm": 720.302734375, "learning_rate": 4.9427996913542e-05, "loss": 63.5732, "step": 40350 }, { "epoch": 0.16305950702376, "grad_norm": 1045.8563232421875, "learning_rate": 4.9427254249642444e-05, "loss": 96.6388, "step": 40360 }, { "epoch": 0.16309990828912763, "grad_norm": 983.675048828125, "learning_rate": 4.9426511109521e-05, "loss": 66.8883, "step": 40370 }, { "epoch": 0.16314030955449524, "grad_norm": 668.4130249023438, "learning_rate": 4.9425767493192144e-05, "loss": 107.8524, "step": 40380 }, { "epoch": 0.16318071081986288, "grad_norm": 542.26318359375, "learning_rate": 4.942502340067038e-05, "loss": 64.179, "step": 40390 }, { "epoch": 0.16322111208523052, "grad_norm": 931.4650268554688, "learning_rate": 4.942427883197021e-05, "loss": 120.4847, "step": 40400 }, { "epoch": 0.16326151335059813, "grad_norm": 606.0546264648438, "learning_rate": 4.942353378710614e-05, "loss": 54.7097, "step": 40410 }, { "epoch": 0.16330191461596577, "grad_norm": 1016.0202026367188, "learning_rate": 4.9422788266092715e-05, "loss": 108.11, "step": 40420 }, { "epoch": 0.1633423158813334, "grad_norm": 1137.9029541015625, "learning_rate": 4.942204226894445e-05, "loss": 105.8649, "step": 40430 }, { "epoch": 0.16338271714670102, "grad_norm": 1075.5977783203125, "learning_rate": 4.94212957956759e-05, "loss": 101.3763, "step": 40440 }, { "epoch": 0.16342311841206866, "grad_norm": 674.2771606445312, "learning_rate": 4.942054884630162e-05, "loss": 81.1654, "step": 40450 }, { "epoch": 0.1634635196774363, "grad_norm": 582.1860961914062, "learning_rate": 4.941980142083617e-05, "loss": 78.6935, "step": 40460 }, { "epoch": 0.16350392094280392, "grad_norm": 688.1861572265625, "learning_rate": 4.9419053519294115e-05, "loss": 79.6108, "step": 40470 }, { "epoch": 0.16354432220817156, "grad_norm": 1063.1949462890625, "learning_rate": 4.941830514169004e-05, "loss": 85.3794, "step": 40480 }, { "epoch": 0.1635847234735392, "grad_norm": 802.294189453125, "learning_rate": 4.941755628803853e-05, "loss": 86.5771, "step": 40490 }, { "epoch": 0.16362512473890684, "grad_norm": 2401.399169921875, "learning_rate": 4.94168069583542e-05, "loss": 123.1953, "step": 40500 }, { "epoch": 0.16366552600427445, "grad_norm": 472.39874267578125, "learning_rate": 4.941605715265164e-05, "loss": 40.4517, "step": 40510 }, { "epoch": 0.1637059272696421, "grad_norm": 502.11492919921875, "learning_rate": 4.941530687094548e-05, "loss": 93.8861, "step": 40520 }, { "epoch": 0.16374632853500973, "grad_norm": 481.69085693359375, "learning_rate": 4.9414556113250344e-05, "loss": 100.2681, "step": 40530 }, { "epoch": 0.16378672980037734, "grad_norm": 718.2989501953125, "learning_rate": 4.941380487958086e-05, "loss": 67.0155, "step": 40540 }, { "epoch": 0.16382713106574498, "grad_norm": 707.5899658203125, "learning_rate": 4.941305316995169e-05, "loss": 80.2658, "step": 40550 }, { "epoch": 0.16386753233111262, "grad_norm": 411.375244140625, "learning_rate": 4.941230098437747e-05, "loss": 86.6765, "step": 40560 }, { "epoch": 0.16390793359648023, "grad_norm": 1048.686279296875, "learning_rate": 4.941154832287288e-05, "loss": 90.3216, "step": 40570 }, { "epoch": 0.16394833486184787, "grad_norm": 951.8493041992188, "learning_rate": 4.941079518545258e-05, "loss": 64.0513, "step": 40580 }, { "epoch": 0.1639887361272155, "grad_norm": 5584.05908203125, "learning_rate": 4.9410041572131266e-05, "loss": 109.0281, "step": 40590 }, { "epoch": 0.16402913739258312, "grad_norm": 1825.2513427734375, "learning_rate": 4.940928748292363e-05, "loss": 83.6246, "step": 40600 }, { "epoch": 0.16406953865795076, "grad_norm": 562.28076171875, "learning_rate": 4.940853291784435e-05, "loss": 72.4572, "step": 40610 }, { "epoch": 0.1641099399233184, "grad_norm": 2325.668212890625, "learning_rate": 4.9407777876908174e-05, "loss": 104.7029, "step": 40620 }, { "epoch": 0.16415034118868602, "grad_norm": 1462.803955078125, "learning_rate": 4.9407022360129796e-05, "loss": 113.1334, "step": 40630 }, { "epoch": 0.16419074245405366, "grad_norm": 539.4075927734375, "learning_rate": 4.9406266367523945e-05, "loss": 87.7285, "step": 40640 }, { "epoch": 0.1642311437194213, "grad_norm": 1116.6656494140625, "learning_rate": 4.940550989910537e-05, "loss": 127.9007, "step": 40650 }, { "epoch": 0.16427154498478894, "grad_norm": 745.6444702148438, "learning_rate": 4.9404752954888824e-05, "loss": 95.8032, "step": 40660 }, { "epoch": 0.16431194625015655, "grad_norm": 1984.2926025390625, "learning_rate": 4.9403995534889044e-05, "loss": 61.1674, "step": 40670 }, { "epoch": 0.1643523475155242, "grad_norm": 553.41748046875, "learning_rate": 4.9403237639120805e-05, "loss": 87.0418, "step": 40680 }, { "epoch": 0.16439274878089183, "grad_norm": 786.6685791015625, "learning_rate": 4.9402479267598887e-05, "loss": 93.9468, "step": 40690 }, { "epoch": 0.16443315004625944, "grad_norm": 826.0059814453125, "learning_rate": 4.940172042033808e-05, "loss": 102.5501, "step": 40700 }, { "epoch": 0.16447355131162708, "grad_norm": 1732.5660400390625, "learning_rate": 4.9400961097353166e-05, "loss": 110.0081, "step": 40710 }, { "epoch": 0.16451395257699472, "grad_norm": 989.8864135742188, "learning_rate": 4.940020129865895e-05, "loss": 63.4497, "step": 40720 }, { "epoch": 0.16455435384236233, "grad_norm": 484.3291931152344, "learning_rate": 4.939944102427025e-05, "loss": 111.996, "step": 40730 }, { "epoch": 0.16459475510772997, "grad_norm": 218.7427520751953, "learning_rate": 4.939868027420189e-05, "loss": 83.2038, "step": 40740 }, { "epoch": 0.1646351563730976, "grad_norm": 3197.0927734375, "learning_rate": 4.939791904846869e-05, "loss": 99.5802, "step": 40750 }, { "epoch": 0.16467555763846523, "grad_norm": 805.2986450195312, "learning_rate": 4.93971573470855e-05, "loss": 85.1965, "step": 40760 }, { "epoch": 0.16471595890383287, "grad_norm": 846.134765625, "learning_rate": 4.939639517006717e-05, "loss": 67.5336, "step": 40770 }, { "epoch": 0.1647563601692005, "grad_norm": 780.6873779296875, "learning_rate": 4.939563251742855e-05, "loss": 85.52, "step": 40780 }, { "epoch": 0.16479676143456812, "grad_norm": 1174.27685546875, "learning_rate": 4.939486938918451e-05, "loss": 97.5447, "step": 40790 }, { "epoch": 0.16483716269993576, "grad_norm": 763.7103271484375, "learning_rate": 4.9394105785349944e-05, "loss": 50.4769, "step": 40800 }, { "epoch": 0.1648775639653034, "grad_norm": 704.8574829101562, "learning_rate": 4.939334170593972e-05, "loss": 107.5855, "step": 40810 }, { "epoch": 0.16491796523067104, "grad_norm": 695.6289672851562, "learning_rate": 4.9392577150968745e-05, "loss": 119.9477, "step": 40820 }, { "epoch": 0.16495836649603865, "grad_norm": 960.3919677734375, "learning_rate": 4.939181212045192e-05, "loss": 75.6716, "step": 40830 }, { "epoch": 0.1649987677614063, "grad_norm": 930.9579467773438, "learning_rate": 4.939104661440415e-05, "loss": 75.375, "step": 40840 }, { "epoch": 0.16503916902677393, "grad_norm": 517.5274658203125, "learning_rate": 4.939028063284038e-05, "loss": 48.7727, "step": 40850 }, { "epoch": 0.16507957029214154, "grad_norm": 559.23974609375, "learning_rate": 4.938951417577552e-05, "loss": 88.6447, "step": 40860 }, { "epoch": 0.16511997155750918, "grad_norm": 1341.4295654296875, "learning_rate": 4.938874724322454e-05, "loss": 58.3951, "step": 40870 }, { "epoch": 0.16516037282287682, "grad_norm": 818.9078369140625, "learning_rate": 4.938797983520237e-05, "loss": 90.6307, "step": 40880 }, { "epoch": 0.16520077408824443, "grad_norm": 719.2857055664062, "learning_rate": 4.938721195172398e-05, "loss": 89.5419, "step": 40890 }, { "epoch": 0.16524117535361207, "grad_norm": 1062.7843017578125, "learning_rate": 4.938644359280433e-05, "loss": 57.1088, "step": 40900 }, { "epoch": 0.16528157661897971, "grad_norm": 713.6543579101562, "learning_rate": 4.938567475845841e-05, "loss": 98.1421, "step": 40910 }, { "epoch": 0.16532197788434733, "grad_norm": 1030.1905517578125, "learning_rate": 4.938490544870121e-05, "loss": 115.5957, "step": 40920 }, { "epoch": 0.16536237914971497, "grad_norm": 4530.09912109375, "learning_rate": 4.938413566354772e-05, "loss": 105.8919, "step": 40930 }, { "epoch": 0.1654027804150826, "grad_norm": 1015.1611328125, "learning_rate": 4.938336540301295e-05, "loss": 130.0979, "step": 40940 }, { "epoch": 0.16544318168045022, "grad_norm": 974.16064453125, "learning_rate": 4.938259466711193e-05, "loss": 61.6297, "step": 40950 }, { "epoch": 0.16548358294581786, "grad_norm": 874.3550415039062, "learning_rate": 4.938182345585966e-05, "loss": 76.4001, "step": 40960 }, { "epoch": 0.1655239842111855, "grad_norm": 2546.218017578125, "learning_rate": 4.938105176927119e-05, "loss": 78.0685, "step": 40970 }, { "epoch": 0.16556438547655314, "grad_norm": 1073.3936767578125, "learning_rate": 4.9380279607361575e-05, "loss": 72.0243, "step": 40980 }, { "epoch": 0.16560478674192075, "grad_norm": 1150.05029296875, "learning_rate": 4.937950697014585e-05, "loss": 76.5943, "step": 40990 }, { "epoch": 0.1656451880072884, "grad_norm": 482.436279296875, "learning_rate": 4.937873385763908e-05, "loss": 75.9616, "step": 41000 }, { "epoch": 0.16568558927265603, "grad_norm": 650.821044921875, "learning_rate": 4.9377960269856346e-05, "loss": 80.1524, "step": 41010 }, { "epoch": 0.16572599053802364, "grad_norm": 486.8204040527344, "learning_rate": 4.937718620681273e-05, "loss": 73.0701, "step": 41020 }, { "epoch": 0.16576639180339128, "grad_norm": 1333.5208740234375, "learning_rate": 4.937641166852332e-05, "loss": 73.1544, "step": 41030 }, { "epoch": 0.16580679306875892, "grad_norm": 528.667236328125, "learning_rate": 4.937563665500321e-05, "loss": 89.639, "step": 41040 }, { "epoch": 0.16584719433412654, "grad_norm": 924.5604858398438, "learning_rate": 4.937486116626752e-05, "loss": 122.8974, "step": 41050 }, { "epoch": 0.16588759559949418, "grad_norm": 1802.9498291015625, "learning_rate": 4.9374085202331354e-05, "loss": 69.5255, "step": 41060 }, { "epoch": 0.16592799686486182, "grad_norm": 519.0206909179688, "learning_rate": 4.937330876320985e-05, "loss": 89.9981, "step": 41070 }, { "epoch": 0.16596839813022943, "grad_norm": 561.7305908203125, "learning_rate": 4.9372531848918145e-05, "loss": 76.0487, "step": 41080 }, { "epoch": 0.16600879939559707, "grad_norm": 740.5826416015625, "learning_rate": 4.9371754459471384e-05, "loss": 115.638, "step": 41090 }, { "epoch": 0.1660492006609647, "grad_norm": 674.3680419921875, "learning_rate": 4.9370976594884723e-05, "loss": 73.8446, "step": 41100 }, { "epoch": 0.16608960192633232, "grad_norm": 539.9810180664062, "learning_rate": 4.937019825517333e-05, "loss": 57.1198, "step": 41110 }, { "epoch": 0.16613000319169996, "grad_norm": 1040.4820556640625, "learning_rate": 4.936941944035237e-05, "loss": 144.8092, "step": 41120 }, { "epoch": 0.1661704044570676, "grad_norm": 849.093994140625, "learning_rate": 4.936864015043703e-05, "loss": 52.7319, "step": 41130 }, { "epoch": 0.16621080572243524, "grad_norm": 641.831787109375, "learning_rate": 4.936786038544251e-05, "loss": 59.0339, "step": 41140 }, { "epoch": 0.16625120698780285, "grad_norm": 554.9229736328125, "learning_rate": 4.9367080145384006e-05, "loss": 95.6639, "step": 41150 }, { "epoch": 0.1662916082531705, "grad_norm": 392.3255920410156, "learning_rate": 4.936629943027672e-05, "loss": 96.7075, "step": 41160 }, { "epoch": 0.16633200951853813, "grad_norm": 448.32647705078125, "learning_rate": 4.936551824013589e-05, "loss": 65.1692, "step": 41170 }, { "epoch": 0.16637241078390574, "grad_norm": 992.157470703125, "learning_rate": 4.9364736574976736e-05, "loss": 116.2756, "step": 41180 }, { "epoch": 0.16641281204927338, "grad_norm": 383.3032531738281, "learning_rate": 4.93639544348145e-05, "loss": 82.3815, "step": 41190 }, { "epoch": 0.16645321331464102, "grad_norm": 3468.877685546875, "learning_rate": 4.9363171819664434e-05, "loss": 124.7114, "step": 41200 }, { "epoch": 0.16649361458000864, "grad_norm": 959.5894165039062, "learning_rate": 4.936238872954178e-05, "loss": 115.3659, "step": 41210 }, { "epoch": 0.16653401584537628, "grad_norm": 654.4769287109375, "learning_rate": 4.936160516446182e-05, "loss": 84.5516, "step": 41220 }, { "epoch": 0.16657441711074392, "grad_norm": 513.7091064453125, "learning_rate": 4.936082112443983e-05, "loss": 72.7509, "step": 41230 }, { "epoch": 0.16661481837611153, "grad_norm": 1236.0504150390625, "learning_rate": 4.936003660949108e-05, "loss": 82.1154, "step": 41240 }, { "epoch": 0.16665521964147917, "grad_norm": 1588.6417236328125, "learning_rate": 4.9359251619630886e-05, "loss": 76.5787, "step": 41250 }, { "epoch": 0.1666956209068468, "grad_norm": 851.9100341796875, "learning_rate": 4.935846615487453e-05, "loss": 73.6734, "step": 41260 }, { "epoch": 0.16673602217221442, "grad_norm": 439.6374816894531, "learning_rate": 4.935768021523734e-05, "loss": 75.6803, "step": 41270 }, { "epoch": 0.16677642343758206, "grad_norm": 2391.953125, "learning_rate": 4.935689380073464e-05, "loss": 167.1796, "step": 41280 }, { "epoch": 0.1668168247029497, "grad_norm": 591.0051879882812, "learning_rate": 4.935610691138175e-05, "loss": 89.2333, "step": 41290 }, { "epoch": 0.16685722596831734, "grad_norm": 895.1334228515625, "learning_rate": 4.9355319547194014e-05, "loss": 110.6889, "step": 41300 }, { "epoch": 0.16689762723368495, "grad_norm": 873.7748413085938, "learning_rate": 4.935453170818679e-05, "loss": 82.6634, "step": 41310 }, { "epoch": 0.1669380284990526, "grad_norm": 582.4258422851562, "learning_rate": 4.935374339437543e-05, "loss": 82.0953, "step": 41320 }, { "epoch": 0.16697842976442023, "grad_norm": 1372.591064453125, "learning_rate": 4.9352954605775305e-05, "loss": 110.3943, "step": 41330 }, { "epoch": 0.16701883102978785, "grad_norm": 1086.363525390625, "learning_rate": 4.935216534240179e-05, "loss": 103.8, "step": 41340 }, { "epoch": 0.16705923229515549, "grad_norm": 1426.1312255859375, "learning_rate": 4.935137560427027e-05, "loss": 99.1618, "step": 41350 }, { "epoch": 0.16709963356052313, "grad_norm": 1669.37744140625, "learning_rate": 4.935058539139615e-05, "loss": 122.2307, "step": 41360 }, { "epoch": 0.16714003482589074, "grad_norm": 766.4739379882812, "learning_rate": 4.934979470379484e-05, "loss": 77.6077, "step": 41370 }, { "epoch": 0.16718043609125838, "grad_norm": 846.854248046875, "learning_rate": 4.934900354148173e-05, "loss": 87.1017, "step": 41380 }, { "epoch": 0.16722083735662602, "grad_norm": 1178.765869140625, "learning_rate": 4.934821190447228e-05, "loss": 118.6451, "step": 41390 }, { "epoch": 0.16726123862199363, "grad_norm": 622.7171020507812, "learning_rate": 4.9347419792781876e-05, "loss": 92.5647, "step": 41400 }, { "epoch": 0.16730163988736127, "grad_norm": 871.9891967773438, "learning_rate": 4.934662720642601e-05, "loss": 78.3886, "step": 41410 }, { "epoch": 0.1673420411527289, "grad_norm": 601.3154296875, "learning_rate": 4.934583414542011e-05, "loss": 69.2431, "step": 41420 }, { "epoch": 0.16738244241809652, "grad_norm": 620.5543823242188, "learning_rate": 4.9345040609779634e-05, "loss": 74.3392, "step": 41430 }, { "epoch": 0.16742284368346416, "grad_norm": 803.2666625976562, "learning_rate": 4.934424659952006e-05, "loss": 90.4268, "step": 41440 }, { "epoch": 0.1674632449488318, "grad_norm": 1084.6322021484375, "learning_rate": 4.934345211465686e-05, "loss": 91.4339, "step": 41450 }, { "epoch": 0.16750364621419944, "grad_norm": 570.4534912109375, "learning_rate": 4.934265715520553e-05, "loss": 67.0539, "step": 41460 }, { "epoch": 0.16754404747956705, "grad_norm": 5001.0390625, "learning_rate": 4.934186172118157e-05, "loss": 115.2946, "step": 41470 }, { "epoch": 0.1675844487449347, "grad_norm": 0.0, "learning_rate": 4.934106581260049e-05, "loss": 71.3725, "step": 41480 }, { "epoch": 0.16762485001030233, "grad_norm": 674.1163940429688, "learning_rate": 4.934026942947779e-05, "loss": 67.2382, "step": 41490 }, { "epoch": 0.16766525127566995, "grad_norm": 1031.8499755859375, "learning_rate": 4.933947257182901e-05, "loss": 81.7795, "step": 41500 }, { "epoch": 0.1677056525410376, "grad_norm": 1388.8055419921875, "learning_rate": 4.933867523966968e-05, "loss": 67.2926, "step": 41510 }, { "epoch": 0.16774605380640523, "grad_norm": 1057.6483154296875, "learning_rate": 4.933787743301534e-05, "loss": 78.5172, "step": 41520 }, { "epoch": 0.16778645507177284, "grad_norm": 1191.138427734375, "learning_rate": 4.933707915188156e-05, "loss": 115.7021, "step": 41530 }, { "epoch": 0.16782685633714048, "grad_norm": 803.36376953125, "learning_rate": 4.933628039628389e-05, "loss": 80.2134, "step": 41540 }, { "epoch": 0.16786725760250812, "grad_norm": 434.1454772949219, "learning_rate": 4.9335481166237904e-05, "loss": 93.0307, "step": 41550 }, { "epoch": 0.16790765886787573, "grad_norm": 1040.9764404296875, "learning_rate": 4.933468146175918e-05, "loss": 108.4676, "step": 41560 }, { "epoch": 0.16794806013324337, "grad_norm": 2650.668701171875, "learning_rate": 4.933388128286331e-05, "loss": 124.0231, "step": 41570 }, { "epoch": 0.167988461398611, "grad_norm": 850.1209716796875, "learning_rate": 4.933308062956591e-05, "loss": 98.8213, "step": 41580 }, { "epoch": 0.16802886266397862, "grad_norm": 1043.2742919921875, "learning_rate": 4.9332279501882564e-05, "loss": 86.2264, "step": 41590 }, { "epoch": 0.16806926392934626, "grad_norm": 1196.578369140625, "learning_rate": 4.93314778998289e-05, "loss": 80.5056, "step": 41600 }, { "epoch": 0.1681096651947139, "grad_norm": 738.3115234375, "learning_rate": 4.933067582342056e-05, "loss": 111.9835, "step": 41610 }, { "epoch": 0.16815006646008154, "grad_norm": 828.2302856445312, "learning_rate": 4.932987327267316e-05, "loss": 87.0005, "step": 41620 }, { "epoch": 0.16819046772544916, "grad_norm": 474.343017578125, "learning_rate": 4.932907024760236e-05, "loss": 93.0332, "step": 41630 }, { "epoch": 0.1682308689908168, "grad_norm": 718.5823364257812, "learning_rate": 4.93282667482238e-05, "loss": 66.0602, "step": 41640 }, { "epoch": 0.16827127025618444, "grad_norm": 491.5641784667969, "learning_rate": 4.9327462774553166e-05, "loss": 93.1942, "step": 41650 }, { "epoch": 0.16831167152155205, "grad_norm": 244.37391662597656, "learning_rate": 4.9326658326606114e-05, "loss": 80.5441, "step": 41660 }, { "epoch": 0.1683520727869197, "grad_norm": 621.1357421875, "learning_rate": 4.9325853404398337e-05, "loss": 107.9278, "step": 41670 }, { "epoch": 0.16839247405228733, "grad_norm": 1189.0419921875, "learning_rate": 4.9325048007945526e-05, "loss": 98.2764, "step": 41680 }, { "epoch": 0.16843287531765494, "grad_norm": 1160.5760498046875, "learning_rate": 4.9324242137263376e-05, "loss": 85.1483, "step": 41690 }, { "epoch": 0.16847327658302258, "grad_norm": 717.5256958007812, "learning_rate": 4.93234357923676e-05, "loss": 68.967, "step": 41700 }, { "epoch": 0.16851367784839022, "grad_norm": 738.8279418945312, "learning_rate": 4.932262897327393e-05, "loss": 94.0461, "step": 41710 }, { "epoch": 0.16855407911375783, "grad_norm": 372.3353576660156, "learning_rate": 4.9321821679998074e-05, "loss": 107.4272, "step": 41720 }, { "epoch": 0.16859448037912547, "grad_norm": 778.2406616210938, "learning_rate": 4.932101391255579e-05, "loss": 127.5508, "step": 41730 }, { "epoch": 0.1686348816444931, "grad_norm": 296.44610595703125, "learning_rate": 4.9320205670962814e-05, "loss": 68.8699, "step": 41740 }, { "epoch": 0.16867528290986072, "grad_norm": 1934.7763671875, "learning_rate": 4.931939695523492e-05, "loss": 96.5082, "step": 41750 }, { "epoch": 0.16871568417522836, "grad_norm": 920.8310546875, "learning_rate": 4.9318587765387845e-05, "loss": 84.8141, "step": 41760 }, { "epoch": 0.168756085440596, "grad_norm": 785.7876586914062, "learning_rate": 4.93177781014374e-05, "loss": 69.8683, "step": 41770 }, { "epoch": 0.16879648670596364, "grad_norm": 747.9173583984375, "learning_rate": 4.9316967963399335e-05, "loss": 87.638, "step": 41780 }, { "epoch": 0.16883688797133126, "grad_norm": 1194.38037109375, "learning_rate": 4.931615735128947e-05, "loss": 66.9209, "step": 41790 }, { "epoch": 0.1688772892366989, "grad_norm": 944.1591796875, "learning_rate": 4.9315346265123594e-05, "loss": 80.2107, "step": 41800 }, { "epoch": 0.16891769050206654, "grad_norm": 1037.114013671875, "learning_rate": 4.9314534704917525e-05, "loss": 82.2505, "step": 41810 }, { "epoch": 0.16895809176743415, "grad_norm": 723.7459716796875, "learning_rate": 4.931372267068708e-05, "loss": 65.5946, "step": 41820 }, { "epoch": 0.1689984930328018, "grad_norm": 800.6956176757812, "learning_rate": 4.93129101624481e-05, "loss": 88.3285, "step": 41830 }, { "epoch": 0.16903889429816943, "grad_norm": 634.937744140625, "learning_rate": 4.9312097180216414e-05, "loss": 85.8466, "step": 41840 }, { "epoch": 0.16907929556353704, "grad_norm": 438.7951965332031, "learning_rate": 4.9311283724007887e-05, "loss": 59.9148, "step": 41850 }, { "epoch": 0.16911969682890468, "grad_norm": 6580.62451171875, "learning_rate": 4.931046979383835e-05, "loss": 127.95, "step": 41860 }, { "epoch": 0.16916009809427232, "grad_norm": 1237.6083984375, "learning_rate": 4.9309655389723705e-05, "loss": 81.8312, "step": 41870 }, { "epoch": 0.16920049935963993, "grad_norm": 645.1431274414062, "learning_rate": 4.9308840511679804e-05, "loss": 83.625, "step": 41880 }, { "epoch": 0.16924090062500757, "grad_norm": 808.107177734375, "learning_rate": 4.930802515972255e-05, "loss": 82.5783, "step": 41890 }, { "epoch": 0.1692813018903752, "grad_norm": 542.672119140625, "learning_rate": 4.930720933386782e-05, "loss": 77.4106, "step": 41900 }, { "epoch": 0.16932170315574283, "grad_norm": 1336.5858154296875, "learning_rate": 4.930639303413154e-05, "loss": 124.7732, "step": 41910 }, { "epoch": 0.16936210442111047, "grad_norm": 591.5770263671875, "learning_rate": 4.9305576260529607e-05, "loss": 65.4329, "step": 41920 }, { "epoch": 0.1694025056864781, "grad_norm": 804.681396484375, "learning_rate": 4.930475901307795e-05, "loss": 76.3269, "step": 41930 }, { "epoch": 0.16944290695184575, "grad_norm": 987.9590454101562, "learning_rate": 4.930394129179251e-05, "loss": 71.606, "step": 41940 }, { "epoch": 0.16948330821721336, "grad_norm": 766.5450439453125, "learning_rate": 4.930312309668922e-05, "loss": 109.5266, "step": 41950 }, { "epoch": 0.169523709482581, "grad_norm": 337.4928894042969, "learning_rate": 4.930230442778403e-05, "loss": 74.3608, "step": 41960 }, { "epoch": 0.16956411074794864, "grad_norm": 3450.510009765625, "learning_rate": 4.930148528509291e-05, "loss": 109.2802, "step": 41970 }, { "epoch": 0.16960451201331625, "grad_norm": 864.0614624023438, "learning_rate": 4.930066566863182e-05, "loss": 78.3614, "step": 41980 }, { "epoch": 0.1696449132786839, "grad_norm": 930.443115234375, "learning_rate": 4.929984557841674e-05, "loss": 126.5678, "step": 41990 }, { "epoch": 0.16968531454405153, "grad_norm": 416.3900146484375, "learning_rate": 4.929902501446366e-05, "loss": 62.9766, "step": 42000 }, { "epoch": 0.16972571580941914, "grad_norm": 533.2973022460938, "learning_rate": 4.929820397678858e-05, "loss": 64.3053, "step": 42010 }, { "epoch": 0.16976611707478678, "grad_norm": 357.4167175292969, "learning_rate": 4.92973824654075e-05, "loss": 43.4001, "step": 42020 }, { "epoch": 0.16980651834015442, "grad_norm": 1577.94384765625, "learning_rate": 4.929656048033644e-05, "loss": 72.9057, "step": 42030 }, { "epoch": 0.16984691960552203, "grad_norm": 4723.44775390625, "learning_rate": 4.929573802159143e-05, "loss": 78.4315, "step": 42040 }, { "epoch": 0.16988732087088967, "grad_norm": 607.1101684570312, "learning_rate": 4.929491508918849e-05, "loss": 83.3789, "step": 42050 }, { "epoch": 0.16992772213625731, "grad_norm": 527.4968872070312, "learning_rate": 4.929409168314368e-05, "loss": 48.889, "step": 42060 }, { "epoch": 0.16996812340162493, "grad_norm": 1297.30029296875, "learning_rate": 4.9293267803473046e-05, "loss": 85.3707, "step": 42070 }, { "epoch": 0.17000852466699257, "grad_norm": 993.5496215820312, "learning_rate": 4.9292443450192645e-05, "loss": 103.5595, "step": 42080 }, { "epoch": 0.1700489259323602, "grad_norm": 948.1973876953125, "learning_rate": 4.929161862331855e-05, "loss": 91.8254, "step": 42090 }, { "epoch": 0.17008932719772785, "grad_norm": 740.2516479492188, "learning_rate": 4.929079332286685e-05, "loss": 65.2068, "step": 42100 }, { "epoch": 0.17012972846309546, "grad_norm": 745.3865356445312, "learning_rate": 4.9289967548853627e-05, "loss": 49.6714, "step": 42110 }, { "epoch": 0.1701701297284631, "grad_norm": 1320.7667236328125, "learning_rate": 4.928914130129498e-05, "loss": 86.5712, "step": 42120 }, { "epoch": 0.17021053099383074, "grad_norm": 1176.5162353515625, "learning_rate": 4.928831458020702e-05, "loss": 113.8014, "step": 42130 }, { "epoch": 0.17025093225919835, "grad_norm": 566.5439453125, "learning_rate": 4.928748738560586e-05, "loss": 72.2683, "step": 42140 }, { "epoch": 0.170291333524566, "grad_norm": 1156.8316650390625, "learning_rate": 4.9286659717507635e-05, "loss": 76.2927, "step": 42150 }, { "epoch": 0.17033173478993363, "grad_norm": 719.6638793945312, "learning_rate": 4.9285831575928465e-05, "loss": 69.5816, "step": 42160 }, { "epoch": 0.17037213605530124, "grad_norm": 1295.44384765625, "learning_rate": 4.9285002960884515e-05, "loss": 84.5863, "step": 42170 }, { "epoch": 0.17041253732066888, "grad_norm": 1051.867431640625, "learning_rate": 4.9284173872391925e-05, "loss": 82.4616, "step": 42180 }, { "epoch": 0.17045293858603652, "grad_norm": 859.74609375, "learning_rate": 4.928334431046686e-05, "loss": 111.8195, "step": 42190 }, { "epoch": 0.17049333985140414, "grad_norm": 1075.9024658203125, "learning_rate": 4.92825142751255e-05, "loss": 85.3877, "step": 42200 }, { "epoch": 0.17053374111677178, "grad_norm": 1071.4013671875, "learning_rate": 4.9281683766384026e-05, "loss": 77.8994, "step": 42210 }, { "epoch": 0.17057414238213942, "grad_norm": 841.3255615234375, "learning_rate": 4.9280852784258624e-05, "loss": 104.7014, "step": 42220 }, { "epoch": 0.17061454364750703, "grad_norm": 1585.5380859375, "learning_rate": 4.928002132876549e-05, "loss": 73.6184, "step": 42230 }, { "epoch": 0.17065494491287467, "grad_norm": 1064.724609375, "learning_rate": 4.9279189399920844e-05, "loss": 80.9862, "step": 42240 }, { "epoch": 0.1706953461782423, "grad_norm": 1124.08642578125, "learning_rate": 4.9278356997740904e-05, "loss": 81.2137, "step": 42250 }, { "epoch": 0.17073574744360995, "grad_norm": 1296.80078125, "learning_rate": 4.9277524122241894e-05, "loss": 76.6494, "step": 42260 }, { "epoch": 0.17077614870897756, "grad_norm": 705.2342529296875, "learning_rate": 4.927669077344005e-05, "loss": 124.0301, "step": 42270 }, { "epoch": 0.1708165499743452, "grad_norm": 699.51123046875, "learning_rate": 4.927585695135162e-05, "loss": 90.7621, "step": 42280 }, { "epoch": 0.17085695123971284, "grad_norm": 1193.2637939453125, "learning_rate": 4.9275022655992864e-05, "loss": 77.3909, "step": 42290 }, { "epoch": 0.17089735250508045, "grad_norm": 1224.87744140625, "learning_rate": 4.927418788738004e-05, "loss": 90.4889, "step": 42300 }, { "epoch": 0.1709377537704481, "grad_norm": 566.4915771484375, "learning_rate": 4.927335264552943e-05, "loss": 76.954, "step": 42310 }, { "epoch": 0.17097815503581573, "grad_norm": 485.48248291015625, "learning_rate": 4.9272516930457314e-05, "loss": 85.5353, "step": 42320 }, { "epoch": 0.17101855630118334, "grad_norm": 510.9328918457031, "learning_rate": 4.927168074217998e-05, "loss": 78.9659, "step": 42330 }, { "epoch": 0.17105895756655098, "grad_norm": 542.828125, "learning_rate": 4.927084408071373e-05, "loss": 77.3036, "step": 42340 }, { "epoch": 0.17109935883191862, "grad_norm": 1816.97265625, "learning_rate": 4.927000694607489e-05, "loss": 89.3204, "step": 42350 }, { "epoch": 0.17113976009728624, "grad_norm": 558.980712890625, "learning_rate": 4.9269169338279766e-05, "loss": 97.5531, "step": 42360 }, { "epoch": 0.17118016136265388, "grad_norm": 2137.310791015625, "learning_rate": 4.9268331257344685e-05, "loss": 111.0955, "step": 42370 }, { "epoch": 0.17122056262802152, "grad_norm": 349.211181640625, "learning_rate": 4.9267492703286e-05, "loss": 116.3871, "step": 42380 }, { "epoch": 0.17126096389338913, "grad_norm": 1252.0584716796875, "learning_rate": 4.926665367612005e-05, "loss": 78.8891, "step": 42390 }, { "epoch": 0.17130136515875677, "grad_norm": 587.7830200195312, "learning_rate": 4.9265814175863186e-05, "loss": 90.0841, "step": 42400 }, { "epoch": 0.1713417664241244, "grad_norm": 1336.141845703125, "learning_rate": 4.926497420253179e-05, "loss": 99.6293, "step": 42410 }, { "epoch": 0.17138216768949205, "grad_norm": 1955.26171875, "learning_rate": 4.9264133756142224e-05, "loss": 74.8807, "step": 42420 }, { "epoch": 0.17142256895485966, "grad_norm": 1671.5946044921875, "learning_rate": 4.926329283671088e-05, "loss": 100.2369, "step": 42430 }, { "epoch": 0.1714629702202273, "grad_norm": 489.6711120605469, "learning_rate": 4.926245144425415e-05, "loss": 92.0346, "step": 42440 }, { "epoch": 0.17150337148559494, "grad_norm": 1527.400634765625, "learning_rate": 4.9261609578788435e-05, "loss": 104.6484, "step": 42450 }, { "epoch": 0.17154377275096255, "grad_norm": 1441.6905517578125, "learning_rate": 4.926076724033016e-05, "loss": 118.5627, "step": 42460 }, { "epoch": 0.1715841740163302, "grad_norm": 1650.8604736328125, "learning_rate": 4.9259924428895734e-05, "loss": 95.2558, "step": 42470 }, { "epoch": 0.17162457528169783, "grad_norm": 1053.914794921875, "learning_rate": 4.925908114450158e-05, "loss": 92.1593, "step": 42480 }, { "epoch": 0.17166497654706545, "grad_norm": 485.0592956542969, "learning_rate": 4.925823738716416e-05, "loss": 48.5927, "step": 42490 }, { "epoch": 0.17170537781243309, "grad_norm": 551.2293701171875, "learning_rate": 4.925739315689991e-05, "loss": 111.4056, "step": 42500 }, { "epoch": 0.17174577907780073, "grad_norm": 462.1259460449219, "learning_rate": 4.92565484537253e-05, "loss": 81.3713, "step": 42510 }, { "epoch": 0.17178618034316834, "grad_norm": 976.673095703125, "learning_rate": 4.925570327765678e-05, "loss": 97.2899, "step": 42520 }, { "epoch": 0.17182658160853598, "grad_norm": 1159.7283935546875, "learning_rate": 4.9254857628710846e-05, "loss": 72.4565, "step": 42530 }, { "epoch": 0.17186698287390362, "grad_norm": 886.2018432617188, "learning_rate": 4.9254011506903963e-05, "loss": 75.7661, "step": 42540 }, { "epoch": 0.17190738413927123, "grad_norm": 716.5491333007812, "learning_rate": 4.925316491225265e-05, "loss": 88.0013, "step": 42550 }, { "epoch": 0.17194778540463887, "grad_norm": 741.2266235351562, "learning_rate": 4.925231784477339e-05, "loss": 106.4336, "step": 42560 }, { "epoch": 0.1719881866700065, "grad_norm": 1328.2393798828125, "learning_rate": 4.9251470304482716e-05, "loss": 76.8043, "step": 42570 }, { "epoch": 0.17202858793537412, "grad_norm": 3505.359375, "learning_rate": 4.925062229139714e-05, "loss": 113.7022, "step": 42580 }, { "epoch": 0.17206898920074176, "grad_norm": 560.7538452148438, "learning_rate": 4.924977380553321e-05, "loss": 78.8128, "step": 42590 }, { "epoch": 0.1721093904661094, "grad_norm": 936.1580810546875, "learning_rate": 4.924892484690743e-05, "loss": 86.6639, "step": 42600 }, { "epoch": 0.17214979173147704, "grad_norm": 484.0994873046875, "learning_rate": 4.924807541553639e-05, "loss": 91.6706, "step": 42610 }, { "epoch": 0.17219019299684465, "grad_norm": 490.91448974609375, "learning_rate": 4.924722551143664e-05, "loss": 52.3336, "step": 42620 }, { "epoch": 0.1722305942622123, "grad_norm": 1924.5557861328125, "learning_rate": 4.924637513462474e-05, "loss": 98.4392, "step": 42630 }, { "epoch": 0.17227099552757993, "grad_norm": 738.7843017578125, "learning_rate": 4.9245524285117274e-05, "loss": 90.864, "step": 42640 }, { "epoch": 0.17231139679294755, "grad_norm": 805.4695434570312, "learning_rate": 4.924467296293083e-05, "loss": 102.7175, "step": 42650 }, { "epoch": 0.1723517980583152, "grad_norm": 710.0062866210938, "learning_rate": 4.924382116808201e-05, "loss": 107.4096, "step": 42660 }, { "epoch": 0.17239219932368283, "grad_norm": 703.4191284179688, "learning_rate": 4.924296890058741e-05, "loss": 82.6557, "step": 42670 }, { "epoch": 0.17243260058905044, "grad_norm": 1078.1011962890625, "learning_rate": 4.924211616046365e-05, "loss": 86.0516, "step": 42680 }, { "epoch": 0.17247300185441808, "grad_norm": 970.2047729492188, "learning_rate": 4.924126294772735e-05, "loss": 69.9036, "step": 42690 }, { "epoch": 0.17251340311978572, "grad_norm": 1400.3450927734375, "learning_rate": 4.924040926239515e-05, "loss": 103.4677, "step": 42700 }, { "epoch": 0.17255380438515333, "grad_norm": 1445.791259765625, "learning_rate": 4.9239555104483695e-05, "loss": 115.1777, "step": 42710 }, { "epoch": 0.17259420565052097, "grad_norm": 570.3566284179688, "learning_rate": 4.923870047400964e-05, "loss": 83.1902, "step": 42720 }, { "epoch": 0.1726346069158886, "grad_norm": 880.5896606445312, "learning_rate": 4.923784537098963e-05, "loss": 106.7852, "step": 42730 }, { "epoch": 0.17267500818125622, "grad_norm": 13018.2724609375, "learning_rate": 4.9236989795440346e-05, "loss": 108.5354, "step": 42740 }, { "epoch": 0.17271540944662386, "grad_norm": 1054.298583984375, "learning_rate": 4.9236133747378475e-05, "loss": 81.4467, "step": 42750 }, { "epoch": 0.1727558107119915, "grad_norm": 1258.8382568359375, "learning_rate": 4.9235277226820695e-05, "loss": 115.5568, "step": 42760 }, { "epoch": 0.17279621197735914, "grad_norm": 955.5006103515625, "learning_rate": 4.923442023378371e-05, "loss": 104.4606, "step": 42770 }, { "epoch": 0.17283661324272676, "grad_norm": 797.49951171875, "learning_rate": 4.9233562768284225e-05, "loss": 75.7812, "step": 42780 }, { "epoch": 0.1728770145080944, "grad_norm": 550.285400390625, "learning_rate": 4.923270483033896e-05, "loss": 90.7212, "step": 42790 }, { "epoch": 0.17291741577346204, "grad_norm": 828.7791137695312, "learning_rate": 4.923184641996463e-05, "loss": 66.7497, "step": 42800 }, { "epoch": 0.17295781703882965, "grad_norm": 862.7196044921875, "learning_rate": 4.923098753717798e-05, "loss": 74.5591, "step": 42810 }, { "epoch": 0.1729982183041973, "grad_norm": 1932.47802734375, "learning_rate": 4.923012818199576e-05, "loss": 113.689, "step": 42820 }, { "epoch": 0.17303861956956493, "grad_norm": 467.0488586425781, "learning_rate": 4.922926835443472e-05, "loss": 80.5391, "step": 42830 }, { "epoch": 0.17307902083493254, "grad_norm": 1409.27734375, "learning_rate": 4.922840805451161e-05, "loss": 141.5631, "step": 42840 }, { "epoch": 0.17311942210030018, "grad_norm": 470.03057861328125, "learning_rate": 4.9227547282243214e-05, "loss": 55.4408, "step": 42850 }, { "epoch": 0.17315982336566782, "grad_norm": 399.5194091796875, "learning_rate": 4.9226686037646314e-05, "loss": 95.3332, "step": 42860 }, { "epoch": 0.17320022463103543, "grad_norm": 1028.5482177734375, "learning_rate": 4.92258243207377e-05, "loss": 84.2015, "step": 42870 }, { "epoch": 0.17324062589640307, "grad_norm": 936.9313354492188, "learning_rate": 4.922496213153416e-05, "loss": 85.3661, "step": 42880 }, { "epoch": 0.1732810271617707, "grad_norm": 614.690673828125, "learning_rate": 4.922409947005251e-05, "loss": 49.2994, "step": 42890 }, { "epoch": 0.17332142842713832, "grad_norm": 845.6972045898438, "learning_rate": 4.922323633630958e-05, "loss": 44.1013, "step": 42900 }, { "epoch": 0.17336182969250596, "grad_norm": 1084.47900390625, "learning_rate": 4.9222372730322176e-05, "loss": 131.938, "step": 42910 }, { "epoch": 0.1734022309578736, "grad_norm": 1015.8482666015625, "learning_rate": 4.922150865210715e-05, "loss": 90.8332, "step": 42920 }, { "epoch": 0.17344263222324124, "grad_norm": 548.1792602539062, "learning_rate": 4.922064410168134e-05, "loss": 94.3187, "step": 42930 }, { "epoch": 0.17348303348860886, "grad_norm": 625.8770751953125, "learning_rate": 4.92197790790616e-05, "loss": 86.6768, "step": 42940 }, { "epoch": 0.1735234347539765, "grad_norm": 0.0, "learning_rate": 4.9218913584264814e-05, "loss": 86.3633, "step": 42950 }, { "epoch": 0.17356383601934414, "grad_norm": 646.9606323242188, "learning_rate": 4.9218047617307824e-05, "loss": 88.7674, "step": 42960 }, { "epoch": 0.17360423728471175, "grad_norm": 249.1079864501953, "learning_rate": 4.9217181178207535e-05, "loss": 61.6128, "step": 42970 }, { "epoch": 0.1736446385500794, "grad_norm": 543.1034545898438, "learning_rate": 4.9216314266980824e-05, "loss": 79.8014, "step": 42980 }, { "epoch": 0.17368503981544703, "grad_norm": 662.4139404296875, "learning_rate": 4.921544688364461e-05, "loss": 95.9196, "step": 42990 }, { "epoch": 0.17372544108081464, "grad_norm": 422.02825927734375, "learning_rate": 4.9214579028215776e-05, "loss": 91.8231, "step": 43000 }, { "epoch": 0.17376584234618228, "grad_norm": 891.07958984375, "learning_rate": 4.921371070071127e-05, "loss": 74.39, "step": 43010 }, { "epoch": 0.17380624361154992, "grad_norm": 1176.2237548828125, "learning_rate": 4.9212841901148e-05, "loss": 90.9235, "step": 43020 }, { "epoch": 0.17384664487691753, "grad_norm": 4489.8310546875, "learning_rate": 4.9211972629542926e-05, "loss": 124.854, "step": 43030 }, { "epoch": 0.17388704614228517, "grad_norm": 639.3281860351562, "learning_rate": 4.9211102885912965e-05, "loss": 76.7055, "step": 43040 }, { "epoch": 0.1739274474076528, "grad_norm": 1555.4083251953125, "learning_rate": 4.9210232670275094e-05, "loss": 110.0548, "step": 43050 }, { "epoch": 0.17396784867302043, "grad_norm": 727.7996826171875, "learning_rate": 4.920936198264627e-05, "loss": 95.9471, "step": 43060 }, { "epoch": 0.17400824993838807, "grad_norm": 1188.0120849609375, "learning_rate": 4.920849082304347e-05, "loss": 113.6858, "step": 43070 }, { "epoch": 0.1740486512037557, "grad_norm": 1449.6580810546875, "learning_rate": 4.920761919148369e-05, "loss": 94.3256, "step": 43080 }, { "epoch": 0.17408905246912335, "grad_norm": 454.728759765625, "learning_rate": 4.9206747087983894e-05, "loss": 78.3794, "step": 43090 }, { "epoch": 0.17412945373449096, "grad_norm": 815.9066162109375, "learning_rate": 4.9205874512561115e-05, "loss": 89.8247, "step": 43100 }, { "epoch": 0.1741698549998586, "grad_norm": 759.9569702148438, "learning_rate": 4.920500146523234e-05, "loss": 63.8019, "step": 43110 }, { "epoch": 0.17421025626522624, "grad_norm": 2164.500732421875, "learning_rate": 4.920412794601461e-05, "loss": 100.4632, "step": 43120 }, { "epoch": 0.17425065753059385, "grad_norm": 662.5512084960938, "learning_rate": 4.920325395492493e-05, "loss": 107.611, "step": 43130 }, { "epoch": 0.1742910587959615, "grad_norm": 422.92645263671875, "learning_rate": 4.920237949198037e-05, "loss": 87.6762, "step": 43140 }, { "epoch": 0.17433146006132913, "grad_norm": 915.8494873046875, "learning_rate": 4.9201504557197955e-05, "loss": 66.672, "step": 43150 }, { "epoch": 0.17437186132669674, "grad_norm": 988.4778442382812, "learning_rate": 4.9200629150594744e-05, "loss": 91.4257, "step": 43160 }, { "epoch": 0.17441226259206438, "grad_norm": 794.6514282226562, "learning_rate": 4.919975327218781e-05, "loss": 87.8521, "step": 43170 }, { "epoch": 0.17445266385743202, "grad_norm": 884.67578125, "learning_rate": 4.919887692199423e-05, "loss": 74.1221, "step": 43180 }, { "epoch": 0.17449306512279963, "grad_norm": 764.0117797851562, "learning_rate": 4.919800010003108e-05, "loss": 84.2991, "step": 43190 }, { "epoch": 0.17453346638816727, "grad_norm": 1031.7420654296875, "learning_rate": 4.919712280631547e-05, "loss": 116.3033, "step": 43200 }, { "epoch": 0.17457386765353491, "grad_norm": 593.7903442382812, "learning_rate": 4.9196245040864486e-05, "loss": 81.53, "step": 43210 }, { "epoch": 0.17461426891890253, "grad_norm": 929.322021484375, "learning_rate": 4.919536680369525e-05, "loss": 87.4219, "step": 43220 }, { "epoch": 0.17465467018427017, "grad_norm": 3907.416015625, "learning_rate": 4.919448809482489e-05, "loss": 81.7991, "step": 43230 }, { "epoch": 0.1746950714496378, "grad_norm": 893.3704223632812, "learning_rate": 4.9193608914270515e-05, "loss": 96.2677, "step": 43240 }, { "epoch": 0.17473547271500545, "grad_norm": 659.1729125976562, "learning_rate": 4.919272926204929e-05, "loss": 98.551, "step": 43250 }, { "epoch": 0.17477587398037306, "grad_norm": 995.1451416015625, "learning_rate": 4.9191849138178334e-05, "loss": 71.9214, "step": 43260 }, { "epoch": 0.1748162752457407, "grad_norm": 1777.868896484375, "learning_rate": 4.919096854267484e-05, "loss": 95.7281, "step": 43270 }, { "epoch": 0.17485667651110834, "grad_norm": 779.3260498046875, "learning_rate": 4.9190087475555955e-05, "loss": 104.3814, "step": 43280 }, { "epoch": 0.17489707777647595, "grad_norm": 939.8629760742188, "learning_rate": 4.9189205936838864e-05, "loss": 71.219, "step": 43290 }, { "epoch": 0.1749374790418436, "grad_norm": 808.994140625, "learning_rate": 4.9188323926540746e-05, "loss": 63.1995, "step": 43300 }, { "epoch": 0.17497788030721123, "grad_norm": 1255.6544189453125, "learning_rate": 4.918744144467881e-05, "loss": 73.3233, "step": 43310 }, { "epoch": 0.17501828157257884, "grad_norm": 1129.1463623046875, "learning_rate": 4.918655849127024e-05, "loss": 80.2645, "step": 43320 }, { "epoch": 0.17505868283794648, "grad_norm": 0.0, "learning_rate": 4.918567506633226e-05, "loss": 107.1662, "step": 43330 }, { "epoch": 0.17509908410331412, "grad_norm": 219.4386444091797, "learning_rate": 4.91847911698821e-05, "loss": 66.5381, "step": 43340 }, { "epoch": 0.17513948536868174, "grad_norm": 432.4940185546875, "learning_rate": 4.918390680193698e-05, "loss": 73.7235, "step": 43350 }, { "epoch": 0.17517988663404938, "grad_norm": 645.31494140625, "learning_rate": 4.918302196251415e-05, "loss": 110.384, "step": 43360 }, { "epoch": 0.17522028789941702, "grad_norm": 3361.419677734375, "learning_rate": 4.918213665163085e-05, "loss": 122.9304, "step": 43370 }, { "epoch": 0.17526068916478463, "grad_norm": 898.6963500976562, "learning_rate": 4.918125086930435e-05, "loss": 79.2204, "step": 43380 }, { "epoch": 0.17530109043015227, "grad_norm": 982.2764892578125, "learning_rate": 4.918036461555192e-05, "loss": 87.4724, "step": 43390 }, { "epoch": 0.1753414916955199, "grad_norm": 423.7979431152344, "learning_rate": 4.9179477890390825e-05, "loss": 96.8271, "step": 43400 }, { "epoch": 0.17538189296088755, "grad_norm": 994.8875732421875, "learning_rate": 4.917859069383836e-05, "loss": 65.6866, "step": 43410 }, { "epoch": 0.17542229422625516, "grad_norm": 488.28973388671875, "learning_rate": 4.9177703025911825e-05, "loss": 67.0737, "step": 43420 }, { "epoch": 0.1754626954916228, "grad_norm": 927.5771484375, "learning_rate": 4.917681488662852e-05, "loss": 96.7948, "step": 43430 }, { "epoch": 0.17550309675699044, "grad_norm": 1078.08837890625, "learning_rate": 4.917592627600577e-05, "loss": 77.3857, "step": 43440 }, { "epoch": 0.17554349802235805, "grad_norm": 1068.2432861328125, "learning_rate": 4.917503719406088e-05, "loss": 82.1481, "step": 43450 }, { "epoch": 0.1755838992877257, "grad_norm": 622.7982177734375, "learning_rate": 4.91741476408112e-05, "loss": 88.3467, "step": 43460 }, { "epoch": 0.17562430055309333, "grad_norm": 529.6117553710938, "learning_rate": 4.917325761627406e-05, "loss": 94.7772, "step": 43470 }, { "epoch": 0.17566470181846094, "grad_norm": 527.8485107421875, "learning_rate": 4.917236712046682e-05, "loss": 49.5305, "step": 43480 }, { "epoch": 0.17570510308382858, "grad_norm": 714.2109375, "learning_rate": 4.917147615340684e-05, "loss": 84.2201, "step": 43490 }, { "epoch": 0.17574550434919622, "grad_norm": 836.039794921875, "learning_rate": 4.917058471511149e-05, "loss": 73.1889, "step": 43500 }, { "epoch": 0.17578590561456384, "grad_norm": 1239.9703369140625, "learning_rate": 4.9169692805598145e-05, "loss": 94.0238, "step": 43510 }, { "epoch": 0.17582630687993148, "grad_norm": 447.8238830566406, "learning_rate": 4.916880042488419e-05, "loss": 56.3747, "step": 43520 }, { "epoch": 0.17586670814529912, "grad_norm": 646.1732788085938, "learning_rate": 4.916790757298704e-05, "loss": 73.1238, "step": 43530 }, { "epoch": 0.17590710941066673, "grad_norm": 2080.33984375, "learning_rate": 4.9167014249924075e-05, "loss": 111.906, "step": 43540 }, { "epoch": 0.17594751067603437, "grad_norm": 500.1817932128906, "learning_rate": 4.9166120455712736e-05, "loss": 55.3391, "step": 43550 }, { "epoch": 0.175987911941402, "grad_norm": 495.2890319824219, "learning_rate": 4.916522619037043e-05, "loss": 111.2715, "step": 43560 }, { "epoch": 0.17602831320676965, "grad_norm": 539.0352783203125, "learning_rate": 4.91643314539146e-05, "loss": 72.2024, "step": 43570 }, { "epoch": 0.17606871447213726, "grad_norm": 1020.1885375976562, "learning_rate": 4.916343624636269e-05, "loss": 133.4901, "step": 43580 }, { "epoch": 0.1761091157375049, "grad_norm": 463.427978515625, "learning_rate": 4.916254056773215e-05, "loss": 79.8422, "step": 43590 }, { "epoch": 0.17614951700287254, "grad_norm": 1095.344970703125, "learning_rate": 4.916164441804044e-05, "loss": 79.1968, "step": 43600 }, { "epoch": 0.17618991826824015, "grad_norm": 1455.2132568359375, "learning_rate": 4.916074779730504e-05, "loss": 79.4097, "step": 43610 }, { "epoch": 0.1762303195336078, "grad_norm": 854.4801025390625, "learning_rate": 4.915985070554341e-05, "loss": 108.5478, "step": 43620 }, { "epoch": 0.17627072079897543, "grad_norm": 1308.75341796875, "learning_rate": 4.915895314277306e-05, "loss": 64.448, "step": 43630 }, { "epoch": 0.17631112206434305, "grad_norm": 755.5027465820312, "learning_rate": 4.915805510901148e-05, "loss": 99.9466, "step": 43640 }, { "epoch": 0.17635152332971069, "grad_norm": 3085.270751953125, "learning_rate": 4.9157156604276175e-05, "loss": 99.849, "step": 43650 }, { "epoch": 0.17639192459507833, "grad_norm": 1174.489013671875, "learning_rate": 4.915625762858467e-05, "loss": 71.8641, "step": 43660 }, { "epoch": 0.17643232586044594, "grad_norm": 1123.778076171875, "learning_rate": 4.9155358181954494e-05, "loss": 92.842, "step": 43670 }, { "epoch": 0.17647272712581358, "grad_norm": 424.877197265625, "learning_rate": 4.915445826440316e-05, "loss": 82.3663, "step": 43680 }, { "epoch": 0.17651312839118122, "grad_norm": 2889.92041015625, "learning_rate": 4.915355787594823e-05, "loss": 81.4271, "step": 43690 }, { "epoch": 0.17655352965654883, "grad_norm": 5192.76611328125, "learning_rate": 4.915265701660726e-05, "loss": 80.7691, "step": 43700 }, { "epoch": 0.17659393092191647, "grad_norm": 995.813720703125, "learning_rate": 4.9151755686397793e-05, "loss": 75.4785, "step": 43710 }, { "epoch": 0.1766343321872841, "grad_norm": 648.2084350585938, "learning_rate": 4.9150853885337426e-05, "loss": 93.2939, "step": 43720 }, { "epoch": 0.17667473345265175, "grad_norm": 601.667724609375, "learning_rate": 4.914995161344373e-05, "loss": 68.4272, "step": 43730 }, { "epoch": 0.17671513471801936, "grad_norm": 553.5189819335938, "learning_rate": 4.9149048870734296e-05, "loss": 75.1263, "step": 43740 }, { "epoch": 0.176755535983387, "grad_norm": 816.7628784179688, "learning_rate": 4.914814565722671e-05, "loss": 100.5337, "step": 43750 }, { "epoch": 0.17679593724875464, "grad_norm": 1510.35302734375, "learning_rate": 4.9147241972938596e-05, "loss": 94.5766, "step": 43760 }, { "epoch": 0.17683633851412225, "grad_norm": 834.1815795898438, "learning_rate": 4.9146337817887575e-05, "loss": 70.5322, "step": 43770 }, { "epoch": 0.1768767397794899, "grad_norm": 1132.84619140625, "learning_rate": 4.914543319209126e-05, "loss": 74.2719, "step": 43780 }, { "epoch": 0.17691714104485753, "grad_norm": 0.0, "learning_rate": 4.91445280955673e-05, "loss": 91.3708, "step": 43790 }, { "epoch": 0.17695754231022515, "grad_norm": 347.47705078125, "learning_rate": 4.914362252833332e-05, "loss": 70.5821, "step": 43800 }, { "epoch": 0.1769979435755928, "grad_norm": 820.30419921875, "learning_rate": 4.9142716490407e-05, "loss": 122.0043, "step": 43810 }, { "epoch": 0.17703834484096043, "grad_norm": 910.3120727539062, "learning_rate": 4.9141809981805995e-05, "loss": 86.2672, "step": 43820 }, { "epoch": 0.17707874610632804, "grad_norm": 536.81982421875, "learning_rate": 4.914090300254798e-05, "loss": 67.7232, "step": 43830 }, { "epoch": 0.17711914737169568, "grad_norm": 799.4282836914062, "learning_rate": 4.913999555265062e-05, "loss": 66.7534, "step": 43840 }, { "epoch": 0.17715954863706332, "grad_norm": 978.2288208007812, "learning_rate": 4.913908763213162e-05, "loss": 97.7384, "step": 43850 }, { "epoch": 0.17719994990243093, "grad_norm": 1155.240478515625, "learning_rate": 4.913817924100869e-05, "loss": 83.567, "step": 43860 }, { "epoch": 0.17724035116779857, "grad_norm": 4742.7177734375, "learning_rate": 4.913727037929952e-05, "loss": 106.2576, "step": 43870 }, { "epoch": 0.1772807524331662, "grad_norm": 725.6898193359375, "learning_rate": 4.913636104702183e-05, "loss": 95.1339, "step": 43880 }, { "epoch": 0.17732115369853385, "grad_norm": 933.677978515625, "learning_rate": 4.913545124419336e-05, "loss": 77.6824, "step": 43890 }, { "epoch": 0.17736155496390146, "grad_norm": 484.01092529296875, "learning_rate": 4.913454097083185e-05, "loss": 95.2952, "step": 43900 }, { "epoch": 0.1774019562292691, "grad_norm": 4791.2919921875, "learning_rate": 4.9133630226955026e-05, "loss": 66.6884, "step": 43910 }, { "epoch": 0.17744235749463674, "grad_norm": 750.3236694335938, "learning_rate": 4.913271901258067e-05, "loss": 71.9357, "step": 43920 }, { "epoch": 0.17748275876000436, "grad_norm": 738.4391479492188, "learning_rate": 4.913180732772652e-05, "loss": 73.5072, "step": 43930 }, { "epoch": 0.177523160025372, "grad_norm": 3437.857666015625, "learning_rate": 4.913089517241037e-05, "loss": 102.427, "step": 43940 }, { "epoch": 0.17756356129073964, "grad_norm": 790.71044921875, "learning_rate": 4.912998254665e-05, "loss": 75.0368, "step": 43950 }, { "epoch": 0.17760396255610725, "grad_norm": 417.4804382324219, "learning_rate": 4.9129069450463186e-05, "loss": 97.0161, "step": 43960 }, { "epoch": 0.1776443638214749, "grad_norm": 988.0493774414062, "learning_rate": 4.912815588386775e-05, "loss": 67.218, "step": 43970 }, { "epoch": 0.17768476508684253, "grad_norm": 867.89404296875, "learning_rate": 4.912724184688149e-05, "loss": 70.8273, "step": 43980 }, { "epoch": 0.17772516635221014, "grad_norm": 678.370849609375, "learning_rate": 4.9126327339522225e-05, "loss": 79.2882, "step": 43990 }, { "epoch": 0.17776556761757778, "grad_norm": 1945.7412109375, "learning_rate": 4.912541236180779e-05, "loss": 102.0449, "step": 44000 }, { "epoch": 0.17780596888294542, "grad_norm": 874.1896362304688, "learning_rate": 4.912449691375602e-05, "loss": 73.3128, "step": 44010 }, { "epoch": 0.17784637014831303, "grad_norm": 922.020751953125, "learning_rate": 4.912358099538476e-05, "loss": 105.76, "step": 44020 }, { "epoch": 0.17788677141368067, "grad_norm": 295.5516662597656, "learning_rate": 4.912266460671187e-05, "loss": 71.2611, "step": 44030 }, { "epoch": 0.1779271726790483, "grad_norm": 524.7007446289062, "learning_rate": 4.912174774775522e-05, "loss": 132.5543, "step": 44040 }, { "epoch": 0.17796757394441595, "grad_norm": 1069.502685546875, "learning_rate": 4.912083041853267e-05, "loss": 49.895, "step": 44050 }, { "epoch": 0.17800797520978356, "grad_norm": 1046.591796875, "learning_rate": 4.911991261906212e-05, "loss": 121.5496, "step": 44060 }, { "epoch": 0.1780483764751512, "grad_norm": 1215.2974853515625, "learning_rate": 4.9118994349361455e-05, "loss": 67.8808, "step": 44070 }, { "epoch": 0.17808877774051884, "grad_norm": 419.9411315917969, "learning_rate": 4.911807560944858e-05, "loss": 94.9882, "step": 44080 }, { "epoch": 0.17812917900588646, "grad_norm": 555.5469360351562, "learning_rate": 4.911715639934139e-05, "loss": 75.9314, "step": 44090 }, { "epoch": 0.1781695802712541, "grad_norm": 961.8922729492188, "learning_rate": 4.911623671905784e-05, "loss": 281.5219, "step": 44100 }, { "epoch": 0.17820998153662174, "grad_norm": 443.88385009765625, "learning_rate": 4.9115316568615824e-05, "loss": 72.1933, "step": 44110 }, { "epoch": 0.17825038280198935, "grad_norm": 670.9072265625, "learning_rate": 4.9114395948033296e-05, "loss": 59.2318, "step": 44120 }, { "epoch": 0.178290784067357, "grad_norm": 951.0693359375, "learning_rate": 4.911347485732821e-05, "loss": 92.5108, "step": 44130 }, { "epoch": 0.17833118533272463, "grad_norm": 742.0170288085938, "learning_rate": 4.911255329651851e-05, "loss": 81.8882, "step": 44140 }, { "epoch": 0.17837158659809224, "grad_norm": 1449.71826171875, "learning_rate": 4.9111631265622184e-05, "loss": 99.7724, "step": 44150 }, { "epoch": 0.17841198786345988, "grad_norm": 884.3943481445312, "learning_rate": 4.911070876465719e-05, "loss": 77.8581, "step": 44160 }, { "epoch": 0.17845238912882752, "grad_norm": 930.3131103515625, "learning_rate": 4.910978579364151e-05, "loss": 75.9937, "step": 44170 }, { "epoch": 0.17849279039419513, "grad_norm": 741.734130859375, "learning_rate": 4.910886235259314e-05, "loss": 49.2752, "step": 44180 }, { "epoch": 0.17853319165956277, "grad_norm": 1187.31982421875, "learning_rate": 4.910793844153009e-05, "loss": 72.5356, "step": 44190 }, { "epoch": 0.1785735929249304, "grad_norm": 1084.1396484375, "learning_rate": 4.910701406047037e-05, "loss": 54.3127, "step": 44200 }, { "epoch": 0.17861399419029805, "grad_norm": 1090.561767578125, "learning_rate": 4.910608920943199e-05, "loss": 89.8081, "step": 44210 }, { "epoch": 0.17865439545566567, "grad_norm": 926.6570434570312, "learning_rate": 4.9105163888433e-05, "loss": 102.4193, "step": 44220 }, { "epoch": 0.1786947967210333, "grad_norm": 823.6111450195312, "learning_rate": 4.910423809749143e-05, "loss": 71.3021, "step": 44230 }, { "epoch": 0.17873519798640095, "grad_norm": 1753.1083984375, "learning_rate": 4.910331183662533e-05, "loss": 73.3729, "step": 44240 }, { "epoch": 0.17877559925176856, "grad_norm": 1302.8763427734375, "learning_rate": 4.910238510585276e-05, "loss": 73.3519, "step": 44250 }, { "epoch": 0.1788160005171362, "grad_norm": 1096.9420166015625, "learning_rate": 4.9101457905191774e-05, "loss": 97.6709, "step": 44260 }, { "epoch": 0.17885640178250384, "grad_norm": 1942.8685302734375, "learning_rate": 4.910053023466046e-05, "loss": 144.2541, "step": 44270 }, { "epoch": 0.17889680304787145, "grad_norm": 784.9008178710938, "learning_rate": 4.90996020942769e-05, "loss": 80.2861, "step": 44280 }, { "epoch": 0.1789372043132391, "grad_norm": 1027.322509765625, "learning_rate": 4.9098673484059195e-05, "loss": 69.3643, "step": 44290 }, { "epoch": 0.17897760557860673, "grad_norm": 1583.6845703125, "learning_rate": 4.9097744404025435e-05, "loss": 92.1036, "step": 44300 }, { "epoch": 0.17901800684397434, "grad_norm": 735.26806640625, "learning_rate": 4.909681485419375e-05, "loss": 60.5722, "step": 44310 }, { "epoch": 0.17905840810934198, "grad_norm": 0.0, "learning_rate": 4.909588483458225e-05, "loss": 65.9718, "step": 44320 }, { "epoch": 0.17909880937470962, "grad_norm": 676.1497802734375, "learning_rate": 4.9094954345209075e-05, "loss": 85.0213, "step": 44330 }, { "epoch": 0.17913921064007723, "grad_norm": 873.891357421875, "learning_rate": 4.909402338609236e-05, "loss": 77.8588, "step": 44340 }, { "epoch": 0.17917961190544487, "grad_norm": 693.5972900390625, "learning_rate": 4.909309195725025e-05, "loss": 124.6474, "step": 44350 }, { "epoch": 0.17922001317081251, "grad_norm": 460.6761779785156, "learning_rate": 4.90921600587009e-05, "loss": 87.2825, "step": 44360 }, { "epoch": 0.17926041443618015, "grad_norm": 1261.747314453125, "learning_rate": 4.90912276904625e-05, "loss": 70.8723, "step": 44370 }, { "epoch": 0.17930081570154777, "grad_norm": 538.2672729492188, "learning_rate": 4.909029485255321e-05, "loss": 67.4372, "step": 44380 }, { "epoch": 0.1793412169669154, "grad_norm": 1068.8714599609375, "learning_rate": 4.9089361544991215e-05, "loss": 68.0589, "step": 44390 }, { "epoch": 0.17938161823228305, "grad_norm": 1119.72314453125, "learning_rate": 4.908842776779472e-05, "loss": 68.3467, "step": 44400 }, { "epoch": 0.17942201949765066, "grad_norm": 568.3760986328125, "learning_rate": 4.908749352098192e-05, "loss": 73.3346, "step": 44410 }, { "epoch": 0.1794624207630183, "grad_norm": 1218.2125244140625, "learning_rate": 4.9086558804571034e-05, "loss": 109.4271, "step": 44420 }, { "epoch": 0.17950282202838594, "grad_norm": 582.5836181640625, "learning_rate": 4.908562361858028e-05, "loss": 72.2208, "step": 44430 }, { "epoch": 0.17954322329375355, "grad_norm": 1486.9588623046875, "learning_rate": 4.9084687963027894e-05, "loss": 87.6006, "step": 44440 }, { "epoch": 0.1795836245591212, "grad_norm": 646.9586791992188, "learning_rate": 4.9083751837932126e-05, "loss": 84.1968, "step": 44450 }, { "epoch": 0.17962402582448883, "grad_norm": 665.9592895507812, "learning_rate": 4.908281524331121e-05, "loss": 61.7497, "step": 44460 }, { "epoch": 0.17966442708985644, "grad_norm": 514.8817749023438, "learning_rate": 4.908187817918341e-05, "loss": 71.9887, "step": 44470 }, { "epoch": 0.17970482835522408, "grad_norm": 883.0592651367188, "learning_rate": 4.9080940645567e-05, "loss": 72.1492, "step": 44480 }, { "epoch": 0.17974522962059172, "grad_norm": 1375.27001953125, "learning_rate": 4.908000264248025e-05, "loss": 96.8598, "step": 44490 }, { "epoch": 0.17978563088595934, "grad_norm": 645.9873046875, "learning_rate": 4.907906416994146e-05, "loss": 64.9413, "step": 44500 }, { "epoch": 0.17982603215132698, "grad_norm": 1111.731201171875, "learning_rate": 4.9078125227968904e-05, "loss": 86.5026, "step": 44510 }, { "epoch": 0.17986643341669462, "grad_norm": 934.5158081054688, "learning_rate": 4.907718581658091e-05, "loss": 110.799, "step": 44520 }, { "epoch": 0.17990683468206226, "grad_norm": 920.4935913085938, "learning_rate": 4.9076245935795786e-05, "loss": 62.2967, "step": 44530 }, { "epoch": 0.17994723594742987, "grad_norm": 892.095947265625, "learning_rate": 4.9075305585631845e-05, "loss": 82.56, "step": 44540 }, { "epoch": 0.1799876372127975, "grad_norm": 1195.0810546875, "learning_rate": 4.907436476610743e-05, "loss": 48.5051, "step": 44550 }, { "epoch": 0.18002803847816515, "grad_norm": 768.6405029296875, "learning_rate": 4.907342347724087e-05, "loss": 67.0793, "step": 44560 }, { "epoch": 0.18006843974353276, "grad_norm": 1605.87060546875, "learning_rate": 4.907248171905055e-05, "loss": 114.835, "step": 44570 }, { "epoch": 0.1801088410089004, "grad_norm": 718.5313720703125, "learning_rate": 4.907153949155479e-05, "loss": 100.6462, "step": 44580 }, { "epoch": 0.18014924227426804, "grad_norm": 430.45037841796875, "learning_rate": 4.907059679477197e-05, "loss": 67.9887, "step": 44590 }, { "epoch": 0.18018964353963565, "grad_norm": 234.9417266845703, "learning_rate": 4.906965362872047e-05, "loss": 77.6404, "step": 44600 }, { "epoch": 0.1802300448050033, "grad_norm": 911.5045776367188, "learning_rate": 4.906870999341869e-05, "loss": 88.723, "step": 44610 }, { "epoch": 0.18027044607037093, "grad_norm": 772.7266845703125, "learning_rate": 4.906776588888502e-05, "loss": 93.7803, "step": 44620 }, { "epoch": 0.18031084733573854, "grad_norm": 1137.0054931640625, "learning_rate": 4.9066821315137856e-05, "loss": 110.4208, "step": 44630 }, { "epoch": 0.18035124860110618, "grad_norm": 898.4769287109375, "learning_rate": 4.906587627219562e-05, "loss": 73.2961, "step": 44640 }, { "epoch": 0.18039164986647382, "grad_norm": 958.9808959960938, "learning_rate": 4.906493076007674e-05, "loss": 57.1335, "step": 44650 }, { "epoch": 0.18043205113184144, "grad_norm": 938.7362670898438, "learning_rate": 4.9063984778799645e-05, "loss": 101.8361, "step": 44660 }, { "epoch": 0.18047245239720908, "grad_norm": 515.5078735351562, "learning_rate": 4.906303832838278e-05, "loss": 54.5177, "step": 44670 }, { "epoch": 0.18051285366257672, "grad_norm": 531.8738403320312, "learning_rate": 4.906209140884459e-05, "loss": 82.9638, "step": 44680 }, { "epoch": 0.18055325492794436, "grad_norm": 1305.5316162109375, "learning_rate": 4.906114402020354e-05, "loss": 95.9825, "step": 44690 }, { "epoch": 0.18059365619331197, "grad_norm": 777.7756958007812, "learning_rate": 4.90601961624781e-05, "loss": 81.2786, "step": 44700 }, { "epoch": 0.1806340574586796, "grad_norm": 1133.5772705078125, "learning_rate": 4.905924783568675e-05, "loss": 85.8932, "step": 44710 }, { "epoch": 0.18067445872404725, "grad_norm": 599.99951171875, "learning_rate": 4.9058299039847975e-05, "loss": 101.0232, "step": 44720 }, { "epoch": 0.18071485998941486, "grad_norm": 675.3793334960938, "learning_rate": 4.9057349774980275e-05, "loss": 56.9, "step": 44730 }, { "epoch": 0.1807552612547825, "grad_norm": 961.1941528320312, "learning_rate": 4.905640004110216e-05, "loss": 84.8394, "step": 44740 }, { "epoch": 0.18079566252015014, "grad_norm": 1229.0406494140625, "learning_rate": 4.905544983823214e-05, "loss": 65.7035, "step": 44750 }, { "epoch": 0.18083606378551775, "grad_norm": 1367.959228515625, "learning_rate": 4.905449916638873e-05, "loss": 79.8659, "step": 44760 }, { "epoch": 0.1808764650508854, "grad_norm": 705.6975708007812, "learning_rate": 4.905354802559049e-05, "loss": 109.2609, "step": 44770 }, { "epoch": 0.18091686631625303, "grad_norm": 738.3800048828125, "learning_rate": 4.905259641585594e-05, "loss": 45.9301, "step": 44780 }, { "epoch": 0.18095726758162065, "grad_norm": 746.9801025390625, "learning_rate": 4.905164433720364e-05, "loss": 65.5784, "step": 44790 }, { "epoch": 0.18099766884698829, "grad_norm": 546.0762329101562, "learning_rate": 4.905069178965215e-05, "loss": 97.4426, "step": 44800 }, { "epoch": 0.18103807011235593, "grad_norm": 714.2156372070312, "learning_rate": 4.9049738773220046e-05, "loss": 83.1105, "step": 44810 }, { "epoch": 0.18107847137772354, "grad_norm": 862.12939453125, "learning_rate": 4.9048785287925895e-05, "loss": 56.6679, "step": 44820 }, { "epoch": 0.18111887264309118, "grad_norm": 233.91058349609375, "learning_rate": 4.9047831333788295e-05, "loss": 82.0974, "step": 44830 }, { "epoch": 0.18115927390845882, "grad_norm": 820.8073120117188, "learning_rate": 4.904687691082585e-05, "loss": 72.0476, "step": 44840 }, { "epoch": 0.18119967517382646, "grad_norm": 1259.7579345703125, "learning_rate": 4.9045922019057155e-05, "loss": 83.561, "step": 44850 }, { "epoch": 0.18124007643919407, "grad_norm": 879.6514282226562, "learning_rate": 4.904496665850084e-05, "loss": 82.4675, "step": 44860 }, { "epoch": 0.1812804777045617, "grad_norm": 684.139892578125, "learning_rate": 4.90440108291755e-05, "loss": 90.4461, "step": 44870 }, { "epoch": 0.18132087896992935, "grad_norm": 2262.56591796875, "learning_rate": 4.904305453109981e-05, "loss": 53.0787, "step": 44880 }, { "epoch": 0.18136128023529696, "grad_norm": 1157.9019775390625, "learning_rate": 4.9042097764292385e-05, "loss": 83.7821, "step": 44890 }, { "epoch": 0.1814016815006646, "grad_norm": 945.2867431640625, "learning_rate": 4.904114052877188e-05, "loss": 81.2998, "step": 44900 }, { "epoch": 0.18144208276603224, "grad_norm": 1119.50048828125, "learning_rate": 4.904018282455697e-05, "loss": 49.5603, "step": 44910 }, { "epoch": 0.18148248403139985, "grad_norm": 1104.3919677734375, "learning_rate": 4.9039224651666325e-05, "loss": 89.8467, "step": 44920 }, { "epoch": 0.1815228852967675, "grad_norm": 852.224365234375, "learning_rate": 4.903826601011861e-05, "loss": 69.1575, "step": 44930 }, { "epoch": 0.18156328656213513, "grad_norm": 473.6451110839844, "learning_rate": 4.903730689993253e-05, "loss": 92.4134, "step": 44940 }, { "epoch": 0.18160368782750275, "grad_norm": 406.23199462890625, "learning_rate": 4.903634732112678e-05, "loss": 42.3387, "step": 44950 }, { "epoch": 0.1816440890928704, "grad_norm": 1381.426025390625, "learning_rate": 4.903538727372005e-05, "loss": 81.0296, "step": 44960 }, { "epoch": 0.18168449035823803, "grad_norm": 1150.0416259765625, "learning_rate": 4.903442675773108e-05, "loss": 67.7141, "step": 44970 }, { "epoch": 0.18172489162360564, "grad_norm": 816.09619140625, "learning_rate": 4.903346577317859e-05, "loss": 85.8159, "step": 44980 }, { "epoch": 0.18176529288897328, "grad_norm": 602.0270385742188, "learning_rate": 4.90325043200813e-05, "loss": 63.4132, "step": 44990 }, { "epoch": 0.18180569415434092, "grad_norm": 815.766357421875, "learning_rate": 4.9031542398457974e-05, "loss": 88.5122, "step": 45000 }, { "epoch": 0.18184609541970856, "grad_norm": 863.7614135742188, "learning_rate": 4.9030580008327353e-05, "loss": 91.4247, "step": 45010 }, { "epoch": 0.18188649668507617, "grad_norm": 2180.32373046875, "learning_rate": 4.902961714970821e-05, "loss": 104.5198, "step": 45020 }, { "epoch": 0.1819268979504438, "grad_norm": 527.2975463867188, "learning_rate": 4.90286538226193e-05, "loss": 89.9132, "step": 45030 }, { "epoch": 0.18196729921581145, "grad_norm": 797.1927490234375, "learning_rate": 4.902769002707942e-05, "loss": 60.7632, "step": 45040 }, { "epoch": 0.18200770048117906, "grad_norm": 1439.7625732421875, "learning_rate": 4.902672576310735e-05, "loss": 94.8452, "step": 45050 }, { "epoch": 0.1820481017465467, "grad_norm": 962.4320678710938, "learning_rate": 4.902576103072189e-05, "loss": 93.4051, "step": 45060 }, { "epoch": 0.18208850301191434, "grad_norm": 509.36468505859375, "learning_rate": 4.902479582994185e-05, "loss": 101.6452, "step": 45070 }, { "epoch": 0.18212890427728196, "grad_norm": 674.908447265625, "learning_rate": 4.902383016078605e-05, "loss": 82.7371, "step": 45080 }, { "epoch": 0.1821693055426496, "grad_norm": 581.795166015625, "learning_rate": 4.902286402327331e-05, "loss": 78.2122, "step": 45090 }, { "epoch": 0.18220970680801724, "grad_norm": 1014.2080078125, "learning_rate": 4.902189741742247e-05, "loss": 93.4292, "step": 45100 }, { "epoch": 0.18225010807338485, "grad_norm": 1033.72412109375, "learning_rate": 4.902093034325237e-05, "loss": 86.2924, "step": 45110 }, { "epoch": 0.1822905093387525, "grad_norm": 553.49755859375, "learning_rate": 4.901996280078186e-05, "loss": 115.3292, "step": 45120 }, { "epoch": 0.18233091060412013, "grad_norm": 867.1639404296875, "learning_rate": 4.901899479002982e-05, "loss": 64.9478, "step": 45130 }, { "epoch": 0.18237131186948774, "grad_norm": 314.60528564453125, "learning_rate": 4.901802631101511e-05, "loss": 65.43, "step": 45140 }, { "epoch": 0.18241171313485538, "grad_norm": 676.8156127929688, "learning_rate": 4.90170573637566e-05, "loss": 71.8611, "step": 45150 }, { "epoch": 0.18245211440022302, "grad_norm": 947.8834228515625, "learning_rate": 4.90160879482732e-05, "loss": 94.2625, "step": 45160 }, { "epoch": 0.18249251566559066, "grad_norm": 628.1900634765625, "learning_rate": 4.901511806458381e-05, "loss": 59.9568, "step": 45170 }, { "epoch": 0.18253291693095827, "grad_norm": 1697.5426025390625, "learning_rate": 4.9014147712707316e-05, "loss": 78.6836, "step": 45180 }, { "epoch": 0.1825733181963259, "grad_norm": 1006.1732177734375, "learning_rate": 4.9013176892662654e-05, "loss": 82.129, "step": 45190 }, { "epoch": 0.18261371946169355, "grad_norm": 1150.0892333984375, "learning_rate": 4.9012205604468744e-05, "loss": 117.7069, "step": 45200 }, { "epoch": 0.18265412072706116, "grad_norm": 707.0164184570312, "learning_rate": 4.9011233848144525e-05, "loss": 82.716, "step": 45210 }, { "epoch": 0.1826945219924288, "grad_norm": 355.14959716796875, "learning_rate": 4.9010261623708944e-05, "loss": 47.5373, "step": 45220 }, { "epoch": 0.18273492325779644, "grad_norm": 784.4992065429688, "learning_rate": 4.9009288931180947e-05, "loss": 82.2301, "step": 45230 }, { "epoch": 0.18277532452316406, "grad_norm": 1023.0245361328125, "learning_rate": 4.90083157705795e-05, "loss": 67.702, "step": 45240 }, { "epoch": 0.1828157257885317, "grad_norm": 801.0816040039062, "learning_rate": 4.900734214192358e-05, "loss": 76.0119, "step": 45250 }, { "epoch": 0.18285612705389934, "grad_norm": 1161.0206298828125, "learning_rate": 4.900636804523217e-05, "loss": 75.7235, "step": 45260 }, { "epoch": 0.18289652831926695, "grad_norm": 1342.0335693359375, "learning_rate": 4.900539348052424e-05, "loss": 82.4319, "step": 45270 }, { "epoch": 0.1829369295846346, "grad_norm": 1176.3917236328125, "learning_rate": 4.9004418447818815e-05, "loss": 97.9844, "step": 45280 }, { "epoch": 0.18297733085000223, "grad_norm": 1022.2837524414062, "learning_rate": 4.9003442947134895e-05, "loss": 78.1378, "step": 45290 }, { "epoch": 0.18301773211536984, "grad_norm": 816.5732421875, "learning_rate": 4.90024669784915e-05, "loss": 70.9764, "step": 45300 }, { "epoch": 0.18305813338073748, "grad_norm": 830.2484741210938, "learning_rate": 4.9001490541907645e-05, "loss": 62.789, "step": 45310 }, { "epoch": 0.18309853464610512, "grad_norm": 1164.3995361328125, "learning_rate": 4.900051363740238e-05, "loss": 63.8086, "step": 45320 }, { "epoch": 0.18313893591147276, "grad_norm": 1487.6907958984375, "learning_rate": 4.899953626499475e-05, "loss": 78.6914, "step": 45330 }, { "epoch": 0.18317933717684037, "grad_norm": 570.93212890625, "learning_rate": 4.89985584247038e-05, "loss": 80.8218, "step": 45340 }, { "epoch": 0.183219738442208, "grad_norm": 822.6448974609375, "learning_rate": 4.8997580116548595e-05, "loss": 105.5186, "step": 45350 }, { "epoch": 0.18326013970757565, "grad_norm": 1853.758544921875, "learning_rate": 4.8996601340548215e-05, "loss": 116.5778, "step": 45360 }, { "epoch": 0.18330054097294327, "grad_norm": 712.744140625, "learning_rate": 4.899562209672174e-05, "loss": 67.3311, "step": 45370 }, { "epoch": 0.1833409422383109, "grad_norm": 752.5093994140625, "learning_rate": 4.899464238508825e-05, "loss": 75.8195, "step": 45380 }, { "epoch": 0.18338134350367855, "grad_norm": 1256.2913818359375, "learning_rate": 4.899366220566686e-05, "loss": 112.7811, "step": 45390 }, { "epoch": 0.18342174476904616, "grad_norm": 1340.5006103515625, "learning_rate": 4.899268155847667e-05, "loss": 103.8136, "step": 45400 }, { "epoch": 0.1834621460344138, "grad_norm": 698.527587890625, "learning_rate": 4.89917004435368e-05, "loss": 85.698, "step": 45410 }, { "epoch": 0.18350254729978144, "grad_norm": 697.1134033203125, "learning_rate": 4.899071886086638e-05, "loss": 95.746, "step": 45420 }, { "epoch": 0.18354294856514905, "grad_norm": 860.381591796875, "learning_rate": 4.898973681048454e-05, "loss": 71.6687, "step": 45430 }, { "epoch": 0.1835833498305167, "grad_norm": 300.5081481933594, "learning_rate": 4.898875429241044e-05, "loss": 57.1445, "step": 45440 }, { "epoch": 0.18362375109588433, "grad_norm": 325.8232727050781, "learning_rate": 4.898777130666322e-05, "loss": 102.5872, "step": 45450 }, { "epoch": 0.18366415236125194, "grad_norm": 714.4207153320312, "learning_rate": 4.898678785326205e-05, "loss": 96.0007, "step": 45460 }, { "epoch": 0.18370455362661958, "grad_norm": 879.5983276367188, "learning_rate": 4.8985803932226094e-05, "loss": 91.5776, "step": 45470 }, { "epoch": 0.18374495489198722, "grad_norm": 1637.9261474609375, "learning_rate": 4.898481954357455e-05, "loss": 108.3861, "step": 45480 }, { "epoch": 0.18378535615735486, "grad_norm": 839.2368774414062, "learning_rate": 4.8983834687326596e-05, "loss": 96.1661, "step": 45490 }, { "epoch": 0.18382575742272247, "grad_norm": 1169.771240234375, "learning_rate": 4.898284936350144e-05, "loss": 102.8537, "step": 45500 }, { "epoch": 0.18386615868809011, "grad_norm": 760.4490966796875, "learning_rate": 4.898186357211829e-05, "loss": 74.1794, "step": 45510 }, { "epoch": 0.18390655995345775, "grad_norm": 784.6276245117188, "learning_rate": 4.898087731319636e-05, "loss": 67.2023, "step": 45520 }, { "epoch": 0.18394696121882537, "grad_norm": 1203.34228515625, "learning_rate": 4.8979890586754875e-05, "loss": 104.9335, "step": 45530 }, { "epoch": 0.183987362484193, "grad_norm": 1361.6192626953125, "learning_rate": 4.897890339281309e-05, "loss": 83.1708, "step": 45540 }, { "epoch": 0.18402776374956065, "grad_norm": 1109.7901611328125, "learning_rate": 4.897791573139023e-05, "loss": 89.0058, "step": 45550 }, { "epoch": 0.18406816501492826, "grad_norm": 945.8731689453125, "learning_rate": 4.897692760250556e-05, "loss": 124.9025, "step": 45560 }, { "epoch": 0.1841085662802959, "grad_norm": 931.4639282226562, "learning_rate": 4.897593900617834e-05, "loss": 87.5932, "step": 45570 }, { "epoch": 0.18414896754566354, "grad_norm": 928.37646484375, "learning_rate": 4.897494994242785e-05, "loss": 121.7126, "step": 45580 }, { "epoch": 0.18418936881103115, "grad_norm": 1160.150634765625, "learning_rate": 4.8973960411273364e-05, "loss": 88.2009, "step": 45590 }, { "epoch": 0.1842297700763988, "grad_norm": 1830.1046142578125, "learning_rate": 4.8972970412734176e-05, "loss": 94.9112, "step": 45600 }, { "epoch": 0.18427017134176643, "grad_norm": 760.2313232421875, "learning_rate": 4.897197994682959e-05, "loss": 66.2683, "step": 45610 }, { "epoch": 0.18431057260713404, "grad_norm": 1265.816650390625, "learning_rate": 4.897098901357891e-05, "loss": 145.3422, "step": 45620 }, { "epoch": 0.18435097387250168, "grad_norm": 1088.4708251953125, "learning_rate": 4.896999761300146e-05, "loss": 124.1203, "step": 45630 }, { "epoch": 0.18439137513786932, "grad_norm": 2023.6080322265625, "learning_rate": 4.896900574511657e-05, "loss": 89.7067, "step": 45640 }, { "epoch": 0.18443177640323694, "grad_norm": 467.109130859375, "learning_rate": 4.896801340994357e-05, "loss": 67.6761, "step": 45650 }, { "epoch": 0.18447217766860458, "grad_norm": 1232.957763671875, "learning_rate": 4.896702060750181e-05, "loss": 87.1448, "step": 45660 }, { "epoch": 0.18451257893397222, "grad_norm": 1274.385009765625, "learning_rate": 4.896602733781065e-05, "loss": 90.0393, "step": 45670 }, { "epoch": 0.18455298019933986, "grad_norm": 913.9437255859375, "learning_rate": 4.8965033600889435e-05, "loss": 58.4704, "step": 45680 }, { "epoch": 0.18459338146470747, "grad_norm": 1077.12744140625, "learning_rate": 4.896403939675756e-05, "loss": 101.7226, "step": 45690 }, { "epoch": 0.1846337827300751, "grad_norm": 648.5958862304688, "learning_rate": 4.89630447254344e-05, "loss": 59.2482, "step": 45700 }, { "epoch": 0.18467418399544275, "grad_norm": 1022.220703125, "learning_rate": 4.896204958693934e-05, "loss": 62.0977, "step": 45710 }, { "epoch": 0.18471458526081036, "grad_norm": 902.6193237304688, "learning_rate": 4.8961053981291795e-05, "loss": 57.4871, "step": 45720 }, { "epoch": 0.184754986526178, "grad_norm": 971.0794067382812, "learning_rate": 4.896005790851116e-05, "loss": 67.6352, "step": 45730 }, { "epoch": 0.18479538779154564, "grad_norm": 696.0235595703125, "learning_rate": 4.8959061368616863e-05, "loss": 60.1176, "step": 45740 }, { "epoch": 0.18483578905691325, "grad_norm": 403.1430969238281, "learning_rate": 4.895806436162833e-05, "loss": 92.4626, "step": 45750 }, { "epoch": 0.1848761903222809, "grad_norm": 524.8573608398438, "learning_rate": 4.8957066887565e-05, "loss": 82.3987, "step": 45760 }, { "epoch": 0.18491659158764853, "grad_norm": 671.1500244140625, "learning_rate": 4.8956068946446314e-05, "loss": 63.0742, "step": 45770 }, { "epoch": 0.18495699285301614, "grad_norm": 1303.492431640625, "learning_rate": 4.8955070538291735e-05, "loss": 80.9564, "step": 45780 }, { "epoch": 0.18499739411838378, "grad_norm": 433.896240234375, "learning_rate": 4.8954071663120715e-05, "loss": 58.4002, "step": 45790 }, { "epoch": 0.18503779538375142, "grad_norm": 504.2688903808594, "learning_rate": 4.8953072320952745e-05, "loss": 73.8806, "step": 45800 }, { "epoch": 0.18507819664911904, "grad_norm": 974.8590087890625, "learning_rate": 4.895207251180729e-05, "loss": 112.7232, "step": 45810 }, { "epoch": 0.18511859791448668, "grad_norm": 1069.8795166015625, "learning_rate": 4.8951072235703855e-05, "loss": 108.1218, "step": 45820 }, { "epoch": 0.18515899917985432, "grad_norm": 385.00994873046875, "learning_rate": 4.895007149266193e-05, "loss": 55.9683, "step": 45830 }, { "epoch": 0.18519940044522196, "grad_norm": 680.8944091796875, "learning_rate": 4.8949070282701034e-05, "loss": 81.2045, "step": 45840 }, { "epoch": 0.18523980171058957, "grad_norm": 885.607177734375, "learning_rate": 4.8948068605840694e-05, "loss": 83.6066, "step": 45850 }, { "epoch": 0.1852802029759572, "grad_norm": 794.8469848632812, "learning_rate": 4.894706646210041e-05, "loss": 97.3249, "step": 45860 }, { "epoch": 0.18532060424132485, "grad_norm": 865.634033203125, "learning_rate": 4.8946063851499746e-05, "loss": 51.3998, "step": 45870 }, { "epoch": 0.18536100550669246, "grad_norm": 882.5504760742188, "learning_rate": 4.894506077405824e-05, "loss": 65.3006, "step": 45880 }, { "epoch": 0.1854014067720601, "grad_norm": 712.5584106445312, "learning_rate": 4.894405722979544e-05, "loss": 60.6922, "step": 45890 }, { "epoch": 0.18544180803742774, "grad_norm": 1048.72216796875, "learning_rate": 4.894305321873092e-05, "loss": 79.0462, "step": 45900 }, { "epoch": 0.18548220930279535, "grad_norm": 757.8132934570312, "learning_rate": 4.894204874088425e-05, "loss": 82.1683, "step": 45910 }, { "epoch": 0.185522610568163, "grad_norm": 946.23828125, "learning_rate": 4.8941043796275015e-05, "loss": 80.1746, "step": 45920 }, { "epoch": 0.18556301183353063, "grad_norm": 1357.65380859375, "learning_rate": 4.8940038384922806e-05, "loss": 77.1498, "step": 45930 }, { "epoch": 0.18560341309889825, "grad_norm": 1069.114990234375, "learning_rate": 4.8939032506847224e-05, "loss": 97.8804, "step": 45940 }, { "epoch": 0.18564381436426589, "grad_norm": 1382.1502685546875, "learning_rate": 4.893802616206787e-05, "loss": 108.694, "step": 45950 }, { "epoch": 0.18568421562963353, "grad_norm": 1493.73486328125, "learning_rate": 4.893701935060439e-05, "loss": 81.302, "step": 45960 }, { "epoch": 0.18572461689500114, "grad_norm": 873.5044555664062, "learning_rate": 4.893601207247638e-05, "loss": 76.8872, "step": 45970 }, { "epoch": 0.18576501816036878, "grad_norm": 2981.951416015625, "learning_rate": 4.893500432770349e-05, "loss": 68.5081, "step": 45980 }, { "epoch": 0.18580541942573642, "grad_norm": 396.176513671875, "learning_rate": 4.893399611630538e-05, "loss": 65.5169, "step": 45990 }, { "epoch": 0.18584582069110406, "grad_norm": 1273.2371826171875, "learning_rate": 4.893298743830168e-05, "loss": 69.0687, "step": 46000 }, { "epoch": 0.18588622195647167, "grad_norm": 2934.45361328125, "learning_rate": 4.8931978293712074e-05, "loss": 67.4254, "step": 46010 }, { "epoch": 0.1859266232218393, "grad_norm": 665.5234985351562, "learning_rate": 4.8930968682556234e-05, "loss": 49.5943, "step": 46020 }, { "epoch": 0.18596702448720695, "grad_norm": 688.6306762695312, "learning_rate": 4.892995860485384e-05, "loss": 50.1611, "step": 46030 }, { "epoch": 0.18600742575257456, "grad_norm": 544.0740356445312, "learning_rate": 4.892894806062458e-05, "loss": 90.3554, "step": 46040 }, { "epoch": 0.1860478270179422, "grad_norm": 817.1072998046875, "learning_rate": 4.892793704988816e-05, "loss": 100.2068, "step": 46050 }, { "epoch": 0.18608822828330984, "grad_norm": 901.2867431640625, "learning_rate": 4.892692557266429e-05, "loss": 82.2532, "step": 46060 }, { "epoch": 0.18612862954867745, "grad_norm": 670.2236328125, "learning_rate": 4.892591362897268e-05, "loss": 113.8737, "step": 46070 }, { "epoch": 0.1861690308140451, "grad_norm": 500.85870361328125, "learning_rate": 4.892490121883306e-05, "loss": 81.973, "step": 46080 }, { "epoch": 0.18620943207941273, "grad_norm": 0.0, "learning_rate": 4.892388834226519e-05, "loss": 63.0598, "step": 46090 }, { "epoch": 0.18624983334478035, "grad_norm": 1111.806396484375, "learning_rate": 4.892287499928879e-05, "loss": 70.0805, "step": 46100 }, { "epoch": 0.186290234610148, "grad_norm": 1321.56884765625, "learning_rate": 4.892186118992362e-05, "loss": 118.3066, "step": 46110 }, { "epoch": 0.18633063587551563, "grad_norm": 916.1724243164062, "learning_rate": 4.892084691418947e-05, "loss": 99.126, "step": 46120 }, { "epoch": 0.18637103714088324, "grad_norm": 1164.2841796875, "learning_rate": 4.891983217210607e-05, "loss": 66.5533, "step": 46130 }, { "epoch": 0.18641143840625088, "grad_norm": 589.622314453125, "learning_rate": 4.891881696369325e-05, "loss": 76.5307, "step": 46140 }, { "epoch": 0.18645183967161852, "grad_norm": 1253.39306640625, "learning_rate": 4.891780128897077e-05, "loss": 72.5351, "step": 46150 }, { "epoch": 0.18649224093698616, "grad_norm": 1563.49658203125, "learning_rate": 4.891678514795843e-05, "loss": 93.3726, "step": 46160 }, { "epoch": 0.18653264220235377, "grad_norm": 3200.355712890625, "learning_rate": 4.891576854067607e-05, "loss": 124.7522, "step": 46170 }, { "epoch": 0.1865730434677214, "grad_norm": 897.306640625, "learning_rate": 4.891475146714347e-05, "loss": 68.6371, "step": 46180 }, { "epoch": 0.18661344473308905, "grad_norm": 992.8071899414062, "learning_rate": 4.891373392738049e-05, "loss": 80.4052, "step": 46190 }, { "epoch": 0.18665384599845666, "grad_norm": 947.232421875, "learning_rate": 4.891271592140695e-05, "loss": 92.7935, "step": 46200 }, { "epoch": 0.1866942472638243, "grad_norm": 1065.53759765625, "learning_rate": 4.891169744924271e-05, "loss": 180.0239, "step": 46210 }, { "epoch": 0.18673464852919194, "grad_norm": 960.1724243164062, "learning_rate": 4.8910678510907606e-05, "loss": 61.9903, "step": 46220 }, { "epoch": 0.18677504979455956, "grad_norm": 1468.2706298828125, "learning_rate": 4.890965910642152e-05, "loss": 79.8836, "step": 46230 }, { "epoch": 0.1868154510599272, "grad_norm": 0.0, "learning_rate": 4.8908639235804324e-05, "loss": 87.0793, "step": 46240 }, { "epoch": 0.18685585232529484, "grad_norm": 0.0, "learning_rate": 4.890761889907589e-05, "loss": 78.9375, "step": 46250 }, { "epoch": 0.18689625359066245, "grad_norm": 1007.9310302734375, "learning_rate": 4.890659809625612e-05, "loss": 102.4231, "step": 46260 }, { "epoch": 0.1869366548560301, "grad_norm": 794.476318359375, "learning_rate": 4.890557682736491e-05, "loss": 84.0354, "step": 46270 }, { "epoch": 0.18697705612139773, "grad_norm": 530.2271118164062, "learning_rate": 4.890455509242218e-05, "loss": 68.9714, "step": 46280 }, { "epoch": 0.18701745738676534, "grad_norm": 1666.1671142578125, "learning_rate": 4.8903532891447836e-05, "loss": 84.9628, "step": 46290 }, { "epoch": 0.18705785865213298, "grad_norm": 787.04833984375, "learning_rate": 4.890251022446181e-05, "loss": 75.6712, "step": 46300 }, { "epoch": 0.18709825991750062, "grad_norm": 2045.2965087890625, "learning_rate": 4.890148709148404e-05, "loss": 98.4277, "step": 46310 }, { "epoch": 0.18713866118286826, "grad_norm": 949.1669311523438, "learning_rate": 4.890046349253448e-05, "loss": 63.0205, "step": 46320 }, { "epoch": 0.18717906244823587, "grad_norm": 578.8233642578125, "learning_rate": 4.8899439427633076e-05, "loss": 47.0746, "step": 46330 }, { "epoch": 0.1872194637136035, "grad_norm": 406.12615966796875, "learning_rate": 4.88984148967998e-05, "loss": 53.2635, "step": 46340 }, { "epoch": 0.18725986497897115, "grad_norm": 1593.3697509765625, "learning_rate": 4.889738990005462e-05, "loss": 93.6755, "step": 46350 }, { "epoch": 0.18730026624433876, "grad_norm": 722.7239990234375, "learning_rate": 4.889636443741752e-05, "loss": 77.3317, "step": 46360 }, { "epoch": 0.1873406675097064, "grad_norm": 2375.264892578125, "learning_rate": 4.88953385089085e-05, "loss": 88.7548, "step": 46370 }, { "epoch": 0.18738106877507404, "grad_norm": 641.643798828125, "learning_rate": 4.8894312114547535e-05, "loss": 95.3137, "step": 46380 }, { "epoch": 0.18742147004044166, "grad_norm": 1006.8427734375, "learning_rate": 4.889328525435467e-05, "loss": 74.9918, "step": 46390 }, { "epoch": 0.1874618713058093, "grad_norm": 638.9917602539062, "learning_rate": 4.889225792834991e-05, "loss": 73.9908, "step": 46400 }, { "epoch": 0.18750227257117694, "grad_norm": 2413.532470703125, "learning_rate": 4.889123013655327e-05, "loss": 129.9344, "step": 46410 }, { "epoch": 0.18754267383654455, "grad_norm": 447.9295349121094, "learning_rate": 4.8890201878984796e-05, "loss": 79.6906, "step": 46420 }, { "epoch": 0.1875830751019122, "grad_norm": 1658.515380859375, "learning_rate": 4.888917315566455e-05, "loss": 74.6684, "step": 46430 }, { "epoch": 0.18762347636727983, "grad_norm": 814.1004028320312, "learning_rate": 4.888814396661256e-05, "loss": 64.3334, "step": 46440 }, { "epoch": 0.18766387763264744, "grad_norm": 723.90673828125, "learning_rate": 4.8887114311848915e-05, "loss": 98.6432, "step": 46450 }, { "epoch": 0.18770427889801508, "grad_norm": 535.9703369140625, "learning_rate": 4.8886084191393677e-05, "loss": 50.3761, "step": 46460 }, { "epoch": 0.18774468016338272, "grad_norm": 452.6556701660156, "learning_rate": 4.888505360526693e-05, "loss": 109.0327, "step": 46470 }, { "epoch": 0.18778508142875036, "grad_norm": 691.0087280273438, "learning_rate": 4.888402255348876e-05, "loss": 101.6102, "step": 46480 }, { "epoch": 0.18782548269411797, "grad_norm": 391.9655456542969, "learning_rate": 4.888299103607928e-05, "loss": 82.483, "step": 46490 }, { "epoch": 0.1878658839594856, "grad_norm": 898.39892578125, "learning_rate": 4.888195905305859e-05, "loss": 58.2394, "step": 46500 }, { "epoch": 0.18790628522485325, "grad_norm": 902.1316528320312, "learning_rate": 4.888092660444682e-05, "loss": 48.3732, "step": 46510 }, { "epoch": 0.18794668649022087, "grad_norm": 873.6351928710938, "learning_rate": 4.887989369026409e-05, "loss": 55.4155, "step": 46520 }, { "epoch": 0.1879870877555885, "grad_norm": 684.0804443359375, "learning_rate": 4.887886031053053e-05, "loss": 80.0627, "step": 46530 }, { "epoch": 0.18802748902095615, "grad_norm": 932.9844360351562, "learning_rate": 4.887782646526631e-05, "loss": 62.5363, "step": 46540 }, { "epoch": 0.18806789028632376, "grad_norm": 676.0230102539062, "learning_rate": 4.8876792154491556e-05, "loss": 67.6069, "step": 46550 }, { "epoch": 0.1881082915516914, "grad_norm": 673.8368530273438, "learning_rate": 4.887575737822645e-05, "loss": 62.4173, "step": 46560 }, { "epoch": 0.18814869281705904, "grad_norm": 1231.91943359375, "learning_rate": 4.8874722136491155e-05, "loss": 58.3462, "step": 46570 }, { "epoch": 0.18818909408242665, "grad_norm": 669.4678344726562, "learning_rate": 4.887368642930588e-05, "loss": 140.696, "step": 46580 }, { "epoch": 0.1882294953477943, "grad_norm": 797.5696411132812, "learning_rate": 4.887265025669078e-05, "loss": 68.3853, "step": 46590 }, { "epoch": 0.18826989661316193, "grad_norm": 722.3099975585938, "learning_rate": 4.887161361866608e-05, "loss": 101.0911, "step": 46600 }, { "epoch": 0.18831029787852954, "grad_norm": 1002.6024169921875, "learning_rate": 4.887057651525198e-05, "loss": 109.1899, "step": 46610 }, { "epoch": 0.18835069914389718, "grad_norm": 950.6636352539062, "learning_rate": 4.8869538946468694e-05, "loss": 59.6831, "step": 46620 }, { "epoch": 0.18839110040926482, "grad_norm": 1075.5361328125, "learning_rate": 4.8868500912336465e-05, "loss": 73.4534, "step": 46630 }, { "epoch": 0.18843150167463246, "grad_norm": 371.59368896484375, "learning_rate": 4.8867462412875526e-05, "loss": 94.6471, "step": 46640 }, { "epoch": 0.18847190294000007, "grad_norm": 1247.0185546875, "learning_rate": 4.886642344810611e-05, "loss": 86.5103, "step": 46650 }, { "epoch": 0.18851230420536771, "grad_norm": 86.08003234863281, "learning_rate": 4.8865384018048494e-05, "loss": 77.3341, "step": 46660 }, { "epoch": 0.18855270547073535, "grad_norm": 517.982421875, "learning_rate": 4.886434412272293e-05, "loss": 58.2183, "step": 46670 }, { "epoch": 0.18859310673610297, "grad_norm": 772.2702026367188, "learning_rate": 4.886330376214968e-05, "loss": 90.4057, "step": 46680 }, { "epoch": 0.1886335080014706, "grad_norm": 827.40234375, "learning_rate": 4.886226293634904e-05, "loss": 62.3181, "step": 46690 }, { "epoch": 0.18867390926683825, "grad_norm": 662.444580078125, "learning_rate": 4.886122164534131e-05, "loss": 72.7424, "step": 46700 }, { "epoch": 0.18871431053220586, "grad_norm": 423.5435791015625, "learning_rate": 4.886017988914676e-05, "loss": 132.9958, "step": 46710 }, { "epoch": 0.1887547117975735, "grad_norm": 773.7642822265625, "learning_rate": 4.8859137667785735e-05, "loss": 112.7814, "step": 46720 }, { "epoch": 0.18879511306294114, "grad_norm": 942.0858154296875, "learning_rate": 4.8858094981278524e-05, "loss": 74.1822, "step": 46730 }, { "epoch": 0.18883551432830875, "grad_norm": 623.146484375, "learning_rate": 4.8857051829645485e-05, "loss": 67.047, "step": 46740 }, { "epoch": 0.1888759155936764, "grad_norm": 2191.332275390625, "learning_rate": 4.8856008212906925e-05, "loss": 74.4607, "step": 46750 }, { "epoch": 0.18891631685904403, "grad_norm": 796.4672241210938, "learning_rate": 4.88549641310832e-05, "loss": 65.4805, "step": 46760 }, { "epoch": 0.18895671812441164, "grad_norm": 897.36181640625, "learning_rate": 4.885391958419468e-05, "loss": 104.9449, "step": 46770 }, { "epoch": 0.18899711938977928, "grad_norm": 4131.201171875, "learning_rate": 4.885287457226172e-05, "loss": 109.5929, "step": 46780 }, { "epoch": 0.18903752065514692, "grad_norm": 1081.7864990234375, "learning_rate": 4.885182909530468e-05, "loss": 101.8282, "step": 46790 }, { "epoch": 0.18907792192051456, "grad_norm": 761.68408203125, "learning_rate": 4.885078315334395e-05, "loss": 59.1006, "step": 46800 }, { "epoch": 0.18911832318588218, "grad_norm": 1586.106689453125, "learning_rate": 4.884973674639993e-05, "loss": 71.1058, "step": 46810 }, { "epoch": 0.18915872445124982, "grad_norm": 1164.787841796875, "learning_rate": 4.884868987449301e-05, "loss": 64.9034, "step": 46820 }, { "epoch": 0.18919912571661746, "grad_norm": 629.8527221679688, "learning_rate": 4.8847642537643604e-05, "loss": 71.8605, "step": 46830 }, { "epoch": 0.18923952698198507, "grad_norm": 824.6557006835938, "learning_rate": 4.884659473587213e-05, "loss": 91.3435, "step": 46840 }, { "epoch": 0.1892799282473527, "grad_norm": 213.20458984375, "learning_rate": 4.884554646919901e-05, "loss": 64.7744, "step": 46850 }, { "epoch": 0.18932032951272035, "grad_norm": 957.3148803710938, "learning_rate": 4.884449773764469e-05, "loss": 68.4271, "step": 46860 }, { "epoch": 0.18936073077808796, "grad_norm": 1541.1634521484375, "learning_rate": 4.884344854122961e-05, "loss": 93.5629, "step": 46870 }, { "epoch": 0.1894011320434556, "grad_norm": 1493.8668212890625, "learning_rate": 4.884239887997423e-05, "loss": 80.8613, "step": 46880 }, { "epoch": 0.18944153330882324, "grad_norm": 678.560546875, "learning_rate": 4.8841348753899e-05, "loss": 87.8491, "step": 46890 }, { "epoch": 0.18948193457419085, "grad_norm": 711.7306518554688, "learning_rate": 4.88402981630244e-05, "loss": 93.6465, "step": 46900 }, { "epoch": 0.1895223358395585, "grad_norm": 554.8831176757812, "learning_rate": 4.883924710737092e-05, "loss": 73.9221, "step": 46910 }, { "epoch": 0.18956273710492613, "grad_norm": 1330.556884765625, "learning_rate": 4.8838195586959046e-05, "loss": 79.4741, "step": 46920 }, { "epoch": 0.18960313837029374, "grad_norm": 598.888427734375, "learning_rate": 4.883714360180927e-05, "loss": 66.7534, "step": 46930 }, { "epoch": 0.18964353963566138, "grad_norm": 614.5370483398438, "learning_rate": 4.883609115194211e-05, "loss": 90.6562, "step": 46940 }, { "epoch": 0.18968394090102902, "grad_norm": 1026.0899658203125, "learning_rate": 4.883503823737808e-05, "loss": 86.146, "step": 46950 }, { "epoch": 0.18972434216639666, "grad_norm": 1076.3065185546875, "learning_rate": 4.8833984858137715e-05, "loss": 74.9594, "step": 46960 }, { "epoch": 0.18976474343176428, "grad_norm": 555.6848754882812, "learning_rate": 4.8832931014241534e-05, "loss": 69.4619, "step": 46970 }, { "epoch": 0.18980514469713192, "grad_norm": 1051.6075439453125, "learning_rate": 4.88318767057101e-05, "loss": 67.6875, "step": 46980 }, { "epoch": 0.18984554596249956, "grad_norm": 1332.5283203125, "learning_rate": 4.883082193256397e-05, "loss": 73.1094, "step": 46990 }, { "epoch": 0.18988594722786717, "grad_norm": 1304.3714599609375, "learning_rate": 4.882976669482367e-05, "loss": 91.2835, "step": 47000 }, { "epoch": 0.1899263484932348, "grad_norm": 1050.1197509765625, "learning_rate": 4.882871099250982e-05, "loss": 96.9666, "step": 47010 }, { "epoch": 0.18996674975860245, "grad_norm": 2204.67626953125, "learning_rate": 4.882765482564298e-05, "loss": 79.6932, "step": 47020 }, { "epoch": 0.19000715102397006, "grad_norm": 308.5193786621094, "learning_rate": 4.882659819424374e-05, "loss": 60.4664, "step": 47030 }, { "epoch": 0.1900475522893377, "grad_norm": 760.51953125, "learning_rate": 4.8825541098332706e-05, "loss": 93.623, "step": 47040 }, { "epoch": 0.19008795355470534, "grad_norm": 1053.7843017578125, "learning_rate": 4.882448353793048e-05, "loss": 72.0362, "step": 47050 }, { "epoch": 0.19012835482007295, "grad_norm": 510.9100341796875, "learning_rate": 4.8823425513057674e-05, "loss": 76.0767, "step": 47060 }, { "epoch": 0.1901687560854406, "grad_norm": 1290.3902587890625, "learning_rate": 4.8822367023734925e-05, "loss": 76.7432, "step": 47070 }, { "epoch": 0.19020915735080823, "grad_norm": 669.0762939453125, "learning_rate": 4.8821308069982867e-05, "loss": 72.9376, "step": 47080 }, { "epoch": 0.19024955861617585, "grad_norm": 708.4724731445312, "learning_rate": 4.8820248651822145e-05, "loss": 74.2905, "step": 47090 }, { "epoch": 0.19028995988154349, "grad_norm": 810.4485473632812, "learning_rate": 4.8819188769273414e-05, "loss": 85.8345, "step": 47100 }, { "epoch": 0.19033036114691113, "grad_norm": 1206.7943115234375, "learning_rate": 4.8818128422357335e-05, "loss": 62.0077, "step": 47110 }, { "epoch": 0.19037076241227877, "grad_norm": 763.1806030273438, "learning_rate": 4.881706761109458e-05, "loss": 80.4839, "step": 47120 }, { "epoch": 0.19041116367764638, "grad_norm": 1197.156005859375, "learning_rate": 4.8816006335505825e-05, "loss": 130.0592, "step": 47130 }, { "epoch": 0.19045156494301402, "grad_norm": 522.46484375, "learning_rate": 4.8814944595611776e-05, "loss": 71.9689, "step": 47140 }, { "epoch": 0.19049196620838166, "grad_norm": 653.2218627929688, "learning_rate": 4.881388239143311e-05, "loss": 88.4613, "step": 47150 }, { "epoch": 0.19053236747374927, "grad_norm": 696.4855346679688, "learning_rate": 4.881281972299055e-05, "loss": 99.2132, "step": 47160 }, { "epoch": 0.1905727687391169, "grad_norm": 779.6743774414062, "learning_rate": 4.8811756590304815e-05, "loss": 54.9467, "step": 47170 }, { "epoch": 0.19061317000448455, "grad_norm": 489.7877197265625, "learning_rate": 4.881069299339662e-05, "loss": 98.4642, "step": 47180 }, { "epoch": 0.19065357126985216, "grad_norm": 1307.5743408203125, "learning_rate": 4.880962893228671e-05, "loss": 77.5308, "step": 47190 }, { "epoch": 0.1906939725352198, "grad_norm": 1113.280517578125, "learning_rate": 4.880856440699582e-05, "loss": 65.3198, "step": 47200 }, { "epoch": 0.19073437380058744, "grad_norm": 859.1810913085938, "learning_rate": 4.880749941754471e-05, "loss": 119.8937, "step": 47210 }, { "epoch": 0.19077477506595505, "grad_norm": 676.0623779296875, "learning_rate": 4.8806433963954154e-05, "loss": 88.6286, "step": 47220 }, { "epoch": 0.1908151763313227, "grad_norm": 922.7902221679688, "learning_rate": 4.880536804624491e-05, "loss": 102.1759, "step": 47230 }, { "epoch": 0.19085557759669033, "grad_norm": 1272.20068359375, "learning_rate": 4.880430166443775e-05, "loss": 107.2287, "step": 47240 }, { "epoch": 0.19089597886205795, "grad_norm": 1099.0933837890625, "learning_rate": 4.880323481855347e-05, "loss": 71.5676, "step": 47250 }, { "epoch": 0.1909363801274256, "grad_norm": 958.9481201171875, "learning_rate": 4.880216750861288e-05, "loss": 51.0081, "step": 47260 }, { "epoch": 0.19097678139279323, "grad_norm": 850.7756958007812, "learning_rate": 4.880109973463678e-05, "loss": 102.9255, "step": 47270 }, { "epoch": 0.19101718265816087, "grad_norm": 522.798095703125, "learning_rate": 4.880003149664599e-05, "loss": 73.2628, "step": 47280 }, { "epoch": 0.19105758392352848, "grad_norm": 1138.0860595703125, "learning_rate": 4.879896279466133e-05, "loss": 93.8309, "step": 47290 }, { "epoch": 0.19109798518889612, "grad_norm": 877.7420654296875, "learning_rate": 4.8797893628703635e-05, "loss": 68.0124, "step": 47300 }, { "epoch": 0.19113838645426376, "grad_norm": 372.2701416015625, "learning_rate": 4.879682399879375e-05, "loss": 83.2249, "step": 47310 }, { "epoch": 0.19117878771963137, "grad_norm": 600.6828002929688, "learning_rate": 4.8795753904952534e-05, "loss": 69.8246, "step": 47320 }, { "epoch": 0.191219188984999, "grad_norm": 1143.0029296875, "learning_rate": 4.879468334720085e-05, "loss": 61.1958, "step": 47330 }, { "epoch": 0.19125959025036665, "grad_norm": 1139.1126708984375, "learning_rate": 4.879361232555956e-05, "loss": 79.5726, "step": 47340 }, { "epoch": 0.19129999151573426, "grad_norm": 1177.59033203125, "learning_rate": 4.879254084004955e-05, "loss": 84.1079, "step": 47350 }, { "epoch": 0.1913403927811019, "grad_norm": 643.5814819335938, "learning_rate": 4.8791468890691696e-05, "loss": 89.2118, "step": 47360 }, { "epoch": 0.19138079404646954, "grad_norm": 273.9512939453125, "learning_rate": 4.879039647750692e-05, "loss": 72.0723, "step": 47370 }, { "epoch": 0.19142119531183716, "grad_norm": 670.9833984375, "learning_rate": 4.8789323600516104e-05, "loss": 79.4604, "step": 47380 }, { "epoch": 0.1914615965772048, "grad_norm": 573.1339721679688, "learning_rate": 4.8788250259740185e-05, "loss": 57.4144, "step": 47390 }, { "epoch": 0.19150199784257244, "grad_norm": 746.1387939453125, "learning_rate": 4.878717645520008e-05, "loss": 71.1703, "step": 47400 }, { "epoch": 0.19154239910794005, "grad_norm": 770.6351318359375, "learning_rate": 4.878610218691673e-05, "loss": 71.4196, "step": 47410 }, { "epoch": 0.1915828003733077, "grad_norm": 419.6976318359375, "learning_rate": 4.878502745491106e-05, "loss": 55.445, "step": 47420 }, { "epoch": 0.19162320163867533, "grad_norm": 4431.18115234375, "learning_rate": 4.8783952259204036e-05, "loss": 75.606, "step": 47430 }, { "epoch": 0.19166360290404297, "grad_norm": 534.2245483398438, "learning_rate": 4.878287659981662e-05, "loss": 74.2982, "step": 47440 }, { "epoch": 0.19170400416941058, "grad_norm": 1988.7900390625, "learning_rate": 4.878180047676978e-05, "loss": 100.465, "step": 47450 }, { "epoch": 0.19174440543477822, "grad_norm": 605.2179565429688, "learning_rate": 4.87807238900845e-05, "loss": 54.1781, "step": 47460 }, { "epoch": 0.19178480670014586, "grad_norm": 1887.808837890625, "learning_rate": 4.8779646839781765e-05, "loss": 103.9908, "step": 47470 }, { "epoch": 0.19182520796551347, "grad_norm": 1397.8228759765625, "learning_rate": 4.877856932588257e-05, "loss": 112.4378, "step": 47480 }, { "epoch": 0.1918656092308811, "grad_norm": 1550.006103515625, "learning_rate": 4.877749134840792e-05, "loss": 104.1645, "step": 47490 }, { "epoch": 0.19190601049624875, "grad_norm": 555.1295166015625, "learning_rate": 4.877641290737884e-05, "loss": 61.3664, "step": 47500 }, { "epoch": 0.19194641176161636, "grad_norm": 1102.827392578125, "learning_rate": 4.877533400281635e-05, "loss": 119.7094, "step": 47510 }, { "epoch": 0.191986813026984, "grad_norm": 890.612548828125, "learning_rate": 4.877425463474148e-05, "loss": 75.4402, "step": 47520 }, { "epoch": 0.19202721429235164, "grad_norm": 586.08056640625, "learning_rate": 4.877317480317528e-05, "loss": 87.8783, "step": 47530 }, { "epoch": 0.19206761555771926, "grad_norm": 574.8908081054688, "learning_rate": 4.8772094508138796e-05, "loss": 88.9533, "step": 47540 }, { "epoch": 0.1921080168230869, "grad_norm": 761.5361938476562, "learning_rate": 4.877101374965308e-05, "loss": 62.7146, "step": 47550 }, { "epoch": 0.19214841808845454, "grad_norm": 709.9255981445312, "learning_rate": 4.8769932527739225e-05, "loss": 78.0876, "step": 47560 }, { "epoch": 0.19218881935382215, "grad_norm": 1276.592529296875, "learning_rate": 4.87688508424183e-05, "loss": 74.2328, "step": 47570 }, { "epoch": 0.1922292206191898, "grad_norm": 1980.7711181640625, "learning_rate": 4.876776869371139e-05, "loss": 84.7106, "step": 47580 }, { "epoch": 0.19226962188455743, "grad_norm": 1390.198486328125, "learning_rate": 4.876668608163959e-05, "loss": 89.8972, "step": 47590 }, { "epoch": 0.19231002314992507, "grad_norm": 413.9287109375, "learning_rate": 4.8765603006224006e-05, "loss": 95.8701, "step": 47600 }, { "epoch": 0.19235042441529268, "grad_norm": 1712.273193359375, "learning_rate": 4.876451946748576e-05, "loss": 123.683, "step": 47610 }, { "epoch": 0.19239082568066032, "grad_norm": 1209.84423828125, "learning_rate": 4.8763435465445964e-05, "loss": 102.8881, "step": 47620 }, { "epoch": 0.19243122694602796, "grad_norm": 919.6061401367188, "learning_rate": 4.8762351000125766e-05, "loss": 83.7829, "step": 47630 }, { "epoch": 0.19247162821139557, "grad_norm": 938.0872192382812, "learning_rate": 4.87612660715463e-05, "loss": 43.0572, "step": 47640 }, { "epoch": 0.1925120294767632, "grad_norm": 1763.4127197265625, "learning_rate": 4.876018067972872e-05, "loss": 91.6115, "step": 47650 }, { "epoch": 0.19255243074213085, "grad_norm": 2353.800537109375, "learning_rate": 4.8759094824694184e-05, "loss": 79.1209, "step": 47660 }, { "epoch": 0.19259283200749847, "grad_norm": 1034.469482421875, "learning_rate": 4.875800850646387e-05, "loss": 63.04, "step": 47670 }, { "epoch": 0.1926332332728661, "grad_norm": 497.1041564941406, "learning_rate": 4.8756921725058934e-05, "loss": 78.9805, "step": 47680 }, { "epoch": 0.19267363453823375, "grad_norm": 410.94970703125, "learning_rate": 4.875583448050059e-05, "loss": 77.002, "step": 47690 }, { "epoch": 0.19271403580360136, "grad_norm": 1801.6767578125, "learning_rate": 4.875474677281002e-05, "loss": 80.726, "step": 47700 }, { "epoch": 0.192754437068969, "grad_norm": 946.7315673828125, "learning_rate": 4.8753658602008425e-05, "loss": 64.131, "step": 47710 }, { "epoch": 0.19279483833433664, "grad_norm": 1104.990478515625, "learning_rate": 4.875256996811703e-05, "loss": 63.1742, "step": 47720 }, { "epoch": 0.19283523959970425, "grad_norm": 889.265869140625, "learning_rate": 4.875148087115706e-05, "loss": 121.2112, "step": 47730 }, { "epoch": 0.1928756408650719, "grad_norm": 502.1475830078125, "learning_rate": 4.875039131114975e-05, "loss": 81.0771, "step": 47740 }, { "epoch": 0.19291604213043953, "grad_norm": 2143.898193359375, "learning_rate": 4.874930128811631e-05, "loss": 87.0424, "step": 47750 }, { "epoch": 0.19295644339580717, "grad_norm": 792.10693359375, "learning_rate": 4.874821080207803e-05, "loss": 73.0605, "step": 47760 }, { "epoch": 0.19299684466117478, "grad_norm": 483.91546630859375, "learning_rate": 4.8747119853056156e-05, "loss": 88.888, "step": 47770 }, { "epoch": 0.19303724592654242, "grad_norm": 1531.7535400390625, "learning_rate": 4.8746028441071943e-05, "loss": 59.7669, "step": 47780 }, { "epoch": 0.19307764719191006, "grad_norm": 677.5689086914062, "learning_rate": 4.874493656614669e-05, "loss": 74.3335, "step": 47790 }, { "epoch": 0.19311804845727767, "grad_norm": 515.4765625, "learning_rate": 4.874384422830167e-05, "loss": 56.0753, "step": 47800 }, { "epoch": 0.19315844972264531, "grad_norm": 763.7691040039062, "learning_rate": 4.8742751427558186e-05, "loss": 77.0294, "step": 47810 }, { "epoch": 0.19319885098801295, "grad_norm": 1666.2574462890625, "learning_rate": 4.874165816393754e-05, "loss": 46.0998, "step": 47820 }, { "epoch": 0.19323925225338057, "grad_norm": 753.572021484375, "learning_rate": 4.874056443746104e-05, "loss": 55.663, "step": 47830 }, { "epoch": 0.1932796535187482, "grad_norm": 664.1289672851562, "learning_rate": 4.873947024815002e-05, "loss": 145.2848, "step": 47840 }, { "epoch": 0.19332005478411585, "grad_norm": 910.7630615234375, "learning_rate": 4.87383755960258e-05, "loss": 100.3147, "step": 47850 }, { "epoch": 0.19336045604948346, "grad_norm": 1756.9093017578125, "learning_rate": 4.8737280481109724e-05, "loss": 103.6948, "step": 47860 }, { "epoch": 0.1934008573148511, "grad_norm": 587.9384155273438, "learning_rate": 4.8736184903423155e-05, "loss": 90.1571, "step": 47870 }, { "epoch": 0.19344125858021874, "grad_norm": 852.3531494140625, "learning_rate": 4.873508886298743e-05, "loss": 72.187, "step": 47880 }, { "epoch": 0.19348165984558635, "grad_norm": 844.1437377929688, "learning_rate": 4.8733992359823936e-05, "loss": 76.1646, "step": 47890 }, { "epoch": 0.193522061110954, "grad_norm": 1602.298095703125, "learning_rate": 4.8732895393954036e-05, "loss": 84.8224, "step": 47900 }, { "epoch": 0.19356246237632163, "grad_norm": 870.3892211914062, "learning_rate": 4.8731797965399125e-05, "loss": 115.4123, "step": 47910 }, { "epoch": 0.19360286364168927, "grad_norm": 1025.730224609375, "learning_rate": 4.873070007418059e-05, "loss": 74.2097, "step": 47920 }, { "epoch": 0.19364326490705688, "grad_norm": 1289.6861572265625, "learning_rate": 4.8729601720319845e-05, "loss": 112.8718, "step": 47930 }, { "epoch": 0.19368366617242452, "grad_norm": 592.2373657226562, "learning_rate": 4.8728502903838295e-05, "loss": 63.9621, "step": 47940 }, { "epoch": 0.19372406743779216, "grad_norm": 5277.1982421875, "learning_rate": 4.8727403624757365e-05, "loss": 100.8859, "step": 47950 }, { "epoch": 0.19376446870315978, "grad_norm": 816.9944458007812, "learning_rate": 4.872630388309849e-05, "loss": 82.8986, "step": 47960 }, { "epoch": 0.19380486996852742, "grad_norm": 181.56996154785156, "learning_rate": 4.8725203678883104e-05, "loss": 65.7631, "step": 47970 }, { "epoch": 0.19384527123389506, "grad_norm": 1124.836181640625, "learning_rate": 4.872410301213265e-05, "loss": 81.7168, "step": 47980 }, { "epoch": 0.19388567249926267, "grad_norm": 1639.8275146484375, "learning_rate": 4.8723001882868604e-05, "loss": 103.2546, "step": 47990 }, { "epoch": 0.1939260737646303, "grad_norm": 633.078857421875, "learning_rate": 4.8721900291112415e-05, "loss": 98.4941, "step": 48000 }, { "epoch": 0.19396647502999795, "grad_norm": 766.0423583984375, "learning_rate": 4.872079823688557e-05, "loss": 51.5216, "step": 48010 }, { "epoch": 0.19400687629536556, "grad_norm": 755.6162719726562, "learning_rate": 4.871969572020955e-05, "loss": 76.6551, "step": 48020 }, { "epoch": 0.1940472775607332, "grad_norm": 8129.7333984375, "learning_rate": 4.871859274110585e-05, "loss": 132.5075, "step": 48030 }, { "epoch": 0.19408767882610084, "grad_norm": 1120.7578125, "learning_rate": 4.871748929959598e-05, "loss": 87.0844, "step": 48040 }, { "epoch": 0.19412808009146845, "grad_norm": 651.3961181640625, "learning_rate": 4.8716385395701435e-05, "loss": 61.7545, "step": 48050 }, { "epoch": 0.1941684813568361, "grad_norm": 646.9969482421875, "learning_rate": 4.871528102944376e-05, "loss": 100.9905, "step": 48060 }, { "epoch": 0.19420888262220373, "grad_norm": 478.8792419433594, "learning_rate": 4.8714176200844464e-05, "loss": 70.1561, "step": 48070 }, { "epoch": 0.19424928388757137, "grad_norm": 617.067626953125, "learning_rate": 4.8713070909925094e-05, "loss": 87.2428, "step": 48080 }, { "epoch": 0.19428968515293898, "grad_norm": 773.7765502929688, "learning_rate": 4.8711965156707195e-05, "loss": 78.5379, "step": 48090 }, { "epoch": 0.19433008641830662, "grad_norm": 1457.695068359375, "learning_rate": 4.871085894121233e-05, "loss": 106.1761, "step": 48100 }, { "epoch": 0.19437048768367426, "grad_norm": 1464.89013671875, "learning_rate": 4.8709752263462064e-05, "loss": 59.2071, "step": 48110 }, { "epoch": 0.19441088894904188, "grad_norm": 1820.7398681640625, "learning_rate": 4.870864512347797e-05, "loss": 71.2924, "step": 48120 }, { "epoch": 0.19445129021440952, "grad_norm": 596.8287353515625, "learning_rate": 4.8707537521281635e-05, "loss": 60.9779, "step": 48130 }, { "epoch": 0.19449169147977716, "grad_norm": 1118.587890625, "learning_rate": 4.870642945689465e-05, "loss": 97.0064, "step": 48140 }, { "epoch": 0.19453209274514477, "grad_norm": 1756.2308349609375, "learning_rate": 4.8705320930338615e-05, "loss": 63.2261, "step": 48150 }, { "epoch": 0.1945724940105124, "grad_norm": 846.0660400390625, "learning_rate": 4.870421194163515e-05, "loss": 118.8603, "step": 48160 }, { "epoch": 0.19461289527588005, "grad_norm": 1581.7791748046875, "learning_rate": 4.8703102490805865e-05, "loss": 122.0613, "step": 48170 }, { "epoch": 0.19465329654124766, "grad_norm": 1016.5093383789062, "learning_rate": 4.87019925778724e-05, "loss": 60.2842, "step": 48180 }, { "epoch": 0.1946936978066153, "grad_norm": 717.7647094726562, "learning_rate": 4.870088220285638e-05, "loss": 79.8649, "step": 48190 }, { "epoch": 0.19473409907198294, "grad_norm": 1193.3416748046875, "learning_rate": 4.8699771365779453e-05, "loss": 60.5539, "step": 48200 }, { "epoch": 0.19477450033735055, "grad_norm": 2215.272216796875, "learning_rate": 4.8698660066663294e-05, "loss": 87.0759, "step": 48210 }, { "epoch": 0.1948149016027182, "grad_norm": 1735.73828125, "learning_rate": 4.869754830552956e-05, "loss": 96.5737, "step": 48220 }, { "epoch": 0.19485530286808583, "grad_norm": 375.5717468261719, "learning_rate": 4.869643608239991e-05, "loss": 88.8928, "step": 48230 }, { "epoch": 0.19489570413345347, "grad_norm": 612.527587890625, "learning_rate": 4.8695323397296044e-05, "loss": 66.2606, "step": 48240 }, { "epoch": 0.19493610539882109, "grad_norm": 1690.7991943359375, "learning_rate": 4.869421025023965e-05, "loss": 92.1369, "step": 48250 }, { "epoch": 0.19497650666418873, "grad_norm": 1483.668212890625, "learning_rate": 4.8693096641252424e-05, "loss": 65.0718, "step": 48260 }, { "epoch": 0.19501690792955637, "grad_norm": 601.1848754882812, "learning_rate": 4.8691982570356084e-05, "loss": 49.7445, "step": 48270 }, { "epoch": 0.19505730919492398, "grad_norm": 1077.0008544921875, "learning_rate": 4.8690868037572346e-05, "loss": 71.7525, "step": 48280 }, { "epoch": 0.19509771046029162, "grad_norm": 1292.0035400390625, "learning_rate": 4.8689753042922935e-05, "loss": 75.1074, "step": 48290 }, { "epoch": 0.19513811172565926, "grad_norm": 1112.646484375, "learning_rate": 4.8688637586429595e-05, "loss": 82.0207, "step": 48300 }, { "epoch": 0.19517851299102687, "grad_norm": 1545.366943359375, "learning_rate": 4.8687521668114064e-05, "loss": 102.4248, "step": 48310 }, { "epoch": 0.1952189142563945, "grad_norm": 814.1812744140625, "learning_rate": 4.8686405287998116e-05, "loss": 111.8968, "step": 48320 }, { "epoch": 0.19525931552176215, "grad_norm": 632.5703735351562, "learning_rate": 4.8685288446103495e-05, "loss": 78.0891, "step": 48330 }, { "epoch": 0.19529971678712976, "grad_norm": 2291.05859375, "learning_rate": 4.8684171142451986e-05, "loss": 63.0176, "step": 48340 }, { "epoch": 0.1953401180524974, "grad_norm": 954.6275024414062, "learning_rate": 4.8683053377065356e-05, "loss": 113.7088, "step": 48350 }, { "epoch": 0.19538051931786504, "grad_norm": 619.459228515625, "learning_rate": 4.8681935149965416e-05, "loss": 66.4437, "step": 48360 }, { "epoch": 0.19542092058323265, "grad_norm": 981.1046142578125, "learning_rate": 4.868081646117395e-05, "loss": 81.0435, "step": 48370 }, { "epoch": 0.1954613218486003, "grad_norm": 714.146728515625, "learning_rate": 4.867969731071279e-05, "loss": 101.9001, "step": 48380 }, { "epoch": 0.19550172311396793, "grad_norm": 1931.5303955078125, "learning_rate": 4.8678577698603734e-05, "loss": 95.807, "step": 48390 }, { "epoch": 0.19554212437933557, "grad_norm": 3275.3095703125, "learning_rate": 4.867745762486861e-05, "loss": 73.3455, "step": 48400 }, { "epoch": 0.1955825256447032, "grad_norm": 1230.974609375, "learning_rate": 4.867633708952926e-05, "loss": 77.9758, "step": 48410 }, { "epoch": 0.19562292691007083, "grad_norm": 849.453125, "learning_rate": 4.867521609260754e-05, "loss": 73.4305, "step": 48420 }, { "epoch": 0.19566332817543847, "grad_norm": 903.135009765625, "learning_rate": 4.867409463412528e-05, "loss": 56.7117, "step": 48430 }, { "epoch": 0.19570372944080608, "grad_norm": 1139.8895263671875, "learning_rate": 4.8672972714104357e-05, "loss": 67.9619, "step": 48440 }, { "epoch": 0.19574413070617372, "grad_norm": 908.2789916992188, "learning_rate": 4.867185033256665e-05, "loss": 62.4522, "step": 48450 }, { "epoch": 0.19578453197154136, "grad_norm": 1399.5394287109375, "learning_rate": 4.8670727489534034e-05, "loss": 113.5989, "step": 48460 }, { "epoch": 0.19582493323690897, "grad_norm": 2880.655029296875, "learning_rate": 4.8669604185028394e-05, "loss": 107.0346, "step": 48470 }, { "epoch": 0.1958653345022766, "grad_norm": 689.0923461914062, "learning_rate": 4.866848041907164e-05, "loss": 89.6595, "step": 48480 }, { "epoch": 0.19590573576764425, "grad_norm": 2163.55029296875, "learning_rate": 4.866735619168568e-05, "loss": 87.9743, "step": 48490 }, { "epoch": 0.19594613703301186, "grad_norm": 816.2272338867188, "learning_rate": 4.8666231502892415e-05, "loss": 102.1195, "step": 48500 }, { "epoch": 0.1959865382983795, "grad_norm": 3147.00830078125, "learning_rate": 4.866510635271379e-05, "loss": 98.8874, "step": 48510 }, { "epoch": 0.19602693956374714, "grad_norm": 693.9053955078125, "learning_rate": 4.8663980741171724e-05, "loss": 76.9899, "step": 48520 }, { "epoch": 0.19606734082911476, "grad_norm": 833.5950317382812, "learning_rate": 4.866285466828817e-05, "loss": 56.6829, "step": 48530 }, { "epoch": 0.1961077420944824, "grad_norm": 945.19873046875, "learning_rate": 4.86617281340851e-05, "loss": 82.8859, "step": 48540 }, { "epoch": 0.19614814335985004, "grad_norm": 607.2847290039062, "learning_rate": 4.866060113858444e-05, "loss": 71.8742, "step": 48550 }, { "epoch": 0.19618854462521768, "grad_norm": 787.0588989257812, "learning_rate": 4.865947368180818e-05, "loss": 81.9591, "step": 48560 }, { "epoch": 0.1962289458905853, "grad_norm": 406.9506530761719, "learning_rate": 4.865834576377831e-05, "loss": 42.3592, "step": 48570 }, { "epoch": 0.19626934715595293, "grad_norm": 525.7552490234375, "learning_rate": 4.86572173845168e-05, "loss": 109.6562, "step": 48580 }, { "epoch": 0.19630974842132057, "grad_norm": 1523.6407470703125, "learning_rate": 4.865608854404566e-05, "loss": 133.584, "step": 48590 }, { "epoch": 0.19635014968668818, "grad_norm": 620.2904052734375, "learning_rate": 4.8654959242386896e-05, "loss": 72.112, "step": 48600 }, { "epoch": 0.19639055095205582, "grad_norm": 926.3845825195312, "learning_rate": 4.865382947956253e-05, "loss": 106.2173, "step": 48610 }, { "epoch": 0.19643095221742346, "grad_norm": 1164.56982421875, "learning_rate": 4.865269925559457e-05, "loss": 75.1725, "step": 48620 }, { "epoch": 0.19647135348279107, "grad_norm": 946.3316650390625, "learning_rate": 4.865156857050507e-05, "loss": 91.8206, "step": 48630 }, { "epoch": 0.1965117547481587, "grad_norm": 1000.8588256835938, "learning_rate": 4.865043742431605e-05, "loss": 68.0882, "step": 48640 }, { "epoch": 0.19655215601352635, "grad_norm": 1161.449951171875, "learning_rate": 4.8649305817049596e-05, "loss": 70.0598, "step": 48650 }, { "epoch": 0.19659255727889396, "grad_norm": 1116.1414794921875, "learning_rate": 4.864817374872773e-05, "loss": 100.4134, "step": 48660 }, { "epoch": 0.1966329585442616, "grad_norm": 1149.848388671875, "learning_rate": 4.864704121937256e-05, "loss": 59.9185, "step": 48670 }, { "epoch": 0.19667335980962924, "grad_norm": 932.8038940429688, "learning_rate": 4.8645908229006135e-05, "loss": 49.7766, "step": 48680 }, { "epoch": 0.19671376107499686, "grad_norm": 592.9822387695312, "learning_rate": 4.864477477765056e-05, "loss": 46.122, "step": 48690 }, { "epoch": 0.1967541623403645, "grad_norm": 484.1387023925781, "learning_rate": 4.864364086532792e-05, "loss": 60.0866, "step": 48700 }, { "epoch": 0.19679456360573214, "grad_norm": 1089.63818359375, "learning_rate": 4.8642506492060335e-05, "loss": 102.3337, "step": 48710 }, { "epoch": 0.19683496487109975, "grad_norm": 1087.8228759765625, "learning_rate": 4.8641371657869916e-05, "loss": 83.8455, "step": 48720 }, { "epoch": 0.1968753661364674, "grad_norm": 465.7780456542969, "learning_rate": 4.864023636277878e-05, "loss": 58.4958, "step": 48730 }, { "epoch": 0.19691576740183503, "grad_norm": 1844.8414306640625, "learning_rate": 4.863910060680907e-05, "loss": 84.4384, "step": 48740 }, { "epoch": 0.19695616866720267, "grad_norm": 1077.40478515625, "learning_rate": 4.8637964389982926e-05, "loss": 134.2446, "step": 48750 }, { "epoch": 0.19699656993257028, "grad_norm": 755.9237060546875, "learning_rate": 4.863682771232248e-05, "loss": 101.4888, "step": 48760 }, { "epoch": 0.19703697119793792, "grad_norm": 947.1353149414062, "learning_rate": 4.8635690573849926e-05, "loss": 99.0996, "step": 48770 }, { "epoch": 0.19707737246330556, "grad_norm": 758.8486938476562, "learning_rate": 4.8634552974587414e-05, "loss": 65.2204, "step": 48780 }, { "epoch": 0.19711777372867317, "grad_norm": 2005.01025390625, "learning_rate": 4.863341491455712e-05, "loss": 119.6658, "step": 48790 }, { "epoch": 0.1971581749940408, "grad_norm": 980.075439453125, "learning_rate": 4.863227639378124e-05, "loss": 92.1868, "step": 48800 }, { "epoch": 0.19719857625940845, "grad_norm": 511.9769592285156, "learning_rate": 4.8631137412281954e-05, "loss": 54.6814, "step": 48810 }, { "epoch": 0.19723897752477607, "grad_norm": 993.7817993164062, "learning_rate": 4.862999797008149e-05, "loss": 90.118, "step": 48820 }, { "epoch": 0.1972793787901437, "grad_norm": 687.139404296875, "learning_rate": 4.8628858067202045e-05, "loss": 98.846, "step": 48830 }, { "epoch": 0.19731978005551135, "grad_norm": 1727.8497314453125, "learning_rate": 4.862771770366584e-05, "loss": 91.3292, "step": 48840 }, { "epoch": 0.19736018132087896, "grad_norm": 527.6170043945312, "learning_rate": 4.862657687949512e-05, "loss": 84.25, "step": 48850 }, { "epoch": 0.1974005825862466, "grad_norm": 657.8152465820312, "learning_rate": 4.862543559471212e-05, "loss": 49.7685, "step": 48860 }, { "epoch": 0.19744098385161424, "grad_norm": 537.2709350585938, "learning_rate": 4.8624293849339095e-05, "loss": 61.4911, "step": 48870 }, { "epoch": 0.19748138511698185, "grad_norm": 598.13671875, "learning_rate": 4.862315164339829e-05, "loss": 82.7801, "step": 48880 }, { "epoch": 0.1975217863823495, "grad_norm": 480.8949279785156, "learning_rate": 4.862200897691199e-05, "loss": 58.1646, "step": 48890 }, { "epoch": 0.19756218764771713, "grad_norm": 868.3237915039062, "learning_rate": 4.8620865849902456e-05, "loss": 73.8393, "step": 48900 }, { "epoch": 0.19760258891308477, "grad_norm": 451.89483642578125, "learning_rate": 4.861972226239199e-05, "loss": 63.7517, "step": 48910 }, { "epoch": 0.19764299017845238, "grad_norm": 598.8991088867188, "learning_rate": 4.861857821440287e-05, "loss": 76.1932, "step": 48920 }, { "epoch": 0.19768339144382002, "grad_norm": 717.087646484375, "learning_rate": 4.861743370595741e-05, "loss": 68.1542, "step": 48930 }, { "epoch": 0.19772379270918766, "grad_norm": 1464.67333984375, "learning_rate": 4.861628873707792e-05, "loss": 86.0041, "step": 48940 }, { "epoch": 0.19776419397455527, "grad_norm": 442.174560546875, "learning_rate": 4.861514330778673e-05, "loss": 81.9746, "step": 48950 }, { "epoch": 0.19780459523992291, "grad_norm": 1277.0028076171875, "learning_rate": 4.861399741810615e-05, "loss": 87.9091, "step": 48960 }, { "epoch": 0.19784499650529055, "grad_norm": 485.0671081542969, "learning_rate": 4.8612851068058544e-05, "loss": 57.811, "step": 48970 }, { "epoch": 0.19788539777065817, "grad_norm": 1858.374267578125, "learning_rate": 4.861170425766625e-05, "loss": 95.383, "step": 48980 }, { "epoch": 0.1979257990360258, "grad_norm": 944.0939331054688, "learning_rate": 4.861055698695162e-05, "loss": 104.2803, "step": 48990 }, { "epoch": 0.19796620030139345, "grad_norm": 970.7718505859375, "learning_rate": 4.860940925593703e-05, "loss": 100.1417, "step": 49000 }, { "epoch": 0.19800660156676106, "grad_norm": 1792.8084716796875, "learning_rate": 4.860826106464484e-05, "loss": 86.6734, "step": 49010 }, { "epoch": 0.1980470028321287, "grad_norm": 1215.9122314453125, "learning_rate": 4.8607112413097464e-05, "loss": 88.0753, "step": 49020 }, { "epoch": 0.19808740409749634, "grad_norm": 484.51873779296875, "learning_rate": 4.860596330131727e-05, "loss": 78.6937, "step": 49030 }, { "epoch": 0.19812780536286395, "grad_norm": 945.8279418945312, "learning_rate": 4.860481372932667e-05, "loss": 69.6853, "step": 49040 }, { "epoch": 0.1981682066282316, "grad_norm": 507.302978515625, "learning_rate": 4.860366369714807e-05, "loss": 91.7122, "step": 49050 }, { "epoch": 0.19820860789359923, "grad_norm": 921.6329956054688, "learning_rate": 4.8602513204803896e-05, "loss": 103.2615, "step": 49060 }, { "epoch": 0.19824900915896687, "grad_norm": 596.2329711914062, "learning_rate": 4.8601362252316574e-05, "loss": 103.5869, "step": 49070 }, { "epoch": 0.19828941042433448, "grad_norm": 1672.80615234375, "learning_rate": 4.860021083970855e-05, "loss": 83.6912, "step": 49080 }, { "epoch": 0.19832981168970212, "grad_norm": 271.271728515625, "learning_rate": 4.8599058967002254e-05, "loss": 74.6612, "step": 49090 }, { "epoch": 0.19837021295506976, "grad_norm": 497.50048828125, "learning_rate": 4.859790663422016e-05, "loss": 69.9331, "step": 49100 }, { "epoch": 0.19841061422043738, "grad_norm": 573.849365234375, "learning_rate": 4.8596753841384735e-05, "loss": 54.5267, "step": 49110 }, { "epoch": 0.19845101548580502, "grad_norm": 1481.6771240234375, "learning_rate": 4.859560058851844e-05, "loss": 63.2541, "step": 49120 }, { "epoch": 0.19849141675117266, "grad_norm": 3614.8828125, "learning_rate": 4.859444687564376e-05, "loss": 98.0111, "step": 49130 }, { "epoch": 0.19853181801654027, "grad_norm": 1466.5594482421875, "learning_rate": 4.859329270278319e-05, "loss": 67.2758, "step": 49140 }, { "epoch": 0.1985722192819079, "grad_norm": 611.2523803710938, "learning_rate": 4.859213806995924e-05, "loss": 99.23, "step": 49150 }, { "epoch": 0.19861262054727555, "grad_norm": 791.8298950195312, "learning_rate": 4.85909829771944e-05, "loss": 92.561, "step": 49160 }, { "epoch": 0.19865302181264316, "grad_norm": 1123.1046142578125, "learning_rate": 4.8589827424511216e-05, "loss": 90.6359, "step": 49170 }, { "epoch": 0.1986934230780108, "grad_norm": 0.0, "learning_rate": 4.858867141193219e-05, "loss": 75.7769, "step": 49180 }, { "epoch": 0.19873382434337844, "grad_norm": 654.7210693359375, "learning_rate": 4.858751493947987e-05, "loss": 39.3021, "step": 49190 }, { "epoch": 0.19877422560874605, "grad_norm": 1822.922607421875, "learning_rate": 4.858635800717681e-05, "loss": 112.5853, "step": 49200 }, { "epoch": 0.1988146268741137, "grad_norm": 422.10791015625, "learning_rate": 4.8585200615045555e-05, "loss": 66.1211, "step": 49210 }, { "epoch": 0.19885502813948133, "grad_norm": 1020.6856689453125, "learning_rate": 4.8584042763108675e-05, "loss": 70.5579, "step": 49220 }, { "epoch": 0.19889542940484897, "grad_norm": 628.3194580078125, "learning_rate": 4.858288445138873e-05, "loss": 110.4609, "step": 49230 }, { "epoch": 0.19893583067021658, "grad_norm": 1215.436767578125, "learning_rate": 4.8581725679908317e-05, "loss": 145.442, "step": 49240 }, { "epoch": 0.19897623193558422, "grad_norm": 255.2740478515625, "learning_rate": 4.858056644869002e-05, "loss": 74.1351, "step": 49250 }, { "epoch": 0.19901663320095186, "grad_norm": 686.521484375, "learning_rate": 4.8579406757756455e-05, "loss": 57.5146, "step": 49260 }, { "epoch": 0.19905703446631948, "grad_norm": 452.3636169433594, "learning_rate": 4.85782466071302e-05, "loss": 68.5748, "step": 49270 }, { "epoch": 0.19909743573168712, "grad_norm": 2242.72412109375, "learning_rate": 4.857708599683389e-05, "loss": 86.5858, "step": 49280 }, { "epoch": 0.19913783699705476, "grad_norm": 625.6463623046875, "learning_rate": 4.8575924926890145e-05, "loss": 50.7092, "step": 49290 }, { "epoch": 0.19917823826242237, "grad_norm": 558.0465698242188, "learning_rate": 4.8574763397321614e-05, "loss": 53.2504, "step": 49300 }, { "epoch": 0.19921863952779, "grad_norm": 932.68212890625, "learning_rate": 4.857360140815093e-05, "loss": 65.5452, "step": 49310 }, { "epoch": 0.19925904079315765, "grad_norm": 965.7637939453125, "learning_rate": 4.857243895940076e-05, "loss": 68.6537, "step": 49320 }, { "epoch": 0.19929944205852526, "grad_norm": 676.4684448242188, "learning_rate": 4.857127605109374e-05, "loss": 80.4764, "step": 49330 }, { "epoch": 0.1993398433238929, "grad_norm": 499.67083740234375, "learning_rate": 4.8570112683252565e-05, "loss": 96.8114, "step": 49340 }, { "epoch": 0.19938024458926054, "grad_norm": 1086.6888427734375, "learning_rate": 4.856894885589991e-05, "loss": 98.3765, "step": 49350 }, { "epoch": 0.19942064585462815, "grad_norm": 626.739990234375, "learning_rate": 4.856778456905846e-05, "loss": 64.8752, "step": 49360 }, { "epoch": 0.1994610471199958, "grad_norm": 464.48565673828125, "learning_rate": 4.856661982275093e-05, "loss": 52.9684, "step": 49370 }, { "epoch": 0.19950144838536343, "grad_norm": 536.5781860351562, "learning_rate": 4.8565454617e-05, "loss": 71.4869, "step": 49380 }, { "epoch": 0.19954184965073107, "grad_norm": 3326.29833984375, "learning_rate": 4.85642889518284e-05, "loss": 165.3745, "step": 49390 }, { "epoch": 0.19958225091609869, "grad_norm": 2225.459716796875, "learning_rate": 4.856312282725886e-05, "loss": 92.2177, "step": 49400 }, { "epoch": 0.19962265218146633, "grad_norm": 611.5123901367188, "learning_rate": 4.85619562433141e-05, "loss": 53.4561, "step": 49410 }, { "epoch": 0.19966305344683397, "grad_norm": 763.9044189453125, "learning_rate": 4.8560789200016884e-05, "loss": 65.2781, "step": 49420 }, { "epoch": 0.19970345471220158, "grad_norm": 1733.5440673828125, "learning_rate": 4.8559621697389946e-05, "loss": 149.0616, "step": 49430 }, { "epoch": 0.19974385597756922, "grad_norm": 411.6968688964844, "learning_rate": 4.855845373545605e-05, "loss": 71.7242, "step": 49440 }, { "epoch": 0.19978425724293686, "grad_norm": 258.44677734375, "learning_rate": 4.855728531423798e-05, "loss": 71.4265, "step": 49450 }, { "epoch": 0.19982465850830447, "grad_norm": 1239.368896484375, "learning_rate": 4.85561164337585e-05, "loss": 75.7033, "step": 49460 }, { "epoch": 0.1998650597736721, "grad_norm": 1232.6944580078125, "learning_rate": 4.85549470940404e-05, "loss": 54.9064, "step": 49470 }, { "epoch": 0.19990546103903975, "grad_norm": 698.7216796875, "learning_rate": 4.855377729510648e-05, "loss": 64.2161, "step": 49480 }, { "epoch": 0.19994586230440736, "grad_norm": 590.6932983398438, "learning_rate": 4.8552607036979553e-05, "loss": 59.4658, "step": 49490 }, { "epoch": 0.199986263569775, "grad_norm": 1102.7227783203125, "learning_rate": 4.855143631968242e-05, "loss": 64.1912, "step": 49500 }, { "epoch": 0.20002666483514264, "grad_norm": 2396.97119140625, "learning_rate": 4.855026514323792e-05, "loss": 101.3902, "step": 49510 }, { "epoch": 0.20006706610051025, "grad_norm": 622.559326171875, "learning_rate": 4.8549093507668865e-05, "loss": 91.4989, "step": 49520 }, { "epoch": 0.2001074673658779, "grad_norm": 1117.0147705078125, "learning_rate": 4.854792141299811e-05, "loss": 52.4889, "step": 49530 }, { "epoch": 0.20014786863124553, "grad_norm": 726.4915771484375, "learning_rate": 4.85467488592485e-05, "loss": 69.4123, "step": 49540 }, { "epoch": 0.20018826989661317, "grad_norm": 563.3633422851562, "learning_rate": 4.85455758464429e-05, "loss": 79.5041, "step": 49550 }, { "epoch": 0.2002286711619808, "grad_norm": 1249.004150390625, "learning_rate": 4.854440237460418e-05, "loss": 113.2019, "step": 49560 }, { "epoch": 0.20026907242734843, "grad_norm": 1637.2099609375, "learning_rate": 4.854322844375522e-05, "loss": 94.2531, "step": 49570 }, { "epoch": 0.20030947369271607, "grad_norm": 916.0922241210938, "learning_rate": 4.85420540539189e-05, "loss": 74.7405, "step": 49580 }, { "epoch": 0.20034987495808368, "grad_norm": 660.0616455078125, "learning_rate": 4.8540879205118106e-05, "loss": 63.3157, "step": 49590 }, { "epoch": 0.20039027622345132, "grad_norm": 1970.4896240234375, "learning_rate": 4.8539703897375755e-05, "loss": 93.7624, "step": 49600 }, { "epoch": 0.20043067748881896, "grad_norm": 765.5314331054688, "learning_rate": 4.853852813071476e-05, "loss": 83.952, "step": 49610 }, { "epoch": 0.20047107875418657, "grad_norm": 883.2976684570312, "learning_rate": 4.853735190515804e-05, "loss": 100.3142, "step": 49620 }, { "epoch": 0.2005114800195542, "grad_norm": 1285.146240234375, "learning_rate": 4.853617522072853e-05, "loss": 84.2881, "step": 49630 }, { "epoch": 0.20055188128492185, "grad_norm": 561.2415771484375, "learning_rate": 4.853499807744916e-05, "loss": 67.512, "step": 49640 }, { "epoch": 0.20059228255028946, "grad_norm": 759.5241088867188, "learning_rate": 4.85338204753429e-05, "loss": 70.0843, "step": 49650 }, { "epoch": 0.2006326838156571, "grad_norm": 1011.6978759765625, "learning_rate": 4.8532642414432674e-05, "loss": 91.1174, "step": 49660 }, { "epoch": 0.20067308508102474, "grad_norm": 710.1160888671875, "learning_rate": 4.853146389474148e-05, "loss": 78.4036, "step": 49670 }, { "epoch": 0.20071348634639236, "grad_norm": 1832.636474609375, "learning_rate": 4.853028491629228e-05, "loss": 69.5313, "step": 49680 }, { "epoch": 0.20075388761176, "grad_norm": 630.8060913085938, "learning_rate": 4.852910547910806e-05, "loss": 72.0492, "step": 49690 }, { "epoch": 0.20079428887712764, "grad_norm": 891.0122680664062, "learning_rate": 4.852792558321182e-05, "loss": 88.1902, "step": 49700 }, { "epoch": 0.20083469014249528, "grad_norm": 493.8849182128906, "learning_rate": 4.852674522862656e-05, "loss": 82.2126, "step": 49710 }, { "epoch": 0.2008750914078629, "grad_norm": 916.6065063476562, "learning_rate": 4.852556441537528e-05, "loss": 73.7906, "step": 49720 }, { "epoch": 0.20091549267323053, "grad_norm": 1729.7864990234375, "learning_rate": 4.852438314348101e-05, "loss": 98.6484, "step": 49730 }, { "epoch": 0.20095589393859817, "grad_norm": 971.2913818359375, "learning_rate": 4.852320141296679e-05, "loss": 74.1662, "step": 49740 }, { "epoch": 0.20099629520396578, "grad_norm": 673.4738159179688, "learning_rate": 4.852201922385564e-05, "loss": 87.5311, "step": 49750 }, { "epoch": 0.20103669646933342, "grad_norm": 700.3085327148438, "learning_rate": 4.852083657617061e-05, "loss": 119.7827, "step": 49760 }, { "epoch": 0.20107709773470106, "grad_norm": 574.7796630859375, "learning_rate": 4.851965346993478e-05, "loss": 118.024, "step": 49770 }, { "epoch": 0.20111749900006867, "grad_norm": 1227.26904296875, "learning_rate": 4.851846990517118e-05, "loss": 96.7121, "step": 49780 }, { "epoch": 0.2011579002654363, "grad_norm": 1115.0523681640625, "learning_rate": 4.8517285881902904e-05, "loss": 69.7559, "step": 49790 }, { "epoch": 0.20119830153080395, "grad_norm": 820.30126953125, "learning_rate": 4.851610140015304e-05, "loss": 94.3278, "step": 49800 }, { "epoch": 0.20123870279617156, "grad_norm": 3716.187744140625, "learning_rate": 4.8514916459944666e-05, "loss": 136.7065, "step": 49810 }, { "epoch": 0.2012791040615392, "grad_norm": 1158.3758544921875, "learning_rate": 4.8513731061300887e-05, "loss": 88.3688, "step": 49820 }, { "epoch": 0.20131950532690684, "grad_norm": 2444.747314453125, "learning_rate": 4.851254520424482e-05, "loss": 58.1214, "step": 49830 }, { "epoch": 0.20135990659227446, "grad_norm": 594.2299194335938, "learning_rate": 4.851135888879958e-05, "loss": 50.3362, "step": 49840 }, { "epoch": 0.2014003078576421, "grad_norm": 422.1976318359375, "learning_rate": 4.851017211498829e-05, "loss": 114.6325, "step": 49850 }, { "epoch": 0.20144070912300974, "grad_norm": 348.53741455078125, "learning_rate": 4.85089848828341e-05, "loss": 81.1028, "step": 49860 }, { "epoch": 0.20148111038837738, "grad_norm": 629.5589599609375, "learning_rate": 4.8507797192360134e-05, "loss": 47.3838, "step": 49870 }, { "epoch": 0.201521511653745, "grad_norm": 719.5811157226562, "learning_rate": 4.850660904358956e-05, "loss": 57.1744, "step": 49880 }, { "epoch": 0.20156191291911263, "grad_norm": 497.17913818359375, "learning_rate": 4.850542043654555e-05, "loss": 75.8821, "step": 49890 }, { "epoch": 0.20160231418448027, "grad_norm": 569.685546875, "learning_rate": 4.8504231371251255e-05, "loss": 59.8405, "step": 49900 }, { "epoch": 0.20164271544984788, "grad_norm": 503.3626708984375, "learning_rate": 4.850304184772988e-05, "loss": 64.7331, "step": 49910 }, { "epoch": 0.20168311671521552, "grad_norm": 444.8017883300781, "learning_rate": 4.85018518660046e-05, "loss": 132.0521, "step": 49920 }, { "epoch": 0.20172351798058316, "grad_norm": 557.6558227539062, "learning_rate": 4.850066142609862e-05, "loss": 66.8273, "step": 49930 }, { "epoch": 0.20176391924595077, "grad_norm": 625.9427490234375, "learning_rate": 4.849947052803514e-05, "loss": 97.3501, "step": 49940 }, { "epoch": 0.2018043205113184, "grad_norm": 413.3929748535156, "learning_rate": 4.849827917183739e-05, "loss": 104.1396, "step": 49950 }, { "epoch": 0.20184472177668605, "grad_norm": 1487.6632080078125, "learning_rate": 4.849708735752859e-05, "loss": 84.6355, "step": 49960 }, { "epoch": 0.20188512304205367, "grad_norm": 399.767578125, "learning_rate": 4.849589508513197e-05, "loss": 71.0594, "step": 49970 }, { "epoch": 0.2019255243074213, "grad_norm": 739.0697631835938, "learning_rate": 4.849470235467078e-05, "loss": 91.7866, "step": 49980 }, { "epoch": 0.20196592557278895, "grad_norm": 962.0905151367188, "learning_rate": 4.849350916616827e-05, "loss": 142.8859, "step": 49990 }, { "epoch": 0.20200632683815656, "grad_norm": 556.9208374023438, "learning_rate": 4.849231551964771e-05, "loss": 69.2205, "step": 50000 }, { "epoch": 0.2020467281035242, "grad_norm": 994.56298828125, "learning_rate": 4.849112141513236e-05, "loss": 98.4823, "step": 50010 }, { "epoch": 0.20208712936889184, "grad_norm": 446.2982482910156, "learning_rate": 4.8489926852645505e-05, "loss": 91.0375, "step": 50020 }, { "epoch": 0.20212753063425948, "grad_norm": 2271.33349609375, "learning_rate": 4.848873183221043e-05, "loss": 89.0778, "step": 50030 }, { "epoch": 0.2021679318996271, "grad_norm": 1082.55810546875, "learning_rate": 4.8487536353850444e-05, "loss": 109.4579, "step": 50040 }, { "epoch": 0.20220833316499473, "grad_norm": 2170.5400390625, "learning_rate": 4.8486340417588835e-05, "loss": 84.2964, "step": 50050 }, { "epoch": 0.20224873443036237, "grad_norm": 454.11248779296875, "learning_rate": 4.8485144023448936e-05, "loss": 62.0007, "step": 50060 }, { "epoch": 0.20228913569572998, "grad_norm": 494.5736083984375, "learning_rate": 4.848394717145406e-05, "loss": 87.3821, "step": 50070 }, { "epoch": 0.20232953696109762, "grad_norm": 1059.5152587890625, "learning_rate": 4.848274986162754e-05, "loss": 68.3891, "step": 50080 }, { "epoch": 0.20236993822646526, "grad_norm": 778.6331176757812, "learning_rate": 4.848155209399272e-05, "loss": 72.0611, "step": 50090 }, { "epoch": 0.20241033949183287, "grad_norm": 819.3021240234375, "learning_rate": 4.848035386857296e-05, "loss": 67.9465, "step": 50100 }, { "epoch": 0.20245074075720051, "grad_norm": 1114.4420166015625, "learning_rate": 4.847915518539161e-05, "loss": 75.596, "step": 50110 }, { "epoch": 0.20249114202256815, "grad_norm": 1062.3887939453125, "learning_rate": 4.847795604447204e-05, "loss": 102.3436, "step": 50120 }, { "epoch": 0.20253154328793577, "grad_norm": 1471.7073974609375, "learning_rate": 4.847675644583764e-05, "loss": 129.1764, "step": 50130 }, { "epoch": 0.2025719445533034, "grad_norm": 1061.678955078125, "learning_rate": 4.847555638951177e-05, "loss": 84.9815, "step": 50140 }, { "epoch": 0.20261234581867105, "grad_norm": 1374.27001953125, "learning_rate": 4.8474355875517854e-05, "loss": 88.13, "step": 50150 }, { "epoch": 0.20265274708403866, "grad_norm": 1075.933837890625, "learning_rate": 4.8473154903879276e-05, "loss": 94.9892, "step": 50160 }, { "epoch": 0.2026931483494063, "grad_norm": 314.6831359863281, "learning_rate": 4.8471953474619466e-05, "loss": 83.2022, "step": 50170 }, { "epoch": 0.20273354961477394, "grad_norm": 1138.607421875, "learning_rate": 4.847075158776183e-05, "loss": 78.0873, "step": 50180 }, { "epoch": 0.20277395088014158, "grad_norm": 561.6530151367188, "learning_rate": 4.846954924332981e-05, "loss": 57.0055, "step": 50190 }, { "epoch": 0.2028143521455092, "grad_norm": 1306.6680908203125, "learning_rate": 4.846834644134686e-05, "loss": 69.6428, "step": 50200 }, { "epoch": 0.20285475341087683, "grad_norm": 418.66046142578125, "learning_rate": 4.846714318183639e-05, "loss": 87.8257, "step": 50210 }, { "epoch": 0.20289515467624447, "grad_norm": 1192.7034912109375, "learning_rate": 4.84659394648219e-05, "loss": 93.4679, "step": 50220 }, { "epoch": 0.20293555594161208, "grad_norm": 1370.776611328125, "learning_rate": 4.846473529032684e-05, "loss": 112.6145, "step": 50230 }, { "epoch": 0.20297595720697972, "grad_norm": 555.2098388671875, "learning_rate": 4.846353065837467e-05, "loss": 104.6596, "step": 50240 }, { "epoch": 0.20301635847234736, "grad_norm": 714.0413208007812, "learning_rate": 4.84623255689889e-05, "loss": 63.1508, "step": 50250 }, { "epoch": 0.20305675973771498, "grad_norm": 779.4690551757812, "learning_rate": 4.846112002219301e-05, "loss": 77.2726, "step": 50260 }, { "epoch": 0.20309716100308262, "grad_norm": 857.3240966796875, "learning_rate": 4.845991401801051e-05, "loss": 86.2318, "step": 50270 }, { "epoch": 0.20313756226845026, "grad_norm": 700.0541381835938, "learning_rate": 4.845870755646491e-05, "loss": 72.9242, "step": 50280 }, { "epoch": 0.20317796353381787, "grad_norm": 357.010009765625, "learning_rate": 4.8457500637579726e-05, "loss": 53.2458, "step": 50290 }, { "epoch": 0.2032183647991855, "grad_norm": 1076.287841796875, "learning_rate": 4.845629326137849e-05, "loss": 60.6979, "step": 50300 }, { "epoch": 0.20325876606455315, "grad_norm": 1700.1820068359375, "learning_rate": 4.845508542788474e-05, "loss": 92.155, "step": 50310 }, { "epoch": 0.20329916732992076, "grad_norm": 800.1776123046875, "learning_rate": 4.845387713712203e-05, "loss": 89.9958, "step": 50320 }, { "epoch": 0.2033395685952884, "grad_norm": 953.5119018554688, "learning_rate": 4.8452668389113895e-05, "loss": 103.765, "step": 50330 }, { "epoch": 0.20337996986065604, "grad_norm": 558.611083984375, "learning_rate": 4.845145918388393e-05, "loss": 48.235, "step": 50340 }, { "epoch": 0.20342037112602368, "grad_norm": 106.23624420166016, "learning_rate": 4.8450249521455695e-05, "loss": 87.897, "step": 50350 }, { "epoch": 0.2034607723913913, "grad_norm": 1008.41748046875, "learning_rate": 4.844903940185276e-05, "loss": 55.0348, "step": 50360 }, { "epoch": 0.20350117365675893, "grad_norm": 1108.453369140625, "learning_rate": 4.844782882509874e-05, "loss": 79.4438, "step": 50370 }, { "epoch": 0.20354157492212657, "grad_norm": 985.6517944335938, "learning_rate": 4.844661779121723e-05, "loss": 61.7789, "step": 50380 }, { "epoch": 0.20358197618749418, "grad_norm": 863.3707885742188, "learning_rate": 4.844540630023182e-05, "loss": 75.1732, "step": 50390 }, { "epoch": 0.20362237745286182, "grad_norm": 843.4656372070312, "learning_rate": 4.844419435216615e-05, "loss": 61.1795, "step": 50400 }, { "epoch": 0.20366277871822946, "grad_norm": 739.8035888671875, "learning_rate": 4.844298194704384e-05, "loss": 89.2932, "step": 50410 }, { "epoch": 0.20370317998359708, "grad_norm": 455.288818359375, "learning_rate": 4.8441769084888534e-05, "loss": 58.3124, "step": 50420 }, { "epoch": 0.20374358124896472, "grad_norm": 1115.42333984375, "learning_rate": 4.844055576572387e-05, "loss": 70.2969, "step": 50430 }, { "epoch": 0.20378398251433236, "grad_norm": 1393.1253662109375, "learning_rate": 4.84393419895735e-05, "loss": 89.79, "step": 50440 }, { "epoch": 0.20382438377969997, "grad_norm": 1216.634033203125, "learning_rate": 4.84381277564611e-05, "loss": 72.9308, "step": 50450 }, { "epoch": 0.2038647850450676, "grad_norm": 548.3377075195312, "learning_rate": 4.8436913066410316e-05, "loss": 73.1693, "step": 50460 }, { "epoch": 0.20390518631043525, "grad_norm": 1117.44091796875, "learning_rate": 4.843569791944486e-05, "loss": 121.1625, "step": 50470 }, { "epoch": 0.20394558757580286, "grad_norm": 1097.303955078125, "learning_rate": 4.843448231558839e-05, "loss": 103.4902, "step": 50480 }, { "epoch": 0.2039859888411705, "grad_norm": 797.4445190429688, "learning_rate": 4.843326625486464e-05, "loss": 78.5437, "step": 50490 }, { "epoch": 0.20402639010653814, "grad_norm": 876.6442260742188, "learning_rate": 4.843204973729729e-05, "loss": 72.4397, "step": 50500 }, { "epoch": 0.20406679137190578, "grad_norm": 696.5914306640625, "learning_rate": 4.843083276291007e-05, "loss": 75.2865, "step": 50510 }, { "epoch": 0.2041071926372734, "grad_norm": 202.0533447265625, "learning_rate": 4.84296153317267e-05, "loss": 42.6595, "step": 50520 }, { "epoch": 0.20414759390264103, "grad_norm": 711.9306030273438, "learning_rate": 4.8428397443770926e-05, "loss": 76.4253, "step": 50530 }, { "epoch": 0.20418799516800867, "grad_norm": 1101.085205078125, "learning_rate": 4.842717909906647e-05, "loss": 91.274, "step": 50540 }, { "epoch": 0.20422839643337629, "grad_norm": 934.9185180664062, "learning_rate": 4.84259602976371e-05, "loss": 99.1536, "step": 50550 }, { "epoch": 0.20426879769874393, "grad_norm": 1069.0614013671875, "learning_rate": 4.8424741039506575e-05, "loss": 76.847, "step": 50560 }, { "epoch": 0.20430919896411157, "grad_norm": 5626.541015625, "learning_rate": 4.842352132469867e-05, "loss": 85.2173, "step": 50570 }, { "epoch": 0.20434960022947918, "grad_norm": 467.4293212890625, "learning_rate": 4.8422301153237145e-05, "loss": 57.4749, "step": 50580 }, { "epoch": 0.20439000149484682, "grad_norm": 721.87353515625, "learning_rate": 4.842108052514581e-05, "loss": 67.7757, "step": 50590 }, { "epoch": 0.20443040276021446, "grad_norm": 1129.6400146484375, "learning_rate": 4.841985944044845e-05, "loss": 69.0111, "step": 50600 }, { "epoch": 0.20447080402558207, "grad_norm": 566.9580688476562, "learning_rate": 4.8418637899168874e-05, "loss": 96.5319, "step": 50610 }, { "epoch": 0.2045112052909497, "grad_norm": 1207.4417724609375, "learning_rate": 4.8417415901330886e-05, "loss": 86.0825, "step": 50620 }, { "epoch": 0.20455160655631735, "grad_norm": 912.6641235351562, "learning_rate": 4.841619344695833e-05, "loss": 75.0874, "step": 50630 }, { "epoch": 0.20459200782168496, "grad_norm": 619.9535522460938, "learning_rate": 4.8414970536075024e-05, "loss": 82.5721, "step": 50640 }, { "epoch": 0.2046324090870526, "grad_norm": 522.5993041992188, "learning_rate": 4.841374716870481e-05, "loss": 95.127, "step": 50650 }, { "epoch": 0.20467281035242024, "grad_norm": 368.2784729003906, "learning_rate": 4.841252334487154e-05, "loss": 61.5005, "step": 50660 }, { "epoch": 0.20471321161778788, "grad_norm": 1060.747802734375, "learning_rate": 4.841129906459908e-05, "loss": 69.1981, "step": 50670 }, { "epoch": 0.2047536128831555, "grad_norm": 1138.5040283203125, "learning_rate": 4.841007432791129e-05, "loss": 56.1066, "step": 50680 }, { "epoch": 0.20479401414852313, "grad_norm": 1501.6522216796875, "learning_rate": 4.840884913483204e-05, "loss": 108.5628, "step": 50690 }, { "epoch": 0.20483441541389077, "grad_norm": 1450.2125244140625, "learning_rate": 4.8407623485385234e-05, "loss": 89.9987, "step": 50700 }, { "epoch": 0.2048748166792584, "grad_norm": 503.9439697265625, "learning_rate": 4.840639737959476e-05, "loss": 69.8462, "step": 50710 }, { "epoch": 0.20491521794462603, "grad_norm": 702.1473388671875, "learning_rate": 4.8405170817484515e-05, "loss": 102.1393, "step": 50720 }, { "epoch": 0.20495561920999367, "grad_norm": 557.0831909179688, "learning_rate": 4.840394379907841e-05, "loss": 70.9738, "step": 50730 }, { "epoch": 0.20499602047536128, "grad_norm": 905.9644775390625, "learning_rate": 4.840271632440038e-05, "loss": 63.7002, "step": 50740 }, { "epoch": 0.20503642174072892, "grad_norm": 1401.463623046875, "learning_rate": 4.840148839347434e-05, "loss": 82.3691, "step": 50750 }, { "epoch": 0.20507682300609656, "grad_norm": 457.6974182128906, "learning_rate": 4.8400260006324235e-05, "loss": 64.2692, "step": 50760 }, { "epoch": 0.20511722427146417, "grad_norm": 1235.328857421875, "learning_rate": 4.839903116297401e-05, "loss": 76.285, "step": 50770 }, { "epoch": 0.2051576255368318, "grad_norm": 1098.241943359375, "learning_rate": 4.8397801863447635e-05, "loss": 98.041, "step": 50780 }, { "epoch": 0.20519802680219945, "grad_norm": 1030.691650390625, "learning_rate": 4.8396572107769066e-05, "loss": 81.4219, "step": 50790 }, { "epoch": 0.20523842806756706, "grad_norm": 546.6036987304688, "learning_rate": 4.839534189596228e-05, "loss": 62.711, "step": 50800 }, { "epoch": 0.2052788293329347, "grad_norm": 595.7460327148438, "learning_rate": 4.839411122805125e-05, "loss": 78.6366, "step": 50810 }, { "epoch": 0.20531923059830234, "grad_norm": 435.6410217285156, "learning_rate": 4.839288010405998e-05, "loss": 54.1102, "step": 50820 }, { "epoch": 0.20535963186366998, "grad_norm": 756.9534301757812, "learning_rate": 4.839164852401247e-05, "loss": 55.358, "step": 50830 }, { "epoch": 0.2054000331290376, "grad_norm": 1443.138671875, "learning_rate": 4.8390416487932733e-05, "loss": 76.8449, "step": 50840 }, { "epoch": 0.20544043439440524, "grad_norm": 666.9951171875, "learning_rate": 4.8389183995844785e-05, "loss": 98.6886, "step": 50850 }, { "epoch": 0.20548083565977288, "grad_norm": 720.92919921875, "learning_rate": 4.838795104777265e-05, "loss": 101.151, "step": 50860 }, { "epoch": 0.2055212369251405, "grad_norm": 929.8797607421875, "learning_rate": 4.8386717643740366e-05, "loss": 87.1545, "step": 50870 }, { "epoch": 0.20556163819050813, "grad_norm": 1356.6619873046875, "learning_rate": 4.8385483783771986e-05, "loss": 92.7927, "step": 50880 }, { "epoch": 0.20560203945587577, "grad_norm": 2006.3673095703125, "learning_rate": 4.838424946789156e-05, "loss": 75.9321, "step": 50890 }, { "epoch": 0.20564244072124338, "grad_norm": 741.2835693359375, "learning_rate": 4.8383014696123144e-05, "loss": 74.4236, "step": 50900 }, { "epoch": 0.20568284198661102, "grad_norm": 719.8021850585938, "learning_rate": 4.838177946849083e-05, "loss": 100.4028, "step": 50910 }, { "epoch": 0.20572324325197866, "grad_norm": 410.4776306152344, "learning_rate": 4.8380543785018677e-05, "loss": 78.5173, "step": 50920 }, { "epoch": 0.20576364451734627, "grad_norm": 563.4188842773438, "learning_rate": 4.8379307645730795e-05, "loss": 116.8776, "step": 50930 }, { "epoch": 0.2058040457827139, "grad_norm": 882.4771728515625, "learning_rate": 4.837807105065127e-05, "loss": 63.1256, "step": 50940 }, { "epoch": 0.20584444704808155, "grad_norm": 576.0557861328125, "learning_rate": 4.837683399980421e-05, "loss": 100.8891, "step": 50950 }, { "epoch": 0.20588484831344916, "grad_norm": 2097.596923828125, "learning_rate": 4.837559649321374e-05, "loss": 107.397, "step": 50960 }, { "epoch": 0.2059252495788168, "grad_norm": 1427.4742431640625, "learning_rate": 4.837435853090398e-05, "loss": 68.9581, "step": 50970 }, { "epoch": 0.20596565084418444, "grad_norm": 1769.10107421875, "learning_rate": 4.837312011289907e-05, "loss": 77.3765, "step": 50980 }, { "epoch": 0.20600605210955208, "grad_norm": 745.1818237304688, "learning_rate": 4.837188123922314e-05, "loss": 58.5138, "step": 50990 }, { "epoch": 0.2060464533749197, "grad_norm": 1252.1639404296875, "learning_rate": 4.837064190990036e-05, "loss": 87.0167, "step": 51000 }, { "epoch": 0.20608685464028734, "grad_norm": 702.9400024414062, "learning_rate": 4.836940212495489e-05, "loss": 100.8639, "step": 51010 }, { "epoch": 0.20612725590565498, "grad_norm": 1918.0904541015625, "learning_rate": 4.836816188441089e-05, "loss": 67.2167, "step": 51020 }, { "epoch": 0.2061676571710226, "grad_norm": 1665.779296875, "learning_rate": 4.8366921188292534e-05, "loss": 70.5055, "step": 51030 }, { "epoch": 0.20620805843639023, "grad_norm": 1276.0400390625, "learning_rate": 4.8365680036624026e-05, "loss": 69.3662, "step": 51040 }, { "epoch": 0.20624845970175787, "grad_norm": 1376.8558349609375, "learning_rate": 4.836443842942956e-05, "loss": 88.9127, "step": 51050 }, { "epoch": 0.20628886096712548, "grad_norm": 1290.6573486328125, "learning_rate": 4.836319636673334e-05, "loss": 114.1035, "step": 51060 }, { "epoch": 0.20632926223249312, "grad_norm": 789.5421142578125, "learning_rate": 4.836195384855957e-05, "loss": 67.4084, "step": 51070 }, { "epoch": 0.20636966349786076, "grad_norm": 843.2109985351562, "learning_rate": 4.8360710874932485e-05, "loss": 77.7251, "step": 51080 }, { "epoch": 0.20641006476322837, "grad_norm": 873.6146850585938, "learning_rate": 4.8359467445876314e-05, "loss": 111.2144, "step": 51090 }, { "epoch": 0.206450466028596, "grad_norm": 1589.0201416015625, "learning_rate": 4.8358223561415304e-05, "loss": 95.8929, "step": 51100 }, { "epoch": 0.20649086729396365, "grad_norm": 914.6998291015625, "learning_rate": 4.8356979221573696e-05, "loss": 58.4896, "step": 51110 }, { "epoch": 0.20653126855933127, "grad_norm": 566.5862426757812, "learning_rate": 4.8355734426375753e-05, "loss": 82.7033, "step": 51120 }, { "epoch": 0.2065716698246989, "grad_norm": 1035.8038330078125, "learning_rate": 4.835448917584574e-05, "loss": 93.2203, "step": 51130 }, { "epoch": 0.20661207109006655, "grad_norm": 645.23388671875, "learning_rate": 4.8353243470007944e-05, "loss": 84.6093, "step": 51140 }, { "epoch": 0.20665247235543419, "grad_norm": 919.4197998046875, "learning_rate": 4.835199730888664e-05, "loss": 70.3317, "step": 51150 }, { "epoch": 0.2066928736208018, "grad_norm": 647.1675415039062, "learning_rate": 4.835075069250613e-05, "loss": 70.5222, "step": 51160 }, { "epoch": 0.20673327488616944, "grad_norm": 689.3214111328125, "learning_rate": 4.8349503620890705e-05, "loss": 73.7844, "step": 51170 }, { "epoch": 0.20677367615153708, "grad_norm": 974.9696044921875, "learning_rate": 4.8348256094064695e-05, "loss": 113.3237, "step": 51180 }, { "epoch": 0.2068140774169047, "grad_norm": 595.2847290039062, "learning_rate": 4.834700811205241e-05, "loss": 65.6414, "step": 51190 }, { "epoch": 0.20685447868227233, "grad_norm": 1128.4539794921875, "learning_rate": 4.834575967487817e-05, "loss": 105.4969, "step": 51200 }, { "epoch": 0.20689487994763997, "grad_norm": 445.4708557128906, "learning_rate": 4.834451078256634e-05, "loss": 62.121, "step": 51210 }, { "epoch": 0.20693528121300758, "grad_norm": 308.1874694824219, "learning_rate": 4.8343261435141244e-05, "loss": 74.7087, "step": 51220 }, { "epoch": 0.20697568247837522, "grad_norm": 579.1897583007812, "learning_rate": 4.8342011632627254e-05, "loss": 115.5755, "step": 51230 }, { "epoch": 0.20701608374374286, "grad_norm": 1018.190185546875, "learning_rate": 4.834076137504873e-05, "loss": 77.1754, "step": 51240 }, { "epoch": 0.20705648500911047, "grad_norm": 560.5921020507812, "learning_rate": 4.8339510662430046e-05, "loss": 89.5999, "step": 51250 }, { "epoch": 0.20709688627447811, "grad_norm": 554.189453125, "learning_rate": 4.833825949479558e-05, "loss": 60.0793, "step": 51260 }, { "epoch": 0.20713728753984575, "grad_norm": 666.8161010742188, "learning_rate": 4.8337007872169735e-05, "loss": 79.2109, "step": 51270 }, { "epoch": 0.20717768880521337, "grad_norm": 758.923828125, "learning_rate": 4.833575579457691e-05, "loss": 71.3804, "step": 51280 }, { "epoch": 0.207218090070581, "grad_norm": 768.92236328125, "learning_rate": 4.8334503262041505e-05, "loss": 54.9832, "step": 51290 }, { "epoch": 0.20725849133594865, "grad_norm": 453.7713317871094, "learning_rate": 4.833325027458795e-05, "loss": 86.5132, "step": 51300 }, { "epoch": 0.2072988926013163, "grad_norm": 688.4905395507812, "learning_rate": 4.8331996832240675e-05, "loss": 62.2691, "step": 51310 }, { "epoch": 0.2073392938666839, "grad_norm": 941.0972900390625, "learning_rate": 4.83307429350241e-05, "loss": 68.9129, "step": 51320 }, { "epoch": 0.20737969513205154, "grad_norm": 553.0337524414062, "learning_rate": 4.832948858296268e-05, "loss": 70.7225, "step": 51330 }, { "epoch": 0.20742009639741918, "grad_norm": 736.6832885742188, "learning_rate": 4.832823377608087e-05, "loss": 90.5628, "step": 51340 }, { "epoch": 0.2074604976627868, "grad_norm": 1483.525390625, "learning_rate": 4.832697851440313e-05, "loss": 60.9628, "step": 51350 }, { "epoch": 0.20750089892815443, "grad_norm": 1290.9232177734375, "learning_rate": 4.8325722797953945e-05, "loss": 56.6402, "step": 51360 }, { "epoch": 0.20754130019352207, "grad_norm": 1492.0635986328125, "learning_rate": 4.8324466626757775e-05, "loss": 88.9837, "step": 51370 }, { "epoch": 0.20758170145888968, "grad_norm": 1464.2392578125, "learning_rate": 4.8323210000839124e-05, "loss": 82.0623, "step": 51380 }, { "epoch": 0.20762210272425732, "grad_norm": 771.36767578125, "learning_rate": 4.832195292022249e-05, "loss": 99.1351, "step": 51390 }, { "epoch": 0.20766250398962496, "grad_norm": 513.316650390625, "learning_rate": 4.832069538493237e-05, "loss": 54.0263, "step": 51400 }, { "epoch": 0.20770290525499258, "grad_norm": 813.9430541992188, "learning_rate": 4.831943739499328e-05, "loss": 79.7442, "step": 51410 }, { "epoch": 0.20774330652036022, "grad_norm": 1018.91650390625, "learning_rate": 4.831817895042977e-05, "loss": 103.3693, "step": 51420 }, { "epoch": 0.20778370778572786, "grad_norm": 1169.8072509765625, "learning_rate": 4.8316920051266343e-05, "loss": 68.9814, "step": 51430 }, { "epoch": 0.20782410905109547, "grad_norm": 739.5846557617188, "learning_rate": 4.8315660697527566e-05, "loss": 47.6177, "step": 51440 }, { "epoch": 0.2078645103164631, "grad_norm": 996.7125854492188, "learning_rate": 4.831440088923797e-05, "loss": 72.8957, "step": 51450 }, { "epoch": 0.20790491158183075, "grad_norm": 765.3619384765625, "learning_rate": 4.8313140626422125e-05, "loss": 56.8616, "step": 51460 }, { "epoch": 0.2079453128471984, "grad_norm": 2611.9541015625, "learning_rate": 4.831187990910461e-05, "loss": 117.1232, "step": 51470 }, { "epoch": 0.207985714112566, "grad_norm": 1099.97314453125, "learning_rate": 4.831061873730999e-05, "loss": 65.8406, "step": 51480 }, { "epoch": 0.20802611537793364, "grad_norm": 672.7211303710938, "learning_rate": 4.8309357111062856e-05, "loss": 101.9847, "step": 51490 }, { "epoch": 0.20806651664330128, "grad_norm": 559.6053466796875, "learning_rate": 4.830809503038781e-05, "loss": 60.1119, "step": 51500 }, { "epoch": 0.2081069179086689, "grad_norm": 611.0339965820312, "learning_rate": 4.8306832495309445e-05, "loss": 77.4869, "step": 51510 }, { "epoch": 0.20814731917403653, "grad_norm": 955.992919921875, "learning_rate": 4.830556950585238e-05, "loss": 94.2499, "step": 51520 }, { "epoch": 0.20818772043940417, "grad_norm": 1458.7508544921875, "learning_rate": 4.830430606204125e-05, "loss": 107.4706, "step": 51530 }, { "epoch": 0.20822812170477178, "grad_norm": 550.4443359375, "learning_rate": 4.830304216390066e-05, "loss": 58.3049, "step": 51540 }, { "epoch": 0.20826852297013942, "grad_norm": 758.5188598632812, "learning_rate": 4.8301777811455276e-05, "loss": 82.5834, "step": 51550 }, { "epoch": 0.20830892423550706, "grad_norm": 715.7566528320312, "learning_rate": 4.8300513004729735e-05, "loss": 47.3867, "step": 51560 }, { "epoch": 0.20834932550087468, "grad_norm": 517.6349487304688, "learning_rate": 4.82992477437487e-05, "loss": 60.1919, "step": 51570 }, { "epoch": 0.20838972676624232, "grad_norm": 1060.4306640625, "learning_rate": 4.8297982028536826e-05, "loss": 106.7914, "step": 51580 }, { "epoch": 0.20843012803160996, "grad_norm": 603.7183227539062, "learning_rate": 4.82967158591188e-05, "loss": 59.8083, "step": 51590 }, { "epoch": 0.20847052929697757, "grad_norm": 1019.84326171875, "learning_rate": 4.829544923551931e-05, "loss": 63.1892, "step": 51600 }, { "epoch": 0.2085109305623452, "grad_norm": 648.5969848632812, "learning_rate": 4.8294182157763044e-05, "loss": 95.7722, "step": 51610 }, { "epoch": 0.20855133182771285, "grad_norm": 2398.117431640625, "learning_rate": 4.82929146258747e-05, "loss": 65.186, "step": 51620 }, { "epoch": 0.20859173309308046, "grad_norm": 342.774658203125, "learning_rate": 4.8291646639878995e-05, "loss": 81.3063, "step": 51630 }, { "epoch": 0.2086321343584481, "grad_norm": 1235.5203857421875, "learning_rate": 4.829037819980065e-05, "loss": 44.4151, "step": 51640 }, { "epoch": 0.20867253562381574, "grad_norm": 470.635498046875, "learning_rate": 4.828910930566439e-05, "loss": 62.5535, "step": 51650 }, { "epoch": 0.20871293688918338, "grad_norm": 830.7872924804688, "learning_rate": 4.828783995749495e-05, "loss": 80.7936, "step": 51660 }, { "epoch": 0.208753338154551, "grad_norm": 1270.7835693359375, "learning_rate": 4.828657015531709e-05, "loss": 89.139, "step": 51670 }, { "epoch": 0.20879373941991863, "grad_norm": 726.8765258789062, "learning_rate": 4.828529989915555e-05, "loss": 53.5606, "step": 51680 }, { "epoch": 0.20883414068528627, "grad_norm": 2210.846435546875, "learning_rate": 4.8284029189035094e-05, "loss": 97.6787, "step": 51690 }, { "epoch": 0.20887454195065389, "grad_norm": 0.0, "learning_rate": 4.828275802498051e-05, "loss": 71.8659, "step": 51700 }, { "epoch": 0.20891494321602153, "grad_norm": 935.220458984375, "learning_rate": 4.828148640701657e-05, "loss": 65.6444, "step": 51710 }, { "epoch": 0.20895534448138917, "grad_norm": 287.4408264160156, "learning_rate": 4.828021433516806e-05, "loss": 61.6178, "step": 51720 }, { "epoch": 0.20899574574675678, "grad_norm": 477.3240966796875, "learning_rate": 4.82789418094598e-05, "loss": 93.1937, "step": 51730 }, { "epoch": 0.20903614701212442, "grad_norm": 516.9016723632812, "learning_rate": 4.827766882991657e-05, "loss": 77.4756, "step": 51740 }, { "epoch": 0.20907654827749206, "grad_norm": 1626.0362548828125, "learning_rate": 4.827639539656321e-05, "loss": 100.3807, "step": 51750 }, { "epoch": 0.20911694954285967, "grad_norm": 530.7556762695312, "learning_rate": 4.827512150942454e-05, "loss": 96.0756, "step": 51760 }, { "epoch": 0.2091573508082273, "grad_norm": 865.0823974609375, "learning_rate": 4.827384716852539e-05, "loss": 50.7269, "step": 51770 }, { "epoch": 0.20919775207359495, "grad_norm": 1538.5042724609375, "learning_rate": 4.82725723738906e-05, "loss": 147.1135, "step": 51780 }, { "epoch": 0.20923815333896256, "grad_norm": 574.5728149414062, "learning_rate": 4.827129712554504e-05, "loss": 60.3232, "step": 51790 }, { "epoch": 0.2092785546043302, "grad_norm": 674.2813110351562, "learning_rate": 4.8270021423513554e-05, "loss": 58.9629, "step": 51800 }, { "epoch": 0.20931895586969784, "grad_norm": 937.812255859375, "learning_rate": 4.826874526782103e-05, "loss": 83.8374, "step": 51810 }, { "epoch": 0.20935935713506548, "grad_norm": 713.9767456054688, "learning_rate": 4.8267468658492335e-05, "loss": 63.4093, "step": 51820 }, { "epoch": 0.2093997584004331, "grad_norm": 1410.48583984375, "learning_rate": 4.826619159555236e-05, "loss": 77.9979, "step": 51830 }, { "epoch": 0.20944015966580073, "grad_norm": 845.0786743164062, "learning_rate": 4.826491407902599e-05, "loss": 86.6751, "step": 51840 }, { "epoch": 0.20948056093116837, "grad_norm": 2135.817626953125, "learning_rate": 4.8263636108938156e-05, "loss": 96.0103, "step": 51850 }, { "epoch": 0.209520962196536, "grad_norm": 1159.19775390625, "learning_rate": 4.8262357685313754e-05, "loss": 135.6379, "step": 51860 }, { "epoch": 0.20956136346190363, "grad_norm": 962.7410278320312, "learning_rate": 4.826107880817771e-05, "loss": 65.2435, "step": 51870 }, { "epoch": 0.20960176472727127, "grad_norm": 958.7470703125, "learning_rate": 4.8259799477554965e-05, "loss": 99.5246, "step": 51880 }, { "epoch": 0.20964216599263888, "grad_norm": 675.7080078125, "learning_rate": 4.825851969347045e-05, "loss": 61.0314, "step": 51890 }, { "epoch": 0.20968256725800652, "grad_norm": 577.6461791992188, "learning_rate": 4.8257239455949124e-05, "loss": 58.5346, "step": 51900 }, { "epoch": 0.20972296852337416, "grad_norm": 1043.0440673828125, "learning_rate": 4.825595876501593e-05, "loss": 90.7892, "step": 51910 }, { "epoch": 0.20976336978874177, "grad_norm": 597.583984375, "learning_rate": 4.825467762069585e-05, "loss": 89.8225, "step": 51920 }, { "epoch": 0.2098037710541094, "grad_norm": 588.63427734375, "learning_rate": 4.825339602301387e-05, "loss": 76.6868, "step": 51930 }, { "epoch": 0.20984417231947705, "grad_norm": 1137.5469970703125, "learning_rate": 4.825211397199495e-05, "loss": 77.4429, "step": 51940 }, { "epoch": 0.20988457358484466, "grad_norm": 1342.8388671875, "learning_rate": 4.82508314676641e-05, "loss": 87.7724, "step": 51950 }, { "epoch": 0.2099249748502123, "grad_norm": 549.8995971679688, "learning_rate": 4.824954851004633e-05, "loss": 62.8191, "step": 51960 }, { "epoch": 0.20996537611557994, "grad_norm": 347.0243835449219, "learning_rate": 4.8248265099166634e-05, "loss": 68.7288, "step": 51970 }, { "epoch": 0.21000577738094758, "grad_norm": 938.3527221679688, "learning_rate": 4.824698123505004e-05, "loss": 57.8288, "step": 51980 }, { "epoch": 0.2100461786463152, "grad_norm": 869.6561279296875, "learning_rate": 4.824569691772158e-05, "loss": 82.7138, "step": 51990 }, { "epoch": 0.21008657991168284, "grad_norm": 940.8391723632812, "learning_rate": 4.8244412147206284e-05, "loss": 87.2308, "step": 52000 }, { "epoch": 0.21012698117705048, "grad_norm": 1123.21826171875, "learning_rate": 4.8243126923529214e-05, "loss": 62.064, "step": 52010 }, { "epoch": 0.2101673824424181, "grad_norm": 919.0745849609375, "learning_rate": 4.824184124671542e-05, "loss": 80.0747, "step": 52020 }, { "epoch": 0.21020778370778573, "grad_norm": 1110.951904296875, "learning_rate": 4.8240555116789964e-05, "loss": 58.8613, "step": 52030 }, { "epoch": 0.21024818497315337, "grad_norm": 1179.7130126953125, "learning_rate": 4.823926853377791e-05, "loss": 85.1923, "step": 52040 }, { "epoch": 0.21028858623852098, "grad_norm": 451.0613708496094, "learning_rate": 4.8237981497704365e-05, "loss": 63.9672, "step": 52050 }, { "epoch": 0.21032898750388862, "grad_norm": 1503.4912109375, "learning_rate": 4.8236694008594405e-05, "loss": 85.0293, "step": 52060 }, { "epoch": 0.21036938876925626, "grad_norm": 691.0103149414062, "learning_rate": 4.823540606647313e-05, "loss": 83.7888, "step": 52070 }, { "epoch": 0.21040979003462387, "grad_norm": 393.8356628417969, "learning_rate": 4.823411767136565e-05, "loss": 71.635, "step": 52080 }, { "epoch": 0.2104501912999915, "grad_norm": 828.97119140625, "learning_rate": 4.8232828823297085e-05, "loss": 88.108, "step": 52090 }, { "epoch": 0.21049059256535915, "grad_norm": 1106.6741943359375, "learning_rate": 4.8231539522292564e-05, "loss": 95.024, "step": 52100 }, { "epoch": 0.21053099383072676, "grad_norm": 996.2150268554688, "learning_rate": 4.823024976837721e-05, "loss": 63.8951, "step": 52110 }, { "epoch": 0.2105713950960944, "grad_norm": 1018.7054443359375, "learning_rate": 4.822895956157619e-05, "loss": 83.2238, "step": 52120 }, { "epoch": 0.21061179636146204, "grad_norm": 648.1104736328125, "learning_rate": 4.8227668901914636e-05, "loss": 123.0206, "step": 52130 }, { "epoch": 0.21065219762682968, "grad_norm": 781.1571655273438, "learning_rate": 4.822637778941772e-05, "loss": 43.9732, "step": 52140 }, { "epoch": 0.2106925988921973, "grad_norm": 539.1663208007812, "learning_rate": 4.8225086224110615e-05, "loss": 77.1991, "step": 52150 }, { "epoch": 0.21073300015756494, "grad_norm": 654.8760986328125, "learning_rate": 4.822379420601849e-05, "loss": 95.2022, "step": 52160 }, { "epoch": 0.21077340142293258, "grad_norm": 666.3115234375, "learning_rate": 4.822250173516655e-05, "loss": 101.8235, "step": 52170 }, { "epoch": 0.2108138026883002, "grad_norm": 8625.4052734375, "learning_rate": 4.822120881157998e-05, "loss": 73.1473, "step": 52180 }, { "epoch": 0.21085420395366783, "grad_norm": 1041.82470703125, "learning_rate": 4.821991543528398e-05, "loss": 101.8729, "step": 52190 }, { "epoch": 0.21089460521903547, "grad_norm": 0.0, "learning_rate": 4.821862160630378e-05, "loss": 49.5271, "step": 52200 }, { "epoch": 0.21093500648440308, "grad_norm": 347.1128234863281, "learning_rate": 4.8217327324664595e-05, "loss": 109.6646, "step": 52210 }, { "epoch": 0.21097540774977072, "grad_norm": 786.9452514648438, "learning_rate": 4.821603259039167e-05, "loss": 69.5838, "step": 52220 }, { "epoch": 0.21101580901513836, "grad_norm": 308.2260437011719, "learning_rate": 4.821473740351023e-05, "loss": 41.9356, "step": 52230 }, { "epoch": 0.21105621028050597, "grad_norm": 730.7586669921875, "learning_rate": 4.821344176404554e-05, "loss": 64.5068, "step": 52240 }, { "epoch": 0.2110966115458736, "grad_norm": 724.8195190429688, "learning_rate": 4.8212145672022844e-05, "loss": 75.1824, "step": 52250 }, { "epoch": 0.21113701281124125, "grad_norm": 1213.788818359375, "learning_rate": 4.821084912746742e-05, "loss": 99.653, "step": 52260 }, { "epoch": 0.21117741407660887, "grad_norm": 886.8688354492188, "learning_rate": 4.820955213040454e-05, "loss": 85.2024, "step": 52270 }, { "epoch": 0.2112178153419765, "grad_norm": 746.9656372070312, "learning_rate": 4.8208254680859494e-05, "loss": 62.4096, "step": 52280 }, { "epoch": 0.21125821660734415, "grad_norm": 695.8228759765625, "learning_rate": 4.820695677885757e-05, "loss": 123.5433, "step": 52290 }, { "epoch": 0.21129861787271179, "grad_norm": 642.0867309570312, "learning_rate": 4.820565842442408e-05, "loss": 82.3376, "step": 52300 }, { "epoch": 0.2113390191380794, "grad_norm": 448.39202880859375, "learning_rate": 4.8204359617584336e-05, "loss": 58.77, "step": 52310 }, { "epoch": 0.21137942040344704, "grad_norm": 906.605712890625, "learning_rate": 4.820306035836365e-05, "loss": 79.2194, "step": 52320 }, { "epoch": 0.21141982166881468, "grad_norm": 478.28643798828125, "learning_rate": 4.8201760646787366e-05, "loss": 56.0789, "step": 52330 }, { "epoch": 0.2114602229341823, "grad_norm": 1141.816650390625, "learning_rate": 4.82004604828808e-05, "loss": 110.6768, "step": 52340 }, { "epoch": 0.21150062419954993, "grad_norm": 280.15692138671875, "learning_rate": 4.819915986666932e-05, "loss": 93.5777, "step": 52350 }, { "epoch": 0.21154102546491757, "grad_norm": 1224.0416259765625, "learning_rate": 4.819785879817827e-05, "loss": 104.5719, "step": 52360 }, { "epoch": 0.21158142673028518, "grad_norm": 960.8833618164062, "learning_rate": 4.819655727743302e-05, "loss": 88.1145, "step": 52370 }, { "epoch": 0.21162182799565282, "grad_norm": 717.3423461914062, "learning_rate": 4.8195255304458945e-05, "loss": 138.9197, "step": 52380 }, { "epoch": 0.21166222926102046, "grad_norm": 902.6900634765625, "learning_rate": 4.819395287928143e-05, "loss": 72.1466, "step": 52390 }, { "epoch": 0.21170263052638807, "grad_norm": 988.1195068359375, "learning_rate": 4.8192650001925855e-05, "loss": 63.8846, "step": 52400 }, { "epoch": 0.21174303179175571, "grad_norm": 1469.6943359375, "learning_rate": 4.8191346672417633e-05, "loss": 68.6236, "step": 52410 }, { "epoch": 0.21178343305712335, "grad_norm": 529.160888671875, "learning_rate": 4.819004289078217e-05, "loss": 71.5383, "step": 52420 }, { "epoch": 0.21182383432249097, "grad_norm": 1089.6658935546875, "learning_rate": 4.818873865704487e-05, "loss": 101.7536, "step": 52430 }, { "epoch": 0.2118642355878586, "grad_norm": 1817.579833984375, "learning_rate": 4.818743397123119e-05, "loss": 111.1048, "step": 52440 }, { "epoch": 0.21190463685322625, "grad_norm": 1221.6204833984375, "learning_rate": 4.818612883336654e-05, "loss": 59.6956, "step": 52450 }, { "epoch": 0.2119450381185939, "grad_norm": 572.4197998046875, "learning_rate": 4.8184823243476364e-05, "loss": 69.0064, "step": 52460 }, { "epoch": 0.2119854393839615, "grad_norm": 617.7356567382812, "learning_rate": 4.818351720158613e-05, "loss": 50.4712, "step": 52470 }, { "epoch": 0.21202584064932914, "grad_norm": 530.05224609375, "learning_rate": 4.8182210707721284e-05, "loss": 63.9792, "step": 52480 }, { "epoch": 0.21206624191469678, "grad_norm": 2283.126953125, "learning_rate": 4.8180903761907315e-05, "loss": 76.8574, "step": 52490 }, { "epoch": 0.2121066431800644, "grad_norm": 721.9110717773438, "learning_rate": 4.817959636416969e-05, "loss": 61.0024, "step": 52500 }, { "epoch": 0.21214704444543203, "grad_norm": 993.3989868164062, "learning_rate": 4.81782885145339e-05, "loss": 97.0186, "step": 52510 }, { "epoch": 0.21218744571079967, "grad_norm": 897.0438232421875, "learning_rate": 4.8176980213025434e-05, "loss": 83.1266, "step": 52520 }, { "epoch": 0.21222784697616728, "grad_norm": 677.8610229492188, "learning_rate": 4.817567145966982e-05, "loss": 46.0433, "step": 52530 }, { "epoch": 0.21226824824153492, "grad_norm": 893.5686645507812, "learning_rate": 4.817436225449255e-05, "loss": 66.1131, "step": 52540 }, { "epoch": 0.21230864950690256, "grad_norm": 879.1597290039062, "learning_rate": 4.817305259751916e-05, "loss": 56.4979, "step": 52550 }, { "epoch": 0.21234905077227018, "grad_norm": 1273.606689453125, "learning_rate": 4.817174248877518e-05, "loss": 68.6022, "step": 52560 }, { "epoch": 0.21238945203763782, "grad_norm": 958.6312255859375, "learning_rate": 4.8170431928286155e-05, "loss": 73.1606, "step": 52570 }, { "epoch": 0.21242985330300546, "grad_norm": 2562.6357421875, "learning_rate": 4.816912091607762e-05, "loss": 97.7864, "step": 52580 }, { "epoch": 0.21247025456837307, "grad_norm": 305.3066711425781, "learning_rate": 4.816780945217515e-05, "loss": 77.4546, "step": 52590 }, { "epoch": 0.2125106558337407, "grad_norm": 721.4779663085938, "learning_rate": 4.81664975366043e-05, "loss": 87.7981, "step": 52600 }, { "epoch": 0.21255105709910835, "grad_norm": 1225.9923095703125, "learning_rate": 4.816518516939067e-05, "loss": 82.7573, "step": 52610 }, { "epoch": 0.212591458364476, "grad_norm": 729.4354248046875, "learning_rate": 4.8163872350559816e-05, "loss": 66.5677, "step": 52620 }, { "epoch": 0.2126318596298436, "grad_norm": 1146.287109375, "learning_rate": 4.8162559080137346e-05, "loss": 108.5152, "step": 52630 }, { "epoch": 0.21267226089521124, "grad_norm": 1008.0455322265625, "learning_rate": 4.8161245358148866e-05, "loss": 91.5322, "step": 52640 }, { "epoch": 0.21271266216057888, "grad_norm": 193.93905639648438, "learning_rate": 4.815993118461998e-05, "loss": 40.234, "step": 52650 }, { "epoch": 0.2127530634259465, "grad_norm": 1779.3536376953125, "learning_rate": 4.815861655957632e-05, "loss": 82.0843, "step": 52660 }, { "epoch": 0.21279346469131413, "grad_norm": 761.0687255859375, "learning_rate": 4.81573014830435e-05, "loss": 98.4412, "step": 52670 }, { "epoch": 0.21283386595668177, "grad_norm": 639.0739135742188, "learning_rate": 4.815598595504717e-05, "loss": 103.2698, "step": 52680 }, { "epoch": 0.21287426722204938, "grad_norm": 1339.623291015625, "learning_rate": 4.8154669975612966e-05, "loss": 97.1118, "step": 52690 }, { "epoch": 0.21291466848741702, "grad_norm": 1258.3170166015625, "learning_rate": 4.8153353544766553e-05, "loss": 73.1002, "step": 52700 }, { "epoch": 0.21295506975278466, "grad_norm": 783.255859375, "learning_rate": 4.815203666253359e-05, "loss": 60.4868, "step": 52710 }, { "epoch": 0.21299547101815228, "grad_norm": 621.6766357421875, "learning_rate": 4.8150719328939755e-05, "loss": 53.9103, "step": 52720 }, { "epoch": 0.21303587228351992, "grad_norm": 899.034912109375, "learning_rate": 4.814940154401073e-05, "loss": 90.3724, "step": 52730 }, { "epoch": 0.21307627354888756, "grad_norm": 1244.728515625, "learning_rate": 4.81480833077722e-05, "loss": 57.8157, "step": 52740 }, { "epoch": 0.21311667481425517, "grad_norm": 809.1812744140625, "learning_rate": 4.814676462024988e-05, "loss": 110.7006, "step": 52750 }, { "epoch": 0.2131570760796228, "grad_norm": 3029.4775390625, "learning_rate": 4.814544548146945e-05, "loss": 63.5398, "step": 52760 }, { "epoch": 0.21319747734499045, "grad_norm": 392.19464111328125, "learning_rate": 4.814412589145665e-05, "loss": 84.5926, "step": 52770 }, { "epoch": 0.2132378786103581, "grad_norm": 883.9444580078125, "learning_rate": 4.814280585023721e-05, "loss": 73.6628, "step": 52780 }, { "epoch": 0.2132782798757257, "grad_norm": 1597.8800048828125, "learning_rate": 4.814148535783684e-05, "loss": 137.1192, "step": 52790 }, { "epoch": 0.21331868114109334, "grad_norm": 390.61407470703125, "learning_rate": 4.8140164414281306e-05, "loss": 58.5544, "step": 52800 }, { "epoch": 0.21335908240646098, "grad_norm": 1327.6656494140625, "learning_rate": 4.813884301959635e-05, "loss": 62.8842, "step": 52810 }, { "epoch": 0.2133994836718286, "grad_norm": 1165.1925048828125, "learning_rate": 4.813752117380774e-05, "loss": 73.2491, "step": 52820 }, { "epoch": 0.21343988493719623, "grad_norm": 974.8809204101562, "learning_rate": 4.813619887694124e-05, "loss": 86.8438, "step": 52830 }, { "epoch": 0.21348028620256387, "grad_norm": 658.647216796875, "learning_rate": 4.813487612902264e-05, "loss": 75.7994, "step": 52840 }, { "epoch": 0.21352068746793149, "grad_norm": 619.7402954101562, "learning_rate": 4.8133552930077716e-05, "loss": 47.0964, "step": 52850 }, { "epoch": 0.21356108873329913, "grad_norm": 865.9132690429688, "learning_rate": 4.813222928013226e-05, "loss": 47.7462, "step": 52860 }, { "epoch": 0.21360148999866677, "grad_norm": 983.5340576171875, "learning_rate": 4.813090517921209e-05, "loss": 48.6633, "step": 52870 }, { "epoch": 0.21364189126403438, "grad_norm": 607.2757568359375, "learning_rate": 4.812958062734302e-05, "loss": 93.2731, "step": 52880 }, { "epoch": 0.21368229252940202, "grad_norm": 732.9096069335938, "learning_rate": 4.812825562455086e-05, "loss": 83.7247, "step": 52890 }, { "epoch": 0.21372269379476966, "grad_norm": 493.807373046875, "learning_rate": 4.812693017086145e-05, "loss": 71.3714, "step": 52900 }, { "epoch": 0.21376309506013727, "grad_norm": 880.7445678710938, "learning_rate": 4.8125604266300636e-05, "loss": 69.891, "step": 52910 }, { "epoch": 0.2138034963255049, "grad_norm": 1000.0642700195312, "learning_rate": 4.812427791089426e-05, "loss": 106.0305, "step": 52920 }, { "epoch": 0.21384389759087255, "grad_norm": 649.3963012695312, "learning_rate": 4.812295110466817e-05, "loss": 71.4997, "step": 52930 }, { "epoch": 0.2138842988562402, "grad_norm": 539.8707885742188, "learning_rate": 4.812162384764826e-05, "loss": 81.3335, "step": 52940 }, { "epoch": 0.2139247001216078, "grad_norm": 822.1153564453125, "learning_rate": 4.8120296139860376e-05, "loss": 67.337, "step": 52950 }, { "epoch": 0.21396510138697544, "grad_norm": 2130.726806640625, "learning_rate": 4.811896798133042e-05, "loss": 111.1284, "step": 52960 }, { "epoch": 0.21400550265234308, "grad_norm": 467.0498046875, "learning_rate": 4.811763937208428e-05, "loss": 84.292, "step": 52970 }, { "epoch": 0.2140459039177107, "grad_norm": 1501.177978515625, "learning_rate": 4.811631031214786e-05, "loss": 63.4714, "step": 52980 }, { "epoch": 0.21408630518307833, "grad_norm": 628.075439453125, "learning_rate": 4.811498080154707e-05, "loss": 113.7706, "step": 52990 }, { "epoch": 0.21412670644844597, "grad_norm": 792.900146484375, "learning_rate": 4.8113650840307834e-05, "loss": 55.3442, "step": 53000 }, { "epoch": 0.2141671077138136, "grad_norm": 989.729736328125, "learning_rate": 4.811232042845607e-05, "loss": 88.5752, "step": 53010 }, { "epoch": 0.21420750897918123, "grad_norm": 1206.408447265625, "learning_rate": 4.8110989566017716e-05, "loss": 50.2796, "step": 53020 }, { "epoch": 0.21424791024454887, "grad_norm": 854.8396606445312, "learning_rate": 4.810965825301873e-05, "loss": 85.1104, "step": 53030 }, { "epoch": 0.21428831150991648, "grad_norm": 841.41357421875, "learning_rate": 4.810832648948505e-05, "loss": 64.3325, "step": 53040 }, { "epoch": 0.21432871277528412, "grad_norm": 1686.3604736328125, "learning_rate": 4.810699427544265e-05, "loss": 73.6608, "step": 53050 }, { "epoch": 0.21436911404065176, "grad_norm": 1173.083740234375, "learning_rate": 4.810566161091751e-05, "loss": 80.8808, "step": 53060 }, { "epoch": 0.21440951530601937, "grad_norm": 1014.2672119140625, "learning_rate": 4.810432849593559e-05, "loss": 109.5157, "step": 53070 }, { "epoch": 0.214449916571387, "grad_norm": 848.516357421875, "learning_rate": 4.810299493052289e-05, "loss": 86.9813, "step": 53080 }, { "epoch": 0.21449031783675465, "grad_norm": 2046.2830810546875, "learning_rate": 4.810166091470542e-05, "loss": 71.7423, "step": 53090 }, { "epoch": 0.2145307191021223, "grad_norm": 856.9232177734375, "learning_rate": 4.810032644850917e-05, "loss": 50.4441, "step": 53100 }, { "epoch": 0.2145711203674899, "grad_norm": 868.9803466796875, "learning_rate": 4.809899153196017e-05, "loss": 115.6349, "step": 53110 }, { "epoch": 0.21461152163285754, "grad_norm": 0.0, "learning_rate": 4.809765616508443e-05, "loss": 59.3985, "step": 53120 }, { "epoch": 0.21465192289822518, "grad_norm": 2540.765625, "learning_rate": 4.8096320347908e-05, "loss": 75.7832, "step": 53130 }, { "epoch": 0.2146923241635928, "grad_norm": 927.6019897460938, "learning_rate": 4.8094984080456904e-05, "loss": 65.1414, "step": 53140 }, { "epoch": 0.21473272542896044, "grad_norm": 863.6876831054688, "learning_rate": 4.8093647362757206e-05, "loss": 163.4252, "step": 53150 }, { "epoch": 0.21477312669432808, "grad_norm": 542.0109252929688, "learning_rate": 4.809231019483497e-05, "loss": 81.4207, "step": 53160 }, { "epoch": 0.2148135279596957, "grad_norm": 738.8974609375, "learning_rate": 4.809097257671625e-05, "loss": 71.7718, "step": 53170 }, { "epoch": 0.21485392922506333, "grad_norm": 1300.409423828125, "learning_rate": 4.808963450842713e-05, "loss": 63.6445, "step": 53180 }, { "epoch": 0.21489433049043097, "grad_norm": 710.06103515625, "learning_rate": 4.80882959899937e-05, "loss": 75.5958, "step": 53190 }, { "epoch": 0.21493473175579858, "grad_norm": 1211.570556640625, "learning_rate": 4.808695702144206e-05, "loss": 96.9039, "step": 53200 }, { "epoch": 0.21497513302116622, "grad_norm": 4932.537109375, "learning_rate": 4.808561760279831e-05, "loss": 62.6395, "step": 53210 }, { "epoch": 0.21501553428653386, "grad_norm": 1449.601806640625, "learning_rate": 4.8084277734088544e-05, "loss": 76.1311, "step": 53220 }, { "epoch": 0.21505593555190147, "grad_norm": 1062.4901123046875, "learning_rate": 4.808293741533891e-05, "loss": 94.2731, "step": 53230 }, { "epoch": 0.2150963368172691, "grad_norm": 766.841552734375, "learning_rate": 4.808159664657552e-05, "loss": 59.1123, "step": 53240 }, { "epoch": 0.21513673808263675, "grad_norm": 503.95635986328125, "learning_rate": 4.808025542782453e-05, "loss": 71.4319, "step": 53250 }, { "epoch": 0.2151771393480044, "grad_norm": 2950.0439453125, "learning_rate": 4.8078913759112066e-05, "loss": 90.5065, "step": 53260 }, { "epoch": 0.215217540613372, "grad_norm": 1027.7640380859375, "learning_rate": 4.80775716404643e-05, "loss": 56.2461, "step": 53270 }, { "epoch": 0.21525794187873964, "grad_norm": 507.6009521484375, "learning_rate": 4.8076229071907397e-05, "loss": 66.3295, "step": 53280 }, { "epoch": 0.21529834314410728, "grad_norm": 665.6806640625, "learning_rate": 4.807488605346753e-05, "loss": 80.3044, "step": 53290 }, { "epoch": 0.2153387444094749, "grad_norm": 989.35791015625, "learning_rate": 4.8073542585170877e-05, "loss": 68.0304, "step": 53300 }, { "epoch": 0.21537914567484254, "grad_norm": 6701.48046875, "learning_rate": 4.8072198667043635e-05, "loss": 130.3542, "step": 53310 }, { "epoch": 0.21541954694021018, "grad_norm": 626.8556518554688, "learning_rate": 4.8070854299111994e-05, "loss": 69.6508, "step": 53320 }, { "epoch": 0.2154599482055778, "grad_norm": 889.5938720703125, "learning_rate": 4.806950948140217e-05, "loss": 94.4536, "step": 53330 }, { "epoch": 0.21550034947094543, "grad_norm": 470.2935485839844, "learning_rate": 4.8068164213940393e-05, "loss": 65.35, "step": 53340 }, { "epoch": 0.21554075073631307, "grad_norm": 1064.505859375, "learning_rate": 4.8066818496752875e-05, "loss": 88.5015, "step": 53350 }, { "epoch": 0.21558115200168068, "grad_norm": 673.2662963867188, "learning_rate": 4.8065472329865854e-05, "loss": 63.5901, "step": 53360 }, { "epoch": 0.21562155326704832, "grad_norm": 880.4266967773438, "learning_rate": 4.806412571330557e-05, "loss": 63.0849, "step": 53370 }, { "epoch": 0.21566195453241596, "grad_norm": 761.7822875976562, "learning_rate": 4.8062778647098284e-05, "loss": 78.9925, "step": 53380 }, { "epoch": 0.21570235579778357, "grad_norm": 974.259521484375, "learning_rate": 4.806143113127025e-05, "loss": 93.895, "step": 53390 }, { "epoch": 0.2157427570631512, "grad_norm": 555.7957153320312, "learning_rate": 4.8060083165847754e-05, "loss": 74.5153, "step": 53400 }, { "epoch": 0.21578315832851885, "grad_norm": 925.432373046875, "learning_rate": 4.805873475085706e-05, "loss": 119.6515, "step": 53410 }, { "epoch": 0.2158235595938865, "grad_norm": 817.2948608398438, "learning_rate": 4.805738588632446e-05, "loss": 90.0423, "step": 53420 }, { "epoch": 0.2158639608592541, "grad_norm": 921.9326782226562, "learning_rate": 4.805603657227625e-05, "loss": 106.7092, "step": 53430 }, { "epoch": 0.21590436212462175, "grad_norm": 1191.692626953125, "learning_rate": 4.805468680873874e-05, "loss": 57.5349, "step": 53440 }, { "epoch": 0.21594476338998939, "grad_norm": 327.62908935546875, "learning_rate": 4.8053336595738236e-05, "loss": 84.3568, "step": 53450 }, { "epoch": 0.215985164655357, "grad_norm": 752.6683349609375, "learning_rate": 4.805198593330107e-05, "loss": 63.1536, "step": 53460 }, { "epoch": 0.21602556592072464, "grad_norm": 711.6024780273438, "learning_rate": 4.8050634821453565e-05, "loss": 62.8601, "step": 53470 }, { "epoch": 0.21606596718609228, "grad_norm": 466.1304931640625, "learning_rate": 4.8049283260222075e-05, "loss": 77.9144, "step": 53480 }, { "epoch": 0.2161063684514599, "grad_norm": 1117.0677490234375, "learning_rate": 4.804793124963294e-05, "loss": 76.5366, "step": 53490 }, { "epoch": 0.21614676971682753, "grad_norm": 637.1599731445312, "learning_rate": 4.8046578789712515e-05, "loss": 67.9231, "step": 53500 }, { "epoch": 0.21618717098219517, "grad_norm": 568.0106811523438, "learning_rate": 4.804522588048718e-05, "loss": 75.5774, "step": 53510 }, { "epoch": 0.21622757224756278, "grad_norm": 432.543212890625, "learning_rate": 4.8043872521983294e-05, "loss": 59.4265, "step": 53520 }, { "epoch": 0.21626797351293042, "grad_norm": 454.713134765625, "learning_rate": 4.804251871422725e-05, "loss": 96.0723, "step": 53530 }, { "epoch": 0.21630837477829806, "grad_norm": 1082.2203369140625, "learning_rate": 4.804116445724543e-05, "loss": 79.7681, "step": 53540 }, { "epoch": 0.21634877604366567, "grad_norm": 621.678466796875, "learning_rate": 4.803980975106427e-05, "loss": 85.7473, "step": 53550 }, { "epoch": 0.21638917730903331, "grad_norm": 890.781005859375, "learning_rate": 4.803845459571014e-05, "loss": 86.9005, "step": 53560 }, { "epoch": 0.21642957857440095, "grad_norm": 649.1585083007812, "learning_rate": 4.8037098991209484e-05, "loss": 61.2427, "step": 53570 }, { "epoch": 0.2164699798397686, "grad_norm": 850.4962768554688, "learning_rate": 4.8035742937588724e-05, "loss": 59.2555, "step": 53580 }, { "epoch": 0.2165103811051362, "grad_norm": 787.1389770507812, "learning_rate": 4.803438643487429e-05, "loss": 82.7354, "step": 53590 }, { "epoch": 0.21655078237050385, "grad_norm": 743.615234375, "learning_rate": 4.803302948309264e-05, "loss": 72.4777, "step": 53600 }, { "epoch": 0.2165911836358715, "grad_norm": 1553.5562744140625, "learning_rate": 4.8031672082270216e-05, "loss": 85.1181, "step": 53610 }, { "epoch": 0.2166315849012391, "grad_norm": 525.2081298828125, "learning_rate": 4.803031423243349e-05, "loss": 65.5073, "step": 53620 }, { "epoch": 0.21667198616660674, "grad_norm": 718.9132080078125, "learning_rate": 4.802895593360893e-05, "loss": 169.8341, "step": 53630 }, { "epoch": 0.21671238743197438, "grad_norm": 663.7149658203125, "learning_rate": 4.8027597185823016e-05, "loss": 54.949, "step": 53640 }, { "epoch": 0.216752788697342, "grad_norm": 558.2113037109375, "learning_rate": 4.802623798910224e-05, "loss": 72.456, "step": 53650 }, { "epoch": 0.21679318996270963, "grad_norm": 3290.45068359375, "learning_rate": 4.802487834347311e-05, "loss": 80.3636, "step": 53660 }, { "epoch": 0.21683359122807727, "grad_norm": 601.9519653320312, "learning_rate": 4.802351824896211e-05, "loss": 82.0123, "step": 53670 }, { "epoch": 0.21687399249344488, "grad_norm": 0.0, "learning_rate": 4.802215770559577e-05, "loss": 58.6228, "step": 53680 }, { "epoch": 0.21691439375881252, "grad_norm": 610.6953125, "learning_rate": 4.802079671340062e-05, "loss": 60.981, "step": 53690 }, { "epoch": 0.21695479502418016, "grad_norm": 774.2489624023438, "learning_rate": 4.801943527240318e-05, "loss": 94.8428, "step": 53700 }, { "epoch": 0.21699519628954778, "grad_norm": 1123.4874267578125, "learning_rate": 4.801807338263e-05, "loss": 76.9702, "step": 53710 }, { "epoch": 0.21703559755491542, "grad_norm": 1223.6903076171875, "learning_rate": 4.801671104410763e-05, "loss": 101.9046, "step": 53720 }, { "epoch": 0.21707599882028306, "grad_norm": 1136.135009765625, "learning_rate": 4.801534825686263e-05, "loss": 98.1998, "step": 53730 }, { "epoch": 0.2171164000856507, "grad_norm": 1007.6103515625, "learning_rate": 4.801398502092156e-05, "loss": 79.4994, "step": 53740 }, { "epoch": 0.2171568013510183, "grad_norm": 1269.972900390625, "learning_rate": 4.8012621336311016e-05, "loss": 63.9924, "step": 53750 }, { "epoch": 0.21719720261638595, "grad_norm": 643.7221069335938, "learning_rate": 4.8011257203057556e-05, "loss": 55.9485, "step": 53760 }, { "epoch": 0.2172376038817536, "grad_norm": 1297.72412109375, "learning_rate": 4.80098926211878e-05, "loss": 92.5376, "step": 53770 }, { "epoch": 0.2172780051471212, "grad_norm": 1075.8739013671875, "learning_rate": 4.800852759072833e-05, "loss": 59.2101, "step": 53780 }, { "epoch": 0.21731840641248884, "grad_norm": 1902.2833251953125, "learning_rate": 4.800716211170578e-05, "loss": 95.2259, "step": 53790 }, { "epoch": 0.21735880767785648, "grad_norm": 1468.7894287109375, "learning_rate": 4.800579618414676e-05, "loss": 94.4642, "step": 53800 }, { "epoch": 0.2173992089432241, "grad_norm": 640.5420532226562, "learning_rate": 4.80044298080779e-05, "loss": 86.4085, "step": 53810 }, { "epoch": 0.21743961020859173, "grad_norm": 872.8704833984375, "learning_rate": 4.800306298352583e-05, "loss": 64.3737, "step": 53820 }, { "epoch": 0.21748001147395937, "grad_norm": 586.101806640625, "learning_rate": 4.800169571051721e-05, "loss": 89.3042, "step": 53830 }, { "epoch": 0.21752041273932698, "grad_norm": 966.0181884765625, "learning_rate": 4.800032798907869e-05, "loss": 90.1948, "step": 53840 }, { "epoch": 0.21756081400469462, "grad_norm": 795.524658203125, "learning_rate": 4.799895981923693e-05, "loss": 110.5656, "step": 53850 }, { "epoch": 0.21760121527006226, "grad_norm": 496.3986511230469, "learning_rate": 4.799759120101861e-05, "loss": 51.413, "step": 53860 }, { "epoch": 0.21764161653542988, "grad_norm": 596.7861328125, "learning_rate": 4.799622213445041e-05, "loss": 44.7856, "step": 53870 }, { "epoch": 0.21768201780079752, "grad_norm": 1061.037353515625, "learning_rate": 4.7994852619559016e-05, "loss": 93.6382, "step": 53880 }, { "epoch": 0.21772241906616516, "grad_norm": 386.0953063964844, "learning_rate": 4.7993482656371135e-05, "loss": 70.8924, "step": 53890 }, { "epoch": 0.2177628203315328, "grad_norm": 1275.1336669921875, "learning_rate": 4.799211224491348e-05, "loss": 77.7109, "step": 53900 }, { "epoch": 0.2178032215969004, "grad_norm": 534.1307373046875, "learning_rate": 4.799074138521274e-05, "loss": 47.0972, "step": 53910 }, { "epoch": 0.21784362286226805, "grad_norm": 915.685791015625, "learning_rate": 4.798937007729568e-05, "loss": 67.5173, "step": 53920 }, { "epoch": 0.2178840241276357, "grad_norm": 965.9025268554688, "learning_rate": 4.7987998321189e-05, "loss": 60.5081, "step": 53930 }, { "epoch": 0.2179244253930033, "grad_norm": 2893.699462890625, "learning_rate": 4.798662611691947e-05, "loss": 109.9214, "step": 53940 }, { "epoch": 0.21796482665837094, "grad_norm": 468.1808776855469, "learning_rate": 4.7985253464513825e-05, "loss": 56.8471, "step": 53950 }, { "epoch": 0.21800522792373858, "grad_norm": 547.9169311523438, "learning_rate": 4.798388036399883e-05, "loss": 57.3491, "step": 53960 }, { "epoch": 0.2180456291891062, "grad_norm": 482.74652099609375, "learning_rate": 4.7982506815401254e-05, "loss": 69.7557, "step": 53970 }, { "epoch": 0.21808603045447383, "grad_norm": 1082.1453857421875, "learning_rate": 4.7981132818747876e-05, "loss": 96.0472, "step": 53980 }, { "epoch": 0.21812643171984147, "grad_norm": 1212.4945068359375, "learning_rate": 4.797975837406547e-05, "loss": 63.2927, "step": 53990 }, { "epoch": 0.21816683298520909, "grad_norm": 921.6978149414062, "learning_rate": 4.797838348138086e-05, "loss": 57.7244, "step": 54000 }, { "epoch": 0.21820723425057673, "grad_norm": 873.8997802734375, "learning_rate": 4.797700814072083e-05, "loss": 66.1426, "step": 54010 }, { "epoch": 0.21824763551594437, "grad_norm": 760.1358032226562, "learning_rate": 4.7975632352112195e-05, "loss": 77.4176, "step": 54020 }, { "epoch": 0.21828803678131198, "grad_norm": 1324.942626953125, "learning_rate": 4.7974256115581785e-05, "loss": 69.078, "step": 54030 }, { "epoch": 0.21832843804667962, "grad_norm": 1516.5565185546875, "learning_rate": 4.797287943115641e-05, "loss": 64.1911, "step": 54040 }, { "epoch": 0.21836883931204726, "grad_norm": 1137.977294921875, "learning_rate": 4.7971502298862936e-05, "loss": 102.8754, "step": 54050 }, { "epoch": 0.2184092405774149, "grad_norm": 813.7706909179688, "learning_rate": 4.7970124718728193e-05, "loss": 82.1068, "step": 54060 }, { "epoch": 0.2184496418427825, "grad_norm": 635.2130737304688, "learning_rate": 4.7968746690779044e-05, "loss": 58.6373, "step": 54070 }, { "epoch": 0.21849004310815015, "grad_norm": 642.5101928710938, "learning_rate": 4.796736821504235e-05, "loss": 74.7753, "step": 54080 }, { "epoch": 0.2185304443735178, "grad_norm": 849.4306030273438, "learning_rate": 4.7965989291545e-05, "loss": 91.7941, "step": 54090 }, { "epoch": 0.2185708456388854, "grad_norm": 740.3421630859375, "learning_rate": 4.796460992031385e-05, "loss": 72.5141, "step": 54100 }, { "epoch": 0.21861124690425304, "grad_norm": 606.4213256835938, "learning_rate": 4.7963230101375814e-05, "loss": 90.6276, "step": 54110 }, { "epoch": 0.21865164816962068, "grad_norm": 534.52197265625, "learning_rate": 4.7961849834757786e-05, "loss": 141.8116, "step": 54120 }, { "epoch": 0.2186920494349883, "grad_norm": 2459.29443359375, "learning_rate": 4.7960469120486674e-05, "loss": 104.6196, "step": 54130 }, { "epoch": 0.21873245070035593, "grad_norm": 804.312744140625, "learning_rate": 4.7959087958589386e-05, "loss": 71.0338, "step": 54140 }, { "epoch": 0.21877285196572357, "grad_norm": 884.2997436523438, "learning_rate": 4.7957706349092865e-05, "loss": 80.0751, "step": 54150 }, { "epoch": 0.2188132532310912, "grad_norm": 1428.2335205078125, "learning_rate": 4.795632429202405e-05, "loss": 73.554, "step": 54160 }, { "epoch": 0.21885365449645883, "grad_norm": 555.4033203125, "learning_rate": 4.795494178740986e-05, "loss": 52.6659, "step": 54170 }, { "epoch": 0.21889405576182647, "grad_norm": 479.893310546875, "learning_rate": 4.795355883527727e-05, "loss": 81.4304, "step": 54180 }, { "epoch": 0.21893445702719408, "grad_norm": 738.9581909179688, "learning_rate": 4.7952175435653226e-05, "loss": 83.4163, "step": 54190 }, { "epoch": 0.21897485829256172, "grad_norm": 599.0054321289062, "learning_rate": 4.79507915885647e-05, "loss": 62.3013, "step": 54200 }, { "epoch": 0.21901525955792936, "grad_norm": 672.7980346679688, "learning_rate": 4.794940729403869e-05, "loss": 73.0649, "step": 54210 }, { "epoch": 0.219055660823297, "grad_norm": 1021.8695068359375, "learning_rate": 4.794802255210217e-05, "loss": 74.2551, "step": 54220 }, { "epoch": 0.2190960620886646, "grad_norm": 2235.082763671875, "learning_rate": 4.794663736278212e-05, "loss": 100.521, "step": 54230 }, { "epoch": 0.21913646335403225, "grad_norm": 454.6265563964844, "learning_rate": 4.794525172610558e-05, "loss": 64.2982, "step": 54240 }, { "epoch": 0.2191768646193999, "grad_norm": 2061.634765625, "learning_rate": 4.794386564209953e-05, "loss": 80.8011, "step": 54250 }, { "epoch": 0.2192172658847675, "grad_norm": 1272.87451171875, "learning_rate": 4.7942479110791015e-05, "loss": 75.4505, "step": 54260 }, { "epoch": 0.21925766715013514, "grad_norm": 732.8099975585938, "learning_rate": 4.7941092132207056e-05, "loss": 76.4685, "step": 54270 }, { "epoch": 0.21929806841550278, "grad_norm": 1131.0069580078125, "learning_rate": 4.793970470637469e-05, "loss": 95.1893, "step": 54280 }, { "epoch": 0.2193384696808704, "grad_norm": 707.0789794921875, "learning_rate": 4.793831683332098e-05, "loss": 79.5281, "step": 54290 }, { "epoch": 0.21937887094623804, "grad_norm": 1276.7493896484375, "learning_rate": 4.7936928513072964e-05, "loss": 63.4697, "step": 54300 }, { "epoch": 0.21941927221160568, "grad_norm": 169.8773193359375, "learning_rate": 4.793553974565773e-05, "loss": 65.7283, "step": 54310 }, { "epoch": 0.2194596734769733, "grad_norm": 554.336181640625, "learning_rate": 4.793415053110233e-05, "loss": 56.5391, "step": 54320 }, { "epoch": 0.21950007474234093, "grad_norm": 848.8544921875, "learning_rate": 4.7932760869433865e-05, "loss": 66.7205, "step": 54330 }, { "epoch": 0.21954047600770857, "grad_norm": 1150.58349609375, "learning_rate": 4.793137076067942e-05, "loss": 70.9918, "step": 54340 }, { "epoch": 0.21958087727307618, "grad_norm": 1347.6571044921875, "learning_rate": 4.792998020486609e-05, "loss": 75.1297, "step": 54350 }, { "epoch": 0.21962127853844382, "grad_norm": 796.1422119140625, "learning_rate": 4.792858920202099e-05, "loss": 91.2722, "step": 54360 }, { "epoch": 0.21966167980381146, "grad_norm": 978.01025390625, "learning_rate": 4.792719775217124e-05, "loss": 79.3633, "step": 54370 }, { "epoch": 0.2197020810691791, "grad_norm": 676.4722900390625, "learning_rate": 4.7925805855343975e-05, "loss": 76.3162, "step": 54380 }, { "epoch": 0.2197424823345467, "grad_norm": 820.4541625976562, "learning_rate": 4.7924413511566315e-05, "loss": 43.4046, "step": 54390 }, { "epoch": 0.21978288359991435, "grad_norm": 598.8165893554688, "learning_rate": 4.7923020720865414e-05, "loss": 57.7346, "step": 54400 }, { "epoch": 0.219823284865282, "grad_norm": 2260.6591796875, "learning_rate": 4.792162748326841e-05, "loss": 97.9996, "step": 54410 }, { "epoch": 0.2198636861306496, "grad_norm": 1380.8861083984375, "learning_rate": 4.792023379880249e-05, "loss": 88.7846, "step": 54420 }, { "epoch": 0.21990408739601724, "grad_norm": 1240.204833984375, "learning_rate": 4.791883966749482e-05, "loss": 78.3506, "step": 54430 }, { "epoch": 0.21994448866138488, "grad_norm": 436.11798095703125, "learning_rate": 4.791744508937256e-05, "loss": 80.6678, "step": 54440 }, { "epoch": 0.2199848899267525, "grad_norm": 1142.2906494140625, "learning_rate": 4.791605006446291e-05, "loss": 63.2783, "step": 54450 }, { "epoch": 0.22002529119212014, "grad_norm": 815.8292846679688, "learning_rate": 4.7914654592793065e-05, "loss": 77.567, "step": 54460 }, { "epoch": 0.22006569245748778, "grad_norm": 920.00439453125, "learning_rate": 4.791325867439024e-05, "loss": 57.8406, "step": 54470 }, { "epoch": 0.2201060937228554, "grad_norm": 752.0264282226562, "learning_rate": 4.791186230928163e-05, "loss": 94.0675, "step": 54480 }, { "epoch": 0.22014649498822303, "grad_norm": 1893.0584716796875, "learning_rate": 4.7910465497494474e-05, "loss": 136.9929, "step": 54490 }, { "epoch": 0.22018689625359067, "grad_norm": 634.4692993164062, "learning_rate": 4.790906823905599e-05, "loss": 69.6565, "step": 54500 }, { "epoch": 0.22022729751895828, "grad_norm": 395.0136413574219, "learning_rate": 4.790767053399343e-05, "loss": 33.5299, "step": 54510 }, { "epoch": 0.22026769878432592, "grad_norm": 1267.1798095703125, "learning_rate": 4.790627238233405e-05, "loss": 52.7564, "step": 54520 }, { "epoch": 0.22030810004969356, "grad_norm": 1663.51611328125, "learning_rate": 4.790487378410509e-05, "loss": 97.8893, "step": 54530 }, { "epoch": 0.2203485013150612, "grad_norm": 279.7654113769531, "learning_rate": 4.790347473933382e-05, "loss": 49.2227, "step": 54540 }, { "epoch": 0.2203889025804288, "grad_norm": 1237.12255859375, "learning_rate": 4.7902075248047515e-05, "loss": 93.2389, "step": 54550 }, { "epoch": 0.22042930384579645, "grad_norm": 1792.2801513671875, "learning_rate": 4.7900675310273466e-05, "loss": 65.3986, "step": 54560 }, { "epoch": 0.2204697051111641, "grad_norm": 923.925048828125, "learning_rate": 4.7899274926038976e-05, "loss": 60.5506, "step": 54570 }, { "epoch": 0.2205101063765317, "grad_norm": 624.0264282226562, "learning_rate": 4.789787409537131e-05, "loss": 46.6642, "step": 54580 }, { "epoch": 0.22055050764189935, "grad_norm": 732.738037109375, "learning_rate": 4.789647281829781e-05, "loss": 118.6461, "step": 54590 }, { "epoch": 0.22059090890726699, "grad_norm": 654.3661499023438, "learning_rate": 4.789507109484579e-05, "loss": 108.6401, "step": 54600 }, { "epoch": 0.2206313101726346, "grad_norm": 628.9659423828125, "learning_rate": 4.7893668925042565e-05, "loss": 56.0631, "step": 54610 }, { "epoch": 0.22067171143800224, "grad_norm": 1165.976806640625, "learning_rate": 4.789226630891548e-05, "loss": 67.4176, "step": 54620 }, { "epoch": 0.22071211270336988, "grad_norm": 2275.9443359375, "learning_rate": 4.789086324649187e-05, "loss": 81.9733, "step": 54630 }, { "epoch": 0.2207525139687375, "grad_norm": 1239.550537109375, "learning_rate": 4.78894597377991e-05, "loss": 81.8693, "step": 54640 }, { "epoch": 0.22079291523410513, "grad_norm": 474.0306701660156, "learning_rate": 4.788805578286454e-05, "loss": 70.7014, "step": 54650 }, { "epoch": 0.22083331649947277, "grad_norm": 1004.7076416015625, "learning_rate": 4.788665138171553e-05, "loss": 79.816, "step": 54660 }, { "epoch": 0.22087371776484038, "grad_norm": 536.8333129882812, "learning_rate": 4.788524653437948e-05, "loss": 107.741, "step": 54670 }, { "epoch": 0.22091411903020802, "grad_norm": 1520.8243408203125, "learning_rate": 4.7883841240883766e-05, "loss": 67.9091, "step": 54680 }, { "epoch": 0.22095452029557566, "grad_norm": 978.5899047851562, "learning_rate": 4.7882435501255785e-05, "loss": 61.7931, "step": 54690 }, { "epoch": 0.22099492156094327, "grad_norm": 2291.6357421875, "learning_rate": 4.788102931552294e-05, "loss": 66.0947, "step": 54700 }, { "epoch": 0.22103532282631091, "grad_norm": 1223.5504150390625, "learning_rate": 4.787962268371266e-05, "loss": 96.6182, "step": 54710 }, { "epoch": 0.22107572409167855, "grad_norm": 753.4441528320312, "learning_rate": 4.7878215605852336e-05, "loss": 78.4969, "step": 54720 }, { "epoch": 0.2211161253570462, "grad_norm": 2007.7423095703125, "learning_rate": 4.7876808081969436e-05, "loss": 88.9049, "step": 54730 }, { "epoch": 0.2211565266224138, "grad_norm": 520.70361328125, "learning_rate": 4.787540011209138e-05, "loss": 55.8599, "step": 54740 }, { "epoch": 0.22119692788778145, "grad_norm": 509.8031005859375, "learning_rate": 4.7873991696245624e-05, "loss": 68.4668, "step": 54750 }, { "epoch": 0.2212373291531491, "grad_norm": 1531.3848876953125, "learning_rate": 4.787258283445962e-05, "loss": 109.6678, "step": 54760 }, { "epoch": 0.2212777304185167, "grad_norm": 1034.49755859375, "learning_rate": 4.7871173526760835e-05, "loss": 82.1401, "step": 54770 }, { "epoch": 0.22131813168388434, "grad_norm": 1044.2027587890625, "learning_rate": 4.7869763773176756e-05, "loss": 55.1931, "step": 54780 }, { "epoch": 0.22135853294925198, "grad_norm": 937.3899536132812, "learning_rate": 4.786835357373486e-05, "loss": 86.6882, "step": 54790 }, { "epoch": 0.2213989342146196, "grad_norm": 816.9603881835938, "learning_rate": 4.7866942928462625e-05, "loss": 68.9065, "step": 54800 }, { "epoch": 0.22143933547998723, "grad_norm": 1069.583984375, "learning_rate": 4.7865531837387576e-05, "loss": 79.5692, "step": 54810 }, { "epoch": 0.22147973674535487, "grad_norm": 859.8712768554688, "learning_rate": 4.7864120300537206e-05, "loss": 52.4455, "step": 54820 }, { "epoch": 0.22152013801072248, "grad_norm": 861.6782836914062, "learning_rate": 4.786270831793904e-05, "loss": 67.6935, "step": 54830 }, { "epoch": 0.22156053927609012, "grad_norm": 382.9268798828125, "learning_rate": 4.786129588962061e-05, "loss": 85.1282, "step": 54840 }, { "epoch": 0.22160094054145776, "grad_norm": 1050.475830078125, "learning_rate": 4.785988301560944e-05, "loss": 103.8309, "step": 54850 }, { "epoch": 0.22164134180682538, "grad_norm": 691.6054077148438, "learning_rate": 4.785846969593308e-05, "loss": 68.233, "step": 54860 }, { "epoch": 0.22168174307219302, "grad_norm": 2130.216064453125, "learning_rate": 4.785705593061909e-05, "loss": 68.9335, "step": 54870 }, { "epoch": 0.22172214433756066, "grad_norm": 804.7174072265625, "learning_rate": 4.7855641719695023e-05, "loss": 60.727, "step": 54880 }, { "epoch": 0.2217625456029283, "grad_norm": 634.82080078125, "learning_rate": 4.785422706318846e-05, "loss": 88.999, "step": 54890 }, { "epoch": 0.2218029468682959, "grad_norm": 597.0457763671875, "learning_rate": 4.785281196112698e-05, "loss": 64.0293, "step": 54900 }, { "epoch": 0.22184334813366355, "grad_norm": 1046.802001953125, "learning_rate": 4.785139641353815e-05, "loss": 70.1819, "step": 54910 }, { "epoch": 0.2218837493990312, "grad_norm": 753.67236328125, "learning_rate": 4.7849980420449594e-05, "loss": 66.1747, "step": 54920 }, { "epoch": 0.2219241506643988, "grad_norm": 1037.9305419921875, "learning_rate": 4.7848563981888893e-05, "loss": 108.7891, "step": 54930 }, { "epoch": 0.22196455192976644, "grad_norm": 1170.2239990234375, "learning_rate": 4.784714709788368e-05, "loss": 102.6976, "step": 54940 }, { "epoch": 0.22200495319513408, "grad_norm": 894.5665283203125, "learning_rate": 4.7845729768461576e-05, "loss": 54.2852, "step": 54950 }, { "epoch": 0.2220453544605017, "grad_norm": 584.92431640625, "learning_rate": 4.7844311993650205e-05, "loss": 75.9472, "step": 54960 }, { "epoch": 0.22208575572586933, "grad_norm": 1176.026123046875, "learning_rate": 4.784289377347721e-05, "loss": 88.7713, "step": 54970 }, { "epoch": 0.22212615699123697, "grad_norm": 834.7158203125, "learning_rate": 4.7841475107970244e-05, "loss": 86.6488, "step": 54980 }, { "epoch": 0.22216655825660458, "grad_norm": 1956.4654541015625, "learning_rate": 4.784005599715696e-05, "loss": 53.2461, "step": 54990 }, { "epoch": 0.22220695952197222, "grad_norm": 1810.177734375, "learning_rate": 4.783863644106502e-05, "loss": 141.2733, "step": 55000 }, { "epoch": 0.22224736078733986, "grad_norm": 641.3799438476562, "learning_rate": 4.783721643972211e-05, "loss": 62.0612, "step": 55010 }, { "epoch": 0.22228776205270748, "grad_norm": 1118.1512451171875, "learning_rate": 4.783579599315591e-05, "loss": 80.4928, "step": 55020 }, { "epoch": 0.22232816331807512, "grad_norm": 1295.068115234375, "learning_rate": 4.783437510139411e-05, "loss": 75.4781, "step": 55030 }, { "epoch": 0.22236856458344276, "grad_norm": 489.80078125, "learning_rate": 4.7832953764464405e-05, "loss": 67.9461, "step": 55040 }, { "epoch": 0.2224089658488104, "grad_norm": 432.6492614746094, "learning_rate": 4.783153198239452e-05, "loss": 66.1955, "step": 55050 }, { "epoch": 0.222449367114178, "grad_norm": 0.0, "learning_rate": 4.783010975521216e-05, "loss": 61.2556, "step": 55060 }, { "epoch": 0.22248976837954565, "grad_norm": 656.0430908203125, "learning_rate": 4.7828687082945054e-05, "loss": 50.6344, "step": 55070 }, { "epoch": 0.2225301696449133, "grad_norm": 799.9983520507812, "learning_rate": 4.782726396562094e-05, "loss": 63.7389, "step": 55080 }, { "epoch": 0.2225705709102809, "grad_norm": 848.791748046875, "learning_rate": 4.782584040326757e-05, "loss": 83.2984, "step": 55090 }, { "epoch": 0.22261097217564854, "grad_norm": 749.4911499023438, "learning_rate": 4.7824416395912686e-05, "loss": 84.4783, "step": 55100 }, { "epoch": 0.22265137344101618, "grad_norm": 546.4697265625, "learning_rate": 4.782299194358405e-05, "loss": 80.4849, "step": 55110 }, { "epoch": 0.2226917747063838, "grad_norm": 587.6947021484375, "learning_rate": 4.782156704630944e-05, "loss": 79.875, "step": 55120 }, { "epoch": 0.22273217597175143, "grad_norm": 1178.3653564453125, "learning_rate": 4.782014170411663e-05, "loss": 77.4586, "step": 55130 }, { "epoch": 0.22277257723711907, "grad_norm": 641.4794921875, "learning_rate": 4.781871591703341e-05, "loss": 70.9194, "step": 55140 }, { "epoch": 0.22281297850248669, "grad_norm": 450.2964172363281, "learning_rate": 4.7817289685087577e-05, "loss": 92.5157, "step": 55150 }, { "epoch": 0.22285337976785433, "grad_norm": 1155.9171142578125, "learning_rate": 4.781586300830693e-05, "loss": 125.689, "step": 55160 }, { "epoch": 0.22289378103322197, "grad_norm": 684.2454833984375, "learning_rate": 4.781443588671929e-05, "loss": 61.9113, "step": 55170 }, { "epoch": 0.22293418229858958, "grad_norm": 0.0, "learning_rate": 4.781300832035247e-05, "loss": 66.496, "step": 55180 }, { "epoch": 0.22297458356395722, "grad_norm": 546.8045043945312, "learning_rate": 4.7811580309234314e-05, "loss": 74.1194, "step": 55190 }, { "epoch": 0.22301498482932486, "grad_norm": 559.2355346679688, "learning_rate": 4.781015185339266e-05, "loss": 63.7682, "step": 55200 }, { "epoch": 0.2230553860946925, "grad_norm": 848.5513916015625, "learning_rate": 4.7808722952855344e-05, "loss": 59.4205, "step": 55210 }, { "epoch": 0.2230957873600601, "grad_norm": 513.669677734375, "learning_rate": 4.780729360765024e-05, "loss": 69.9661, "step": 55220 }, { "epoch": 0.22313618862542775, "grad_norm": 1898.05419921875, "learning_rate": 4.7805863817805196e-05, "loss": 60.6413, "step": 55230 }, { "epoch": 0.2231765898907954, "grad_norm": 304.1728820800781, "learning_rate": 4.78044335833481e-05, "loss": 65.169, "step": 55240 }, { "epoch": 0.223216991156163, "grad_norm": 1311.47265625, "learning_rate": 4.780300290430682e-05, "loss": 75.9994, "step": 55250 }, { "epoch": 0.22325739242153064, "grad_norm": 867.0023193359375, "learning_rate": 4.780157178070928e-05, "loss": 70.7438, "step": 55260 }, { "epoch": 0.22329779368689828, "grad_norm": 749.3482055664062, "learning_rate": 4.780014021258334e-05, "loss": 87.707, "step": 55270 }, { "epoch": 0.2233381949522659, "grad_norm": 717.660888671875, "learning_rate": 4.779870819995694e-05, "loss": 90.9197, "step": 55280 }, { "epoch": 0.22337859621763353, "grad_norm": 677.76708984375, "learning_rate": 4.779727574285798e-05, "loss": 69.9488, "step": 55290 }, { "epoch": 0.22341899748300117, "grad_norm": 781.3521728515625, "learning_rate": 4.77958428413144e-05, "loss": 83.5576, "step": 55300 }, { "epoch": 0.2234593987483688, "grad_norm": 678.9473266601562, "learning_rate": 4.779440949535412e-05, "loss": 74.7966, "step": 55310 }, { "epoch": 0.22349980001373643, "grad_norm": 1800.738037109375, "learning_rate": 4.779297570500509e-05, "loss": 97.8668, "step": 55320 }, { "epoch": 0.22354020127910407, "grad_norm": 642.32568359375, "learning_rate": 4.779154147029527e-05, "loss": 38.1043, "step": 55330 }, { "epoch": 0.22358060254447168, "grad_norm": 955.9810180664062, "learning_rate": 4.7790106791252614e-05, "loss": 99.5178, "step": 55340 }, { "epoch": 0.22362100380983932, "grad_norm": 2772.03564453125, "learning_rate": 4.7788671667905096e-05, "loss": 103.198, "step": 55350 }, { "epoch": 0.22366140507520696, "grad_norm": 896.0523071289062, "learning_rate": 4.7787236100280685e-05, "loss": 104.9025, "step": 55360 }, { "epoch": 0.2237018063405746, "grad_norm": 657.1527709960938, "learning_rate": 4.7785800088407376e-05, "loss": 64.4315, "step": 55370 }, { "epoch": 0.2237422076059422, "grad_norm": 1311.0645751953125, "learning_rate": 4.7784363632313166e-05, "loss": 82.5705, "step": 55380 }, { "epoch": 0.22378260887130985, "grad_norm": 636.6400146484375, "learning_rate": 4.778292673202606e-05, "loss": 69.7723, "step": 55390 }, { "epoch": 0.2238230101366775, "grad_norm": 768.77392578125, "learning_rate": 4.778148938757406e-05, "loss": 88.6552, "step": 55400 }, { "epoch": 0.2238634114020451, "grad_norm": 494.320068359375, "learning_rate": 4.7780051598985196e-05, "loss": 78.5235, "step": 55410 }, { "epoch": 0.22390381266741274, "grad_norm": 525.7529907226562, "learning_rate": 4.7778613366287505e-05, "loss": 106.9901, "step": 55420 }, { "epoch": 0.22394421393278038, "grad_norm": 574.625732421875, "learning_rate": 4.7777174689509006e-05, "loss": 81.0395, "step": 55430 }, { "epoch": 0.223984615198148, "grad_norm": 599.45947265625, "learning_rate": 4.7775735568677775e-05, "loss": 68.9037, "step": 55440 }, { "epoch": 0.22402501646351564, "grad_norm": 653.6656494140625, "learning_rate": 4.777429600382185e-05, "loss": 79.6398, "step": 55450 }, { "epoch": 0.22406541772888328, "grad_norm": 372.3857116699219, "learning_rate": 4.777285599496929e-05, "loss": 47.1804, "step": 55460 }, { "epoch": 0.2241058189942509, "grad_norm": 444.17919921875, "learning_rate": 4.777141554214819e-05, "loss": 79.5894, "step": 55470 }, { "epoch": 0.22414622025961853, "grad_norm": 952.4342041015625, "learning_rate": 4.776997464538662e-05, "loss": 79.2442, "step": 55480 }, { "epoch": 0.22418662152498617, "grad_norm": 1239.525146484375, "learning_rate": 4.776853330471266e-05, "loss": 70.9746, "step": 55490 }, { "epoch": 0.22422702279035378, "grad_norm": 5761.12841796875, "learning_rate": 4.776709152015443e-05, "loss": 110.6402, "step": 55500 }, { "epoch": 0.22426742405572142, "grad_norm": 5974.0244140625, "learning_rate": 4.776564929174003e-05, "loss": 88.7858, "step": 55510 }, { "epoch": 0.22430782532108906, "grad_norm": 702.3343505859375, "learning_rate": 4.776420661949758e-05, "loss": 80.6855, "step": 55520 }, { "epoch": 0.2243482265864567, "grad_norm": 736.3425903320312, "learning_rate": 4.776276350345519e-05, "loss": 74.7216, "step": 55530 }, { "epoch": 0.2243886278518243, "grad_norm": 412.60272216796875, "learning_rate": 4.776131994364102e-05, "loss": 80.698, "step": 55540 }, { "epoch": 0.22442902911719195, "grad_norm": 723.0787353515625, "learning_rate": 4.775987594008319e-05, "loss": 106.2656, "step": 55550 }, { "epoch": 0.2244694303825596, "grad_norm": 598.0833129882812, "learning_rate": 4.775843149280986e-05, "loss": 60.9806, "step": 55560 }, { "epoch": 0.2245098316479272, "grad_norm": 1235.12060546875, "learning_rate": 4.775698660184919e-05, "loss": 80.7508, "step": 55570 }, { "epoch": 0.22455023291329484, "grad_norm": 874.450927734375, "learning_rate": 4.775554126722935e-05, "loss": 42.9898, "step": 55580 }, { "epoch": 0.22459063417866248, "grad_norm": 589.5454711914062, "learning_rate": 4.775409548897853e-05, "loss": 94.5234, "step": 55590 }, { "epoch": 0.2246310354440301, "grad_norm": 918.5469360351562, "learning_rate": 4.775264926712489e-05, "loss": 48.5788, "step": 55600 }, { "epoch": 0.22467143670939774, "grad_norm": 1047.6563720703125, "learning_rate": 4.775120260169665e-05, "loss": 88.8342, "step": 55610 }, { "epoch": 0.22471183797476538, "grad_norm": 932.34912109375, "learning_rate": 4.774975549272199e-05, "loss": 48.4354, "step": 55620 }, { "epoch": 0.224752239240133, "grad_norm": 614.5720825195312, "learning_rate": 4.774830794022915e-05, "loss": 93.6599, "step": 55630 }, { "epoch": 0.22479264050550063, "grad_norm": 503.9685363769531, "learning_rate": 4.7746859944246325e-05, "loss": 85.0896, "step": 55640 }, { "epoch": 0.22483304177086827, "grad_norm": 662.5595703125, "learning_rate": 4.774541150480175e-05, "loss": 103.3519, "step": 55650 }, { "epoch": 0.22487344303623588, "grad_norm": 982.870849609375, "learning_rate": 4.7743962621923674e-05, "loss": 82.5962, "step": 55660 }, { "epoch": 0.22491384430160352, "grad_norm": 1002.5994873046875, "learning_rate": 4.774251329564034e-05, "loss": 64.0428, "step": 55670 }, { "epoch": 0.22495424556697116, "grad_norm": 360.1722412109375, "learning_rate": 4.7741063525980004e-05, "loss": 75.0625, "step": 55680 }, { "epoch": 0.2249946468323388, "grad_norm": 1272.3038330078125, "learning_rate": 4.773961331297092e-05, "loss": 114.2109, "step": 55690 }, { "epoch": 0.2250350480977064, "grad_norm": 6723.4404296875, "learning_rate": 4.773816265664136e-05, "loss": 103.1604, "step": 55700 }, { "epoch": 0.22507544936307405, "grad_norm": 2476.1787109375, "learning_rate": 4.7736711557019617e-05, "loss": 101.4048, "step": 55710 }, { "epoch": 0.2251158506284417, "grad_norm": 1094.4051513671875, "learning_rate": 4.7735260014133986e-05, "loss": 80.1582, "step": 55720 }, { "epoch": 0.2251562518938093, "grad_norm": 990.44677734375, "learning_rate": 4.773380802801275e-05, "loss": 80.2596, "step": 55730 }, { "epoch": 0.22519665315917695, "grad_norm": 700.4454956054688, "learning_rate": 4.773235559868422e-05, "loss": 78.9232, "step": 55740 }, { "epoch": 0.22523705442454459, "grad_norm": 1065.6510009765625, "learning_rate": 4.773090272617672e-05, "loss": 85.6272, "step": 55750 }, { "epoch": 0.2252774556899122, "grad_norm": 810.5206909179688, "learning_rate": 4.772944941051856e-05, "loss": 83.4951, "step": 55760 }, { "epoch": 0.22531785695527984, "grad_norm": 508.2962341308594, "learning_rate": 4.772799565173809e-05, "loss": 63.031, "step": 55770 }, { "epoch": 0.22535825822064748, "grad_norm": 371.3773498535156, "learning_rate": 4.772654144986364e-05, "loss": 96.568, "step": 55780 }, { "epoch": 0.2253986594860151, "grad_norm": 492.9206237792969, "learning_rate": 4.772508680492356e-05, "loss": 76.7317, "step": 55790 }, { "epoch": 0.22543906075138273, "grad_norm": 368.9469909667969, "learning_rate": 4.772363171694622e-05, "loss": 62.3136, "step": 55800 }, { "epoch": 0.22547946201675037, "grad_norm": 445.32501220703125, "learning_rate": 4.7722176185959974e-05, "loss": 47.5343, "step": 55810 }, { "epoch": 0.22551986328211798, "grad_norm": 385.32098388671875, "learning_rate": 4.772072021199321e-05, "loss": 86.4635, "step": 55820 }, { "epoch": 0.22556026454748562, "grad_norm": 828.4990844726562, "learning_rate": 4.771926379507431e-05, "loss": 79.7598, "step": 55830 }, { "epoch": 0.22560066581285326, "grad_norm": 881.7483520507812, "learning_rate": 4.7717806935231665e-05, "loss": 54.1686, "step": 55840 }, { "epoch": 0.2256410670782209, "grad_norm": 882.0201416015625, "learning_rate": 4.7716349632493674e-05, "loss": 61.8202, "step": 55850 }, { "epoch": 0.22568146834358851, "grad_norm": 1207.105712890625, "learning_rate": 4.7714891886888756e-05, "loss": 78.0542, "step": 55860 }, { "epoch": 0.22572186960895615, "grad_norm": 1286.038330078125, "learning_rate": 4.771343369844532e-05, "loss": 84.5764, "step": 55870 }, { "epoch": 0.2257622708743238, "grad_norm": 518.7008056640625, "learning_rate": 4.771197506719181e-05, "loss": 68.4155, "step": 55880 }, { "epoch": 0.2258026721396914, "grad_norm": 1427.2728271484375, "learning_rate": 4.7710515993156645e-05, "loss": 89.9567, "step": 55890 }, { "epoch": 0.22584307340505905, "grad_norm": 964.0196533203125, "learning_rate": 4.770905647636828e-05, "loss": 68.5106, "step": 55900 }, { "epoch": 0.2258834746704267, "grad_norm": 452.55487060546875, "learning_rate": 4.770759651685517e-05, "loss": 56.08, "step": 55910 }, { "epoch": 0.2259238759357943, "grad_norm": 1233.5595703125, "learning_rate": 4.770613611464577e-05, "loss": 74.0022, "step": 55920 }, { "epoch": 0.22596427720116194, "grad_norm": 1455.181640625, "learning_rate": 4.7704675269768565e-05, "loss": 91.9318, "step": 55930 }, { "epoch": 0.22600467846652958, "grad_norm": 735.1796264648438, "learning_rate": 4.7703213982252016e-05, "loss": 85.3958, "step": 55940 }, { "epoch": 0.2260450797318972, "grad_norm": 670.2793579101562, "learning_rate": 4.770175225212463e-05, "loss": 60.0693, "step": 55950 }, { "epoch": 0.22608548099726483, "grad_norm": 820.1773071289062, "learning_rate": 4.7700290079414896e-05, "loss": 75.0173, "step": 55960 }, { "epoch": 0.22612588226263247, "grad_norm": 640.2984619140625, "learning_rate": 4.769882746415132e-05, "loss": 58.8079, "step": 55970 }, { "epoch": 0.22616628352800008, "grad_norm": 1904.8134765625, "learning_rate": 4.769736440636241e-05, "loss": 59.5396, "step": 55980 }, { "epoch": 0.22620668479336772, "grad_norm": 764.6589965820312, "learning_rate": 4.76959009060767e-05, "loss": 47.2034, "step": 55990 }, { "epoch": 0.22624708605873536, "grad_norm": 312.55859375, "learning_rate": 4.769443696332272e-05, "loss": 62.7167, "step": 56000 }, { "epoch": 0.226287487324103, "grad_norm": 560.5413818359375, "learning_rate": 4.7692972578129005e-05, "loss": 79.5518, "step": 56010 }, { "epoch": 0.22632788858947062, "grad_norm": 664.6395874023438, "learning_rate": 4.769150775052411e-05, "loss": 63.6301, "step": 56020 }, { "epoch": 0.22636828985483826, "grad_norm": 1466.82666015625, "learning_rate": 4.769004248053658e-05, "loss": 63.513, "step": 56030 }, { "epoch": 0.2264086911202059, "grad_norm": 994.0587768554688, "learning_rate": 4.7688576768194994e-05, "loss": 59.1224, "step": 56040 }, { "epoch": 0.2264490923855735, "grad_norm": 700.6870727539062, "learning_rate": 4.7687110613527926e-05, "loss": 85.2521, "step": 56050 }, { "epoch": 0.22648949365094115, "grad_norm": 1606.5682373046875, "learning_rate": 4.7685644016563956e-05, "loss": 97.3078, "step": 56060 }, { "epoch": 0.2265298949163088, "grad_norm": 559.5364379882812, "learning_rate": 4.7684176977331674e-05, "loss": 75.0282, "step": 56070 }, { "epoch": 0.2265702961816764, "grad_norm": 750.7931518554688, "learning_rate": 4.768270949585968e-05, "loss": 89.0941, "step": 56080 }, { "epoch": 0.22661069744704404, "grad_norm": 843.3683471679688, "learning_rate": 4.7681241572176596e-05, "loss": 76.073, "step": 56090 }, { "epoch": 0.22665109871241168, "grad_norm": 759.1117553710938, "learning_rate": 4.767977320631103e-05, "loss": 72.5775, "step": 56100 }, { "epoch": 0.2266914999777793, "grad_norm": 687.8123168945312, "learning_rate": 4.76783043982916e-05, "loss": 53.4036, "step": 56110 }, { "epoch": 0.22673190124314693, "grad_norm": 996.044921875, "learning_rate": 4.767683514814696e-05, "loss": 56.8859, "step": 56120 }, { "epoch": 0.22677230250851457, "grad_norm": 821.6911010742188, "learning_rate": 4.767536545590574e-05, "loss": 66.1622, "step": 56130 }, { "epoch": 0.22681270377388218, "grad_norm": 1311.89892578125, "learning_rate": 4.767389532159659e-05, "loss": 69.1287, "step": 56140 }, { "epoch": 0.22685310503924982, "grad_norm": 734.8349609375, "learning_rate": 4.7672424745248176e-05, "loss": 54.9413, "step": 56150 }, { "epoch": 0.22689350630461746, "grad_norm": 854.7682495117188, "learning_rate": 4.767095372688918e-05, "loss": 98.0047, "step": 56160 }, { "epoch": 0.2269339075699851, "grad_norm": 969.171630859375, "learning_rate": 4.7669482266548264e-05, "loss": 98.2503, "step": 56170 }, { "epoch": 0.22697430883535272, "grad_norm": 1126.5001220703125, "learning_rate": 4.7668010364254124e-05, "loss": 63.528, "step": 56180 }, { "epoch": 0.22701471010072036, "grad_norm": 1452.1922607421875, "learning_rate": 4.7666538020035445e-05, "loss": 68.9039, "step": 56190 }, { "epoch": 0.227055111366088, "grad_norm": 1715.3773193359375, "learning_rate": 4.7665065233920945e-05, "loss": 109.966, "step": 56200 }, { "epoch": 0.2270955126314556, "grad_norm": 631.9252319335938, "learning_rate": 4.766359200593933e-05, "loss": 49.2157, "step": 56210 }, { "epoch": 0.22713591389682325, "grad_norm": 1373.4630126953125, "learning_rate": 4.766211833611931e-05, "loss": 74.802, "step": 56220 }, { "epoch": 0.2271763151621909, "grad_norm": 2058.58740234375, "learning_rate": 4.766064422448964e-05, "loss": 75.4964, "step": 56230 }, { "epoch": 0.2272167164275585, "grad_norm": 2785.7197265625, "learning_rate": 4.765916967107903e-05, "loss": 77.155, "step": 56240 }, { "epoch": 0.22725711769292614, "grad_norm": 593.6171264648438, "learning_rate": 4.765769467591625e-05, "loss": 60.3693, "step": 56250 }, { "epoch": 0.22729751895829378, "grad_norm": 1533.2698974609375, "learning_rate": 4.7656219239030046e-05, "loss": 95.2811, "step": 56260 }, { "epoch": 0.2273379202236614, "grad_norm": 806.5111083984375, "learning_rate": 4.7654743360449186e-05, "loss": 65.6008, "step": 56270 }, { "epoch": 0.22737832148902903, "grad_norm": 1836.31787109375, "learning_rate": 4.7653267040202436e-05, "loss": 80.5326, "step": 56280 }, { "epoch": 0.22741872275439667, "grad_norm": 733.4277954101562, "learning_rate": 4.765179027831858e-05, "loss": 81.6788, "step": 56290 }, { "epoch": 0.22745912401976429, "grad_norm": 1102.4857177734375, "learning_rate": 4.7650313074826425e-05, "loss": 63.4363, "step": 56300 }, { "epoch": 0.22749952528513193, "grad_norm": 1184.0565185546875, "learning_rate": 4.764883542975475e-05, "loss": 113.9402, "step": 56310 }, { "epoch": 0.22753992655049957, "grad_norm": 901.2548828125, "learning_rate": 4.764735734313236e-05, "loss": 61.9935, "step": 56320 }, { "epoch": 0.2275803278158672, "grad_norm": 750.1239624023438, "learning_rate": 4.7645878814988075e-05, "loss": 82.1202, "step": 56330 }, { "epoch": 0.22762072908123482, "grad_norm": 1717.6053466796875, "learning_rate": 4.764439984535074e-05, "loss": 54.0498, "step": 56340 }, { "epoch": 0.22766113034660246, "grad_norm": 755.3112182617188, "learning_rate": 4.764292043424916e-05, "loss": 46.0645, "step": 56350 }, { "epoch": 0.2277015316119701, "grad_norm": 819.234375, "learning_rate": 4.764144058171219e-05, "loss": 68.3072, "step": 56360 }, { "epoch": 0.2277419328773377, "grad_norm": 1272.74462890625, "learning_rate": 4.763996028776868e-05, "loss": 81.0632, "step": 56370 }, { "epoch": 0.22778233414270535, "grad_norm": 970.59765625, "learning_rate": 4.763847955244749e-05, "loss": 45.2182, "step": 56380 }, { "epoch": 0.227822735408073, "grad_norm": 895.1033325195312, "learning_rate": 4.7636998375777486e-05, "loss": 78.1244, "step": 56390 }, { "epoch": 0.2278631366734406, "grad_norm": 1142.0018310546875, "learning_rate": 4.763551675778755e-05, "loss": 93.2416, "step": 56400 }, { "epoch": 0.22790353793880824, "grad_norm": 694.2943725585938, "learning_rate": 4.7634034698506545e-05, "loss": 66.8523, "step": 56410 }, { "epoch": 0.22794393920417588, "grad_norm": 649.4854736328125, "learning_rate": 4.76325521979634e-05, "loss": 94.2294, "step": 56420 }, { "epoch": 0.2279843404695435, "grad_norm": 1363.1820068359375, "learning_rate": 4.7631069256186986e-05, "loss": 68.8548, "step": 56430 }, { "epoch": 0.22802474173491113, "grad_norm": 766.2236328125, "learning_rate": 4.7629585873206226e-05, "loss": 98.7785, "step": 56440 }, { "epoch": 0.22806514300027877, "grad_norm": 1092.222900390625, "learning_rate": 4.7628102049050036e-05, "loss": 68.331, "step": 56450 }, { "epoch": 0.2281055442656464, "grad_norm": 745.677978515625, "learning_rate": 4.7626617783747364e-05, "loss": 86.9224, "step": 56460 }, { "epoch": 0.22814594553101403, "grad_norm": 848.4603881835938, "learning_rate": 4.762513307732711e-05, "loss": 97.7969, "step": 56470 }, { "epoch": 0.22818634679638167, "grad_norm": 924.7987670898438, "learning_rate": 4.762364792981825e-05, "loss": 68.205, "step": 56480 }, { "epoch": 0.2282267480617493, "grad_norm": 442.6836242675781, "learning_rate": 4.762216234124972e-05, "loss": 125.6712, "step": 56490 }, { "epoch": 0.22826714932711692, "grad_norm": 716.2772827148438, "learning_rate": 4.762067631165049e-05, "loss": 126.1072, "step": 56500 }, { "epoch": 0.22830755059248456, "grad_norm": 956.81005859375, "learning_rate": 4.761918984104953e-05, "loss": 51.0658, "step": 56510 }, { "epoch": 0.2283479518578522, "grad_norm": 379.62493896484375, "learning_rate": 4.761770292947582e-05, "loss": 97.7945, "step": 56520 }, { "epoch": 0.2283883531232198, "grad_norm": 1116.5791015625, "learning_rate": 4.761621557695834e-05, "loss": 77.2991, "step": 56530 }, { "epoch": 0.22842875438858745, "grad_norm": 544.430419921875, "learning_rate": 4.76147277835261e-05, "loss": 94.4459, "step": 56540 }, { "epoch": 0.2284691556539551, "grad_norm": 827.6028442382812, "learning_rate": 4.7613239549208106e-05, "loss": 53.5641, "step": 56550 }, { "epoch": 0.2285095569193227, "grad_norm": 1755.43505859375, "learning_rate": 4.7611750874033356e-05, "loss": 97.6019, "step": 56560 }, { "epoch": 0.22854995818469034, "grad_norm": 456.3388671875, "learning_rate": 4.7610261758030886e-05, "loss": 65.4111, "step": 56570 }, { "epoch": 0.22859035945005798, "grad_norm": 710.4873046875, "learning_rate": 4.760877220122971e-05, "loss": 70.0858, "step": 56580 }, { "epoch": 0.2286307607154256, "grad_norm": 717.0205688476562, "learning_rate": 4.76072822036589e-05, "loss": 80.4853, "step": 56590 }, { "epoch": 0.22867116198079324, "grad_norm": 813.551513671875, "learning_rate": 4.760579176534747e-05, "loss": 72.0974, "step": 56600 }, { "epoch": 0.22871156324616088, "grad_norm": 1653.39111328125, "learning_rate": 4.7604300886324496e-05, "loss": 37.6728, "step": 56610 }, { "epoch": 0.2287519645115285, "grad_norm": 1388.64501953125, "learning_rate": 4.760280956661903e-05, "loss": 102.1589, "step": 56620 }, { "epoch": 0.22879236577689613, "grad_norm": 1519.29931640625, "learning_rate": 4.760131780626017e-05, "loss": 107.9805, "step": 56630 }, { "epoch": 0.22883276704226377, "grad_norm": 967.7742919921875, "learning_rate": 4.759982560527698e-05, "loss": 86.072, "step": 56640 }, { "epoch": 0.2288731683076314, "grad_norm": 1597.58642578125, "learning_rate": 4.7598332963698545e-05, "loss": 95.1602, "step": 56650 }, { "epoch": 0.22891356957299902, "grad_norm": 616.4041137695312, "learning_rate": 4.7596839881553976e-05, "loss": 83.347, "step": 56660 }, { "epoch": 0.22895397083836666, "grad_norm": 728.1845703125, "learning_rate": 4.75953463588724e-05, "loss": 58.7279, "step": 56670 }, { "epoch": 0.2289943721037343, "grad_norm": 1369.0509033203125, "learning_rate": 4.759385239568289e-05, "loss": 104.2516, "step": 56680 }, { "epoch": 0.2290347733691019, "grad_norm": 445.80267333984375, "learning_rate": 4.75923579920146e-05, "loss": 69.9564, "step": 56690 }, { "epoch": 0.22907517463446955, "grad_norm": 1323.3516845703125, "learning_rate": 4.7590863147896666e-05, "loss": 57.2469, "step": 56700 }, { "epoch": 0.2291155758998372, "grad_norm": 1134.4810791015625, "learning_rate": 4.7589367863358225e-05, "loss": 74.4222, "step": 56710 }, { "epoch": 0.2291559771652048, "grad_norm": 1246.8629150390625, "learning_rate": 4.758787213842842e-05, "loss": 97.5686, "step": 56720 }, { "epoch": 0.22919637843057244, "grad_norm": 1165.90478515625, "learning_rate": 4.758637597313642e-05, "loss": 84.3428, "step": 56730 }, { "epoch": 0.22923677969594008, "grad_norm": 683.56689453125, "learning_rate": 4.7584879367511395e-05, "loss": 61.9244, "step": 56740 }, { "epoch": 0.2292771809613077, "grad_norm": 2286.635498046875, "learning_rate": 4.758338232158252e-05, "loss": 90.8525, "step": 56750 }, { "epoch": 0.22931758222667534, "grad_norm": 1139.4610595703125, "learning_rate": 4.758188483537898e-05, "loss": 93.7091, "step": 56760 }, { "epoch": 0.22935798349204298, "grad_norm": 517.6661376953125, "learning_rate": 4.758038690892997e-05, "loss": 62.2874, "step": 56770 }, { "epoch": 0.2293983847574106, "grad_norm": 657.8548583984375, "learning_rate": 4.7578888542264686e-05, "loss": 90.596, "step": 56780 }, { "epoch": 0.22943878602277823, "grad_norm": 1066.094482421875, "learning_rate": 4.757738973541236e-05, "loss": 54.5622, "step": 56790 }, { "epoch": 0.22947918728814587, "grad_norm": 1410.8668212890625, "learning_rate": 4.7575890488402185e-05, "loss": 44.4098, "step": 56800 }, { "epoch": 0.2295195885535135, "grad_norm": 1515.7193603515625, "learning_rate": 4.75743908012634e-05, "loss": 89.9203, "step": 56810 }, { "epoch": 0.22955998981888112, "grad_norm": 1256.6802978515625, "learning_rate": 4.757289067402525e-05, "loss": 65.3287, "step": 56820 }, { "epoch": 0.22960039108424876, "grad_norm": 520.7095947265625, "learning_rate": 4.757139010671697e-05, "loss": 67.0209, "step": 56830 }, { "epoch": 0.2296407923496164, "grad_norm": 1852.7618408203125, "learning_rate": 4.7569889099367824e-05, "loss": 90.3647, "step": 56840 }, { "epoch": 0.229681193614984, "grad_norm": 888.56787109375, "learning_rate": 4.7568387652007075e-05, "loss": 97.766, "step": 56850 }, { "epoch": 0.22972159488035165, "grad_norm": 515.2661743164062, "learning_rate": 4.756688576466398e-05, "loss": 52.9156, "step": 56860 }, { "epoch": 0.2297619961457193, "grad_norm": 779.8905639648438, "learning_rate": 4.756538343736784e-05, "loss": 85.2727, "step": 56870 }, { "epoch": 0.2298023974110869, "grad_norm": 808.3165893554688, "learning_rate": 4.756388067014792e-05, "loss": 89.8556, "step": 56880 }, { "epoch": 0.22984279867645455, "grad_norm": 1065.549072265625, "learning_rate": 4.7562377463033536e-05, "loss": 81.1974, "step": 56890 }, { "epoch": 0.22988319994182219, "grad_norm": 1067.1673583984375, "learning_rate": 4.7560873816053984e-05, "loss": 61.5579, "step": 56900 }, { "epoch": 0.2299236012071898, "grad_norm": 768.210205078125, "learning_rate": 4.755936972923859e-05, "loss": 75.4602, "step": 56910 }, { "epoch": 0.22996400247255744, "grad_norm": 333.94232177734375, "learning_rate": 4.7557865202616656e-05, "loss": 65.8191, "step": 56920 }, { "epoch": 0.23000440373792508, "grad_norm": 1091.112548828125, "learning_rate": 4.7556360236217534e-05, "loss": 112.1934, "step": 56930 }, { "epoch": 0.2300448050032927, "grad_norm": 1078.4996337890625, "learning_rate": 4.755485483007056e-05, "loss": 80.1494, "step": 56940 }, { "epoch": 0.23008520626866033, "grad_norm": 663.4862670898438, "learning_rate": 4.755334898420507e-05, "loss": 81.2759, "step": 56950 }, { "epoch": 0.23012560753402797, "grad_norm": 1670.8721923828125, "learning_rate": 4.7551842698650436e-05, "loss": 98.7399, "step": 56960 }, { "epoch": 0.2301660087993956, "grad_norm": 707.2112426757812, "learning_rate": 4.755033597343602e-05, "loss": 84.5847, "step": 56970 }, { "epoch": 0.23020641006476322, "grad_norm": 913.306884765625, "learning_rate": 4.7548828808591195e-05, "loss": 78.6086, "step": 56980 }, { "epoch": 0.23024681133013086, "grad_norm": 390.49578857421875, "learning_rate": 4.754732120414534e-05, "loss": 81.2955, "step": 56990 }, { "epoch": 0.2302872125954985, "grad_norm": 593.7713623046875, "learning_rate": 4.754581316012785e-05, "loss": 59.7465, "step": 57000 }, { "epoch": 0.23032761386086611, "grad_norm": 704.7691650390625, "learning_rate": 4.754430467656812e-05, "loss": 61.9324, "step": 57010 }, { "epoch": 0.23036801512623375, "grad_norm": 770.904296875, "learning_rate": 4.7542795753495574e-05, "loss": 68.9715, "step": 57020 }, { "epoch": 0.2304084163916014, "grad_norm": 630.1139526367188, "learning_rate": 4.754128639093961e-05, "loss": 82.8383, "step": 57030 }, { "epoch": 0.230448817656969, "grad_norm": 1761.17626953125, "learning_rate": 4.753977658892967e-05, "loss": 59.8425, "step": 57040 }, { "epoch": 0.23048921892233665, "grad_norm": 1344.968017578125, "learning_rate": 4.753826634749518e-05, "loss": 89.4451, "step": 57050 }, { "epoch": 0.2305296201877043, "grad_norm": 448.2859802246094, "learning_rate": 4.753675566666558e-05, "loss": 79.9184, "step": 57060 }, { "epoch": 0.2305700214530719, "grad_norm": 999.4277954101562, "learning_rate": 4.7535244546470325e-05, "loss": 112.2555, "step": 57070 }, { "epoch": 0.23061042271843954, "grad_norm": 1350.092041015625, "learning_rate": 4.753373298693888e-05, "loss": 84.6386, "step": 57080 }, { "epoch": 0.23065082398380718, "grad_norm": 318.74359130859375, "learning_rate": 4.753222098810071e-05, "loss": 48.7075, "step": 57090 }, { "epoch": 0.2306912252491748, "grad_norm": 1049.048095703125, "learning_rate": 4.7530708549985287e-05, "loss": 55.4899, "step": 57100 }, { "epoch": 0.23073162651454243, "grad_norm": 2994.60107421875, "learning_rate": 4.75291956726221e-05, "loss": 82.0188, "step": 57110 }, { "epoch": 0.23077202777991007, "grad_norm": 1358.6580810546875, "learning_rate": 4.752768235604065e-05, "loss": 84.3785, "step": 57120 }, { "epoch": 0.2308124290452777, "grad_norm": 309.6406555175781, "learning_rate": 4.7526168600270435e-05, "loss": 73.9946, "step": 57130 }, { "epoch": 0.23085283031064532, "grad_norm": 1002.0283203125, "learning_rate": 4.752465440534096e-05, "loss": 104.0093, "step": 57140 }, { "epoch": 0.23089323157601296, "grad_norm": 697.1533813476562, "learning_rate": 4.752313977128175e-05, "loss": 45.6771, "step": 57150 }, { "epoch": 0.2309336328413806, "grad_norm": 1255.6212158203125, "learning_rate": 4.752162469812234e-05, "loss": 77.5493, "step": 57160 }, { "epoch": 0.23097403410674822, "grad_norm": 5676.9970703125, "learning_rate": 4.752010918589226e-05, "loss": 64.4288, "step": 57170 }, { "epoch": 0.23101443537211586, "grad_norm": 564.4259033203125, "learning_rate": 4.7518593234621056e-05, "loss": 98.8771, "step": 57180 }, { "epoch": 0.2310548366374835, "grad_norm": 1186.0013427734375, "learning_rate": 4.7517076844338285e-05, "loss": 67.5268, "step": 57190 }, { "epoch": 0.2310952379028511, "grad_norm": 1269.7178955078125, "learning_rate": 4.7515560015073514e-05, "loss": 89.8433, "step": 57200 }, { "epoch": 0.23113563916821875, "grad_norm": 0.0, "learning_rate": 4.75140427468563e-05, "loss": 60.9059, "step": 57210 }, { "epoch": 0.2311760404335864, "grad_norm": 1260.9530029296875, "learning_rate": 4.751252503971624e-05, "loss": 68.3741, "step": 57220 }, { "epoch": 0.231216441698954, "grad_norm": 1468.294677734375, "learning_rate": 4.75110068936829e-05, "loss": 86.1376, "step": 57230 }, { "epoch": 0.23125684296432164, "grad_norm": 631.3265991210938, "learning_rate": 4.7509488308785905e-05, "loss": 92.6364, "step": 57240 }, { "epoch": 0.23129724422968928, "grad_norm": 391.2497863769531, "learning_rate": 4.7507969285054845e-05, "loss": 57.7668, "step": 57250 }, { "epoch": 0.2313376454950569, "grad_norm": 1009.0247192382812, "learning_rate": 4.750644982251933e-05, "loss": 60.969, "step": 57260 }, { "epoch": 0.23137804676042453, "grad_norm": 787.4730834960938, "learning_rate": 4.7504929921208984e-05, "loss": 105.2391, "step": 57270 }, { "epoch": 0.23141844802579217, "grad_norm": 649.8621215820312, "learning_rate": 4.750340958115346e-05, "loss": 62.9108, "step": 57280 }, { "epoch": 0.2314588492911598, "grad_norm": 1550.597900390625, "learning_rate": 4.7501888802382365e-05, "loss": 96.6346, "step": 57290 }, { "epoch": 0.23149925055652743, "grad_norm": 1971.5574951171875, "learning_rate": 4.750036758492537e-05, "loss": 85.7584, "step": 57300 }, { "epoch": 0.23153965182189506, "grad_norm": 386.6150817871094, "learning_rate": 4.749884592881212e-05, "loss": 109.9105, "step": 57310 }, { "epoch": 0.2315800530872627, "grad_norm": 1582.1597900390625, "learning_rate": 4.749732383407229e-05, "loss": 112.0731, "step": 57320 }, { "epoch": 0.23162045435263032, "grad_norm": 0.0, "learning_rate": 4.7495801300735554e-05, "loss": 52.8345, "step": 57330 }, { "epoch": 0.23166085561799796, "grad_norm": 489.4527893066406, "learning_rate": 4.7494278328831584e-05, "loss": 69.7233, "step": 57340 }, { "epoch": 0.2317012568833656, "grad_norm": 483.29364013671875, "learning_rate": 4.7492754918390074e-05, "loss": 89.4249, "step": 57350 }, { "epoch": 0.2317416581487332, "grad_norm": 2289.152099609375, "learning_rate": 4.749123106944073e-05, "loss": 75.3741, "step": 57360 }, { "epoch": 0.23178205941410085, "grad_norm": 730.8201904296875, "learning_rate": 4.748970678201326e-05, "loss": 76.5618, "step": 57370 }, { "epoch": 0.2318224606794685, "grad_norm": 604.216064453125, "learning_rate": 4.7488182056137374e-05, "loss": 64.0703, "step": 57380 }, { "epoch": 0.2318628619448361, "grad_norm": 850.9320068359375, "learning_rate": 4.74866568918428e-05, "loss": 82.7347, "step": 57390 }, { "epoch": 0.23190326321020374, "grad_norm": 948.7066650390625, "learning_rate": 4.7485131289159276e-05, "loss": 76.1124, "step": 57400 }, { "epoch": 0.23194366447557138, "grad_norm": 985.8638916015625, "learning_rate": 4.7483605248116544e-05, "loss": 90.0347, "step": 57410 }, { "epoch": 0.231984065740939, "grad_norm": 322.2492370605469, "learning_rate": 4.7482078768744345e-05, "loss": 97.9077, "step": 57420 }, { "epoch": 0.23202446700630663, "grad_norm": 1223.1875, "learning_rate": 4.7480551851072454e-05, "loss": 53.8952, "step": 57430 }, { "epoch": 0.23206486827167427, "grad_norm": 455.7870788574219, "learning_rate": 4.747902449513063e-05, "loss": 78.5205, "step": 57440 }, { "epoch": 0.23210526953704191, "grad_norm": 1016.0787963867188, "learning_rate": 4.747749670094864e-05, "loss": 86.3612, "step": 57450 }, { "epoch": 0.23214567080240953, "grad_norm": 1260.7244873046875, "learning_rate": 4.7475968468556295e-05, "loss": 84.6346, "step": 57460 }, { "epoch": 0.23218607206777717, "grad_norm": 437.6191711425781, "learning_rate": 4.7474439797983364e-05, "loss": 107.3411, "step": 57470 }, { "epoch": 0.2322264733331448, "grad_norm": 516.1616821289062, "learning_rate": 4.7472910689259655e-05, "loss": 71.8174, "step": 57480 }, { "epoch": 0.23226687459851242, "grad_norm": 662.67822265625, "learning_rate": 4.747138114241499e-05, "loss": 73.287, "step": 57490 }, { "epoch": 0.23230727586388006, "grad_norm": 1739.5025634765625, "learning_rate": 4.7469851157479177e-05, "loss": 92.4851, "step": 57500 }, { "epoch": 0.2323476771292477, "grad_norm": 682.4597778320312, "learning_rate": 4.746832073448205e-05, "loss": 88.4044, "step": 57510 }, { "epoch": 0.2323880783946153, "grad_norm": 797.5489501953125, "learning_rate": 4.7466789873453444e-05, "loss": 86.4066, "step": 57520 }, { "epoch": 0.23242847965998295, "grad_norm": 423.3626708984375, "learning_rate": 4.74652585744232e-05, "loss": 62.8509, "step": 57530 }, { "epoch": 0.2324688809253506, "grad_norm": 712.4830322265625, "learning_rate": 4.746372683742117e-05, "loss": 42.4057, "step": 57540 }, { "epoch": 0.2325092821907182, "grad_norm": 799.061767578125, "learning_rate": 4.746219466247722e-05, "loss": 52.0032, "step": 57550 }, { "epoch": 0.23254968345608584, "grad_norm": 945.8649291992188, "learning_rate": 4.746066204962123e-05, "loss": 88.8774, "step": 57560 }, { "epoch": 0.23259008472145348, "grad_norm": 803.7813110351562, "learning_rate": 4.745912899888306e-05, "loss": 93.842, "step": 57570 }, { "epoch": 0.2326304859868211, "grad_norm": 696.2904663085938, "learning_rate": 4.745759551029261e-05, "loss": 101.2722, "step": 57580 }, { "epoch": 0.23267088725218874, "grad_norm": 857.1948852539062, "learning_rate": 4.745606158387978e-05, "loss": 61.6568, "step": 57590 }, { "epoch": 0.23271128851755638, "grad_norm": 548.427734375, "learning_rate": 4.745452721967446e-05, "loss": 96.5383, "step": 57600 }, { "epoch": 0.23275168978292402, "grad_norm": 1423.155517578125, "learning_rate": 4.745299241770658e-05, "loss": 85.5254, "step": 57610 }, { "epoch": 0.23279209104829163, "grad_norm": 496.381591796875, "learning_rate": 4.745145717800605e-05, "loss": 67.1594, "step": 57620 }, { "epoch": 0.23283249231365927, "grad_norm": 629.1244506835938, "learning_rate": 4.74499215006028e-05, "loss": 64.9573, "step": 57630 }, { "epoch": 0.2328728935790269, "grad_norm": 903.4159545898438, "learning_rate": 4.744838538552677e-05, "loss": 76.1402, "step": 57640 }, { "epoch": 0.23291329484439452, "grad_norm": 504.8460693359375, "learning_rate": 4.744684883280792e-05, "loss": 82.0837, "step": 57650 }, { "epoch": 0.23295369610976216, "grad_norm": 923.25732421875, "learning_rate": 4.744531184247619e-05, "loss": 61.5699, "step": 57660 }, { "epoch": 0.2329940973751298, "grad_norm": 869.8147583007812, "learning_rate": 4.744377441456155e-05, "loss": 68.7058, "step": 57670 }, { "epoch": 0.2330344986404974, "grad_norm": 924.66845703125, "learning_rate": 4.744223654909397e-05, "loss": 109.6333, "step": 57680 }, { "epoch": 0.23307489990586505, "grad_norm": 1206.5703125, "learning_rate": 4.744069824610344e-05, "loss": 95.6685, "step": 57690 }, { "epoch": 0.2331153011712327, "grad_norm": 777.4462280273438, "learning_rate": 4.743915950561994e-05, "loss": 65.7932, "step": 57700 }, { "epoch": 0.2331557024366003, "grad_norm": 741.0814819335938, "learning_rate": 4.743762032767348e-05, "loss": 51.2344, "step": 57710 }, { "epoch": 0.23319610370196794, "grad_norm": 898.6174926757812, "learning_rate": 4.743608071229405e-05, "loss": 97.9571, "step": 57720 }, { "epoch": 0.23323650496733558, "grad_norm": 949.9400024414062, "learning_rate": 4.743454065951168e-05, "loss": 88.1819, "step": 57730 }, { "epoch": 0.2332769062327032, "grad_norm": 6401.75244140625, "learning_rate": 4.743300016935639e-05, "loss": 86.2424, "step": 57740 }, { "epoch": 0.23331730749807084, "grad_norm": 1315.59130859375, "learning_rate": 4.743145924185821e-05, "loss": 78.7651, "step": 57750 }, { "epoch": 0.23335770876343848, "grad_norm": 968.3670043945312, "learning_rate": 4.742991787704719e-05, "loss": 53.8725, "step": 57760 }, { "epoch": 0.2333981100288061, "grad_norm": 281.5350036621094, "learning_rate": 4.7428376074953365e-05, "loss": 88.1397, "step": 57770 }, { "epoch": 0.23343851129417373, "grad_norm": 1783.01318359375, "learning_rate": 4.7426833835606806e-05, "loss": 111.0655, "step": 57780 }, { "epoch": 0.23347891255954137, "grad_norm": 971.6173095703125, "learning_rate": 4.7425291159037575e-05, "loss": 71.0971, "step": 57790 }, { "epoch": 0.233519313824909, "grad_norm": 966.53662109375, "learning_rate": 4.742374804527575e-05, "loss": 85.1464, "step": 57800 }, { "epoch": 0.23355971509027662, "grad_norm": 1798.930419921875, "learning_rate": 4.742220449435141e-05, "loss": 84.1092, "step": 57810 }, { "epoch": 0.23360011635564426, "grad_norm": 1772.7415771484375, "learning_rate": 4.742066050629465e-05, "loss": 50.9059, "step": 57820 }, { "epoch": 0.2336405176210119, "grad_norm": 942.0059814453125, "learning_rate": 4.741911608113557e-05, "loss": 77.9877, "step": 57830 }, { "epoch": 0.2336809188863795, "grad_norm": 472.93890380859375, "learning_rate": 4.741757121890428e-05, "loss": 67.7341, "step": 57840 }, { "epoch": 0.23372132015174715, "grad_norm": 731.5870971679688, "learning_rate": 4.7416025919630904e-05, "loss": 96.6892, "step": 57850 }, { "epoch": 0.2337617214171148, "grad_norm": 973.33935546875, "learning_rate": 4.741448018334555e-05, "loss": 106.4843, "step": 57860 }, { "epoch": 0.2338021226824824, "grad_norm": 1150.518310546875, "learning_rate": 4.741293401007837e-05, "loss": 57.7009, "step": 57870 }, { "epoch": 0.23384252394785005, "grad_norm": 879.5131225585938, "learning_rate": 4.741138739985951e-05, "loss": 75.2222, "step": 57880 }, { "epoch": 0.23388292521321769, "grad_norm": 1559.7349853515625, "learning_rate": 4.7409840352719106e-05, "loss": 95.9505, "step": 57890 }, { "epoch": 0.2339233264785853, "grad_norm": 1342.956298828125, "learning_rate": 4.740829286868733e-05, "loss": 74.7077, "step": 57900 }, { "epoch": 0.23396372774395294, "grad_norm": 1618.971923828125, "learning_rate": 4.740674494779435e-05, "loss": 72.2942, "step": 57910 }, { "epoch": 0.23400412900932058, "grad_norm": 598.3280029296875, "learning_rate": 4.740519659007033e-05, "loss": 71.8578, "step": 57920 }, { "epoch": 0.2340445302746882, "grad_norm": 1528.6131591796875, "learning_rate": 4.7403647795545484e-05, "loss": 109.1537, "step": 57930 }, { "epoch": 0.23408493154005583, "grad_norm": 1394.4090576171875, "learning_rate": 4.7402098564249974e-05, "loss": 58.2658, "step": 57940 }, { "epoch": 0.23412533280542347, "grad_norm": 415.70263671875, "learning_rate": 4.7400548896214024e-05, "loss": 67.7115, "step": 57950 }, { "epoch": 0.2341657340707911, "grad_norm": 928.4489135742188, "learning_rate": 4.739899879146785e-05, "loss": 54.671, "step": 57960 }, { "epoch": 0.23420613533615872, "grad_norm": 995.4201049804688, "learning_rate": 4.739744825004165e-05, "loss": 85.7247, "step": 57970 }, { "epoch": 0.23424653660152636, "grad_norm": 950.1658935546875, "learning_rate": 4.739589727196568e-05, "loss": 53.7052, "step": 57980 }, { "epoch": 0.234286937866894, "grad_norm": 431.6142883300781, "learning_rate": 4.739434585727015e-05, "loss": 56.5389, "step": 57990 }, { "epoch": 0.23432733913226161, "grad_norm": 1080.2391357421875, "learning_rate": 4.7392794005985326e-05, "loss": 82.0306, "step": 58000 }, { "epoch": 0.23436774039762925, "grad_norm": 1104.7730712890625, "learning_rate": 4.739124171814145e-05, "loss": 64.9178, "step": 58010 }, { "epoch": 0.2344081416629969, "grad_norm": 1969.99755859375, "learning_rate": 4.7389688993768786e-05, "loss": 97.3219, "step": 58020 }, { "epoch": 0.2344485429283645, "grad_norm": 792.9730224609375, "learning_rate": 4.738813583289762e-05, "loss": 55.4297, "step": 58030 }, { "epoch": 0.23448894419373215, "grad_norm": 1705.9310302734375, "learning_rate": 4.7386582235558205e-05, "loss": 105.6232, "step": 58040 }, { "epoch": 0.2345293454590998, "grad_norm": 630.877197265625, "learning_rate": 4.738502820178085e-05, "loss": 72.6224, "step": 58050 }, { "epoch": 0.2345697467244674, "grad_norm": 656.330322265625, "learning_rate": 4.738347373159585e-05, "loss": 106.0807, "step": 58060 }, { "epoch": 0.23461014798983504, "grad_norm": 917.6051635742188, "learning_rate": 4.73819188250335e-05, "loss": 82.6257, "step": 58070 }, { "epoch": 0.23465054925520268, "grad_norm": 574.9983520507812, "learning_rate": 4.738036348212412e-05, "loss": 86.4343, "step": 58080 }, { "epoch": 0.2346909505205703, "grad_norm": 593.380859375, "learning_rate": 4.737880770289803e-05, "loss": 84.0278, "step": 58090 }, { "epoch": 0.23473135178593793, "grad_norm": 342.4287109375, "learning_rate": 4.737725148738557e-05, "loss": 48.0205, "step": 58100 }, { "epoch": 0.23477175305130557, "grad_norm": 520.4551391601562, "learning_rate": 4.737569483561707e-05, "loss": 48.0131, "step": 58110 }, { "epoch": 0.2348121543166732, "grad_norm": 431.37115478515625, "learning_rate": 4.737413774762287e-05, "loss": 49.1139, "step": 58120 }, { "epoch": 0.23485255558204082, "grad_norm": 1225.8043212890625, "learning_rate": 4.737258022343335e-05, "loss": 55.6529, "step": 58130 }, { "epoch": 0.23489295684740846, "grad_norm": 475.2739562988281, "learning_rate": 4.737102226307884e-05, "loss": 72.1621, "step": 58140 }, { "epoch": 0.2349333581127761, "grad_norm": 5526.28271484375, "learning_rate": 4.736946386658976e-05, "loss": 81.7198, "step": 58150 }, { "epoch": 0.23497375937814372, "grad_norm": 836.2157592773438, "learning_rate": 4.7367905033996445e-05, "loss": 64.377, "step": 58160 }, { "epoch": 0.23501416064351136, "grad_norm": 880.4158935546875, "learning_rate": 4.736634576532931e-05, "loss": 92.249, "step": 58170 }, { "epoch": 0.235054561908879, "grad_norm": 582.840576171875, "learning_rate": 4.736478606061875e-05, "loss": 52.3027, "step": 58180 }, { "epoch": 0.2350949631742466, "grad_norm": 587.2545776367188, "learning_rate": 4.7363225919895185e-05, "loss": 57.8036, "step": 58190 }, { "epoch": 0.23513536443961425, "grad_norm": 975.4487915039062, "learning_rate": 4.7361665343189e-05, "loss": 74.3185, "step": 58200 }, { "epoch": 0.2351757657049819, "grad_norm": 1377.298583984375, "learning_rate": 4.736010433053064e-05, "loss": 80.2555, "step": 58210 }, { "epoch": 0.2352161669703495, "grad_norm": 806.4180297851562, "learning_rate": 4.735854288195054e-05, "loss": 63.3192, "step": 58220 }, { "epoch": 0.23525656823571714, "grad_norm": 1805.80810546875, "learning_rate": 4.735698099747913e-05, "loss": 73.7252, "step": 58230 }, { "epoch": 0.23529696950108478, "grad_norm": 1094.813232421875, "learning_rate": 4.735541867714687e-05, "loss": 114.6661, "step": 58240 }, { "epoch": 0.2353373707664524, "grad_norm": 3966.76123046875, "learning_rate": 4.73538559209842e-05, "loss": 168.1956, "step": 58250 }, { "epoch": 0.23537777203182003, "grad_norm": 496.0440673828125, "learning_rate": 4.735229272902162e-05, "loss": 90.3166, "step": 58260 }, { "epoch": 0.23541817329718767, "grad_norm": 639.0549926757812, "learning_rate": 4.735072910128957e-05, "loss": 101.5113, "step": 58270 }, { "epoch": 0.2354585745625553, "grad_norm": 643.226318359375, "learning_rate": 4.734916503781856e-05, "loss": 46.3206, "step": 58280 }, { "epoch": 0.23549897582792292, "grad_norm": 1588.71142578125, "learning_rate": 4.7347600538639067e-05, "loss": 82.4462, "step": 58290 }, { "epoch": 0.23553937709329056, "grad_norm": 306.2060852050781, "learning_rate": 4.73460356037816e-05, "loss": 86.551, "step": 58300 }, { "epoch": 0.2355797783586582, "grad_norm": 838.1617431640625, "learning_rate": 4.734447023327666e-05, "loss": 62.018, "step": 58310 }, { "epoch": 0.23562017962402582, "grad_norm": 839.5139770507812, "learning_rate": 4.7342904427154766e-05, "loss": 68.918, "step": 58320 }, { "epoch": 0.23566058088939346, "grad_norm": 2884.371826171875, "learning_rate": 4.734133818544645e-05, "loss": 93.363, "step": 58330 }, { "epoch": 0.2357009821547611, "grad_norm": 1337.994873046875, "learning_rate": 4.733977150818225e-05, "loss": 80.4189, "step": 58340 }, { "epoch": 0.2357413834201287, "grad_norm": 916.3220825195312, "learning_rate": 4.7338204395392694e-05, "loss": 64.318, "step": 58350 }, { "epoch": 0.23578178468549635, "grad_norm": 4077.7041015625, "learning_rate": 4.733663684710835e-05, "loss": 106.2886, "step": 58360 }, { "epoch": 0.235822185950864, "grad_norm": 471.49566650390625, "learning_rate": 4.7335068863359764e-05, "loss": 76.7145, "step": 58370 }, { "epoch": 0.2358625872162316, "grad_norm": 821.7672119140625, "learning_rate": 4.733350044417752e-05, "loss": 59.775, "step": 58380 }, { "epoch": 0.23590298848159924, "grad_norm": 628.004638671875, "learning_rate": 4.733193158959218e-05, "loss": 65.8933, "step": 58390 }, { "epoch": 0.23594338974696688, "grad_norm": 487.54107666015625, "learning_rate": 4.733036229963435e-05, "loss": 78.1683, "step": 58400 }, { "epoch": 0.2359837910123345, "grad_norm": 967.7349243164062, "learning_rate": 4.732879257433459e-05, "loss": 87.9057, "step": 58410 }, { "epoch": 0.23602419227770213, "grad_norm": 834.74755859375, "learning_rate": 4.7327222413723536e-05, "loss": 74.7745, "step": 58420 }, { "epoch": 0.23606459354306977, "grad_norm": 1843.8294677734375, "learning_rate": 4.7325651817831784e-05, "loss": 69.399, "step": 58430 }, { "epoch": 0.2361049948084374, "grad_norm": 900.3565063476562, "learning_rate": 4.732408078668995e-05, "loss": 70.5233, "step": 58440 }, { "epoch": 0.23614539607380503, "grad_norm": 420.2508544921875, "learning_rate": 4.7322509320328675e-05, "loss": 67.6304, "step": 58450 }, { "epoch": 0.23618579733917267, "grad_norm": 1126.9072265625, "learning_rate": 4.732093741877859e-05, "loss": 95.9313, "step": 58460 }, { "epoch": 0.2362261986045403, "grad_norm": 496.6501770019531, "learning_rate": 4.731936508207033e-05, "loss": 65.5002, "step": 58470 }, { "epoch": 0.23626659986990792, "grad_norm": 511.6619567871094, "learning_rate": 4.731779231023456e-05, "loss": 68.3283, "step": 58480 }, { "epoch": 0.23630700113527556, "grad_norm": 728.8594970703125, "learning_rate": 4.731621910330194e-05, "loss": 71.2413, "step": 58490 }, { "epoch": 0.2363474024006432, "grad_norm": 721.872802734375, "learning_rate": 4.731464546130314e-05, "loss": 64.1135, "step": 58500 }, { "epoch": 0.2363878036660108, "grad_norm": 557.9456787109375, "learning_rate": 4.7313071384268836e-05, "loss": 71.9643, "step": 58510 }, { "epoch": 0.23642820493137845, "grad_norm": 952.9915771484375, "learning_rate": 4.731149687222972e-05, "loss": 72.1631, "step": 58520 }, { "epoch": 0.2364686061967461, "grad_norm": 406.0745544433594, "learning_rate": 4.7309921925216484e-05, "loss": 87.8844, "step": 58530 }, { "epoch": 0.2365090074621137, "grad_norm": 6206.943359375, "learning_rate": 4.730834654325984e-05, "loss": 117.5255, "step": 58540 }, { "epoch": 0.23654940872748134, "grad_norm": 0.0, "learning_rate": 4.7306770726390496e-05, "loss": 52.3261, "step": 58550 }, { "epoch": 0.23658980999284898, "grad_norm": 1959.0308837890625, "learning_rate": 4.730519447463916e-05, "loss": 87.7869, "step": 58560 }, { "epoch": 0.2366302112582166, "grad_norm": 1353.8363037109375, "learning_rate": 4.730361778803658e-05, "loss": 92.4961, "step": 58570 }, { "epoch": 0.23667061252358423, "grad_norm": 1127.2203369140625, "learning_rate": 4.730204066661349e-05, "loss": 101.3056, "step": 58580 }, { "epoch": 0.23671101378895187, "grad_norm": 1524.7484130859375, "learning_rate": 4.730046311040064e-05, "loss": 144.1944, "step": 58590 }, { "epoch": 0.23675141505431951, "grad_norm": 831.2205200195312, "learning_rate": 4.7298885119428773e-05, "loss": 70.5365, "step": 58600 }, { "epoch": 0.23679181631968713, "grad_norm": 913.74853515625, "learning_rate": 4.729730669372866e-05, "loss": 68.5036, "step": 58610 }, { "epoch": 0.23683221758505477, "grad_norm": 2814.275146484375, "learning_rate": 4.729572783333108e-05, "loss": 90.047, "step": 58620 }, { "epoch": 0.2368726188504224, "grad_norm": 1022.5120849609375, "learning_rate": 4.72941485382668e-05, "loss": 84.8219, "step": 58630 }, { "epoch": 0.23691302011579002, "grad_norm": 665.8732299804688, "learning_rate": 4.729256880856662e-05, "loss": 63.3056, "step": 58640 }, { "epoch": 0.23695342138115766, "grad_norm": 2222.1650390625, "learning_rate": 4.7290988644261336e-05, "loss": 77.9888, "step": 58650 }, { "epoch": 0.2369938226465253, "grad_norm": 780.3485717773438, "learning_rate": 4.728940804538176e-05, "loss": 58.1334, "step": 58660 }, { "epoch": 0.2370342239118929, "grad_norm": 568.1907958984375, "learning_rate": 4.728782701195869e-05, "loss": 53.3181, "step": 58670 }, { "epoch": 0.23707462517726055, "grad_norm": 662.433349609375, "learning_rate": 4.728624554402295e-05, "loss": 67.1239, "step": 58680 }, { "epoch": 0.2371150264426282, "grad_norm": 742.7056274414062, "learning_rate": 4.7284663641605384e-05, "loss": 56.0102, "step": 58690 }, { "epoch": 0.2371554277079958, "grad_norm": 1382.75439453125, "learning_rate": 4.728308130473683e-05, "loss": 52.9646, "step": 58700 }, { "epoch": 0.23719582897336344, "grad_norm": 1125.7822265625, "learning_rate": 4.7281498533448136e-05, "loss": 69.9835, "step": 58710 }, { "epoch": 0.23723623023873108, "grad_norm": 446.69677734375, "learning_rate": 4.7279915327770155e-05, "loss": 54.0501, "step": 58720 }, { "epoch": 0.2372766315040987, "grad_norm": 2016.18310546875, "learning_rate": 4.7278331687733754e-05, "loss": 111.3491, "step": 58730 }, { "epoch": 0.23731703276946634, "grad_norm": 1517.979736328125, "learning_rate": 4.727674761336981e-05, "loss": 95.3152, "step": 58740 }, { "epoch": 0.23735743403483398, "grad_norm": 965.1806030273438, "learning_rate": 4.72751631047092e-05, "loss": 77.6278, "step": 58750 }, { "epoch": 0.23739783530020162, "grad_norm": 697.4866333007812, "learning_rate": 4.727357816178282e-05, "loss": 86.2706, "step": 58760 }, { "epoch": 0.23743823656556923, "grad_norm": 521.2189331054688, "learning_rate": 4.727199278462156e-05, "loss": 59.1101, "step": 58770 }, { "epoch": 0.23747863783093687, "grad_norm": 456.9306945800781, "learning_rate": 4.727040697325634e-05, "loss": 57.2227, "step": 58780 }, { "epoch": 0.2375190390963045, "grad_norm": 1488.9251708984375, "learning_rate": 4.726882072771807e-05, "loss": 83.9987, "step": 58790 }, { "epoch": 0.23755944036167212, "grad_norm": 816.0877075195312, "learning_rate": 4.7267234048037664e-05, "loss": 75.2146, "step": 58800 }, { "epoch": 0.23759984162703976, "grad_norm": 1049.84375, "learning_rate": 4.726564693424608e-05, "loss": 85.2812, "step": 58810 }, { "epoch": 0.2376402428924074, "grad_norm": 660.714599609375, "learning_rate": 4.7264059386374236e-05, "loss": 50.8552, "step": 58820 }, { "epoch": 0.237680644157775, "grad_norm": 835.5247192382812, "learning_rate": 4.72624714044531e-05, "loss": 85.9735, "step": 58830 }, { "epoch": 0.23772104542314265, "grad_norm": 934.1461181640625, "learning_rate": 4.7260882988513624e-05, "loss": 82.1574, "step": 58840 }, { "epoch": 0.2377614466885103, "grad_norm": 253.60675048828125, "learning_rate": 4.725929413858677e-05, "loss": 86.5755, "step": 58850 }, { "epoch": 0.2378018479538779, "grad_norm": 476.08544921875, "learning_rate": 4.725770485470351e-05, "loss": 79.0762, "step": 58860 }, { "epoch": 0.23784224921924554, "grad_norm": 1652.21484375, "learning_rate": 4.725611513689485e-05, "loss": 76.6932, "step": 58870 }, { "epoch": 0.23788265048461318, "grad_norm": 504.9270324707031, "learning_rate": 4.725452498519175e-05, "loss": 76.0411, "step": 58880 }, { "epoch": 0.2379230517499808, "grad_norm": 1309.692626953125, "learning_rate": 4.7252934399625234e-05, "loss": 52.6607, "step": 58890 }, { "epoch": 0.23796345301534844, "grad_norm": 1309.8441162109375, "learning_rate": 4.725134338022631e-05, "loss": 88.8201, "step": 58900 }, { "epoch": 0.23800385428071608, "grad_norm": 1632.8326416015625, "learning_rate": 4.7249751927025996e-05, "loss": 88.7722, "step": 58910 }, { "epoch": 0.23804425554608372, "grad_norm": 535.5170288085938, "learning_rate": 4.7248160040055304e-05, "loss": 47.7584, "step": 58920 }, { "epoch": 0.23808465681145133, "grad_norm": 708.9509887695312, "learning_rate": 4.724656771934528e-05, "loss": 83.726, "step": 58930 }, { "epoch": 0.23812505807681897, "grad_norm": 579.57275390625, "learning_rate": 4.7244974964926965e-05, "loss": 62.5475, "step": 58940 }, { "epoch": 0.2381654593421866, "grad_norm": 651.062744140625, "learning_rate": 4.724338177683141e-05, "loss": 63.3703, "step": 58950 }, { "epoch": 0.23820586060755422, "grad_norm": 1036.9290771484375, "learning_rate": 4.724178815508967e-05, "loss": 59.7281, "step": 58960 }, { "epoch": 0.23824626187292186, "grad_norm": 406.64044189453125, "learning_rate": 4.724019409973283e-05, "loss": 75.1738, "step": 58970 }, { "epoch": 0.2382866631382895, "grad_norm": 533.7338256835938, "learning_rate": 4.723859961079195e-05, "loss": 76.1549, "step": 58980 }, { "epoch": 0.2383270644036571, "grad_norm": 595.7689819335938, "learning_rate": 4.7237004688298125e-05, "loss": 134.3011, "step": 58990 }, { "epoch": 0.23836746566902475, "grad_norm": 1294.3282470703125, "learning_rate": 4.723540933228244e-05, "loss": 100.5167, "step": 59000 }, { "epoch": 0.2384078669343924, "grad_norm": 612.8972778320312, "learning_rate": 4.7233813542776006e-05, "loss": 75.6335, "step": 59010 }, { "epoch": 0.23844826819976, "grad_norm": 1156.35498046875, "learning_rate": 4.723221731980993e-05, "loss": 70.4552, "step": 59020 }, { "epoch": 0.23848866946512765, "grad_norm": 1819.1259765625, "learning_rate": 4.723062066341533e-05, "loss": 64.2368, "step": 59030 }, { "epoch": 0.23852907073049529, "grad_norm": 765.05615234375, "learning_rate": 4.722902357362333e-05, "loss": 80.1383, "step": 59040 }, { "epoch": 0.2385694719958629, "grad_norm": 623.2485961914062, "learning_rate": 4.7227426050465084e-05, "loss": 73.9087, "step": 59050 }, { "epoch": 0.23860987326123054, "grad_norm": 719.6350708007812, "learning_rate": 4.722582809397171e-05, "loss": 59.0293, "step": 59060 }, { "epoch": 0.23865027452659818, "grad_norm": 410.1199645996094, "learning_rate": 4.722422970417438e-05, "loss": 58.2241, "step": 59070 }, { "epoch": 0.23869067579196582, "grad_norm": 598.96240234375, "learning_rate": 4.722263088110426e-05, "loss": 79.149, "step": 59080 }, { "epoch": 0.23873107705733343, "grad_norm": 504.0932922363281, "learning_rate": 4.72210316247925e-05, "loss": 67.0265, "step": 59090 }, { "epoch": 0.23877147832270107, "grad_norm": 1157.12060546875, "learning_rate": 4.721943193527029e-05, "loss": 89.9269, "step": 59100 }, { "epoch": 0.2388118795880687, "grad_norm": 1023.6410522460938, "learning_rate": 4.7217831812568815e-05, "loss": 59.8878, "step": 59110 }, { "epoch": 0.23885228085343632, "grad_norm": 299.01300048828125, "learning_rate": 4.721623125671927e-05, "loss": 106.7344, "step": 59120 }, { "epoch": 0.23889268211880396, "grad_norm": 490.2772521972656, "learning_rate": 4.7214630267752856e-05, "loss": 84.1761, "step": 59130 }, { "epoch": 0.2389330833841716, "grad_norm": 689.8060913085938, "learning_rate": 4.721302884570079e-05, "loss": 61.8113, "step": 59140 }, { "epoch": 0.23897348464953921, "grad_norm": 573.6192016601562, "learning_rate": 4.7211426990594296e-05, "loss": 84.5191, "step": 59150 }, { "epoch": 0.23901388591490685, "grad_norm": 554.744384765625, "learning_rate": 4.720982470246459e-05, "loss": 68.6441, "step": 59160 }, { "epoch": 0.2390542871802745, "grad_norm": 279.83795166015625, "learning_rate": 4.720822198134293e-05, "loss": 70.0726, "step": 59170 }, { "epoch": 0.2390946884456421, "grad_norm": 1090.882568359375, "learning_rate": 4.7206618827260534e-05, "loss": 41.3233, "step": 59180 }, { "epoch": 0.23913508971100975, "grad_norm": 2077.9443359375, "learning_rate": 4.720501524024867e-05, "loss": 81.099, "step": 59190 }, { "epoch": 0.2391754909763774, "grad_norm": 537.9066772460938, "learning_rate": 4.720341122033862e-05, "loss": 71.1957, "step": 59200 }, { "epoch": 0.239215892241745, "grad_norm": 1677.8251953125, "learning_rate": 4.720180676756162e-05, "loss": 70.0094, "step": 59210 }, { "epoch": 0.23925629350711264, "grad_norm": 618.0469970703125, "learning_rate": 4.720020188194897e-05, "loss": 60.1108, "step": 59220 }, { "epoch": 0.23929669477248028, "grad_norm": 843.9923095703125, "learning_rate": 4.719859656353196e-05, "loss": 54.7157, "step": 59230 }, { "epoch": 0.23933709603784792, "grad_norm": 1101.4031982421875, "learning_rate": 4.719699081234188e-05, "loss": 101.9431, "step": 59240 }, { "epoch": 0.23937749730321553, "grad_norm": 541.9828491210938, "learning_rate": 4.719538462841003e-05, "loss": 85.9788, "step": 59250 }, { "epoch": 0.23941789856858317, "grad_norm": 470.3389587402344, "learning_rate": 4.719377801176774e-05, "loss": 73.3981, "step": 59260 }, { "epoch": 0.2394582998339508, "grad_norm": 867.8907470703125, "learning_rate": 4.719217096244631e-05, "loss": 56.2378, "step": 59270 }, { "epoch": 0.23949870109931842, "grad_norm": 1353.7008056640625, "learning_rate": 4.7190563480477095e-05, "loss": 57.5405, "step": 59280 }, { "epoch": 0.23953910236468606, "grad_norm": 968.8933715820312, "learning_rate": 4.718895556589141e-05, "loss": 55.8982, "step": 59290 }, { "epoch": 0.2395795036300537, "grad_norm": 1031.3507080078125, "learning_rate": 4.718734721872062e-05, "loss": 72.3141, "step": 59300 }, { "epoch": 0.23961990489542132, "grad_norm": 531.7722778320312, "learning_rate": 4.718573843899607e-05, "loss": 79.3897, "step": 59310 }, { "epoch": 0.23966030616078896, "grad_norm": 1232.718994140625, "learning_rate": 4.718412922674913e-05, "loss": 79.8532, "step": 59320 }, { "epoch": 0.2397007074261566, "grad_norm": 3018.275390625, "learning_rate": 4.718251958201117e-05, "loss": 88.7309, "step": 59330 }, { "epoch": 0.2397411086915242, "grad_norm": 1827.9061279296875, "learning_rate": 4.718090950481356e-05, "loss": 101.574, "step": 59340 }, { "epoch": 0.23978150995689185, "grad_norm": 715.8681640625, "learning_rate": 4.71792989951877e-05, "loss": 72.9536, "step": 59350 }, { "epoch": 0.2398219112222595, "grad_norm": 1006.0107421875, "learning_rate": 4.717768805316501e-05, "loss": 67.8689, "step": 59360 }, { "epoch": 0.2398623124876271, "grad_norm": 1562.623779296875, "learning_rate": 4.717607667877685e-05, "loss": 94.7435, "step": 59370 }, { "epoch": 0.23990271375299474, "grad_norm": 822.7528686523438, "learning_rate": 4.717446487205466e-05, "loss": 71.5322, "step": 59380 }, { "epoch": 0.23994311501836238, "grad_norm": 733.3274536132812, "learning_rate": 4.717285263302987e-05, "loss": 63.2148, "step": 59390 }, { "epoch": 0.23998351628373002, "grad_norm": 1009.9617919921875, "learning_rate": 4.71712399617339e-05, "loss": 92.269, "step": 59400 }, { "epoch": 0.24002391754909763, "grad_norm": 768.0454711914062, "learning_rate": 4.716962685819819e-05, "loss": 46.8281, "step": 59410 }, { "epoch": 0.24006431881446527, "grad_norm": 392.7835693359375, "learning_rate": 4.716801332245419e-05, "loss": 57.8384, "step": 59420 }, { "epoch": 0.2401047200798329, "grad_norm": 1200.4879150390625, "learning_rate": 4.7166399354533365e-05, "loss": 63.8987, "step": 59430 }, { "epoch": 0.24014512134520052, "grad_norm": 540.96142578125, "learning_rate": 4.7164784954467166e-05, "loss": 86.5465, "step": 59440 }, { "epoch": 0.24018552261056816, "grad_norm": 679.489501953125, "learning_rate": 4.716317012228707e-05, "loss": 52.6366, "step": 59450 }, { "epoch": 0.2402259238759358, "grad_norm": 1019.4818115234375, "learning_rate": 4.716155485802457e-05, "loss": 55.8305, "step": 59460 }, { "epoch": 0.24026632514130342, "grad_norm": 330.5364990234375, "learning_rate": 4.715993916171114e-05, "loss": 42.9955, "step": 59470 }, { "epoch": 0.24030672640667106, "grad_norm": 1147.410888671875, "learning_rate": 4.715832303337829e-05, "loss": 93.0537, "step": 59480 }, { "epoch": 0.2403471276720387, "grad_norm": 1502.9691162109375, "learning_rate": 4.715670647305753e-05, "loss": 66.4446, "step": 59490 }, { "epoch": 0.2403875289374063, "grad_norm": 1007.5733032226562, "learning_rate": 4.715508948078037e-05, "loss": 68.1062, "step": 59500 }, { "epoch": 0.24042793020277395, "grad_norm": 1082.7093505859375, "learning_rate": 4.715347205657833e-05, "loss": 85.7707, "step": 59510 }, { "epoch": 0.2404683314681416, "grad_norm": 591.158447265625, "learning_rate": 4.715185420048295e-05, "loss": 53.6973, "step": 59520 }, { "epoch": 0.2405087327335092, "grad_norm": 357.9864196777344, "learning_rate": 4.715023591252576e-05, "loss": 70.244, "step": 59530 }, { "epoch": 0.24054913399887684, "grad_norm": 975.7410888671875, "learning_rate": 4.714861719273833e-05, "loss": 84.0754, "step": 59540 }, { "epoch": 0.24058953526424448, "grad_norm": 986.4381103515625, "learning_rate": 4.7146998041152204e-05, "loss": 64.928, "step": 59550 }, { "epoch": 0.24062993652961212, "grad_norm": 996.6936645507812, "learning_rate": 4.714537845779894e-05, "loss": 47.7855, "step": 59560 }, { "epoch": 0.24067033779497973, "grad_norm": 864.1629638671875, "learning_rate": 4.7143758442710124e-05, "loss": 65.1874, "step": 59570 }, { "epoch": 0.24071073906034737, "grad_norm": 291.1817321777344, "learning_rate": 4.7142137995917336e-05, "loss": 57.6154, "step": 59580 }, { "epoch": 0.240751140325715, "grad_norm": 353.8094177246094, "learning_rate": 4.714051711745217e-05, "loss": 62.8998, "step": 59590 }, { "epoch": 0.24079154159108263, "grad_norm": 667.8562622070312, "learning_rate": 4.713889580734623e-05, "loss": 61.3621, "step": 59600 }, { "epoch": 0.24083194285645027, "grad_norm": 600.4000854492188, "learning_rate": 4.713727406563111e-05, "loss": 101.2895, "step": 59610 }, { "epoch": 0.2408723441218179, "grad_norm": 1322.2716064453125, "learning_rate": 4.713565189233844e-05, "loss": 73.4781, "step": 59620 }, { "epoch": 0.24091274538718552, "grad_norm": 378.68841552734375, "learning_rate": 4.7134029287499834e-05, "loss": 68.5723, "step": 59630 }, { "epoch": 0.24095314665255316, "grad_norm": 1208.138916015625, "learning_rate": 4.7132406251146935e-05, "loss": 66.9278, "step": 59640 }, { "epoch": 0.2409935479179208, "grad_norm": 402.3450927734375, "learning_rate": 4.713078278331138e-05, "loss": 84.2316, "step": 59650 }, { "epoch": 0.2410339491832884, "grad_norm": 785.4451293945312, "learning_rate": 4.712915888402483e-05, "loss": 82.4133, "step": 59660 }, { "epoch": 0.24107435044865605, "grad_norm": 485.10504150390625, "learning_rate": 4.7127534553318925e-05, "loss": 62.9998, "step": 59670 }, { "epoch": 0.2411147517140237, "grad_norm": 791.6461791992188, "learning_rate": 4.712590979122534e-05, "loss": 59.8595, "step": 59680 }, { "epoch": 0.2411551529793913, "grad_norm": 417.98040771484375, "learning_rate": 4.712428459777576e-05, "loss": 71.6051, "step": 59690 }, { "epoch": 0.24119555424475894, "grad_norm": 866.4073486328125, "learning_rate": 4.712265897300186e-05, "loss": 50.0575, "step": 59700 }, { "epoch": 0.24123595551012658, "grad_norm": 1018.1526489257812, "learning_rate": 4.712103291693533e-05, "loss": 88.681, "step": 59710 }, { "epoch": 0.24127635677549422, "grad_norm": 2852.052978515625, "learning_rate": 4.7119406429607885e-05, "loss": 85.578, "step": 59720 }, { "epoch": 0.24131675804086183, "grad_norm": 682.5413208007812, "learning_rate": 4.711777951105121e-05, "loss": 73.2862, "step": 59730 }, { "epoch": 0.24135715930622947, "grad_norm": 1790.8092041015625, "learning_rate": 4.7116152161297045e-05, "loss": 67.5014, "step": 59740 }, { "epoch": 0.24139756057159711, "grad_norm": 671.518310546875, "learning_rate": 4.71145243803771e-05, "loss": 67.0482, "step": 59750 }, { "epoch": 0.24143796183696473, "grad_norm": 248.55958557128906, "learning_rate": 4.711289616832312e-05, "loss": 52.1543, "step": 59760 }, { "epoch": 0.24147836310233237, "grad_norm": 676.2057495117188, "learning_rate": 4.7111267525166845e-05, "loss": 89.2593, "step": 59770 }, { "epoch": 0.2415187643677, "grad_norm": 846.4784545898438, "learning_rate": 4.710963845094003e-05, "loss": 94.6082, "step": 59780 }, { "epoch": 0.24155916563306762, "grad_norm": 735.9619750976562, "learning_rate": 4.710800894567443e-05, "loss": 85.0373, "step": 59790 }, { "epoch": 0.24159956689843526, "grad_norm": 0.0, "learning_rate": 4.710637900940181e-05, "loss": 56.9569, "step": 59800 }, { "epoch": 0.2416399681638029, "grad_norm": 1803.2908935546875, "learning_rate": 4.7104748642153954e-05, "loss": 76.5792, "step": 59810 }, { "epoch": 0.2416803694291705, "grad_norm": 886.1052856445312, "learning_rate": 4.710311784396264e-05, "loss": 44.95, "step": 59820 }, { "epoch": 0.24172077069453815, "grad_norm": 2984.022705078125, "learning_rate": 4.710148661485966e-05, "loss": 104.791, "step": 59830 }, { "epoch": 0.2417611719599058, "grad_norm": 1253.9434814453125, "learning_rate": 4.709985495487682e-05, "loss": 71.7768, "step": 59840 }, { "epoch": 0.2418015732252734, "grad_norm": 785.1651000976562, "learning_rate": 4.7098222864045945e-05, "loss": 81.442, "step": 59850 }, { "epoch": 0.24184197449064104, "grad_norm": 1157.2420654296875, "learning_rate": 4.709659034239883e-05, "loss": 91.4549, "step": 59860 }, { "epoch": 0.24188237575600868, "grad_norm": 2114.29833984375, "learning_rate": 4.7094957389967306e-05, "loss": 82.0255, "step": 59870 }, { "epoch": 0.24192277702137632, "grad_norm": 1235.954345703125, "learning_rate": 4.7093324006783214e-05, "loss": 95.1471, "step": 59880 }, { "epoch": 0.24196317828674394, "grad_norm": 990.7355346679688, "learning_rate": 4.709169019287839e-05, "loss": 83.1672, "step": 59890 }, { "epoch": 0.24200357955211158, "grad_norm": 790.0713500976562, "learning_rate": 4.7090055948284706e-05, "loss": 75.6788, "step": 59900 }, { "epoch": 0.24204398081747922, "grad_norm": 613.9937133789062, "learning_rate": 4.7088421273034e-05, "loss": 74.3221, "step": 59910 }, { "epoch": 0.24208438208284683, "grad_norm": 1069.763427734375, "learning_rate": 4.708678616715815e-05, "loss": 94.9927, "step": 59920 }, { "epoch": 0.24212478334821447, "grad_norm": 1685.7093505859375, "learning_rate": 4.7085150630689034e-05, "loss": 79.8483, "step": 59930 }, { "epoch": 0.2421651846135821, "grad_norm": 1826.313720703125, "learning_rate": 4.7083514663658536e-05, "loss": 110.4784, "step": 59940 }, { "epoch": 0.24220558587894972, "grad_norm": 2508.780029296875, "learning_rate": 4.7081878266098545e-05, "loss": 95.3383, "step": 59950 }, { "epoch": 0.24224598714431736, "grad_norm": 659.6832275390625, "learning_rate": 4.708024143804097e-05, "loss": 79.6695, "step": 59960 }, { "epoch": 0.242286388409685, "grad_norm": 570.7511596679688, "learning_rate": 4.707860417951773e-05, "loss": 68.3188, "step": 59970 }, { "epoch": 0.2423267896750526, "grad_norm": 1179.564208984375, "learning_rate": 4.707696649056073e-05, "loss": 109.7361, "step": 59980 }, { "epoch": 0.24236719094042025, "grad_norm": 632.1577758789062, "learning_rate": 4.70753283712019e-05, "loss": 86.0636, "step": 59990 }, { "epoch": 0.2424075922057879, "grad_norm": 975.7955322265625, "learning_rate": 4.707368982147318e-05, "loss": 87.0066, "step": 60000 }, { "epoch": 0.2424479934711555, "grad_norm": 3105.386962890625, "learning_rate": 4.707205084140651e-05, "loss": 77.5594, "step": 60010 }, { "epoch": 0.24248839473652314, "grad_norm": 834.739013671875, "learning_rate": 4.707041143103384e-05, "loss": 72.7049, "step": 60020 }, { "epoch": 0.24252879600189078, "grad_norm": 481.8167419433594, "learning_rate": 4.706877159038715e-05, "loss": 91.9634, "step": 60030 }, { "epoch": 0.24256919726725842, "grad_norm": 651.197021484375, "learning_rate": 4.706713131949839e-05, "loss": 57.6539, "step": 60040 }, { "epoch": 0.24260959853262604, "grad_norm": 821.2056884765625, "learning_rate": 4.706549061839954e-05, "loss": 67.2985, "step": 60050 }, { "epoch": 0.24264999979799368, "grad_norm": 445.24578857421875, "learning_rate": 4.70638494871226e-05, "loss": 70.1479, "step": 60060 }, { "epoch": 0.24269040106336132, "grad_norm": 902.0319213867188, "learning_rate": 4.7062207925699544e-05, "loss": 85.1113, "step": 60070 }, { "epoch": 0.24273080232872893, "grad_norm": 845.628662109375, "learning_rate": 4.7060565934162394e-05, "loss": 86.7892, "step": 60080 }, { "epoch": 0.24277120359409657, "grad_norm": 936.5902709960938, "learning_rate": 4.7058923512543154e-05, "loss": 84.2543, "step": 60090 }, { "epoch": 0.2428116048594642, "grad_norm": 770.0076904296875, "learning_rate": 4.7057280660873835e-05, "loss": 77.7724, "step": 60100 }, { "epoch": 0.24285200612483182, "grad_norm": 1131.8125, "learning_rate": 4.705563737918648e-05, "loss": 69.4776, "step": 60110 }, { "epoch": 0.24289240739019946, "grad_norm": 940.4691772460938, "learning_rate": 4.705399366751312e-05, "loss": 88.3803, "step": 60120 }, { "epoch": 0.2429328086555671, "grad_norm": 763.4826049804688, "learning_rate": 4.705234952588579e-05, "loss": 79.9678, "step": 60130 }, { "epoch": 0.2429732099209347, "grad_norm": 712.3192749023438, "learning_rate": 4.705070495433657e-05, "loss": 71.1181, "step": 60140 }, { "epoch": 0.24301361118630235, "grad_norm": 1741.696044921875, "learning_rate": 4.704905995289749e-05, "loss": 70.7245, "step": 60150 }, { "epoch": 0.24305401245167, "grad_norm": 588.7029418945312, "learning_rate": 4.7047414521600644e-05, "loss": 57.2675, "step": 60160 }, { "epoch": 0.2430944137170376, "grad_norm": 2938.370361328125, "learning_rate": 4.704576866047808e-05, "loss": 61.0899, "step": 60170 }, { "epoch": 0.24313481498240525, "grad_norm": 936.1781005859375, "learning_rate": 4.704412236956193e-05, "loss": 62.6005, "step": 60180 }, { "epoch": 0.24317521624777289, "grad_norm": 804.415771484375, "learning_rate": 4.7042475648884254e-05, "loss": 79.235, "step": 60190 }, { "epoch": 0.24321561751314053, "grad_norm": 729.1420288085938, "learning_rate": 4.704082849847718e-05, "loss": 92.9888, "step": 60200 }, { "epoch": 0.24325601877850814, "grad_norm": 604.2557983398438, "learning_rate": 4.703918091837279e-05, "loss": 55.6201, "step": 60210 }, { "epoch": 0.24329642004387578, "grad_norm": 580.6284790039062, "learning_rate": 4.703753290860323e-05, "loss": 45.9183, "step": 60220 }, { "epoch": 0.24333682130924342, "grad_norm": 361.0988464355469, "learning_rate": 4.703588446920062e-05, "loss": 96.7986, "step": 60230 }, { "epoch": 0.24337722257461103, "grad_norm": 5288.78271484375, "learning_rate": 4.70342356001971e-05, "loss": 62.3638, "step": 60240 }, { "epoch": 0.24341762383997867, "grad_norm": 981.030517578125, "learning_rate": 4.70325863016248e-05, "loss": 72.3735, "step": 60250 }, { "epoch": 0.2434580251053463, "grad_norm": 1044.311279296875, "learning_rate": 4.703093657351591e-05, "loss": 101.13, "step": 60260 }, { "epoch": 0.24349842637071392, "grad_norm": 695.3463134765625, "learning_rate": 4.702928641590255e-05, "loss": 61.2876, "step": 60270 }, { "epoch": 0.24353882763608156, "grad_norm": 825.5418701171875, "learning_rate": 4.702763582881692e-05, "loss": 77.4805, "step": 60280 }, { "epoch": 0.2435792289014492, "grad_norm": 568.7557983398438, "learning_rate": 4.702598481229118e-05, "loss": 52.6858, "step": 60290 }, { "epoch": 0.24361963016681681, "grad_norm": 1038.3533935546875, "learning_rate": 4.702433336635753e-05, "loss": 72.8244, "step": 60300 }, { "epoch": 0.24366003143218445, "grad_norm": 534.8966674804688, "learning_rate": 4.702268149104816e-05, "loss": 54.4095, "step": 60310 }, { "epoch": 0.2437004326975521, "grad_norm": 683.1052856445312, "learning_rate": 4.702102918639528e-05, "loss": 55.7349, "step": 60320 }, { "epoch": 0.2437408339629197, "grad_norm": 483.41552734375, "learning_rate": 4.70193764524311e-05, "loss": 74.4342, "step": 60330 }, { "epoch": 0.24378123522828735, "grad_norm": 688.1597900390625, "learning_rate": 4.701772328918784e-05, "loss": 56.6066, "step": 60340 }, { "epoch": 0.243821636493655, "grad_norm": 874.982177734375, "learning_rate": 4.701606969669773e-05, "loss": 62.7406, "step": 60350 }, { "epoch": 0.24386203775902263, "grad_norm": 1608.3681640625, "learning_rate": 4.7014415674993e-05, "loss": 87.2219, "step": 60360 }, { "epoch": 0.24390243902439024, "grad_norm": 852.6019287109375, "learning_rate": 4.701276122410591e-05, "loss": 65.5959, "step": 60370 }, { "epoch": 0.24394284028975788, "grad_norm": 796.290771484375, "learning_rate": 4.70111063440687e-05, "loss": 64.5564, "step": 60380 }, { "epoch": 0.24398324155512552, "grad_norm": 1192.7855224609375, "learning_rate": 4.7009451034913645e-05, "loss": 56.2537, "step": 60390 }, { "epoch": 0.24402364282049313, "grad_norm": 691.683837890625, "learning_rate": 4.7007795296673006e-05, "loss": 58.2113, "step": 60400 }, { "epoch": 0.24406404408586077, "grad_norm": 586.52685546875, "learning_rate": 4.700613912937907e-05, "loss": 47.3722, "step": 60410 }, { "epoch": 0.2441044453512284, "grad_norm": 1319.3663330078125, "learning_rate": 4.700448253306412e-05, "loss": 75.1673, "step": 60420 }, { "epoch": 0.24414484661659602, "grad_norm": 1594.365234375, "learning_rate": 4.7002825507760465e-05, "loss": 120.8496, "step": 60430 }, { "epoch": 0.24418524788196366, "grad_norm": 789.883544921875, "learning_rate": 4.700116805350039e-05, "loss": 65.3508, "step": 60440 }, { "epoch": 0.2442256491473313, "grad_norm": 627.7055053710938, "learning_rate": 4.699951017031621e-05, "loss": 80.6547, "step": 60450 }, { "epoch": 0.24426605041269892, "grad_norm": 564.2537841796875, "learning_rate": 4.699785185824026e-05, "loss": 73.5011, "step": 60460 }, { "epoch": 0.24430645167806656, "grad_norm": 589.3305053710938, "learning_rate": 4.6996193117304864e-05, "loss": 50.1976, "step": 60470 }, { "epoch": 0.2443468529434342, "grad_norm": 2720.202392578125, "learning_rate": 4.699453394754236e-05, "loss": 65.688, "step": 60480 }, { "epoch": 0.2443872542088018, "grad_norm": 926.1817016601562, "learning_rate": 4.6992874348985093e-05, "loss": 99.6495, "step": 60490 }, { "epoch": 0.24442765547416945, "grad_norm": 845.654541015625, "learning_rate": 4.6991214321665414e-05, "loss": 63.7764, "step": 60500 }, { "epoch": 0.2444680567395371, "grad_norm": 458.4206237792969, "learning_rate": 4.698955386561569e-05, "loss": 53.1651, "step": 60510 }, { "epoch": 0.24450845800490473, "grad_norm": 1341.775146484375, "learning_rate": 4.6987892980868296e-05, "loss": 85.8457, "step": 60520 }, { "epoch": 0.24454885927027234, "grad_norm": 1214.4681396484375, "learning_rate": 4.6986231667455605e-05, "loss": 59.1112, "step": 60530 }, { "epoch": 0.24458926053563998, "grad_norm": 791.0181884765625, "learning_rate": 4.6984569925410016e-05, "loss": 113.4298, "step": 60540 }, { "epoch": 0.24462966180100762, "grad_norm": 1435.430908203125, "learning_rate": 4.6982907754763906e-05, "loss": 106.5237, "step": 60550 }, { "epoch": 0.24467006306637523, "grad_norm": 671.5120239257812, "learning_rate": 4.69812451555497e-05, "loss": 75.014, "step": 60560 }, { "epoch": 0.24471046433174287, "grad_norm": 419.3539733886719, "learning_rate": 4.697958212779981e-05, "loss": 40.2964, "step": 60570 }, { "epoch": 0.2447508655971105, "grad_norm": 1304.94189453125, "learning_rate": 4.697791867154663e-05, "loss": 52.5812, "step": 60580 }, { "epoch": 0.24479126686247812, "grad_norm": 577.18505859375, "learning_rate": 4.697625478682263e-05, "loss": 82.3342, "step": 60590 }, { "epoch": 0.24483166812784576, "grad_norm": 455.48480224609375, "learning_rate": 4.6974590473660216e-05, "loss": 55.9024, "step": 60600 }, { "epoch": 0.2448720693932134, "grad_norm": 592.9940185546875, "learning_rate": 4.697292573209185e-05, "loss": 65.5786, "step": 60610 }, { "epoch": 0.24491247065858102, "grad_norm": 368.27581787109375, "learning_rate": 4.697126056214999e-05, "loss": 34.5167, "step": 60620 }, { "epoch": 0.24495287192394866, "grad_norm": 400.2873840332031, "learning_rate": 4.6969594963867084e-05, "loss": 104.06, "step": 60630 }, { "epoch": 0.2449932731893163, "grad_norm": 744.5037231445312, "learning_rate": 4.696792893727562e-05, "loss": 87.9777, "step": 60640 }, { "epoch": 0.2450336744546839, "grad_norm": 654.1832885742188, "learning_rate": 4.696626248240807e-05, "loss": 54.0729, "step": 60650 }, { "epoch": 0.24507407572005155, "grad_norm": 339.2250671386719, "learning_rate": 4.6964595599296926e-05, "loss": 40.1769, "step": 60660 }, { "epoch": 0.2451144769854192, "grad_norm": 632.5075073242188, "learning_rate": 4.696292828797468e-05, "loss": 79.759, "step": 60670 }, { "epoch": 0.24515487825078683, "grad_norm": 884.93310546875, "learning_rate": 4.696126054847385e-05, "loss": 73.9001, "step": 60680 }, { "epoch": 0.24519527951615444, "grad_norm": 1115.2449951171875, "learning_rate": 4.695959238082692e-05, "loss": 70.6572, "step": 60690 }, { "epoch": 0.24523568078152208, "grad_norm": 1520.8741455078125, "learning_rate": 4.6957923785066445e-05, "loss": 90.2716, "step": 60700 }, { "epoch": 0.24527608204688972, "grad_norm": 1127.73876953125, "learning_rate": 4.6956254761224936e-05, "loss": 70.0457, "step": 60710 }, { "epoch": 0.24531648331225733, "grad_norm": 1073.5108642578125, "learning_rate": 4.695458530933494e-05, "loss": 100.9174, "step": 60720 }, { "epoch": 0.24535688457762497, "grad_norm": 773.4358520507812, "learning_rate": 4.6952915429429e-05, "loss": 82.9172, "step": 60730 }, { "epoch": 0.2453972858429926, "grad_norm": 1714.5184326171875, "learning_rate": 4.6951245121539675e-05, "loss": 74.2328, "step": 60740 }, { "epoch": 0.24543768710836023, "grad_norm": 1113.5059814453125, "learning_rate": 4.694957438569951e-05, "loss": 68.1501, "step": 60750 }, { "epoch": 0.24547808837372787, "grad_norm": 1123.2255859375, "learning_rate": 4.694790322194111e-05, "loss": 57.4253, "step": 60760 }, { "epoch": 0.2455184896390955, "grad_norm": 1276.2410888671875, "learning_rate": 4.6946231630297036e-05, "loss": 110.3862, "step": 60770 }, { "epoch": 0.24555889090446312, "grad_norm": 695.6715087890625, "learning_rate": 4.694455961079987e-05, "loss": 70.8457, "step": 60780 }, { "epoch": 0.24559929216983076, "grad_norm": 1895.229736328125, "learning_rate": 4.694288716348221e-05, "loss": 62.3386, "step": 60790 }, { "epoch": 0.2456396934351984, "grad_norm": 894.3408813476562, "learning_rate": 4.694121428837668e-05, "loss": 73.6542, "step": 60800 }, { "epoch": 0.245680094700566, "grad_norm": 773.7852172851562, "learning_rate": 4.693954098551587e-05, "loss": 57.4668, "step": 60810 }, { "epoch": 0.24572049596593365, "grad_norm": 873.3363037109375, "learning_rate": 4.693786725493242e-05, "loss": 88.7946, "step": 60820 }, { "epoch": 0.2457608972313013, "grad_norm": 787.7183227539062, "learning_rate": 4.6936193096658955e-05, "loss": 95.2631, "step": 60830 }, { "epoch": 0.2458012984966689, "grad_norm": 1632.471923828125, "learning_rate": 4.693451851072811e-05, "loss": 76.3312, "step": 60840 }, { "epoch": 0.24584169976203654, "grad_norm": 624.8790283203125, "learning_rate": 4.693284349717254e-05, "loss": 60.229, "step": 60850 }, { "epoch": 0.24588210102740418, "grad_norm": 776.8783569335938, "learning_rate": 4.693116805602489e-05, "loss": 67.0748, "step": 60860 }, { "epoch": 0.24592250229277182, "grad_norm": 357.4325866699219, "learning_rate": 4.692949218731782e-05, "loss": 75.164, "step": 60870 }, { "epoch": 0.24596290355813943, "grad_norm": 934.4722290039062, "learning_rate": 4.692781589108402e-05, "loss": 104.851, "step": 60880 }, { "epoch": 0.24600330482350707, "grad_norm": 788.913330078125, "learning_rate": 4.692613916735615e-05, "loss": 76.9564, "step": 60890 }, { "epoch": 0.24604370608887471, "grad_norm": 1184.293701171875, "learning_rate": 4.692446201616692e-05, "loss": 104.7225, "step": 60900 }, { "epoch": 0.24608410735424233, "grad_norm": 768.3632202148438, "learning_rate": 4.692278443754901e-05, "loss": 96.9056, "step": 60910 }, { "epoch": 0.24612450861960997, "grad_norm": 1289.7335205078125, "learning_rate": 4.6921106431535135e-05, "loss": 51.5294, "step": 60920 }, { "epoch": 0.2461649098849776, "grad_norm": 537.193359375, "learning_rate": 4.6919427998158e-05, "loss": 87.4695, "step": 60930 }, { "epoch": 0.24620531115034522, "grad_norm": 3655.863525390625, "learning_rate": 4.691774913745033e-05, "loss": 92.1189, "step": 60940 }, { "epoch": 0.24624571241571286, "grad_norm": 465.6102600097656, "learning_rate": 4.691606984944486e-05, "loss": 79.8114, "step": 60950 }, { "epoch": 0.2462861136810805, "grad_norm": 531.8803100585938, "learning_rate": 4.691439013417433e-05, "loss": 62.0595, "step": 60960 }, { "epoch": 0.2463265149464481, "grad_norm": 1052.3104248046875, "learning_rate": 4.691270999167147e-05, "loss": 67.6164, "step": 60970 }, { "epoch": 0.24636691621181575, "grad_norm": 624.75390625, "learning_rate": 4.691102942196906e-05, "loss": 67.1735, "step": 60980 }, { "epoch": 0.2464073174771834, "grad_norm": 626.8027954101562, "learning_rate": 4.6909348425099835e-05, "loss": 64.8553, "step": 60990 }, { "epoch": 0.246447718742551, "grad_norm": 250.1268768310547, "learning_rate": 4.690766700109659e-05, "loss": 63.7221, "step": 61000 }, { "epoch": 0.24648812000791864, "grad_norm": 978.392333984375, "learning_rate": 4.6905985149992107e-05, "loss": 138.7521, "step": 61010 }, { "epoch": 0.24652852127328628, "grad_norm": 770.6021728515625, "learning_rate": 4.690430287181915e-05, "loss": 104.2015, "step": 61020 }, { "epoch": 0.24656892253865392, "grad_norm": 1836.369873046875, "learning_rate": 4.690262016661054e-05, "loss": 64.2337, "step": 61030 }, { "epoch": 0.24660932380402154, "grad_norm": 1369.9937744140625, "learning_rate": 4.690093703439907e-05, "loss": 114.4457, "step": 61040 }, { "epoch": 0.24664972506938918, "grad_norm": 756.3161010742188, "learning_rate": 4.689925347521757e-05, "loss": 49.0843, "step": 61050 }, { "epoch": 0.24669012633475682, "grad_norm": 446.1201171875, "learning_rate": 4.689756948909884e-05, "loss": 59.4647, "step": 61060 }, { "epoch": 0.24673052760012443, "grad_norm": 1100.18994140625, "learning_rate": 4.689588507607572e-05, "loss": 55.1963, "step": 61070 }, { "epoch": 0.24677092886549207, "grad_norm": 863.7999877929688, "learning_rate": 4.689420023618104e-05, "loss": 67.0081, "step": 61080 }, { "epoch": 0.2468113301308597, "grad_norm": 696.3552856445312, "learning_rate": 4.6892514969447664e-05, "loss": 80.1907, "step": 61090 }, { "epoch": 0.24685173139622732, "grad_norm": 695.3787231445312, "learning_rate": 4.6890829275908434e-05, "loss": 70.8243, "step": 61100 }, { "epoch": 0.24689213266159496, "grad_norm": 893.7193603515625, "learning_rate": 4.6889143155596214e-05, "loss": 99.1561, "step": 61110 }, { "epoch": 0.2469325339269626, "grad_norm": 1360.45263671875, "learning_rate": 4.688745660854388e-05, "loss": 103.6161, "step": 61120 }, { "epoch": 0.2469729351923302, "grad_norm": 726.0472412109375, "learning_rate": 4.688576963478432e-05, "loss": 77.2796, "step": 61130 }, { "epoch": 0.24701333645769785, "grad_norm": 120.09856414794922, "learning_rate": 4.68840822343504e-05, "loss": 77.299, "step": 61140 }, { "epoch": 0.2470537377230655, "grad_norm": 1099.0858154296875, "learning_rate": 4.6882394407275044e-05, "loss": 103.8713, "step": 61150 }, { "epoch": 0.2470941389884331, "grad_norm": 704.1102294921875, "learning_rate": 4.688070615359114e-05, "loss": 100.1313, "step": 61160 }, { "epoch": 0.24713454025380074, "grad_norm": 429.2733459472656, "learning_rate": 4.6879017473331595e-05, "loss": 105.0034, "step": 61170 }, { "epoch": 0.24717494151916838, "grad_norm": 715.7286376953125, "learning_rate": 4.6877328366529346e-05, "loss": 57.8755, "step": 61180 }, { "epoch": 0.24721534278453602, "grad_norm": 708.1912841796875, "learning_rate": 4.687563883321732e-05, "loss": 67.7243, "step": 61190 }, { "epoch": 0.24725574404990364, "grad_norm": 885.6137084960938, "learning_rate": 4.687394887342845e-05, "loss": 63.6241, "step": 61200 }, { "epoch": 0.24729614531527128, "grad_norm": 549.2717895507812, "learning_rate": 4.687225848719568e-05, "loss": 66.4841, "step": 61210 }, { "epoch": 0.24733654658063892, "grad_norm": 390.741455078125, "learning_rate": 4.687056767455198e-05, "loss": 59.3939, "step": 61220 }, { "epoch": 0.24737694784600653, "grad_norm": 770.025390625, "learning_rate": 4.6868876435530296e-05, "loss": 78.1586, "step": 61230 }, { "epoch": 0.24741734911137417, "grad_norm": 565.1214599609375, "learning_rate": 4.686718477016361e-05, "loss": 68.8117, "step": 61240 }, { "epoch": 0.2474577503767418, "grad_norm": 944.383544921875, "learning_rate": 4.6865492678484895e-05, "loss": 72.7522, "step": 61250 }, { "epoch": 0.24749815164210942, "grad_norm": 2328.01513671875, "learning_rate": 4.6863800160527147e-05, "loss": 86.5814, "step": 61260 }, { "epoch": 0.24753855290747706, "grad_norm": 648.8901977539062, "learning_rate": 4.686210721632336e-05, "loss": 85.4312, "step": 61270 }, { "epoch": 0.2475789541728447, "grad_norm": 1613.290283203125, "learning_rate": 4.6860413845906534e-05, "loss": 80.4138, "step": 61280 }, { "epoch": 0.2476193554382123, "grad_norm": 515.4049682617188, "learning_rate": 4.685872004930969e-05, "loss": 48.0804, "step": 61290 }, { "epoch": 0.24765975670357995, "grad_norm": 258.1792297363281, "learning_rate": 4.685702582656584e-05, "loss": 48.0407, "step": 61300 }, { "epoch": 0.2477001579689476, "grad_norm": 1516.9779052734375, "learning_rate": 4.685533117770803e-05, "loss": 76.3524, "step": 61310 }, { "epoch": 0.2477405592343152, "grad_norm": 1197.6373291015625, "learning_rate": 4.6853636102769274e-05, "loss": 95.2485, "step": 61320 }, { "epoch": 0.24778096049968285, "grad_norm": 433.1604309082031, "learning_rate": 4.6851940601782635e-05, "loss": 83.6786, "step": 61330 }, { "epoch": 0.24782136176505049, "grad_norm": 622.607666015625, "learning_rate": 4.685024467478116e-05, "loss": 75.2844, "step": 61340 }, { "epoch": 0.24786176303041813, "grad_norm": 710.9769287109375, "learning_rate": 4.684854832179792e-05, "loss": 106.4003, "step": 61350 }, { "epoch": 0.24790216429578574, "grad_norm": 1580.35009765625, "learning_rate": 4.684685154286599e-05, "loss": 58.8193, "step": 61360 }, { "epoch": 0.24794256556115338, "grad_norm": 917.3915405273438, "learning_rate": 4.684515433801843e-05, "loss": 78.9511, "step": 61370 }, { "epoch": 0.24798296682652102, "grad_norm": 1568.43408203125, "learning_rate": 4.684345670728834e-05, "loss": 82.2572, "step": 61380 }, { "epoch": 0.24802336809188863, "grad_norm": 768.3124389648438, "learning_rate": 4.6841758650708824e-05, "loss": 83.1386, "step": 61390 }, { "epoch": 0.24806376935725627, "grad_norm": 972.3724975585938, "learning_rate": 4.684006016831297e-05, "loss": 111.0157, "step": 61400 }, { "epoch": 0.2481041706226239, "grad_norm": 710.788330078125, "learning_rate": 4.68383612601339e-05, "loss": 70.9548, "step": 61410 }, { "epoch": 0.24814457188799152, "grad_norm": 1372.3570556640625, "learning_rate": 4.6836661926204736e-05, "loss": 57.5385, "step": 61420 }, { "epoch": 0.24818497315335916, "grad_norm": 2235.90380859375, "learning_rate": 4.6834962166558605e-05, "loss": 86.6557, "step": 61430 }, { "epoch": 0.2482253744187268, "grad_norm": 1524.7860107421875, "learning_rate": 4.6833261981228646e-05, "loss": 86.687, "step": 61440 }, { "epoch": 0.24826577568409441, "grad_norm": 514.3629760742188, "learning_rate": 4.683156137024801e-05, "loss": 73.8575, "step": 61450 }, { "epoch": 0.24830617694946205, "grad_norm": 830.9341430664062, "learning_rate": 4.6829860333649836e-05, "loss": 63.1829, "step": 61460 }, { "epoch": 0.2483465782148297, "grad_norm": 0.0, "learning_rate": 4.68281588714673e-05, "loss": 49.3014, "step": 61470 }, { "epoch": 0.2483869794801973, "grad_norm": 1122.3907470703125, "learning_rate": 4.682645698373357e-05, "loss": 56.4632, "step": 61480 }, { "epoch": 0.24842738074556495, "grad_norm": 838.9703369140625, "learning_rate": 4.682475467048182e-05, "loss": 51.4873, "step": 61490 }, { "epoch": 0.2484677820109326, "grad_norm": 966.208251953125, "learning_rate": 4.682305193174524e-05, "loss": 92.0763, "step": 61500 }, { "epoch": 0.24850818327630023, "grad_norm": 623.809326171875, "learning_rate": 4.682134876755704e-05, "loss": 87.1039, "step": 61510 }, { "epoch": 0.24854858454166784, "grad_norm": 769.28759765625, "learning_rate": 4.68196451779504e-05, "loss": 98.8148, "step": 61520 }, { "epoch": 0.24858898580703548, "grad_norm": 437.263671875, "learning_rate": 4.6817941162958544e-05, "loss": 78.6662, "step": 61530 }, { "epoch": 0.24862938707240312, "grad_norm": 566.79638671875, "learning_rate": 4.681623672261469e-05, "loss": 67.6416, "step": 61540 }, { "epoch": 0.24866978833777073, "grad_norm": 378.28082275390625, "learning_rate": 4.6814531856952084e-05, "loss": 51.0772, "step": 61550 }, { "epoch": 0.24871018960313837, "grad_norm": 2680.483154296875, "learning_rate": 4.6812826566003934e-05, "loss": 102.0762, "step": 61560 }, { "epoch": 0.248750590868506, "grad_norm": 787.8114624023438, "learning_rate": 4.68111208498035e-05, "loss": 40.7656, "step": 61570 }, { "epoch": 0.24879099213387362, "grad_norm": 701.6543579101562, "learning_rate": 4.6809414708384046e-05, "loss": 71.9813, "step": 61580 }, { "epoch": 0.24883139339924126, "grad_norm": 643.1692504882812, "learning_rate": 4.680770814177882e-05, "loss": 84.7958, "step": 61590 }, { "epoch": 0.2488717946646089, "grad_norm": 857.9775390625, "learning_rate": 4.68060011500211e-05, "loss": 78.1685, "step": 61600 }, { "epoch": 0.24891219592997652, "grad_norm": 897.5280151367188, "learning_rate": 4.680429373314415e-05, "loss": 57.9258, "step": 61610 }, { "epoch": 0.24895259719534416, "grad_norm": 756.8377685546875, "learning_rate": 4.680258589118128e-05, "loss": 90.2208, "step": 61620 }, { "epoch": 0.2489929984607118, "grad_norm": 650.5374145507812, "learning_rate": 4.680087762416576e-05, "loss": 81.8518, "step": 61630 }, { "epoch": 0.2490333997260794, "grad_norm": 675.606201171875, "learning_rate": 4.6799168932130915e-05, "loss": 72.3285, "step": 61640 }, { "epoch": 0.24907380099144705, "grad_norm": 932.6596069335938, "learning_rate": 4.679745981511005e-05, "loss": 51.2554, "step": 61650 }, { "epoch": 0.2491142022568147, "grad_norm": 891.9270629882812, "learning_rate": 4.679575027313649e-05, "loss": 54.2277, "step": 61660 }, { "epoch": 0.24915460352218233, "grad_norm": 927.309814453125, "learning_rate": 4.6794040306243545e-05, "loss": 68.7457, "step": 61670 }, { "epoch": 0.24919500478754994, "grad_norm": 2662.16845703125, "learning_rate": 4.679232991446456e-05, "loss": 63.2462, "step": 61680 }, { "epoch": 0.24923540605291758, "grad_norm": 604.02978515625, "learning_rate": 4.67906190978329e-05, "loss": 37.8021, "step": 61690 }, { "epoch": 0.24927580731828522, "grad_norm": 291.47430419921875, "learning_rate": 4.6788907856381895e-05, "loss": 131.591, "step": 61700 }, { "epoch": 0.24931620858365283, "grad_norm": 640.17529296875, "learning_rate": 4.678719619014491e-05, "loss": 86.4894, "step": 61710 }, { "epoch": 0.24935660984902047, "grad_norm": 745.0997924804688, "learning_rate": 4.678548409915532e-05, "loss": 62.8037, "step": 61720 }, { "epoch": 0.2493970111143881, "grad_norm": 576.8112182617188, "learning_rate": 4.67837715834465e-05, "loss": 96.3769, "step": 61730 }, { "epoch": 0.24943741237975572, "grad_norm": 687.8386840820312, "learning_rate": 4.678205864305184e-05, "loss": 78.1074, "step": 61740 }, { "epoch": 0.24947781364512336, "grad_norm": 542.1369018554688, "learning_rate": 4.678034527800474e-05, "loss": 70.0832, "step": 61750 }, { "epoch": 0.249518214910491, "grad_norm": 945.9921875, "learning_rate": 4.677863148833859e-05, "loss": 87.9454, "step": 61760 }, { "epoch": 0.24955861617585862, "grad_norm": 443.54583740234375, "learning_rate": 4.6776917274086806e-05, "loss": 63.2674, "step": 61770 }, { "epoch": 0.24959901744122626, "grad_norm": 485.80865478515625, "learning_rate": 4.67752026352828e-05, "loss": 86.9183, "step": 61780 }, { "epoch": 0.2496394187065939, "grad_norm": 514.6878051757812, "learning_rate": 4.677348757196002e-05, "loss": 95.4372, "step": 61790 }, { "epoch": 0.2496798199719615, "grad_norm": 1996.046142578125, "learning_rate": 4.6771772084151885e-05, "loss": 93.6957, "step": 61800 }, { "epoch": 0.24972022123732915, "grad_norm": 893.337646484375, "learning_rate": 4.6770056171891846e-05, "loss": 75.4777, "step": 61810 }, { "epoch": 0.2497606225026968, "grad_norm": 999.063232421875, "learning_rate": 4.676833983521335e-05, "loss": 133.2376, "step": 61820 }, { "epoch": 0.24980102376806443, "grad_norm": 1203.1588134765625, "learning_rate": 4.676662307414987e-05, "loss": 92.8217, "step": 61830 }, { "epoch": 0.24984142503343204, "grad_norm": 1015.3056030273438, "learning_rate": 4.676490588873486e-05, "loss": 71.3694, "step": 61840 }, { "epoch": 0.24988182629879968, "grad_norm": 504.0675354003906, "learning_rate": 4.6763188279001804e-05, "loss": 37.9027, "step": 61850 }, { "epoch": 0.24992222756416732, "grad_norm": 1208.43310546875, "learning_rate": 4.6761470244984196e-05, "loss": 79.1104, "step": 61860 }, { "epoch": 0.24996262882953493, "grad_norm": 1380.7022705078125, "learning_rate": 4.675975178671551e-05, "loss": 59.1448, "step": 61870 }, { "epoch": 0.25000303009490255, "grad_norm": 348.9476623535156, "learning_rate": 4.675803290422927e-05, "loss": 85.426, "step": 61880 }, { "epoch": 0.2500434313602702, "grad_norm": 709.4434204101562, "learning_rate": 4.6756313597558977e-05, "loss": 52.583, "step": 61890 }, { "epoch": 0.2500838326256378, "grad_norm": 490.9687805175781, "learning_rate": 4.675459386673815e-05, "loss": 84.7173, "step": 61900 }, { "epoch": 0.25012423389100547, "grad_norm": 935.8824462890625, "learning_rate": 4.6752873711800306e-05, "loss": 61.2777, "step": 61910 }, { "epoch": 0.2501646351563731, "grad_norm": 1041.913818359375, "learning_rate": 4.6751153132779e-05, "loss": 90.0399, "step": 61920 }, { "epoch": 0.25020503642174075, "grad_norm": 516.0934448242188, "learning_rate": 4.674943212970776e-05, "loss": 53.4762, "step": 61930 }, { "epoch": 0.2502454376871084, "grad_norm": 1382.7197265625, "learning_rate": 4.674771070262014e-05, "loss": 128.2385, "step": 61940 }, { "epoch": 0.25028583895247597, "grad_norm": 759.5361938476562, "learning_rate": 4.67459888515497e-05, "loss": 56.0513, "step": 61950 }, { "epoch": 0.2503262402178436, "grad_norm": 1789.28466796875, "learning_rate": 4.674426657653003e-05, "loss": 59.7396, "step": 61960 }, { "epoch": 0.25036664148321125, "grad_norm": 456.0893859863281, "learning_rate": 4.6742543877594675e-05, "loss": 42.1051, "step": 61970 }, { "epoch": 0.2504070427485789, "grad_norm": 1111.8485107421875, "learning_rate": 4.6740820754777235e-05, "loss": 62.1872, "step": 61980 }, { "epoch": 0.25044744401394653, "grad_norm": 927.1618041992188, "learning_rate": 4.6739097208111306e-05, "loss": 67.374, "step": 61990 }, { "epoch": 0.25048784527931417, "grad_norm": 898.595458984375, "learning_rate": 4.6737373237630476e-05, "loss": 97.9774, "step": 62000 }, { "epoch": 0.25052824654468175, "grad_norm": 0.0, "learning_rate": 4.6735648843368376e-05, "loss": 87.7824, "step": 62010 }, { "epoch": 0.2505686478100494, "grad_norm": 850.822265625, "learning_rate": 4.6733924025358597e-05, "loss": 58.0294, "step": 62020 }, { "epoch": 0.25060904907541703, "grad_norm": 1727.8907470703125, "learning_rate": 4.673219878363479e-05, "loss": 71.4364, "step": 62030 }, { "epoch": 0.2506494503407847, "grad_norm": 878.0247802734375, "learning_rate": 4.6730473118230575e-05, "loss": 90.9596, "step": 62040 }, { "epoch": 0.2506898516061523, "grad_norm": 1600.9791259765625, "learning_rate": 4.67287470291796e-05, "loss": 64.3531, "step": 62050 }, { "epoch": 0.25073025287151995, "grad_norm": 617.0396118164062, "learning_rate": 4.672702051651552e-05, "loss": 60.0307, "step": 62060 }, { "epoch": 0.2507706541368876, "grad_norm": 583.3132934570312, "learning_rate": 4.672529358027198e-05, "loss": 82.0429, "step": 62070 }, { "epoch": 0.2508110554022552, "grad_norm": 579.264892578125, "learning_rate": 4.6723566220482664e-05, "loss": 71.0199, "step": 62080 }, { "epoch": 0.2508514566676228, "grad_norm": 723.0690307617188, "learning_rate": 4.672183843718123e-05, "loss": 72.1286, "step": 62090 }, { "epoch": 0.25089185793299046, "grad_norm": 543.1493530273438, "learning_rate": 4.672011023040138e-05, "loss": 64.0717, "step": 62100 }, { "epoch": 0.2509322591983581, "grad_norm": 724.9541625976562, "learning_rate": 4.671838160017681e-05, "loss": 63.9457, "step": 62110 }, { "epoch": 0.25097266046372574, "grad_norm": 565.6871948242188, "learning_rate": 4.6716652546541194e-05, "loss": 52.1139, "step": 62120 }, { "epoch": 0.2510130617290934, "grad_norm": 1354.7672119140625, "learning_rate": 4.671492306952826e-05, "loss": 76.9495, "step": 62130 }, { "epoch": 0.25105346299446096, "grad_norm": 636.530029296875, "learning_rate": 4.6713193169171724e-05, "loss": 55.6664, "step": 62140 }, { "epoch": 0.2510938642598286, "grad_norm": 1726.2562255859375, "learning_rate": 4.6711462845505304e-05, "loss": 81.8684, "step": 62150 }, { "epoch": 0.25113426552519624, "grad_norm": 742.2718505859375, "learning_rate": 4.6709732098562745e-05, "loss": 68.1843, "step": 62160 }, { "epoch": 0.2511746667905639, "grad_norm": 725.1959228515625, "learning_rate": 4.670800092837777e-05, "loss": 62.3066, "step": 62170 }, { "epoch": 0.2512150680559315, "grad_norm": 1269.651123046875, "learning_rate": 4.670626933498415e-05, "loss": 75.0705, "step": 62180 }, { "epoch": 0.25125546932129916, "grad_norm": 1048.4793701171875, "learning_rate": 4.670453731841563e-05, "loss": 67.2275, "step": 62190 }, { "epoch": 0.25129587058666675, "grad_norm": 1658.6903076171875, "learning_rate": 4.670280487870598e-05, "loss": 87.6034, "step": 62200 }, { "epoch": 0.2513362718520344, "grad_norm": 706.1924438476562, "learning_rate": 4.670107201588898e-05, "loss": 66.6542, "step": 62210 }, { "epoch": 0.251376673117402, "grad_norm": 1182.62939453125, "learning_rate": 4.669933872999841e-05, "loss": 61.4396, "step": 62220 }, { "epoch": 0.25141707438276967, "grad_norm": 973.4239501953125, "learning_rate": 4.669760502106805e-05, "loss": 49.4658, "step": 62230 }, { "epoch": 0.2514574756481373, "grad_norm": 954.9755859375, "learning_rate": 4.6695870889131724e-05, "loss": 58.9961, "step": 62240 }, { "epoch": 0.25149787691350495, "grad_norm": 983.7535400390625, "learning_rate": 4.669413633422322e-05, "loss": 67.7966, "step": 62250 }, { "epoch": 0.2515382781788726, "grad_norm": 934.3507690429688, "learning_rate": 4.669240135637635e-05, "loss": 64.6385, "step": 62260 }, { "epoch": 0.25157867944424017, "grad_norm": 535.4703369140625, "learning_rate": 4.669066595562496e-05, "loss": 99.6696, "step": 62270 }, { "epoch": 0.2516190807096078, "grad_norm": 887.987060546875, "learning_rate": 4.668893013200286e-05, "loss": 66.7841, "step": 62280 }, { "epoch": 0.25165948197497545, "grad_norm": 1214.275146484375, "learning_rate": 4.66871938855439e-05, "loss": 75.537, "step": 62290 }, { "epoch": 0.2516998832403431, "grad_norm": 572.87158203125, "learning_rate": 4.6685457216281936e-05, "loss": 67.1349, "step": 62300 }, { "epoch": 0.25174028450571073, "grad_norm": 507.9439697265625, "learning_rate": 4.668372012425082e-05, "loss": 79.3798, "step": 62310 }, { "epoch": 0.25178068577107837, "grad_norm": 265.6028747558594, "learning_rate": 4.6681982609484416e-05, "loss": 57.9518, "step": 62320 }, { "epoch": 0.25182108703644596, "grad_norm": 2266.7421875, "learning_rate": 4.6680244672016595e-05, "loss": 74.0739, "step": 62330 }, { "epoch": 0.2518614883018136, "grad_norm": 727.9891967773438, "learning_rate": 4.6678506311881245e-05, "loss": 107.9434, "step": 62340 }, { "epoch": 0.25190188956718124, "grad_norm": 769.7603759765625, "learning_rate": 4.667676752911225e-05, "loss": 62.9798, "step": 62350 }, { "epoch": 0.2519422908325489, "grad_norm": 367.056396484375, "learning_rate": 4.667502832374352e-05, "loss": 76.6132, "step": 62360 }, { "epoch": 0.2519826920979165, "grad_norm": 1385.6729736328125, "learning_rate": 4.667328869580895e-05, "loss": 56.0849, "step": 62370 }, { "epoch": 0.25202309336328416, "grad_norm": 509.8416442871094, "learning_rate": 4.6671548645342456e-05, "loss": 59.8157, "step": 62380 }, { "epoch": 0.2520634946286518, "grad_norm": 1469.747314453125, "learning_rate": 4.666980817237797e-05, "loss": 89.1393, "step": 62390 }, { "epoch": 0.2521038958940194, "grad_norm": 1213.3016357421875, "learning_rate": 4.6668067276949414e-05, "loss": 59.1252, "step": 62400 }, { "epoch": 0.252144297159387, "grad_norm": 990.9741821289062, "learning_rate": 4.666632595909072e-05, "loss": 68.1975, "step": 62410 }, { "epoch": 0.25218469842475466, "grad_norm": 916.1857299804688, "learning_rate": 4.666458421883586e-05, "loss": 66.8495, "step": 62420 }, { "epoch": 0.2522250996901223, "grad_norm": 381.66485595703125, "learning_rate": 4.666284205621877e-05, "loss": 91.8923, "step": 62430 }, { "epoch": 0.25226550095548994, "grad_norm": 894.6453247070312, "learning_rate": 4.666109947127343e-05, "loss": 76.0347, "step": 62440 }, { "epoch": 0.2523059022208576, "grad_norm": 455.5608215332031, "learning_rate": 4.6659356464033795e-05, "loss": 54.6976, "step": 62450 }, { "epoch": 0.25234630348622517, "grad_norm": 581.934814453125, "learning_rate": 4.6657613034533866e-05, "loss": 60.0454, "step": 62460 }, { "epoch": 0.2523867047515928, "grad_norm": 911.0252685546875, "learning_rate": 4.665586918280761e-05, "loss": 102.2392, "step": 62470 }, { "epoch": 0.25242710601696045, "grad_norm": 1507.02685546875, "learning_rate": 4.665412490888904e-05, "loss": 90.9116, "step": 62480 }, { "epoch": 0.2524675072823281, "grad_norm": 2237.4755859375, "learning_rate": 4.6652380212812155e-05, "loss": 93.2901, "step": 62490 }, { "epoch": 0.2525079085476957, "grad_norm": 816.4346923828125, "learning_rate": 4.665063509461097e-05, "loss": 80.2981, "step": 62500 }, { "epoch": 0.25254830981306337, "grad_norm": 769.7588500976562, "learning_rate": 4.6648889554319506e-05, "loss": 55.1148, "step": 62510 }, { "epoch": 0.25258871107843095, "grad_norm": 401.32989501953125, "learning_rate": 4.66471435919718e-05, "loss": 67.0569, "step": 62520 }, { "epoch": 0.2526291123437986, "grad_norm": 888.2326049804688, "learning_rate": 4.6645397207601884e-05, "loss": 78.4589, "step": 62530 }, { "epoch": 0.25266951360916623, "grad_norm": 310.5882568359375, "learning_rate": 4.66436504012438e-05, "loss": 98.2279, "step": 62540 }, { "epoch": 0.25270991487453387, "grad_norm": 984.7472534179688, "learning_rate": 4.664190317293161e-05, "loss": 97.2269, "step": 62550 }, { "epoch": 0.2527503161399015, "grad_norm": 793.189697265625, "learning_rate": 4.6640155522699374e-05, "loss": 127.4911, "step": 62560 }, { "epoch": 0.25279071740526915, "grad_norm": 2629.53466796875, "learning_rate": 4.6638407450581165e-05, "loss": 95.0568, "step": 62570 }, { "epoch": 0.2528311186706368, "grad_norm": 380.42779541015625, "learning_rate": 4.663665895661107e-05, "loss": 81.795, "step": 62580 }, { "epoch": 0.2528715199360044, "grad_norm": 690.9551391601562, "learning_rate": 4.663491004082316e-05, "loss": 50.0963, "step": 62590 }, { "epoch": 0.252911921201372, "grad_norm": 1449.210693359375, "learning_rate": 4.6633160703251554e-05, "loss": 65.8343, "step": 62600 }, { "epoch": 0.25295232246673965, "grad_norm": 1021.6344604492188, "learning_rate": 4.6631410943930334e-05, "loss": 107.6034, "step": 62610 }, { "epoch": 0.2529927237321073, "grad_norm": 869.527099609375, "learning_rate": 4.662966076289362e-05, "loss": 102.0092, "step": 62620 }, { "epoch": 0.25303312499747493, "grad_norm": 584.1009521484375, "learning_rate": 4.662791016017554e-05, "loss": 69.7825, "step": 62630 }, { "epoch": 0.2530735262628426, "grad_norm": 802.0068359375, "learning_rate": 4.6626159135810205e-05, "loss": 72.057, "step": 62640 }, { "epoch": 0.25311392752821016, "grad_norm": 2002.407958984375, "learning_rate": 4.662440768983177e-05, "loss": 76.6827, "step": 62650 }, { "epoch": 0.2531543287935778, "grad_norm": 573.6917724609375, "learning_rate": 4.662265582227438e-05, "loss": 116.2082, "step": 62660 }, { "epoch": 0.25319473005894544, "grad_norm": 1771.079345703125, "learning_rate": 4.662090353317217e-05, "loss": 81.9311, "step": 62670 }, { "epoch": 0.2532351313243131, "grad_norm": 1918.1868896484375, "learning_rate": 4.661915082255932e-05, "loss": 109.1544, "step": 62680 }, { "epoch": 0.2532755325896807, "grad_norm": 1021.0386962890625, "learning_rate": 4.661739769047e-05, "loss": 71.0243, "step": 62690 }, { "epoch": 0.25331593385504836, "grad_norm": 1025.5999755859375, "learning_rate": 4.6615644136938375e-05, "loss": 69.5765, "step": 62700 }, { "epoch": 0.253356335120416, "grad_norm": 441.26617431640625, "learning_rate": 4.661389016199864e-05, "loss": 48.4432, "step": 62710 }, { "epoch": 0.2533967363857836, "grad_norm": 2817.1181640625, "learning_rate": 4.6612135765685e-05, "loss": 43.8955, "step": 62720 }, { "epoch": 0.2534371376511512, "grad_norm": 539.3783569335938, "learning_rate": 4.6610380948031627e-05, "loss": 63.5467, "step": 62730 }, { "epoch": 0.25347753891651886, "grad_norm": 735.4268188476562, "learning_rate": 4.660862570907277e-05, "loss": 107.1801, "step": 62740 }, { "epoch": 0.2535179401818865, "grad_norm": 1085.6962890625, "learning_rate": 4.6606870048842624e-05, "loss": 88.6567, "step": 62750 }, { "epoch": 0.25355834144725414, "grad_norm": 488.97369384765625, "learning_rate": 4.660511396737541e-05, "loss": 74.2245, "step": 62760 }, { "epoch": 0.2535987427126218, "grad_norm": 3399.495361328125, "learning_rate": 4.660335746470539e-05, "loss": 188.06, "step": 62770 }, { "epoch": 0.25363914397798937, "grad_norm": 515.9884033203125, "learning_rate": 4.6601600540866794e-05, "loss": 61.2756, "step": 62780 }, { "epoch": 0.253679545243357, "grad_norm": 328.14276123046875, "learning_rate": 4.659984319589387e-05, "loss": 66.6262, "step": 62790 }, { "epoch": 0.25371994650872465, "grad_norm": 1215.5234375, "learning_rate": 4.659808542982088e-05, "loss": 107.4222, "step": 62800 }, { "epoch": 0.2537603477740923, "grad_norm": 1078.8668212890625, "learning_rate": 4.65963272426821e-05, "loss": 101.8976, "step": 62810 }, { "epoch": 0.2538007490394599, "grad_norm": 620.238037109375, "learning_rate": 4.659456863451181e-05, "loss": 49.1838, "step": 62820 }, { "epoch": 0.25384115030482757, "grad_norm": 357.8902587890625, "learning_rate": 4.6592809605344276e-05, "loss": 76.9179, "step": 62830 }, { "epoch": 0.25388155157019515, "grad_norm": 763.6502075195312, "learning_rate": 4.65910501552138e-05, "loss": 68.4214, "step": 62840 }, { "epoch": 0.2539219528355628, "grad_norm": 650.4541015625, "learning_rate": 4.658929028415468e-05, "loss": 67.5361, "step": 62850 }, { "epoch": 0.25396235410093043, "grad_norm": 1105.059814453125, "learning_rate": 4.658752999220125e-05, "loss": 107.3979, "step": 62860 }, { "epoch": 0.25400275536629807, "grad_norm": 1753.42822265625, "learning_rate": 4.65857692793878e-05, "loss": 59.3314, "step": 62870 }, { "epoch": 0.2540431566316657, "grad_norm": 610.6260375976562, "learning_rate": 4.6584008145748656e-05, "loss": 75.3817, "step": 62880 }, { "epoch": 0.25408355789703335, "grad_norm": 815.94482421875, "learning_rate": 4.6582246591318175e-05, "loss": 49.2156, "step": 62890 }, { "epoch": 0.254123959162401, "grad_norm": 802.24072265625, "learning_rate": 4.658048461613068e-05, "loss": 84.062, "step": 62900 }, { "epoch": 0.2541643604277686, "grad_norm": 517.5693969726562, "learning_rate": 4.6578722220220525e-05, "loss": 101.054, "step": 62910 }, { "epoch": 0.2542047616931362, "grad_norm": 948.2103271484375, "learning_rate": 4.657695940362207e-05, "loss": 78.7945, "step": 62920 }, { "epoch": 0.25424516295850386, "grad_norm": 751.4588623046875, "learning_rate": 4.657519616636968e-05, "loss": 68.9439, "step": 62930 }, { "epoch": 0.2542855642238715, "grad_norm": 209.2197723388672, "learning_rate": 4.6573432508497735e-05, "loss": 68.4884, "step": 62940 }, { "epoch": 0.25432596548923914, "grad_norm": 793.4098510742188, "learning_rate": 4.6571668430040625e-05, "loss": 97.053, "step": 62950 }, { "epoch": 0.2543663667546068, "grad_norm": 606.1543579101562, "learning_rate": 4.6569903931032735e-05, "loss": 69.4006, "step": 62960 }, { "epoch": 0.25440676801997436, "grad_norm": 757.6336059570312, "learning_rate": 4.656813901150845e-05, "loss": 79.5474, "step": 62970 }, { "epoch": 0.254447169285342, "grad_norm": 866.605712890625, "learning_rate": 4.6566373671502196e-05, "loss": 56.2643, "step": 62980 }, { "epoch": 0.25448757055070964, "grad_norm": 121.89437866210938, "learning_rate": 4.656460791104839e-05, "loss": 48.1909, "step": 62990 }, { "epoch": 0.2545279718160773, "grad_norm": 1390.025390625, "learning_rate": 4.656284173018144e-05, "loss": 107.6447, "step": 63000 }, { "epoch": 0.2545683730814449, "grad_norm": 1153.6959228515625, "learning_rate": 4.656107512893579e-05, "loss": 75.1273, "step": 63010 }, { "epoch": 0.25460877434681256, "grad_norm": 441.3359069824219, "learning_rate": 4.655930810734589e-05, "loss": 46.4667, "step": 63020 }, { "epoch": 0.2546491756121802, "grad_norm": 914.875732421875, "learning_rate": 4.655754066544617e-05, "loss": 58.5283, "step": 63030 }, { "epoch": 0.2546895768775478, "grad_norm": 1101.917236328125, "learning_rate": 4.65557728032711e-05, "loss": 84.4799, "step": 63040 }, { "epoch": 0.2547299781429154, "grad_norm": 985.5469360351562, "learning_rate": 4.655400452085514e-05, "loss": 63.2999, "step": 63050 }, { "epoch": 0.25477037940828307, "grad_norm": 596.2723999023438, "learning_rate": 4.6552235818232764e-05, "loss": 72.8403, "step": 63060 }, { "epoch": 0.2548107806736507, "grad_norm": 654.357666015625, "learning_rate": 4.655046669543845e-05, "loss": 57.4079, "step": 63070 }, { "epoch": 0.25485118193901835, "grad_norm": 634.196044921875, "learning_rate": 4.6548697152506705e-05, "loss": 80.194, "step": 63080 }, { "epoch": 0.254891583204386, "grad_norm": 701.0132446289062, "learning_rate": 4.6546927189472014e-05, "loss": 46.6557, "step": 63090 }, { "epoch": 0.25493198446975357, "grad_norm": 1064.72412109375, "learning_rate": 4.654515680636888e-05, "loss": 73.7024, "step": 63100 }, { "epoch": 0.2549723857351212, "grad_norm": 967.7996826171875, "learning_rate": 4.654338600323182e-05, "loss": 62.9957, "step": 63110 }, { "epoch": 0.25501278700048885, "grad_norm": 533.4601440429688, "learning_rate": 4.654161478009536e-05, "loss": 63.6593, "step": 63120 }, { "epoch": 0.2550531882658565, "grad_norm": 588.319580078125, "learning_rate": 4.6539843136994036e-05, "loss": 60.8479, "step": 63130 }, { "epoch": 0.25509358953122413, "grad_norm": 1351.3424072265625, "learning_rate": 4.653807107396237e-05, "loss": 87.2913, "step": 63140 }, { "epoch": 0.25513399079659177, "grad_norm": 780.9151000976562, "learning_rate": 4.653629859103492e-05, "loss": 64.7784, "step": 63150 }, { "epoch": 0.25517439206195935, "grad_norm": 618.2050170898438, "learning_rate": 4.653452568824625e-05, "loss": 44.7988, "step": 63160 }, { "epoch": 0.255214793327327, "grad_norm": 295.4721984863281, "learning_rate": 4.653275236563091e-05, "loss": 83.7096, "step": 63170 }, { "epoch": 0.25525519459269463, "grad_norm": 567.467041015625, "learning_rate": 4.653097862322348e-05, "loss": 47.7101, "step": 63180 }, { "epoch": 0.2552955958580623, "grad_norm": 1230.712646484375, "learning_rate": 4.652920446105853e-05, "loss": 82.6227, "step": 63190 }, { "epoch": 0.2553359971234299, "grad_norm": 1003.6646118164062, "learning_rate": 4.652742987917066e-05, "loss": 82.4499, "step": 63200 }, { "epoch": 0.25537639838879755, "grad_norm": 488.0013122558594, "learning_rate": 4.652565487759446e-05, "loss": 69.6767, "step": 63210 }, { "epoch": 0.2554167996541652, "grad_norm": 423.2750244140625, "learning_rate": 4.652387945636454e-05, "loss": 99.7746, "step": 63220 }, { "epoch": 0.2554572009195328, "grad_norm": 531.3796997070312, "learning_rate": 4.65221036155155e-05, "loss": 65.6895, "step": 63230 }, { "epoch": 0.2554976021849004, "grad_norm": 664.51513671875, "learning_rate": 4.652032735508198e-05, "loss": 76.6833, "step": 63240 }, { "epoch": 0.25553800345026806, "grad_norm": 1186.4224853515625, "learning_rate": 4.65185506750986e-05, "loss": 75.191, "step": 63250 }, { "epoch": 0.2555784047156357, "grad_norm": 1025.0635986328125, "learning_rate": 4.651677357559998e-05, "loss": 79.5044, "step": 63260 }, { "epoch": 0.25561880598100334, "grad_norm": 1971.0733642578125, "learning_rate": 4.65149960566208e-05, "loss": 78.158, "step": 63270 }, { "epoch": 0.255659207246371, "grad_norm": 601.2109375, "learning_rate": 4.651321811819568e-05, "loss": 109.8638, "step": 63280 }, { "epoch": 0.25569960851173856, "grad_norm": 926.903076171875, "learning_rate": 4.65114397603593e-05, "loss": 67.0008, "step": 63290 }, { "epoch": 0.2557400097771062, "grad_norm": 543.5036010742188, "learning_rate": 4.6509660983146334e-05, "loss": 90.5155, "step": 63300 }, { "epoch": 0.25578041104247384, "grad_norm": 1474.077392578125, "learning_rate": 4.650788178659146e-05, "loss": 76.4021, "step": 63310 }, { "epoch": 0.2558208123078415, "grad_norm": 1020.8681030273438, "learning_rate": 4.650610217072934e-05, "loss": 78.0826, "step": 63320 }, { "epoch": 0.2558612135732091, "grad_norm": 963.8372192382812, "learning_rate": 4.650432213559469e-05, "loss": 63.825, "step": 63330 }, { "epoch": 0.25590161483857676, "grad_norm": 1577.4927978515625, "learning_rate": 4.650254168122222e-05, "loss": 76.4572, "step": 63340 }, { "epoch": 0.2559420161039444, "grad_norm": 985.75439453125, "learning_rate": 4.650076080764662e-05, "loss": 45.7849, "step": 63350 }, { "epoch": 0.255982417369312, "grad_norm": 651.5304565429688, "learning_rate": 4.649897951490262e-05, "loss": 86.52, "step": 63360 }, { "epoch": 0.2560228186346796, "grad_norm": 1333.999267578125, "learning_rate": 4.649719780302495e-05, "loss": 59.2886, "step": 63370 }, { "epoch": 0.25606321990004727, "grad_norm": 926.7659912109375, "learning_rate": 4.649541567204834e-05, "loss": 91.8786, "step": 63380 }, { "epoch": 0.2561036211654149, "grad_norm": 720.6255493164062, "learning_rate": 4.649363312200753e-05, "loss": 76.3271, "step": 63390 }, { "epoch": 0.25614402243078255, "grad_norm": 0.0, "learning_rate": 4.649185015293728e-05, "loss": 68.2173, "step": 63400 }, { "epoch": 0.2561844236961502, "grad_norm": 905.1610717773438, "learning_rate": 4.649006676487234e-05, "loss": 72.3964, "step": 63410 }, { "epoch": 0.25622482496151777, "grad_norm": 1136.3037109375, "learning_rate": 4.6488282957847494e-05, "loss": 102.3837, "step": 63420 }, { "epoch": 0.2562652262268854, "grad_norm": 1587.19921875, "learning_rate": 4.648649873189751e-05, "loss": 70.8869, "step": 63430 }, { "epoch": 0.25630562749225305, "grad_norm": 683.1766967773438, "learning_rate": 4.648471408705717e-05, "loss": 78.3768, "step": 63440 }, { "epoch": 0.2563460287576207, "grad_norm": 719.8806762695312, "learning_rate": 4.648292902336126e-05, "loss": 81.2503, "step": 63450 }, { "epoch": 0.25638643002298833, "grad_norm": 1979.6317138671875, "learning_rate": 4.648114354084459e-05, "loss": 129.6083, "step": 63460 }, { "epoch": 0.25642683128835597, "grad_norm": 565.4490356445312, "learning_rate": 4.647935763954198e-05, "loss": 78.8221, "step": 63470 }, { "epoch": 0.25646723255372356, "grad_norm": 446.4573669433594, "learning_rate": 4.647757131948822e-05, "loss": 103.6806, "step": 63480 }, { "epoch": 0.2565076338190912, "grad_norm": 1135.3199462890625, "learning_rate": 4.6475784580718155e-05, "loss": 97.5097, "step": 63490 }, { "epoch": 0.25654803508445884, "grad_norm": 725.8003540039062, "learning_rate": 4.6473997423266614e-05, "loss": 65.7066, "step": 63500 }, { "epoch": 0.2565884363498265, "grad_norm": 844.005859375, "learning_rate": 4.6472209847168435e-05, "loss": 75.9603, "step": 63510 }, { "epoch": 0.2566288376151941, "grad_norm": 555.097412109375, "learning_rate": 4.647042185245847e-05, "loss": 68.8893, "step": 63520 }, { "epoch": 0.25666923888056176, "grad_norm": 434.0204772949219, "learning_rate": 4.646863343917158e-05, "loss": 82.4939, "step": 63530 }, { "epoch": 0.2567096401459294, "grad_norm": 918.038818359375, "learning_rate": 4.646684460734263e-05, "loss": 61.4218, "step": 63540 }, { "epoch": 0.256750041411297, "grad_norm": 617.525390625, "learning_rate": 4.646505535700649e-05, "loss": 55.3601, "step": 63550 }, { "epoch": 0.2567904426766646, "grad_norm": 864.6348266601562, "learning_rate": 4.6463265688198044e-05, "loss": 76.1844, "step": 63560 }, { "epoch": 0.25683084394203226, "grad_norm": 2091.900390625, "learning_rate": 4.6461475600952184e-05, "loss": 76.1381, "step": 63570 }, { "epoch": 0.2568712452073999, "grad_norm": 1115.5269775390625, "learning_rate": 4.645968509530381e-05, "loss": 69.8563, "step": 63580 }, { "epoch": 0.25691164647276754, "grad_norm": 808.4495849609375, "learning_rate": 4.645789417128783e-05, "loss": 65.0377, "step": 63590 }, { "epoch": 0.2569520477381352, "grad_norm": 867.1176147460938, "learning_rate": 4.645610282893915e-05, "loss": 78.228, "step": 63600 }, { "epoch": 0.25699244900350277, "grad_norm": 452.4241943359375, "learning_rate": 4.64543110682927e-05, "loss": 80.4785, "step": 63610 }, { "epoch": 0.2570328502688704, "grad_norm": 626.6029663085938, "learning_rate": 4.6452518889383414e-05, "loss": 68.1861, "step": 63620 }, { "epoch": 0.25707325153423805, "grad_norm": 561.9072265625, "learning_rate": 4.645072629224622e-05, "loss": 68.4941, "step": 63630 }, { "epoch": 0.2571136527996057, "grad_norm": 495.2870788574219, "learning_rate": 4.6448933276916076e-05, "loss": 78.5616, "step": 63640 }, { "epoch": 0.2571540540649733, "grad_norm": 757.0482788085938, "learning_rate": 4.644713984342793e-05, "loss": 61.9383, "step": 63650 }, { "epoch": 0.25719445533034097, "grad_norm": 666.8660278320312, "learning_rate": 4.644534599181677e-05, "loss": 60.4049, "step": 63660 }, { "epoch": 0.2572348565957086, "grad_norm": 487.714111328125, "learning_rate": 4.644355172211753e-05, "loss": 65.7735, "step": 63670 }, { "epoch": 0.2572752578610762, "grad_norm": 403.6894836425781, "learning_rate": 4.644175703436522e-05, "loss": 74.5089, "step": 63680 }, { "epoch": 0.25731565912644383, "grad_norm": 806.9922485351562, "learning_rate": 4.643996192859481e-05, "loss": 47.5238, "step": 63690 }, { "epoch": 0.25735606039181147, "grad_norm": 1357.157470703125, "learning_rate": 4.643816640484131e-05, "loss": 86.1968, "step": 63700 }, { "epoch": 0.2573964616571791, "grad_norm": 1357.3590087890625, "learning_rate": 4.643637046313972e-05, "loss": 82.4154, "step": 63710 }, { "epoch": 0.25743686292254675, "grad_norm": 1421.4974365234375, "learning_rate": 4.6434574103525044e-05, "loss": 91.1663, "step": 63720 }, { "epoch": 0.2574772641879144, "grad_norm": 1042.117431640625, "learning_rate": 4.6432777326032316e-05, "loss": 86.4203, "step": 63730 }, { "epoch": 0.257517665453282, "grad_norm": 1068.4251708984375, "learning_rate": 4.6430980130696555e-05, "loss": 79.4296, "step": 63740 }, { "epoch": 0.2575580667186496, "grad_norm": 709.8405151367188, "learning_rate": 4.642918251755281e-05, "loss": 59.4794, "step": 63750 }, { "epoch": 0.25759846798401725, "grad_norm": 752.4713745117188, "learning_rate": 4.6427384486636113e-05, "loss": 80.5621, "step": 63760 }, { "epoch": 0.2576388692493849, "grad_norm": 500.284423828125, "learning_rate": 4.6425586037981526e-05, "loss": 70.1939, "step": 63770 }, { "epoch": 0.25767927051475253, "grad_norm": 1230.3875732421875, "learning_rate": 4.6423787171624114e-05, "loss": 93.3144, "step": 63780 }, { "epoch": 0.2577196717801202, "grad_norm": 375.65838623046875, "learning_rate": 4.642198788759894e-05, "loss": 83.3093, "step": 63790 }, { "epoch": 0.25776007304548776, "grad_norm": 1165.2003173828125, "learning_rate": 4.642018818594107e-05, "loss": 67.3937, "step": 63800 }, { "epoch": 0.2578004743108554, "grad_norm": 1046.146728515625, "learning_rate": 4.641838806668562e-05, "loss": 72.0059, "step": 63810 }, { "epoch": 0.25784087557622304, "grad_norm": 695.942138671875, "learning_rate": 4.6416587529867664e-05, "loss": 54.245, "step": 63820 }, { "epoch": 0.2578812768415907, "grad_norm": 1145.5577392578125, "learning_rate": 4.6414786575522306e-05, "loss": 88.9208, "step": 63830 }, { "epoch": 0.2579216781069583, "grad_norm": 453.61181640625, "learning_rate": 4.6412985203684654e-05, "loss": 66.1485, "step": 63840 }, { "epoch": 0.25796207937232596, "grad_norm": 765.479248046875, "learning_rate": 4.6411183414389837e-05, "loss": 74.9149, "step": 63850 }, { "epoch": 0.2580024806376936, "grad_norm": 794.6134643554688, "learning_rate": 4.6409381207672974e-05, "loss": 58.8936, "step": 63860 }, { "epoch": 0.2580428819030612, "grad_norm": 582.8801879882812, "learning_rate": 4.64075785835692e-05, "loss": 95.4306, "step": 63870 }, { "epoch": 0.2580832831684288, "grad_norm": 1164.59619140625, "learning_rate": 4.640577554211366e-05, "loss": 64.1848, "step": 63880 }, { "epoch": 0.25812368443379646, "grad_norm": 1310.0181884765625, "learning_rate": 4.640397208334151e-05, "loss": 63.126, "step": 63890 }, { "epoch": 0.2581640856991641, "grad_norm": 571.4081420898438, "learning_rate": 4.64021682072879e-05, "loss": 42.7484, "step": 63900 }, { "epoch": 0.25820448696453174, "grad_norm": 726.766845703125, "learning_rate": 4.640036391398801e-05, "loss": 43.3208, "step": 63910 }, { "epoch": 0.2582448882298994, "grad_norm": 754.6783447265625, "learning_rate": 4.639855920347701e-05, "loss": 110.9717, "step": 63920 }, { "epoch": 0.25828528949526697, "grad_norm": 1137.88818359375, "learning_rate": 4.639675407579007e-05, "loss": 70.8244, "step": 63930 }, { "epoch": 0.2583256907606346, "grad_norm": 508.732421875, "learning_rate": 4.6394948530962396e-05, "loss": 55.6226, "step": 63940 }, { "epoch": 0.25836609202600225, "grad_norm": 405.8502197265625, "learning_rate": 4.639314256902919e-05, "loss": 51.9442, "step": 63950 }, { "epoch": 0.2584064932913699, "grad_norm": 941.8868408203125, "learning_rate": 4.6391336190025644e-05, "loss": 97.1299, "step": 63960 }, { "epoch": 0.2584468945567375, "grad_norm": 368.6508483886719, "learning_rate": 4.6389529393987e-05, "loss": 52.9645, "step": 63970 }, { "epoch": 0.25848729582210517, "grad_norm": 846.4256591796875, "learning_rate": 4.638772218094847e-05, "loss": 76.5249, "step": 63980 }, { "epoch": 0.25852769708747275, "grad_norm": 552.8169555664062, "learning_rate": 4.638591455094527e-05, "loss": 58.1772, "step": 63990 }, { "epoch": 0.2585680983528404, "grad_norm": 1205.1046142578125, "learning_rate": 4.638410650401267e-05, "loss": 59.0313, "step": 64000 }, { "epoch": 0.25860849961820803, "grad_norm": 979.0299682617188, "learning_rate": 4.63822980401859e-05, "loss": 75.0273, "step": 64010 }, { "epoch": 0.25864890088357567, "grad_norm": 1404.583740234375, "learning_rate": 4.638048915950022e-05, "loss": 77.1647, "step": 64020 }, { "epoch": 0.2586893021489433, "grad_norm": 1869.6007080078125, "learning_rate": 4.637867986199089e-05, "loss": 57.802, "step": 64030 }, { "epoch": 0.25872970341431095, "grad_norm": 502.42138671875, "learning_rate": 4.6376870147693196e-05, "loss": 62.1255, "step": 64040 }, { "epoch": 0.2587701046796786, "grad_norm": 700.3970947265625, "learning_rate": 4.6375060016642415e-05, "loss": 72.0223, "step": 64050 }, { "epoch": 0.2588105059450462, "grad_norm": 1100.4659423828125, "learning_rate": 4.6373249468873833e-05, "loss": 84.0306, "step": 64060 }, { "epoch": 0.2588509072104138, "grad_norm": 818.85595703125, "learning_rate": 4.637143850442275e-05, "loss": 82.6929, "step": 64070 }, { "epoch": 0.25889130847578146, "grad_norm": 373.8831787109375, "learning_rate": 4.6369627123324465e-05, "loss": 65.1804, "step": 64080 }, { "epoch": 0.2589317097411491, "grad_norm": 932.3283081054688, "learning_rate": 4.6367815325614306e-05, "loss": 72.5139, "step": 64090 }, { "epoch": 0.25897211100651674, "grad_norm": 1563.8875732421875, "learning_rate": 4.636600311132758e-05, "loss": 100.7779, "step": 64100 }, { "epoch": 0.2590125122718844, "grad_norm": 1865.01220703125, "learning_rate": 4.6364190480499624e-05, "loss": 105.2925, "step": 64110 }, { "epoch": 0.25905291353725196, "grad_norm": 947.768798828125, "learning_rate": 4.636237743316578e-05, "loss": 58.8598, "step": 64120 }, { "epoch": 0.2590933148026196, "grad_norm": 663.779052734375, "learning_rate": 4.636056396936138e-05, "loss": 97.7154, "step": 64130 }, { "epoch": 0.25913371606798724, "grad_norm": 814.3782958984375, "learning_rate": 4.6358750089121795e-05, "loss": 72.49, "step": 64140 }, { "epoch": 0.2591741173333549, "grad_norm": 634.1887817382812, "learning_rate": 4.635693579248238e-05, "loss": 64.081, "step": 64150 }, { "epoch": 0.2592145185987225, "grad_norm": 1665.0477294921875, "learning_rate": 4.635512107947851e-05, "loss": 60.826, "step": 64160 }, { "epoch": 0.25925491986409016, "grad_norm": 587.009765625, "learning_rate": 4.635330595014555e-05, "loss": 62.3891, "step": 64170 }, { "epoch": 0.2592953211294578, "grad_norm": 1239.5069580078125, "learning_rate": 4.635149040451891e-05, "loss": 83.9093, "step": 64180 }, { "epoch": 0.2593357223948254, "grad_norm": 743.6141357421875, "learning_rate": 4.634967444263397e-05, "loss": 111.2367, "step": 64190 }, { "epoch": 0.259376123660193, "grad_norm": 848.8331909179688, "learning_rate": 4.6347858064526125e-05, "loss": 56.9608, "step": 64200 }, { "epoch": 0.25941652492556067, "grad_norm": 628.3904418945312, "learning_rate": 4.6346041270230804e-05, "loss": 72.7587, "step": 64210 }, { "epoch": 0.2594569261909283, "grad_norm": 849.9680786132812, "learning_rate": 4.634422405978342e-05, "loss": 47.3868, "step": 64220 }, { "epoch": 0.25949732745629595, "grad_norm": 474.90264892578125, "learning_rate": 4.6342406433219394e-05, "loss": 102.6614, "step": 64230 }, { "epoch": 0.2595377287216636, "grad_norm": 2507.705322265625, "learning_rate": 4.634058839057417e-05, "loss": 70.4811, "step": 64240 }, { "epoch": 0.25957812998703117, "grad_norm": 1063.095458984375, "learning_rate": 4.6338769931883185e-05, "loss": 78.3297, "step": 64250 }, { "epoch": 0.2596185312523988, "grad_norm": 622.4473266601562, "learning_rate": 4.63369510571819e-05, "loss": 77.0825, "step": 64260 }, { "epoch": 0.25965893251776645, "grad_norm": 674.91455078125, "learning_rate": 4.633513176650577e-05, "loss": 60.1044, "step": 64270 }, { "epoch": 0.2596993337831341, "grad_norm": 857.1925659179688, "learning_rate": 4.6333312059890256e-05, "loss": 75.2282, "step": 64280 }, { "epoch": 0.25973973504850173, "grad_norm": 1186.2684326171875, "learning_rate": 4.633149193737084e-05, "loss": 90.6651, "step": 64290 }, { "epoch": 0.25978013631386937, "grad_norm": 503.4192810058594, "learning_rate": 4.632967139898301e-05, "loss": 52.2867, "step": 64300 }, { "epoch": 0.25982053757923695, "grad_norm": 0.0, "learning_rate": 4.632785044476225e-05, "loss": 54.2169, "step": 64310 }, { "epoch": 0.2598609388446046, "grad_norm": 808.451904296875, "learning_rate": 4.6326029074744074e-05, "loss": 89.7694, "step": 64320 }, { "epoch": 0.25990134010997223, "grad_norm": 3421.571533203125, "learning_rate": 4.6324207288963974e-05, "loss": 94.0056, "step": 64330 }, { "epoch": 0.2599417413753399, "grad_norm": 300.9707336425781, "learning_rate": 4.632238508745748e-05, "loss": 72.748, "step": 64340 }, { "epoch": 0.2599821426407075, "grad_norm": 1028.27197265625, "learning_rate": 4.632056247026011e-05, "loss": 66.6853, "step": 64350 }, { "epoch": 0.26002254390607515, "grad_norm": 730.0326538085938, "learning_rate": 4.63187394374074e-05, "loss": 90.4567, "step": 64360 }, { "epoch": 0.2600629451714428, "grad_norm": 859.37890625, "learning_rate": 4.6316915988934884e-05, "loss": 76.2773, "step": 64370 }, { "epoch": 0.2601033464368104, "grad_norm": 956.5285034179688, "learning_rate": 4.631509212487811e-05, "loss": 70.2754, "step": 64380 }, { "epoch": 0.260143747702178, "grad_norm": 627.6688232421875, "learning_rate": 4.6313267845272656e-05, "loss": 61.4442, "step": 64390 }, { "epoch": 0.26018414896754566, "grad_norm": 716.159423828125, "learning_rate": 4.631144315015407e-05, "loss": 72.7093, "step": 64400 }, { "epoch": 0.2602245502329133, "grad_norm": 447.9820861816406, "learning_rate": 4.630961803955792e-05, "loss": 66.9453, "step": 64410 }, { "epoch": 0.26026495149828094, "grad_norm": 588.1676025390625, "learning_rate": 4.63077925135198e-05, "loss": 44.5305, "step": 64420 }, { "epoch": 0.2603053527636486, "grad_norm": 594.3401489257812, "learning_rate": 4.6305966572075286e-05, "loss": 60.6098, "step": 64430 }, { "epoch": 0.26034575402901616, "grad_norm": 1224.4241943359375, "learning_rate": 4.630414021525999e-05, "loss": 95.3115, "step": 64440 }, { "epoch": 0.2603861552943838, "grad_norm": 1307.3175048828125, "learning_rate": 4.6302313443109526e-05, "loss": 59.6623, "step": 64450 }, { "epoch": 0.26042655655975144, "grad_norm": 667.8453979492188, "learning_rate": 4.6300486255659484e-05, "loss": 68.9579, "step": 64460 }, { "epoch": 0.2604669578251191, "grad_norm": 648.3145141601562, "learning_rate": 4.6298658652945494e-05, "loss": 72.2152, "step": 64470 }, { "epoch": 0.2605073590904867, "grad_norm": 1166.9095458984375, "learning_rate": 4.629683063500319e-05, "loss": 56.1489, "step": 64480 }, { "epoch": 0.26054776035585436, "grad_norm": 856.700927734375, "learning_rate": 4.629500220186821e-05, "loss": 92.3313, "step": 64490 }, { "epoch": 0.260588161621222, "grad_norm": 1207.9443359375, "learning_rate": 4.629317335357619e-05, "loss": 62.1099, "step": 64500 }, { "epoch": 0.2606285628865896, "grad_norm": 618.7341918945312, "learning_rate": 4.6291344090162804e-05, "loss": 89.1145, "step": 64510 }, { "epoch": 0.2606689641519572, "grad_norm": 1234.6435546875, "learning_rate": 4.62895144116637e-05, "loss": 81.7492, "step": 64520 }, { "epoch": 0.26070936541732487, "grad_norm": 1199.2274169921875, "learning_rate": 4.628768431811455e-05, "loss": 93.2701, "step": 64530 }, { "epoch": 0.2607497666826925, "grad_norm": 787.503662109375, "learning_rate": 4.6285853809551036e-05, "loss": 67.4107, "step": 64540 }, { "epoch": 0.26079016794806015, "grad_norm": 1185.4876708984375, "learning_rate": 4.6284022886008836e-05, "loss": 137.017, "step": 64550 }, { "epoch": 0.2608305692134278, "grad_norm": 1550.175048828125, "learning_rate": 4.628219154752367e-05, "loss": 105.3924, "step": 64560 }, { "epoch": 0.26087097047879537, "grad_norm": 474.1405334472656, "learning_rate": 4.628035979413121e-05, "loss": 63.6596, "step": 64570 }, { "epoch": 0.260911371744163, "grad_norm": 631.3251953125, "learning_rate": 4.627852762586718e-05, "loss": 88.0774, "step": 64580 }, { "epoch": 0.26095177300953065, "grad_norm": 932.6629638671875, "learning_rate": 4.627669504276731e-05, "loss": 69.9022, "step": 64590 }, { "epoch": 0.2609921742748983, "grad_norm": 462.3409423828125, "learning_rate": 4.6274862044867304e-05, "loss": 105.5615, "step": 64600 }, { "epoch": 0.26103257554026593, "grad_norm": 421.7193908691406, "learning_rate": 4.627302863220291e-05, "loss": 83.5629, "step": 64610 }, { "epoch": 0.26107297680563357, "grad_norm": 849.374267578125, "learning_rate": 4.627119480480987e-05, "loss": 108.5556, "step": 64620 }, { "epoch": 0.26111337807100116, "grad_norm": 851.120849609375, "learning_rate": 4.626936056272394e-05, "loss": 76.4113, "step": 64630 }, { "epoch": 0.2611537793363688, "grad_norm": 587.6618041992188, "learning_rate": 4.626752590598088e-05, "loss": 97.3091, "step": 64640 }, { "epoch": 0.26119418060173644, "grad_norm": 1624.475341796875, "learning_rate": 4.6265690834616446e-05, "loss": 66.4535, "step": 64650 }, { "epoch": 0.2612345818671041, "grad_norm": 194.85707092285156, "learning_rate": 4.626385534866642e-05, "loss": 51.6759, "step": 64660 }, { "epoch": 0.2612749831324717, "grad_norm": 1180.03369140625, "learning_rate": 4.626201944816659e-05, "loss": 51.8923, "step": 64670 }, { "epoch": 0.26131538439783936, "grad_norm": 951.8189697265625, "learning_rate": 4.626018313315275e-05, "loss": 76.8008, "step": 64680 }, { "epoch": 0.261355785663207, "grad_norm": 894.181396484375, "learning_rate": 4.625834640366068e-05, "loss": 93.1069, "step": 64690 }, { "epoch": 0.2613961869285746, "grad_norm": 575.2242431640625, "learning_rate": 4.625650925972622e-05, "loss": 85.9263, "step": 64700 }, { "epoch": 0.2614365881939422, "grad_norm": 925.6211547851562, "learning_rate": 4.6254671701385154e-05, "loss": 69.3511, "step": 64710 }, { "epoch": 0.26147698945930986, "grad_norm": 588.945068359375, "learning_rate": 4.625283372867333e-05, "loss": 36.42, "step": 64720 }, { "epoch": 0.2615173907246775, "grad_norm": 1043.0775146484375, "learning_rate": 4.625099534162656e-05, "loss": 81.0917, "step": 64730 }, { "epoch": 0.26155779199004514, "grad_norm": 1658.7032470703125, "learning_rate": 4.62491565402807e-05, "loss": 62.2384, "step": 64740 }, { "epoch": 0.2615981932554128, "grad_norm": 1211.529052734375, "learning_rate": 4.6247317324671605e-05, "loss": 74.7915, "step": 64750 }, { "epoch": 0.26163859452078037, "grad_norm": 1145.8216552734375, "learning_rate": 4.6245477694835106e-05, "loss": 52.6177, "step": 64760 }, { "epoch": 0.261678995786148, "grad_norm": 758.6449584960938, "learning_rate": 4.6243637650807086e-05, "loss": 85.0178, "step": 64770 }, { "epoch": 0.26171939705151565, "grad_norm": 427.8953857421875, "learning_rate": 4.624179719262342e-05, "loss": 76.1282, "step": 64780 }, { "epoch": 0.2617597983168833, "grad_norm": 750.1713256835938, "learning_rate": 4.623995632031997e-05, "loss": 74.4636, "step": 64790 }, { "epoch": 0.2618001995822509, "grad_norm": 421.75958251953125, "learning_rate": 4.6238115033932636e-05, "loss": 56.7678, "step": 64800 }, { "epoch": 0.26184060084761857, "grad_norm": 1215.94482421875, "learning_rate": 4.623627333349732e-05, "loss": 72.2328, "step": 64810 }, { "epoch": 0.2618810021129862, "grad_norm": 0.0, "learning_rate": 4.623443121904992e-05, "loss": 68.4072, "step": 64820 }, { "epoch": 0.2619214033783538, "grad_norm": 739.1293334960938, "learning_rate": 4.623258869062636e-05, "loss": 58.8318, "step": 64830 }, { "epoch": 0.26196180464372143, "grad_norm": 760.8939819335938, "learning_rate": 4.623074574826254e-05, "loss": 85.5972, "step": 64840 }, { "epoch": 0.26200220590908907, "grad_norm": 902.6166381835938, "learning_rate": 4.622890239199441e-05, "loss": 77.9647, "step": 64850 }, { "epoch": 0.2620426071744567, "grad_norm": 504.2374572753906, "learning_rate": 4.622705862185789e-05, "loss": 61.3637, "step": 64860 }, { "epoch": 0.26208300843982435, "grad_norm": 550.212890625, "learning_rate": 4.622521443788894e-05, "loss": 58.025, "step": 64870 }, { "epoch": 0.262123409705192, "grad_norm": 1491.5367431640625, "learning_rate": 4.622336984012351e-05, "loss": 119.6012, "step": 64880 }, { "epoch": 0.2621638109705596, "grad_norm": 1074.4013671875, "learning_rate": 4.622152482859755e-05, "loss": 53.4368, "step": 64890 }, { "epoch": 0.2622042122359272, "grad_norm": 1384.9111328125, "learning_rate": 4.621967940334705e-05, "loss": 73.9773, "step": 64900 }, { "epoch": 0.26224461350129485, "grad_norm": 594.1865234375, "learning_rate": 4.621783356440796e-05, "loss": 59.9528, "step": 64910 }, { "epoch": 0.2622850147666625, "grad_norm": 1251.0433349609375, "learning_rate": 4.621598731181629e-05, "loss": 69.5674, "step": 64920 }, { "epoch": 0.26232541603203013, "grad_norm": 799.28564453125, "learning_rate": 4.621414064560803e-05, "loss": 90.669, "step": 64930 }, { "epoch": 0.2623658172973978, "grad_norm": 766.1452026367188, "learning_rate": 4.6212293565819166e-05, "loss": 65.2333, "step": 64940 }, { "epoch": 0.26240621856276536, "grad_norm": 2778.8935546875, "learning_rate": 4.6210446072485725e-05, "loss": 89.6359, "step": 64950 }, { "epoch": 0.262446619828133, "grad_norm": 1777.0819091796875, "learning_rate": 4.6208598165643715e-05, "loss": 106.0751, "step": 64960 }, { "epoch": 0.26248702109350064, "grad_norm": 607.8473510742188, "learning_rate": 4.6206749845329164e-05, "loss": 65.859, "step": 64970 }, { "epoch": 0.2625274223588683, "grad_norm": 640.949951171875, "learning_rate": 4.62049011115781e-05, "loss": 93.8094, "step": 64980 }, { "epoch": 0.2625678236242359, "grad_norm": 449.35650634765625, "learning_rate": 4.620305196442659e-05, "loss": 52.2666, "step": 64990 }, { "epoch": 0.26260822488960356, "grad_norm": 1173.4410400390625, "learning_rate": 4.620120240391065e-05, "loss": 93.0513, "step": 65000 }, { "epoch": 0.2626486261549712, "grad_norm": 489.27716064453125, "learning_rate": 4.619935243006636e-05, "loss": 64.3064, "step": 65010 }, { "epoch": 0.2626890274203388, "grad_norm": 265.78076171875, "learning_rate": 4.619750204292978e-05, "loss": 80.1383, "step": 65020 }, { "epoch": 0.2627294286857064, "grad_norm": 581.3447265625, "learning_rate": 4.619565124253698e-05, "loss": 41.3353, "step": 65030 }, { "epoch": 0.26276982995107406, "grad_norm": 3157.60595703125, "learning_rate": 4.619380002892406e-05, "loss": 86.7737, "step": 65040 }, { "epoch": 0.2628102312164417, "grad_norm": 679.6066284179688, "learning_rate": 4.619194840212708e-05, "loss": 73.4557, "step": 65050 }, { "epoch": 0.26285063248180934, "grad_norm": 352.4366760253906, "learning_rate": 4.6190096362182167e-05, "loss": 84.3029, "step": 65060 }, { "epoch": 0.262891033747177, "grad_norm": 1345.6533203125, "learning_rate": 4.618824390912541e-05, "loss": 76.0789, "step": 65070 }, { "epoch": 0.26293143501254457, "grad_norm": 606.6847534179688, "learning_rate": 4.618639104299294e-05, "loss": 84.0097, "step": 65080 }, { "epoch": 0.2629718362779122, "grad_norm": 777.6514892578125, "learning_rate": 4.618453776382086e-05, "loss": 54.7092, "step": 65090 }, { "epoch": 0.26301223754327985, "grad_norm": 686.7308959960938, "learning_rate": 4.61826840716453e-05, "loss": 61.8434, "step": 65100 }, { "epoch": 0.2630526388086475, "grad_norm": 538.2279052734375, "learning_rate": 4.618082996650243e-05, "loss": 49.7606, "step": 65110 }, { "epoch": 0.2630930400740151, "grad_norm": 537.077392578125, "learning_rate": 4.617897544842836e-05, "loss": 66.0555, "step": 65120 }, { "epoch": 0.26313344133938277, "grad_norm": 676.6751098632812, "learning_rate": 4.617712051745927e-05, "loss": 64.1107, "step": 65130 }, { "epoch": 0.2631738426047504, "grad_norm": 587.270263671875, "learning_rate": 4.61752651736313e-05, "loss": 63.3736, "step": 65140 }, { "epoch": 0.263214243870118, "grad_norm": 1263.456787109375, "learning_rate": 4.617340941698064e-05, "loss": 59.6258, "step": 65150 }, { "epoch": 0.26325464513548563, "grad_norm": 427.99737548828125, "learning_rate": 4.617155324754346e-05, "loss": 52.6464, "step": 65160 }, { "epoch": 0.26329504640085327, "grad_norm": 0.0, "learning_rate": 4.616969666535596e-05, "loss": 73.0998, "step": 65170 }, { "epoch": 0.2633354476662209, "grad_norm": 617.71337890625, "learning_rate": 4.6167839670454315e-05, "loss": 44.5023, "step": 65180 }, { "epoch": 0.26337584893158855, "grad_norm": 517.5345458984375, "learning_rate": 4.616598226287474e-05, "loss": 95.0356, "step": 65190 }, { "epoch": 0.2634162501969562, "grad_norm": 1026.675537109375, "learning_rate": 4.616412444265345e-05, "loss": 67.167, "step": 65200 }, { "epoch": 0.2634566514623238, "grad_norm": 1128.9427490234375, "learning_rate": 4.616226620982665e-05, "loss": 89.6982, "step": 65210 }, { "epoch": 0.2634970527276914, "grad_norm": 809.5255737304688, "learning_rate": 4.6160407564430574e-05, "loss": 65.6736, "step": 65220 }, { "epoch": 0.26353745399305906, "grad_norm": 1231.8623046875, "learning_rate": 4.6158548506501464e-05, "loss": 86.9045, "step": 65230 }, { "epoch": 0.2635778552584267, "grad_norm": 1171.7540283203125, "learning_rate": 4.6156689036075555e-05, "loss": 77.3384, "step": 65240 }, { "epoch": 0.26361825652379434, "grad_norm": 619.1737060546875, "learning_rate": 4.615482915318911e-05, "loss": 65.3949, "step": 65250 }, { "epoch": 0.263658657789162, "grad_norm": 518.4265747070312, "learning_rate": 4.6152968857878366e-05, "loss": 55.1953, "step": 65260 }, { "epoch": 0.26369905905452956, "grad_norm": 1298.9517822265625, "learning_rate": 4.615110815017961e-05, "loss": 56.1695, "step": 65270 }, { "epoch": 0.2637394603198972, "grad_norm": 452.3489074707031, "learning_rate": 4.614924703012911e-05, "loss": 58.1403, "step": 65280 }, { "epoch": 0.26377986158526484, "grad_norm": 809.9728393554688, "learning_rate": 4.614738549776315e-05, "loss": 49.3975, "step": 65290 }, { "epoch": 0.2638202628506325, "grad_norm": 703.8427124023438, "learning_rate": 4.614552355311802e-05, "loss": 54.3999, "step": 65300 }, { "epoch": 0.2638606641160001, "grad_norm": 518.8963623046875, "learning_rate": 4.6143661196230026e-05, "loss": 59.6565, "step": 65310 }, { "epoch": 0.26390106538136776, "grad_norm": 780.0380249023438, "learning_rate": 4.614179842713547e-05, "loss": 85.9944, "step": 65320 }, { "epoch": 0.2639414666467354, "grad_norm": 855.755126953125, "learning_rate": 4.613993524587067e-05, "loss": 57.039, "step": 65330 }, { "epoch": 0.263981867912103, "grad_norm": 722.5667114257812, "learning_rate": 4.613807165247195e-05, "loss": 77.862, "step": 65340 }, { "epoch": 0.2640222691774706, "grad_norm": 833.8828125, "learning_rate": 4.613620764697564e-05, "loss": 70.7339, "step": 65350 }, { "epoch": 0.26406267044283827, "grad_norm": 827.6605224609375, "learning_rate": 4.6134343229418075e-05, "loss": 91.2179, "step": 65360 }, { "epoch": 0.2641030717082059, "grad_norm": 398.13287353515625, "learning_rate": 4.613247839983561e-05, "loss": 85.9632, "step": 65370 }, { "epoch": 0.26414347297357355, "grad_norm": 661.8615112304688, "learning_rate": 4.613061315826461e-05, "loss": 89.6971, "step": 65380 }, { "epoch": 0.2641838742389412, "grad_norm": 425.14483642578125, "learning_rate": 4.612874750474142e-05, "loss": 46.4946, "step": 65390 }, { "epoch": 0.26422427550430877, "grad_norm": 709.1620483398438, "learning_rate": 4.612688143930242e-05, "loss": 97.9462, "step": 65400 }, { "epoch": 0.2642646767696764, "grad_norm": 888.79248046875, "learning_rate": 4.612501496198398e-05, "loss": 94.5246, "step": 65410 }, { "epoch": 0.26430507803504405, "grad_norm": 629.85791015625, "learning_rate": 4.612314807282251e-05, "loss": 47.7306, "step": 65420 }, { "epoch": 0.2643454793004117, "grad_norm": 76.55754089355469, "learning_rate": 4.612128077185439e-05, "loss": 79.3505, "step": 65430 }, { "epoch": 0.26438588056577933, "grad_norm": 1617.7518310546875, "learning_rate": 4.611941305911602e-05, "loss": 100.8537, "step": 65440 }, { "epoch": 0.26442628183114697, "grad_norm": 653.6577758789062, "learning_rate": 4.611754493464383e-05, "loss": 61.9429, "step": 65450 }, { "epoch": 0.2644666830965146, "grad_norm": 1721.381103515625, "learning_rate": 4.611567639847422e-05, "loss": 72.449, "step": 65460 }, { "epoch": 0.2645070843618822, "grad_norm": 2042.4002685546875, "learning_rate": 4.611380745064363e-05, "loss": 66.475, "step": 65470 }, { "epoch": 0.26454748562724983, "grad_norm": 603.5230712890625, "learning_rate": 4.61119380911885e-05, "loss": 81.908, "step": 65480 }, { "epoch": 0.2645878868926175, "grad_norm": 507.7789001464844, "learning_rate": 4.611006832014526e-05, "loss": 57.1852, "step": 65490 }, { "epoch": 0.2646282881579851, "grad_norm": 646.4906005859375, "learning_rate": 4.610819813755038e-05, "loss": 48.6094, "step": 65500 }, { "epoch": 0.26466868942335275, "grad_norm": 783.2797241210938, "learning_rate": 4.61063275434403e-05, "loss": 67.8832, "step": 65510 }, { "epoch": 0.2647090906887204, "grad_norm": 698.7806396484375, "learning_rate": 4.610445653785151e-05, "loss": 53.7803, "step": 65520 }, { "epoch": 0.264749491954088, "grad_norm": 563.870361328125, "learning_rate": 4.610258512082046e-05, "loss": 54.008, "step": 65530 }, { "epoch": 0.2647898932194556, "grad_norm": 734.525146484375, "learning_rate": 4.610071329238366e-05, "loss": 59.3586, "step": 65540 }, { "epoch": 0.26483029448482326, "grad_norm": 568.80419921875, "learning_rate": 4.6098841052577583e-05, "loss": 52.5479, "step": 65550 }, { "epoch": 0.2648706957501909, "grad_norm": 473.8629150390625, "learning_rate": 4.6096968401438745e-05, "loss": 88.4755, "step": 65560 }, { "epoch": 0.26491109701555854, "grad_norm": 804.3765258789062, "learning_rate": 4.609509533900364e-05, "loss": 82.4179, "step": 65570 }, { "epoch": 0.2649514982809262, "grad_norm": 556.0540161132812, "learning_rate": 4.6093221865308786e-05, "loss": 108.8011, "step": 65580 }, { "epoch": 0.26499189954629376, "grad_norm": 761.5044555664062, "learning_rate": 4.609134798039073e-05, "loss": 61.9983, "step": 65590 }, { "epoch": 0.2650323008116614, "grad_norm": 1213.9462890625, "learning_rate": 4.6089473684285974e-05, "loss": 93.0749, "step": 65600 }, { "epoch": 0.26507270207702904, "grad_norm": 691.1778564453125, "learning_rate": 4.608759897703107e-05, "loss": 55.7611, "step": 65610 }, { "epoch": 0.2651131033423967, "grad_norm": 1190.127685546875, "learning_rate": 4.608572385866257e-05, "loss": 56.0981, "step": 65620 }, { "epoch": 0.2651535046077643, "grad_norm": 1123.38916015625, "learning_rate": 4.6083848329217026e-05, "loss": 88.4509, "step": 65630 }, { "epoch": 0.26519390587313196, "grad_norm": 358.7076110839844, "learning_rate": 4.608197238873101e-05, "loss": 70.7137, "step": 65640 }, { "epoch": 0.2652343071384996, "grad_norm": 1214.915283203125, "learning_rate": 4.6080096037241086e-05, "loss": 89.7655, "step": 65650 }, { "epoch": 0.2652747084038672, "grad_norm": 890.3026123046875, "learning_rate": 4.607821927478383e-05, "loss": 86.5065, "step": 65660 }, { "epoch": 0.2653151096692348, "grad_norm": 835.5069580078125, "learning_rate": 4.607634210139584e-05, "loss": 84.1165, "step": 65670 }, { "epoch": 0.26535551093460247, "grad_norm": 350.78350830078125, "learning_rate": 4.607446451711372e-05, "loss": 67.6091, "step": 65680 }, { "epoch": 0.2653959121999701, "grad_norm": 1582.5438232421875, "learning_rate": 4.607258652197406e-05, "loss": 86.531, "step": 65690 }, { "epoch": 0.26543631346533775, "grad_norm": 1221.990234375, "learning_rate": 4.6070708116013476e-05, "loss": 70.8549, "step": 65700 }, { "epoch": 0.2654767147307054, "grad_norm": 931.9036865234375, "learning_rate": 4.606882929926858e-05, "loss": 47.7265, "step": 65710 }, { "epoch": 0.26551711599607297, "grad_norm": 787.3499755859375, "learning_rate": 4.6066950071776015e-05, "loss": 82.7044, "step": 65720 }, { "epoch": 0.2655575172614406, "grad_norm": 3718.5693359375, "learning_rate": 4.606507043357242e-05, "loss": 120.9925, "step": 65730 }, { "epoch": 0.26559791852680825, "grad_norm": 836.9452514648438, "learning_rate": 4.606319038469443e-05, "loss": 59.5213, "step": 65740 }, { "epoch": 0.2656383197921759, "grad_norm": 694.1288452148438, "learning_rate": 4.606130992517869e-05, "loss": 70.1149, "step": 65750 }, { "epoch": 0.26567872105754353, "grad_norm": 1005.6154174804688, "learning_rate": 4.605942905506188e-05, "loss": 49.102, "step": 65760 }, { "epoch": 0.26571912232291117, "grad_norm": 684.3242797851562, "learning_rate": 4.605754777438065e-05, "loss": 79.9185, "step": 65770 }, { "epoch": 0.2657595235882788, "grad_norm": 1068.396240234375, "learning_rate": 4.605566608317169e-05, "loss": 109.3487, "step": 65780 }, { "epoch": 0.2657999248536464, "grad_norm": 2943.5751953125, "learning_rate": 4.6053783981471675e-05, "loss": 78.831, "step": 65790 }, { "epoch": 0.26584032611901404, "grad_norm": 386.4954833984375, "learning_rate": 4.605190146931731e-05, "loss": 91.7346, "step": 65800 }, { "epoch": 0.2658807273843817, "grad_norm": 1254.3067626953125, "learning_rate": 4.605001854674529e-05, "loss": 88.2726, "step": 65810 }, { "epoch": 0.2659211286497493, "grad_norm": 553.072509765625, "learning_rate": 4.604813521379231e-05, "loss": 71.1216, "step": 65820 }, { "epoch": 0.26596152991511696, "grad_norm": 780.3787841796875, "learning_rate": 4.60462514704951e-05, "loss": 71.7296, "step": 65830 }, { "epoch": 0.2660019311804846, "grad_norm": 1093.623046875, "learning_rate": 4.6044367316890386e-05, "loss": 53.4292, "step": 65840 }, { "epoch": 0.2660423324458522, "grad_norm": 986.8058471679688, "learning_rate": 4.6042482753014895e-05, "loss": 70.1599, "step": 65850 }, { "epoch": 0.2660827337112198, "grad_norm": 614.8383178710938, "learning_rate": 4.604059777890537e-05, "loss": 68.2766, "step": 65860 }, { "epoch": 0.26612313497658746, "grad_norm": 608.2391357421875, "learning_rate": 4.603871239459856e-05, "loss": 51.4506, "step": 65870 }, { "epoch": 0.2661635362419551, "grad_norm": 1213.140869140625, "learning_rate": 4.6036826600131216e-05, "loss": 66.3176, "step": 65880 }, { "epoch": 0.26620393750732274, "grad_norm": 1492.8607177734375, "learning_rate": 4.603494039554011e-05, "loss": 83.3855, "step": 65890 }, { "epoch": 0.2662443387726904, "grad_norm": 2795.365478515625, "learning_rate": 4.603305378086201e-05, "loss": 86.1449, "step": 65900 }, { "epoch": 0.26628474003805797, "grad_norm": 429.2622985839844, "learning_rate": 4.60311667561337e-05, "loss": 93.9114, "step": 65910 }, { "epoch": 0.2663251413034256, "grad_norm": 1739.33544921875, "learning_rate": 4.602927932139197e-05, "loss": 72.7662, "step": 65920 }, { "epoch": 0.26636554256879325, "grad_norm": 850.324951171875, "learning_rate": 4.6027391476673606e-05, "loss": 87.2089, "step": 65930 }, { "epoch": 0.2664059438341609, "grad_norm": 248.5250244140625, "learning_rate": 4.602550322201542e-05, "loss": 73.9269, "step": 65940 }, { "epoch": 0.2664463450995285, "grad_norm": 414.97100830078125, "learning_rate": 4.602361455745423e-05, "loss": 38.258, "step": 65950 }, { "epoch": 0.26648674636489617, "grad_norm": 821.9193725585938, "learning_rate": 4.602172548302684e-05, "loss": 51.6796, "step": 65960 }, { "epoch": 0.2665271476302638, "grad_norm": 972.9696044921875, "learning_rate": 4.60198359987701e-05, "loss": 75.2604, "step": 65970 }, { "epoch": 0.2665675488956314, "grad_norm": 589.0814819335938, "learning_rate": 4.6017946104720836e-05, "loss": 86.6934, "step": 65980 }, { "epoch": 0.26660795016099903, "grad_norm": 812.1279296875, "learning_rate": 4.6016055800915884e-05, "loss": 77.9313, "step": 65990 }, { "epoch": 0.26664835142636667, "grad_norm": 456.3207702636719, "learning_rate": 4.601416508739211e-05, "loss": 49.5027, "step": 66000 }, { "epoch": 0.2666887526917343, "grad_norm": 1623.6546630859375, "learning_rate": 4.6012273964186365e-05, "loss": 64.7836, "step": 66010 }, { "epoch": 0.26672915395710195, "grad_norm": 779.8026733398438, "learning_rate": 4.601038243133552e-05, "loss": 108.0493, "step": 66020 }, { "epoch": 0.2667695552224696, "grad_norm": 685.0618286132812, "learning_rate": 4.600849048887646e-05, "loss": 79.6456, "step": 66030 }, { "epoch": 0.2668099564878372, "grad_norm": 1468.7335205078125, "learning_rate": 4.6006598136846056e-05, "loss": 63.8021, "step": 66040 }, { "epoch": 0.2668503577532048, "grad_norm": 932.735107421875, "learning_rate": 4.600470537528121e-05, "loss": 51.6305, "step": 66050 }, { "epoch": 0.26689075901857245, "grad_norm": 815.7344970703125, "learning_rate": 4.6002812204218816e-05, "loss": 66.5829, "step": 66060 }, { "epoch": 0.2669311602839401, "grad_norm": 606.532958984375, "learning_rate": 4.600091862369579e-05, "loss": 77.9135, "step": 66070 }, { "epoch": 0.26697156154930773, "grad_norm": 760.3806762695312, "learning_rate": 4.599902463374903e-05, "loss": 64.1426, "step": 66080 }, { "epoch": 0.2670119628146754, "grad_norm": 1549.1610107421875, "learning_rate": 4.599713023441549e-05, "loss": 67.7165, "step": 66090 }, { "epoch": 0.267052364080043, "grad_norm": 500.93182373046875, "learning_rate": 4.599523542573207e-05, "loss": 65.0257, "step": 66100 }, { "epoch": 0.2670927653454106, "grad_norm": 301.22589111328125, "learning_rate": 4.599334020773574e-05, "loss": 69.3244, "step": 66110 }, { "epoch": 0.26713316661077824, "grad_norm": 374.9650573730469, "learning_rate": 4.599144458046343e-05, "loss": 57.7532, "step": 66120 }, { "epoch": 0.2671735678761459, "grad_norm": 521.96337890625, "learning_rate": 4.59895485439521e-05, "loss": 80.4386, "step": 66130 }, { "epoch": 0.2672139691415135, "grad_norm": 1903.363525390625, "learning_rate": 4.5987652098238714e-05, "loss": 68.9257, "step": 66140 }, { "epoch": 0.26725437040688116, "grad_norm": 561.215087890625, "learning_rate": 4.598575524336025e-05, "loss": 56.7605, "step": 66150 }, { "epoch": 0.2672947716722488, "grad_norm": 502.40240478515625, "learning_rate": 4.598385797935368e-05, "loss": 61.1385, "step": 66160 }, { "epoch": 0.2673351729376164, "grad_norm": 948.8161010742188, "learning_rate": 4.5981960306255996e-05, "loss": 71.2674, "step": 66170 }, { "epoch": 0.267375574202984, "grad_norm": 683.00244140625, "learning_rate": 4.598006222410419e-05, "loss": 56.7152, "step": 66180 }, { "epoch": 0.26741597546835166, "grad_norm": 916.79833984375, "learning_rate": 4.597816373293528e-05, "loss": 75.4204, "step": 66190 }, { "epoch": 0.2674563767337193, "grad_norm": 406.5462341308594, "learning_rate": 4.597626483278625e-05, "loss": 63.3937, "step": 66200 }, { "epoch": 0.26749677799908694, "grad_norm": 953.0973510742188, "learning_rate": 4.5974365523694155e-05, "loss": 55.9855, "step": 66210 }, { "epoch": 0.2675371792644546, "grad_norm": 664.46923828125, "learning_rate": 4.5972465805695996e-05, "loss": 100.492, "step": 66220 }, { "epoch": 0.26757758052982217, "grad_norm": 810.0478515625, "learning_rate": 4.597056567882883e-05, "loss": 68.5419, "step": 66230 }, { "epoch": 0.2676179817951898, "grad_norm": 821.2228393554688, "learning_rate": 4.596866514312967e-05, "loss": 71.8012, "step": 66240 }, { "epoch": 0.26765838306055745, "grad_norm": 1174.8955078125, "learning_rate": 4.5966764198635606e-05, "loss": 92.9574, "step": 66250 }, { "epoch": 0.2676987843259251, "grad_norm": 1086.9854736328125, "learning_rate": 4.596486284538367e-05, "loss": 57.3918, "step": 66260 }, { "epoch": 0.2677391855912927, "grad_norm": 690.481689453125, "learning_rate": 4.5962961083410946e-05, "loss": 71.6719, "step": 66270 }, { "epoch": 0.26777958685666037, "grad_norm": 1423.809814453125, "learning_rate": 4.596105891275449e-05, "loss": 66.3168, "step": 66280 }, { "epoch": 0.267819988122028, "grad_norm": 1662.720947265625, "learning_rate": 4.595915633345141e-05, "loss": 94.1194, "step": 66290 }, { "epoch": 0.2678603893873956, "grad_norm": 687.7420654296875, "learning_rate": 4.595725334553879e-05, "loss": 125.3564, "step": 66300 }, { "epoch": 0.26790079065276323, "grad_norm": 580.8500366210938, "learning_rate": 4.595534994905372e-05, "loss": 45.3163, "step": 66310 }, { "epoch": 0.26794119191813087, "grad_norm": 0.0, "learning_rate": 4.5953446144033316e-05, "loss": 59.1748, "step": 66320 }, { "epoch": 0.2679815931834985, "grad_norm": 292.1687927246094, "learning_rate": 4.595154193051469e-05, "loss": 62.6477, "step": 66330 }, { "epoch": 0.26802199444886615, "grad_norm": 379.4153747558594, "learning_rate": 4.594963730853497e-05, "loss": 90.7049, "step": 66340 }, { "epoch": 0.2680623957142338, "grad_norm": 2087.564208984375, "learning_rate": 4.5947732278131286e-05, "loss": 75.824, "step": 66350 }, { "epoch": 0.2681027969796014, "grad_norm": 1189.827880859375, "learning_rate": 4.594582683934078e-05, "loss": 92.8753, "step": 66360 }, { "epoch": 0.268143198244969, "grad_norm": 524.31005859375, "learning_rate": 4.5943920992200585e-05, "loss": 57.8241, "step": 66370 }, { "epoch": 0.26818359951033666, "grad_norm": 1099.4833984375, "learning_rate": 4.5942014736747875e-05, "loss": 65.1616, "step": 66380 }, { "epoch": 0.2682240007757043, "grad_norm": 1009.6073608398438, "learning_rate": 4.59401080730198e-05, "loss": 68.1652, "step": 66390 }, { "epoch": 0.26826440204107194, "grad_norm": 4793.3828125, "learning_rate": 4.593820100105355e-05, "loss": 111.5321, "step": 66400 }, { "epoch": 0.2683048033064396, "grad_norm": 940.0222778320312, "learning_rate": 4.5936293520886275e-05, "loss": 83.8232, "step": 66410 }, { "epoch": 0.2683452045718072, "grad_norm": 1047.490478515625, "learning_rate": 4.59343856325552e-05, "loss": 58.4591, "step": 66420 }, { "epoch": 0.2683856058371748, "grad_norm": 874.4302978515625, "learning_rate": 4.593247733609748e-05, "loss": 104.9535, "step": 66430 }, { "epoch": 0.26842600710254244, "grad_norm": 742.1166381835938, "learning_rate": 4.593056863155034e-05, "loss": 84.062, "step": 66440 }, { "epoch": 0.2684664083679101, "grad_norm": 313.023681640625, "learning_rate": 4.5928659518951e-05, "loss": 44.992, "step": 66450 }, { "epoch": 0.2685068096332777, "grad_norm": 1163.264892578125, "learning_rate": 4.592674999833666e-05, "loss": 71.2603, "step": 66460 }, { "epoch": 0.26854721089864536, "grad_norm": 1168.4986572265625, "learning_rate": 4.592484006974456e-05, "loss": 46.842, "step": 66470 }, { "epoch": 0.268587612164013, "grad_norm": 2281.073974609375, "learning_rate": 4.5922929733211926e-05, "loss": 54.4491, "step": 66480 }, { "epoch": 0.2686280134293806, "grad_norm": 656.7672729492188, "learning_rate": 4.5921018988776e-05, "loss": 88.2362, "step": 66490 }, { "epoch": 0.2686684146947482, "grad_norm": 787.104736328125, "learning_rate": 4.591910783647404e-05, "loss": 102.6616, "step": 66500 }, { "epoch": 0.26870881596011587, "grad_norm": 602.447265625, "learning_rate": 4.591719627634331e-05, "loss": 70.4162, "step": 66510 }, { "epoch": 0.2687492172254835, "grad_norm": 607.5802612304688, "learning_rate": 4.591528430842107e-05, "loss": 67.1125, "step": 66520 }, { "epoch": 0.26878961849085115, "grad_norm": 1654.757080078125, "learning_rate": 4.5913371932744584e-05, "loss": 82.985, "step": 66530 }, { "epoch": 0.2688300197562188, "grad_norm": 711.9852905273438, "learning_rate": 4.591145914935116e-05, "loss": 66.6757, "step": 66540 }, { "epoch": 0.26887042102158637, "grad_norm": 956.3167114257812, "learning_rate": 4.590954595827806e-05, "loss": 78.8319, "step": 66550 }, { "epoch": 0.268910822286954, "grad_norm": 1775.7945556640625, "learning_rate": 4.59076323595626e-05, "loss": 88.6002, "step": 66560 }, { "epoch": 0.26895122355232165, "grad_norm": 776.41015625, "learning_rate": 4.5905718353242086e-05, "loss": 70.2173, "step": 66570 }, { "epoch": 0.2689916248176893, "grad_norm": 934.8427124023438, "learning_rate": 4.590380393935383e-05, "loss": 71.2888, "step": 66580 }, { "epoch": 0.26903202608305693, "grad_norm": 1015.8541259765625, "learning_rate": 4.5901889117935153e-05, "loss": 94.4522, "step": 66590 }, { "epoch": 0.26907242734842457, "grad_norm": 0.0, "learning_rate": 4.589997388902338e-05, "loss": 66.415, "step": 66600 }, { "epoch": 0.2691128286137922, "grad_norm": 1408.8917236328125, "learning_rate": 4.589805825265587e-05, "loss": 83.8427, "step": 66610 }, { "epoch": 0.2691532298791598, "grad_norm": 654.0909423828125, "learning_rate": 4.5896142208869954e-05, "loss": 59.3255, "step": 66620 }, { "epoch": 0.26919363114452743, "grad_norm": 644.368896484375, "learning_rate": 4.589422575770298e-05, "loss": 51.3406, "step": 66630 }, { "epoch": 0.2692340324098951, "grad_norm": 2795.22998046875, "learning_rate": 4.589230889919232e-05, "loss": 64.9199, "step": 66640 }, { "epoch": 0.2692744336752627, "grad_norm": 940.733642578125, "learning_rate": 4.589039163337534e-05, "loss": 102.8163, "step": 66650 }, { "epoch": 0.26931483494063035, "grad_norm": 847.7637939453125, "learning_rate": 4.588847396028942e-05, "loss": 77.151, "step": 66660 }, { "epoch": 0.269355236205998, "grad_norm": 710.346435546875, "learning_rate": 4.588655587997195e-05, "loss": 57.7109, "step": 66670 }, { "epoch": 0.2693956374713656, "grad_norm": 607.9664306640625, "learning_rate": 4.5884637392460314e-05, "loss": 71.083, "step": 66680 }, { "epoch": 0.2694360387367332, "grad_norm": 615.1112060546875, "learning_rate": 4.588271849779192e-05, "loss": 63.7689, "step": 66690 }, { "epoch": 0.26947644000210086, "grad_norm": 664.7433471679688, "learning_rate": 4.588079919600419e-05, "loss": 80.6283, "step": 66700 }, { "epoch": 0.2695168412674685, "grad_norm": 407.6193542480469, "learning_rate": 4.587887948713452e-05, "loss": 49.1637, "step": 66710 }, { "epoch": 0.26955724253283614, "grad_norm": 785.1792602539062, "learning_rate": 4.5876959371220344e-05, "loss": 64.5263, "step": 66720 }, { "epoch": 0.2695976437982038, "grad_norm": 565.7954711914062, "learning_rate": 4.587503884829909e-05, "loss": 86.9049, "step": 66730 }, { "epoch": 0.2696380450635714, "grad_norm": 1209.4056396484375, "learning_rate": 4.587311791840822e-05, "loss": 103.0452, "step": 66740 }, { "epoch": 0.269678446328939, "grad_norm": 384.4944152832031, "learning_rate": 4.5871196581585166e-05, "loss": 46.5085, "step": 66750 }, { "epoch": 0.26971884759430664, "grad_norm": 818.3886108398438, "learning_rate": 4.5869274837867394e-05, "loss": 100.6729, "step": 66760 }, { "epoch": 0.2697592488596743, "grad_norm": 964.607421875, "learning_rate": 4.5867352687292355e-05, "loss": 64.0361, "step": 66770 }, { "epoch": 0.2697996501250419, "grad_norm": 641.611328125, "learning_rate": 4.5865430129897536e-05, "loss": 44.5035, "step": 66780 }, { "epoch": 0.26984005139040956, "grad_norm": 822.2906494140625, "learning_rate": 4.5863507165720415e-05, "loss": 63.5622, "step": 66790 }, { "epoch": 0.2698804526557772, "grad_norm": 370.7065124511719, "learning_rate": 4.586158379479848e-05, "loss": 57.749, "step": 66800 }, { "epoch": 0.2699208539211448, "grad_norm": 565.5477905273438, "learning_rate": 4.5859660017169224e-05, "loss": 80.5675, "step": 66810 }, { "epoch": 0.2699612551865124, "grad_norm": 869.1512451171875, "learning_rate": 4.5857735832870166e-05, "loss": 76.6965, "step": 66820 }, { "epoch": 0.27000165645188007, "grad_norm": 390.1742858886719, "learning_rate": 4.5855811241938806e-05, "loss": 87.9975, "step": 66830 }, { "epoch": 0.2700420577172477, "grad_norm": 815.9105834960938, "learning_rate": 4.585388624441267e-05, "loss": 60.8064, "step": 66840 }, { "epoch": 0.27008245898261535, "grad_norm": 777.6511840820312, "learning_rate": 4.585196084032928e-05, "loss": 50.49, "step": 66850 }, { "epoch": 0.270122860247983, "grad_norm": 975.7098999023438, "learning_rate": 4.585003502972618e-05, "loss": 82.6168, "step": 66860 }, { "epoch": 0.27016326151335057, "grad_norm": 773.058349609375, "learning_rate": 4.584810881264092e-05, "loss": 58.9796, "step": 66870 }, { "epoch": 0.2702036627787182, "grad_norm": 342.78265380859375, "learning_rate": 4.5846182189111035e-05, "loss": 77.3388, "step": 66880 }, { "epoch": 0.27024406404408585, "grad_norm": 759.47314453125, "learning_rate": 4.584425515917411e-05, "loss": 53.6399, "step": 66890 }, { "epoch": 0.2702844653094535, "grad_norm": 392.1265869140625, "learning_rate": 4.584232772286768e-05, "loss": 48.1632, "step": 66900 }, { "epoch": 0.27032486657482113, "grad_norm": 704.0643920898438, "learning_rate": 4.5840399880229354e-05, "loss": 73.1397, "step": 66910 }, { "epoch": 0.27036526784018877, "grad_norm": 828.9876098632812, "learning_rate": 4.58384716312967e-05, "loss": 60.1899, "step": 66920 }, { "epoch": 0.2704056691055564, "grad_norm": 529.6195678710938, "learning_rate": 4.583654297610731e-05, "loss": 59.9641, "step": 66930 }, { "epoch": 0.270446070370924, "grad_norm": 733.8816528320312, "learning_rate": 4.583461391469879e-05, "loss": 90.0057, "step": 66940 }, { "epoch": 0.27048647163629164, "grad_norm": 1088.7060546875, "learning_rate": 4.583268444710875e-05, "loss": 103.6154, "step": 66950 }, { "epoch": 0.2705268729016593, "grad_norm": 952.4298095703125, "learning_rate": 4.583075457337479e-05, "loss": 78.6486, "step": 66960 }, { "epoch": 0.2705672741670269, "grad_norm": 490.1788024902344, "learning_rate": 4.5828824293534555e-05, "loss": 64.2271, "step": 66970 }, { "epoch": 0.27060767543239456, "grad_norm": 1667.7532958984375, "learning_rate": 4.5826893607625665e-05, "loss": 64.5124, "step": 66980 }, { "epoch": 0.2706480766977622, "grad_norm": 823.8807373046875, "learning_rate": 4.582496251568576e-05, "loss": 71.0763, "step": 66990 }, { "epoch": 0.2706884779631298, "grad_norm": 0.0, "learning_rate": 4.5823031017752485e-05, "loss": 65.76, "step": 67000 }, { "epoch": 0.2707288792284974, "grad_norm": 962.5149536132812, "learning_rate": 4.5821099113863506e-05, "loss": 87.7109, "step": 67010 }, { "epoch": 0.27076928049386506, "grad_norm": 1748.1005859375, "learning_rate": 4.581916680405648e-05, "loss": 62.5524, "step": 67020 }, { "epoch": 0.2708096817592327, "grad_norm": 1395.0408935546875, "learning_rate": 4.581723408836908e-05, "loss": 70.1472, "step": 67030 }, { "epoch": 0.27085008302460034, "grad_norm": 1065.451904296875, "learning_rate": 4.581530096683898e-05, "loss": 65.4421, "step": 67040 }, { "epoch": 0.270890484289968, "grad_norm": 1557.5567626953125, "learning_rate": 4.5813367439503875e-05, "loss": 50.4025, "step": 67050 }, { "epoch": 0.27093088555533557, "grad_norm": 990.9954223632812, "learning_rate": 4.5811433506401456e-05, "loss": 60.8415, "step": 67060 }, { "epoch": 0.2709712868207032, "grad_norm": 535.752685546875, "learning_rate": 4.580949916756942e-05, "loss": 56.8241, "step": 67070 }, { "epoch": 0.27101168808607085, "grad_norm": 589.4478149414062, "learning_rate": 4.580756442304549e-05, "loss": 43.5019, "step": 67080 }, { "epoch": 0.2710520893514385, "grad_norm": 471.8979187011719, "learning_rate": 4.580562927286738e-05, "loss": 83.2732, "step": 67090 }, { "epoch": 0.2710924906168061, "grad_norm": 1266.64794921875, "learning_rate": 4.5803693717072815e-05, "loss": 97.8621, "step": 67100 }, { "epoch": 0.27113289188217377, "grad_norm": 939.3637084960938, "learning_rate": 4.5801757755699534e-05, "loss": 73.3667, "step": 67110 }, { "epoch": 0.2711732931475414, "grad_norm": 516.1859741210938, "learning_rate": 4.579982138878527e-05, "loss": 64.6031, "step": 67120 }, { "epoch": 0.271213694412909, "grad_norm": 688.78125, "learning_rate": 4.579788461636778e-05, "loss": 86.9311, "step": 67130 }, { "epoch": 0.27125409567827663, "grad_norm": 726.7952270507812, "learning_rate": 4.579594743848482e-05, "loss": 97.9171, "step": 67140 }, { "epoch": 0.27129449694364427, "grad_norm": 1185.85498046875, "learning_rate": 4.579400985517416e-05, "loss": 92.5374, "step": 67150 }, { "epoch": 0.2713348982090119, "grad_norm": 928.0086059570312, "learning_rate": 4.579207186647357e-05, "loss": 46.1306, "step": 67160 }, { "epoch": 0.27137529947437955, "grad_norm": 5591.84228515625, "learning_rate": 4.579013347242085e-05, "loss": 74.2452, "step": 67170 }, { "epoch": 0.2714157007397472, "grad_norm": 886.691650390625, "learning_rate": 4.5788194673053756e-05, "loss": 55.0841, "step": 67180 }, { "epoch": 0.2714561020051148, "grad_norm": 1341.62109375, "learning_rate": 4.578625546841011e-05, "loss": 62.2266, "step": 67190 }, { "epoch": 0.2714965032704824, "grad_norm": 839.7449951171875, "learning_rate": 4.5784315858527715e-05, "loss": 73.6595, "step": 67200 }, { "epoch": 0.27153690453585005, "grad_norm": 570.7716674804688, "learning_rate": 4.578237584344438e-05, "loss": 68.3076, "step": 67210 }, { "epoch": 0.2715773058012177, "grad_norm": 673.6726684570312, "learning_rate": 4.578043542319793e-05, "loss": 59.9907, "step": 67220 }, { "epoch": 0.27161770706658533, "grad_norm": 966.4290161132812, "learning_rate": 4.577849459782619e-05, "loss": 56.2375, "step": 67230 }, { "epoch": 0.271658108331953, "grad_norm": 516.3497924804688, "learning_rate": 4.5776553367367e-05, "loss": 86.4351, "step": 67240 }, { "epoch": 0.2716985095973206, "grad_norm": 2003.4329833984375, "learning_rate": 4.577461173185821e-05, "loss": 76.1077, "step": 67250 }, { "epoch": 0.2717389108626882, "grad_norm": 829.5421752929688, "learning_rate": 4.5772669691337665e-05, "loss": 67.1411, "step": 67260 }, { "epoch": 0.27177931212805584, "grad_norm": 783.5784912109375, "learning_rate": 4.577072724584323e-05, "loss": 72.7227, "step": 67270 }, { "epoch": 0.2718197133934235, "grad_norm": 875.1643676757812, "learning_rate": 4.576878439541278e-05, "loss": 80.1554, "step": 67280 }, { "epoch": 0.2718601146587911, "grad_norm": 432.68487548828125, "learning_rate": 4.576684114008418e-05, "loss": 44.3341, "step": 67290 }, { "epoch": 0.27190051592415876, "grad_norm": 427.4791259765625, "learning_rate": 4.5764897479895317e-05, "loss": 74.892, "step": 67300 }, { "epoch": 0.2719409171895264, "grad_norm": 1177.6551513671875, "learning_rate": 4.576295341488409e-05, "loss": 68.8277, "step": 67310 }, { "epoch": 0.271981318454894, "grad_norm": 739.357421875, "learning_rate": 4.57610089450884e-05, "loss": 83.5637, "step": 67320 }, { "epoch": 0.2720217197202616, "grad_norm": 464.8116760253906, "learning_rate": 4.575906407054615e-05, "loss": 69.9499, "step": 67330 }, { "epoch": 0.27206212098562926, "grad_norm": 439.9012145996094, "learning_rate": 4.5757118791295264e-05, "loss": 54.5363, "step": 67340 }, { "epoch": 0.2721025222509969, "grad_norm": 415.9515380859375, "learning_rate": 4.575517310737365e-05, "loss": 73.2503, "step": 67350 }, { "epoch": 0.27214292351636454, "grad_norm": 784.4149780273438, "learning_rate": 4.575322701881926e-05, "loss": 64.8702, "step": 67360 }, { "epoch": 0.2721833247817322, "grad_norm": 389.4613342285156, "learning_rate": 4.575128052567002e-05, "loss": 76.2548, "step": 67370 }, { "epoch": 0.27222372604709977, "grad_norm": 942.818603515625, "learning_rate": 4.5749333627963884e-05, "loss": 76.8624, "step": 67380 }, { "epoch": 0.2722641273124674, "grad_norm": 895.9558715820312, "learning_rate": 4.574738632573881e-05, "loss": 69.907, "step": 67390 }, { "epoch": 0.27230452857783505, "grad_norm": 585.9174194335938, "learning_rate": 4.574543861903274e-05, "loss": 88.2039, "step": 67400 }, { "epoch": 0.2723449298432027, "grad_norm": 556.5941162109375, "learning_rate": 4.5743490507883685e-05, "loss": 128.176, "step": 67410 }, { "epoch": 0.2723853311085703, "grad_norm": 730.61865234375, "learning_rate": 4.574154199232959e-05, "loss": 69.3492, "step": 67420 }, { "epoch": 0.27242573237393797, "grad_norm": 887.3568725585938, "learning_rate": 4.573959307240847e-05, "loss": 69.9397, "step": 67430 }, { "epoch": 0.2724661336393056, "grad_norm": 549.9888916015625, "learning_rate": 4.5737643748158295e-05, "loss": 76.4168, "step": 67440 }, { "epoch": 0.2725065349046732, "grad_norm": 1629.8140869140625, "learning_rate": 4.573569401961708e-05, "loss": 59.1505, "step": 67450 }, { "epoch": 0.27254693617004083, "grad_norm": 982.314453125, "learning_rate": 4.573374388682283e-05, "loss": 70.0269, "step": 67460 }, { "epoch": 0.27258733743540847, "grad_norm": 499.3196716308594, "learning_rate": 4.573179334981358e-05, "loss": 41.5002, "step": 67470 }, { "epoch": 0.2726277387007761, "grad_norm": 764.8738403320312, "learning_rate": 4.5729842408627334e-05, "loss": 65.5099, "step": 67480 }, { "epoch": 0.27266813996614375, "grad_norm": 813.9560546875, "learning_rate": 4.572789106330214e-05, "loss": 67.2205, "step": 67490 }, { "epoch": 0.2727085412315114, "grad_norm": 813.4544677734375, "learning_rate": 4.572593931387604e-05, "loss": 58.4749, "step": 67500 }, { "epoch": 0.272748942496879, "grad_norm": 1321.0914306640625, "learning_rate": 4.572398716038709e-05, "loss": 76.9033, "step": 67510 }, { "epoch": 0.2727893437622466, "grad_norm": 809.7509155273438, "learning_rate": 4.572203460287333e-05, "loss": 44.7534, "step": 67520 }, { "epoch": 0.27282974502761426, "grad_norm": 502.35369873046875, "learning_rate": 4.5720081641372844e-05, "loss": 75.6196, "step": 67530 }, { "epoch": 0.2728701462929819, "grad_norm": 285.2467956542969, "learning_rate": 4.57181282759237e-05, "loss": 49.662, "step": 67540 }, { "epoch": 0.27291054755834954, "grad_norm": 1159.55419921875, "learning_rate": 4.571617450656397e-05, "loss": 68.6516, "step": 67550 }, { "epoch": 0.2729509488237172, "grad_norm": 883.5384521484375, "learning_rate": 4.5714220333331756e-05, "loss": 48.4618, "step": 67560 }, { "epoch": 0.2729913500890848, "grad_norm": 714.6936645507812, "learning_rate": 4.571226575626516e-05, "loss": 70.2103, "step": 67570 }, { "epoch": 0.2730317513544524, "grad_norm": 803.3701782226562, "learning_rate": 4.5710310775402274e-05, "loss": 65.889, "step": 67580 }, { "epoch": 0.27307215261982004, "grad_norm": 463.9601745605469, "learning_rate": 4.570835539078121e-05, "loss": 65.3356, "step": 67590 }, { "epoch": 0.2731125538851877, "grad_norm": 5190.55419921875, "learning_rate": 4.5706399602440106e-05, "loss": 79.4295, "step": 67600 }, { "epoch": 0.2731529551505553, "grad_norm": 1560.6190185546875, "learning_rate": 4.5704443410417075e-05, "loss": 60.054, "step": 67610 }, { "epoch": 0.27319335641592296, "grad_norm": 566.1893310546875, "learning_rate": 4.5702486814750265e-05, "loss": 65.9659, "step": 67620 }, { "epoch": 0.2732337576812906, "grad_norm": 497.7099914550781, "learning_rate": 4.570052981547782e-05, "loss": 60.542, "step": 67630 }, { "epoch": 0.2732741589466582, "grad_norm": 595.028076171875, "learning_rate": 4.569857241263788e-05, "loss": 113.0741, "step": 67640 }, { "epoch": 0.2733145602120258, "grad_norm": 562.2115478515625, "learning_rate": 4.569661460626862e-05, "loss": 73.3425, "step": 67650 }, { "epoch": 0.27335496147739347, "grad_norm": 3808.25244140625, "learning_rate": 4.5694656396408195e-05, "loss": 101.9904, "step": 67660 }, { "epoch": 0.2733953627427611, "grad_norm": 1519.3927001953125, "learning_rate": 4.56926977830948e-05, "loss": 89.2418, "step": 67670 }, { "epoch": 0.27343576400812875, "grad_norm": 1025.804931640625, "learning_rate": 4.56907387663666e-05, "loss": 69.6141, "step": 67680 }, { "epoch": 0.2734761652734964, "grad_norm": 852.5979614257812, "learning_rate": 4.56887793462618e-05, "loss": 88.1962, "step": 67690 }, { "epoch": 0.27351656653886397, "grad_norm": 589.6854858398438, "learning_rate": 4.5686819522818594e-05, "loss": 73.4965, "step": 67700 }, { "epoch": 0.2735569678042316, "grad_norm": 1321.8143310546875, "learning_rate": 4.56848592960752e-05, "loss": 79.9866, "step": 67710 }, { "epoch": 0.27359736906959925, "grad_norm": 1166.6270751953125, "learning_rate": 4.568289866606981e-05, "loss": 55.3361, "step": 67720 }, { "epoch": 0.2736377703349669, "grad_norm": 440.9592590332031, "learning_rate": 4.568093763284067e-05, "loss": 41.0626, "step": 67730 }, { "epoch": 0.27367817160033453, "grad_norm": 598.6520385742188, "learning_rate": 4.567897619642601e-05, "loss": 80.7803, "step": 67740 }, { "epoch": 0.27371857286570217, "grad_norm": 897.6016235351562, "learning_rate": 4.567701435686404e-05, "loss": 94.469, "step": 67750 }, { "epoch": 0.2737589741310698, "grad_norm": 926.674560546875, "learning_rate": 4.567505211419305e-05, "loss": 68.1472, "step": 67760 }, { "epoch": 0.2737993753964374, "grad_norm": 497.29339599609375, "learning_rate": 4.567308946845127e-05, "loss": 47.2474, "step": 67770 }, { "epoch": 0.27383977666180503, "grad_norm": 891.9127807617188, "learning_rate": 4.567112641967697e-05, "loss": 88.2951, "step": 67780 }, { "epoch": 0.2738801779271727, "grad_norm": 655.0907592773438, "learning_rate": 4.566916296790842e-05, "loss": 61.0948, "step": 67790 }, { "epoch": 0.2739205791925403, "grad_norm": 577.0123901367188, "learning_rate": 4.566719911318389e-05, "loss": 89.3447, "step": 67800 }, { "epoch": 0.27396098045790795, "grad_norm": 598.5209350585938, "learning_rate": 4.5665234855541675e-05, "loss": 42.0639, "step": 67810 }, { "epoch": 0.2740013817232756, "grad_norm": 578.243408203125, "learning_rate": 4.566327019502007e-05, "loss": 50.7425, "step": 67820 }, { "epoch": 0.2740417829886432, "grad_norm": 849.9849853515625, "learning_rate": 4.566130513165737e-05, "loss": 58.8878, "step": 67830 }, { "epoch": 0.2740821842540108, "grad_norm": 585.877685546875, "learning_rate": 4.565933966549189e-05, "loss": 64.4063, "step": 67840 }, { "epoch": 0.27412258551937846, "grad_norm": 600.2329711914062, "learning_rate": 4.565737379656195e-05, "loss": 62.7049, "step": 67850 }, { "epoch": 0.2741629867847461, "grad_norm": 1287.9954833984375, "learning_rate": 4.5655407524905866e-05, "loss": 55.8217, "step": 67860 }, { "epoch": 0.27420338805011374, "grad_norm": 640.1063842773438, "learning_rate": 4.5653440850561986e-05, "loss": 107.2745, "step": 67870 }, { "epoch": 0.2742437893154814, "grad_norm": 1059.8924560546875, "learning_rate": 4.565147377356864e-05, "loss": 64.9931, "step": 67880 }, { "epoch": 0.274284190580849, "grad_norm": 1559.3463134765625, "learning_rate": 4.564950629396418e-05, "loss": 117.1461, "step": 67890 }, { "epoch": 0.2743245918462166, "grad_norm": 367.62322998046875, "learning_rate": 4.564753841178697e-05, "loss": 45.144, "step": 67900 }, { "epoch": 0.27436499311158424, "grad_norm": 2093.43505859375, "learning_rate": 4.564557012707536e-05, "loss": 118.0313, "step": 67910 }, { "epoch": 0.2744053943769519, "grad_norm": 1412.019287109375, "learning_rate": 4.5643601439867734e-05, "loss": 85.6378, "step": 67920 }, { "epoch": 0.2744457956423195, "grad_norm": 1107.6322021484375, "learning_rate": 4.564163235020247e-05, "loss": 77.5563, "step": 67930 }, { "epoch": 0.27448619690768716, "grad_norm": 938.7551879882812, "learning_rate": 4.563966285811796e-05, "loss": 53.3907, "step": 67940 }, { "epoch": 0.2745265981730548, "grad_norm": 898.0419921875, "learning_rate": 4.5637692963652596e-05, "loss": 56.9066, "step": 67950 }, { "epoch": 0.2745669994384224, "grad_norm": 654.0993041992188, "learning_rate": 4.5635722666844775e-05, "loss": 71.2898, "step": 67960 }, { "epoch": 0.27460740070379, "grad_norm": 762.6791381835938, "learning_rate": 4.563375196773293e-05, "loss": 111.2552, "step": 67970 }, { "epoch": 0.27464780196915767, "grad_norm": 683.8074951171875, "learning_rate": 4.5631780866355454e-05, "loss": 79.4953, "step": 67980 }, { "epoch": 0.2746882032345253, "grad_norm": 0.0, "learning_rate": 4.56298093627508e-05, "loss": 79.2197, "step": 67990 }, { "epoch": 0.27472860449989295, "grad_norm": 525.245361328125, "learning_rate": 4.562783745695738e-05, "loss": 53.7112, "step": 68000 }, { "epoch": 0.2747690057652606, "grad_norm": 588.7833251953125, "learning_rate": 4.562586514901366e-05, "loss": 50.6445, "step": 68010 }, { "epoch": 0.27480940703062817, "grad_norm": 515.099853515625, "learning_rate": 4.5623892438958074e-05, "loss": 48.6461, "step": 68020 }, { "epoch": 0.2748498082959958, "grad_norm": 835.2371826171875, "learning_rate": 4.562191932682908e-05, "loss": 49.5645, "step": 68030 }, { "epoch": 0.27489020956136345, "grad_norm": 1118.2117919921875, "learning_rate": 4.561994581266516e-05, "loss": 97.7809, "step": 68040 }, { "epoch": 0.2749306108267311, "grad_norm": 514.0653076171875, "learning_rate": 4.561797189650478e-05, "loss": 76.3595, "step": 68050 }, { "epoch": 0.27497101209209873, "grad_norm": 461.3988342285156, "learning_rate": 4.561599757838643e-05, "loss": 66.2146, "step": 68060 }, { "epoch": 0.27501141335746637, "grad_norm": 710.6083374023438, "learning_rate": 4.561402285834858e-05, "loss": 65.2828, "step": 68070 }, { "epoch": 0.275051814622834, "grad_norm": 1474.451416015625, "learning_rate": 4.561204773642974e-05, "loss": 54.9727, "step": 68080 }, { "epoch": 0.2750922158882016, "grad_norm": 606.7708129882812, "learning_rate": 4.5610072212668434e-05, "loss": 57.3314, "step": 68090 }, { "epoch": 0.27513261715356924, "grad_norm": 706.001953125, "learning_rate": 4.560809628710315e-05, "loss": 72.5477, "step": 68100 }, { "epoch": 0.2751730184189369, "grad_norm": 805.1146850585938, "learning_rate": 4.560611995977242e-05, "loss": 62.7448, "step": 68110 }, { "epoch": 0.2752134196843045, "grad_norm": 502.4281311035156, "learning_rate": 4.560414323071477e-05, "loss": 40.9372, "step": 68120 }, { "epoch": 0.27525382094967216, "grad_norm": 468.7350158691406, "learning_rate": 4.560216609996874e-05, "loss": 71.8095, "step": 68130 }, { "epoch": 0.2752942222150398, "grad_norm": 557.2088623046875, "learning_rate": 4.5600188567572876e-05, "loss": 64.4124, "step": 68140 }, { "epoch": 0.2753346234804074, "grad_norm": 1210.1270751953125, "learning_rate": 4.559821063356574e-05, "loss": 85.0767, "step": 68150 }, { "epoch": 0.275375024745775, "grad_norm": 1038.1043701171875, "learning_rate": 4.559623229798587e-05, "loss": 71.8373, "step": 68160 }, { "epoch": 0.27541542601114266, "grad_norm": 929.40576171875, "learning_rate": 4.5594253560871854e-05, "loss": 72.4507, "step": 68170 }, { "epoch": 0.2754558272765103, "grad_norm": 957.2470092773438, "learning_rate": 4.559227442226226e-05, "loss": 97.403, "step": 68180 }, { "epoch": 0.27549622854187794, "grad_norm": 706.5764770507812, "learning_rate": 4.559029488219567e-05, "loss": 64.6209, "step": 68190 }, { "epoch": 0.2755366298072456, "grad_norm": 434.8182373046875, "learning_rate": 4.558831494071069e-05, "loss": 87.0814, "step": 68200 }, { "epoch": 0.2755770310726132, "grad_norm": 760.851318359375, "learning_rate": 4.5586334597845904e-05, "loss": 80.2775, "step": 68210 }, { "epoch": 0.2756174323379808, "grad_norm": 256.12982177734375, "learning_rate": 4.558435385363993e-05, "loss": 53.6615, "step": 68220 }, { "epoch": 0.27565783360334845, "grad_norm": 421.9570617675781, "learning_rate": 4.5582372708131385e-05, "loss": 75.5364, "step": 68230 }, { "epoch": 0.2756982348687161, "grad_norm": 812.7416381835938, "learning_rate": 4.558039116135887e-05, "loss": 65.1539, "step": 68240 }, { "epoch": 0.2757386361340837, "grad_norm": 639.4966430664062, "learning_rate": 4.557840921336105e-05, "loss": 54.6641, "step": 68250 }, { "epoch": 0.27577903739945137, "grad_norm": 370.4179992675781, "learning_rate": 4.557642686417654e-05, "loss": 98.5805, "step": 68260 }, { "epoch": 0.275819438664819, "grad_norm": 516.9441528320312, "learning_rate": 4.5574444113844e-05, "loss": 76.0973, "step": 68270 }, { "epoch": 0.2758598399301866, "grad_norm": 434.20660400390625, "learning_rate": 4.5572460962402075e-05, "loss": 59.694, "step": 68280 }, { "epoch": 0.27590024119555423, "grad_norm": 1096.5814208984375, "learning_rate": 4.557047740988944e-05, "loss": 83.0501, "step": 68290 }, { "epoch": 0.27594064246092187, "grad_norm": 679.775634765625, "learning_rate": 4.556849345634475e-05, "loss": 71.1283, "step": 68300 }, { "epoch": 0.2759810437262895, "grad_norm": 1577.58251953125, "learning_rate": 4.5566509101806695e-05, "loss": 99.9247, "step": 68310 }, { "epoch": 0.27602144499165715, "grad_norm": 1189.281982421875, "learning_rate": 4.556452434631395e-05, "loss": 80.3163, "step": 68320 }, { "epoch": 0.2760618462570248, "grad_norm": 809.91015625, "learning_rate": 4.5562539189905223e-05, "loss": 104.2898, "step": 68330 }, { "epoch": 0.2761022475223924, "grad_norm": 1438.5238037109375, "learning_rate": 4.5560553632619205e-05, "loss": 99.692, "step": 68340 }, { "epoch": 0.27614264878776, "grad_norm": 330.85479736328125, "learning_rate": 4.555856767449461e-05, "loss": 67.1174, "step": 68350 }, { "epoch": 0.27618305005312765, "grad_norm": 803.5668334960938, "learning_rate": 4.555658131557015e-05, "loss": 99.5837, "step": 68360 }, { "epoch": 0.2762234513184953, "grad_norm": 674.5006713867188, "learning_rate": 4.555459455588456e-05, "loss": 65.4563, "step": 68370 }, { "epoch": 0.27626385258386293, "grad_norm": 773.7620849609375, "learning_rate": 4.555260739547657e-05, "loss": 43.1016, "step": 68380 }, { "epoch": 0.2763042538492306, "grad_norm": 639.3969116210938, "learning_rate": 4.55506198343849e-05, "loss": 78.3155, "step": 68390 }, { "epoch": 0.2763446551145982, "grad_norm": 373.80694580078125, "learning_rate": 4.5548631872648326e-05, "loss": 83.402, "step": 68400 }, { "epoch": 0.2763850563799658, "grad_norm": 420.725830078125, "learning_rate": 4.55466435103056e-05, "loss": 61.3818, "step": 68410 }, { "epoch": 0.27642545764533344, "grad_norm": 877.37890625, "learning_rate": 4.554465474739548e-05, "loss": 59.281, "step": 68420 }, { "epoch": 0.2764658589107011, "grad_norm": 295.0977478027344, "learning_rate": 4.5542665583956736e-05, "loss": 36.8095, "step": 68430 }, { "epoch": 0.2765062601760687, "grad_norm": 288.376953125, "learning_rate": 4.5540676020028145e-05, "loss": 68.784, "step": 68440 }, { "epoch": 0.27654666144143636, "grad_norm": 1751.3602294921875, "learning_rate": 4.5538686055648506e-05, "loss": 93.4555, "step": 68450 }, { "epoch": 0.276587062706804, "grad_norm": 857.876953125, "learning_rate": 4.5536695690856606e-05, "loss": 53.5522, "step": 68460 }, { "epoch": 0.2766274639721716, "grad_norm": 1057.3465576171875, "learning_rate": 4.553470492569125e-05, "loss": 58.7135, "step": 68470 }, { "epoch": 0.2766678652375392, "grad_norm": 602.7147216796875, "learning_rate": 4.553271376019125e-05, "loss": 73.2039, "step": 68480 }, { "epoch": 0.27670826650290686, "grad_norm": 799.6278686523438, "learning_rate": 4.5530722194395425e-05, "loss": 66.5906, "step": 68490 }, { "epoch": 0.2767486677682745, "grad_norm": 539.0422973632812, "learning_rate": 4.5528730228342605e-05, "loss": 79.3498, "step": 68500 }, { "epoch": 0.27678906903364214, "grad_norm": 838.4744873046875, "learning_rate": 4.552673786207161e-05, "loss": 46.1416, "step": 68510 }, { "epoch": 0.2768294702990098, "grad_norm": 1674.3841552734375, "learning_rate": 4.55247450956213e-05, "loss": 72.0151, "step": 68520 }, { "epoch": 0.2768698715643774, "grad_norm": 5940.70263671875, "learning_rate": 4.552275192903052e-05, "loss": 75.9517, "step": 68530 }, { "epoch": 0.276910272829745, "grad_norm": 1089.8138427734375, "learning_rate": 4.552075836233812e-05, "loss": 97.0178, "step": 68540 }, { "epoch": 0.27695067409511265, "grad_norm": 608.082275390625, "learning_rate": 4.551876439558298e-05, "loss": 85.0739, "step": 68550 }, { "epoch": 0.2769910753604803, "grad_norm": 1392.0042724609375, "learning_rate": 4.5516770028803954e-05, "loss": 60.7242, "step": 68560 }, { "epoch": 0.27703147662584793, "grad_norm": 758.804931640625, "learning_rate": 4.5514775262039934e-05, "loss": 61.6169, "step": 68570 }, { "epoch": 0.27707187789121557, "grad_norm": 3674.3056640625, "learning_rate": 4.551278009532981e-05, "loss": 77.6101, "step": 68580 }, { "epoch": 0.2771122791565832, "grad_norm": 1095.73876953125, "learning_rate": 4.551078452871248e-05, "loss": 54.9264, "step": 68590 }, { "epoch": 0.2771526804219508, "grad_norm": 1469.385009765625, "learning_rate": 4.550878856222685e-05, "loss": 70.6401, "step": 68600 }, { "epoch": 0.27719308168731843, "grad_norm": 866.8749389648438, "learning_rate": 4.5506792195911817e-05, "loss": 50.9549, "step": 68610 }, { "epoch": 0.27723348295268607, "grad_norm": 735.7921142578125, "learning_rate": 4.550479542980632e-05, "loss": 85.6192, "step": 68620 }, { "epoch": 0.2772738842180537, "grad_norm": 400.9674377441406, "learning_rate": 4.550279826394928e-05, "loss": 97.7626, "step": 68630 }, { "epoch": 0.27731428548342135, "grad_norm": 700.01318359375, "learning_rate": 4.5500800698379624e-05, "loss": 66.2745, "step": 68640 }, { "epoch": 0.277354686748789, "grad_norm": 1081.5074462890625, "learning_rate": 4.549880273313631e-05, "loss": 73.2586, "step": 68650 }, { "epoch": 0.2773950880141566, "grad_norm": 770.979736328125, "learning_rate": 4.5496804368258286e-05, "loss": 56.8278, "step": 68660 }, { "epoch": 0.2774354892795242, "grad_norm": 733.8268432617188, "learning_rate": 4.549480560378451e-05, "loss": 57.6247, "step": 68670 }, { "epoch": 0.27747589054489186, "grad_norm": 744.9702758789062, "learning_rate": 4.5492806439753935e-05, "loss": 59.9518, "step": 68680 }, { "epoch": 0.2775162918102595, "grad_norm": 701.0172729492188, "learning_rate": 4.549080687620555e-05, "loss": 86.6771, "step": 68690 }, { "epoch": 0.27755669307562714, "grad_norm": 387.11041259765625, "learning_rate": 4.548880691317835e-05, "loss": 79.3791, "step": 68700 }, { "epoch": 0.2775970943409948, "grad_norm": 1299.3056640625, "learning_rate": 4.54868065507113e-05, "loss": 64.44, "step": 68710 }, { "epoch": 0.2776374956063624, "grad_norm": 795.0128173828125, "learning_rate": 4.548480578884341e-05, "loss": 60.6718, "step": 68720 }, { "epoch": 0.27767789687173, "grad_norm": 332.22100830078125, "learning_rate": 4.5482804627613686e-05, "loss": 61.0125, "step": 68730 }, { "epoch": 0.27771829813709764, "grad_norm": 525.1163940429688, "learning_rate": 4.548080306706114e-05, "loss": 74.0871, "step": 68740 }, { "epoch": 0.2777586994024653, "grad_norm": 1393.9156494140625, "learning_rate": 4.54788011072248e-05, "loss": 72.4176, "step": 68750 }, { "epoch": 0.2777991006678329, "grad_norm": 950.6893920898438, "learning_rate": 4.547679874814368e-05, "loss": 62.7376, "step": 68760 }, { "epoch": 0.27783950193320056, "grad_norm": 1314.624267578125, "learning_rate": 4.547479598985683e-05, "loss": 95.1351, "step": 68770 }, { "epoch": 0.2778799031985682, "grad_norm": 360.00762939453125, "learning_rate": 4.547279283240329e-05, "loss": 52.8572, "step": 68780 }, { "epoch": 0.2779203044639358, "grad_norm": 789.6798706054688, "learning_rate": 4.547078927582212e-05, "loss": 79.6554, "step": 68790 }, { "epoch": 0.2779607057293034, "grad_norm": 973.8438110351562, "learning_rate": 4.5468785320152365e-05, "loss": 66.0115, "step": 68800 }, { "epoch": 0.27800110699467107, "grad_norm": 351.57525634765625, "learning_rate": 4.546678096543311e-05, "loss": 62.568, "step": 68810 }, { "epoch": 0.2780415082600387, "grad_norm": 1510.792724609375, "learning_rate": 4.546477621170342e-05, "loss": 82.8039, "step": 68820 }, { "epoch": 0.27808190952540635, "grad_norm": 1050.9688720703125, "learning_rate": 4.546277105900237e-05, "loss": 71.6794, "step": 68830 }, { "epoch": 0.278122310790774, "grad_norm": 1624.618408203125, "learning_rate": 4.5460765507369084e-05, "loss": 64.593, "step": 68840 }, { "epoch": 0.2781627120561416, "grad_norm": 1015.446533203125, "learning_rate": 4.5458759556842624e-05, "loss": 73.7418, "step": 68850 }, { "epoch": 0.2782031133215092, "grad_norm": 1055.7169189453125, "learning_rate": 4.545675320746212e-05, "loss": 89.4522, "step": 68860 }, { "epoch": 0.27824351458687685, "grad_norm": 939.6693725585938, "learning_rate": 4.545474645926668e-05, "loss": 55.4124, "step": 68870 }, { "epoch": 0.2782839158522445, "grad_norm": 524.6759033203125, "learning_rate": 4.5452739312295436e-05, "loss": 67.2065, "step": 68880 }, { "epoch": 0.27832431711761213, "grad_norm": 636.6887817382812, "learning_rate": 4.54507317665875e-05, "loss": 54.9611, "step": 68890 }, { "epoch": 0.27836471838297977, "grad_norm": 1205.8494873046875, "learning_rate": 4.544872382218202e-05, "loss": 55.5338, "step": 68900 }, { "epoch": 0.2784051196483474, "grad_norm": 400.2951965332031, "learning_rate": 4.544671547911814e-05, "loss": 72.6024, "step": 68910 }, { "epoch": 0.278445520913715, "grad_norm": 671.6885986328125, "learning_rate": 4.5444706737435014e-05, "loss": 88.0325, "step": 68920 }, { "epoch": 0.27848592217908263, "grad_norm": 905.302490234375, "learning_rate": 4.544269759717181e-05, "loss": 90.4285, "step": 68930 }, { "epoch": 0.2785263234444503, "grad_norm": 713.8814086914062, "learning_rate": 4.5440688058367686e-05, "loss": 70.4518, "step": 68940 }, { "epoch": 0.2785667247098179, "grad_norm": 986.9356689453125, "learning_rate": 4.543867812106183e-05, "loss": 61.4856, "step": 68950 }, { "epoch": 0.27860712597518555, "grad_norm": 852.2457885742188, "learning_rate": 4.543666778529342e-05, "loss": 67.6646, "step": 68960 }, { "epoch": 0.2786475272405532, "grad_norm": 1037.552490234375, "learning_rate": 4.543465705110165e-05, "loss": 50.1012, "step": 68970 }, { "epoch": 0.2786879285059208, "grad_norm": 327.4298095703125, "learning_rate": 4.543264591852572e-05, "loss": 61.3465, "step": 68980 }, { "epoch": 0.2787283297712884, "grad_norm": 866.1886596679688, "learning_rate": 4.543063438760483e-05, "loss": 48.84, "step": 68990 }, { "epoch": 0.27876873103665606, "grad_norm": 708.6840209960938, "learning_rate": 4.542862245837821e-05, "loss": 66.3793, "step": 69000 }, { "epoch": 0.2788091323020237, "grad_norm": 1597.6319580078125, "learning_rate": 4.5426610130885087e-05, "loss": 80.3279, "step": 69010 }, { "epoch": 0.27884953356739134, "grad_norm": 906.7168579101562, "learning_rate": 4.542459740516467e-05, "loss": 73.8522, "step": 69020 }, { "epoch": 0.278889934832759, "grad_norm": 890.2028198242188, "learning_rate": 4.542258428125622e-05, "loss": 90.2994, "step": 69030 }, { "epoch": 0.2789303360981266, "grad_norm": 612.5167846679688, "learning_rate": 4.542057075919897e-05, "loss": 87.7907, "step": 69040 }, { "epoch": 0.2789707373634942, "grad_norm": 983.6898193359375, "learning_rate": 4.541855683903219e-05, "loss": 65.7596, "step": 69050 }, { "epoch": 0.27901113862886184, "grad_norm": 366.17645263671875, "learning_rate": 4.541654252079513e-05, "loss": 63.9307, "step": 69060 }, { "epoch": 0.2790515398942295, "grad_norm": 489.0401306152344, "learning_rate": 4.541452780452705e-05, "loss": 41.8797, "step": 69070 }, { "epoch": 0.2790919411595971, "grad_norm": 983.3743896484375, "learning_rate": 4.5412512690267246e-05, "loss": 111.5006, "step": 69080 }, { "epoch": 0.27913234242496476, "grad_norm": 1284.0035400390625, "learning_rate": 4.5410497178055e-05, "loss": 58.7547, "step": 69090 }, { "epoch": 0.2791727436903324, "grad_norm": 731.1244506835938, "learning_rate": 4.5408481267929605e-05, "loss": 81.0387, "step": 69100 }, { "epoch": 0.2792131449557, "grad_norm": 717.0902099609375, "learning_rate": 4.540646495993036e-05, "loss": 130.641, "step": 69110 }, { "epoch": 0.27925354622106763, "grad_norm": 844.9999389648438, "learning_rate": 4.540444825409657e-05, "loss": 67.0038, "step": 69120 }, { "epoch": 0.27929394748643527, "grad_norm": 399.6801452636719, "learning_rate": 4.540243115046756e-05, "loss": 55.7817, "step": 69130 }, { "epoch": 0.2793343487518029, "grad_norm": 438.31207275390625, "learning_rate": 4.540041364908265e-05, "loss": 63.9536, "step": 69140 }, { "epoch": 0.27937475001717055, "grad_norm": 1663.7740478515625, "learning_rate": 4.539839574998117e-05, "loss": 80.7982, "step": 69150 }, { "epoch": 0.2794151512825382, "grad_norm": 1818.9991455078125, "learning_rate": 4.5396377453202466e-05, "loss": 113.5125, "step": 69160 }, { "epoch": 0.27945555254790583, "grad_norm": 736.188720703125, "learning_rate": 4.539435875878588e-05, "loss": 81.4038, "step": 69170 }, { "epoch": 0.2794959538132734, "grad_norm": 879.2432250976562, "learning_rate": 4.539233966677078e-05, "loss": 87.2281, "step": 69180 }, { "epoch": 0.27953635507864105, "grad_norm": 560.682373046875, "learning_rate": 4.539032017719651e-05, "loss": 82.0745, "step": 69190 }, { "epoch": 0.2795767563440087, "grad_norm": 943.4149780273438, "learning_rate": 4.5388300290102456e-05, "loss": 117.8961, "step": 69200 }, { "epoch": 0.27961715760937633, "grad_norm": 854.2255249023438, "learning_rate": 4.538628000552799e-05, "loss": 46.8455, "step": 69210 }, { "epoch": 0.27965755887474397, "grad_norm": 768.5675659179688, "learning_rate": 4.5384259323512504e-05, "loss": 77.2018, "step": 69220 }, { "epoch": 0.2796979601401116, "grad_norm": 790.1107177734375, "learning_rate": 4.538223824409538e-05, "loss": 62.6797, "step": 69230 }, { "epoch": 0.2797383614054792, "grad_norm": 0.0, "learning_rate": 4.538021676731603e-05, "loss": 50.2399, "step": 69240 }, { "epoch": 0.27977876267084684, "grad_norm": 2715.3037109375, "learning_rate": 4.537819489321386e-05, "loss": 107.9521, "step": 69250 }, { "epoch": 0.2798191639362145, "grad_norm": 2060.965576171875, "learning_rate": 4.537617262182829e-05, "loss": 65.6806, "step": 69260 }, { "epoch": 0.2798595652015821, "grad_norm": 1369.305419921875, "learning_rate": 4.5374149953198746e-05, "loss": 92.2145, "step": 69270 }, { "epoch": 0.27989996646694976, "grad_norm": 830.0508422851562, "learning_rate": 4.5372126887364655e-05, "loss": 106.5613, "step": 69280 }, { "epoch": 0.2799403677323174, "grad_norm": 942.8829345703125, "learning_rate": 4.5370103424365474e-05, "loss": 76.8458, "step": 69290 }, { "epoch": 0.279980768997685, "grad_norm": 842.5079345703125, "learning_rate": 4.536807956424063e-05, "loss": 53.2971, "step": 69300 }, { "epoch": 0.2800211702630526, "grad_norm": 1059.9853515625, "learning_rate": 4.5366055307029585e-05, "loss": 80.9336, "step": 69310 }, { "epoch": 0.28006157152842026, "grad_norm": 326.0080261230469, "learning_rate": 4.536403065277182e-05, "loss": 78.7748, "step": 69320 }, { "epoch": 0.2801019727937879, "grad_norm": 275.553955078125, "learning_rate": 4.536200560150678e-05, "loss": 69.185, "step": 69330 }, { "epoch": 0.28014237405915554, "grad_norm": 686.8462524414062, "learning_rate": 4.5359980153273964e-05, "loss": 68.1509, "step": 69340 }, { "epoch": 0.2801827753245232, "grad_norm": 630.9837646484375, "learning_rate": 4.535795430811285e-05, "loss": 107.0847, "step": 69350 }, { "epoch": 0.2802231765898908, "grad_norm": 2537.67138671875, "learning_rate": 4.535592806606294e-05, "loss": 64.2041, "step": 69360 }, { "epoch": 0.2802635778552584, "grad_norm": 871.0073852539062, "learning_rate": 4.5353901427163725e-05, "loss": 56.6685, "step": 69370 }, { "epoch": 0.28030397912062605, "grad_norm": 805.7468872070312, "learning_rate": 4.535187439145473e-05, "loss": 56.4696, "step": 69380 }, { "epoch": 0.2803443803859937, "grad_norm": 638.688720703125, "learning_rate": 4.534984695897546e-05, "loss": 61.3559, "step": 69390 }, { "epoch": 0.2803847816513613, "grad_norm": 916.9631958007812, "learning_rate": 4.534781912976546e-05, "loss": 81.307, "step": 69400 }, { "epoch": 0.28042518291672897, "grad_norm": 695.0220947265625, "learning_rate": 4.534579090386424e-05, "loss": 69.4657, "step": 69410 }, { "epoch": 0.2804655841820966, "grad_norm": 2208.393310546875, "learning_rate": 4.5343762281311345e-05, "loss": 78.4579, "step": 69420 }, { "epoch": 0.2805059854474642, "grad_norm": 903.3010864257812, "learning_rate": 4.534173326214634e-05, "loss": 85.2893, "step": 69430 }, { "epoch": 0.28054638671283183, "grad_norm": 1086.91455078125, "learning_rate": 4.533970384640877e-05, "loss": 56.4757, "step": 69440 }, { "epoch": 0.28058678797819947, "grad_norm": 841.8659057617188, "learning_rate": 4.53376740341382e-05, "loss": 71.8159, "step": 69450 }, { "epoch": 0.2806271892435671, "grad_norm": 1415.8238525390625, "learning_rate": 4.533564382537421e-05, "loss": 62.5434, "step": 69460 }, { "epoch": 0.28066759050893475, "grad_norm": 974.2418212890625, "learning_rate": 4.533361322015637e-05, "loss": 69.978, "step": 69470 }, { "epoch": 0.2807079917743024, "grad_norm": 2103.853515625, "learning_rate": 4.533158221852427e-05, "loss": 81.6764, "step": 69480 }, { "epoch": 0.28074839303967003, "grad_norm": 644.2200317382812, "learning_rate": 4.532955082051751e-05, "loss": 49.908, "step": 69490 }, { "epoch": 0.2807887943050376, "grad_norm": 603.8206176757812, "learning_rate": 4.532751902617569e-05, "loss": 112.3736, "step": 69500 }, { "epoch": 0.28082919557040525, "grad_norm": 700.8045654296875, "learning_rate": 4.532548683553842e-05, "loss": 65.1945, "step": 69510 }, { "epoch": 0.2808695968357729, "grad_norm": 565.0137329101562, "learning_rate": 4.5323454248645324e-05, "loss": 89.2239, "step": 69520 }, { "epoch": 0.28090999810114053, "grad_norm": 550.55859375, "learning_rate": 4.532142126553603e-05, "loss": 62.9486, "step": 69530 }, { "epoch": 0.2809503993665082, "grad_norm": 819.1644287109375, "learning_rate": 4.5319387886250156e-05, "loss": 79.9527, "step": 69540 }, { "epoch": 0.2809908006318758, "grad_norm": 1997.134033203125, "learning_rate": 4.531735411082735e-05, "loss": 73.3801, "step": 69550 }, { "epoch": 0.2810312018972434, "grad_norm": 233.41488647460938, "learning_rate": 4.531531993930727e-05, "loss": 79.5541, "step": 69560 }, { "epoch": 0.28107160316261104, "grad_norm": 981.9132080078125, "learning_rate": 4.5313285371729575e-05, "loss": 66.0245, "step": 69570 }, { "epoch": 0.2811120044279787, "grad_norm": 600.988037109375, "learning_rate": 4.531125040813392e-05, "loss": 71.3912, "step": 69580 }, { "epoch": 0.2811524056933463, "grad_norm": 760.9716796875, "learning_rate": 4.530921504855997e-05, "loss": 52.4341, "step": 69590 }, { "epoch": 0.28119280695871396, "grad_norm": 1831.3338623046875, "learning_rate": 4.530717929304743e-05, "loss": 109.2623, "step": 69600 }, { "epoch": 0.2812332082240816, "grad_norm": 838.1921997070312, "learning_rate": 4.5305143141635976e-05, "loss": 68.4752, "step": 69610 }, { "epoch": 0.2812736094894492, "grad_norm": 446.0285949707031, "learning_rate": 4.5303106594365296e-05, "loss": 53.7918, "step": 69620 }, { "epoch": 0.2813140107548168, "grad_norm": 1072.6981201171875, "learning_rate": 4.530106965127511e-05, "loss": 71.5331, "step": 69630 }, { "epoch": 0.28135441202018446, "grad_norm": 841.3654174804688, "learning_rate": 4.529903231240511e-05, "loss": 74.8359, "step": 69640 }, { "epoch": 0.2813948132855521, "grad_norm": 685.6234741210938, "learning_rate": 4.529699457779503e-05, "loss": 77.722, "step": 69650 }, { "epoch": 0.28143521455091974, "grad_norm": 1382.5294189453125, "learning_rate": 4.5294956447484584e-05, "loss": 72.9175, "step": 69660 }, { "epoch": 0.2814756158162874, "grad_norm": 651.1674194335938, "learning_rate": 4.529291792151351e-05, "loss": 73.6339, "step": 69670 }, { "epoch": 0.281516017081655, "grad_norm": 540.8804931640625, "learning_rate": 4.529087899992156e-05, "loss": 36.5959, "step": 69680 }, { "epoch": 0.2815564183470226, "grad_norm": 648.30615234375, "learning_rate": 4.528883968274848e-05, "loss": 96.0729, "step": 69690 }, { "epoch": 0.28159681961239025, "grad_norm": 1536.320556640625, "learning_rate": 4.528679997003403e-05, "loss": 62.9985, "step": 69700 }, { "epoch": 0.2816372208777579, "grad_norm": 458.5219421386719, "learning_rate": 4.528475986181796e-05, "loss": 69.9463, "step": 69710 }, { "epoch": 0.28167762214312553, "grad_norm": 342.8268127441406, "learning_rate": 4.5282719358140056e-05, "loss": 87.2414, "step": 69720 }, { "epoch": 0.28171802340849317, "grad_norm": 564.786376953125, "learning_rate": 4.5280678459040095e-05, "loss": 61.6622, "step": 69730 }, { "epoch": 0.2817584246738608, "grad_norm": 665.1903686523438, "learning_rate": 4.5278637164557866e-05, "loss": 116.5534, "step": 69740 }, { "epoch": 0.2817988259392284, "grad_norm": 736.6189575195312, "learning_rate": 4.527659547473317e-05, "loss": 67.6608, "step": 69750 }, { "epoch": 0.28183922720459603, "grad_norm": 660.351806640625, "learning_rate": 4.52745533896058e-05, "loss": 84.5275, "step": 69760 }, { "epoch": 0.2818796284699637, "grad_norm": 641.83642578125, "learning_rate": 4.527251090921558e-05, "loss": 64.5504, "step": 69770 }, { "epoch": 0.2819200297353313, "grad_norm": 966.1611328125, "learning_rate": 4.527046803360232e-05, "loss": 66.6436, "step": 69780 }, { "epoch": 0.28196043100069895, "grad_norm": 1488.1563720703125, "learning_rate": 4.526842476280585e-05, "loss": 72.4152, "step": 69790 }, { "epoch": 0.2820008322660666, "grad_norm": 809.4866333007812, "learning_rate": 4.5266381096866e-05, "loss": 56.8854, "step": 69800 }, { "epoch": 0.28204123353143423, "grad_norm": 945.8799438476562, "learning_rate": 4.526433703582262e-05, "loss": 85.1573, "step": 69810 }, { "epoch": 0.2820816347968018, "grad_norm": 500.84307861328125, "learning_rate": 4.5262292579715556e-05, "loss": 37.6372, "step": 69820 }, { "epoch": 0.28212203606216946, "grad_norm": 909.1983642578125, "learning_rate": 4.526024772858467e-05, "loss": 69.4535, "step": 69830 }, { "epoch": 0.2821624373275371, "grad_norm": 1079.6373291015625, "learning_rate": 4.525820248246982e-05, "loss": 111.5219, "step": 69840 }, { "epoch": 0.28220283859290474, "grad_norm": 727.4838256835938, "learning_rate": 4.5256156841410886e-05, "loss": 66.2702, "step": 69850 }, { "epoch": 0.2822432398582724, "grad_norm": 1162.87353515625, "learning_rate": 4.525411080544775e-05, "loss": 62.2597, "step": 69860 }, { "epoch": 0.28228364112364, "grad_norm": 2795.06640625, "learning_rate": 4.5252064374620285e-05, "loss": 99.6256, "step": 69870 }, { "epoch": 0.2823240423890076, "grad_norm": 672.3999633789062, "learning_rate": 4.5250017548968404e-05, "loss": 104.1018, "step": 69880 }, { "epoch": 0.28236444365437524, "grad_norm": 1359.24609375, "learning_rate": 4.524797032853201e-05, "loss": 90.7683, "step": 69890 }, { "epoch": 0.2824048449197429, "grad_norm": 744.1480102539062, "learning_rate": 4.5245922713350996e-05, "loss": 73.5604, "step": 69900 }, { "epoch": 0.2824452461851105, "grad_norm": 629.0797729492188, "learning_rate": 4.524387470346531e-05, "loss": 79.3253, "step": 69910 }, { "epoch": 0.28248564745047816, "grad_norm": 846.8230590820312, "learning_rate": 4.524182629891486e-05, "loss": 52.2285, "step": 69920 }, { "epoch": 0.2825260487158458, "grad_norm": 916.1362915039062, "learning_rate": 4.523977749973958e-05, "loss": 47.4978, "step": 69930 }, { "epoch": 0.2825664499812134, "grad_norm": 878.1392822265625, "learning_rate": 4.523772830597942e-05, "loss": 69.1207, "step": 69940 }, { "epoch": 0.282606851246581, "grad_norm": 1516.745361328125, "learning_rate": 4.523567871767434e-05, "loss": 70.5018, "step": 69950 }, { "epoch": 0.28264725251194867, "grad_norm": 1540.39501953125, "learning_rate": 4.523362873486427e-05, "loss": 61.0742, "step": 69960 }, { "epoch": 0.2826876537773163, "grad_norm": 588.6622314453125, "learning_rate": 4.52315783575892e-05, "loss": 105.1381, "step": 69970 }, { "epoch": 0.28272805504268395, "grad_norm": 792.640380859375, "learning_rate": 4.522952758588909e-05, "loss": 74.2773, "step": 69980 }, { "epoch": 0.2827684563080516, "grad_norm": 381.4082336425781, "learning_rate": 4.5227476419803916e-05, "loss": 42.4223, "step": 69990 }, { "epoch": 0.2828088575734192, "grad_norm": 2376.44775390625, "learning_rate": 4.522542485937369e-05, "loss": 73.0111, "step": 70000 }, { "epoch": 0.2828492588387868, "grad_norm": 478.2873229980469, "learning_rate": 4.5223372904638386e-05, "loss": 89.4105, "step": 70010 }, { "epoch": 0.28288966010415445, "grad_norm": 1394.88623046875, "learning_rate": 4.5221320555638016e-05, "loss": 57.0316, "step": 70020 }, { "epoch": 0.2829300613695221, "grad_norm": 1728.24267578125, "learning_rate": 4.521926781241259e-05, "loss": 60.0436, "step": 70030 }, { "epoch": 0.28297046263488973, "grad_norm": 313.620361328125, "learning_rate": 4.521721467500213e-05, "loss": 78.582, "step": 70040 }, { "epoch": 0.28301086390025737, "grad_norm": 876.0878295898438, "learning_rate": 4.521516114344666e-05, "loss": 96.7629, "step": 70050 }, { "epoch": 0.283051265165625, "grad_norm": 715.8422241210938, "learning_rate": 4.521310721778622e-05, "loss": 68.7341, "step": 70060 }, { "epoch": 0.2830916664309926, "grad_norm": 984.6596069335938, "learning_rate": 4.5211052898060855e-05, "loss": 85.3589, "step": 70070 }, { "epoch": 0.28313206769636023, "grad_norm": 440.2537536621094, "learning_rate": 4.5208998184310596e-05, "loss": 66.0765, "step": 70080 }, { "epoch": 0.2831724689617279, "grad_norm": 958.81982421875, "learning_rate": 4.520694307657551e-05, "loss": 62.4521, "step": 70090 }, { "epoch": 0.2832128702270955, "grad_norm": 592.9948120117188, "learning_rate": 4.5204887574895684e-05, "loss": 76.5842, "step": 70100 }, { "epoch": 0.28325327149246315, "grad_norm": 810.19287109375, "learning_rate": 4.520283167931115e-05, "loss": 67.7459, "step": 70110 }, { "epoch": 0.2832936727578308, "grad_norm": 486.7115173339844, "learning_rate": 4.5200775389862026e-05, "loss": 57.3887, "step": 70120 }, { "epoch": 0.2833340740231984, "grad_norm": 1011.84033203125, "learning_rate": 4.519871870658838e-05, "loss": 77.9214, "step": 70130 }, { "epoch": 0.283374475288566, "grad_norm": 1170.3331298828125, "learning_rate": 4.519666162953032e-05, "loss": 81.3161, "step": 70140 }, { "epoch": 0.28341487655393366, "grad_norm": 885.1085815429688, "learning_rate": 4.519460415872794e-05, "loss": 73.1431, "step": 70150 }, { "epoch": 0.2834552778193013, "grad_norm": 746.1101684570312, "learning_rate": 4.519254629422136e-05, "loss": 75.5062, "step": 70160 }, { "epoch": 0.28349567908466894, "grad_norm": 805.8640747070312, "learning_rate": 4.5190488036050685e-05, "loss": 62.1489, "step": 70170 }, { "epoch": 0.2835360803500366, "grad_norm": 1288.6954345703125, "learning_rate": 4.518842938425605e-05, "loss": 70.3223, "step": 70180 }, { "epoch": 0.2835764816154042, "grad_norm": 820.5307006835938, "learning_rate": 4.51863703388776e-05, "loss": 49.1447, "step": 70190 }, { "epoch": 0.2836168828807718, "grad_norm": 279.9125671386719, "learning_rate": 4.5184310899955465e-05, "loss": 41.5823, "step": 70200 }, { "epoch": 0.28365728414613944, "grad_norm": 2208.78564453125, "learning_rate": 4.518225106752979e-05, "loss": 63.2972, "step": 70210 }, { "epoch": 0.2836976854115071, "grad_norm": 609.0946655273438, "learning_rate": 4.5180190841640747e-05, "loss": 59.1739, "step": 70220 }, { "epoch": 0.2837380866768747, "grad_norm": 761.7667236328125, "learning_rate": 4.517813022232849e-05, "loss": 76.7521, "step": 70230 }, { "epoch": 0.28377848794224236, "grad_norm": 623.1316528320312, "learning_rate": 4.51760692096332e-05, "loss": 54.5164, "step": 70240 }, { "epoch": 0.28381888920761, "grad_norm": 875.2012329101562, "learning_rate": 4.5174007803595055e-05, "loss": 69.7034, "step": 70250 }, { "epoch": 0.2838592904729776, "grad_norm": 294.3750915527344, "learning_rate": 4.517194600425423e-05, "loss": 75.7382, "step": 70260 }, { "epoch": 0.28389969173834523, "grad_norm": 793.992919921875, "learning_rate": 4.516988381165095e-05, "loss": 57.0891, "step": 70270 }, { "epoch": 0.28394009300371287, "grad_norm": 265.2918395996094, "learning_rate": 4.516782122582538e-05, "loss": 53.4736, "step": 70280 }, { "epoch": 0.2839804942690805, "grad_norm": 637.1311645507812, "learning_rate": 4.516575824681777e-05, "loss": 89.6522, "step": 70290 }, { "epoch": 0.28402089553444815, "grad_norm": 2700.84521484375, "learning_rate": 4.516369487466832e-05, "loss": 78.3818, "step": 70300 }, { "epoch": 0.2840612967998158, "grad_norm": 775.3411865234375, "learning_rate": 4.5161631109417246e-05, "loss": 75.4549, "step": 70310 }, { "epoch": 0.28410169806518343, "grad_norm": 616.4786376953125, "learning_rate": 4.5159566951104796e-05, "loss": 78.6188, "step": 70320 }, { "epoch": 0.284142099330551, "grad_norm": 265.2325744628906, "learning_rate": 4.515750239977122e-05, "loss": 58.4777, "step": 70330 }, { "epoch": 0.28418250059591865, "grad_norm": 356.9336242675781, "learning_rate": 4.5155437455456744e-05, "loss": 88.5666, "step": 70340 }, { "epoch": 0.2842229018612863, "grad_norm": 1054.8193359375, "learning_rate": 4.515337211820165e-05, "loss": 74.0532, "step": 70350 }, { "epoch": 0.28426330312665393, "grad_norm": 1073.7655029296875, "learning_rate": 4.5151306388046175e-05, "loss": 57.4097, "step": 70360 }, { "epoch": 0.2843037043920216, "grad_norm": 1117.8515625, "learning_rate": 4.5149240265030627e-05, "loss": 91.991, "step": 70370 }, { "epoch": 0.2843441056573892, "grad_norm": 554.7203979492188, "learning_rate": 4.5147173749195255e-05, "loss": 51.3412, "step": 70380 }, { "epoch": 0.2843845069227568, "grad_norm": 669.3203125, "learning_rate": 4.514510684058036e-05, "loss": 90.7202, "step": 70390 }, { "epoch": 0.28442490818812444, "grad_norm": 861.2157592773438, "learning_rate": 4.5143039539226234e-05, "loss": 70.1871, "step": 70400 }, { "epoch": 0.2844653094534921, "grad_norm": 345.44781494140625, "learning_rate": 4.514097184517318e-05, "loss": 49.2981, "step": 70410 }, { "epoch": 0.2845057107188597, "grad_norm": 1595.496337890625, "learning_rate": 4.5138903758461515e-05, "loss": 47.9407, "step": 70420 }, { "epoch": 0.28454611198422736, "grad_norm": 560.5866088867188, "learning_rate": 4.5136835279131556e-05, "loss": 67.7868, "step": 70430 }, { "epoch": 0.284586513249595, "grad_norm": 215.32069396972656, "learning_rate": 4.513476640722362e-05, "loss": 59.5168, "step": 70440 }, { "epoch": 0.2846269145149626, "grad_norm": 858.737548828125, "learning_rate": 4.513269714277805e-05, "loss": 53.7752, "step": 70450 }, { "epoch": 0.2846673157803302, "grad_norm": 575.0614624023438, "learning_rate": 4.5130627485835186e-05, "loss": 95.9444, "step": 70460 }, { "epoch": 0.28470771704569786, "grad_norm": 2945.451416015625, "learning_rate": 4.512855743643537e-05, "loss": 69.3364, "step": 70470 }, { "epoch": 0.2847481183110655, "grad_norm": 578.4557495117188, "learning_rate": 4.512648699461897e-05, "loss": 53.4026, "step": 70480 }, { "epoch": 0.28478851957643314, "grad_norm": 362.3124084472656, "learning_rate": 4.512441616042634e-05, "loss": 86.2023, "step": 70490 }, { "epoch": 0.2848289208418008, "grad_norm": 521.9320068359375, "learning_rate": 4.512234493389785e-05, "loss": 69.5704, "step": 70500 }, { "epoch": 0.2848693221071684, "grad_norm": 606.285888671875, "learning_rate": 4.5120273315073897e-05, "loss": 67.9938, "step": 70510 }, { "epoch": 0.284909723372536, "grad_norm": 977.4638671875, "learning_rate": 4.511820130399485e-05, "loss": 90.854, "step": 70520 }, { "epoch": 0.28495012463790365, "grad_norm": 402.9257507324219, "learning_rate": 4.5116128900701114e-05, "loss": 79.0524, "step": 70530 }, { "epoch": 0.2849905259032713, "grad_norm": 1074.8834228515625, "learning_rate": 4.511405610523309e-05, "loss": 80.932, "step": 70540 }, { "epoch": 0.2850309271686389, "grad_norm": 1405.5771484375, "learning_rate": 4.5111982917631194e-05, "loss": 103.0241, "step": 70550 }, { "epoch": 0.28507132843400657, "grad_norm": 1106.503662109375, "learning_rate": 4.510990933793583e-05, "loss": 70.0878, "step": 70560 }, { "epoch": 0.2851117296993742, "grad_norm": 788.6580810546875, "learning_rate": 4.5107835366187425e-05, "loss": 32.8911, "step": 70570 }, { "epoch": 0.2851521309647418, "grad_norm": 1015.0027465820312, "learning_rate": 4.5105761002426415e-05, "loss": 55.8387, "step": 70580 }, { "epoch": 0.28519253223010943, "grad_norm": 537.4610595703125, "learning_rate": 4.510368624669325e-05, "loss": 58.8079, "step": 70590 }, { "epoch": 0.28523293349547707, "grad_norm": 1022.7601318359375, "learning_rate": 4.510161109902837e-05, "loss": 66.04, "step": 70600 }, { "epoch": 0.2852733347608447, "grad_norm": 785.5269165039062, "learning_rate": 4.5099535559472234e-05, "loss": 87.9221, "step": 70610 }, { "epoch": 0.28531373602621235, "grad_norm": 765.5206298828125, "learning_rate": 4.50974596280653e-05, "loss": 72.3749, "step": 70620 }, { "epoch": 0.28535413729158, "grad_norm": 1824.7486572265625, "learning_rate": 4.509538330484805e-05, "loss": 70.8314, "step": 70630 }, { "epoch": 0.28539453855694763, "grad_norm": 784.1112670898438, "learning_rate": 4.509330658986095e-05, "loss": 64.7922, "step": 70640 }, { "epoch": 0.2854349398223152, "grad_norm": 621.5699462890625, "learning_rate": 4.5091229483144495e-05, "loss": 94.8581, "step": 70650 }, { "epoch": 0.28547534108768285, "grad_norm": 724.716796875, "learning_rate": 4.508915198473919e-05, "loss": 54.7563, "step": 70660 }, { "epoch": 0.2855157423530505, "grad_norm": 435.5616149902344, "learning_rate": 4.50870740946855e-05, "loss": 69.268, "step": 70670 }, { "epoch": 0.28555614361841813, "grad_norm": 1086.2598876953125, "learning_rate": 4.508499581302398e-05, "loss": 56.1892, "step": 70680 }, { "epoch": 0.2855965448837858, "grad_norm": 1214.41015625, "learning_rate": 4.5082917139795125e-05, "loss": 75.7297, "step": 70690 }, { "epoch": 0.2856369461491534, "grad_norm": 665.9967041015625, "learning_rate": 4.508083807503945e-05, "loss": 56.6401, "step": 70700 }, { "epoch": 0.285677347414521, "grad_norm": 1049.6688232421875, "learning_rate": 4.50787586187975e-05, "loss": 70.0839, "step": 70710 }, { "epoch": 0.28571774867988864, "grad_norm": 680.3385009765625, "learning_rate": 4.507667877110982e-05, "loss": 65.669, "step": 70720 }, { "epoch": 0.2857581499452563, "grad_norm": 351.40545654296875, "learning_rate": 4.507459853201695e-05, "loss": 52.2754, "step": 70730 }, { "epoch": 0.2857985512106239, "grad_norm": 699.9556274414062, "learning_rate": 4.507251790155944e-05, "loss": 66.7143, "step": 70740 }, { "epoch": 0.28583895247599156, "grad_norm": 483.2270202636719, "learning_rate": 4.5070436879777865e-05, "loss": 72.3885, "step": 70750 }, { "epoch": 0.2858793537413592, "grad_norm": 13059.109375, "learning_rate": 4.506835546671278e-05, "loss": 100.1066, "step": 70760 }, { "epoch": 0.2859197550067268, "grad_norm": 1270.271240234375, "learning_rate": 4.506627366240479e-05, "loss": 76.3044, "step": 70770 }, { "epoch": 0.2859601562720944, "grad_norm": 373.1097106933594, "learning_rate": 4.506419146689446e-05, "loss": 138.1525, "step": 70780 }, { "epoch": 0.28600055753746206, "grad_norm": 627.6198120117188, "learning_rate": 4.506210888022239e-05, "loss": 80.5968, "step": 70790 }, { "epoch": 0.2860409588028297, "grad_norm": 396.57012939453125, "learning_rate": 4.5060025902429174e-05, "loss": 53.7545, "step": 70800 }, { "epoch": 0.28608136006819734, "grad_norm": 534.5454711914062, "learning_rate": 4.505794253355542e-05, "loss": 87.9787, "step": 70810 }, { "epoch": 0.286121761333565, "grad_norm": 1753.4989013671875, "learning_rate": 4.505585877364175e-05, "loss": 80.0071, "step": 70820 }, { "epoch": 0.2861621625989326, "grad_norm": 940.0836791992188, "learning_rate": 4.505377462272879e-05, "loss": 64.5939, "step": 70830 }, { "epoch": 0.2862025638643002, "grad_norm": 524.5255737304688, "learning_rate": 4.5051690080857176e-05, "loss": 54.3899, "step": 70840 }, { "epoch": 0.28624296512966785, "grad_norm": 982.0265502929688, "learning_rate": 4.504960514806753e-05, "loss": 64.2541, "step": 70850 }, { "epoch": 0.2862833663950355, "grad_norm": 1201.5341796875, "learning_rate": 4.504751982440052e-05, "loss": 88.8859, "step": 70860 }, { "epoch": 0.28632376766040313, "grad_norm": 624.1221313476562, "learning_rate": 4.5045434109896786e-05, "loss": 59.5705, "step": 70870 }, { "epoch": 0.28636416892577077, "grad_norm": 422.28778076171875, "learning_rate": 4.504334800459699e-05, "loss": 44.9323, "step": 70880 }, { "epoch": 0.2864045701911384, "grad_norm": 665.96923828125, "learning_rate": 4.504126150854181e-05, "loss": 60.9275, "step": 70890 }, { "epoch": 0.286444971456506, "grad_norm": 1277.694580078125, "learning_rate": 4.503917462177192e-05, "loss": 77.8568, "step": 70900 }, { "epoch": 0.28648537272187363, "grad_norm": 1883.61962890625, "learning_rate": 4.5037087344328e-05, "loss": 62.4995, "step": 70910 }, { "epoch": 0.2865257739872413, "grad_norm": 885.0050659179688, "learning_rate": 4.5034999676250745e-05, "loss": 62.4988, "step": 70920 }, { "epoch": 0.2865661752526089, "grad_norm": 1359.623779296875, "learning_rate": 4.503291161758087e-05, "loss": 71.7273, "step": 70930 }, { "epoch": 0.28660657651797655, "grad_norm": 497.2101745605469, "learning_rate": 4.5030823168359046e-05, "loss": 76.8819, "step": 70940 }, { "epoch": 0.2866469777833442, "grad_norm": 891.3270874023438, "learning_rate": 4.502873432862603e-05, "loss": 69.6958, "step": 70950 }, { "epoch": 0.28668737904871183, "grad_norm": 441.0372314453125, "learning_rate": 4.5026645098422515e-05, "loss": 102.5139, "step": 70960 }, { "epoch": 0.2867277803140794, "grad_norm": 993.6549072265625, "learning_rate": 4.5024555477789255e-05, "loss": 79.794, "step": 70970 }, { "epoch": 0.28676818157944706, "grad_norm": 3066.687255859375, "learning_rate": 4.5022465466766974e-05, "loss": 105.9858, "step": 70980 }, { "epoch": 0.2868085828448147, "grad_norm": 1127.01025390625, "learning_rate": 4.502037506539642e-05, "loss": 69.0879, "step": 70990 }, { "epoch": 0.28684898411018234, "grad_norm": 1199.5037841796875, "learning_rate": 4.5018284273718336e-05, "loss": 58.2869, "step": 71000 }, { "epoch": 0.28688938537555, "grad_norm": 706.9274291992188, "learning_rate": 4.5016193091773504e-05, "loss": 71.1858, "step": 71010 }, { "epoch": 0.2869297866409176, "grad_norm": 758.85498046875, "learning_rate": 4.501410151960268e-05, "loss": 75.0063, "step": 71020 }, { "epoch": 0.2869701879062852, "grad_norm": 938.1792602539062, "learning_rate": 4.5012009557246645e-05, "loss": 56.8306, "step": 71030 }, { "epoch": 0.28701058917165284, "grad_norm": 539.9905395507812, "learning_rate": 4.5009917204746184e-05, "loss": 49.6551, "step": 71040 }, { "epoch": 0.2870509904370205, "grad_norm": 1365.8111572265625, "learning_rate": 4.5007824462142076e-05, "loss": 67.7948, "step": 71050 }, { "epoch": 0.2870913917023881, "grad_norm": 1149.0391845703125, "learning_rate": 4.500573132947514e-05, "loss": 62.5273, "step": 71060 }, { "epoch": 0.28713179296775576, "grad_norm": 833.6195678710938, "learning_rate": 4.500363780678617e-05, "loss": 46.2563, "step": 71070 }, { "epoch": 0.2871721942331234, "grad_norm": 2005.719482421875, "learning_rate": 4.5001543894115975e-05, "loss": 65.34, "step": 71080 }, { "epoch": 0.287212595498491, "grad_norm": 499.4972839355469, "learning_rate": 4.4999449591505396e-05, "loss": 46.8325, "step": 71090 }, { "epoch": 0.2872529967638586, "grad_norm": 1662.39013671875, "learning_rate": 4.499735489899524e-05, "loss": 75.1804, "step": 71100 }, { "epoch": 0.28729339802922627, "grad_norm": 297.105712890625, "learning_rate": 4.4995259816626356e-05, "loss": 59.1684, "step": 71110 }, { "epoch": 0.2873337992945939, "grad_norm": 506.93670654296875, "learning_rate": 4.499316434443959e-05, "loss": 41.7648, "step": 71120 }, { "epoch": 0.28737420055996155, "grad_norm": 666.0968627929688, "learning_rate": 4.49910684824758e-05, "loss": 110.1385, "step": 71130 }, { "epoch": 0.2874146018253292, "grad_norm": 786.0115966796875, "learning_rate": 4.498897223077582e-05, "loss": 72.202, "step": 71140 }, { "epoch": 0.2874550030906968, "grad_norm": 7043.43310546875, "learning_rate": 4.498687558938055e-05, "loss": 108.4283, "step": 71150 }, { "epoch": 0.2874954043560644, "grad_norm": 857.16748046875, "learning_rate": 4.4984778558330844e-05, "loss": 96.0511, "step": 71160 }, { "epoch": 0.28753580562143205, "grad_norm": 637.8214111328125, "learning_rate": 4.4982681137667594e-05, "loss": 44.1582, "step": 71170 }, { "epoch": 0.2875762068867997, "grad_norm": 904.2376098632812, "learning_rate": 4.498058332743168e-05, "loss": 98.6093, "step": 71180 }, { "epoch": 0.28761660815216733, "grad_norm": 1919.4237060546875, "learning_rate": 4.4978485127664015e-05, "loss": 103.2059, "step": 71190 }, { "epoch": 0.28765700941753497, "grad_norm": 810.6397094726562, "learning_rate": 4.4976386538405495e-05, "loss": 67.5545, "step": 71200 }, { "epoch": 0.2876974106829026, "grad_norm": 1610.69384765625, "learning_rate": 4.4974287559697035e-05, "loss": 60.4293, "step": 71210 }, { "epoch": 0.2877378119482702, "grad_norm": 1313.538818359375, "learning_rate": 4.497218819157956e-05, "loss": 57.7007, "step": 71220 }, { "epoch": 0.28777821321363783, "grad_norm": 872.7113037109375, "learning_rate": 4.497008843409399e-05, "loss": 51.1471, "step": 71230 }, { "epoch": 0.2878186144790055, "grad_norm": 398.4093017578125, "learning_rate": 4.496798828728126e-05, "loss": 73.8484, "step": 71240 }, { "epoch": 0.2878590157443731, "grad_norm": 3197.44970703125, "learning_rate": 4.496588775118232e-05, "loss": 97.3321, "step": 71250 }, { "epoch": 0.28789941700974075, "grad_norm": 422.7282409667969, "learning_rate": 4.496378682583813e-05, "loss": 79.5314, "step": 71260 }, { "epoch": 0.2879398182751084, "grad_norm": 409.5416564941406, "learning_rate": 4.4961685511289625e-05, "loss": 93.6449, "step": 71270 }, { "epoch": 0.28798021954047603, "grad_norm": 714.4796752929688, "learning_rate": 4.495958380757779e-05, "loss": 105.323, "step": 71280 }, { "epoch": 0.2880206208058436, "grad_norm": 1594.0919189453125, "learning_rate": 4.4957481714743585e-05, "loss": 68.0684, "step": 71290 }, { "epoch": 0.28806102207121126, "grad_norm": 460.36334228515625, "learning_rate": 4.495537923282801e-05, "loss": 64.8978, "step": 71300 }, { "epoch": 0.2881014233365789, "grad_norm": 1116.5792236328125, "learning_rate": 4.4953276361872034e-05, "loss": 61.4736, "step": 71310 }, { "epoch": 0.28814182460194654, "grad_norm": 463.9264221191406, "learning_rate": 4.4951173101916675e-05, "loss": 59.6178, "step": 71320 }, { "epoch": 0.2881822258673142, "grad_norm": 958.1243896484375, "learning_rate": 4.494906945300291e-05, "loss": 76.7924, "step": 71330 }, { "epoch": 0.2882226271326818, "grad_norm": 479.6456604003906, "learning_rate": 4.4946965415171775e-05, "loss": 57.5769, "step": 71340 }, { "epoch": 0.2882630283980494, "grad_norm": 595.579345703125, "learning_rate": 4.4944860988464276e-05, "loss": 72.5989, "step": 71350 }, { "epoch": 0.28830342966341704, "grad_norm": 821.9955444335938, "learning_rate": 4.494275617292144e-05, "loss": 74.0442, "step": 71360 }, { "epoch": 0.2883438309287847, "grad_norm": 890.1593627929688, "learning_rate": 4.494065096858432e-05, "loss": 67.1569, "step": 71370 }, { "epoch": 0.2883842321941523, "grad_norm": 935.3182983398438, "learning_rate": 4.4938545375493934e-05, "loss": 57.2469, "step": 71380 }, { "epoch": 0.28842463345951996, "grad_norm": 930.1670532226562, "learning_rate": 4.493643939369134e-05, "loss": 62.9051, "step": 71390 }, { "epoch": 0.2884650347248876, "grad_norm": 923.1588745117188, "learning_rate": 4.493433302321759e-05, "loss": 72.7095, "step": 71400 }, { "epoch": 0.2885054359902552, "grad_norm": 1056.5206298828125, "learning_rate": 4.4932226264113764e-05, "loss": 89.8381, "step": 71410 }, { "epoch": 0.28854583725562283, "grad_norm": 556.2738647460938, "learning_rate": 4.493011911642092e-05, "loss": 64.1478, "step": 71420 }, { "epoch": 0.28858623852099047, "grad_norm": 614.498046875, "learning_rate": 4.4928011580180155e-05, "loss": 71.601, "step": 71430 }, { "epoch": 0.2886266397863581, "grad_norm": 1232.3363037109375, "learning_rate": 4.492590365543253e-05, "loss": 59.9572, "step": 71440 }, { "epoch": 0.28866704105172575, "grad_norm": 1182.1708984375, "learning_rate": 4.492379534221916e-05, "loss": 87.4886, "step": 71450 }, { "epoch": 0.2887074423170934, "grad_norm": 703.7147827148438, "learning_rate": 4.492168664058114e-05, "loss": 48.6093, "step": 71460 }, { "epoch": 0.28874784358246103, "grad_norm": 1096.2550048828125, "learning_rate": 4.491957755055959e-05, "loss": 55.8847, "step": 71470 }, { "epoch": 0.2887882448478286, "grad_norm": 824.9928588867188, "learning_rate": 4.491746807219561e-05, "loss": 69.6475, "step": 71480 }, { "epoch": 0.28882864611319625, "grad_norm": 799.056640625, "learning_rate": 4.491535820553034e-05, "loss": 74.3196, "step": 71490 }, { "epoch": 0.2888690473785639, "grad_norm": 1376.468017578125, "learning_rate": 4.491324795060491e-05, "loss": 56.1541, "step": 71500 }, { "epoch": 0.28890944864393153, "grad_norm": 792.1198120117188, "learning_rate": 4.491113730746046e-05, "loss": 80.2355, "step": 71510 }, { "epoch": 0.2889498499092992, "grad_norm": 1302.2103271484375, "learning_rate": 4.490902627613813e-05, "loss": 70.5781, "step": 71520 }, { "epoch": 0.2889902511746668, "grad_norm": 1214.075927734375, "learning_rate": 4.4906914856679094e-05, "loss": 84.3187, "step": 71530 }, { "epoch": 0.2890306524400344, "grad_norm": 401.3418273925781, "learning_rate": 4.49048030491245e-05, "loss": 53.576, "step": 71540 }, { "epoch": 0.28907105370540204, "grad_norm": 885.4319458007812, "learning_rate": 4.490269085351552e-05, "loss": 46.2776, "step": 71550 }, { "epoch": 0.2891114549707697, "grad_norm": 410.9359436035156, "learning_rate": 4.4900578269893335e-05, "loss": 75.6896, "step": 71560 }, { "epoch": 0.2891518562361373, "grad_norm": 858.1013793945312, "learning_rate": 4.4898465298299134e-05, "loss": 78.359, "step": 71570 }, { "epoch": 0.28919225750150496, "grad_norm": 1423.806640625, "learning_rate": 4.489635193877411e-05, "loss": 62.488, "step": 71580 }, { "epoch": 0.2892326587668726, "grad_norm": 962.8115234375, "learning_rate": 4.489423819135945e-05, "loss": 68.6732, "step": 71590 }, { "epoch": 0.28927306003224024, "grad_norm": 737.3809814453125, "learning_rate": 4.4892124056096386e-05, "loss": 85.0963, "step": 71600 }, { "epoch": 0.2893134612976078, "grad_norm": 699.3038330078125, "learning_rate": 4.489000953302612e-05, "loss": 39.0792, "step": 71610 }, { "epoch": 0.28935386256297546, "grad_norm": 526.4671020507812, "learning_rate": 4.488789462218987e-05, "loss": 73.1894, "step": 71620 }, { "epoch": 0.2893942638283431, "grad_norm": 869.59130859375, "learning_rate": 4.4885779323628886e-05, "loss": 80.5256, "step": 71630 }, { "epoch": 0.28943466509371074, "grad_norm": 2488.22216796875, "learning_rate": 4.4883663637384396e-05, "loss": 96.78, "step": 71640 }, { "epoch": 0.2894750663590784, "grad_norm": 322.6717529296875, "learning_rate": 4.488154756349764e-05, "loss": 55.7131, "step": 71650 }, { "epoch": 0.289515467624446, "grad_norm": 762.85498046875, "learning_rate": 4.4879431102009886e-05, "loss": 83.2775, "step": 71660 }, { "epoch": 0.2895558688898136, "grad_norm": 312.4606018066406, "learning_rate": 4.487731425296238e-05, "loss": 73.5523, "step": 71670 }, { "epoch": 0.28959627015518125, "grad_norm": 774.15478515625, "learning_rate": 4.487519701639641e-05, "loss": 58.399, "step": 71680 }, { "epoch": 0.2896366714205489, "grad_norm": 939.1637573242188, "learning_rate": 4.487307939235323e-05, "loss": 103.4214, "step": 71690 }, { "epoch": 0.2896770726859165, "grad_norm": 378.4061584472656, "learning_rate": 4.487096138087414e-05, "loss": 54.6242, "step": 71700 }, { "epoch": 0.28971747395128417, "grad_norm": 841.2785034179688, "learning_rate": 4.4868842982000425e-05, "loss": 92.4388, "step": 71710 }, { "epoch": 0.2897578752166518, "grad_norm": 1951.40673828125, "learning_rate": 4.486672419577339e-05, "loss": 99.056, "step": 71720 }, { "epoch": 0.2897982764820194, "grad_norm": 870.7053833007812, "learning_rate": 4.486460502223434e-05, "loss": 82.6444, "step": 71730 }, { "epoch": 0.28983867774738703, "grad_norm": 1036.0379638671875, "learning_rate": 4.4862485461424585e-05, "loss": 84.8484, "step": 71740 }, { "epoch": 0.28987907901275467, "grad_norm": 1319.0260009765625, "learning_rate": 4.4860365513385456e-05, "loss": 65.2683, "step": 71750 }, { "epoch": 0.2899194802781223, "grad_norm": 623.9202270507812, "learning_rate": 4.4858245178158276e-05, "loss": 51.1636, "step": 71760 }, { "epoch": 0.28995988154348995, "grad_norm": 1197.3074951171875, "learning_rate": 4.4856124455784375e-05, "loss": 57.0653, "step": 71770 }, { "epoch": 0.2900002828088576, "grad_norm": 951.6260986328125, "learning_rate": 4.485400334630511e-05, "loss": 79.302, "step": 71780 }, { "epoch": 0.29004068407422523, "grad_norm": 714.1635131835938, "learning_rate": 4.485188184976182e-05, "loss": 64.435, "step": 71790 }, { "epoch": 0.2900810853395928, "grad_norm": 1034.5103759765625, "learning_rate": 4.484975996619589e-05, "loss": 69.878, "step": 71800 }, { "epoch": 0.29012148660496045, "grad_norm": 2745.012939453125, "learning_rate": 4.484763769564866e-05, "loss": 46.8337, "step": 71810 }, { "epoch": 0.2901618878703281, "grad_norm": 905.9317626953125, "learning_rate": 4.4845515038161515e-05, "loss": 72.7523, "step": 71820 }, { "epoch": 0.29020228913569573, "grad_norm": 596.106689453125, "learning_rate": 4.484339199377583e-05, "loss": 56.2039, "step": 71830 }, { "epoch": 0.2902426904010634, "grad_norm": 770.546875, "learning_rate": 4.484126856253301e-05, "loss": 63.8412, "step": 71840 }, { "epoch": 0.290283091666431, "grad_norm": 1505.4610595703125, "learning_rate": 4.483914474447445e-05, "loss": 84.4822, "step": 71850 }, { "epoch": 0.2903234929317986, "grad_norm": 925.32421875, "learning_rate": 4.483702053964154e-05, "loss": 58.1121, "step": 71860 }, { "epoch": 0.29036389419716624, "grad_norm": 754.6409912109375, "learning_rate": 4.4834895948075704e-05, "loss": 94.093, "step": 71870 }, { "epoch": 0.2904042954625339, "grad_norm": 296.52349853515625, "learning_rate": 4.483277096981836e-05, "loss": 70.3271, "step": 71880 }, { "epoch": 0.2904446967279015, "grad_norm": 387.88568115234375, "learning_rate": 4.483064560491094e-05, "loss": 46.6679, "step": 71890 }, { "epoch": 0.29048509799326916, "grad_norm": 628.3577270507812, "learning_rate": 4.482851985339487e-05, "loss": 73.2846, "step": 71900 }, { "epoch": 0.2905254992586368, "grad_norm": 429.7192077636719, "learning_rate": 4.4826393715311595e-05, "loss": 54.1423, "step": 71910 }, { "epoch": 0.29056590052400444, "grad_norm": 933.8616943359375, "learning_rate": 4.482426719070258e-05, "loss": 82.952, "step": 71920 }, { "epoch": 0.290606301789372, "grad_norm": 1550.8494873046875, "learning_rate": 4.482214027960925e-05, "loss": 60.8338, "step": 71930 }, { "epoch": 0.29064670305473966, "grad_norm": 725.9008178710938, "learning_rate": 4.48200129820731e-05, "loss": 58.8863, "step": 71940 }, { "epoch": 0.2906871043201073, "grad_norm": 426.5110168457031, "learning_rate": 4.481788529813559e-05, "loss": 84.5091, "step": 71950 }, { "epoch": 0.29072750558547494, "grad_norm": 735.1624145507812, "learning_rate": 4.481575722783821e-05, "loss": 71.0047, "step": 71960 }, { "epoch": 0.2907679068508426, "grad_norm": 1465.6466064453125, "learning_rate": 4.481362877122243e-05, "loss": 93.0289, "step": 71970 }, { "epoch": 0.2908083081162102, "grad_norm": 448.85919189453125, "learning_rate": 4.481149992832977e-05, "loss": 49.5786, "step": 71980 }, { "epoch": 0.2908487093815778, "grad_norm": 1719.1485595703125, "learning_rate": 4.4809370699201706e-05, "loss": 99.739, "step": 71990 }, { "epoch": 0.29088911064694545, "grad_norm": 2338.972900390625, "learning_rate": 4.480724108387977e-05, "loss": 94.2316, "step": 72000 }, { "epoch": 0.2909295119123131, "grad_norm": 495.1227722167969, "learning_rate": 4.480511108240547e-05, "loss": 57.7428, "step": 72010 }, { "epoch": 0.29096991317768073, "grad_norm": 516.1085205078125, "learning_rate": 4.480298069482033e-05, "loss": 65.6927, "step": 72020 }, { "epoch": 0.29101031444304837, "grad_norm": 592.4568481445312, "learning_rate": 4.480084992116589e-05, "loss": 54.2324, "step": 72030 }, { "epoch": 0.291050715708416, "grad_norm": 661.2754516601562, "learning_rate": 4.479871876148368e-05, "loss": 101.6369, "step": 72040 }, { "epoch": 0.2910911169737836, "grad_norm": 1018.503662109375, "learning_rate": 4.479658721581527e-05, "loss": 87.5494, "step": 72050 }, { "epoch": 0.29113151823915123, "grad_norm": 1803.326416015625, "learning_rate": 4.479445528420218e-05, "loss": 121.1224, "step": 72060 }, { "epoch": 0.2911719195045189, "grad_norm": 773.8688354492188, "learning_rate": 4.479232296668601e-05, "loss": 40.3872, "step": 72070 }, { "epoch": 0.2912123207698865, "grad_norm": 598.6536865234375, "learning_rate": 4.4790190263308306e-05, "loss": 61.5503, "step": 72080 }, { "epoch": 0.29125272203525415, "grad_norm": 1390.3765869140625, "learning_rate": 4.478805717411066e-05, "loss": 75.8073, "step": 72090 }, { "epoch": 0.2912931233006218, "grad_norm": 1810.283447265625, "learning_rate": 4.478592369913465e-05, "loss": 66.2582, "step": 72100 }, { "epoch": 0.29133352456598943, "grad_norm": 585.9959106445312, "learning_rate": 4.478378983842186e-05, "loss": 60.1694, "step": 72110 }, { "epoch": 0.291373925831357, "grad_norm": 589.87109375, "learning_rate": 4.4781655592013914e-05, "loss": 69.5412, "step": 72120 }, { "epoch": 0.29141432709672466, "grad_norm": 829.7304077148438, "learning_rate": 4.47795209599524e-05, "loss": 57.3538, "step": 72130 }, { "epoch": 0.2914547283620923, "grad_norm": 331.1748962402344, "learning_rate": 4.477738594227895e-05, "loss": 54.7226, "step": 72140 }, { "epoch": 0.29149512962745994, "grad_norm": 1150.1204833984375, "learning_rate": 4.4775250539035174e-05, "loss": 101.6825, "step": 72150 }, { "epoch": 0.2915355308928276, "grad_norm": 1445.37255859375, "learning_rate": 4.477311475026271e-05, "loss": 73.3006, "step": 72160 }, { "epoch": 0.2915759321581952, "grad_norm": 1057.3179931640625, "learning_rate": 4.4770978576003196e-05, "loss": 87.7836, "step": 72170 }, { "epoch": 0.2916163334235628, "grad_norm": 258.7021484375, "learning_rate": 4.4768842016298275e-05, "loss": 47.9227, "step": 72180 }, { "epoch": 0.29165673468893044, "grad_norm": 572.38623046875, "learning_rate": 4.4766705071189595e-05, "loss": 94.038, "step": 72190 }, { "epoch": 0.2916971359542981, "grad_norm": 1339.6978759765625, "learning_rate": 4.4764567740718825e-05, "loss": 55.9968, "step": 72200 }, { "epoch": 0.2917375372196657, "grad_norm": 671.230712890625, "learning_rate": 4.4762430024927636e-05, "loss": 83.76, "step": 72210 }, { "epoch": 0.29177793848503336, "grad_norm": 459.9236145019531, "learning_rate": 4.476029192385769e-05, "loss": 52.4123, "step": 72220 }, { "epoch": 0.291818339750401, "grad_norm": 1072.2008056640625, "learning_rate": 4.4758153437550684e-05, "loss": 61.8481, "step": 72230 }, { "epoch": 0.29185874101576864, "grad_norm": 1238.638671875, "learning_rate": 4.475601456604831e-05, "loss": 101.6763, "step": 72240 }, { "epoch": 0.2918991422811362, "grad_norm": 1013.4456787109375, "learning_rate": 4.4753875309392266e-05, "loss": 83.2611, "step": 72250 }, { "epoch": 0.29193954354650387, "grad_norm": 609.5685424804688, "learning_rate": 4.4751735667624237e-05, "loss": 83.8193, "step": 72260 }, { "epoch": 0.2919799448118715, "grad_norm": 683.1033325195312, "learning_rate": 4.474959564078596e-05, "loss": 51.5649, "step": 72270 }, { "epoch": 0.29202034607723915, "grad_norm": 799.1218872070312, "learning_rate": 4.4747455228919146e-05, "loss": 48.9159, "step": 72280 }, { "epoch": 0.2920607473426068, "grad_norm": 316.8408508300781, "learning_rate": 4.4745314432065535e-05, "loss": 80.174, "step": 72290 }, { "epoch": 0.2921011486079744, "grad_norm": 549.0940551757812, "learning_rate": 4.474317325026684e-05, "loss": 56.1946, "step": 72300 }, { "epoch": 0.292141549873342, "grad_norm": 591.0458984375, "learning_rate": 4.474103168356483e-05, "loss": 101.0572, "step": 72310 }, { "epoch": 0.29218195113870965, "grad_norm": 3971.22802734375, "learning_rate": 4.4738889732001234e-05, "loss": 65.9609, "step": 72320 }, { "epoch": 0.2922223524040773, "grad_norm": 947.2832641601562, "learning_rate": 4.473674739561783e-05, "loss": 108.9696, "step": 72330 }, { "epoch": 0.29226275366944493, "grad_norm": 745.0115356445312, "learning_rate": 4.473460467445637e-05, "loss": 53.7305, "step": 72340 }, { "epoch": 0.29230315493481257, "grad_norm": 1212.1033935546875, "learning_rate": 4.473246156855863e-05, "loss": 85.5423, "step": 72350 }, { "epoch": 0.2923435562001802, "grad_norm": 1858.6473388671875, "learning_rate": 4.473031807796639e-05, "loss": 84.1671, "step": 72360 }, { "epoch": 0.2923839574655478, "grad_norm": 1183.3797607421875, "learning_rate": 4.4728174202721444e-05, "loss": 85.8308, "step": 72370 }, { "epoch": 0.29242435873091543, "grad_norm": 564.0057373046875, "learning_rate": 4.472602994286559e-05, "loss": 62.2768, "step": 72380 }, { "epoch": 0.2924647599962831, "grad_norm": 740.6309204101562, "learning_rate": 4.472388529844062e-05, "loss": 46.9198, "step": 72390 }, { "epoch": 0.2925051612616507, "grad_norm": 1595.9669189453125, "learning_rate": 4.4721740269488355e-05, "loss": 61.2028, "step": 72400 }, { "epoch": 0.29254556252701835, "grad_norm": 1151.5087890625, "learning_rate": 4.4719594856050604e-05, "loss": 62.4049, "step": 72410 }, { "epoch": 0.292585963792386, "grad_norm": 475.585205078125, "learning_rate": 4.4717449058169216e-05, "loss": 50.4637, "step": 72420 }, { "epoch": 0.29262636505775363, "grad_norm": 519.3822631835938, "learning_rate": 4.471530287588599e-05, "loss": 77.0086, "step": 72430 }, { "epoch": 0.2926667663231212, "grad_norm": 993.6683959960938, "learning_rate": 4.471315630924279e-05, "loss": 76.9779, "step": 72440 }, { "epoch": 0.29270716758848886, "grad_norm": 1110.651611328125, "learning_rate": 4.4711009358281456e-05, "loss": 73.6586, "step": 72450 }, { "epoch": 0.2927475688538565, "grad_norm": 703.5302124023438, "learning_rate": 4.4708862023043854e-05, "loss": 68.1201, "step": 72460 }, { "epoch": 0.29278797011922414, "grad_norm": 773.6651611328125, "learning_rate": 4.470671430357183e-05, "loss": 62.8462, "step": 72470 }, { "epoch": 0.2928283713845918, "grad_norm": 806.9835815429688, "learning_rate": 4.470456619990727e-05, "loss": 71.4623, "step": 72480 }, { "epoch": 0.2928687726499594, "grad_norm": 805.1903686523438, "learning_rate": 4.470241771209205e-05, "loss": 75.0459, "step": 72490 }, { "epoch": 0.292909173915327, "grad_norm": 408.85089111328125, "learning_rate": 4.4700268840168045e-05, "loss": 58.6624, "step": 72500 }, { "epoch": 0.29294957518069464, "grad_norm": 701.9678344726562, "learning_rate": 4.469811958417717e-05, "loss": 55.822, "step": 72510 }, { "epoch": 0.2929899764460623, "grad_norm": 563.7380981445312, "learning_rate": 4.46959699441613e-05, "loss": 49.1962, "step": 72520 }, { "epoch": 0.2930303777114299, "grad_norm": 736.0975952148438, "learning_rate": 4.469381992016236e-05, "loss": 42.205, "step": 72530 }, { "epoch": 0.29307077897679756, "grad_norm": 375.850830078125, "learning_rate": 4.469166951222227e-05, "loss": 58.5806, "step": 72540 }, { "epoch": 0.2931111802421652, "grad_norm": 828.135498046875, "learning_rate": 4.4689518720382937e-05, "loss": 69.4176, "step": 72550 }, { "epoch": 0.29315158150753284, "grad_norm": 955.2371215820312, "learning_rate": 4.46873675446863e-05, "loss": 100.2446, "step": 72560 }, { "epoch": 0.29319198277290043, "grad_norm": 808.4039916992188, "learning_rate": 4.468521598517429e-05, "loss": 79.4082, "step": 72570 }, { "epoch": 0.29323238403826807, "grad_norm": 563.0283813476562, "learning_rate": 4.468306404188887e-05, "loss": 33.0342, "step": 72580 }, { "epoch": 0.2932727853036357, "grad_norm": 2005.7269287109375, "learning_rate": 4.468091171487197e-05, "loss": 87.8131, "step": 72590 }, { "epoch": 0.29331318656900335, "grad_norm": 1091.245849609375, "learning_rate": 4.4678759004165584e-05, "loss": 60.274, "step": 72600 }, { "epoch": 0.293353587834371, "grad_norm": 773.4925537109375, "learning_rate": 4.467660590981165e-05, "loss": 43.7714, "step": 72610 }, { "epoch": 0.29339398909973863, "grad_norm": 414.9910583496094, "learning_rate": 4.4674452431852155e-05, "loss": 43.9692, "step": 72620 }, { "epoch": 0.2934343903651062, "grad_norm": 974.5853881835938, "learning_rate": 4.467229857032907e-05, "loss": 73.6409, "step": 72630 }, { "epoch": 0.29347479163047385, "grad_norm": 879.56494140625, "learning_rate": 4.4670144325284414e-05, "loss": 103.894, "step": 72640 }, { "epoch": 0.2935151928958415, "grad_norm": 522.8005981445312, "learning_rate": 4.466798969676015e-05, "loss": 108.3693, "step": 72650 }, { "epoch": 0.29355559416120913, "grad_norm": 680.3546142578125, "learning_rate": 4.4665834684798316e-05, "loss": 58.7339, "step": 72660 }, { "epoch": 0.2935959954265768, "grad_norm": 639.8063354492188, "learning_rate": 4.4663679289440895e-05, "loss": 69.8376, "step": 72670 }, { "epoch": 0.2936363966919444, "grad_norm": 279.01385498046875, "learning_rate": 4.466152351072994e-05, "loss": 60.8839, "step": 72680 }, { "epoch": 0.293676797957312, "grad_norm": 744.683349609375, "learning_rate": 4.465936734870745e-05, "loss": 59.2348, "step": 72690 }, { "epoch": 0.29371719922267964, "grad_norm": 884.4476928710938, "learning_rate": 4.465721080341547e-05, "loss": 55.9587, "step": 72700 }, { "epoch": 0.2937576004880473, "grad_norm": 736.0975341796875, "learning_rate": 4.465505387489606e-05, "loss": 85.0535, "step": 72710 }, { "epoch": 0.2937980017534149, "grad_norm": 1129.6541748046875, "learning_rate": 4.465289656319124e-05, "loss": 166.6917, "step": 72720 }, { "epoch": 0.29383840301878256, "grad_norm": 918.02587890625, "learning_rate": 4.465073886834309e-05, "loss": 80.1093, "step": 72730 }, { "epoch": 0.2938788042841502, "grad_norm": 741.1550903320312, "learning_rate": 4.464858079039367e-05, "loss": 62.3389, "step": 72740 }, { "epoch": 0.29391920554951784, "grad_norm": 1366.1309814453125, "learning_rate": 4.464642232938505e-05, "loss": 127.8192, "step": 72750 }, { "epoch": 0.2939596068148854, "grad_norm": 691.9977416992188, "learning_rate": 4.464426348535931e-05, "loss": 71.4388, "step": 72760 }, { "epoch": 0.29400000808025306, "grad_norm": 743.5264282226562, "learning_rate": 4.464210425835854e-05, "loss": 77.9983, "step": 72770 }, { "epoch": 0.2940404093456207, "grad_norm": 570.9989624023438, "learning_rate": 4.463994464842484e-05, "loss": 66.5061, "step": 72780 }, { "epoch": 0.29408081061098834, "grad_norm": 419.94793701171875, "learning_rate": 4.46377846556003e-05, "loss": 80.6933, "step": 72790 }, { "epoch": 0.294121211876356, "grad_norm": 399.4372253417969, "learning_rate": 4.4635624279927044e-05, "loss": 82.7092, "step": 72800 }, { "epoch": 0.2941616131417236, "grad_norm": 1092.372802734375, "learning_rate": 4.463346352144718e-05, "loss": 61.0069, "step": 72810 }, { "epoch": 0.2942020144070912, "grad_norm": 1244.2633056640625, "learning_rate": 4.463130238020285e-05, "loss": 69.0925, "step": 72820 }, { "epoch": 0.29424241567245885, "grad_norm": 3014.16796875, "learning_rate": 4.4629140856236155e-05, "loss": 95.4817, "step": 72830 }, { "epoch": 0.2942828169378265, "grad_norm": 855.9567260742188, "learning_rate": 4.462697894958926e-05, "loss": 79.4187, "step": 72840 }, { "epoch": 0.2943232182031941, "grad_norm": 588.0103149414062, "learning_rate": 4.4624816660304314e-05, "loss": 46.0367, "step": 72850 }, { "epoch": 0.29436361946856177, "grad_norm": 2154.02734375, "learning_rate": 4.4622653988423455e-05, "loss": 116.9557, "step": 72860 }, { "epoch": 0.2944040207339294, "grad_norm": 743.9976196289062, "learning_rate": 4.462049093398885e-05, "loss": 52.3771, "step": 72870 }, { "epoch": 0.29444442199929705, "grad_norm": 1056.8822021484375, "learning_rate": 4.461832749704268e-05, "loss": 70.0739, "step": 72880 }, { "epoch": 0.29448482326466463, "grad_norm": 860.94580078125, "learning_rate": 4.461616367762711e-05, "loss": 53.0582, "step": 72890 }, { "epoch": 0.29452522453003227, "grad_norm": 319.7754821777344, "learning_rate": 4.4613999475784336e-05, "loss": 50.5095, "step": 72900 }, { "epoch": 0.2945656257953999, "grad_norm": 1478.31982421875, "learning_rate": 4.4611834891556534e-05, "loss": 94.7775, "step": 72910 }, { "epoch": 0.29460602706076755, "grad_norm": 548.7947998046875, "learning_rate": 4.460966992498593e-05, "loss": 96.3625, "step": 72920 }, { "epoch": 0.2946464283261352, "grad_norm": 567.1472778320312, "learning_rate": 4.46075045761147e-05, "loss": 66.8417, "step": 72930 }, { "epoch": 0.29468682959150283, "grad_norm": 510.6759033203125, "learning_rate": 4.460533884498509e-05, "loss": 43.7635, "step": 72940 }, { "epoch": 0.2947272308568704, "grad_norm": 1200.787353515625, "learning_rate": 4.460317273163929e-05, "loss": 79.7278, "step": 72950 }, { "epoch": 0.29476763212223805, "grad_norm": 1466.562255859375, "learning_rate": 4.460100623611955e-05, "loss": 80.256, "step": 72960 }, { "epoch": 0.2948080333876057, "grad_norm": 1230.124267578125, "learning_rate": 4.45988393584681e-05, "loss": 94.8147, "step": 72970 }, { "epoch": 0.29484843465297333, "grad_norm": 1354.7569580078125, "learning_rate": 4.4596672098727195e-05, "loss": 79.9409, "step": 72980 }, { "epoch": 0.294888835918341, "grad_norm": 486.84246826171875, "learning_rate": 4.459450445693907e-05, "loss": 48.9059, "step": 72990 }, { "epoch": 0.2949292371837086, "grad_norm": 606.8703002929688, "learning_rate": 4.4592336433146e-05, "loss": 56.8745, "step": 73000 }, { "epoch": 0.2949696384490762, "grad_norm": 1210.473388671875, "learning_rate": 4.459016802739023e-05, "loss": 81.2079, "step": 73010 }, { "epoch": 0.29501003971444384, "grad_norm": 467.654296875, "learning_rate": 4.458799923971406e-05, "loss": 71.5637, "step": 73020 }, { "epoch": 0.2950504409798115, "grad_norm": 1085.084228515625, "learning_rate": 4.4585830070159764e-05, "loss": 59.4551, "step": 73030 }, { "epoch": 0.2950908422451791, "grad_norm": 1020.708984375, "learning_rate": 4.458366051876962e-05, "loss": 67.9901, "step": 73040 }, { "epoch": 0.29513124351054676, "grad_norm": 413.11859130859375, "learning_rate": 4.458149058558594e-05, "loss": 94.3448, "step": 73050 }, { "epoch": 0.2951716447759144, "grad_norm": 344.1728210449219, "learning_rate": 4.457932027065102e-05, "loss": 42.2916, "step": 73060 }, { "epoch": 0.29521204604128204, "grad_norm": 0.0, "learning_rate": 4.457714957400716e-05, "loss": 72.4169, "step": 73070 }, { "epoch": 0.2952524473066496, "grad_norm": 1693.0472412109375, "learning_rate": 4.45749784956967e-05, "loss": 81.7885, "step": 73080 }, { "epoch": 0.29529284857201726, "grad_norm": 676.3005981445312, "learning_rate": 4.457280703576194e-05, "loss": 69.0328, "step": 73090 }, { "epoch": 0.2953332498373849, "grad_norm": 723.7576293945312, "learning_rate": 4.457063519424525e-05, "loss": 74.3711, "step": 73100 }, { "epoch": 0.29537365110275254, "grad_norm": 397.1194152832031, "learning_rate": 4.456846297118894e-05, "loss": 65.241, "step": 73110 }, { "epoch": 0.2954140523681202, "grad_norm": 686.8461303710938, "learning_rate": 4.456629036663537e-05, "loss": 66.4102, "step": 73120 }, { "epoch": 0.2954544536334878, "grad_norm": 991.1904296875, "learning_rate": 4.45641173806269e-05, "loss": 79.4375, "step": 73130 }, { "epoch": 0.2954948548988554, "grad_norm": 1777.7161865234375, "learning_rate": 4.4561944013205885e-05, "loss": 74.0132, "step": 73140 }, { "epoch": 0.29553525616422305, "grad_norm": 1228.3016357421875, "learning_rate": 4.45597702644147e-05, "loss": 55.363, "step": 73150 }, { "epoch": 0.2955756574295907, "grad_norm": 846.4840087890625, "learning_rate": 4.455759613429573e-05, "loss": 76.3519, "step": 73160 }, { "epoch": 0.29561605869495833, "grad_norm": 639.7553100585938, "learning_rate": 4.455542162289136e-05, "loss": 83.1235, "step": 73170 }, { "epoch": 0.29565645996032597, "grad_norm": 485.4346618652344, "learning_rate": 4.455324673024396e-05, "loss": 106.5092, "step": 73180 }, { "epoch": 0.2956968612256936, "grad_norm": 1039.0308837890625, "learning_rate": 4.4551071456395957e-05, "loss": 55.8722, "step": 73190 }, { "epoch": 0.2957372624910612, "grad_norm": 4481.669921875, "learning_rate": 4.454889580138975e-05, "loss": 92.0912, "step": 73200 }, { "epoch": 0.29577766375642883, "grad_norm": 1369.180419921875, "learning_rate": 4.454671976526776e-05, "loss": 81.6197, "step": 73210 }, { "epoch": 0.2958180650217965, "grad_norm": 1476.4033203125, "learning_rate": 4.45445433480724e-05, "loss": 60.5057, "step": 73220 }, { "epoch": 0.2958584662871641, "grad_norm": 817.6678466796875, "learning_rate": 4.45423665498461e-05, "loss": 107.7844, "step": 73230 }, { "epoch": 0.29589886755253175, "grad_norm": 1333.8826904296875, "learning_rate": 4.4540189370631315e-05, "loss": 100.9942, "step": 73240 }, { "epoch": 0.2959392688178994, "grad_norm": 465.54510498046875, "learning_rate": 4.453801181047047e-05, "loss": 77.737, "step": 73250 }, { "epoch": 0.29597967008326703, "grad_norm": 1762.44287109375, "learning_rate": 4.4535833869406027e-05, "loss": 58.7377, "step": 73260 }, { "epoch": 0.2960200713486346, "grad_norm": 568.57958984375, "learning_rate": 4.4533655547480444e-05, "loss": 56.8368, "step": 73270 }, { "epoch": 0.29606047261400226, "grad_norm": 1187.254638671875, "learning_rate": 4.45314768447362e-05, "loss": 88.9359, "step": 73280 }, { "epoch": 0.2961008738793699, "grad_norm": 902.0029296875, "learning_rate": 4.452929776121575e-05, "loss": 69.4659, "step": 73290 }, { "epoch": 0.29614127514473754, "grad_norm": 0.0, "learning_rate": 4.452711829696158e-05, "loss": 47.5744, "step": 73300 }, { "epoch": 0.2961816764101052, "grad_norm": 407.5782775878906, "learning_rate": 4.452493845201619e-05, "loss": 63.7359, "step": 73310 }, { "epoch": 0.2962220776754728, "grad_norm": 674.528076171875, "learning_rate": 4.4522758226422076e-05, "loss": 43.8548, "step": 73320 }, { "epoch": 0.2962624789408404, "grad_norm": 679.5468139648438, "learning_rate": 4.452057762022174e-05, "loss": 56.5893, "step": 73330 }, { "epoch": 0.29630288020620804, "grad_norm": 982.80517578125, "learning_rate": 4.4518396633457696e-05, "loss": 103.2029, "step": 73340 }, { "epoch": 0.2963432814715757, "grad_norm": 529.539794921875, "learning_rate": 4.4516215266172453e-05, "loss": 36.5715, "step": 73350 }, { "epoch": 0.2963836827369433, "grad_norm": 703.641357421875, "learning_rate": 4.451403351840855e-05, "loss": 68.3904, "step": 73360 }, { "epoch": 0.29642408400231096, "grad_norm": 865.1505126953125, "learning_rate": 4.451185139020852e-05, "loss": 64.2055, "step": 73370 }, { "epoch": 0.2964644852676786, "grad_norm": 718.0322265625, "learning_rate": 4.4509668881614894e-05, "loss": 102.1239, "step": 73380 }, { "epoch": 0.29650488653304624, "grad_norm": 1524.7041015625, "learning_rate": 4.450748599267024e-05, "loss": 81.8768, "step": 73390 }, { "epoch": 0.2965452877984138, "grad_norm": 378.5061950683594, "learning_rate": 4.450530272341709e-05, "loss": 56.3704, "step": 73400 }, { "epoch": 0.29658568906378147, "grad_norm": 1193.3980712890625, "learning_rate": 4.4503119073898024e-05, "loss": 62.8788, "step": 73410 }, { "epoch": 0.2966260903291491, "grad_norm": 972.7481079101562, "learning_rate": 4.4500935044155626e-05, "loss": 55.7444, "step": 73420 }, { "epoch": 0.29666649159451675, "grad_norm": 496.94207763671875, "learning_rate": 4.4498750634232445e-05, "loss": 55.0117, "step": 73430 }, { "epoch": 0.2967068928598844, "grad_norm": 690.003173828125, "learning_rate": 4.449656584417108e-05, "loss": 63.6232, "step": 73440 }, { "epoch": 0.296747294125252, "grad_norm": 1239.76123046875, "learning_rate": 4.449438067401413e-05, "loss": 56.0802, "step": 73450 }, { "epoch": 0.2967876953906196, "grad_norm": 1073.9417724609375, "learning_rate": 4.44921951238042e-05, "loss": 74.7659, "step": 73460 }, { "epoch": 0.29682809665598725, "grad_norm": 612.8600463867188, "learning_rate": 4.449000919358388e-05, "loss": 49.4253, "step": 73470 }, { "epoch": 0.2968684979213549, "grad_norm": 803.32763671875, "learning_rate": 4.4487822883395805e-05, "loss": 70.9369, "step": 73480 }, { "epoch": 0.29690889918672253, "grad_norm": 999.6846313476562, "learning_rate": 4.448563619328259e-05, "loss": 77.0513, "step": 73490 }, { "epoch": 0.29694930045209017, "grad_norm": 260.62261962890625, "learning_rate": 4.448344912328686e-05, "loss": 40.874, "step": 73500 }, { "epoch": 0.2969897017174578, "grad_norm": 1337.423828125, "learning_rate": 4.4481261673451255e-05, "loss": 63.2176, "step": 73510 }, { "epoch": 0.2970301029828254, "grad_norm": 328.9664306640625, "learning_rate": 4.447907384381843e-05, "loss": 58.7723, "step": 73520 }, { "epoch": 0.29707050424819303, "grad_norm": 767.3320922851562, "learning_rate": 4.447688563443103e-05, "loss": 100.0262, "step": 73530 }, { "epoch": 0.2971109055135607, "grad_norm": 251.83306884765625, "learning_rate": 4.447469704533172e-05, "loss": 58.1344, "step": 73540 }, { "epoch": 0.2971513067789283, "grad_norm": 696.3643798828125, "learning_rate": 4.4472508076563166e-05, "loss": 80.2865, "step": 73550 }, { "epoch": 0.29719170804429595, "grad_norm": 578.1966552734375, "learning_rate": 4.447031872816804e-05, "loss": 63.5288, "step": 73560 }, { "epoch": 0.2972321093096636, "grad_norm": 1685.81982421875, "learning_rate": 4.446812900018902e-05, "loss": 98.5778, "step": 73570 }, { "epoch": 0.29727251057503123, "grad_norm": 605.77978515625, "learning_rate": 4.4465938892668814e-05, "loss": 108.6232, "step": 73580 }, { "epoch": 0.2973129118403988, "grad_norm": 425.3101501464844, "learning_rate": 4.44637484056501e-05, "loss": 81.9932, "step": 73590 }, { "epoch": 0.29735331310576646, "grad_norm": 664.3129272460938, "learning_rate": 4.4461557539175594e-05, "loss": 80.7212, "step": 73600 }, { "epoch": 0.2973937143711341, "grad_norm": 932.316650390625, "learning_rate": 4.4459366293287994e-05, "loss": 83.0597, "step": 73610 }, { "epoch": 0.29743411563650174, "grad_norm": 1423.0401611328125, "learning_rate": 4.445717466803004e-05, "loss": 64.9128, "step": 73620 }, { "epoch": 0.2974745169018694, "grad_norm": 572.2749633789062, "learning_rate": 4.445498266344444e-05, "loss": 49.8087, "step": 73630 }, { "epoch": 0.297514918167237, "grad_norm": 1016.2572021484375, "learning_rate": 4.445279027957395e-05, "loss": 64.3123, "step": 73640 }, { "epoch": 0.2975553194326046, "grad_norm": 1676.9608154296875, "learning_rate": 4.4450597516461287e-05, "loss": 79.8256, "step": 73650 }, { "epoch": 0.29759572069797224, "grad_norm": 501.0017395019531, "learning_rate": 4.444840437414922e-05, "loss": 63.8892, "step": 73660 }, { "epoch": 0.2976361219633399, "grad_norm": 1168.860107421875, "learning_rate": 4.444621085268049e-05, "loss": 59.1573, "step": 73670 }, { "epoch": 0.2976765232287075, "grad_norm": 664.9098510742188, "learning_rate": 4.444401695209788e-05, "loss": 57.3835, "step": 73680 }, { "epoch": 0.29771692449407516, "grad_norm": 1014.5247192382812, "learning_rate": 4.4441822672444134e-05, "loss": 89.5824, "step": 73690 }, { "epoch": 0.2977573257594428, "grad_norm": 765.1936645507812, "learning_rate": 4.443962801376205e-05, "loss": 38.8063, "step": 73700 }, { "epoch": 0.29779772702481044, "grad_norm": 928.1898193359375, "learning_rate": 4.443743297609442e-05, "loss": 62.8045, "step": 73710 }, { "epoch": 0.29783812829017803, "grad_norm": 685.7420654296875, "learning_rate": 4.443523755948401e-05, "loss": 66.0851, "step": 73720 }, { "epoch": 0.29787852955554567, "grad_norm": 881.26416015625, "learning_rate": 4.443304176397365e-05, "loss": 71.4241, "step": 73730 }, { "epoch": 0.2979189308209133, "grad_norm": 1430.6566162109375, "learning_rate": 4.443084558960613e-05, "loss": 110.0562, "step": 73740 }, { "epoch": 0.29795933208628095, "grad_norm": 476.3144226074219, "learning_rate": 4.442864903642428e-05, "loss": 63.0536, "step": 73750 }, { "epoch": 0.2979997333516486, "grad_norm": 597.589111328125, "learning_rate": 4.4426452104470903e-05, "loss": 49.1393, "step": 73760 }, { "epoch": 0.29804013461701623, "grad_norm": 673.9664306640625, "learning_rate": 4.4424254793788844e-05, "loss": 49.0798, "step": 73770 }, { "epoch": 0.2980805358823838, "grad_norm": 1329.530029296875, "learning_rate": 4.4422057104420946e-05, "loss": 71.3935, "step": 73780 }, { "epoch": 0.29812093714775145, "grad_norm": 690.584228515625, "learning_rate": 4.4419859036410036e-05, "loss": 73.6381, "step": 73790 }, { "epoch": 0.2981613384131191, "grad_norm": 627.472900390625, "learning_rate": 4.441766058979898e-05, "loss": 82.9257, "step": 73800 }, { "epoch": 0.29820173967848673, "grad_norm": 638.86279296875, "learning_rate": 4.441546176463063e-05, "loss": 57.4086, "step": 73810 }, { "epoch": 0.2982421409438544, "grad_norm": 844.6746215820312, "learning_rate": 4.441326256094787e-05, "loss": 74.4755, "step": 73820 }, { "epoch": 0.298282542209222, "grad_norm": 1176.2183837890625, "learning_rate": 4.4411062978793545e-05, "loss": 48.4431, "step": 73830 }, { "epoch": 0.2983229434745896, "grad_norm": 762.2763671875, "learning_rate": 4.4408863018210564e-05, "loss": 54.5466, "step": 73840 }, { "epoch": 0.29836334473995724, "grad_norm": 2829.393310546875, "learning_rate": 4.44066626792418e-05, "loss": 88.412, "step": 73850 }, { "epoch": 0.2984037460053249, "grad_norm": 2878.249267578125, "learning_rate": 4.440446196193016e-05, "loss": 129.003, "step": 73860 }, { "epoch": 0.2984441472706925, "grad_norm": 631.333984375, "learning_rate": 4.440226086631854e-05, "loss": 59.6416, "step": 73870 }, { "epoch": 0.29848454853606016, "grad_norm": 858.2770385742188, "learning_rate": 4.440005939244986e-05, "loss": 54.7916, "step": 73880 }, { "epoch": 0.2985249498014278, "grad_norm": 945.3146362304688, "learning_rate": 4.439785754036703e-05, "loss": 70.1292, "step": 73890 }, { "epoch": 0.29856535106679544, "grad_norm": 915.803955078125, "learning_rate": 4.439565531011299e-05, "loss": 66.0254, "step": 73900 }, { "epoch": 0.298605752332163, "grad_norm": 683.1613159179688, "learning_rate": 4.4393452701730655e-05, "loss": 55.0487, "step": 73910 }, { "epoch": 0.29864615359753066, "grad_norm": 1310.735107421875, "learning_rate": 4.439124971526297e-05, "loss": 99.916, "step": 73920 }, { "epoch": 0.2986865548628983, "grad_norm": 276.0152893066406, "learning_rate": 4.4389046350752905e-05, "loss": 46.5066, "step": 73930 }, { "epoch": 0.29872695612826594, "grad_norm": 1337.242919921875, "learning_rate": 4.438684260824339e-05, "loss": 112.974, "step": 73940 }, { "epoch": 0.2987673573936336, "grad_norm": 583.646728515625, "learning_rate": 4.43846384877774e-05, "loss": 49.2991, "step": 73950 }, { "epoch": 0.2988077586590012, "grad_norm": 1229.034423828125, "learning_rate": 4.4382433989397895e-05, "loss": 56.1411, "step": 73960 }, { "epoch": 0.2988481599243688, "grad_norm": 556.5376586914062, "learning_rate": 4.4380229113147866e-05, "loss": 69.2976, "step": 73970 }, { "epoch": 0.29888856118973645, "grad_norm": 448.5850524902344, "learning_rate": 4.437802385907029e-05, "loss": 65.7849, "step": 73980 }, { "epoch": 0.2989289624551041, "grad_norm": 412.0335388183594, "learning_rate": 4.4375818227208164e-05, "loss": 47.8312, "step": 73990 }, { "epoch": 0.2989693637204717, "grad_norm": 549.283935546875, "learning_rate": 4.4373612217604496e-05, "loss": 60.2066, "step": 74000 }, { "epoch": 0.29900976498583937, "grad_norm": 641.086181640625, "learning_rate": 4.437140583030227e-05, "loss": 81.0014, "step": 74010 }, { "epoch": 0.299050166251207, "grad_norm": 470.0632629394531, "learning_rate": 4.4369199065344525e-05, "loss": 48.1158, "step": 74020 }, { "epoch": 0.29909056751657465, "grad_norm": 479.7843933105469, "learning_rate": 4.436699192277426e-05, "loss": 62.3095, "step": 74030 }, { "epoch": 0.29913096878194223, "grad_norm": 953.147705078125, "learning_rate": 4.436478440263453e-05, "loss": 69.0217, "step": 74040 }, { "epoch": 0.29917137004730987, "grad_norm": 609.3707885742188, "learning_rate": 4.436257650496834e-05, "loss": 71.6211, "step": 74050 }, { "epoch": 0.2992117713126775, "grad_norm": 1487.8802490234375, "learning_rate": 4.436036822981877e-05, "loss": 84.6675, "step": 74060 }, { "epoch": 0.29925217257804515, "grad_norm": 894.4046630859375, "learning_rate": 4.435815957722885e-05, "loss": 51.4052, "step": 74070 }, { "epoch": 0.2992925738434128, "grad_norm": 1185.888671875, "learning_rate": 4.4355950547241645e-05, "loss": 79.9474, "step": 74080 }, { "epoch": 0.29933297510878043, "grad_norm": 2298.0419921875, "learning_rate": 4.435374113990021e-05, "loss": 62.4108, "step": 74090 }, { "epoch": 0.299373376374148, "grad_norm": 1640.901611328125, "learning_rate": 4.435153135524763e-05, "loss": 108.3077, "step": 74100 }, { "epoch": 0.29941377763951565, "grad_norm": 509.5463562011719, "learning_rate": 4.434932119332699e-05, "loss": 74.0899, "step": 74110 }, { "epoch": 0.2994541789048833, "grad_norm": 1251.0087890625, "learning_rate": 4.434711065418137e-05, "loss": 117.1789, "step": 74120 }, { "epoch": 0.29949458017025093, "grad_norm": 838.3406982421875, "learning_rate": 4.434489973785386e-05, "loss": 99.4823, "step": 74130 }, { "epoch": 0.2995349814356186, "grad_norm": 924.2326049804688, "learning_rate": 4.434268844438758e-05, "loss": 60.6183, "step": 74140 }, { "epoch": 0.2995753827009862, "grad_norm": 602.224365234375, "learning_rate": 4.4340476773825625e-05, "loss": 65.4386, "step": 74150 }, { "epoch": 0.2996157839663538, "grad_norm": 1039.2386474609375, "learning_rate": 4.433826472621112e-05, "loss": 72.3676, "step": 74160 }, { "epoch": 0.29965618523172144, "grad_norm": 1209.8310546875, "learning_rate": 4.4336052301587185e-05, "loss": 86.5389, "step": 74170 }, { "epoch": 0.2996965864970891, "grad_norm": 664.8455200195312, "learning_rate": 4.4333839499996954e-05, "loss": 77.3693, "step": 74180 }, { "epoch": 0.2997369877624567, "grad_norm": 373.7446594238281, "learning_rate": 4.4331626321483575e-05, "loss": 37.4625, "step": 74190 }, { "epoch": 0.29977738902782436, "grad_norm": 587.56396484375, "learning_rate": 4.432941276609018e-05, "loss": 53.5302, "step": 74200 }, { "epoch": 0.299817790293192, "grad_norm": 399.6770935058594, "learning_rate": 4.432719883385994e-05, "loss": 43.0968, "step": 74210 }, { "epoch": 0.29985819155855964, "grad_norm": 455.2350158691406, "learning_rate": 4.4324984524836e-05, "loss": 65.6205, "step": 74220 }, { "epoch": 0.2998985928239272, "grad_norm": 1229.9195556640625, "learning_rate": 4.432276983906155e-05, "loss": 59.1207, "step": 74230 }, { "epoch": 0.29993899408929486, "grad_norm": 1179.552734375, "learning_rate": 4.4320554776579747e-05, "loss": 68.4554, "step": 74240 }, { "epoch": 0.2999793953546625, "grad_norm": 563.2706298828125, "learning_rate": 4.431833933743378e-05, "loss": 51.8571, "step": 74250 }, { "epoch": 0.30001979662003014, "grad_norm": 779.345947265625, "learning_rate": 4.431612352166684e-05, "loss": 81.9162, "step": 74260 }, { "epoch": 0.3000601978853978, "grad_norm": 363.64324951171875, "learning_rate": 4.431390732932213e-05, "loss": 55.0468, "step": 74270 }, { "epoch": 0.3001005991507654, "grad_norm": 1637.340087890625, "learning_rate": 4.431169076044286e-05, "loss": 81.3846, "step": 74280 }, { "epoch": 0.300141000416133, "grad_norm": 563.9585571289062, "learning_rate": 4.4309473815072225e-05, "loss": 60.6945, "step": 74290 }, { "epoch": 0.30018140168150065, "grad_norm": 749.6682739257812, "learning_rate": 4.4307256493253457e-05, "loss": 68.1788, "step": 74300 }, { "epoch": 0.3002218029468683, "grad_norm": 5840.30810546875, "learning_rate": 4.4305038795029794e-05, "loss": 86.4264, "step": 74310 }, { "epoch": 0.30026220421223593, "grad_norm": 910.1226806640625, "learning_rate": 4.4302820720444456e-05, "loss": 77.6066, "step": 74320 }, { "epoch": 0.30030260547760357, "grad_norm": 709.5213012695312, "learning_rate": 4.430060226954069e-05, "loss": 120.0873, "step": 74330 }, { "epoch": 0.3003430067429712, "grad_norm": 907.42431640625, "learning_rate": 4.429838344236174e-05, "loss": 82.4217, "step": 74340 }, { "epoch": 0.30038340800833885, "grad_norm": 595.138671875, "learning_rate": 4.4296164238950874e-05, "loss": 77.8745, "step": 74350 }, { "epoch": 0.30042380927370643, "grad_norm": 1740.2891845703125, "learning_rate": 4.429394465935136e-05, "loss": 70.9948, "step": 74360 }, { "epoch": 0.3004642105390741, "grad_norm": 624.7880249023438, "learning_rate": 4.429172470360645e-05, "loss": 84.4048, "step": 74370 }, { "epoch": 0.3005046118044417, "grad_norm": 632.6150512695312, "learning_rate": 4.428950437175944e-05, "loss": 65.9942, "step": 74380 }, { "epoch": 0.30054501306980935, "grad_norm": 217.38211059570312, "learning_rate": 4.428728366385361e-05, "loss": 73.9153, "step": 74390 }, { "epoch": 0.300585414335177, "grad_norm": 1739.6029052734375, "learning_rate": 4.428506257993226e-05, "loss": 78.2645, "step": 74400 }, { "epoch": 0.30062581560054463, "grad_norm": 1436.2427978515625, "learning_rate": 4.428284112003868e-05, "loss": 71.5589, "step": 74410 }, { "epoch": 0.3006662168659122, "grad_norm": 316.6112060546875, "learning_rate": 4.428061928421618e-05, "loss": 77.6914, "step": 74420 }, { "epoch": 0.30070661813127986, "grad_norm": 711.2620849609375, "learning_rate": 4.427839707250809e-05, "loss": 77.8412, "step": 74430 }, { "epoch": 0.3007470193966475, "grad_norm": 1620.897705078125, "learning_rate": 4.427617448495772e-05, "loss": 78.4815, "step": 74440 }, { "epoch": 0.30078742066201514, "grad_norm": 519.2609252929688, "learning_rate": 4.427395152160841e-05, "loss": 41.6989, "step": 74450 }, { "epoch": 0.3008278219273828, "grad_norm": 666.103515625, "learning_rate": 4.427172818250349e-05, "loss": 61.7915, "step": 74460 }, { "epoch": 0.3008682231927504, "grad_norm": 611.0293579101562, "learning_rate": 4.42695044676863e-05, "loss": 63.6666, "step": 74470 }, { "epoch": 0.300908624458118, "grad_norm": 789.2578125, "learning_rate": 4.4267280377200205e-05, "loss": 82.4072, "step": 74480 }, { "epoch": 0.30094902572348564, "grad_norm": 1049.16650390625, "learning_rate": 4.426505591108856e-05, "loss": 67.3771, "step": 74490 }, { "epoch": 0.3009894269888533, "grad_norm": 565.5245361328125, "learning_rate": 4.426283106939474e-05, "loss": 52.4517, "step": 74500 }, { "epoch": 0.3010298282542209, "grad_norm": 1094.810546875, "learning_rate": 4.42606058521621e-05, "loss": 89.7887, "step": 74510 }, { "epoch": 0.30107022951958856, "grad_norm": 938.249267578125, "learning_rate": 4.425838025943403e-05, "loss": 67.3641, "step": 74520 }, { "epoch": 0.3011106307849562, "grad_norm": 857.2614135742188, "learning_rate": 4.4256154291253925e-05, "loss": 102.7043, "step": 74530 }, { "epoch": 0.30115103205032384, "grad_norm": 520.708251953125, "learning_rate": 4.4253927947665185e-05, "loss": 66.7792, "step": 74540 }, { "epoch": 0.3011914333156914, "grad_norm": 764.5960693359375, "learning_rate": 4.42517012287112e-05, "loss": 69.1683, "step": 74550 }, { "epoch": 0.30123183458105907, "grad_norm": 1161.567138671875, "learning_rate": 4.424947413443539e-05, "loss": 75.7145, "step": 74560 }, { "epoch": 0.3012722358464267, "grad_norm": 810.30126953125, "learning_rate": 4.424724666488117e-05, "loss": 60.572, "step": 74570 }, { "epoch": 0.30131263711179435, "grad_norm": 1023.4842529296875, "learning_rate": 4.424501882009198e-05, "loss": 60.1247, "step": 74580 }, { "epoch": 0.301353038377162, "grad_norm": 572.3932495117188, "learning_rate": 4.424279060011123e-05, "loss": 58.7828, "step": 74590 }, { "epoch": 0.3013934396425296, "grad_norm": 1139.3538818359375, "learning_rate": 4.4240562004982364e-05, "loss": 89.5676, "step": 74600 }, { "epoch": 0.3014338409078972, "grad_norm": 685.8850708007812, "learning_rate": 4.423833303474884e-05, "loss": 74.0729, "step": 74610 }, { "epoch": 0.30147424217326485, "grad_norm": 358.7518005371094, "learning_rate": 4.423610368945411e-05, "loss": 64.4605, "step": 74620 }, { "epoch": 0.3015146434386325, "grad_norm": 2760.231201171875, "learning_rate": 4.423387396914164e-05, "loss": 63.1559, "step": 74630 }, { "epoch": 0.30155504470400013, "grad_norm": 1156.1490478515625, "learning_rate": 4.423164387385489e-05, "loss": 68.2249, "step": 74640 }, { "epoch": 0.30159544596936777, "grad_norm": 397.64569091796875, "learning_rate": 4.4229413403637345e-05, "loss": 70.5398, "step": 74650 }, { "epoch": 0.3016358472347354, "grad_norm": 488.1241760253906, "learning_rate": 4.422718255853248e-05, "loss": 63.8304, "step": 74660 }, { "epoch": 0.30167624850010305, "grad_norm": 623.3941650390625, "learning_rate": 4.42249513385838e-05, "loss": 93.5955, "step": 74670 }, { "epoch": 0.30171664976547063, "grad_norm": 584.9109497070312, "learning_rate": 4.422271974383479e-05, "loss": 63.7987, "step": 74680 }, { "epoch": 0.3017570510308383, "grad_norm": 1291.9105224609375, "learning_rate": 4.4220487774328964e-05, "loss": 80.7659, "step": 74690 }, { "epoch": 0.3017974522962059, "grad_norm": 921.5263671875, "learning_rate": 4.421825543010983e-05, "loss": 57.1126, "step": 74700 }, { "epoch": 0.30183785356157355, "grad_norm": 840.3859252929688, "learning_rate": 4.4216022711220916e-05, "loss": 47.0774, "step": 74710 }, { "epoch": 0.3018782548269412, "grad_norm": 1399.228759765625, "learning_rate": 4.4213789617705746e-05, "loss": 60.6638, "step": 74720 }, { "epoch": 0.30191865609230883, "grad_norm": 574.983642578125, "learning_rate": 4.421155614960785e-05, "loss": 64.1311, "step": 74730 }, { "epoch": 0.3019590573576764, "grad_norm": 1133.6746826171875, "learning_rate": 4.420932230697079e-05, "loss": 66.0676, "step": 74740 }, { "epoch": 0.30199945862304406, "grad_norm": 2249.599853515625, "learning_rate": 4.420708808983809e-05, "loss": 72.025, "step": 74750 }, { "epoch": 0.3020398598884117, "grad_norm": 837.6932983398438, "learning_rate": 4.420485349825332e-05, "loss": 68.1134, "step": 74760 }, { "epoch": 0.30208026115377934, "grad_norm": 984.0842895507812, "learning_rate": 4.4202618532260046e-05, "loss": 91.3187, "step": 74770 }, { "epoch": 0.302120662419147, "grad_norm": 727.6422729492188, "learning_rate": 4.420038319190184e-05, "loss": 63.3439, "step": 74780 }, { "epoch": 0.3021610636845146, "grad_norm": 412.24908447265625, "learning_rate": 4.4198147477222274e-05, "loss": 60.6722, "step": 74790 }, { "epoch": 0.3022014649498822, "grad_norm": 1043.6190185546875, "learning_rate": 4.4195911388264946e-05, "loss": 50.6668, "step": 74800 }, { "epoch": 0.30224186621524984, "grad_norm": 432.61956787109375, "learning_rate": 4.419367492507343e-05, "loss": 53.3677, "step": 74810 }, { "epoch": 0.3022822674806175, "grad_norm": 554.3751831054688, "learning_rate": 4.419143808769135e-05, "loss": 45.9557, "step": 74820 }, { "epoch": 0.3023226687459851, "grad_norm": 880.563720703125, "learning_rate": 4.4189200876162295e-05, "loss": 79.8755, "step": 74830 }, { "epoch": 0.30236307001135276, "grad_norm": 766.0863647460938, "learning_rate": 4.41869632905299e-05, "loss": 81.7201, "step": 74840 }, { "epoch": 0.3024034712767204, "grad_norm": 512.21240234375, "learning_rate": 4.418472533083777e-05, "loss": 64.7566, "step": 74850 }, { "epoch": 0.30244387254208804, "grad_norm": 1144.3148193359375, "learning_rate": 4.418248699712955e-05, "loss": 119.2652, "step": 74860 }, { "epoch": 0.30248427380745563, "grad_norm": 535.4666137695312, "learning_rate": 4.418024828944886e-05, "loss": 60.0249, "step": 74870 }, { "epoch": 0.30252467507282327, "grad_norm": 3031.205322265625, "learning_rate": 4.417800920783937e-05, "loss": 56.3927, "step": 74880 }, { "epoch": 0.3025650763381909, "grad_norm": 1405.0643310546875, "learning_rate": 4.4175769752344706e-05, "loss": 78.4172, "step": 74890 }, { "epoch": 0.30260547760355855, "grad_norm": 779.4258422851562, "learning_rate": 4.417352992300854e-05, "loss": 59.0052, "step": 74900 }, { "epoch": 0.3026458788689262, "grad_norm": 1063.1361083984375, "learning_rate": 4.4171289719874543e-05, "loss": 66.4332, "step": 74910 }, { "epoch": 0.30268628013429383, "grad_norm": 479.256103515625, "learning_rate": 4.4169049142986376e-05, "loss": 51.7122, "step": 74920 }, { "epoch": 0.3027266813996614, "grad_norm": 482.245849609375, "learning_rate": 4.416680819238773e-05, "loss": 71.5557, "step": 74930 }, { "epoch": 0.30276708266502905, "grad_norm": 1661.60791015625, "learning_rate": 4.4164566868122286e-05, "loss": 110.3273, "step": 74940 }, { "epoch": 0.3028074839303967, "grad_norm": 763.4310913085938, "learning_rate": 4.4162325170233745e-05, "loss": 63.9054, "step": 74950 }, { "epoch": 0.30284788519576433, "grad_norm": 581.7908325195312, "learning_rate": 4.4160083098765815e-05, "loss": 73.6799, "step": 74960 }, { "epoch": 0.302888286461132, "grad_norm": 522.9912109375, "learning_rate": 4.4157840653762196e-05, "loss": 63.8081, "step": 74970 }, { "epoch": 0.3029286877264996, "grad_norm": 671.7311401367188, "learning_rate": 4.4155597835266616e-05, "loss": 43.7649, "step": 74980 }, { "epoch": 0.30296908899186725, "grad_norm": 655.3804931640625, "learning_rate": 4.415335464332279e-05, "loss": 68.5104, "step": 74990 }, { "epoch": 0.30300949025723484, "grad_norm": 705.6524047851562, "learning_rate": 4.415111107797445e-05, "loss": 55.7934, "step": 75000 }, { "epoch": 0.3030498915226025, "grad_norm": 2905.262939453125, "learning_rate": 4.4148867139265345e-05, "loss": 111.0617, "step": 75010 }, { "epoch": 0.3030902927879701, "grad_norm": 972.8043823242188, "learning_rate": 4.414662282723922e-05, "loss": 63.5983, "step": 75020 }, { "epoch": 0.30313069405333776, "grad_norm": 1290.5452880859375, "learning_rate": 4.414437814193982e-05, "loss": 58.9312, "step": 75030 }, { "epoch": 0.3031710953187054, "grad_norm": 1295.46044921875, "learning_rate": 4.414213308341092e-05, "loss": 60.9342, "step": 75040 }, { "epoch": 0.30321149658407304, "grad_norm": 778.5823364257812, "learning_rate": 4.4139887651696265e-05, "loss": 65.9136, "step": 75050 }, { "epoch": 0.3032518978494406, "grad_norm": 515.2728271484375, "learning_rate": 4.413764184683966e-05, "loss": 73.2117, "step": 75060 }, { "epoch": 0.30329229911480826, "grad_norm": 1737.145751953125, "learning_rate": 4.413539566888487e-05, "loss": 122.1578, "step": 75070 }, { "epoch": 0.3033327003801759, "grad_norm": 606.1090698242188, "learning_rate": 4.413314911787569e-05, "loss": 62.4606, "step": 75080 }, { "epoch": 0.30337310164554354, "grad_norm": 760.4569702148438, "learning_rate": 4.413090219385592e-05, "loss": 42.6394, "step": 75090 }, { "epoch": 0.3034135029109112, "grad_norm": 809.2393188476562, "learning_rate": 4.412865489686936e-05, "loss": 61.619, "step": 75100 }, { "epoch": 0.3034539041762788, "grad_norm": 555.9096069335938, "learning_rate": 4.412640722695982e-05, "loss": 79.0981, "step": 75110 }, { "epoch": 0.3034943054416464, "grad_norm": 745.3788452148438, "learning_rate": 4.4124159184171134e-05, "loss": 80.1419, "step": 75120 }, { "epoch": 0.30353470670701405, "grad_norm": 915.6812744140625, "learning_rate": 4.412191076854711e-05, "loss": 80.7349, "step": 75130 }, { "epoch": 0.3035751079723817, "grad_norm": 857.411865234375, "learning_rate": 4.41196619801316e-05, "loss": 43.2552, "step": 75140 }, { "epoch": 0.3036155092377493, "grad_norm": 1478.0841064453125, "learning_rate": 4.4117412818968426e-05, "loss": 71.5397, "step": 75150 }, { "epoch": 0.30365591050311697, "grad_norm": 818.4799194335938, "learning_rate": 4.411516328510145e-05, "loss": 42.5693, "step": 75160 }, { "epoch": 0.3036963117684846, "grad_norm": 1053.192626953125, "learning_rate": 4.411291337857453e-05, "loss": 67.0812, "step": 75170 }, { "epoch": 0.30373671303385225, "grad_norm": 399.8765563964844, "learning_rate": 4.4110663099431514e-05, "loss": 65.8789, "step": 75180 }, { "epoch": 0.30377711429921983, "grad_norm": 869.1029052734375, "learning_rate": 4.41084124477163e-05, "loss": 86.4271, "step": 75190 }, { "epoch": 0.30381751556458747, "grad_norm": 561.1309814453125, "learning_rate": 4.410616142347273e-05, "loss": 65.8079, "step": 75200 }, { "epoch": 0.3038579168299551, "grad_norm": 1289.060302734375, "learning_rate": 4.410391002674471e-05, "loss": 74.7418, "step": 75210 }, { "epoch": 0.30389831809532275, "grad_norm": 566.0087280273438, "learning_rate": 4.410165825757613e-05, "loss": 74.0141, "step": 75220 }, { "epoch": 0.3039387193606904, "grad_norm": 0.0, "learning_rate": 4.409940611601089e-05, "loss": 52.371, "step": 75230 }, { "epoch": 0.30397912062605803, "grad_norm": 628.7798461914062, "learning_rate": 4.409715360209289e-05, "loss": 70.8489, "step": 75240 }, { "epoch": 0.3040195218914256, "grad_norm": 644.141845703125, "learning_rate": 4.4094900715866064e-05, "loss": 41.9273, "step": 75250 }, { "epoch": 0.30405992315679325, "grad_norm": 727.8847045898438, "learning_rate": 4.40926474573743e-05, "loss": 67.9712, "step": 75260 }, { "epoch": 0.3041003244221609, "grad_norm": 0.0, "learning_rate": 4.409039382666155e-05, "loss": 52.6979, "step": 75270 }, { "epoch": 0.30414072568752853, "grad_norm": 521.0494384765625, "learning_rate": 4.4088139823771744e-05, "loss": 48.4199, "step": 75280 }, { "epoch": 0.3041811269528962, "grad_norm": 494.1226501464844, "learning_rate": 4.408588544874882e-05, "loss": 65.0988, "step": 75290 }, { "epoch": 0.3042215282182638, "grad_norm": 513.5343627929688, "learning_rate": 4.408363070163675e-05, "loss": 42.8742, "step": 75300 }, { "epoch": 0.30426192948363145, "grad_norm": 654.8699951171875, "learning_rate": 4.408137558247946e-05, "loss": 64.6675, "step": 75310 }, { "epoch": 0.30430233074899904, "grad_norm": 1542.4345703125, "learning_rate": 4.407912009132093e-05, "loss": 58.0917, "step": 75320 }, { "epoch": 0.3043427320143667, "grad_norm": 1139.7677001953125, "learning_rate": 4.4076864228205136e-05, "loss": 62.5426, "step": 75330 }, { "epoch": 0.3043831332797343, "grad_norm": 480.1134033203125, "learning_rate": 4.407460799317604e-05, "loss": 64.8383, "step": 75340 }, { "epoch": 0.30442353454510196, "grad_norm": 1814.2696533203125, "learning_rate": 4.4072351386277654e-05, "loss": 78.7744, "step": 75350 }, { "epoch": 0.3044639358104696, "grad_norm": 1329.923095703125, "learning_rate": 4.407009440755396e-05, "loss": 72.4044, "step": 75360 }, { "epoch": 0.30450433707583724, "grad_norm": 3605.839599609375, "learning_rate": 4.4067837057048956e-05, "loss": 79.2061, "step": 75370 }, { "epoch": 0.3045447383412048, "grad_norm": 1073.45166015625, "learning_rate": 4.406557933480664e-05, "loss": 81.1469, "step": 75380 }, { "epoch": 0.30458513960657246, "grad_norm": 751.8543090820312, "learning_rate": 4.406332124087105e-05, "loss": 77.547, "step": 75390 }, { "epoch": 0.3046255408719401, "grad_norm": 531.0945434570312, "learning_rate": 4.40610627752862e-05, "loss": 66.1161, "step": 75400 }, { "epoch": 0.30466594213730774, "grad_norm": 738.7662963867188, "learning_rate": 4.405880393809612e-05, "loss": 64.749, "step": 75410 }, { "epoch": 0.3047063434026754, "grad_norm": 877.9468383789062, "learning_rate": 4.405654472934483e-05, "loss": 51.235, "step": 75420 }, { "epoch": 0.304746744668043, "grad_norm": 653.334716796875, "learning_rate": 4.4054285149076404e-05, "loss": 55.1535, "step": 75430 }, { "epoch": 0.3047871459334106, "grad_norm": 1065.5577392578125, "learning_rate": 4.4052025197334864e-05, "loss": 75.8509, "step": 75440 }, { "epoch": 0.30482754719877825, "grad_norm": 1329.488525390625, "learning_rate": 4.40497648741643e-05, "loss": 78.9796, "step": 75450 }, { "epoch": 0.3048679484641459, "grad_norm": 1260.999755859375, "learning_rate": 4.4047504179608755e-05, "loss": 91.8325, "step": 75460 }, { "epoch": 0.30490834972951353, "grad_norm": 656.1160888671875, "learning_rate": 4.404524311371231e-05, "loss": 35.623, "step": 75470 }, { "epoch": 0.30494875099488117, "grad_norm": 882.13623046875, "learning_rate": 4.404298167651905e-05, "loss": 58.8389, "step": 75480 }, { "epoch": 0.3049891522602488, "grad_norm": 1063.432861328125, "learning_rate": 4.4040719868073055e-05, "loss": 55.5148, "step": 75490 }, { "epoch": 0.30502955352561645, "grad_norm": 1446.3270263671875, "learning_rate": 4.403845768841842e-05, "loss": 93.3359, "step": 75500 }, { "epoch": 0.30506995479098403, "grad_norm": 652.8603515625, "learning_rate": 4.403619513759926e-05, "loss": 88.8308, "step": 75510 }, { "epoch": 0.3051103560563517, "grad_norm": 848.1201782226562, "learning_rate": 4.403393221565966e-05, "loss": 51.726, "step": 75520 }, { "epoch": 0.3051507573217193, "grad_norm": 823.25390625, "learning_rate": 4.403166892264376e-05, "loss": 60.7063, "step": 75530 }, { "epoch": 0.30519115858708695, "grad_norm": 837.02978515625, "learning_rate": 4.402940525859568e-05, "loss": 62.3497, "step": 75540 }, { "epoch": 0.3052315598524546, "grad_norm": 1130.3653564453125, "learning_rate": 4.402714122355955e-05, "loss": 86.2569, "step": 75550 }, { "epoch": 0.30527196111782223, "grad_norm": 313.84063720703125, "learning_rate": 4.40248768175795e-05, "loss": 85.372, "step": 75560 }, { "epoch": 0.3053123623831898, "grad_norm": 1272.7034912109375, "learning_rate": 4.4022612040699676e-05, "loss": 59.0401, "step": 75570 }, { "epoch": 0.30535276364855746, "grad_norm": 259.1897888183594, "learning_rate": 4.4020346892964246e-05, "loss": 42.5053, "step": 75580 }, { "epoch": 0.3053931649139251, "grad_norm": 541.4120483398438, "learning_rate": 4.401808137441736e-05, "loss": 59.4345, "step": 75590 }, { "epoch": 0.30543356617929274, "grad_norm": 923.3859252929688, "learning_rate": 4.401581548510318e-05, "loss": 47.9736, "step": 75600 }, { "epoch": 0.3054739674446604, "grad_norm": 1377.25, "learning_rate": 4.40135492250659e-05, "loss": 78.1078, "step": 75610 }, { "epoch": 0.305514368710028, "grad_norm": 1074.262939453125, "learning_rate": 4.401128259434968e-05, "loss": 57.5512, "step": 75620 }, { "epoch": 0.30555476997539566, "grad_norm": 616.6300048828125, "learning_rate": 4.400901559299871e-05, "loss": 38.326, "step": 75630 }, { "epoch": 0.30559517124076324, "grad_norm": 1104.368408203125, "learning_rate": 4.4006748221057206e-05, "loss": 94.1468, "step": 75640 }, { "epoch": 0.3056355725061309, "grad_norm": 581.0299072265625, "learning_rate": 4.4004480478569353e-05, "loss": 87.8687, "step": 75650 }, { "epoch": 0.3056759737714985, "grad_norm": 1138.6673583984375, "learning_rate": 4.400221236557938e-05, "loss": 66.9354, "step": 75660 }, { "epoch": 0.30571637503686616, "grad_norm": 568.2470703125, "learning_rate": 4.399994388213149e-05, "loss": 88.6248, "step": 75670 }, { "epoch": 0.3057567763022338, "grad_norm": 598.5287475585938, "learning_rate": 4.3997675028269906e-05, "loss": 55.7092, "step": 75680 }, { "epoch": 0.30579717756760144, "grad_norm": 971.5711669921875, "learning_rate": 4.399540580403887e-05, "loss": 67.1653, "step": 75690 }, { "epoch": 0.305837578832969, "grad_norm": 1012.9927368164062, "learning_rate": 4.399313620948262e-05, "loss": 51.6907, "step": 75700 }, { "epoch": 0.30587798009833667, "grad_norm": 1266.41748046875, "learning_rate": 4.39908662446454e-05, "loss": 59.8522, "step": 75710 }, { "epoch": 0.3059183813637043, "grad_norm": 580.0430297851562, "learning_rate": 4.3988595909571464e-05, "loss": 51.2898, "step": 75720 }, { "epoch": 0.30595878262907195, "grad_norm": 503.0343322753906, "learning_rate": 4.3986325204305076e-05, "loss": 69.8676, "step": 75730 }, { "epoch": 0.3059991838944396, "grad_norm": 246.7170867919922, "learning_rate": 4.398405412889051e-05, "loss": 60.3378, "step": 75740 }, { "epoch": 0.3060395851598072, "grad_norm": 440.064208984375, "learning_rate": 4.3981782683372016e-05, "loss": 52.1341, "step": 75750 }, { "epoch": 0.3060799864251748, "grad_norm": 684.9658203125, "learning_rate": 4.3979510867793917e-05, "loss": 83.72, "step": 75760 }, { "epoch": 0.30612038769054245, "grad_norm": 0.0, "learning_rate": 4.397723868220047e-05, "loss": 64.8489, "step": 75770 }, { "epoch": 0.3061607889559101, "grad_norm": 971.6740112304688, "learning_rate": 4.397496612663599e-05, "loss": 59.74, "step": 75780 }, { "epoch": 0.30620119022127773, "grad_norm": 1435.0618896484375, "learning_rate": 4.397269320114478e-05, "loss": 92.3261, "step": 75790 }, { "epoch": 0.30624159148664537, "grad_norm": 1401.4566650390625, "learning_rate": 4.3970419905771145e-05, "loss": 86.528, "step": 75800 }, { "epoch": 0.306281992752013, "grad_norm": 423.3729553222656, "learning_rate": 4.39681462405594e-05, "loss": 106.0067, "step": 75810 }, { "epoch": 0.30632239401738065, "grad_norm": 0.0, "learning_rate": 4.3965872205553885e-05, "loss": 70.0686, "step": 75820 }, { "epoch": 0.30636279528274823, "grad_norm": 1514.8172607421875, "learning_rate": 4.3963597800798927e-05, "loss": 85.976, "step": 75830 }, { "epoch": 0.3064031965481159, "grad_norm": 286.7713928222656, "learning_rate": 4.396132302633886e-05, "loss": 47.0414, "step": 75840 }, { "epoch": 0.3064435978134835, "grad_norm": 1182.9954833984375, "learning_rate": 4.395904788221805e-05, "loss": 59.4661, "step": 75850 }, { "epoch": 0.30648399907885115, "grad_norm": 1122.1630859375, "learning_rate": 4.3956772368480836e-05, "loss": 94.7752, "step": 75860 }, { "epoch": 0.3065244003442188, "grad_norm": 555.4927368164062, "learning_rate": 4.395449648517158e-05, "loss": 52.1087, "step": 75870 }, { "epoch": 0.30656480160958643, "grad_norm": 698.0592041015625, "learning_rate": 4.395222023233466e-05, "loss": 52.4741, "step": 75880 }, { "epoch": 0.306605202874954, "grad_norm": 628.822509765625, "learning_rate": 4.3949943610014455e-05, "loss": 86.0263, "step": 75890 }, { "epoch": 0.30664560414032166, "grad_norm": 1240.1202392578125, "learning_rate": 4.394766661825533e-05, "loss": 46.8542, "step": 75900 }, { "epoch": 0.3066860054056893, "grad_norm": 816.1203002929688, "learning_rate": 4.3945389257101704e-05, "loss": 38.3735, "step": 75910 }, { "epoch": 0.30672640667105694, "grad_norm": 889.33349609375, "learning_rate": 4.394311152659796e-05, "loss": 73.3283, "step": 75920 }, { "epoch": 0.3067668079364246, "grad_norm": 1453.927978515625, "learning_rate": 4.3940833426788496e-05, "loss": 62.5195, "step": 75930 }, { "epoch": 0.3068072092017922, "grad_norm": 987.9056396484375, "learning_rate": 4.393855495771774e-05, "loss": 77.4361, "step": 75940 }, { "epoch": 0.3068476104671598, "grad_norm": 1086.7833251953125, "learning_rate": 4.3936276119430096e-05, "loss": 87.7577, "step": 75950 }, { "epoch": 0.30688801173252744, "grad_norm": 491.1762390136719, "learning_rate": 4.393399691197e-05, "loss": 71.9018, "step": 75960 }, { "epoch": 0.3069284129978951, "grad_norm": 834.448974609375, "learning_rate": 4.3931717335381894e-05, "loss": 56.691, "step": 75970 }, { "epoch": 0.3069688142632627, "grad_norm": 1463.418212890625, "learning_rate": 4.392943738971021e-05, "loss": 66.625, "step": 75980 }, { "epoch": 0.30700921552863036, "grad_norm": 709.5184936523438, "learning_rate": 4.39271570749994e-05, "loss": 72.5582, "step": 75990 }, { "epoch": 0.307049616793998, "grad_norm": 802.2734985351562, "learning_rate": 4.3924876391293915e-05, "loss": 80.8964, "step": 76000 }, { "epoch": 0.30709001805936564, "grad_norm": 1081.5029296875, "learning_rate": 4.3922595338638214e-05, "loss": 64.753, "step": 76010 }, { "epoch": 0.30713041932473323, "grad_norm": 747.075439453125, "learning_rate": 4.3920313917076794e-05, "loss": 48.6337, "step": 76020 }, { "epoch": 0.30717082059010087, "grad_norm": 371.2689208984375, "learning_rate": 4.3918032126654095e-05, "loss": 56.0596, "step": 76030 }, { "epoch": 0.3072112218554685, "grad_norm": 852.907958984375, "learning_rate": 4.391574996741463e-05, "loss": 66.6618, "step": 76040 }, { "epoch": 0.30725162312083615, "grad_norm": 1084.6485595703125, "learning_rate": 4.391346743940288e-05, "loss": 66.4222, "step": 76050 }, { "epoch": 0.3072920243862038, "grad_norm": 1998.0355224609375, "learning_rate": 4.3911184542663344e-05, "loss": 77.1086, "step": 76060 }, { "epoch": 0.30733242565157143, "grad_norm": 789.2811889648438, "learning_rate": 4.390890127724053e-05, "loss": 68.8511, "step": 76070 }, { "epoch": 0.307372826916939, "grad_norm": 666.281494140625, "learning_rate": 4.390661764317895e-05, "loss": 68.8774, "step": 76080 }, { "epoch": 0.30741322818230665, "grad_norm": 593.8773803710938, "learning_rate": 4.390433364052312e-05, "loss": 45.1297, "step": 76090 }, { "epoch": 0.3074536294476743, "grad_norm": 1460.0093994140625, "learning_rate": 4.390204926931758e-05, "loss": 79.1225, "step": 76100 }, { "epoch": 0.30749403071304193, "grad_norm": 653.6718139648438, "learning_rate": 4.389976452960686e-05, "loss": 72.7947, "step": 76110 }, { "epoch": 0.3075344319784096, "grad_norm": 906.63134765625, "learning_rate": 4.38974794214355e-05, "loss": 72.8858, "step": 76120 }, { "epoch": 0.3075748332437772, "grad_norm": 1931.9991455078125, "learning_rate": 4.3895193944848034e-05, "loss": 64.3772, "step": 76130 }, { "epoch": 0.30761523450914485, "grad_norm": 798.5737915039062, "learning_rate": 4.3892908099889054e-05, "loss": 55.5147, "step": 76140 }, { "epoch": 0.30765563577451244, "grad_norm": 696.617919921875, "learning_rate": 4.389062188660309e-05, "loss": 62.2774, "step": 76150 }, { "epoch": 0.3076960370398801, "grad_norm": 840.3499145507812, "learning_rate": 4.388833530503473e-05, "loss": 98.8143, "step": 76160 }, { "epoch": 0.3077364383052477, "grad_norm": 764.2418212890625, "learning_rate": 4.388604835522855e-05, "loss": 69.4767, "step": 76170 }, { "epoch": 0.30777683957061536, "grad_norm": 663.6424560546875, "learning_rate": 4.3883761037229146e-05, "loss": 69.4592, "step": 76180 }, { "epoch": 0.307817240835983, "grad_norm": 624.4500732421875, "learning_rate": 4.388147335108108e-05, "loss": 53.6257, "step": 76190 }, { "epoch": 0.30785764210135064, "grad_norm": 1174.481201171875, "learning_rate": 4.387918529682898e-05, "loss": 70.0419, "step": 76200 }, { "epoch": 0.3078980433667182, "grad_norm": 466.0067443847656, "learning_rate": 4.3876896874517434e-05, "loss": 112.7296, "step": 76210 }, { "epoch": 0.30793844463208586, "grad_norm": 576.7889404296875, "learning_rate": 4.387460808419108e-05, "loss": 56.6612, "step": 76220 }, { "epoch": 0.3079788458974535, "grad_norm": 809.4537353515625, "learning_rate": 4.387231892589452e-05, "loss": 84.8902, "step": 76230 }, { "epoch": 0.30801924716282114, "grad_norm": 1000.173095703125, "learning_rate": 4.387002939967237e-05, "loss": 69.7403, "step": 76240 }, { "epoch": 0.3080596484281888, "grad_norm": 756.7673950195312, "learning_rate": 4.386773950556931e-05, "loss": 51.9468, "step": 76250 }, { "epoch": 0.3081000496935564, "grad_norm": 922.7078247070312, "learning_rate": 4.386544924362993e-05, "loss": 80.4618, "step": 76260 }, { "epoch": 0.308140450958924, "grad_norm": 928.8509521484375, "learning_rate": 4.3863158613898915e-05, "loss": 90.6427, "step": 76270 }, { "epoch": 0.30818085222429165, "grad_norm": 990.8674926757812, "learning_rate": 4.386086761642091e-05, "loss": 54.2147, "step": 76280 }, { "epoch": 0.3082212534896593, "grad_norm": 973.0326538085938, "learning_rate": 4.385857625124058e-05, "loss": 86.8008, "step": 76290 }, { "epoch": 0.3082616547550269, "grad_norm": 831.694580078125, "learning_rate": 4.3856284518402594e-05, "loss": 77.8919, "step": 76300 }, { "epoch": 0.30830205602039457, "grad_norm": 1294.062255859375, "learning_rate": 4.385399241795164e-05, "loss": 89.092, "step": 76310 }, { "epoch": 0.3083424572857622, "grad_norm": 899.0340576171875, "learning_rate": 4.3851699949932396e-05, "loss": 66.3644, "step": 76320 }, { "epoch": 0.30838285855112985, "grad_norm": 2219.54296875, "learning_rate": 4.384940711438955e-05, "loss": 43.2072, "step": 76330 }, { "epoch": 0.30842325981649743, "grad_norm": 1805.721923828125, "learning_rate": 4.384711391136781e-05, "loss": 68.598, "step": 76340 }, { "epoch": 0.30846366108186507, "grad_norm": 701.5916137695312, "learning_rate": 4.384482034091189e-05, "loss": 49.2661, "step": 76350 }, { "epoch": 0.3085040623472327, "grad_norm": 435.8236389160156, "learning_rate": 4.3842526403066486e-05, "loss": 52.0933, "step": 76360 }, { "epoch": 0.30854446361260035, "grad_norm": 2196.045166015625, "learning_rate": 4.384023209787633e-05, "loss": 93.9429, "step": 76370 }, { "epoch": 0.308584864877968, "grad_norm": 780.8671264648438, "learning_rate": 4.383793742538616e-05, "loss": 60.4108, "step": 76380 }, { "epoch": 0.30862526614333563, "grad_norm": 1023.3828125, "learning_rate": 4.383564238564068e-05, "loss": 65.5329, "step": 76390 }, { "epoch": 0.3086656674087032, "grad_norm": 677.0743408203125, "learning_rate": 4.3833346978684675e-05, "loss": 89.5438, "step": 76400 }, { "epoch": 0.30870606867407085, "grad_norm": 650.9645385742188, "learning_rate": 4.383105120456287e-05, "loss": 50.4317, "step": 76410 }, { "epoch": 0.3087464699394385, "grad_norm": 543.072021484375, "learning_rate": 4.3828755063320016e-05, "loss": 84.4733, "step": 76420 }, { "epoch": 0.30878687120480613, "grad_norm": 773.248046875, "learning_rate": 4.38264585550009e-05, "loss": 64.6917, "step": 76430 }, { "epoch": 0.3088272724701738, "grad_norm": 365.4747009277344, "learning_rate": 4.382416167965028e-05, "loss": 62.2631, "step": 76440 }, { "epoch": 0.3088676737355414, "grad_norm": 674.434326171875, "learning_rate": 4.382186443731293e-05, "loss": 54.153, "step": 76450 }, { "epoch": 0.30890807500090905, "grad_norm": 1216.2164306640625, "learning_rate": 4.381956682803365e-05, "loss": 77.9781, "step": 76460 }, { "epoch": 0.30894847626627664, "grad_norm": 615.4111938476562, "learning_rate": 4.381726885185722e-05, "loss": 66.789, "step": 76470 }, { "epoch": 0.3089888775316443, "grad_norm": 1128.7919921875, "learning_rate": 4.381497050882845e-05, "loss": 61.4229, "step": 76480 }, { "epoch": 0.3090292787970119, "grad_norm": 645.344482421875, "learning_rate": 4.381267179899214e-05, "loss": 47.0934, "step": 76490 }, { "epoch": 0.30906968006237956, "grad_norm": 397.85980224609375, "learning_rate": 4.381037272239311e-05, "loss": 63.7962, "step": 76500 }, { "epoch": 0.3091100813277472, "grad_norm": 463.2906188964844, "learning_rate": 4.380807327907618e-05, "loss": 60.8565, "step": 76510 }, { "epoch": 0.30915048259311484, "grad_norm": 470.76104736328125, "learning_rate": 4.380577346908618e-05, "loss": 71.5916, "step": 76520 }, { "epoch": 0.3091908838584824, "grad_norm": 1406.014404296875, "learning_rate": 4.380347329246794e-05, "loss": 76.5488, "step": 76530 }, { "epoch": 0.30923128512385006, "grad_norm": 898.1220703125, "learning_rate": 4.380117274926631e-05, "loss": 75.5578, "step": 76540 }, { "epoch": 0.3092716863892177, "grad_norm": 1027.6002197265625, "learning_rate": 4.379887183952614e-05, "loss": 73.7126, "step": 76550 }, { "epoch": 0.30931208765458534, "grad_norm": 2206.342529296875, "learning_rate": 4.379657056329228e-05, "loss": 121.1146, "step": 76560 }, { "epoch": 0.309352488919953, "grad_norm": 986.2785034179688, "learning_rate": 4.3794268920609605e-05, "loss": 59.564, "step": 76570 }, { "epoch": 0.3093928901853206, "grad_norm": 847.2274169921875, "learning_rate": 4.379196691152298e-05, "loss": 69.7664, "step": 76580 }, { "epoch": 0.3094332914506882, "grad_norm": 938.61767578125, "learning_rate": 4.3789664536077286e-05, "loss": 54.9465, "step": 76590 }, { "epoch": 0.30947369271605585, "grad_norm": 879.3148193359375, "learning_rate": 4.3787361794317405e-05, "loss": 82.9992, "step": 76600 }, { "epoch": 0.3095140939814235, "grad_norm": 394.82000732421875, "learning_rate": 4.378505868628823e-05, "loss": 79.9965, "step": 76610 }, { "epoch": 0.30955449524679113, "grad_norm": 458.2685241699219, "learning_rate": 4.3782755212034675e-05, "loss": 90.1063, "step": 76620 }, { "epoch": 0.30959489651215877, "grad_norm": 1523.7799072265625, "learning_rate": 4.3780451371601626e-05, "loss": 63.1249, "step": 76630 }, { "epoch": 0.3096352977775264, "grad_norm": 656.118896484375, "learning_rate": 4.3778147165034025e-05, "loss": 75.1157, "step": 76640 }, { "epoch": 0.30967569904289405, "grad_norm": 1332.3353271484375, "learning_rate": 4.377584259237676e-05, "loss": 98.9753, "step": 76650 }, { "epoch": 0.30971610030826163, "grad_norm": 1163.6890869140625, "learning_rate": 4.377353765367479e-05, "loss": 82.146, "step": 76660 }, { "epoch": 0.3097565015736293, "grad_norm": 994.2089233398438, "learning_rate": 4.377123234897303e-05, "loss": 47.6127, "step": 76670 }, { "epoch": 0.3097969028389969, "grad_norm": 427.42059326171875, "learning_rate": 4.376892667831644e-05, "loss": 61.8331, "step": 76680 }, { "epoch": 0.30983730410436455, "grad_norm": 1147.4427490234375, "learning_rate": 4.376662064174994e-05, "loss": 65.2715, "step": 76690 }, { "epoch": 0.3098777053697322, "grad_norm": 1077.2244873046875, "learning_rate": 4.376431423931853e-05, "loss": 111.3696, "step": 76700 }, { "epoch": 0.30991810663509983, "grad_norm": 384.9374084472656, "learning_rate": 4.3762007471067146e-05, "loss": 72.2117, "step": 76710 }, { "epoch": 0.3099585079004674, "grad_norm": 0.0, "learning_rate": 4.375970033704077e-05, "loss": 53.84, "step": 76720 }, { "epoch": 0.30999890916583506, "grad_norm": 514.8012084960938, "learning_rate": 4.375739283728437e-05, "loss": 43.7522, "step": 76730 }, { "epoch": 0.3100393104312027, "grad_norm": 1148.413818359375, "learning_rate": 4.3755084971842954e-05, "loss": 73.6758, "step": 76740 }, { "epoch": 0.31007971169657034, "grad_norm": 1197.4542236328125, "learning_rate": 4.375277674076149e-05, "loss": 51.1232, "step": 76750 }, { "epoch": 0.310120112961938, "grad_norm": 1706.7191162109375, "learning_rate": 4.375046814408499e-05, "loss": 102.9833, "step": 76760 }, { "epoch": 0.3101605142273056, "grad_norm": 696.5614624023438, "learning_rate": 4.374815918185846e-05, "loss": 58.2234, "step": 76770 }, { "epoch": 0.31020091549267326, "grad_norm": 1848.7935791015625, "learning_rate": 4.374584985412692e-05, "loss": 67.9964, "step": 76780 }, { "epoch": 0.31024131675804084, "grad_norm": 469.37255859375, "learning_rate": 4.374354016093538e-05, "loss": 72.5402, "step": 76790 }, { "epoch": 0.3102817180234085, "grad_norm": 1170.890380859375, "learning_rate": 4.374123010232888e-05, "loss": 67.4718, "step": 76800 }, { "epoch": 0.3103221192887761, "grad_norm": 468.8921813964844, "learning_rate": 4.373891967835245e-05, "loss": 50.772, "step": 76810 }, { "epoch": 0.31036252055414376, "grad_norm": 1059.63623046875, "learning_rate": 4.373660888905113e-05, "loss": 74.093, "step": 76820 }, { "epoch": 0.3104029218195114, "grad_norm": 1103.974609375, "learning_rate": 4.373429773446998e-05, "loss": 65.5093, "step": 76830 }, { "epoch": 0.31044332308487904, "grad_norm": 885.3768920898438, "learning_rate": 4.373198621465404e-05, "loss": 77.8017, "step": 76840 }, { "epoch": 0.3104837243502466, "grad_norm": 968.3536376953125, "learning_rate": 4.372967432964838e-05, "loss": 53.5939, "step": 76850 }, { "epoch": 0.31052412561561427, "grad_norm": 365.57904052734375, "learning_rate": 4.372736207949809e-05, "loss": 94.1086, "step": 76860 }, { "epoch": 0.3105645268809819, "grad_norm": 974.5148315429688, "learning_rate": 4.3725049464248235e-05, "loss": 62.9441, "step": 76870 }, { "epoch": 0.31060492814634955, "grad_norm": 191.68748474121094, "learning_rate": 4.372273648394389e-05, "loss": 61.9872, "step": 76880 }, { "epoch": 0.3106453294117172, "grad_norm": 661.6585693359375, "learning_rate": 4.372042313863017e-05, "loss": 50.0335, "step": 76890 }, { "epoch": 0.3106857306770848, "grad_norm": 774.6177978515625, "learning_rate": 4.371810942835215e-05, "loss": 48.9739, "step": 76900 }, { "epoch": 0.3107261319424524, "grad_norm": 813.4871826171875, "learning_rate": 4.371579535315496e-05, "loss": 69.586, "step": 76910 }, { "epoch": 0.31076653320782005, "grad_norm": 1050.9583740234375, "learning_rate": 4.37134809130837e-05, "loss": 67.1586, "step": 76920 }, { "epoch": 0.3108069344731877, "grad_norm": 747.37939453125, "learning_rate": 4.37111661081835e-05, "loss": 56.411, "step": 76930 }, { "epoch": 0.31084733573855533, "grad_norm": 499.1912536621094, "learning_rate": 4.370885093849948e-05, "loss": 36.8895, "step": 76940 }, { "epoch": 0.31088773700392297, "grad_norm": 1127.0404052734375, "learning_rate": 4.3706535404076784e-05, "loss": 100.2089, "step": 76950 }, { "epoch": 0.3109281382692906, "grad_norm": 696.9407958984375, "learning_rate": 4.370421950496054e-05, "loss": 55.8497, "step": 76960 }, { "epoch": 0.31096853953465825, "grad_norm": 867.03173828125, "learning_rate": 4.3701903241195916e-05, "loss": 67.1302, "step": 76970 }, { "epoch": 0.31100894080002583, "grad_norm": 972.6212768554688, "learning_rate": 4.369958661282805e-05, "loss": 54.7901, "step": 76980 }, { "epoch": 0.3110493420653935, "grad_norm": 979.4427490234375, "learning_rate": 4.369726961990213e-05, "loss": 63.9195, "step": 76990 }, { "epoch": 0.3110897433307611, "grad_norm": 1254.9720458984375, "learning_rate": 4.36949522624633e-05, "loss": 110.9768, "step": 77000 }, { "epoch": 0.31113014459612875, "grad_norm": 1459.2310791015625, "learning_rate": 4.369263454055675e-05, "loss": 81.1262, "step": 77010 }, { "epoch": 0.3111705458614964, "grad_norm": 968.3046875, "learning_rate": 4.3690316454227674e-05, "loss": 64.1656, "step": 77020 }, { "epoch": 0.31121094712686403, "grad_norm": 818.0725708007812, "learning_rate": 4.368799800352126e-05, "loss": 93.2603, "step": 77030 }, { "epoch": 0.3112513483922316, "grad_norm": 521.28125, "learning_rate": 4.368567918848269e-05, "loss": 45.2758, "step": 77040 }, { "epoch": 0.31129174965759926, "grad_norm": 273.4820556640625, "learning_rate": 4.368336000915719e-05, "loss": 69.712, "step": 77050 }, { "epoch": 0.3113321509229669, "grad_norm": 905.5136108398438, "learning_rate": 4.3681040465589976e-05, "loss": 88.944, "step": 77060 }, { "epoch": 0.31137255218833454, "grad_norm": 809.4381713867188, "learning_rate": 4.3678720557826247e-05, "loss": 72.0091, "step": 77070 }, { "epoch": 0.3114129534537022, "grad_norm": 439.21533203125, "learning_rate": 4.3676400285911256e-05, "loss": 68.7604, "step": 77080 }, { "epoch": 0.3114533547190698, "grad_norm": 4371.494140625, "learning_rate": 4.367407964989022e-05, "loss": 73.8835, "step": 77090 }, { "epoch": 0.31149375598443746, "grad_norm": 1301.020751953125, "learning_rate": 4.367175864980839e-05, "loss": 71.925, "step": 77100 }, { "epoch": 0.31153415724980504, "grad_norm": 608.1272583007812, "learning_rate": 4.366943728571101e-05, "loss": 61.2593, "step": 77110 }, { "epoch": 0.3115745585151727, "grad_norm": 529.8942260742188, "learning_rate": 4.3667115557643336e-05, "loss": 68.4521, "step": 77120 }, { "epoch": 0.3116149597805403, "grad_norm": 499.2430725097656, "learning_rate": 4.366479346565064e-05, "loss": 47.7916, "step": 77130 }, { "epoch": 0.31165536104590796, "grad_norm": 1138.603759765625, "learning_rate": 4.366247100977818e-05, "loss": 58.4289, "step": 77140 }, { "epoch": 0.3116957623112756, "grad_norm": 558.7338256835938, "learning_rate": 4.366014819007124e-05, "loss": 40.2173, "step": 77150 }, { "epoch": 0.31173616357664324, "grad_norm": 0.0, "learning_rate": 4.3657825006575106e-05, "loss": 76.302, "step": 77160 }, { "epoch": 0.31177656484201083, "grad_norm": 1142.35986328125, "learning_rate": 4.365550145933507e-05, "loss": 59.8843, "step": 77170 }, { "epoch": 0.31181696610737847, "grad_norm": 437.07196044921875, "learning_rate": 4.3653177548396426e-05, "loss": 84.331, "step": 77180 }, { "epoch": 0.3118573673727461, "grad_norm": 1512.7645263671875, "learning_rate": 4.365085327380448e-05, "loss": 68.1001, "step": 77190 }, { "epoch": 0.31189776863811375, "grad_norm": 595.816650390625, "learning_rate": 4.3648528635604556e-05, "loss": 61.2616, "step": 77200 }, { "epoch": 0.3119381699034814, "grad_norm": 307.5999450683594, "learning_rate": 4.364620363384196e-05, "loss": 66.6272, "step": 77210 }, { "epoch": 0.31197857116884903, "grad_norm": 1286.98095703125, "learning_rate": 4.364387826856202e-05, "loss": 78.2909, "step": 77220 }, { "epoch": 0.3120189724342166, "grad_norm": 1373.5860595703125, "learning_rate": 4.364155253981008e-05, "loss": 72.0954, "step": 77230 }, { "epoch": 0.31205937369958425, "grad_norm": 392.8745422363281, "learning_rate": 4.363922644763147e-05, "loss": 71.9362, "step": 77240 }, { "epoch": 0.3120997749649519, "grad_norm": 891.7858276367188, "learning_rate": 4.363689999207156e-05, "loss": 62.0088, "step": 77250 }, { "epoch": 0.31214017623031953, "grad_norm": 861.945556640625, "learning_rate": 4.363457317317567e-05, "loss": 105.5122, "step": 77260 }, { "epoch": 0.3121805774956872, "grad_norm": 843.31689453125, "learning_rate": 4.3632245990989194e-05, "loss": 52.6803, "step": 77270 }, { "epoch": 0.3122209787610548, "grad_norm": 649.4727172851562, "learning_rate": 4.362991844555749e-05, "loss": 44.0785, "step": 77280 }, { "epoch": 0.31226138002642245, "grad_norm": 961.2431640625, "learning_rate": 4.362759053692593e-05, "loss": 61.1215, "step": 77290 }, { "epoch": 0.31230178129179004, "grad_norm": 832.6133422851562, "learning_rate": 4.3625262265139906e-05, "loss": 71.7721, "step": 77300 }, { "epoch": 0.3123421825571577, "grad_norm": 1031.405517578125, "learning_rate": 4.36229336302448e-05, "loss": 61.8297, "step": 77310 }, { "epoch": 0.3123825838225253, "grad_norm": 821.444580078125, "learning_rate": 4.3620604632286024e-05, "loss": 92.3522, "step": 77320 }, { "epoch": 0.31242298508789296, "grad_norm": 809.0938720703125, "learning_rate": 4.361827527130896e-05, "loss": 75.1257, "step": 77330 }, { "epoch": 0.3124633863532606, "grad_norm": 725.1190795898438, "learning_rate": 4.361594554735905e-05, "loss": 77.7145, "step": 77340 }, { "epoch": 0.31250378761862824, "grad_norm": 888.7479858398438, "learning_rate": 4.361361546048169e-05, "loss": 95.2185, "step": 77350 }, { "epoch": 0.3125441888839958, "grad_norm": 2857.910400390625, "learning_rate": 4.361128501072231e-05, "loss": 68.9192, "step": 77360 }, { "epoch": 0.31258459014936346, "grad_norm": 506.87982177734375, "learning_rate": 4.360895419812635e-05, "loss": 58.4488, "step": 77370 }, { "epoch": 0.3126249914147311, "grad_norm": 749.842041015625, "learning_rate": 4.360662302273925e-05, "loss": 71.4121, "step": 77380 }, { "epoch": 0.31266539268009874, "grad_norm": 1351.6341552734375, "learning_rate": 4.360429148460645e-05, "loss": 75.3694, "step": 77390 }, { "epoch": 0.3127057939454664, "grad_norm": 460.1432189941406, "learning_rate": 4.3601959583773415e-05, "loss": 89.7496, "step": 77400 }, { "epoch": 0.312746195210834, "grad_norm": 1023.0693969726562, "learning_rate": 4.3599627320285596e-05, "loss": 110.523, "step": 77410 }, { "epoch": 0.31278659647620166, "grad_norm": 385.4729919433594, "learning_rate": 4.3597294694188475e-05, "loss": 37.7279, "step": 77420 }, { "epoch": 0.31282699774156925, "grad_norm": 1083.031005859375, "learning_rate": 4.359496170552751e-05, "loss": 65.8572, "step": 77430 }, { "epoch": 0.3128673990069369, "grad_norm": 571.319580078125, "learning_rate": 4.35926283543482e-05, "loss": 54.1978, "step": 77440 }, { "epoch": 0.3129078002723045, "grad_norm": 535.0450439453125, "learning_rate": 4.3590294640696025e-05, "loss": 88.7871, "step": 77450 }, { "epoch": 0.31294820153767217, "grad_norm": 625.9801025390625, "learning_rate": 4.358796056461648e-05, "loss": 73.7608, "step": 77460 }, { "epoch": 0.3129886028030398, "grad_norm": 569.495361328125, "learning_rate": 4.3585626126155084e-05, "loss": 78.8239, "step": 77470 }, { "epoch": 0.31302900406840745, "grad_norm": 632.259521484375, "learning_rate": 4.358329132535733e-05, "loss": 119.5843, "step": 77480 }, { "epoch": 0.31306940533377503, "grad_norm": 752.5601806640625, "learning_rate": 4.3580956162268746e-05, "loss": 41.869, "step": 77490 }, { "epoch": 0.31310980659914267, "grad_norm": 1639.3475341796875, "learning_rate": 4.357862063693486e-05, "loss": 104.7077, "step": 77500 }, { "epoch": 0.3131502078645103, "grad_norm": 1603.3714599609375, "learning_rate": 4.35762847494012e-05, "loss": 133.0148, "step": 77510 }, { "epoch": 0.31319060912987795, "grad_norm": 921.978515625, "learning_rate": 4.35739484997133e-05, "loss": 54.5854, "step": 77520 }, { "epoch": 0.3132310103952456, "grad_norm": 567.6099853515625, "learning_rate": 4.3571611887916705e-05, "loss": 45.054, "step": 77530 }, { "epoch": 0.31327141166061323, "grad_norm": 1245.0203857421875, "learning_rate": 4.356927491405699e-05, "loss": 54.2814, "step": 77540 }, { "epoch": 0.3133118129259808, "grad_norm": 798.68359375, "learning_rate": 4.356693757817969e-05, "loss": 68.1666, "step": 77550 }, { "epoch": 0.31335221419134845, "grad_norm": 838.5280151367188, "learning_rate": 4.356459988033039e-05, "loss": 55.8854, "step": 77560 }, { "epoch": 0.3133926154567161, "grad_norm": 497.4013366699219, "learning_rate": 4.356226182055465e-05, "loss": 43.9287, "step": 77570 }, { "epoch": 0.31343301672208373, "grad_norm": 2377.817626953125, "learning_rate": 4.355992339889806e-05, "loss": 89.1516, "step": 77580 }, { "epoch": 0.3134734179874514, "grad_norm": 1571.2337646484375, "learning_rate": 4.355758461540622e-05, "loss": 64.8556, "step": 77590 }, { "epoch": 0.313513819252819, "grad_norm": 2129.327880859375, "learning_rate": 4.355524547012471e-05, "loss": 88.4741, "step": 77600 }, { "epoch": 0.31355422051818665, "grad_norm": 445.593505859375, "learning_rate": 4.355290596309912e-05, "loss": 59.0199, "step": 77610 }, { "epoch": 0.31359462178355424, "grad_norm": 660.9097290039062, "learning_rate": 4.3550566094375086e-05, "loss": 65.1326, "step": 77620 }, { "epoch": 0.3136350230489219, "grad_norm": 670.1815185546875, "learning_rate": 4.3548225863998224e-05, "loss": 76.792, "step": 77630 }, { "epoch": 0.3136754243142895, "grad_norm": 882.048828125, "learning_rate": 4.354588527201414e-05, "loss": 70.3437, "step": 77640 }, { "epoch": 0.31371582557965716, "grad_norm": 831.5132446289062, "learning_rate": 4.3543544318468485e-05, "loss": 60.2682, "step": 77650 }, { "epoch": 0.3137562268450248, "grad_norm": 1422.9718017578125, "learning_rate": 4.354120300340688e-05, "loss": 69.6125, "step": 77660 }, { "epoch": 0.31379662811039244, "grad_norm": 703.7569580078125, "learning_rate": 4.353886132687497e-05, "loss": 46.6462, "step": 77670 }, { "epoch": 0.31383702937576, "grad_norm": 620.9691772460938, "learning_rate": 4.353651928891842e-05, "loss": 86.715, "step": 77680 }, { "epoch": 0.31387743064112766, "grad_norm": 586.15625, "learning_rate": 4.353417688958289e-05, "loss": 58.7201, "step": 77690 }, { "epoch": 0.3139178319064953, "grad_norm": 1025.6397705078125, "learning_rate": 4.3531834128914025e-05, "loss": 46.0605, "step": 77700 }, { "epoch": 0.31395823317186294, "grad_norm": 1030.073974609375, "learning_rate": 4.352949100695752e-05, "loss": 79.4799, "step": 77710 }, { "epoch": 0.3139986344372306, "grad_norm": 862.0059204101562, "learning_rate": 4.352714752375906e-05, "loss": 94.2031, "step": 77720 }, { "epoch": 0.3140390357025982, "grad_norm": 968.3876342773438, "learning_rate": 4.352480367936431e-05, "loss": 63.7685, "step": 77730 }, { "epoch": 0.31407943696796586, "grad_norm": 819.8076782226562, "learning_rate": 4.352245947381897e-05, "loss": 77.5755, "step": 77740 }, { "epoch": 0.31411983823333345, "grad_norm": 1108.0177001953125, "learning_rate": 4.352011490716875e-05, "loss": 70.4065, "step": 77750 }, { "epoch": 0.3141602394987011, "grad_norm": 698.9972534179688, "learning_rate": 4.351776997945936e-05, "loss": 72.4767, "step": 77760 }, { "epoch": 0.31420064076406873, "grad_norm": 1234.1883544921875, "learning_rate": 4.351542469073651e-05, "loss": 68.6958, "step": 77770 }, { "epoch": 0.31424104202943637, "grad_norm": 493.3515625, "learning_rate": 4.351307904104592e-05, "loss": 51.1366, "step": 77780 }, { "epoch": 0.314281443294804, "grad_norm": 1404.8785400390625, "learning_rate": 4.351073303043332e-05, "loss": 85.1898, "step": 77790 }, { "epoch": 0.31432184456017165, "grad_norm": 2445.363037109375, "learning_rate": 4.350838665894446e-05, "loss": 67.7399, "step": 77800 }, { "epoch": 0.31436224582553923, "grad_norm": 1352.8099365234375, "learning_rate": 4.350603992662506e-05, "loss": 52.9771, "step": 77810 }, { "epoch": 0.3144026470909069, "grad_norm": 969.1926879882812, "learning_rate": 4.3503692833520894e-05, "loss": 62.1888, "step": 77820 }, { "epoch": 0.3144430483562745, "grad_norm": 486.71185302734375, "learning_rate": 4.350134537967771e-05, "loss": 54.4507, "step": 77830 }, { "epoch": 0.31448344962164215, "grad_norm": 817.6029663085938, "learning_rate": 4.3498997565141267e-05, "loss": 42.0353, "step": 77840 }, { "epoch": 0.3145238508870098, "grad_norm": 1096.86669921875, "learning_rate": 4.349664938995734e-05, "loss": 79.652, "step": 77850 }, { "epoch": 0.31456425215237743, "grad_norm": 1449.87744140625, "learning_rate": 4.3494300854171715e-05, "loss": 93.2444, "step": 77860 }, { "epoch": 0.314604653417745, "grad_norm": 821.5718994140625, "learning_rate": 4.349195195783017e-05, "loss": 53.875, "step": 77870 }, { "epoch": 0.31464505468311266, "grad_norm": 1154.4615478515625, "learning_rate": 4.348960270097851e-05, "loss": 62.7061, "step": 77880 }, { "epoch": 0.3146854559484803, "grad_norm": 792.2052001953125, "learning_rate": 4.348725308366252e-05, "loss": 83.3605, "step": 77890 }, { "epoch": 0.31472585721384794, "grad_norm": 3039.34228515625, "learning_rate": 4.348490310592801e-05, "loss": 87.3251, "step": 77900 }, { "epoch": 0.3147662584792156, "grad_norm": 886.7009887695312, "learning_rate": 4.34825527678208e-05, "loss": 57.0282, "step": 77910 }, { "epoch": 0.3148066597445832, "grad_norm": 786.5516967773438, "learning_rate": 4.348020206938672e-05, "loss": 87.8153, "step": 77920 }, { "epoch": 0.31484706100995086, "grad_norm": 739.66015625, "learning_rate": 4.347785101067157e-05, "loss": 58.632, "step": 77930 }, { "epoch": 0.31488746227531844, "grad_norm": 578.0338134765625, "learning_rate": 4.347549959172121e-05, "loss": 80.8651, "step": 77940 }, { "epoch": 0.3149278635406861, "grad_norm": 1119.5306396484375, "learning_rate": 4.347314781258147e-05, "loss": 68.4437, "step": 77950 }, { "epoch": 0.3149682648060537, "grad_norm": 0.0, "learning_rate": 4.3470795673298206e-05, "loss": 57.3587, "step": 77960 }, { "epoch": 0.31500866607142136, "grad_norm": 681.5150146484375, "learning_rate": 4.3468443173917267e-05, "loss": 52.9524, "step": 77970 }, { "epoch": 0.315049067336789, "grad_norm": 412.8719787597656, "learning_rate": 4.346609031448452e-05, "loss": 36.4723, "step": 77980 }, { "epoch": 0.31508946860215664, "grad_norm": 945.4547119140625, "learning_rate": 4.346373709504584e-05, "loss": 67.0522, "step": 77990 }, { "epoch": 0.3151298698675242, "grad_norm": 992.1616821289062, "learning_rate": 4.3461383515647106e-05, "loss": 78.1435, "step": 78000 }, { "epoch": 0.31517027113289187, "grad_norm": 503.8204650878906, "learning_rate": 4.345902957633418e-05, "loss": 48.7498, "step": 78010 }, { "epoch": 0.3152106723982595, "grad_norm": 780.5662841796875, "learning_rate": 4.3456675277152973e-05, "loss": 56.2306, "step": 78020 }, { "epoch": 0.31525107366362715, "grad_norm": 454.04132080078125, "learning_rate": 4.345432061814938e-05, "loss": 85.0091, "step": 78030 }, { "epoch": 0.3152914749289948, "grad_norm": 1036.3134765625, "learning_rate": 4.345196559936932e-05, "loss": 53.9506, "step": 78040 }, { "epoch": 0.3153318761943624, "grad_norm": 929.426513671875, "learning_rate": 4.344961022085867e-05, "loss": 70.4333, "step": 78050 }, { "epoch": 0.31537227745973007, "grad_norm": 597.806640625, "learning_rate": 4.344725448266338e-05, "loss": 41.9415, "step": 78060 }, { "epoch": 0.31541267872509765, "grad_norm": 391.077392578125, "learning_rate": 4.3444898384829364e-05, "loss": 54.7683, "step": 78070 }, { "epoch": 0.3154530799904653, "grad_norm": 873.1908569335938, "learning_rate": 4.3442541927402566e-05, "loss": 60.2897, "step": 78080 }, { "epoch": 0.31549348125583293, "grad_norm": 464.908447265625, "learning_rate": 4.344018511042891e-05, "loss": 57.7049, "step": 78090 }, { "epoch": 0.31553388252120057, "grad_norm": 481.4813537597656, "learning_rate": 4.343782793395435e-05, "loss": 67.3648, "step": 78100 }, { "epoch": 0.3155742837865682, "grad_norm": 1816.134521484375, "learning_rate": 4.343547039802485e-05, "loss": 79.2945, "step": 78110 }, { "epoch": 0.31561468505193585, "grad_norm": 561.038330078125, "learning_rate": 4.3433112502686355e-05, "loss": 50.8454, "step": 78120 }, { "epoch": 0.31565508631730343, "grad_norm": 685.545654296875, "learning_rate": 4.3430754247984845e-05, "loss": 54.5997, "step": 78130 }, { "epoch": 0.3156954875826711, "grad_norm": 618.0608520507812, "learning_rate": 4.342839563396629e-05, "loss": 76.7737, "step": 78140 }, { "epoch": 0.3157358888480387, "grad_norm": 1280.976806640625, "learning_rate": 4.3426036660676686e-05, "loss": 54.933, "step": 78150 }, { "epoch": 0.31577629011340635, "grad_norm": 650.5822143554688, "learning_rate": 4.3423677328161996e-05, "loss": 71.7501, "step": 78160 }, { "epoch": 0.315816691378774, "grad_norm": 597.2694091796875, "learning_rate": 4.342131763646824e-05, "loss": 68.88, "step": 78170 }, { "epoch": 0.31585709264414163, "grad_norm": 1208.90576171875, "learning_rate": 4.341895758564141e-05, "loss": 67.8843, "step": 78180 }, { "epoch": 0.3158974939095092, "grad_norm": 830.9584350585938, "learning_rate": 4.3416597175727514e-05, "loss": 89.2304, "step": 78190 }, { "epoch": 0.31593789517487686, "grad_norm": 1917.3353271484375, "learning_rate": 4.3414236406772584e-05, "loss": 111.667, "step": 78200 }, { "epoch": 0.3159782964402445, "grad_norm": 924.7677612304688, "learning_rate": 4.3411875278822635e-05, "loss": 49.8145, "step": 78210 }, { "epoch": 0.31601869770561214, "grad_norm": 469.1351318359375, "learning_rate": 4.340951379192369e-05, "loss": 116.5447, "step": 78220 }, { "epoch": 0.3160590989709798, "grad_norm": 495.249755859375, "learning_rate": 4.34071519461218e-05, "loss": 63.0928, "step": 78230 }, { "epoch": 0.3160995002363474, "grad_norm": 1259.7349853515625, "learning_rate": 4.3404789741463e-05, "loss": 54.0104, "step": 78240 }, { "epoch": 0.31613990150171506, "grad_norm": 512.0302124023438, "learning_rate": 4.3402427177993366e-05, "loss": 48.6828, "step": 78250 }, { "epoch": 0.31618030276708264, "grad_norm": 1092.3133544921875, "learning_rate": 4.340006425575892e-05, "loss": 51.555, "step": 78260 }, { "epoch": 0.3162207040324503, "grad_norm": 668.2493896484375, "learning_rate": 4.339770097480576e-05, "loss": 67.2536, "step": 78270 }, { "epoch": 0.3162611052978179, "grad_norm": 832.0966796875, "learning_rate": 4.3395337335179945e-05, "loss": 78.7102, "step": 78280 }, { "epoch": 0.31630150656318556, "grad_norm": 931.7671508789062, "learning_rate": 4.339297333692756e-05, "loss": 57.0442, "step": 78290 }, { "epoch": 0.3163419078285532, "grad_norm": 1212.2379150390625, "learning_rate": 4.339060898009469e-05, "loss": 83.3376, "step": 78300 }, { "epoch": 0.31638230909392084, "grad_norm": 393.2463073730469, "learning_rate": 4.338824426472743e-05, "loss": 48.1462, "step": 78310 }, { "epoch": 0.31642271035928843, "grad_norm": 422.79998779296875, "learning_rate": 4.338587919087187e-05, "loss": 57.0146, "step": 78320 }, { "epoch": 0.31646311162465607, "grad_norm": 1542.5203857421875, "learning_rate": 4.3383513758574143e-05, "loss": 69.819, "step": 78330 }, { "epoch": 0.3165035128900237, "grad_norm": 858.1940307617188, "learning_rate": 4.338114796788035e-05, "loss": 95.8173, "step": 78340 }, { "epoch": 0.31654391415539135, "grad_norm": 393.74505615234375, "learning_rate": 4.337878181883661e-05, "loss": 46.5559, "step": 78350 }, { "epoch": 0.316584315420759, "grad_norm": 786.9148559570312, "learning_rate": 4.3376415311489056e-05, "loss": 68.8837, "step": 78360 }, { "epoch": 0.31662471668612663, "grad_norm": 1402.4737548828125, "learning_rate": 4.337404844588382e-05, "loss": 89.8514, "step": 78370 }, { "epoch": 0.31666511795149427, "grad_norm": 773.1746826171875, "learning_rate": 4.337168122206706e-05, "loss": 74.1317, "step": 78380 }, { "epoch": 0.31670551921686185, "grad_norm": 653.9239501953125, "learning_rate": 4.3369313640084916e-05, "loss": 51.6219, "step": 78390 }, { "epoch": 0.3167459204822295, "grad_norm": 664.2848510742188, "learning_rate": 4.336694569998354e-05, "loss": 74.1221, "step": 78400 }, { "epoch": 0.31678632174759713, "grad_norm": 609.3822021484375, "learning_rate": 4.3364577401809105e-05, "loss": 71.547, "step": 78410 }, { "epoch": 0.3168267230129648, "grad_norm": 1231.6363525390625, "learning_rate": 4.336220874560778e-05, "loss": 77.0097, "step": 78420 }, { "epoch": 0.3168671242783324, "grad_norm": 205.0146484375, "learning_rate": 4.3359839731425735e-05, "loss": 50.0911, "step": 78430 }, { "epoch": 0.31690752554370005, "grad_norm": 389.177978515625, "learning_rate": 4.335747035930916e-05, "loss": 59.2829, "step": 78440 }, { "epoch": 0.31694792680906764, "grad_norm": 600.1491088867188, "learning_rate": 4.3355100629304254e-05, "loss": 79.876, "step": 78450 }, { "epoch": 0.3169883280744353, "grad_norm": 544.903564453125, "learning_rate": 4.335273054145722e-05, "loss": 82.2558, "step": 78460 }, { "epoch": 0.3170287293398029, "grad_norm": 909.1273803710938, "learning_rate": 4.335036009581425e-05, "loss": 70.1611, "step": 78470 }, { "epoch": 0.31706913060517056, "grad_norm": 639.6483764648438, "learning_rate": 4.334798929242155e-05, "loss": 69.5685, "step": 78480 }, { "epoch": 0.3171095318705382, "grad_norm": 615.6561889648438, "learning_rate": 4.3345618131325374e-05, "loss": 76.2855, "step": 78490 }, { "epoch": 0.31714993313590584, "grad_norm": 3859.427490234375, "learning_rate": 4.334324661257191e-05, "loss": 76.5629, "step": 78500 }, { "epoch": 0.3171903344012734, "grad_norm": 1610.2098388671875, "learning_rate": 4.334087473620742e-05, "loss": 103.2507, "step": 78510 }, { "epoch": 0.31723073566664106, "grad_norm": 588.2677612304688, "learning_rate": 4.3338502502278134e-05, "loss": 51.7838, "step": 78520 }, { "epoch": 0.3172711369320087, "grad_norm": 192.1394500732422, "learning_rate": 4.333612991083029e-05, "loss": 68.0169, "step": 78530 }, { "epoch": 0.31731153819737634, "grad_norm": 960.3283081054688, "learning_rate": 4.3333756961910166e-05, "loss": 47.2101, "step": 78540 }, { "epoch": 0.317351939462744, "grad_norm": 1771.0201416015625, "learning_rate": 4.3331383655564006e-05, "loss": 60.5402, "step": 78550 }, { "epoch": 0.3173923407281116, "grad_norm": 1430.1416015625, "learning_rate": 4.3329009991838084e-05, "loss": 53.3842, "step": 78560 }, { "epoch": 0.31743274199347926, "grad_norm": 429.119873046875, "learning_rate": 4.3326635970778676e-05, "loss": 48.8891, "step": 78570 }, { "epoch": 0.31747314325884685, "grad_norm": 798.0321655273438, "learning_rate": 4.3324261592432056e-05, "loss": 68.9384, "step": 78580 }, { "epoch": 0.3175135445242145, "grad_norm": 999.2288208007812, "learning_rate": 4.3321886856844534e-05, "loss": 57.1692, "step": 78590 }, { "epoch": 0.3175539457895821, "grad_norm": 923.92724609375, "learning_rate": 4.331951176406239e-05, "loss": 43.5955, "step": 78600 }, { "epoch": 0.31759434705494977, "grad_norm": 967.3873901367188, "learning_rate": 4.331713631413194e-05, "loss": 47.511, "step": 78610 }, { "epoch": 0.3176347483203174, "grad_norm": 884.6947021484375, "learning_rate": 4.331476050709948e-05, "loss": 57.999, "step": 78620 }, { "epoch": 0.31767514958568505, "grad_norm": 611.1873168945312, "learning_rate": 4.331238434301134e-05, "loss": 77.5249, "step": 78630 }, { "epoch": 0.31771555085105263, "grad_norm": 913.7862548828125, "learning_rate": 4.3310007821913836e-05, "loss": 54.6169, "step": 78640 }, { "epoch": 0.31775595211642027, "grad_norm": 1171.2972412109375, "learning_rate": 4.330763094385329e-05, "loss": 80.6045, "step": 78650 }, { "epoch": 0.3177963533817879, "grad_norm": 507.2369384765625, "learning_rate": 4.330525370887607e-05, "loss": 46.0367, "step": 78660 }, { "epoch": 0.31783675464715555, "grad_norm": 564.16259765625, "learning_rate": 4.33028761170285e-05, "loss": 73.1167, "step": 78670 }, { "epoch": 0.3178771559125232, "grad_norm": 897.42919921875, "learning_rate": 4.330049816835694e-05, "loss": 92.9207, "step": 78680 }, { "epoch": 0.31791755717789083, "grad_norm": 3515.646240234375, "learning_rate": 4.3298119862907744e-05, "loss": 119.0089, "step": 78690 }, { "epoch": 0.31795795844325847, "grad_norm": 1024.9483642578125, "learning_rate": 4.329574120072728e-05, "loss": 58.4423, "step": 78700 }, { "epoch": 0.31799835970862605, "grad_norm": 969.68115234375, "learning_rate": 4.329336218186192e-05, "loss": 55.7534, "step": 78710 }, { "epoch": 0.3180387609739937, "grad_norm": 602.466552734375, "learning_rate": 4.3290982806358046e-05, "loss": 75.1919, "step": 78720 }, { "epoch": 0.31807916223936133, "grad_norm": 599.2578735351562, "learning_rate": 4.3288603074262054e-05, "loss": 105.2766, "step": 78730 }, { "epoch": 0.318119563504729, "grad_norm": 790.10693359375, "learning_rate": 4.328622298562033e-05, "loss": 53.7875, "step": 78740 }, { "epoch": 0.3181599647700966, "grad_norm": 551.5433349609375, "learning_rate": 4.3283842540479264e-05, "loss": 63.0458, "step": 78750 }, { "epoch": 0.31820036603546425, "grad_norm": 0.0, "learning_rate": 4.3281461738885274e-05, "loss": 61.5794, "step": 78760 }, { "epoch": 0.31824076730083184, "grad_norm": 681.5363159179688, "learning_rate": 4.327908058088479e-05, "loss": 67.191, "step": 78770 }, { "epoch": 0.3182811685661995, "grad_norm": 1063.3834228515625, "learning_rate": 4.327669906652421e-05, "loss": 82.7017, "step": 78780 }, { "epoch": 0.3183215698315671, "grad_norm": 500.39361572265625, "learning_rate": 4.327431719584997e-05, "loss": 85.2169, "step": 78790 }, { "epoch": 0.31836197109693476, "grad_norm": 1306.898681640625, "learning_rate": 4.3271934968908514e-05, "loss": 72.2316, "step": 78800 }, { "epoch": 0.3184023723623024, "grad_norm": 815.1703491210938, "learning_rate": 4.326955238574627e-05, "loss": 50.3086, "step": 78810 }, { "epoch": 0.31844277362767004, "grad_norm": 460.31005859375, "learning_rate": 4.32671694464097e-05, "loss": 64.6161, "step": 78820 }, { "epoch": 0.3184831748930376, "grad_norm": 2179.723388671875, "learning_rate": 4.326478615094526e-05, "loss": 49.6086, "step": 78830 }, { "epoch": 0.31852357615840526, "grad_norm": 1665.5799560546875, "learning_rate": 4.3262402499399404e-05, "loss": 97.2382, "step": 78840 }, { "epoch": 0.3185639774237729, "grad_norm": 1162.87841796875, "learning_rate": 4.326001849181862e-05, "loss": 57.7263, "step": 78850 }, { "epoch": 0.31860437868914054, "grad_norm": 333.8135681152344, "learning_rate": 4.325763412824937e-05, "loss": 49.9047, "step": 78860 }, { "epoch": 0.3186447799545082, "grad_norm": 510.8147888183594, "learning_rate": 4.325524940873814e-05, "loss": 82.3427, "step": 78870 }, { "epoch": 0.3186851812198758, "grad_norm": 434.4716491699219, "learning_rate": 4.325286433333142e-05, "loss": 71.3787, "step": 78880 }, { "epoch": 0.31872558248524346, "grad_norm": 725.61279296875, "learning_rate": 4.325047890207572e-05, "loss": 63.1463, "step": 78890 }, { "epoch": 0.31876598375061105, "grad_norm": 366.7152404785156, "learning_rate": 4.324809311501754e-05, "loss": 66.2424, "step": 78900 }, { "epoch": 0.3188063850159787, "grad_norm": 1318.6060791015625, "learning_rate": 4.3245706972203385e-05, "loss": 67.3569, "step": 78910 }, { "epoch": 0.31884678628134633, "grad_norm": 727.7313842773438, "learning_rate": 4.3243320473679785e-05, "loss": 63.7311, "step": 78920 }, { "epoch": 0.31888718754671397, "grad_norm": 1180.8690185546875, "learning_rate": 4.324093361949325e-05, "loss": 54.1359, "step": 78930 }, { "epoch": 0.3189275888120816, "grad_norm": 652.950439453125, "learning_rate": 4.323854640969033e-05, "loss": 66.1783, "step": 78940 }, { "epoch": 0.31896799007744925, "grad_norm": 554.9948120117188, "learning_rate": 4.323615884431756e-05, "loss": 77.2896, "step": 78950 }, { "epoch": 0.31900839134281683, "grad_norm": 657.7294921875, "learning_rate": 4.323377092342148e-05, "loss": 56.7781, "step": 78960 }, { "epoch": 0.3190487926081845, "grad_norm": 1056.711181640625, "learning_rate": 4.323138264704864e-05, "loss": 51.1476, "step": 78970 }, { "epoch": 0.3190891938735521, "grad_norm": 1820.9912109375, "learning_rate": 4.322899401524563e-05, "loss": 90.834, "step": 78980 }, { "epoch": 0.31912959513891975, "grad_norm": 1768.5189208984375, "learning_rate": 4.322660502805899e-05, "loss": 79.8078, "step": 78990 }, { "epoch": 0.3191699964042874, "grad_norm": 601.8983154296875, "learning_rate": 4.3224215685535294e-05, "loss": 58.5694, "step": 79000 }, { "epoch": 0.31921039766965503, "grad_norm": 1303.4908447265625, "learning_rate": 4.322182598772113e-05, "loss": 45.729, "step": 79010 }, { "epoch": 0.3192507989350226, "grad_norm": 480.8334655761719, "learning_rate": 4.321943593466309e-05, "loss": 84.9618, "step": 79020 }, { "epoch": 0.31929120020039026, "grad_norm": 932.1970825195312, "learning_rate": 4.321704552640777e-05, "loss": 67.6896, "step": 79030 }, { "epoch": 0.3193316014657579, "grad_norm": 1057.0179443359375, "learning_rate": 4.321465476300177e-05, "loss": 100.6307, "step": 79040 }, { "epoch": 0.31937200273112554, "grad_norm": 996.9810791015625, "learning_rate": 4.321226364449169e-05, "loss": 71.2474, "step": 79050 }, { "epoch": 0.3194124039964932, "grad_norm": 568.8705444335938, "learning_rate": 4.320987217092416e-05, "loss": 43.2181, "step": 79060 }, { "epoch": 0.3194528052618608, "grad_norm": 420.3672180175781, "learning_rate": 4.320748034234579e-05, "loss": 57.1254, "step": 79070 }, { "epoch": 0.31949320652722846, "grad_norm": 1757.289306640625, "learning_rate": 4.3205088158803226e-05, "loss": 68.5235, "step": 79080 }, { "epoch": 0.31953360779259604, "grad_norm": 660.7200927734375, "learning_rate": 4.3202695620343083e-05, "loss": 50.2219, "step": 79090 }, { "epoch": 0.3195740090579637, "grad_norm": 1262.357421875, "learning_rate": 4.320030272701203e-05, "loss": 58.4075, "step": 79100 }, { "epoch": 0.3196144103233313, "grad_norm": 898.780029296875, "learning_rate": 4.31979094788567e-05, "loss": 88.9766, "step": 79110 }, { "epoch": 0.31965481158869896, "grad_norm": 535.6188354492188, "learning_rate": 4.319551587592376e-05, "loss": 30.2677, "step": 79120 }, { "epoch": 0.3196952128540666, "grad_norm": 559.2113647460938, "learning_rate": 4.319312191825987e-05, "loss": 78.3262, "step": 79130 }, { "epoch": 0.31973561411943424, "grad_norm": 952.9019775390625, "learning_rate": 4.31907276059117e-05, "loss": 67.9382, "step": 79140 }, { "epoch": 0.3197760153848018, "grad_norm": 959.8912353515625, "learning_rate": 4.318833293892592e-05, "loss": 68.8999, "step": 79150 }, { "epoch": 0.31981641665016947, "grad_norm": 985.2177124023438, "learning_rate": 4.318593791734924e-05, "loss": 73.2616, "step": 79160 }, { "epoch": 0.3198568179155371, "grad_norm": 1765.3321533203125, "learning_rate": 4.318354254122833e-05, "loss": 101.728, "step": 79170 }, { "epoch": 0.31989721918090475, "grad_norm": 2424.275390625, "learning_rate": 4.31811468106099e-05, "loss": 40.6484, "step": 79180 }, { "epoch": 0.3199376204462724, "grad_norm": 529.9296264648438, "learning_rate": 4.317875072554065e-05, "loss": 56.8446, "step": 79190 }, { "epoch": 0.31997802171164, "grad_norm": 988.1986083984375, "learning_rate": 4.31763542860673e-05, "loss": 89.1671, "step": 79200 }, { "epoch": 0.32001842297700767, "grad_norm": 954.2215576171875, "learning_rate": 4.317395749223656e-05, "loss": 62.5503, "step": 79210 }, { "epoch": 0.32005882424237525, "grad_norm": 1343.7386474609375, "learning_rate": 4.3171560344095164e-05, "loss": 85.5978, "step": 79220 }, { "epoch": 0.3200992255077429, "grad_norm": 992.0386962890625, "learning_rate": 4.3169162841689846e-05, "loss": 77.3141, "step": 79230 }, { "epoch": 0.32013962677311053, "grad_norm": 684.0596313476562, "learning_rate": 4.3166764985067343e-05, "loss": 68.0206, "step": 79240 }, { "epoch": 0.32018002803847817, "grad_norm": 544.3250732421875, "learning_rate": 4.31643667742744e-05, "loss": 64.4232, "step": 79250 }, { "epoch": 0.3202204293038458, "grad_norm": 485.05413818359375, "learning_rate": 4.3161968209357776e-05, "loss": 72.3071, "step": 79260 }, { "epoch": 0.32026083056921345, "grad_norm": 1109.7979736328125, "learning_rate": 4.315956929036423e-05, "loss": 87.8763, "step": 79270 }, { "epoch": 0.32030123183458103, "grad_norm": 735.7939453125, "learning_rate": 4.3157170017340545e-05, "loss": 60.9775, "step": 79280 }, { "epoch": 0.3203416330999487, "grad_norm": 515.8373413085938, "learning_rate": 4.3154770390333463e-05, "loss": 57.4723, "step": 79290 }, { "epoch": 0.3203820343653163, "grad_norm": 11893.99609375, "learning_rate": 4.3152370409389795e-05, "loss": 97.6898, "step": 79300 }, { "epoch": 0.32042243563068395, "grad_norm": 583.2001953125, "learning_rate": 4.3149970074556324e-05, "loss": 104.1178, "step": 79310 }, { "epoch": 0.3204628368960516, "grad_norm": 1154.815185546875, "learning_rate": 4.314756938587984e-05, "loss": 47.7879, "step": 79320 }, { "epoch": 0.32050323816141923, "grad_norm": 1132.3023681640625, "learning_rate": 4.314516834340715e-05, "loss": 59.4125, "step": 79330 }, { "epoch": 0.3205436394267868, "grad_norm": 530.7276611328125, "learning_rate": 4.3142766947185056e-05, "loss": 76.4587, "step": 79340 }, { "epoch": 0.32058404069215446, "grad_norm": 943.7814331054688, "learning_rate": 4.314036519726038e-05, "loss": 83.0291, "step": 79350 }, { "epoch": 0.3206244419575221, "grad_norm": 1113.9444580078125, "learning_rate": 4.3137963093679945e-05, "loss": 62.54, "step": 79360 }, { "epoch": 0.32066484322288974, "grad_norm": 839.0965576171875, "learning_rate": 4.313556063649059e-05, "loss": 65.0847, "step": 79370 }, { "epoch": 0.3207052444882574, "grad_norm": 645.9111938476562, "learning_rate": 4.313315782573913e-05, "loss": 66.1717, "step": 79380 }, { "epoch": 0.320745645753625, "grad_norm": 1160.1456298828125, "learning_rate": 4.3130754661472435e-05, "loss": 72.6717, "step": 79390 }, { "epoch": 0.32078604701899266, "grad_norm": 861.6610107421875, "learning_rate": 4.3128351143737335e-05, "loss": 94.8734, "step": 79400 }, { "epoch": 0.32082644828436024, "grad_norm": 1239.6209716796875, "learning_rate": 4.31259472725807e-05, "loss": 65.5956, "step": 79410 }, { "epoch": 0.3208668495497279, "grad_norm": 1918.2100830078125, "learning_rate": 4.312354304804939e-05, "loss": 98.4281, "step": 79420 }, { "epoch": 0.3209072508150955, "grad_norm": 496.2858581542969, "learning_rate": 4.312113847019028e-05, "loss": 64.9097, "step": 79430 }, { "epoch": 0.32094765208046316, "grad_norm": 568.856689453125, "learning_rate": 4.3118733539050244e-05, "loss": 80.5664, "step": 79440 }, { "epoch": 0.3209880533458308, "grad_norm": 647.578369140625, "learning_rate": 4.311632825467617e-05, "loss": 81.0642, "step": 79450 }, { "epoch": 0.32102845461119844, "grad_norm": 606.3375854492188, "learning_rate": 4.311392261711495e-05, "loss": 87.3034, "step": 79460 }, { "epoch": 0.32106885587656603, "grad_norm": 486.86273193359375, "learning_rate": 4.3111516626413485e-05, "loss": 36.4118, "step": 79470 }, { "epoch": 0.32110925714193367, "grad_norm": 527.7968139648438, "learning_rate": 4.310911028261867e-05, "loss": 75.8701, "step": 79480 }, { "epoch": 0.3211496584073013, "grad_norm": 899.1015014648438, "learning_rate": 4.310670358577744e-05, "loss": 64.9072, "step": 79490 }, { "epoch": 0.32119005967266895, "grad_norm": 491.0596923828125, "learning_rate": 4.3104296535936695e-05, "loss": 62.814, "step": 79500 }, { "epoch": 0.3212304609380366, "grad_norm": 643.3750610351562, "learning_rate": 4.3101889133143365e-05, "loss": 62.7745, "step": 79510 }, { "epoch": 0.32127086220340423, "grad_norm": 584.9881591796875, "learning_rate": 4.3099481377444384e-05, "loss": 70.7787, "step": 79520 }, { "epoch": 0.32131126346877187, "grad_norm": 0.0, "learning_rate": 4.30970732688867e-05, "loss": 72.2548, "step": 79530 }, { "epoch": 0.32135166473413945, "grad_norm": 551.8460693359375, "learning_rate": 4.309466480751726e-05, "loss": 57.843, "step": 79540 }, { "epoch": 0.3213920659995071, "grad_norm": 1137.9078369140625, "learning_rate": 4.309225599338301e-05, "loss": 67.0156, "step": 79550 }, { "epoch": 0.32143246726487473, "grad_norm": 1040.0809326171875, "learning_rate": 4.308984682653092e-05, "loss": 51.4768, "step": 79560 }, { "epoch": 0.3214728685302424, "grad_norm": 496.1695556640625, "learning_rate": 4.308743730700795e-05, "loss": 49.8229, "step": 79570 }, { "epoch": 0.32151326979561, "grad_norm": 928.9419555664062, "learning_rate": 4.308502743486107e-05, "loss": 68.5493, "step": 79580 }, { "epoch": 0.32155367106097765, "grad_norm": 554.7418212890625, "learning_rate": 4.308261721013728e-05, "loss": 58.4424, "step": 79590 }, { "epoch": 0.32159407232634524, "grad_norm": 530.4016723632812, "learning_rate": 4.3080206632883554e-05, "loss": 71.472, "step": 79600 }, { "epoch": 0.3216344735917129, "grad_norm": 823.0756225585938, "learning_rate": 4.307779570314689e-05, "loss": 54.4288, "step": 79610 }, { "epoch": 0.3216748748570805, "grad_norm": 523.12744140625, "learning_rate": 4.307538442097429e-05, "loss": 52.3814, "step": 79620 }, { "epoch": 0.32171527612244816, "grad_norm": 1036.486083984375, "learning_rate": 4.307297278641277e-05, "loss": 56.4284, "step": 79630 }, { "epoch": 0.3217556773878158, "grad_norm": 886.7937622070312, "learning_rate": 4.307056079950934e-05, "loss": 61.4247, "step": 79640 }, { "epoch": 0.32179607865318344, "grad_norm": 1947.759033203125, "learning_rate": 4.306814846031102e-05, "loss": 69.1207, "step": 79650 }, { "epoch": 0.321836479918551, "grad_norm": 1469.478759765625, "learning_rate": 4.306573576886484e-05, "loss": 76.3007, "step": 79660 }, { "epoch": 0.32187688118391866, "grad_norm": 883.7667846679688, "learning_rate": 4.306332272521785e-05, "loss": 45.0061, "step": 79670 }, { "epoch": 0.3219172824492863, "grad_norm": 418.3061828613281, "learning_rate": 4.306090932941708e-05, "loss": 45.161, "step": 79680 }, { "epoch": 0.32195768371465394, "grad_norm": 504.8469543457031, "learning_rate": 4.3058495581509586e-05, "loss": 89.2983, "step": 79690 }, { "epoch": 0.3219980849800216, "grad_norm": 510.06805419921875, "learning_rate": 4.305608148154242e-05, "loss": 57.7559, "step": 79700 }, { "epoch": 0.3220384862453892, "grad_norm": 1332.9613037109375, "learning_rate": 4.305366702956265e-05, "loss": 54.3312, "step": 79710 }, { "epoch": 0.32207888751075686, "grad_norm": 608.1598510742188, "learning_rate": 4.305125222561736e-05, "loss": 63.2103, "step": 79720 }, { "epoch": 0.32211928877612445, "grad_norm": 504.0339050292969, "learning_rate": 4.304883706975359e-05, "loss": 45.3614, "step": 79730 }, { "epoch": 0.3221596900414921, "grad_norm": 865.779052734375, "learning_rate": 4.304642156201847e-05, "loss": 77.652, "step": 79740 }, { "epoch": 0.3222000913068597, "grad_norm": 1158.892333984375, "learning_rate": 4.304400570245906e-05, "loss": 46.2974, "step": 79750 }, { "epoch": 0.32224049257222737, "grad_norm": 1002.193603515625, "learning_rate": 4.304158949112247e-05, "loss": 78.2484, "step": 79760 }, { "epoch": 0.322280893837595, "grad_norm": 1321.477294921875, "learning_rate": 4.3039172928055805e-05, "loss": 72.8844, "step": 79770 }, { "epoch": 0.32232129510296265, "grad_norm": 188.91424560546875, "learning_rate": 4.303675601330618e-05, "loss": 47.1185, "step": 79780 }, { "epoch": 0.32236169636833023, "grad_norm": 535.0440673828125, "learning_rate": 4.3034338746920707e-05, "loss": 63.9909, "step": 79790 }, { "epoch": 0.32240209763369787, "grad_norm": 474.7372741699219, "learning_rate": 4.303192112894652e-05, "loss": 73.8586, "step": 79800 }, { "epoch": 0.3224424988990655, "grad_norm": 813.8701782226562, "learning_rate": 4.302950315943074e-05, "loss": 90.6143, "step": 79810 }, { "epoch": 0.32248290016443315, "grad_norm": 344.40325927734375, "learning_rate": 4.3027084838420516e-05, "loss": 36.8543, "step": 79820 }, { "epoch": 0.3225233014298008, "grad_norm": 300.6861267089844, "learning_rate": 4.302466616596299e-05, "loss": 48.8195, "step": 79830 }, { "epoch": 0.32256370269516843, "grad_norm": 782.69970703125, "learning_rate": 4.302224714210532e-05, "loss": 59.1578, "step": 79840 }, { "epoch": 0.32260410396053607, "grad_norm": 801.529541015625, "learning_rate": 4.301982776689467e-05, "loss": 44.7871, "step": 79850 }, { "epoch": 0.32264450522590365, "grad_norm": 926.8488159179688, "learning_rate": 4.301740804037819e-05, "loss": 65.3786, "step": 79860 }, { "epoch": 0.3226849064912713, "grad_norm": 917.0177001953125, "learning_rate": 4.301498796260307e-05, "loss": 55.9066, "step": 79870 }, { "epoch": 0.32272530775663893, "grad_norm": 0.0, "learning_rate": 4.301256753361649e-05, "loss": 50.5934, "step": 79880 }, { "epoch": 0.3227657090220066, "grad_norm": 771.5294799804688, "learning_rate": 4.301014675346562e-05, "loss": 109.5298, "step": 79890 }, { "epoch": 0.3228061102873742, "grad_norm": 562.3530883789062, "learning_rate": 4.3007725622197674e-05, "loss": 43.964, "step": 79900 }, { "epoch": 0.32284651155274185, "grad_norm": 468.8666076660156, "learning_rate": 4.300530413985985e-05, "loss": 49.0986, "step": 79910 }, { "epoch": 0.32288691281810944, "grad_norm": 678.0904541015625, "learning_rate": 4.3002882306499345e-05, "loss": 91.667, "step": 79920 }, { "epoch": 0.3229273140834771, "grad_norm": 601.5751342773438, "learning_rate": 4.300046012216338e-05, "loss": 52.2733, "step": 79930 }, { "epoch": 0.3229677153488447, "grad_norm": 1330.5103759765625, "learning_rate": 4.299803758689919e-05, "loss": 82.389, "step": 79940 }, { "epoch": 0.32300811661421236, "grad_norm": 509.53277587890625, "learning_rate": 4.299561470075397e-05, "loss": 78.0836, "step": 79950 }, { "epoch": 0.32304851787958, "grad_norm": 589.202392578125, "learning_rate": 4.2993191463774997e-05, "loss": 60.7983, "step": 79960 }, { "epoch": 0.32308891914494764, "grad_norm": 738.194580078125, "learning_rate": 4.299076787600948e-05, "loss": 77.6573, "step": 79970 }, { "epoch": 0.3231293204103152, "grad_norm": 1307.095947265625, "learning_rate": 4.2988343937504686e-05, "loss": 61.8344, "step": 79980 }, { "epoch": 0.32316972167568286, "grad_norm": 656.2578125, "learning_rate": 4.298591964830787e-05, "loss": 95.2794, "step": 79990 }, { "epoch": 0.3232101229410505, "grad_norm": 1574.1217041015625, "learning_rate": 4.2983495008466276e-05, "loss": 46.9904, "step": 80000 }, { "epoch": 0.32325052420641814, "grad_norm": 678.7640380859375, "learning_rate": 4.2981070018027204e-05, "loss": 83.523, "step": 80010 }, { "epoch": 0.3232909254717858, "grad_norm": 802.5941772460938, "learning_rate": 4.29786446770379e-05, "loss": 47.3945, "step": 80020 }, { "epoch": 0.3233313267371534, "grad_norm": 880.6734008789062, "learning_rate": 4.297621898554568e-05, "loss": 69.5324, "step": 80030 }, { "epoch": 0.32337172800252106, "grad_norm": 1087.5975341796875, "learning_rate": 4.297379294359781e-05, "loss": 68.7424, "step": 80040 }, { "epoch": 0.32341212926788865, "grad_norm": 1480.21826171875, "learning_rate": 4.297136655124159e-05, "loss": 68.1467, "step": 80050 }, { "epoch": 0.3234525305332563, "grad_norm": 375.3006286621094, "learning_rate": 4.2968939808524323e-05, "loss": 58.7787, "step": 80060 }, { "epoch": 0.32349293179862393, "grad_norm": 811.9970092773438, "learning_rate": 4.296651271549333e-05, "loss": 72.5431, "step": 80070 }, { "epoch": 0.32353333306399157, "grad_norm": 533.86474609375, "learning_rate": 4.296408527219592e-05, "loss": 87.0758, "step": 80080 }, { "epoch": 0.3235737343293592, "grad_norm": 850.876220703125, "learning_rate": 4.296165747867942e-05, "loss": 41.6743, "step": 80090 }, { "epoch": 0.32361413559472685, "grad_norm": 924.040283203125, "learning_rate": 4.2959229334991156e-05, "loss": 69.6225, "step": 80100 }, { "epoch": 0.32365453686009443, "grad_norm": 351.706298828125, "learning_rate": 4.295680084117847e-05, "loss": 60.5032, "step": 80110 }, { "epoch": 0.3236949381254621, "grad_norm": 893.626708984375, "learning_rate": 4.295437199728871e-05, "loss": 77.7679, "step": 80120 }, { "epoch": 0.3237353393908297, "grad_norm": 652.7449951171875, "learning_rate": 4.2951942803369225e-05, "loss": 54.3826, "step": 80130 }, { "epoch": 0.32377574065619735, "grad_norm": 1125.824462890625, "learning_rate": 4.294951325946737e-05, "loss": 93.5955, "step": 80140 }, { "epoch": 0.323816141921565, "grad_norm": 1012.917236328125, "learning_rate": 4.2947083365630514e-05, "loss": 61.9179, "step": 80150 }, { "epoch": 0.32385654318693263, "grad_norm": 807.2591552734375, "learning_rate": 4.294465312190603e-05, "loss": 46.5419, "step": 80160 }, { "epoch": 0.3238969444523003, "grad_norm": 317.6124572753906, "learning_rate": 4.294222252834129e-05, "loss": 68.4578, "step": 80170 }, { "epoch": 0.32393734571766786, "grad_norm": 2099.4423828125, "learning_rate": 4.293979158498369e-05, "loss": 87.771, "step": 80180 }, { "epoch": 0.3239777469830355, "grad_norm": 482.209228515625, "learning_rate": 4.293736029188061e-05, "loss": 73.7765, "step": 80190 }, { "epoch": 0.32401814824840314, "grad_norm": 1821.595947265625, "learning_rate": 4.293492864907947e-05, "loss": 84.6861, "step": 80200 }, { "epoch": 0.3240585495137708, "grad_norm": 640.3442993164062, "learning_rate": 4.293249665662765e-05, "loss": 55.7724, "step": 80210 }, { "epoch": 0.3240989507791384, "grad_norm": 974.443115234375, "learning_rate": 4.293006431457258e-05, "loss": 89.1751, "step": 80220 }, { "epoch": 0.32413935204450606, "grad_norm": 625.10888671875, "learning_rate": 4.2927631622961674e-05, "loss": 77.496, "step": 80230 }, { "epoch": 0.32417975330987364, "grad_norm": 267.2085266113281, "learning_rate": 4.292519858184236e-05, "loss": 54.031, "step": 80240 }, { "epoch": 0.3242201545752413, "grad_norm": 543.6681518554688, "learning_rate": 4.292276519126207e-05, "loss": 61.7213, "step": 80250 }, { "epoch": 0.3242605558406089, "grad_norm": 1130.407958984375, "learning_rate": 4.292033145126825e-05, "loss": 61.5954, "step": 80260 }, { "epoch": 0.32430095710597656, "grad_norm": 590.1688842773438, "learning_rate": 4.2917897361908335e-05, "loss": 54.5805, "step": 80270 }, { "epoch": 0.3243413583713442, "grad_norm": 1286.2869873046875, "learning_rate": 4.291546292322979e-05, "loss": 94.4061, "step": 80280 }, { "epoch": 0.32438175963671184, "grad_norm": 520.0972290039062, "learning_rate": 4.2913028135280076e-05, "loss": 49.1347, "step": 80290 }, { "epoch": 0.3244221609020794, "grad_norm": 775.9698486328125, "learning_rate": 4.291059299810665e-05, "loss": 58.7011, "step": 80300 }, { "epoch": 0.32446256216744707, "grad_norm": 276.6414794921875, "learning_rate": 4.2908157511757e-05, "loss": 70.6107, "step": 80310 }, { "epoch": 0.3245029634328147, "grad_norm": 446.62158203125, "learning_rate": 4.290572167627859e-05, "loss": 59.438, "step": 80320 }, { "epoch": 0.32454336469818235, "grad_norm": 692.0556030273438, "learning_rate": 4.290328549171893e-05, "loss": 62.021, "step": 80330 }, { "epoch": 0.32458376596355, "grad_norm": 446.0535583496094, "learning_rate": 4.2900848958125485e-05, "loss": 56.7572, "step": 80340 }, { "epoch": 0.3246241672289176, "grad_norm": 477.32568359375, "learning_rate": 4.289841207554578e-05, "loss": 110.483, "step": 80350 }, { "epoch": 0.32466456849428527, "grad_norm": 700.8925170898438, "learning_rate": 4.289597484402732e-05, "loss": 65.4878, "step": 80360 }, { "epoch": 0.32470496975965285, "grad_norm": 1176.870849609375, "learning_rate": 4.289353726361762e-05, "loss": 61.7405, "step": 80370 }, { "epoch": 0.3247453710250205, "grad_norm": 574.3339233398438, "learning_rate": 4.289109933436419e-05, "loss": 58.5538, "step": 80380 }, { "epoch": 0.32478577229038813, "grad_norm": 928.0294799804688, "learning_rate": 4.2888661056314574e-05, "loss": 62.1923, "step": 80390 }, { "epoch": 0.32482617355575577, "grad_norm": 1341.2864990234375, "learning_rate": 4.2886222429516296e-05, "loss": 67.9961, "step": 80400 }, { "epoch": 0.3248665748211234, "grad_norm": 575.213134765625, "learning_rate": 4.2883783454016915e-05, "loss": 43.7556, "step": 80410 }, { "epoch": 0.32490697608649105, "grad_norm": 637.3678588867188, "learning_rate": 4.288134412986395e-05, "loss": 72.743, "step": 80420 }, { "epoch": 0.32494737735185864, "grad_norm": 936.275146484375, "learning_rate": 4.287890445710499e-05, "loss": 89.083, "step": 80430 }, { "epoch": 0.3249877786172263, "grad_norm": 587.6963500976562, "learning_rate": 4.287646443578758e-05, "loss": 40.0728, "step": 80440 }, { "epoch": 0.3250281798825939, "grad_norm": 465.3719177246094, "learning_rate": 4.287402406595929e-05, "loss": 53.9417, "step": 80450 }, { "epoch": 0.32506858114796156, "grad_norm": 862.9938354492188, "learning_rate": 4.28715833476677e-05, "loss": 59.9648, "step": 80460 }, { "epoch": 0.3251089824133292, "grad_norm": 902.8772583007812, "learning_rate": 4.2869142280960396e-05, "loss": 53.2347, "step": 80470 }, { "epoch": 0.32514938367869683, "grad_norm": 827.6386108398438, "learning_rate": 4.2866700865884954e-05, "loss": 64.4303, "step": 80480 }, { "epoch": 0.3251897849440645, "grad_norm": 1176.3564453125, "learning_rate": 4.2864259102488984e-05, "loss": 70.9066, "step": 80490 }, { "epoch": 0.32523018620943206, "grad_norm": 2174.75244140625, "learning_rate": 4.2861816990820084e-05, "loss": 82.248, "step": 80500 }, { "epoch": 0.3252705874747997, "grad_norm": 1319.863037109375, "learning_rate": 4.285937453092587e-05, "loss": 73.5724, "step": 80510 }, { "epoch": 0.32531098874016734, "grad_norm": 802.91845703125, "learning_rate": 4.285693172285396e-05, "loss": 62.4872, "step": 80520 }, { "epoch": 0.325351390005535, "grad_norm": 735.5004272460938, "learning_rate": 4.2854488566651965e-05, "loss": 80.5372, "step": 80530 }, { "epoch": 0.3253917912709026, "grad_norm": 813.9871215820312, "learning_rate": 4.2852045062367516e-05, "loss": 59.1823, "step": 80540 }, { "epoch": 0.32543219253627026, "grad_norm": 911.4254150390625, "learning_rate": 4.2849601210048274e-05, "loss": 73.9285, "step": 80550 }, { "epoch": 0.32547259380163784, "grad_norm": 1268.9906005859375, "learning_rate": 4.2847157009741856e-05, "loss": 63.6592, "step": 80560 }, { "epoch": 0.3255129950670055, "grad_norm": 514.0250244140625, "learning_rate": 4.2844712461495926e-05, "loss": 66.8111, "step": 80570 }, { "epoch": 0.3255533963323731, "grad_norm": 1467.4144287109375, "learning_rate": 4.284226756535814e-05, "loss": 66.5214, "step": 80580 }, { "epoch": 0.32559379759774076, "grad_norm": 416.23828125, "learning_rate": 4.283982232137617e-05, "loss": 47.4183, "step": 80590 }, { "epoch": 0.3256341988631084, "grad_norm": 1595.3319091796875, "learning_rate": 4.283737672959766e-05, "loss": 72.357, "step": 80600 }, { "epoch": 0.32567460012847604, "grad_norm": 1119.335693359375, "learning_rate": 4.283493079007032e-05, "loss": 51.9857, "step": 80610 }, { "epoch": 0.32571500139384363, "grad_norm": 838.667236328125, "learning_rate": 4.283248450284182e-05, "loss": 55.8164, "step": 80620 }, { "epoch": 0.32575540265921127, "grad_norm": 394.1136779785156, "learning_rate": 4.283003786795986e-05, "loss": 49.545, "step": 80630 }, { "epoch": 0.3257958039245789, "grad_norm": 874.3743286132812, "learning_rate": 4.2827590885472125e-05, "loss": 64.6963, "step": 80640 }, { "epoch": 0.32583620518994655, "grad_norm": 1160.2918701171875, "learning_rate": 4.2825143555426326e-05, "loss": 61.9271, "step": 80650 }, { "epoch": 0.3258766064553142, "grad_norm": 1602.0166015625, "learning_rate": 4.2822695877870177e-05, "loss": 51.2635, "step": 80660 }, { "epoch": 0.32591700772068183, "grad_norm": 382.5502014160156, "learning_rate": 4.28202478528514e-05, "loss": 54.0187, "step": 80670 }, { "epoch": 0.32595740898604947, "grad_norm": 1229.9798583984375, "learning_rate": 4.281779948041772e-05, "loss": 60.0021, "step": 80680 }, { "epoch": 0.32599781025141705, "grad_norm": 565.9646606445312, "learning_rate": 4.2815350760616864e-05, "loss": 61.7397, "step": 80690 }, { "epoch": 0.3260382115167847, "grad_norm": 769.6617431640625, "learning_rate": 4.2812901693496564e-05, "loss": 60.3849, "step": 80700 }, { "epoch": 0.32607861278215233, "grad_norm": 648.3060913085938, "learning_rate": 4.281045227910459e-05, "loss": 75.0846, "step": 80710 }, { "epoch": 0.32611901404752, "grad_norm": 3067.703857421875, "learning_rate": 4.2808002517488667e-05, "loss": 90.7922, "step": 80720 }, { "epoch": 0.3261594153128876, "grad_norm": 109.4122085571289, "learning_rate": 4.280555240869657e-05, "loss": 69.8678, "step": 80730 }, { "epoch": 0.32619981657825525, "grad_norm": 733.5319213867188, "learning_rate": 4.280310195277606e-05, "loss": 81.5929, "step": 80740 }, { "epoch": 0.32624021784362284, "grad_norm": 610.775634765625, "learning_rate": 4.280065114977492e-05, "loss": 57.4617, "step": 80750 }, { "epoch": 0.3262806191089905, "grad_norm": 551.3561401367188, "learning_rate": 4.279819999974091e-05, "loss": 61.8333, "step": 80760 }, { "epoch": 0.3263210203743581, "grad_norm": 685.814697265625, "learning_rate": 4.279574850272183e-05, "loss": 67.4173, "step": 80770 }, { "epoch": 0.32636142163972576, "grad_norm": 1033.493896484375, "learning_rate": 4.279329665876548e-05, "loss": 53.4654, "step": 80780 }, { "epoch": 0.3264018229050934, "grad_norm": 485.32745361328125, "learning_rate": 4.2790844467919646e-05, "loss": 54.8732, "step": 80790 }, { "epoch": 0.32644222417046104, "grad_norm": 0.0, "learning_rate": 4.278839193023214e-05, "loss": 75.1927, "step": 80800 }, { "epoch": 0.3264826254358287, "grad_norm": 743.0775756835938, "learning_rate": 4.278593904575077e-05, "loss": 44.6109, "step": 80810 }, { "epoch": 0.32652302670119626, "grad_norm": 1801.4681396484375, "learning_rate": 4.278348581452337e-05, "loss": 76.2488, "step": 80820 }, { "epoch": 0.3265634279665639, "grad_norm": 1143.9945068359375, "learning_rate": 4.278103223659775e-05, "loss": 68.8127, "step": 80830 }, { "epoch": 0.32660382923193154, "grad_norm": 568.1923217773438, "learning_rate": 4.2778578312021754e-05, "loss": 46.9349, "step": 80840 }, { "epoch": 0.3266442304972992, "grad_norm": 758.5953979492188, "learning_rate": 4.277612404084322e-05, "loss": 67.1757, "step": 80850 }, { "epoch": 0.3266846317626668, "grad_norm": 696.4761352539062, "learning_rate": 4.277366942311001e-05, "loss": 62.2708, "step": 80860 }, { "epoch": 0.32672503302803446, "grad_norm": 664.263671875, "learning_rate": 4.277121445886995e-05, "loss": 70.6147, "step": 80870 }, { "epoch": 0.32676543429340205, "grad_norm": 184.5323944091797, "learning_rate": 4.2768759148170915e-05, "loss": 42.6514, "step": 80880 }, { "epoch": 0.3268058355587697, "grad_norm": 586.1389770507812, "learning_rate": 4.276630349106078e-05, "loss": 38.8002, "step": 80890 }, { "epoch": 0.3268462368241373, "grad_norm": 1424.904541015625, "learning_rate": 4.276384748758741e-05, "loss": 77.3322, "step": 80900 }, { "epoch": 0.32688663808950497, "grad_norm": 859.6981811523438, "learning_rate": 4.2761391137798676e-05, "loss": 86.453, "step": 80910 }, { "epoch": 0.3269270393548726, "grad_norm": 1004.2837524414062, "learning_rate": 4.2758934441742496e-05, "loss": 56.4649, "step": 80920 }, { "epoch": 0.32696744062024025, "grad_norm": 1162.806396484375, "learning_rate": 4.2756477399466735e-05, "loss": 97.9132, "step": 80930 }, { "epoch": 0.32700784188560783, "grad_norm": 425.3879699707031, "learning_rate": 4.275402001101931e-05, "loss": 68.8998, "step": 80940 }, { "epoch": 0.32704824315097547, "grad_norm": 645.8928833007812, "learning_rate": 4.2751562276448124e-05, "loss": 51.0803, "step": 80950 }, { "epoch": 0.3270886444163431, "grad_norm": 2057.58837890625, "learning_rate": 4.274910419580108e-05, "loss": 60.8407, "step": 80960 }, { "epoch": 0.32712904568171075, "grad_norm": 1685.50048828125, "learning_rate": 4.274664576912613e-05, "loss": 66.7414, "step": 80970 }, { "epoch": 0.3271694469470784, "grad_norm": 1081.0294189453125, "learning_rate": 4.2744186996471174e-05, "loss": 87.3173, "step": 80980 }, { "epoch": 0.32720984821244603, "grad_norm": 1577.8040771484375, "learning_rate": 4.2741727877884155e-05, "loss": 85.7177, "step": 80990 }, { "epoch": 0.32725024947781367, "grad_norm": 547.41259765625, "learning_rate": 4.273926841341302e-05, "loss": 47.8796, "step": 81000 }, { "epoch": 0.32729065074318126, "grad_norm": 990.9943237304688, "learning_rate": 4.273680860310572e-05, "loss": 71.2299, "step": 81010 }, { "epoch": 0.3273310520085489, "grad_norm": 379.1767883300781, "learning_rate": 4.2734348447010206e-05, "loss": 53.3839, "step": 81020 }, { "epoch": 0.32737145327391654, "grad_norm": 280.44287109375, "learning_rate": 4.2731887945174434e-05, "loss": 66.233, "step": 81030 }, { "epoch": 0.3274118545392842, "grad_norm": 659.9508056640625, "learning_rate": 4.272942709764638e-05, "loss": 80.5297, "step": 81040 }, { "epoch": 0.3274522558046518, "grad_norm": 688.0112915039062, "learning_rate": 4.2726965904474e-05, "loss": 58.7139, "step": 81050 }, { "epoch": 0.32749265707001946, "grad_norm": 742.99169921875, "learning_rate": 4.2724504365705314e-05, "loss": 46.189, "step": 81060 }, { "epoch": 0.32753305833538704, "grad_norm": 1215.4967041015625, "learning_rate": 4.272204248138828e-05, "loss": 48.7161, "step": 81070 }, { "epoch": 0.3275734596007547, "grad_norm": 575.4824829101562, "learning_rate": 4.2719580251570915e-05, "loss": 78.7691, "step": 81080 }, { "epoch": 0.3276138608661223, "grad_norm": 547.03759765625, "learning_rate": 4.2717117676301196e-05, "loss": 42.8608, "step": 81090 }, { "epoch": 0.32765426213148996, "grad_norm": 605.3917846679688, "learning_rate": 4.271465475562716e-05, "loss": 52.3641, "step": 81100 }, { "epoch": 0.3276946633968576, "grad_norm": 402.33050537109375, "learning_rate": 4.2712191489596796e-05, "loss": 81.6275, "step": 81110 }, { "epoch": 0.32773506466222524, "grad_norm": 944.0283813476562, "learning_rate": 4.270972787825815e-05, "loss": 44.0819, "step": 81120 }, { "epoch": 0.3277754659275929, "grad_norm": 2794.85986328125, "learning_rate": 4.2707263921659236e-05, "loss": 77.4065, "step": 81130 }, { "epoch": 0.32781586719296046, "grad_norm": 706.5042724609375, "learning_rate": 4.27047996198481e-05, "loss": 84.9347, "step": 81140 }, { "epoch": 0.3278562684583281, "grad_norm": 1184.057861328125, "learning_rate": 4.2702334972872776e-05, "loss": 70.5365, "step": 81150 }, { "epoch": 0.32789666972369574, "grad_norm": 548.59716796875, "learning_rate": 4.269986998078132e-05, "loss": 57.2449, "step": 81160 }, { "epoch": 0.3279370709890634, "grad_norm": 712.474609375, "learning_rate": 4.2697404643621786e-05, "loss": 68.5629, "step": 81170 }, { "epoch": 0.327977472254431, "grad_norm": 727.8631591796875, "learning_rate": 4.269493896144224e-05, "loss": 63.5557, "step": 81180 }, { "epoch": 0.32801787351979866, "grad_norm": 1237.4130859375, "learning_rate": 4.2692472934290746e-05, "loss": 70.6992, "step": 81190 }, { "epoch": 0.32805827478516625, "grad_norm": 625.5675048828125, "learning_rate": 4.2690006562215384e-05, "loss": 80.8208, "step": 81200 }, { "epoch": 0.3280986760505339, "grad_norm": 561.9657592773438, "learning_rate": 4.2687539845264235e-05, "loss": 62.1039, "step": 81210 }, { "epoch": 0.32813907731590153, "grad_norm": 1042.281494140625, "learning_rate": 4.268507278348539e-05, "loss": 39.331, "step": 81220 }, { "epoch": 0.32817947858126917, "grad_norm": 401.18792724609375, "learning_rate": 4.2682605376926955e-05, "loss": 61.7103, "step": 81230 }, { "epoch": 0.3282198798466368, "grad_norm": 1227.1231689453125, "learning_rate": 4.268013762563702e-05, "loss": 72.3686, "step": 81240 }, { "epoch": 0.32826028111200445, "grad_norm": 2529.718994140625, "learning_rate": 4.267766952966369e-05, "loss": 111.4047, "step": 81250 }, { "epoch": 0.32830068237737203, "grad_norm": 762.4780883789062, "learning_rate": 4.2675201089055096e-05, "loss": 44.7507, "step": 81260 }, { "epoch": 0.3283410836427397, "grad_norm": 917.88720703125, "learning_rate": 4.2672732303859365e-05, "loss": 74.2715, "step": 81270 }, { "epoch": 0.3283814849081073, "grad_norm": 520.3221435546875, "learning_rate": 4.267026317412461e-05, "loss": 54.3115, "step": 81280 }, { "epoch": 0.32842188617347495, "grad_norm": 185.31436157226562, "learning_rate": 4.266779369989899e-05, "loss": 50.5796, "step": 81290 }, { "epoch": 0.3284622874388426, "grad_norm": 1354.3519287109375, "learning_rate": 4.2665323881230624e-05, "loss": 101.209, "step": 81300 }, { "epoch": 0.32850268870421023, "grad_norm": 387.4610900878906, "learning_rate": 4.266285371816767e-05, "loss": 80.6324, "step": 81310 }, { "epoch": 0.3285430899695779, "grad_norm": 1534.5985107421875, "learning_rate": 4.266038321075831e-05, "loss": 55.0623, "step": 81320 }, { "epoch": 0.32858349123494546, "grad_norm": 750.3333129882812, "learning_rate": 4.265791235905067e-05, "loss": 65.2265, "step": 81330 }, { "epoch": 0.3286238925003131, "grad_norm": 438.5273742675781, "learning_rate": 4.265544116309294e-05, "loss": 82.4092, "step": 81340 }, { "epoch": 0.32866429376568074, "grad_norm": 1133.52099609375, "learning_rate": 4.265296962293329e-05, "loss": 58.1583, "step": 81350 }, { "epoch": 0.3287046950310484, "grad_norm": 632.3345336914062, "learning_rate": 4.265049773861991e-05, "loss": 79.6419, "step": 81360 }, { "epoch": 0.328745096296416, "grad_norm": 778.0379028320312, "learning_rate": 4.2648025510201e-05, "loss": 49.8477, "step": 81370 }, { "epoch": 0.32878549756178366, "grad_norm": 662.5733642578125, "learning_rate": 4.2645552937724744e-05, "loss": 58.1828, "step": 81380 }, { "epoch": 0.32882589882715124, "grad_norm": 1248.02001953125, "learning_rate": 4.264308002123935e-05, "loss": 57.9085, "step": 81390 }, { "epoch": 0.3288663000925189, "grad_norm": 645.2049560546875, "learning_rate": 4.264060676079302e-05, "loss": 56.5214, "step": 81400 }, { "epoch": 0.3289067013578865, "grad_norm": 2653.940673828125, "learning_rate": 4.2638133156433986e-05, "loss": 68.7137, "step": 81410 }, { "epoch": 0.32894710262325416, "grad_norm": 756.0985717773438, "learning_rate": 4.263565920821046e-05, "loss": 53.9748, "step": 81420 }, { "epoch": 0.3289875038886218, "grad_norm": 449.9473571777344, "learning_rate": 4.2633184916170677e-05, "loss": 69.4224, "step": 81430 }, { "epoch": 0.32902790515398944, "grad_norm": 1535.21923828125, "learning_rate": 4.263071028036288e-05, "loss": 66.2684, "step": 81440 }, { "epoch": 0.3290683064193571, "grad_norm": 899.3699951171875, "learning_rate": 4.2628235300835314e-05, "loss": 65.152, "step": 81450 }, { "epoch": 0.32910870768472467, "grad_norm": 956.332275390625, "learning_rate": 4.2625759977636214e-05, "loss": 68.6298, "step": 81460 }, { "epoch": 0.3291491089500923, "grad_norm": 815.9563598632812, "learning_rate": 4.262328431081386e-05, "loss": 80.0543, "step": 81470 }, { "epoch": 0.32918951021545995, "grad_norm": 536.765625, "learning_rate": 4.26208083004165e-05, "loss": 47.5851, "step": 81480 }, { "epoch": 0.3292299114808276, "grad_norm": 564.234375, "learning_rate": 4.261833194649241e-05, "loss": 73.5911, "step": 81490 }, { "epoch": 0.3292703127461952, "grad_norm": 999.4091186523438, "learning_rate": 4.261585524908987e-05, "loss": 68.8592, "step": 81500 }, { "epoch": 0.32931071401156287, "grad_norm": 1350.5775146484375, "learning_rate": 4.261337820825716e-05, "loss": 70.5121, "step": 81510 }, { "epoch": 0.32935111527693045, "grad_norm": 735.5888061523438, "learning_rate": 4.261090082404258e-05, "loss": 75.4246, "step": 81520 }, { "epoch": 0.3293915165422981, "grad_norm": 365.2147521972656, "learning_rate": 4.2608423096494406e-05, "loss": 61.5511, "step": 81530 }, { "epoch": 0.32943191780766573, "grad_norm": 1004.5228881835938, "learning_rate": 4.260594502566097e-05, "loss": 66.0472, "step": 81540 }, { "epoch": 0.32947231907303337, "grad_norm": 725.521484375, "learning_rate": 4.260346661159058e-05, "loss": 61.7147, "step": 81550 }, { "epoch": 0.329512720338401, "grad_norm": 1222.5792236328125, "learning_rate": 4.260098785433154e-05, "loss": 60.5714, "step": 81560 }, { "epoch": 0.32955312160376865, "grad_norm": 1317.46728515625, "learning_rate": 4.259850875393217e-05, "loss": 75.1795, "step": 81570 }, { "epoch": 0.32959352286913624, "grad_norm": 1155.3607177734375, "learning_rate": 4.2596029310440824e-05, "loss": 65.329, "step": 81580 }, { "epoch": 0.3296339241345039, "grad_norm": 549.3975219726562, "learning_rate": 4.259354952390582e-05, "loss": 65.329, "step": 81590 }, { "epoch": 0.3296743253998715, "grad_norm": 1814.1624755859375, "learning_rate": 4.259106939437551e-05, "loss": 66.1722, "step": 81600 }, { "epoch": 0.32971472666523916, "grad_norm": 355.4609680175781, "learning_rate": 4.258858892189825e-05, "loss": 80.2931, "step": 81610 }, { "epoch": 0.3297551279306068, "grad_norm": 570.4844360351562, "learning_rate": 4.258610810652239e-05, "loss": 44.5537, "step": 81620 }, { "epoch": 0.32979552919597444, "grad_norm": 719.2767333984375, "learning_rate": 4.258362694829629e-05, "loss": 63.4508, "step": 81630 }, { "epoch": 0.3298359304613421, "grad_norm": 494.90911865234375, "learning_rate": 4.258114544726835e-05, "loss": 67.4852, "step": 81640 }, { "epoch": 0.32987633172670966, "grad_norm": 730.2485961914062, "learning_rate": 4.257866360348692e-05, "loss": 58.3942, "step": 81650 }, { "epoch": 0.3299167329920773, "grad_norm": 528.159423828125, "learning_rate": 4.257618141700039e-05, "loss": 55.3444, "step": 81660 }, { "epoch": 0.32995713425744494, "grad_norm": 911.97998046875, "learning_rate": 4.257369888785715e-05, "loss": 69.4855, "step": 81670 }, { "epoch": 0.3299975355228126, "grad_norm": 1036.72021484375, "learning_rate": 4.2571216016105614e-05, "loss": 90.7197, "step": 81680 }, { "epoch": 0.3300379367881802, "grad_norm": 1021.642333984375, "learning_rate": 4.256873280179416e-05, "loss": 96.8164, "step": 81690 }, { "epoch": 0.33007833805354786, "grad_norm": 1377.108642578125, "learning_rate": 4.256624924497123e-05, "loss": 73.541, "step": 81700 }, { "epoch": 0.33011873931891544, "grad_norm": 702.5912475585938, "learning_rate": 4.256376534568522e-05, "loss": 98.5258, "step": 81710 }, { "epoch": 0.3301591405842831, "grad_norm": 3800.063720703125, "learning_rate": 4.256128110398457e-05, "loss": 64.3711, "step": 81720 }, { "epoch": 0.3301995418496507, "grad_norm": 806.7174072265625, "learning_rate": 4.25587965199177e-05, "loss": 77.5143, "step": 81730 }, { "epoch": 0.33023994311501836, "grad_norm": 753.8306274414062, "learning_rate": 4.255631159353305e-05, "loss": 85.3019, "step": 81740 }, { "epoch": 0.330280344380386, "grad_norm": 1084.2166748046875, "learning_rate": 4.2553826324879064e-05, "loss": 70.9181, "step": 81750 }, { "epoch": 0.33032074564575364, "grad_norm": 1052.1856689453125, "learning_rate": 4.2551340714004203e-05, "loss": 71.8444, "step": 81760 }, { "epoch": 0.3303611469111213, "grad_norm": 1021.4315185546875, "learning_rate": 4.254885476095691e-05, "loss": 72.4072, "step": 81770 }, { "epoch": 0.33040154817648887, "grad_norm": 433.3638000488281, "learning_rate": 4.254636846578566e-05, "loss": 95.435, "step": 81780 }, { "epoch": 0.3304419494418565, "grad_norm": 778.073974609375, "learning_rate": 4.254388182853894e-05, "loss": 72.969, "step": 81790 }, { "epoch": 0.33048235070722415, "grad_norm": 1745.2950439453125, "learning_rate": 4.254139484926519e-05, "loss": 81.5817, "step": 81800 }, { "epoch": 0.3305227519725918, "grad_norm": 273.6972961425781, "learning_rate": 4.253890752801293e-05, "loss": 73.0842, "step": 81810 }, { "epoch": 0.33056315323795943, "grad_norm": 711.11328125, "learning_rate": 4.253641986483062e-05, "loss": 66.8901, "step": 81820 }, { "epoch": 0.33060355450332707, "grad_norm": 810.6575927734375, "learning_rate": 4.2533931859766794e-05, "loss": 80.1247, "step": 81830 }, { "epoch": 0.33064395576869465, "grad_norm": 0.0, "learning_rate": 4.253144351286994e-05, "loss": 72.4615, "step": 81840 }, { "epoch": 0.3306843570340623, "grad_norm": 2913.29443359375, "learning_rate": 4.252895482418856e-05, "loss": 76.5862, "step": 81850 }, { "epoch": 0.33072475829942993, "grad_norm": 1155.0936279296875, "learning_rate": 4.252646579377119e-05, "loss": 84.624, "step": 81860 }, { "epoch": 0.3307651595647976, "grad_norm": 587.8722534179688, "learning_rate": 4.252397642166633e-05, "loss": 72.8901, "step": 81870 }, { "epoch": 0.3308055608301652, "grad_norm": 840.6516723632812, "learning_rate": 4.252148670792254e-05, "loss": 92.0446, "step": 81880 }, { "epoch": 0.33084596209553285, "grad_norm": 701.7250366210938, "learning_rate": 4.251899665258835e-05, "loss": 30.5136, "step": 81890 }, { "epoch": 0.33088636336090044, "grad_norm": 0.0, "learning_rate": 4.2516506255712296e-05, "loss": 71.1064, "step": 81900 }, { "epoch": 0.3309267646262681, "grad_norm": 485.8135986328125, "learning_rate": 4.251401551734293e-05, "loss": 77.7925, "step": 81910 }, { "epoch": 0.3309671658916357, "grad_norm": 918.968994140625, "learning_rate": 4.2511524437528825e-05, "loss": 79.0183, "step": 81920 }, { "epoch": 0.33100756715700336, "grad_norm": 808.7003173828125, "learning_rate": 4.250903301631853e-05, "loss": 45.6422, "step": 81930 }, { "epoch": 0.331047968422371, "grad_norm": 1006.4502563476562, "learning_rate": 4.250654125376062e-05, "loss": 81.9783, "step": 81940 }, { "epoch": 0.33108836968773864, "grad_norm": 926.7799682617188, "learning_rate": 4.250404914990367e-05, "loss": 51.5458, "step": 81950 }, { "epoch": 0.3311287709531063, "grad_norm": 422.4704284667969, "learning_rate": 4.250155670479628e-05, "loss": 68.1897, "step": 81960 }, { "epoch": 0.33116917221847386, "grad_norm": 661.1273193359375, "learning_rate": 4.2499063918487034e-05, "loss": 57.1841, "step": 81970 }, { "epoch": 0.3312095734838415, "grad_norm": 1525.314208984375, "learning_rate": 4.2496570791024513e-05, "loss": 85.9705, "step": 81980 }, { "epoch": 0.33124997474920914, "grad_norm": 677.7116088867188, "learning_rate": 4.2494077322457346e-05, "loss": 49.9874, "step": 81990 }, { "epoch": 0.3312903760145768, "grad_norm": 854.898193359375, "learning_rate": 4.249158351283414e-05, "loss": 67.6385, "step": 82000 }, { "epoch": 0.3313307772799444, "grad_norm": 836.3336791992188, "learning_rate": 4.24890893622035e-05, "loss": 79.6089, "step": 82010 }, { "epoch": 0.33137117854531206, "grad_norm": 509.12847900390625, "learning_rate": 4.248659487061406e-05, "loss": 30.2002, "step": 82020 }, { "epoch": 0.33141157981067965, "grad_norm": 623.1883544921875, "learning_rate": 4.248410003811445e-05, "loss": 54.6345, "step": 82030 }, { "epoch": 0.3314519810760473, "grad_norm": 1047.8133544921875, "learning_rate": 4.248160486475331e-05, "loss": 69.4941, "step": 82040 }, { "epoch": 0.3314923823414149, "grad_norm": 2248.522216796875, "learning_rate": 4.247910935057929e-05, "loss": 75.4848, "step": 82050 }, { "epoch": 0.33153278360678257, "grad_norm": 834.0875854492188, "learning_rate": 4.2476613495641026e-05, "loss": 56.126, "step": 82060 }, { "epoch": 0.3315731848721502, "grad_norm": 629.9591064453125, "learning_rate": 4.247411729998718e-05, "loss": 64.6742, "step": 82070 }, { "epoch": 0.33161358613751785, "grad_norm": 738.5386352539062, "learning_rate": 4.247162076366643e-05, "loss": 56.9134, "step": 82080 }, { "epoch": 0.33165398740288543, "grad_norm": 734.9909057617188, "learning_rate": 4.246912388672744e-05, "loss": 58.0093, "step": 82090 }, { "epoch": 0.33169438866825307, "grad_norm": 315.8343505859375, "learning_rate": 4.246662666921888e-05, "loss": 68.2145, "step": 82100 }, { "epoch": 0.3317347899336207, "grad_norm": 471.6864318847656, "learning_rate": 4.2464129111189444e-05, "loss": 98.6437, "step": 82110 }, { "epoch": 0.33177519119898835, "grad_norm": 913.5321044921875, "learning_rate": 4.2461631212687816e-05, "loss": 50.3258, "step": 82120 }, { "epoch": 0.331815592464356, "grad_norm": 837.1478271484375, "learning_rate": 4.24591329737627e-05, "loss": 79.771, "step": 82130 }, { "epoch": 0.33185599372972363, "grad_norm": 681.1831665039062, "learning_rate": 4.24566343944628e-05, "loss": 54.0234, "step": 82140 }, { "epoch": 0.33189639499509127, "grad_norm": 497.15582275390625, "learning_rate": 4.245413547483682e-05, "loss": 79.7911, "step": 82150 }, { "epoch": 0.33193679626045886, "grad_norm": 463.25885009765625, "learning_rate": 4.245163621493349e-05, "loss": 60.4234, "step": 82160 }, { "epoch": 0.3319771975258265, "grad_norm": 566.7998046875, "learning_rate": 4.244913661480152e-05, "loss": 47.7919, "step": 82170 }, { "epoch": 0.33201759879119414, "grad_norm": 1045.619384765625, "learning_rate": 4.2446636674489645e-05, "loss": 67.3056, "step": 82180 }, { "epoch": 0.3320580000565618, "grad_norm": 605.4349975585938, "learning_rate": 4.244413639404662e-05, "loss": 68.3609, "step": 82190 }, { "epoch": 0.3320984013219294, "grad_norm": 562.8687133789062, "learning_rate": 4.244163577352116e-05, "loss": 64.8594, "step": 82200 }, { "epoch": 0.33213880258729706, "grad_norm": 830.25927734375, "learning_rate": 4.243913481296205e-05, "loss": 42.151, "step": 82210 }, { "epoch": 0.33217920385266464, "grad_norm": 570.14453125, "learning_rate": 4.243663351241801e-05, "loss": 91.821, "step": 82220 }, { "epoch": 0.3322196051180323, "grad_norm": 499.4716796875, "learning_rate": 4.243413187193783e-05, "loss": 56.719, "step": 82230 }, { "epoch": 0.3322600063833999, "grad_norm": 536.1835327148438, "learning_rate": 4.2431629891570266e-05, "loss": 71.9568, "step": 82240 }, { "epoch": 0.33230040764876756, "grad_norm": 907.7794799804688, "learning_rate": 4.242912757136412e-05, "loss": 80.2039, "step": 82250 }, { "epoch": 0.3323408089141352, "grad_norm": 896.99267578125, "learning_rate": 4.2426624911368146e-05, "loss": 61.2477, "step": 82260 }, { "epoch": 0.33238121017950284, "grad_norm": 3104.49169921875, "learning_rate": 4.242412191163115e-05, "loss": 64.1988, "step": 82270 }, { "epoch": 0.3324216114448705, "grad_norm": 739.8251342773438, "learning_rate": 4.242161857220193e-05, "loss": 99.7195, "step": 82280 }, { "epoch": 0.33246201271023806, "grad_norm": 717.78662109375, "learning_rate": 4.241911489312927e-05, "loss": 69.5272, "step": 82290 }, { "epoch": 0.3325024139756057, "grad_norm": 494.52197265625, "learning_rate": 4.241661087446202e-05, "loss": 46.656, "step": 82300 }, { "epoch": 0.33254281524097334, "grad_norm": 355.43536376953125, "learning_rate": 4.2414106516248964e-05, "loss": 56.7551, "step": 82310 }, { "epoch": 0.332583216506341, "grad_norm": 1103.3970947265625, "learning_rate": 4.241160181853894e-05, "loss": 68.9124, "step": 82320 }, { "epoch": 0.3326236177717086, "grad_norm": 742.4725952148438, "learning_rate": 4.240909678138077e-05, "loss": 83.4965, "step": 82330 }, { "epoch": 0.33266401903707626, "grad_norm": 1117.4141845703125, "learning_rate": 4.24065914048233e-05, "loss": 71.4723, "step": 82340 }, { "epoch": 0.33270442030244385, "grad_norm": 539.0693969726562, "learning_rate": 4.2404085688915364e-05, "loss": 53.2493, "step": 82350 }, { "epoch": 0.3327448215678115, "grad_norm": 4054.252197265625, "learning_rate": 4.240157963370582e-05, "loss": 99.1258, "step": 82360 }, { "epoch": 0.33278522283317913, "grad_norm": 826.72509765625, "learning_rate": 4.2399073239243526e-05, "loss": 68.9222, "step": 82370 }, { "epoch": 0.33282562409854677, "grad_norm": 831.2509155273438, "learning_rate": 4.239656650557734e-05, "loss": 73.0231, "step": 82380 }, { "epoch": 0.3328660253639144, "grad_norm": 409.9931335449219, "learning_rate": 4.239405943275613e-05, "loss": 45.4566, "step": 82390 }, { "epoch": 0.33290642662928205, "grad_norm": 926.7503051757812, "learning_rate": 4.2391552020828775e-05, "loss": 77.0996, "step": 82400 }, { "epoch": 0.33294682789464963, "grad_norm": 1059.4488525390625, "learning_rate": 4.2389044269844155e-05, "loss": 95.9397, "step": 82410 }, { "epoch": 0.3329872291600173, "grad_norm": 0.0, "learning_rate": 4.238653617985118e-05, "loss": 49.413, "step": 82420 }, { "epoch": 0.3330276304253849, "grad_norm": 1252.39453125, "learning_rate": 4.238402775089871e-05, "loss": 66.4441, "step": 82430 }, { "epoch": 0.33306803169075255, "grad_norm": 1263.342529296875, "learning_rate": 4.238151898303569e-05, "loss": 76.9728, "step": 82440 }, { "epoch": 0.3331084329561202, "grad_norm": 473.049072265625, "learning_rate": 4.2379009876311e-05, "loss": 63.0306, "step": 82450 }, { "epoch": 0.33314883422148783, "grad_norm": 1100.3658447265625, "learning_rate": 4.237650043077357e-05, "loss": 107.0959, "step": 82460 }, { "epoch": 0.3331892354868555, "grad_norm": 731.7568359375, "learning_rate": 4.237399064647231e-05, "loss": 105.6519, "step": 82470 }, { "epoch": 0.33322963675222306, "grad_norm": 569.7168579101562, "learning_rate": 4.237148052345616e-05, "loss": 89.5767, "step": 82480 }, { "epoch": 0.3332700380175907, "grad_norm": 650.82958984375, "learning_rate": 4.236897006177405e-05, "loss": 84.1082, "step": 82490 }, { "epoch": 0.33331043928295834, "grad_norm": 2010.1734619140625, "learning_rate": 4.2366459261474933e-05, "loss": 60.609, "step": 82500 }, { "epoch": 0.333350840548326, "grad_norm": 1313.4893798828125, "learning_rate": 4.2363948122607756e-05, "loss": 72.3615, "step": 82510 }, { "epoch": 0.3333912418136936, "grad_norm": 538.3892211914062, "learning_rate": 4.236143664522146e-05, "loss": 78.7254, "step": 82520 }, { "epoch": 0.33343164307906126, "grad_norm": 678.4175415039062, "learning_rate": 4.235892482936502e-05, "loss": 67.0122, "step": 82530 }, { "epoch": 0.33347204434442884, "grad_norm": 1527.4241943359375, "learning_rate": 4.2356412675087406e-05, "loss": 46.1357, "step": 82540 }, { "epoch": 0.3335124456097965, "grad_norm": 889.2577514648438, "learning_rate": 4.23539001824376e-05, "loss": 85.7127, "step": 82550 }, { "epoch": 0.3335528468751641, "grad_norm": 621.6854858398438, "learning_rate": 4.2351387351464565e-05, "loss": 62.1339, "step": 82560 }, { "epoch": 0.33359324814053176, "grad_norm": 606.4068603515625, "learning_rate": 4.2348874182217305e-05, "loss": 56.7306, "step": 82570 }, { "epoch": 0.3336336494058994, "grad_norm": 785.7623901367188, "learning_rate": 4.2346360674744815e-05, "loss": 49.6323, "step": 82580 }, { "epoch": 0.33367405067126704, "grad_norm": 1377.6890869140625, "learning_rate": 4.234384682909608e-05, "loss": 84.6193, "step": 82590 }, { "epoch": 0.3337144519366347, "grad_norm": 2395.7802734375, "learning_rate": 4.234133264532012e-05, "loss": 70.3222, "step": 82600 }, { "epoch": 0.33375485320200227, "grad_norm": 746.2875366210938, "learning_rate": 4.2338818123465966e-05, "loss": 41.8322, "step": 82610 }, { "epoch": 0.3337952544673699, "grad_norm": 461.2134094238281, "learning_rate": 4.2336303263582624e-05, "loss": 40.6642, "step": 82620 }, { "epoch": 0.33383565573273755, "grad_norm": 469.310546875, "learning_rate": 4.233378806571912e-05, "loss": 67.8555, "step": 82630 }, { "epoch": 0.3338760569981052, "grad_norm": 827.2962036132812, "learning_rate": 4.2331272529924495e-05, "loss": 74.3388, "step": 82640 }, { "epoch": 0.3339164582634728, "grad_norm": 544.0149536132812, "learning_rate": 4.2328756656247795e-05, "loss": 79.4444, "step": 82650 }, { "epoch": 0.33395685952884047, "grad_norm": 383.1783142089844, "learning_rate": 4.2326240444738055e-05, "loss": 51.7502, "step": 82660 }, { "epoch": 0.33399726079420805, "grad_norm": 1237.60009765625, "learning_rate": 4.232372389544434e-05, "loss": 73.6413, "step": 82670 }, { "epoch": 0.3340376620595757, "grad_norm": 309.4773864746094, "learning_rate": 4.232120700841571e-05, "loss": 59.2111, "step": 82680 }, { "epoch": 0.33407806332494333, "grad_norm": 1590.2628173828125, "learning_rate": 4.2318689783701224e-05, "loss": 59.3465, "step": 82690 }, { "epoch": 0.33411846459031097, "grad_norm": 464.6656799316406, "learning_rate": 4.2316172221349973e-05, "loss": 38.0717, "step": 82700 }, { "epoch": 0.3341588658556786, "grad_norm": 2365.112060546875, "learning_rate": 4.231365432141103e-05, "loss": 52.5256, "step": 82710 }, { "epoch": 0.33419926712104625, "grad_norm": 905.1858520507812, "learning_rate": 4.231113608393348e-05, "loss": 78.6967, "step": 82720 }, { "epoch": 0.33423966838641384, "grad_norm": 399.5769348144531, "learning_rate": 4.2308617508966414e-05, "loss": 79.4581, "step": 82730 }, { "epoch": 0.3342800696517815, "grad_norm": 2698.52685546875, "learning_rate": 4.230609859655895e-05, "loss": 78.356, "step": 82740 }, { "epoch": 0.3343204709171491, "grad_norm": 1030.6182861328125, "learning_rate": 4.230357934676017e-05, "loss": 57.3243, "step": 82750 }, { "epoch": 0.33436087218251676, "grad_norm": 1275.8216552734375, "learning_rate": 4.230105975961921e-05, "loss": 77.0945, "step": 82760 }, { "epoch": 0.3344012734478844, "grad_norm": 846.1837768554688, "learning_rate": 4.229853983518518e-05, "loss": 58.413, "step": 82770 }, { "epoch": 0.33444167471325204, "grad_norm": 502.4288330078125, "learning_rate": 4.229601957350722e-05, "loss": 55.0714, "step": 82780 }, { "epoch": 0.3344820759786197, "grad_norm": 1429.5887451171875, "learning_rate": 4.229349897463445e-05, "loss": 74.8495, "step": 82790 }, { "epoch": 0.33452247724398726, "grad_norm": 534.1192016601562, "learning_rate": 4.2290978038616e-05, "loss": 69.4769, "step": 82800 }, { "epoch": 0.3345628785093549, "grad_norm": 625.2440185546875, "learning_rate": 4.228845676550105e-05, "loss": 68.7517, "step": 82810 }, { "epoch": 0.33460327977472254, "grad_norm": 529.8661499023438, "learning_rate": 4.2285935155338724e-05, "loss": 77.1349, "step": 82820 }, { "epoch": 0.3346436810400902, "grad_norm": 1316.5428466796875, "learning_rate": 4.22834132081782e-05, "loss": 61.7906, "step": 82830 }, { "epoch": 0.3346840823054578, "grad_norm": 483.9060974121094, "learning_rate": 4.2280890924068625e-05, "loss": 62.0083, "step": 82840 }, { "epoch": 0.33472448357082546, "grad_norm": 882.261962890625, "learning_rate": 4.22783683030592e-05, "loss": 62.3824, "step": 82850 }, { "epoch": 0.33476488483619304, "grad_norm": 733.6650390625, "learning_rate": 4.227584534519907e-05, "loss": 81.4946, "step": 82860 }, { "epoch": 0.3348052861015607, "grad_norm": 604.4740600585938, "learning_rate": 4.227332205053746e-05, "loss": 72.0598, "step": 82870 }, { "epoch": 0.3348456873669283, "grad_norm": 4985.6357421875, "learning_rate": 4.2270798419123534e-05, "loss": 66.5251, "step": 82880 }, { "epoch": 0.33488608863229596, "grad_norm": 477.78289794921875, "learning_rate": 4.2268274451006506e-05, "loss": 116.5259, "step": 82890 }, { "epoch": 0.3349264898976636, "grad_norm": 1549.6126708984375, "learning_rate": 4.226575014623557e-05, "loss": 49.6759, "step": 82900 }, { "epoch": 0.33496689116303124, "grad_norm": 416.32879638671875, "learning_rate": 4.2263225504859955e-05, "loss": 69.3102, "step": 82910 }, { "epoch": 0.3350072924283989, "grad_norm": 575.8262939453125, "learning_rate": 4.226070052692886e-05, "loss": 54.391, "step": 82920 }, { "epoch": 0.33504769369376647, "grad_norm": 318.236572265625, "learning_rate": 4.2258175212491537e-05, "loss": 58.843, "step": 82930 }, { "epoch": 0.3350880949591341, "grad_norm": 439.44488525390625, "learning_rate": 4.2255649561597186e-05, "loss": 62.2686, "step": 82940 }, { "epoch": 0.33512849622450175, "grad_norm": 857.3777465820312, "learning_rate": 4.225312357429508e-05, "loss": 68.3515, "step": 82950 }, { "epoch": 0.3351688974898694, "grad_norm": 1923.6248779296875, "learning_rate": 4.225059725063444e-05, "loss": 75.4591, "step": 82960 }, { "epoch": 0.33520929875523703, "grad_norm": 1054.3583984375, "learning_rate": 4.2248070590664525e-05, "loss": 57.7459, "step": 82970 }, { "epoch": 0.33524970002060467, "grad_norm": 974.0528564453125, "learning_rate": 4.224554359443459e-05, "loss": 52.0075, "step": 82980 }, { "epoch": 0.33529010128597225, "grad_norm": 1275.845947265625, "learning_rate": 4.22430162619939e-05, "loss": 94.6089, "step": 82990 }, { "epoch": 0.3353305025513399, "grad_norm": 1231.898681640625, "learning_rate": 4.224048859339175e-05, "loss": 65.2636, "step": 83000 }, { "epoch": 0.33537090381670753, "grad_norm": 592.545166015625, "learning_rate": 4.223796058867738e-05, "loss": 74.3102, "step": 83010 }, { "epoch": 0.3354113050820752, "grad_norm": 570.7125244140625, "learning_rate": 4.22354322479001e-05, "loss": 77.6862, "step": 83020 }, { "epoch": 0.3354517063474428, "grad_norm": 814.4917602539062, "learning_rate": 4.22329035711092e-05, "loss": 78.4505, "step": 83030 }, { "epoch": 0.33549210761281045, "grad_norm": 1354.454345703125, "learning_rate": 4.223037455835397e-05, "loss": 56.5575, "step": 83040 }, { "epoch": 0.33553250887817804, "grad_norm": 1030.1226806640625, "learning_rate": 4.2227845209683716e-05, "loss": 53.6785, "step": 83050 }, { "epoch": 0.3355729101435457, "grad_norm": 1466.65966796875, "learning_rate": 4.222531552514775e-05, "loss": 68.2507, "step": 83060 }, { "epoch": 0.3356133114089133, "grad_norm": 705.6240844726562, "learning_rate": 4.2222785504795394e-05, "loss": 92.5086, "step": 83070 }, { "epoch": 0.33565371267428096, "grad_norm": 703.1721801757812, "learning_rate": 4.2220255148675956e-05, "loss": 59.856, "step": 83080 }, { "epoch": 0.3356941139396486, "grad_norm": 523.6635131835938, "learning_rate": 4.221772445683878e-05, "loss": 93.9165, "step": 83090 }, { "epoch": 0.33573451520501624, "grad_norm": 915.0527954101562, "learning_rate": 4.221519342933321e-05, "loss": 62.3113, "step": 83100 }, { "epoch": 0.3357749164703839, "grad_norm": 5522.94091796875, "learning_rate": 4.221266206620859e-05, "loss": 62.9414, "step": 83110 }, { "epoch": 0.33581531773575146, "grad_norm": 619.2235717773438, "learning_rate": 4.221013036751424e-05, "loss": 51.0544, "step": 83120 }, { "epoch": 0.3358557190011191, "grad_norm": 860.1320190429688, "learning_rate": 4.220759833329955e-05, "loss": 57.2342, "step": 83130 }, { "epoch": 0.33589612026648674, "grad_norm": 1275.2235107421875, "learning_rate": 4.2205065963613864e-05, "loss": 72.2249, "step": 83140 }, { "epoch": 0.3359365215318544, "grad_norm": 1409.1829833984375, "learning_rate": 4.220253325850657e-05, "loss": 75.4809, "step": 83150 }, { "epoch": 0.335976922797222, "grad_norm": 155.7539825439453, "learning_rate": 4.220000021802702e-05, "loss": 66.316, "step": 83160 }, { "epoch": 0.33601732406258966, "grad_norm": 519.6751098632812, "learning_rate": 4.219746684222462e-05, "loss": 46.7393, "step": 83170 }, { "epoch": 0.33605772532795725, "grad_norm": 3975.830078125, "learning_rate": 4.219493313114875e-05, "loss": 113.0992, "step": 83180 }, { "epoch": 0.3360981265933249, "grad_norm": 741.3836059570312, "learning_rate": 4.219239908484881e-05, "loss": 62.3929, "step": 83190 }, { "epoch": 0.3361385278586925, "grad_norm": 1000.0194091796875, "learning_rate": 4.218986470337419e-05, "loss": 63.8103, "step": 83200 }, { "epoch": 0.33617892912406017, "grad_norm": 915.8265380859375, "learning_rate": 4.21873299867743e-05, "loss": 64.819, "step": 83210 }, { "epoch": 0.3362193303894278, "grad_norm": 1064.971923828125, "learning_rate": 4.218479493509858e-05, "loss": 75.7704, "step": 83220 }, { "epoch": 0.33625973165479545, "grad_norm": 381.1853942871094, "learning_rate": 4.218225954839643e-05, "loss": 44.5471, "step": 83230 }, { "epoch": 0.3363001329201631, "grad_norm": 2388.5380859375, "learning_rate": 4.217972382671729e-05, "loss": 56.8206, "step": 83240 }, { "epoch": 0.33634053418553067, "grad_norm": 2224.6982421875, "learning_rate": 4.2177187770110576e-05, "loss": 75.5993, "step": 83250 }, { "epoch": 0.3363809354508983, "grad_norm": 584.9492797851562, "learning_rate": 4.2174651378625754e-05, "loss": 76.4168, "step": 83260 }, { "epoch": 0.33642133671626595, "grad_norm": 576.220947265625, "learning_rate": 4.217211465231226e-05, "loss": 69.955, "step": 83270 }, { "epoch": 0.3364617379816336, "grad_norm": 419.0504455566406, "learning_rate": 4.2169577591219545e-05, "loss": 61.2648, "step": 83280 }, { "epoch": 0.33650213924700123, "grad_norm": 649.897705078125, "learning_rate": 4.216704019539707e-05, "loss": 50.4646, "step": 83290 }, { "epoch": 0.33654254051236887, "grad_norm": 1460.0035400390625, "learning_rate": 4.2164502464894316e-05, "loss": 66.0741, "step": 83300 }, { "epoch": 0.33658294177773646, "grad_norm": 618.8856201171875, "learning_rate": 4.216196439976076e-05, "loss": 57.7886, "step": 83310 }, { "epoch": 0.3366233430431041, "grad_norm": 540.6898193359375, "learning_rate": 4.2159426000045854e-05, "loss": 62.811, "step": 83320 }, { "epoch": 0.33666374430847174, "grad_norm": 629.9652099609375, "learning_rate": 4.215688726579911e-05, "loss": 61.0531, "step": 83330 }, { "epoch": 0.3367041455738394, "grad_norm": 3191.65185546875, "learning_rate": 4.2154348197070017e-05, "loss": 70.6365, "step": 83340 }, { "epoch": 0.336744546839207, "grad_norm": 483.31396484375, "learning_rate": 4.215180879390808e-05, "loss": 71.8596, "step": 83350 }, { "epoch": 0.33678494810457466, "grad_norm": 633.4132080078125, "learning_rate": 4.2149269056362794e-05, "loss": 65.0201, "step": 83360 }, { "epoch": 0.33682534936994224, "grad_norm": 1588.8582763671875, "learning_rate": 4.214672898448367e-05, "loss": 81.9684, "step": 83370 }, { "epoch": 0.3368657506353099, "grad_norm": 1885.4788818359375, "learning_rate": 4.214418857832025e-05, "loss": 81.2159, "step": 83380 }, { "epoch": 0.3369061519006775, "grad_norm": 2098.720947265625, "learning_rate": 4.214164783792205e-05, "loss": 78.4835, "step": 83390 }, { "epoch": 0.33694655316604516, "grad_norm": 934.3864135742188, "learning_rate": 4.213910676333859e-05, "loss": 51.1806, "step": 83400 }, { "epoch": 0.3369869544314128, "grad_norm": 561.3240356445312, "learning_rate": 4.213656535461942e-05, "loss": 83.0985, "step": 83410 }, { "epoch": 0.33702735569678044, "grad_norm": 517.56982421875, "learning_rate": 4.213402361181409e-05, "loss": 52.2328, "step": 83420 }, { "epoch": 0.3370677569621481, "grad_norm": 501.65325927734375, "learning_rate": 4.213148153497215e-05, "loss": 45.5168, "step": 83430 }, { "epoch": 0.33710815822751566, "grad_norm": 619.2743530273438, "learning_rate": 4.212893912414316e-05, "loss": 54.3897, "step": 83440 }, { "epoch": 0.3371485594928833, "grad_norm": 281.7502746582031, "learning_rate": 4.212639637937668e-05, "loss": 48.6456, "step": 83450 }, { "epoch": 0.33718896075825094, "grad_norm": 1214.2874755859375, "learning_rate": 4.212385330072228e-05, "loss": 80.0758, "step": 83460 }, { "epoch": 0.3372293620236186, "grad_norm": 639.830322265625, "learning_rate": 4.2121309888229544e-05, "loss": 64.5304, "step": 83470 }, { "epoch": 0.3372697632889862, "grad_norm": 823.2178955078125, "learning_rate": 4.2118766141948066e-05, "loss": 52.8296, "step": 83480 }, { "epoch": 0.33731016455435386, "grad_norm": 429.5492858886719, "learning_rate": 4.211622206192742e-05, "loss": 63.3038, "step": 83490 }, { "epoch": 0.33735056581972145, "grad_norm": 0.0, "learning_rate": 4.211367764821722e-05, "loss": 82.3349, "step": 83500 }, { "epoch": 0.3373909670850891, "grad_norm": 974.5023803710938, "learning_rate": 4.211113290086706e-05, "loss": 43.6207, "step": 83510 }, { "epoch": 0.33743136835045673, "grad_norm": 804.0608520507812, "learning_rate": 4.2108587819926554e-05, "loss": 66.1226, "step": 83520 }, { "epoch": 0.33747176961582437, "grad_norm": 269.8977355957031, "learning_rate": 4.2106042405445325e-05, "loss": 82.0022, "step": 83530 }, { "epoch": 0.337512170881192, "grad_norm": 1011.4495849609375, "learning_rate": 4.210349665747299e-05, "loss": 62.9397, "step": 83540 }, { "epoch": 0.33755257214655965, "grad_norm": 568.0720825195312, "learning_rate": 4.210095057605917e-05, "loss": 80.0488, "step": 83550 }, { "epoch": 0.3375929734119273, "grad_norm": 3479.609375, "learning_rate": 4.209840416125353e-05, "loss": 88.0878, "step": 83560 }, { "epoch": 0.3376333746772949, "grad_norm": 0.0, "learning_rate": 4.20958574131057e-05, "loss": 100.4558, "step": 83570 }, { "epoch": 0.3376737759426625, "grad_norm": 2387.772216796875, "learning_rate": 4.209331033166531e-05, "loss": 77.3918, "step": 83580 }, { "epoch": 0.33771417720803015, "grad_norm": 710.8726806640625, "learning_rate": 4.209076291698205e-05, "loss": 53.3156, "step": 83590 }, { "epoch": 0.3377545784733978, "grad_norm": 847.5494384765625, "learning_rate": 4.208821516910557e-05, "loss": 64.4423, "step": 83600 }, { "epoch": 0.33779497973876543, "grad_norm": 540.9591674804688, "learning_rate": 4.208566708808554e-05, "loss": 54.3956, "step": 83610 }, { "epoch": 0.3378353810041331, "grad_norm": 426.9322814941406, "learning_rate": 4.2083118673971613e-05, "loss": 80.7343, "step": 83620 }, { "epoch": 0.33787578226950066, "grad_norm": 530.9072875976562, "learning_rate": 4.2080569926813503e-05, "loss": 62.1767, "step": 83630 }, { "epoch": 0.3379161835348683, "grad_norm": 3503.007080078125, "learning_rate": 4.20780208466609e-05, "loss": 95.5601, "step": 83640 }, { "epoch": 0.33795658480023594, "grad_norm": 764.01708984375, "learning_rate": 4.207547143356347e-05, "loss": 44.9169, "step": 83650 }, { "epoch": 0.3379969860656036, "grad_norm": 898.1088256835938, "learning_rate": 4.207292168757095e-05, "loss": 42.9797, "step": 83660 }, { "epoch": 0.3380373873309712, "grad_norm": 616.1493530273438, "learning_rate": 4.2070371608733025e-05, "loss": 55.619, "step": 83670 }, { "epoch": 0.33807778859633886, "grad_norm": 1211.4007568359375, "learning_rate": 4.206782119709942e-05, "loss": 92.8507, "step": 83680 }, { "epoch": 0.33811818986170644, "grad_norm": 1338.9266357421875, "learning_rate": 4.206527045271985e-05, "loss": 60.5956, "step": 83690 }, { "epoch": 0.3381585911270741, "grad_norm": 5582.28515625, "learning_rate": 4.206271937564405e-05, "loss": 60.9006, "step": 83700 }, { "epoch": 0.3381989923924417, "grad_norm": 1023.3121337890625, "learning_rate": 4.206016796592174e-05, "loss": 69.0761, "step": 83710 }, { "epoch": 0.33823939365780936, "grad_norm": 665.3640747070312, "learning_rate": 4.2057616223602684e-05, "loss": 63.9492, "step": 83720 }, { "epoch": 0.338279794923177, "grad_norm": 360.83563232421875, "learning_rate": 4.205506414873661e-05, "loss": 40.3897, "step": 83730 }, { "epoch": 0.33832019618854464, "grad_norm": 1596.3583984375, "learning_rate": 4.205251174137329e-05, "loss": 72.7595, "step": 83740 }, { "epoch": 0.3383605974539123, "grad_norm": 361.8247985839844, "learning_rate": 4.2049959001562464e-05, "loss": 98.5569, "step": 83750 }, { "epoch": 0.33840099871927987, "grad_norm": 818.7006225585938, "learning_rate": 4.204740592935392e-05, "loss": 88.2147, "step": 83760 }, { "epoch": 0.3384413999846475, "grad_norm": 434.5075378417969, "learning_rate": 4.2044852524797406e-05, "loss": 104.2029, "step": 83770 }, { "epoch": 0.33848180125001515, "grad_norm": 722.4409790039062, "learning_rate": 4.204229878794273e-05, "loss": 48.2412, "step": 83780 }, { "epoch": 0.3385222025153828, "grad_norm": 328.6600036621094, "learning_rate": 4.203974471883966e-05, "loss": 50.057, "step": 83790 }, { "epoch": 0.3385626037807504, "grad_norm": 381.5543518066406, "learning_rate": 4.2037190317538e-05, "loss": 62.9453, "step": 83800 }, { "epoch": 0.33860300504611807, "grad_norm": 1003.5863037109375, "learning_rate": 4.2034635584087535e-05, "loss": 97.3556, "step": 83810 }, { "epoch": 0.33864340631148565, "grad_norm": 257.1349182128906, "learning_rate": 4.203208051853808e-05, "loss": 65.2049, "step": 83820 }, { "epoch": 0.3386838075768533, "grad_norm": 542.9520263671875, "learning_rate": 4.202952512093945e-05, "loss": 60.1434, "step": 83830 }, { "epoch": 0.33872420884222093, "grad_norm": 742.0493774414062, "learning_rate": 4.202696939134146e-05, "loss": 75.3315, "step": 83840 }, { "epoch": 0.33876461010758857, "grad_norm": 839.355712890625, "learning_rate": 4.202441332979394e-05, "loss": 53.2438, "step": 83850 }, { "epoch": 0.3388050113729562, "grad_norm": 616.1432495117188, "learning_rate": 4.20218569363467e-05, "loss": 64.6932, "step": 83860 }, { "epoch": 0.33884541263832385, "grad_norm": 956.6621704101562, "learning_rate": 4.2019300211049615e-05, "loss": 66.2041, "step": 83870 }, { "epoch": 0.3388858139036915, "grad_norm": 1587.0950927734375, "learning_rate": 4.2016743153952505e-05, "loss": 72.81, "step": 83880 }, { "epoch": 0.3389262151690591, "grad_norm": 1762.2025146484375, "learning_rate": 4.201418576510523e-05, "loss": 55.4421, "step": 83890 }, { "epoch": 0.3389666164344267, "grad_norm": 874.0090942382812, "learning_rate": 4.201162804455763e-05, "loss": 62.957, "step": 83900 }, { "epoch": 0.33900701769979436, "grad_norm": 457.5177307128906, "learning_rate": 4.2009069992359595e-05, "loss": 100.793, "step": 83910 }, { "epoch": 0.339047418965162, "grad_norm": 469.1245422363281, "learning_rate": 4.200651160856098e-05, "loss": 66.5053, "step": 83920 }, { "epoch": 0.33908782023052964, "grad_norm": 394.78167724609375, "learning_rate": 4.200395289321167e-05, "loss": 35.0095, "step": 83930 }, { "epoch": 0.3391282214958973, "grad_norm": 1044.5858154296875, "learning_rate": 4.2001393846361536e-05, "loss": 99.2867, "step": 83940 }, { "epoch": 0.33916862276126486, "grad_norm": 793.2132568359375, "learning_rate": 4.199883446806048e-05, "loss": 31.189, "step": 83950 }, { "epoch": 0.3392090240266325, "grad_norm": 441.0299072265625, "learning_rate": 4.19962747583584e-05, "loss": 78.0899, "step": 83960 }, { "epoch": 0.33924942529200014, "grad_norm": 1687.0687255859375, "learning_rate": 4.1993714717305185e-05, "loss": 110.2709, "step": 83970 }, { "epoch": 0.3392898265573678, "grad_norm": 645.5243530273438, "learning_rate": 4.199115434495076e-05, "loss": 70.5179, "step": 83980 }, { "epoch": 0.3393302278227354, "grad_norm": 889.2882690429688, "learning_rate": 4.1988593641345024e-05, "loss": 59.327, "step": 83990 }, { "epoch": 0.33937062908810306, "grad_norm": 545.3060913085938, "learning_rate": 4.198603260653792e-05, "loss": 62.0888, "step": 84000 }, { "epoch": 0.33941103035347064, "grad_norm": 437.35064697265625, "learning_rate": 4.1983471240579356e-05, "loss": 48.4227, "step": 84010 }, { "epoch": 0.3394514316188383, "grad_norm": 662.9463500976562, "learning_rate": 4.198090954351928e-05, "loss": 80.6067, "step": 84020 }, { "epoch": 0.3394918328842059, "grad_norm": 952.4712524414062, "learning_rate": 4.197834751540762e-05, "loss": 84.5905, "step": 84030 }, { "epoch": 0.33953223414957356, "grad_norm": 781.6859130859375, "learning_rate": 4.197578515629435e-05, "loss": 60.8523, "step": 84040 }, { "epoch": 0.3395726354149412, "grad_norm": 975.0667724609375, "learning_rate": 4.1973222466229404e-05, "loss": 54.1651, "step": 84050 }, { "epoch": 0.33961303668030884, "grad_norm": 2085.77392578125, "learning_rate": 4.197065944526275e-05, "loss": 60.0403, "step": 84060 }, { "epoch": 0.3396534379456765, "grad_norm": 599.7440185546875, "learning_rate": 4.196809609344434e-05, "loss": 56.4642, "step": 84070 }, { "epoch": 0.33969383921104407, "grad_norm": 1469.0364990234375, "learning_rate": 4.196553241082418e-05, "loss": 57.0351, "step": 84080 }, { "epoch": 0.3397342404764117, "grad_norm": 579.5526123046875, "learning_rate": 4.1962968397452216e-05, "loss": 58.5323, "step": 84090 }, { "epoch": 0.33977464174177935, "grad_norm": 639.9765625, "learning_rate": 4.1960404053378454e-05, "loss": 65.8241, "step": 84100 }, { "epoch": 0.339815043007147, "grad_norm": 1099.93017578125, "learning_rate": 4.1957839378652886e-05, "loss": 81.618, "step": 84110 }, { "epoch": 0.33985544427251463, "grad_norm": 490.1031494140625, "learning_rate": 4.1955274373325506e-05, "loss": 71.3405, "step": 84120 }, { "epoch": 0.33989584553788227, "grad_norm": 2551.330322265625, "learning_rate": 4.1952709037446324e-05, "loss": 63.7435, "step": 84130 }, { "epoch": 0.33993624680324985, "grad_norm": 930.4701538085938, "learning_rate": 4.1950143371065355e-05, "loss": 58.0226, "step": 84140 }, { "epoch": 0.3399766480686175, "grad_norm": 856.0897216796875, "learning_rate": 4.194757737423261e-05, "loss": 58.936, "step": 84150 }, { "epoch": 0.34001704933398513, "grad_norm": 592.7817993164062, "learning_rate": 4.194501104699812e-05, "loss": 55.8844, "step": 84160 }, { "epoch": 0.3400574505993528, "grad_norm": 1155.2713623046875, "learning_rate": 4.194244438941192e-05, "loss": 62.0951, "step": 84170 }, { "epoch": 0.3400978518647204, "grad_norm": 353.23583984375, "learning_rate": 4.193987740152404e-05, "loss": 59.3539, "step": 84180 }, { "epoch": 0.34013825313008805, "grad_norm": 1153.6064453125, "learning_rate": 4.193731008338453e-05, "loss": 73.7305, "step": 84190 }, { "epoch": 0.3401786543954557, "grad_norm": 524.4332885742188, "learning_rate": 4.193474243504343e-05, "loss": 77.0145, "step": 84200 }, { "epoch": 0.3402190556608233, "grad_norm": 718.3153686523438, "learning_rate": 4.193217445655082e-05, "loss": 61.5601, "step": 84210 }, { "epoch": 0.3402594569261909, "grad_norm": 1005.5006713867188, "learning_rate": 4.192960614795675e-05, "loss": 53.4194, "step": 84220 }, { "epoch": 0.34029985819155856, "grad_norm": 1067.0313720703125, "learning_rate": 4.192703750931129e-05, "loss": 62.9559, "step": 84230 }, { "epoch": 0.3403402594569262, "grad_norm": 794.4739990234375, "learning_rate": 4.192446854066452e-05, "loss": 46.9849, "step": 84240 }, { "epoch": 0.34038066072229384, "grad_norm": 821.4888305664062, "learning_rate": 4.192189924206652e-05, "loss": 54.5941, "step": 84250 }, { "epoch": 0.3404210619876615, "grad_norm": 876.5982666015625, "learning_rate": 4.191932961356739e-05, "loss": 91.2899, "step": 84260 }, { "epoch": 0.34046146325302906, "grad_norm": 780.8380737304688, "learning_rate": 4.1916759655217206e-05, "loss": 53.8854, "step": 84270 }, { "epoch": 0.3405018645183967, "grad_norm": 2006.7110595703125, "learning_rate": 4.1914189367066094e-05, "loss": 64.6178, "step": 84280 }, { "epoch": 0.34054226578376434, "grad_norm": 663.4932861328125, "learning_rate": 4.191161874916415e-05, "loss": 71.5971, "step": 84290 }, { "epoch": 0.340582667049132, "grad_norm": 1891.99658203125, "learning_rate": 4.1909047801561484e-05, "loss": 87.5498, "step": 84300 }, { "epoch": 0.3406230683144996, "grad_norm": 483.8343811035156, "learning_rate": 4.1906476524308235e-05, "loss": 87.025, "step": 84310 }, { "epoch": 0.34066346957986726, "grad_norm": 1536.576171875, "learning_rate": 4.1903904917454516e-05, "loss": 88.3288, "step": 84320 }, { "epoch": 0.34070387084523485, "grad_norm": 708.4118041992188, "learning_rate": 4.190133298105047e-05, "loss": 59.6797, "step": 84330 }, { "epoch": 0.3407442721106025, "grad_norm": 837.2510986328125, "learning_rate": 4.189876071514624e-05, "loss": 60.1374, "step": 84340 }, { "epoch": 0.3407846733759701, "grad_norm": 2082.153564453125, "learning_rate": 4.189618811979197e-05, "loss": 65.0576, "step": 84350 }, { "epoch": 0.34082507464133777, "grad_norm": 1336.408447265625, "learning_rate": 4.18936151950378e-05, "loss": 76.66, "step": 84360 }, { "epoch": 0.3408654759067054, "grad_norm": 2176.240234375, "learning_rate": 4.189104194093392e-05, "loss": 84.4399, "step": 84370 }, { "epoch": 0.34090587717207305, "grad_norm": 602.0006103515625, "learning_rate": 4.1888468357530476e-05, "loss": 68.7765, "step": 84380 }, { "epoch": 0.3409462784374407, "grad_norm": 614.801513671875, "learning_rate": 4.188589444487765e-05, "loss": 63.4444, "step": 84390 }, { "epoch": 0.34098667970280827, "grad_norm": 1069.0560302734375, "learning_rate": 4.188332020302561e-05, "loss": 65.1551, "step": 84400 }, { "epoch": 0.3410270809681759, "grad_norm": 667.7435913085938, "learning_rate": 4.1880745632024554e-05, "loss": 62.9196, "step": 84410 }, { "epoch": 0.34106748223354355, "grad_norm": 668.0552368164062, "learning_rate": 4.187817073192468e-05, "loss": 40.3692, "step": 84420 }, { "epoch": 0.3411078834989112, "grad_norm": 943.7819213867188, "learning_rate": 4.187559550277617e-05, "loss": 57.6345, "step": 84430 }, { "epoch": 0.34114828476427883, "grad_norm": 314.5076599121094, "learning_rate": 4.187301994462924e-05, "loss": 59.5329, "step": 84440 }, { "epoch": 0.34118868602964647, "grad_norm": 759.6746826171875, "learning_rate": 4.1870444057534095e-05, "loss": 58.5424, "step": 84450 }, { "epoch": 0.34122908729501406, "grad_norm": 802.6051635742188, "learning_rate": 4.1867867841540964e-05, "loss": 58.658, "step": 84460 }, { "epoch": 0.3412694885603817, "grad_norm": 1856.4595947265625, "learning_rate": 4.186529129670006e-05, "loss": 88.8393, "step": 84470 }, { "epoch": 0.34130988982574934, "grad_norm": 921.8723754882812, "learning_rate": 4.1862714423061624e-05, "loss": 62.6543, "step": 84480 }, { "epoch": 0.341350291091117, "grad_norm": 756.6406860351562, "learning_rate": 4.186013722067588e-05, "loss": 70.8543, "step": 84490 }, { "epoch": 0.3413906923564846, "grad_norm": 2013.621826171875, "learning_rate": 4.185755968959308e-05, "loss": 62.2787, "step": 84500 }, { "epoch": 0.34143109362185226, "grad_norm": 831.4722900390625, "learning_rate": 4.185498182986349e-05, "loss": 42.0386, "step": 84510 }, { "epoch": 0.3414714948872199, "grad_norm": 723.7086181640625, "learning_rate": 4.185240364153734e-05, "loss": 67.1348, "step": 84520 }, { "epoch": 0.3415118961525875, "grad_norm": 754.1615600585938, "learning_rate": 4.184982512466491e-05, "loss": 80.2152, "step": 84530 }, { "epoch": 0.3415522974179551, "grad_norm": 326.5327453613281, "learning_rate": 4.1847246279296464e-05, "loss": 81.1519, "step": 84540 }, { "epoch": 0.34159269868332276, "grad_norm": 528.33935546875, "learning_rate": 4.184466710548227e-05, "loss": 64.1323, "step": 84550 }, { "epoch": 0.3416330999486904, "grad_norm": 682.2901611328125, "learning_rate": 4.184208760327263e-05, "loss": 64.297, "step": 84560 }, { "epoch": 0.34167350121405804, "grad_norm": 516.5701904296875, "learning_rate": 4.183950777271781e-05, "loss": 54.8298, "step": 84570 }, { "epoch": 0.3417139024794257, "grad_norm": 674.0258178710938, "learning_rate": 4.183692761386813e-05, "loss": 66.7959, "step": 84580 }, { "epoch": 0.34175430374479326, "grad_norm": 317.5458984375, "learning_rate": 4.183434712677387e-05, "loss": 43.3866, "step": 84590 }, { "epoch": 0.3417947050101609, "grad_norm": 1114.966064453125, "learning_rate": 4.183176631148534e-05, "loss": 63.7891, "step": 84600 }, { "epoch": 0.34183510627552854, "grad_norm": 887.722412109375, "learning_rate": 4.1829185168052877e-05, "loss": 87.129, "step": 84610 }, { "epoch": 0.3418755075408962, "grad_norm": 735.17822265625, "learning_rate": 4.182660369652677e-05, "loss": 52.123, "step": 84620 }, { "epoch": 0.3419159088062638, "grad_norm": 470.5625305175781, "learning_rate": 4.182402189695736e-05, "loss": 82.246, "step": 84630 }, { "epoch": 0.34195631007163146, "grad_norm": 1080.039794921875, "learning_rate": 4.1821439769395e-05, "loss": 85.6906, "step": 84640 }, { "epoch": 0.34199671133699905, "grad_norm": 529.3787841796875, "learning_rate": 4.181885731388999e-05, "loss": 66.6203, "step": 84650 }, { "epoch": 0.3420371126023667, "grad_norm": 710.7547607421875, "learning_rate": 4.1816274530492713e-05, "loss": 64.3684, "step": 84660 }, { "epoch": 0.34207751386773433, "grad_norm": 1438.0509033203125, "learning_rate": 4.18136914192535e-05, "loss": 71.3715, "step": 84670 }, { "epoch": 0.34211791513310197, "grad_norm": 779.0742797851562, "learning_rate": 4.181110798022271e-05, "loss": 64.4421, "step": 84680 }, { "epoch": 0.3421583163984696, "grad_norm": 415.0228576660156, "learning_rate": 4.180852421345072e-05, "loss": 58.738, "step": 84690 }, { "epoch": 0.34219871766383725, "grad_norm": 530.8126220703125, "learning_rate": 4.180594011898791e-05, "loss": 64.9684, "step": 84700 }, { "epoch": 0.3422391189292049, "grad_norm": 553.169677734375, "learning_rate": 4.1803355696884625e-05, "loss": 58.3446, "step": 84710 }, { "epoch": 0.3422795201945725, "grad_norm": 766.584228515625, "learning_rate": 4.180077094719128e-05, "loss": 66.1009, "step": 84720 }, { "epoch": 0.3423199214599401, "grad_norm": 801.9248657226562, "learning_rate": 4.179818586995825e-05, "loss": 49.8386, "step": 84730 }, { "epoch": 0.34236032272530775, "grad_norm": 1037.901611328125, "learning_rate": 4.1795600465235947e-05, "loss": 79.0902, "step": 84740 }, { "epoch": 0.3424007239906754, "grad_norm": 871.1192626953125, "learning_rate": 4.179301473307476e-05, "loss": 89.6048, "step": 84750 }, { "epoch": 0.34244112525604303, "grad_norm": 257.207763671875, "learning_rate": 4.179042867352511e-05, "loss": 53.3181, "step": 84760 }, { "epoch": 0.3424815265214107, "grad_norm": 1137.479736328125, "learning_rate": 4.17878422866374e-05, "loss": 64.5483, "step": 84770 }, { "epoch": 0.34252192778677826, "grad_norm": 1104.8631591796875, "learning_rate": 4.1785255572462066e-05, "loss": 50.5376, "step": 84780 }, { "epoch": 0.3425623290521459, "grad_norm": 674.0195922851562, "learning_rate": 4.178266853104954e-05, "loss": 61.1695, "step": 84790 }, { "epoch": 0.34260273031751354, "grad_norm": 647.3651733398438, "learning_rate": 4.178008116245024e-05, "loss": 73.4516, "step": 84800 }, { "epoch": 0.3426431315828812, "grad_norm": 1168.34228515625, "learning_rate": 4.1777493466714624e-05, "loss": 62.1275, "step": 84810 }, { "epoch": 0.3426835328482488, "grad_norm": 2343.4892578125, "learning_rate": 4.177490544389313e-05, "loss": 64.6482, "step": 84820 }, { "epoch": 0.34272393411361646, "grad_norm": 817.9552001953125, "learning_rate": 4.177231709403622e-05, "loss": 64.0418, "step": 84830 }, { "epoch": 0.3427643353789841, "grad_norm": 753.8648681640625, "learning_rate": 4.176972841719435e-05, "loss": 57.1917, "step": 84840 }, { "epoch": 0.3428047366443517, "grad_norm": 510.9037780761719, "learning_rate": 4.1767139413418e-05, "loss": 54.1073, "step": 84850 }, { "epoch": 0.3428451379097193, "grad_norm": 641.147705078125, "learning_rate": 4.176455008275764e-05, "loss": 81.8066, "step": 84860 }, { "epoch": 0.34288553917508696, "grad_norm": 1563.3798828125, "learning_rate": 4.1761960425263735e-05, "loss": 71.83, "step": 84870 }, { "epoch": 0.3429259404404546, "grad_norm": 1007.089111328125, "learning_rate": 4.1759370440986775e-05, "loss": 65.9258, "step": 84880 }, { "epoch": 0.34296634170582224, "grad_norm": 694.405517578125, "learning_rate": 4.175678012997727e-05, "loss": 53.0368, "step": 84890 }, { "epoch": 0.3430067429711899, "grad_norm": 787.7047119140625, "learning_rate": 4.1754189492285714e-05, "loss": 78.8049, "step": 84900 }, { "epoch": 0.34304714423655747, "grad_norm": 1134.1265869140625, "learning_rate": 4.17515985279626e-05, "loss": 52.2765, "step": 84910 }, { "epoch": 0.3430875455019251, "grad_norm": 984.3347778320312, "learning_rate": 4.174900723705845e-05, "loss": 79.6157, "step": 84920 }, { "epoch": 0.34312794676729275, "grad_norm": 671.3541259765625, "learning_rate": 4.174641561962378e-05, "loss": 68.477, "step": 84930 }, { "epoch": 0.3431683480326604, "grad_norm": 870.8594970703125, "learning_rate": 4.174382367570912e-05, "loss": 60.4037, "step": 84940 }, { "epoch": 0.343208749298028, "grad_norm": 11791.9111328125, "learning_rate": 4.174123140536499e-05, "loss": 129.9077, "step": 84950 }, { "epoch": 0.34324915056339567, "grad_norm": 656.0787963867188, "learning_rate": 4.1738638808641936e-05, "loss": 68.1256, "step": 84960 }, { "epoch": 0.34328955182876325, "grad_norm": 549.08935546875, "learning_rate": 4.17360458855905e-05, "loss": 46.7033, "step": 84970 }, { "epoch": 0.3433299530941309, "grad_norm": 748.6014404296875, "learning_rate": 4.1733452636261244e-05, "loss": 37.716, "step": 84980 }, { "epoch": 0.34337035435949853, "grad_norm": 870.7546997070312, "learning_rate": 4.173085906070471e-05, "loss": 46.5251, "step": 84990 }, { "epoch": 0.34341075562486617, "grad_norm": 525.0897216796875, "learning_rate": 4.172826515897146e-05, "loss": 88.4015, "step": 85000 }, { "epoch": 0.3434511568902338, "grad_norm": 1008.6449584960938, "learning_rate": 4.172567093111207e-05, "loss": 69.9759, "step": 85010 }, { "epoch": 0.34349155815560145, "grad_norm": 694.2141723632812, "learning_rate": 4.172307637717711e-05, "loss": 64.9771, "step": 85020 }, { "epoch": 0.3435319594209691, "grad_norm": 601.18505859375, "learning_rate": 4.172048149721717e-05, "loss": 72.7544, "step": 85030 }, { "epoch": 0.3435723606863367, "grad_norm": 1000.3665771484375, "learning_rate": 4.171788629128284e-05, "loss": 79.2617, "step": 85040 }, { "epoch": 0.3436127619517043, "grad_norm": 380.0011901855469, "learning_rate": 4.1715290759424705e-05, "loss": 77.5077, "step": 85050 }, { "epoch": 0.34365316321707196, "grad_norm": 674.123779296875, "learning_rate": 4.1712694901693374e-05, "loss": 104.6444, "step": 85060 }, { "epoch": 0.3436935644824396, "grad_norm": 2173.708251953125, "learning_rate": 4.171009871813944e-05, "loss": 66.0904, "step": 85070 }, { "epoch": 0.34373396574780724, "grad_norm": 1144.228759765625, "learning_rate": 4.170750220881354e-05, "loss": 66.5176, "step": 85080 }, { "epoch": 0.3437743670131749, "grad_norm": 648.3857421875, "learning_rate": 4.1704905373766286e-05, "loss": 58.1772, "step": 85090 }, { "epoch": 0.34381476827854246, "grad_norm": 1063.3370361328125, "learning_rate": 4.170230821304829e-05, "loss": 56.7559, "step": 85100 }, { "epoch": 0.3438551695439101, "grad_norm": 704.5231323242188, "learning_rate": 4.169971072671021e-05, "loss": 68.8396, "step": 85110 }, { "epoch": 0.34389557080927774, "grad_norm": 857.602294921875, "learning_rate": 4.169711291480266e-05, "loss": 49.2389, "step": 85120 }, { "epoch": 0.3439359720746454, "grad_norm": 1297.68701171875, "learning_rate": 4.16945147773763e-05, "loss": 61.9332, "step": 85130 }, { "epoch": 0.343976373340013, "grad_norm": 603.5067138671875, "learning_rate": 4.169191631448178e-05, "loss": 83.4856, "step": 85140 }, { "epoch": 0.34401677460538066, "grad_norm": 541.5191040039062, "learning_rate": 4.1689317526169766e-05, "loss": 44.5568, "step": 85150 }, { "epoch": 0.34405717587074824, "grad_norm": 589.892578125, "learning_rate": 4.168671841249091e-05, "loss": 56.9382, "step": 85160 }, { "epoch": 0.3440975771361159, "grad_norm": 555.4111328125, "learning_rate": 4.168411897349588e-05, "loss": 83.5068, "step": 85170 }, { "epoch": 0.3441379784014835, "grad_norm": 547.9937133789062, "learning_rate": 4.168151920923536e-05, "loss": 34.5336, "step": 85180 }, { "epoch": 0.34417837966685116, "grad_norm": 1771.55078125, "learning_rate": 4.1678919119760054e-05, "loss": 90.6765, "step": 85190 }, { "epoch": 0.3442187809322188, "grad_norm": 696.42236328125, "learning_rate": 4.1676318705120616e-05, "loss": 59.9467, "step": 85200 }, { "epoch": 0.34425918219758644, "grad_norm": 877.6763916015625, "learning_rate": 4.167371796536777e-05, "loss": 95.2035, "step": 85210 }, { "epoch": 0.3442995834629541, "grad_norm": 1338.104248046875, "learning_rate": 4.1671116900552194e-05, "loss": 66.9077, "step": 85220 }, { "epoch": 0.34433998472832167, "grad_norm": 992.7899169921875, "learning_rate": 4.166851551072462e-05, "loss": 60.782, "step": 85230 }, { "epoch": 0.3443803859936893, "grad_norm": 1082.4932861328125, "learning_rate": 4.166591379593575e-05, "loss": 61.3283, "step": 85240 }, { "epoch": 0.34442078725905695, "grad_norm": 1248.29052734375, "learning_rate": 4.166331175623631e-05, "loss": 65.6705, "step": 85250 }, { "epoch": 0.3444611885244246, "grad_norm": 651.8319091796875, "learning_rate": 4.166070939167703e-05, "loss": 107.7256, "step": 85260 }, { "epoch": 0.34450158978979223, "grad_norm": 950.817138671875, "learning_rate": 4.165810670230865e-05, "loss": 53.1864, "step": 85270 }, { "epoch": 0.34454199105515987, "grad_norm": 871.7650756835938, "learning_rate": 4.16555036881819e-05, "loss": 97.4988, "step": 85280 }, { "epoch": 0.34458239232052745, "grad_norm": 581.1294555664062, "learning_rate": 4.1652900349347533e-05, "loss": 52.556, "step": 85290 }, { "epoch": 0.3446227935858951, "grad_norm": 919.3056640625, "learning_rate": 4.165029668585629e-05, "loss": 55.3191, "step": 85300 }, { "epoch": 0.34466319485126273, "grad_norm": 622.2424926757812, "learning_rate": 4.164769269775896e-05, "loss": 107.1292, "step": 85310 }, { "epoch": 0.3447035961166304, "grad_norm": 0.0, "learning_rate": 4.1645088385106266e-05, "loss": 51.1484, "step": 85320 }, { "epoch": 0.344743997381998, "grad_norm": 2131.662353515625, "learning_rate": 4.164248374794902e-05, "loss": 83.7406, "step": 85330 }, { "epoch": 0.34478439864736565, "grad_norm": 527.97509765625, "learning_rate": 4.163987878633798e-05, "loss": 57.3865, "step": 85340 }, { "epoch": 0.3448247999127333, "grad_norm": 460.5814208984375, "learning_rate": 4.163727350032394e-05, "loss": 35.4961, "step": 85350 }, { "epoch": 0.3448652011781009, "grad_norm": 1813.7447509765625, "learning_rate": 4.1634667889957676e-05, "loss": 93.811, "step": 85360 }, { "epoch": 0.3449056024434685, "grad_norm": 516.6041259765625, "learning_rate": 4.1632061955290017e-05, "loss": 66.2047, "step": 85370 }, { "epoch": 0.34494600370883616, "grad_norm": 1379.9407958984375, "learning_rate": 4.1629455696371734e-05, "loss": 62.7511, "step": 85380 }, { "epoch": 0.3449864049742038, "grad_norm": 573.064453125, "learning_rate": 4.162684911325365e-05, "loss": 68.7898, "step": 85390 }, { "epoch": 0.34502680623957144, "grad_norm": 620.5324096679688, "learning_rate": 4.162424220598658e-05, "loss": 62.7597, "step": 85400 }, { "epoch": 0.3450672075049391, "grad_norm": 522.501708984375, "learning_rate": 4.162163497462136e-05, "loss": 63.0242, "step": 85410 }, { "epoch": 0.34510760877030666, "grad_norm": 687.868896484375, "learning_rate": 4.161902741920881e-05, "loss": 68.5159, "step": 85420 }, { "epoch": 0.3451480100356743, "grad_norm": 842.2858276367188, "learning_rate": 4.1616419539799754e-05, "loss": 46.8133, "step": 85430 }, { "epoch": 0.34518841130104194, "grad_norm": 808.839111328125, "learning_rate": 4.161381133644505e-05, "loss": 58.4971, "step": 85440 }, { "epoch": 0.3452288125664096, "grad_norm": 2333.049072265625, "learning_rate": 4.161120280919554e-05, "loss": 99.8404, "step": 85450 }, { "epoch": 0.3452692138317772, "grad_norm": 667.53125, "learning_rate": 4.160859395810208e-05, "loss": 74.1404, "step": 85460 }, { "epoch": 0.34530961509714486, "grad_norm": 886.0628051757812, "learning_rate": 4.160598478321553e-05, "loss": 62.568, "step": 85470 }, { "epoch": 0.34535001636251245, "grad_norm": 421.406005859375, "learning_rate": 4.160337528458676e-05, "loss": 75.3051, "step": 85480 }, { "epoch": 0.3453904176278801, "grad_norm": 852.6929931640625, "learning_rate": 4.160076546226663e-05, "loss": 67.8079, "step": 85490 }, { "epoch": 0.3454308188932477, "grad_norm": 1610.6650390625, "learning_rate": 4.1598155316306044e-05, "loss": 67.7026, "step": 85500 }, { "epoch": 0.34547122015861537, "grad_norm": 740.6041259765625, "learning_rate": 4.1595544846755865e-05, "loss": 69.3296, "step": 85510 }, { "epoch": 0.345511621423983, "grad_norm": 713.525390625, "learning_rate": 4.1592934053667004e-05, "loss": 66.8102, "step": 85520 }, { "epoch": 0.34555202268935065, "grad_norm": 1433.9876708984375, "learning_rate": 4.1590322937090345e-05, "loss": 49.2965, "step": 85530 }, { "epoch": 0.3455924239547183, "grad_norm": 974.6792602539062, "learning_rate": 4.15877114970768e-05, "loss": 71.5766, "step": 85540 }, { "epoch": 0.34563282522008587, "grad_norm": 996.3238525390625, "learning_rate": 4.158509973367728e-05, "loss": 82.5995, "step": 85550 }, { "epoch": 0.3456732264854535, "grad_norm": 3350.908935546875, "learning_rate": 4.1582487646942706e-05, "loss": 86.8919, "step": 85560 }, { "epoch": 0.34571362775082115, "grad_norm": 1187.7080078125, "learning_rate": 4.157987523692399e-05, "loss": 59.8579, "step": 85570 }, { "epoch": 0.3457540290161888, "grad_norm": 713.9551391601562, "learning_rate": 4.157726250367207e-05, "loss": 61.2336, "step": 85580 }, { "epoch": 0.34579443028155643, "grad_norm": 932.2216186523438, "learning_rate": 4.157464944723789e-05, "loss": 72.413, "step": 85590 }, { "epoch": 0.34583483154692407, "grad_norm": 813.9993286132812, "learning_rate": 4.157203606767238e-05, "loss": 65.8375, "step": 85600 }, { "epoch": 0.34587523281229166, "grad_norm": 648.2496948242188, "learning_rate": 4.15694223650265e-05, "loss": 56.1922, "step": 85610 }, { "epoch": 0.3459156340776593, "grad_norm": 1841.92236328125, "learning_rate": 4.156680833935119e-05, "loss": 73.8934, "step": 85620 }, { "epoch": 0.34595603534302694, "grad_norm": 1668.5313720703125, "learning_rate": 4.156419399069744e-05, "loss": 98.3084, "step": 85630 }, { "epoch": 0.3459964366083946, "grad_norm": 451.40338134765625, "learning_rate": 4.156157931911619e-05, "loss": 41.3341, "step": 85640 }, { "epoch": 0.3460368378737622, "grad_norm": 740.665771484375, "learning_rate": 4.155896432465842e-05, "loss": 93.8652, "step": 85650 }, { "epoch": 0.34607723913912986, "grad_norm": 716.4408569335938, "learning_rate": 4.155634900737513e-05, "loss": 57.4022, "step": 85660 }, { "epoch": 0.3461176404044975, "grad_norm": 543.2172241210938, "learning_rate": 4.155373336731728e-05, "loss": 49.6871, "step": 85670 }, { "epoch": 0.3461580416698651, "grad_norm": 1185.981689453125, "learning_rate": 4.155111740453588e-05, "loss": 69.9553, "step": 85680 }, { "epoch": 0.3461984429352327, "grad_norm": 817.763671875, "learning_rate": 4.154850111908192e-05, "loss": 54.951, "step": 85690 }, { "epoch": 0.34623884420060036, "grad_norm": 793.4094848632812, "learning_rate": 4.154588451100642e-05, "loss": 98.9682, "step": 85700 }, { "epoch": 0.346279245465968, "grad_norm": 643.4264526367188, "learning_rate": 4.1543267580360374e-05, "loss": 52.4739, "step": 85710 }, { "epoch": 0.34631964673133564, "grad_norm": 1268.8812255859375, "learning_rate": 4.154065032719481e-05, "loss": 83.5729, "step": 85720 }, { "epoch": 0.3463600479967033, "grad_norm": 532.7398681640625, "learning_rate": 4.153803275156076e-05, "loss": 52.5248, "step": 85730 }, { "epoch": 0.34640044926207086, "grad_norm": 2376.361083984375, "learning_rate": 4.153541485350924e-05, "loss": 97.7747, "step": 85740 }, { "epoch": 0.3464408505274385, "grad_norm": 2388.05908203125, "learning_rate": 4.1532796633091296e-05, "loss": 77.3507, "step": 85750 }, { "epoch": 0.34648125179280614, "grad_norm": 625.3255004882812, "learning_rate": 4.1530178090357976e-05, "loss": 56.9691, "step": 85760 }, { "epoch": 0.3465216530581738, "grad_norm": 792.126953125, "learning_rate": 4.152755922536032e-05, "loss": 51.1804, "step": 85770 }, { "epoch": 0.3465620543235414, "grad_norm": 379.2236328125, "learning_rate": 4.1524940038149384e-05, "loss": 53.4376, "step": 85780 }, { "epoch": 0.34660245558890906, "grad_norm": 1024.7281494140625, "learning_rate": 4.152232052877624e-05, "loss": 101.2286, "step": 85790 }, { "epoch": 0.34664285685427665, "grad_norm": 1685.01611328125, "learning_rate": 4.1519700697291944e-05, "loss": 58.5797, "step": 85800 }, { "epoch": 0.3466832581196443, "grad_norm": 642.328857421875, "learning_rate": 4.1517080543747584e-05, "loss": 80.0744, "step": 85810 }, { "epoch": 0.34672365938501193, "grad_norm": 760.1344604492188, "learning_rate": 4.151446006819423e-05, "loss": 65.5687, "step": 85820 }, { "epoch": 0.34676406065037957, "grad_norm": 499.570556640625, "learning_rate": 4.151183927068298e-05, "loss": 57.8297, "step": 85830 }, { "epoch": 0.3468044619157472, "grad_norm": 1389.1029052734375, "learning_rate": 4.150921815126493e-05, "loss": 74.6479, "step": 85840 }, { "epoch": 0.34684486318111485, "grad_norm": 1510.54150390625, "learning_rate": 4.150659670999116e-05, "loss": 82.4179, "step": 85850 }, { "epoch": 0.3468852644464825, "grad_norm": 728.5758056640625, "learning_rate": 4.150397494691279e-05, "loss": 70.3151, "step": 85860 }, { "epoch": 0.3469256657118501, "grad_norm": 1455.7420654296875, "learning_rate": 4.150135286208093e-05, "loss": 73.2886, "step": 85870 }, { "epoch": 0.3469660669772177, "grad_norm": 328.35015869140625, "learning_rate": 4.149873045554671e-05, "loss": 42.4879, "step": 85880 }, { "epoch": 0.34700646824258535, "grad_norm": 434.59527587890625, "learning_rate": 4.1496107727361235e-05, "loss": 45.5354, "step": 85890 }, { "epoch": 0.347046869507953, "grad_norm": 3333.067138671875, "learning_rate": 4.149348467757566e-05, "loss": 98.8698, "step": 85900 }, { "epoch": 0.34708727077332063, "grad_norm": 605.552978515625, "learning_rate": 4.1490861306241096e-05, "loss": 74.4398, "step": 85910 }, { "epoch": 0.3471276720386883, "grad_norm": 1759.8612060546875, "learning_rate": 4.148823761340871e-05, "loss": 76.9145, "step": 85920 }, { "epoch": 0.34716807330405586, "grad_norm": 567.1038818359375, "learning_rate": 4.1485613599129636e-05, "loss": 40.8438, "step": 85930 }, { "epoch": 0.3472084745694235, "grad_norm": 3881.6103515625, "learning_rate": 4.148298926345504e-05, "loss": 66.3889, "step": 85940 }, { "epoch": 0.34724887583479114, "grad_norm": 621.2008666992188, "learning_rate": 4.148036460643608e-05, "loss": 59.3511, "step": 85950 }, { "epoch": 0.3472892771001588, "grad_norm": 468.0349426269531, "learning_rate": 4.1477739628123934e-05, "loss": 101.2036, "step": 85960 }, { "epoch": 0.3473296783655264, "grad_norm": 1429.98095703125, "learning_rate": 4.1475114328569776e-05, "loss": 56.234, "step": 85970 }, { "epoch": 0.34737007963089406, "grad_norm": 695.315673828125, "learning_rate": 4.147248870782477e-05, "loss": 54.0909, "step": 85980 }, { "epoch": 0.3474104808962617, "grad_norm": 1032.339111328125, "learning_rate": 4.146986276594012e-05, "loss": 73.7455, "step": 85990 }, { "epoch": 0.3474508821616293, "grad_norm": 353.8186950683594, "learning_rate": 4.146723650296701e-05, "loss": 55.2589, "step": 86000 }, { "epoch": 0.3474912834269969, "grad_norm": 1116.111572265625, "learning_rate": 4.146460991895666e-05, "loss": 58.2723, "step": 86010 }, { "epoch": 0.34753168469236456, "grad_norm": 416.8829040527344, "learning_rate": 4.1461983013960245e-05, "loss": 47.4193, "step": 86020 }, { "epoch": 0.3475720859577322, "grad_norm": 794.2473754882812, "learning_rate": 4.1459355788029013e-05, "loss": 67.6769, "step": 86030 }, { "epoch": 0.34761248722309984, "grad_norm": 727.6878051757812, "learning_rate": 4.145672824121416e-05, "loss": 84.4826, "step": 86040 }, { "epoch": 0.3476528884884675, "grad_norm": 2156.322265625, "learning_rate": 4.145410037356692e-05, "loss": 82.6664, "step": 86050 }, { "epoch": 0.34769328975383507, "grad_norm": 511.3225402832031, "learning_rate": 4.145147218513852e-05, "loss": 43.9569, "step": 86060 }, { "epoch": 0.3477336910192027, "grad_norm": 298.62005615234375, "learning_rate": 4.14488436759802e-05, "loss": 45.4608, "step": 86070 }, { "epoch": 0.34777409228457035, "grad_norm": 1355.5355224609375, "learning_rate": 4.144621484614319e-05, "loss": 124.826, "step": 86080 }, { "epoch": 0.347814493549938, "grad_norm": 1087.8583984375, "learning_rate": 4.1443585695678774e-05, "loss": 54.7751, "step": 86090 }, { "epoch": 0.3478548948153056, "grad_norm": 431.3752746582031, "learning_rate": 4.1440956224638184e-05, "loss": 63.882, "step": 86100 }, { "epoch": 0.34789529608067327, "grad_norm": 1415.1689453125, "learning_rate": 4.143832643307269e-05, "loss": 55.6841, "step": 86110 }, { "epoch": 0.34793569734604085, "grad_norm": 500.28759765625, "learning_rate": 4.1435696321033554e-05, "loss": 75.8191, "step": 86120 }, { "epoch": 0.3479760986114085, "grad_norm": 725.256591796875, "learning_rate": 4.143306588857206e-05, "loss": 68.6684, "step": 86130 }, { "epoch": 0.34801649987677613, "grad_norm": 1193.91357421875, "learning_rate": 4.143043513573949e-05, "loss": 68.4111, "step": 86140 }, { "epoch": 0.34805690114214377, "grad_norm": 311.3026428222656, "learning_rate": 4.1427804062587116e-05, "loss": 60.3588, "step": 86150 }, { "epoch": 0.3480973024075114, "grad_norm": 2180.667236328125, "learning_rate": 4.142517266916625e-05, "loss": 71.0806, "step": 86160 }, { "epoch": 0.34813770367287905, "grad_norm": 561.2362060546875, "learning_rate": 4.1422540955528186e-05, "loss": 65.3, "step": 86170 }, { "epoch": 0.3481781049382467, "grad_norm": 674.5610961914062, "learning_rate": 4.141990892172424e-05, "loss": 77.8949, "step": 86180 }, { "epoch": 0.3482185062036143, "grad_norm": 1787.229248046875, "learning_rate": 4.14172765678057e-05, "loss": 88.2457, "step": 86190 }, { "epoch": 0.3482589074689819, "grad_norm": 1689.93505859375, "learning_rate": 4.1414643893823914e-05, "loss": 63.5292, "step": 86200 }, { "epoch": 0.34829930873434956, "grad_norm": 1198.643310546875, "learning_rate": 4.141201089983019e-05, "loss": 57.5542, "step": 86210 }, { "epoch": 0.3483397099997172, "grad_norm": 301.3227233886719, "learning_rate": 4.1409377585875865e-05, "loss": 59.0172, "step": 86220 }, { "epoch": 0.34838011126508484, "grad_norm": 4092.246826171875, "learning_rate": 4.1406743952012275e-05, "loss": 72.4229, "step": 86230 }, { "epoch": 0.3484205125304525, "grad_norm": 2412.6689453125, "learning_rate": 4.140410999829076e-05, "loss": 118.2052, "step": 86240 }, { "epoch": 0.34846091379582006, "grad_norm": 649.2863159179688, "learning_rate": 4.140147572476268e-05, "loss": 78.7629, "step": 86250 }, { "epoch": 0.3485013150611877, "grad_norm": 634.6868286132812, "learning_rate": 4.1398841131479395e-05, "loss": 59.1645, "step": 86260 }, { "epoch": 0.34854171632655534, "grad_norm": 908.8325805664062, "learning_rate": 4.139620621849225e-05, "loss": 60.6888, "step": 86270 }, { "epoch": 0.348582117591923, "grad_norm": 1681.2099609375, "learning_rate": 4.139357098585262e-05, "loss": 82.6059, "step": 86280 }, { "epoch": 0.3486225188572906, "grad_norm": 743.29443359375, "learning_rate": 4.1390935433611886e-05, "loss": 86.1345, "step": 86290 }, { "epoch": 0.34866292012265826, "grad_norm": 906.7951049804688, "learning_rate": 4.138829956182144e-05, "loss": 63.9069, "step": 86300 }, { "epoch": 0.3487033213880259, "grad_norm": 821.8445434570312, "learning_rate": 4.138566337053264e-05, "loss": 64.0225, "step": 86310 }, { "epoch": 0.3487437226533935, "grad_norm": 1009.7755737304688, "learning_rate": 4.1383026859796905e-05, "loss": 71.5133, "step": 86320 }, { "epoch": 0.3487841239187611, "grad_norm": 260.2582702636719, "learning_rate": 4.138039002966563e-05, "loss": 52.0272, "step": 86330 }, { "epoch": 0.34882452518412876, "grad_norm": 847.3204956054688, "learning_rate": 4.137775288019021e-05, "loss": 59.5128, "step": 86340 }, { "epoch": 0.3488649264494964, "grad_norm": 414.3377380371094, "learning_rate": 4.137511541142207e-05, "loss": 55.5041, "step": 86350 }, { "epoch": 0.34890532771486404, "grad_norm": 2023.2427978515625, "learning_rate": 4.137247762341262e-05, "loss": 67.8408, "step": 86360 }, { "epoch": 0.3489457289802317, "grad_norm": 1327.11767578125, "learning_rate": 4.136983951621329e-05, "loss": 50.12, "step": 86370 }, { "epoch": 0.34898613024559927, "grad_norm": 608.749267578125, "learning_rate": 4.136720108987552e-05, "loss": 61.7877, "step": 86380 }, { "epoch": 0.3490265315109669, "grad_norm": 864.80322265625, "learning_rate": 4.136456234445073e-05, "loss": 72.582, "step": 86390 }, { "epoch": 0.34906693277633455, "grad_norm": 749.7403564453125, "learning_rate": 4.136192327999037e-05, "loss": 68.246, "step": 86400 }, { "epoch": 0.3491073340417022, "grad_norm": 1620.177734375, "learning_rate": 4.1359283896545895e-05, "loss": 94.8168, "step": 86410 }, { "epoch": 0.34914773530706983, "grad_norm": 2137.778564453125, "learning_rate": 4.135664419416877e-05, "loss": 54.6499, "step": 86420 }, { "epoch": 0.34918813657243747, "grad_norm": 355.5881042480469, "learning_rate": 4.1354004172910434e-05, "loss": 71.6114, "step": 86430 }, { "epoch": 0.34922853783780505, "grad_norm": 579.06005859375, "learning_rate": 4.135136383282237e-05, "loss": 83.3694, "step": 86440 }, { "epoch": 0.3492689391031727, "grad_norm": 363.8780822753906, "learning_rate": 4.134872317395604e-05, "loss": 71.6161, "step": 86450 }, { "epoch": 0.34930934036854033, "grad_norm": 734.9833984375, "learning_rate": 4.134608219636294e-05, "loss": 63.2569, "step": 86460 }, { "epoch": 0.349349741633908, "grad_norm": 659.5712890625, "learning_rate": 4.134344090009455e-05, "loss": 59.2688, "step": 86470 }, { "epoch": 0.3493901428992756, "grad_norm": 658.69677734375, "learning_rate": 4.1340799285202376e-05, "loss": 49.0295, "step": 86480 }, { "epoch": 0.34943054416464325, "grad_norm": 687.8778076171875, "learning_rate": 4.13381573517379e-05, "loss": 84.1034, "step": 86490 }, { "epoch": 0.3494709454300109, "grad_norm": 576.1513061523438, "learning_rate": 4.133551509975264e-05, "loss": 46.6568, "step": 86500 }, { "epoch": 0.3495113466953785, "grad_norm": 435.50640869140625, "learning_rate": 4.13328725292981e-05, "loss": 51.9577, "step": 86510 }, { "epoch": 0.3495517479607461, "grad_norm": 538.1187744140625, "learning_rate": 4.13302296404258e-05, "loss": 159.9702, "step": 86520 }, { "epoch": 0.34959214922611376, "grad_norm": 989.9334716796875, "learning_rate": 4.132758643318726e-05, "loss": 90.6167, "step": 86530 }, { "epoch": 0.3496325504914814, "grad_norm": 538.1937255859375, "learning_rate": 4.132494290763403e-05, "loss": 52.8154, "step": 86540 }, { "epoch": 0.34967295175684904, "grad_norm": 2396.126708984375, "learning_rate": 4.1322299063817624e-05, "loss": 60.857, "step": 86550 }, { "epoch": 0.3497133530222167, "grad_norm": 909.927978515625, "learning_rate": 4.131965490178959e-05, "loss": 127.2578, "step": 86560 }, { "epoch": 0.34975375428758426, "grad_norm": 535.4166259765625, "learning_rate": 4.131701042160149e-05, "loss": 77.5207, "step": 86570 }, { "epoch": 0.3497941555529519, "grad_norm": 947.1220703125, "learning_rate": 4.131436562330487e-05, "loss": 85.3897, "step": 86580 }, { "epoch": 0.34983455681831954, "grad_norm": 1153.6351318359375, "learning_rate": 4.13117205069513e-05, "loss": 85.3414, "step": 86590 }, { "epoch": 0.3498749580836872, "grad_norm": 1040.203857421875, "learning_rate": 4.130907507259233e-05, "loss": 79.24, "step": 86600 }, { "epoch": 0.3499153593490548, "grad_norm": 737.9971313476562, "learning_rate": 4.130642932027955e-05, "loss": 47.7125, "step": 86610 }, { "epoch": 0.34995576061442246, "grad_norm": 1519.4359130859375, "learning_rate": 4.130378325006453e-05, "loss": 59.9955, "step": 86620 }, { "epoch": 0.3499961618797901, "grad_norm": 734.98828125, "learning_rate": 4.130113686199887e-05, "loss": 44.1955, "step": 86630 }, { "epoch": 0.3500365631451577, "grad_norm": 1075.6939697265625, "learning_rate": 4.129849015613415e-05, "loss": 73.9399, "step": 86640 }, { "epoch": 0.3500769644105253, "grad_norm": 3161.4248046875, "learning_rate": 4.1295843132521973e-05, "loss": 81.6022, "step": 86650 }, { "epoch": 0.35011736567589297, "grad_norm": 1227.13134765625, "learning_rate": 4.129319579121394e-05, "loss": 67.1966, "step": 86660 }, { "epoch": 0.3501577669412606, "grad_norm": 779.573974609375, "learning_rate": 4.129054813226167e-05, "loss": 72.8276, "step": 86670 }, { "epoch": 0.35019816820662825, "grad_norm": 378.6111755371094, "learning_rate": 4.1287900155716784e-05, "loss": 43.1125, "step": 86680 }, { "epoch": 0.3502385694719959, "grad_norm": 865.598876953125, "learning_rate": 4.128525186163089e-05, "loss": 75.8998, "step": 86690 }, { "epoch": 0.35027897073736347, "grad_norm": 1067.35693359375, "learning_rate": 4.128260325005564e-05, "loss": 66.426, "step": 86700 }, { "epoch": 0.3503193720027311, "grad_norm": 1469.167236328125, "learning_rate": 4.127995432104264e-05, "loss": 95.7886, "step": 86710 }, { "epoch": 0.35035977326809875, "grad_norm": 554.118408203125, "learning_rate": 4.127730507464356e-05, "loss": 83.7477, "step": 86720 }, { "epoch": 0.3504001745334664, "grad_norm": 1134.07470703125, "learning_rate": 4.127465551091003e-05, "loss": 123.3934, "step": 86730 }, { "epoch": 0.35044057579883403, "grad_norm": 1663.503173828125, "learning_rate": 4.1272005629893714e-05, "loss": 51.4834, "step": 86740 }, { "epoch": 0.35048097706420167, "grad_norm": 464.7097473144531, "learning_rate": 4.1269355431646274e-05, "loss": 47.998, "step": 86750 }, { "epoch": 0.35052137832956926, "grad_norm": 533.934814453125, "learning_rate": 4.126670491621938e-05, "loss": 89.6148, "step": 86760 }, { "epoch": 0.3505617795949369, "grad_norm": 1182.53466796875, "learning_rate": 4.126405408366468e-05, "loss": 58.9152, "step": 86770 }, { "epoch": 0.35060218086030454, "grad_norm": 1748.3175048828125, "learning_rate": 4.1261402934033886e-05, "loss": 76.4899, "step": 86780 }, { "epoch": 0.3506425821256722, "grad_norm": 503.2514343261719, "learning_rate": 4.125875146737868e-05, "loss": 94.3811, "step": 86790 }, { "epoch": 0.3506829833910398, "grad_norm": 602.4920043945312, "learning_rate": 4.125609968375072e-05, "loss": 41.265, "step": 86800 }, { "epoch": 0.35072338465640746, "grad_norm": 1253.5115966796875, "learning_rate": 4.125344758320174e-05, "loss": 56.0734, "step": 86810 }, { "epoch": 0.3507637859217751, "grad_norm": 1205.50634765625, "learning_rate": 4.125079516578344e-05, "loss": 90.1057, "step": 86820 }, { "epoch": 0.3508041871871427, "grad_norm": 422.2942810058594, "learning_rate": 4.12481424315475e-05, "loss": 60.9675, "step": 86830 }, { "epoch": 0.3508445884525103, "grad_norm": 702.0094604492188, "learning_rate": 4.124548938054568e-05, "loss": 53.5536, "step": 86840 }, { "epoch": 0.35088498971787796, "grad_norm": 604.8543090820312, "learning_rate": 4.1242836012829665e-05, "loss": 64.9087, "step": 86850 }, { "epoch": 0.3509253909832456, "grad_norm": 307.5919494628906, "learning_rate": 4.1240182328451204e-05, "loss": 72.9601, "step": 86860 }, { "epoch": 0.35096579224861324, "grad_norm": 1943.6920166015625, "learning_rate": 4.123752832746203e-05, "loss": 87.0177, "step": 86870 }, { "epoch": 0.3510061935139809, "grad_norm": 1072.779296875, "learning_rate": 4.123487400991388e-05, "loss": 73.2874, "step": 86880 }, { "epoch": 0.35104659477934846, "grad_norm": 644.7705078125, "learning_rate": 4.1232219375858504e-05, "loss": 82.4604, "step": 86890 }, { "epoch": 0.3510869960447161, "grad_norm": 214.5071563720703, "learning_rate": 4.1229564425347654e-05, "loss": 50.0135, "step": 86900 }, { "epoch": 0.35112739731008374, "grad_norm": 662.0516357421875, "learning_rate": 4.122690915843309e-05, "loss": 45.6385, "step": 86910 }, { "epoch": 0.3511677985754514, "grad_norm": 1001.0264282226562, "learning_rate": 4.122425357516658e-05, "loss": 63.8673, "step": 86920 }, { "epoch": 0.351208199840819, "grad_norm": 1112.534912109375, "learning_rate": 4.1221597675599886e-05, "loss": 54.2994, "step": 86930 }, { "epoch": 0.35124860110618666, "grad_norm": 565.2496948242188, "learning_rate": 4.1218941459784796e-05, "loss": 50.4045, "step": 86940 }, { "epoch": 0.3512890023715543, "grad_norm": 823.8052978515625, "learning_rate": 4.121628492777311e-05, "loss": 66.9651, "step": 86950 }, { "epoch": 0.3513294036369219, "grad_norm": 875.3696899414062, "learning_rate": 4.121362807961658e-05, "loss": 84.8828, "step": 86960 }, { "epoch": 0.35136980490228953, "grad_norm": 412.0173034667969, "learning_rate": 4.1210970915367026e-05, "loss": 78.3683, "step": 86970 }, { "epoch": 0.35141020616765717, "grad_norm": 1158.7080078125, "learning_rate": 4.120831343507625e-05, "loss": 77.7848, "step": 86980 }, { "epoch": 0.3514506074330248, "grad_norm": 261.9429016113281, "learning_rate": 4.1205655638796065e-05, "loss": 43.2981, "step": 86990 }, { "epoch": 0.35149100869839245, "grad_norm": 779.2062377929688, "learning_rate": 4.1202997526578276e-05, "loss": 57.5505, "step": 87000 }, { "epoch": 0.3515314099637601, "grad_norm": 742.50048828125, "learning_rate": 4.120033909847471e-05, "loss": 68.5733, "step": 87010 }, { "epoch": 0.3515718112291277, "grad_norm": 610.2131958007812, "learning_rate": 4.1197680354537186e-05, "loss": 64.115, "step": 87020 }, { "epoch": 0.3516122124944953, "grad_norm": 994.4957275390625, "learning_rate": 4.119502129481755e-05, "loss": 42.9862, "step": 87030 }, { "epoch": 0.35165261375986295, "grad_norm": 1622.78076171875, "learning_rate": 4.119236191936764e-05, "loss": 55.3836, "step": 87040 }, { "epoch": 0.3516930150252306, "grad_norm": 1582.70458984375, "learning_rate": 4.118970222823929e-05, "loss": 71.9621, "step": 87050 }, { "epoch": 0.35173341629059823, "grad_norm": 1007.8423461914062, "learning_rate": 4.118704222148436e-05, "loss": 66.6885, "step": 87060 }, { "epoch": 0.3517738175559659, "grad_norm": 431.42828369140625, "learning_rate": 4.118438189915471e-05, "loss": 83.4902, "step": 87070 }, { "epoch": 0.35181421882133346, "grad_norm": 1616.8265380859375, "learning_rate": 4.118172126130221e-05, "loss": 49.96, "step": 87080 }, { "epoch": 0.3518546200867011, "grad_norm": 358.4612121582031, "learning_rate": 4.117906030797871e-05, "loss": 85.6933, "step": 87090 }, { "epoch": 0.35189502135206874, "grad_norm": 595.7027587890625, "learning_rate": 4.1176399039236116e-05, "loss": 67.9241, "step": 87100 }, { "epoch": 0.3519354226174364, "grad_norm": 826.05517578125, "learning_rate": 4.117373745512628e-05, "loss": 92.1798, "step": 87110 }, { "epoch": 0.351975823882804, "grad_norm": 2575.160888671875, "learning_rate": 4.117107555570111e-05, "loss": 80.2855, "step": 87120 }, { "epoch": 0.35201622514817166, "grad_norm": 635.9917602539062, "learning_rate": 4.1168413341012496e-05, "loss": 51.3892, "step": 87130 }, { "epoch": 0.3520566264135393, "grad_norm": 586.3646240234375, "learning_rate": 4.116575081111235e-05, "loss": 74.7427, "step": 87140 }, { "epoch": 0.3520970276789069, "grad_norm": 379.6501770019531, "learning_rate": 4.116308796605255e-05, "loss": 42.7718, "step": 87150 }, { "epoch": 0.3521374289442745, "grad_norm": 757.53076171875, "learning_rate": 4.116042480588505e-05, "loss": 79.6147, "step": 87160 }, { "epoch": 0.35217783020964216, "grad_norm": 521.4224853515625, "learning_rate": 4.1157761330661734e-05, "loss": 114.8519, "step": 87170 }, { "epoch": 0.3522182314750098, "grad_norm": 543.6570434570312, "learning_rate": 4.115509754043454e-05, "loss": 67.7495, "step": 87180 }, { "epoch": 0.35225863274037744, "grad_norm": 635.4970703125, "learning_rate": 4.115243343525541e-05, "loss": 78.6889, "step": 87190 }, { "epoch": 0.3522990340057451, "grad_norm": 1206.6654052734375, "learning_rate": 4.1149769015176275e-05, "loss": 56.5697, "step": 87200 }, { "epoch": 0.35233943527111267, "grad_norm": 811.91455078125, "learning_rate": 4.114710428024907e-05, "loss": 78.0935, "step": 87210 }, { "epoch": 0.3523798365364803, "grad_norm": 1116.96240234375, "learning_rate": 4.114443923052577e-05, "loss": 59.2988, "step": 87220 }, { "epoch": 0.35242023780184795, "grad_norm": 623.7386474609375, "learning_rate": 4.11417738660583e-05, "loss": 56.9475, "step": 87230 }, { "epoch": 0.3524606390672156, "grad_norm": 1740.817138671875, "learning_rate": 4.113910818689864e-05, "loss": 65.6938, "step": 87240 }, { "epoch": 0.3525010403325832, "grad_norm": 752.4931640625, "learning_rate": 4.113644219309877e-05, "loss": 83.3021, "step": 87250 }, { "epoch": 0.35254144159795087, "grad_norm": 1242.556640625, "learning_rate": 4.1133775884710634e-05, "loss": 59.4292, "step": 87260 }, { "epoch": 0.3525818428633185, "grad_norm": 1105.26318359375, "learning_rate": 4.113110926178625e-05, "loss": 52.1607, "step": 87270 }, { "epoch": 0.3526222441286861, "grad_norm": 941.704345703125, "learning_rate": 4.112844232437757e-05, "loss": 44.7782, "step": 87280 }, { "epoch": 0.35266264539405373, "grad_norm": 975.8741455078125, "learning_rate": 4.112577507253661e-05, "loss": 63.2205, "step": 87290 }, { "epoch": 0.35270304665942137, "grad_norm": 267.49029541015625, "learning_rate": 4.112310750631536e-05, "loss": 55.718, "step": 87300 }, { "epoch": 0.352743447924789, "grad_norm": 492.7359619140625, "learning_rate": 4.112043962576583e-05, "loss": 87.6296, "step": 87310 }, { "epoch": 0.35278384919015665, "grad_norm": 723.97412109375, "learning_rate": 4.1117771430940035e-05, "loss": 72.1795, "step": 87320 }, { "epoch": 0.3528242504555243, "grad_norm": 1166.4947509765625, "learning_rate": 4.111510292188998e-05, "loss": 61.6153, "step": 87330 }, { "epoch": 0.3528646517208919, "grad_norm": 943.3729248046875, "learning_rate": 4.111243409866769e-05, "loss": 78.3224, "step": 87340 }, { "epoch": 0.3529050529862595, "grad_norm": 1874.787353515625, "learning_rate": 4.110976496132522e-05, "loss": 43.5677, "step": 87350 }, { "epoch": 0.35294545425162716, "grad_norm": 567.2123413085938, "learning_rate": 4.1107095509914584e-05, "loss": 114.9888, "step": 87360 }, { "epoch": 0.3529858555169948, "grad_norm": 1504.210205078125, "learning_rate": 4.1104425744487826e-05, "loss": 63.7791, "step": 87370 }, { "epoch": 0.35302625678236244, "grad_norm": 252.47015380859375, "learning_rate": 4.1101755665096996e-05, "loss": 46.304, "step": 87380 }, { "epoch": 0.3530666580477301, "grad_norm": 952.9064331054688, "learning_rate": 4.109908527179415e-05, "loss": 68.7049, "step": 87390 }, { "epoch": 0.35310705931309766, "grad_norm": 3070.967041015625, "learning_rate": 4.109641456463135e-05, "loss": 70.3882, "step": 87400 }, { "epoch": 0.3531474605784653, "grad_norm": 1895.307861328125, "learning_rate": 4.109374354366066e-05, "loss": 44.8805, "step": 87410 }, { "epoch": 0.35318786184383294, "grad_norm": 1331.394287109375, "learning_rate": 4.109107220893415e-05, "loss": 59.9866, "step": 87420 }, { "epoch": 0.3532282631092006, "grad_norm": 249.96646118164062, "learning_rate": 4.1088400560503905e-05, "loss": 66.2172, "step": 87430 }, { "epoch": 0.3532686643745682, "grad_norm": 947.6691284179688, "learning_rate": 4.108572859842201e-05, "loss": 90.3438, "step": 87440 }, { "epoch": 0.35330906563993586, "grad_norm": 573.4298706054688, "learning_rate": 4.108305632274055e-05, "loss": 34.3865, "step": 87450 }, { "epoch": 0.3533494669053035, "grad_norm": 602.94384765625, "learning_rate": 4.108038373351163e-05, "loss": 51.0906, "step": 87460 }, { "epoch": 0.3533898681706711, "grad_norm": 3697.798095703125, "learning_rate": 4.107771083078735e-05, "loss": 70.8239, "step": 87470 }, { "epoch": 0.3534302694360387, "grad_norm": 549.9475708007812, "learning_rate": 4.107503761461983e-05, "loss": 48.9701, "step": 87480 }, { "epoch": 0.35347067070140636, "grad_norm": 651.7734985351562, "learning_rate": 4.107236408506116e-05, "loss": 87.1631, "step": 87490 }, { "epoch": 0.353511071966774, "grad_norm": 854.0162963867188, "learning_rate": 4.1069690242163484e-05, "loss": 39.3587, "step": 87500 }, { "epoch": 0.35355147323214164, "grad_norm": 845.365234375, "learning_rate": 4.106701608597893e-05, "loss": 80.0646, "step": 87510 }, { "epoch": 0.3535918744975093, "grad_norm": 571.5369262695312, "learning_rate": 4.106434161655962e-05, "loss": 54.8925, "step": 87520 }, { "epoch": 0.35363227576287687, "grad_norm": 514.8745727539062, "learning_rate": 4.106166683395769e-05, "loss": 80.812, "step": 87530 }, { "epoch": 0.3536726770282445, "grad_norm": 895.7685546875, "learning_rate": 4.105899173822531e-05, "loss": 52.7344, "step": 87540 }, { "epoch": 0.35371307829361215, "grad_norm": 408.03485107421875, "learning_rate": 4.1056316329414616e-05, "loss": 63.1606, "step": 87550 }, { "epoch": 0.3537534795589798, "grad_norm": 794.3991088867188, "learning_rate": 4.105364060757776e-05, "loss": 81.9505, "step": 87560 }, { "epoch": 0.35379388082434743, "grad_norm": 308.87799072265625, "learning_rate": 4.1050964572766923e-05, "loss": 71.2676, "step": 87570 }, { "epoch": 0.35383428208971507, "grad_norm": 743.8997192382812, "learning_rate": 4.104828822503427e-05, "loss": 54.9144, "step": 87580 }, { "epoch": 0.3538746833550827, "grad_norm": 800.4918212890625, "learning_rate": 4.104561156443197e-05, "loss": 48.0048, "step": 87590 }, { "epoch": 0.3539150846204503, "grad_norm": 217.0595245361328, "learning_rate": 4.104293459101222e-05, "loss": 71.4233, "step": 87600 }, { "epoch": 0.35395548588581793, "grad_norm": 778.3648071289062, "learning_rate": 4.104025730482719e-05, "loss": 66.6853, "step": 87610 }, { "epoch": 0.3539958871511856, "grad_norm": 712.687255859375, "learning_rate": 4.103757970592909e-05, "loss": 59.8492, "step": 87620 }, { "epoch": 0.3540362884165532, "grad_norm": 366.0815734863281, "learning_rate": 4.1034901794370116e-05, "loss": 49.8096, "step": 87630 }, { "epoch": 0.35407668968192085, "grad_norm": 1185.479736328125, "learning_rate": 4.1032223570202474e-05, "loss": 83.2932, "step": 87640 }, { "epoch": 0.3541170909472885, "grad_norm": 411.51373291015625, "learning_rate": 4.102954503347839e-05, "loss": 45.1826, "step": 87650 }, { "epoch": 0.3541574922126561, "grad_norm": 749.7902221679688, "learning_rate": 4.102686618425006e-05, "loss": 56.4452, "step": 87660 }, { "epoch": 0.3541978934780237, "grad_norm": 573.7445068359375, "learning_rate": 4.102418702256973e-05, "loss": 59.8702, "step": 87670 }, { "epoch": 0.35423829474339136, "grad_norm": 1168.4649658203125, "learning_rate": 4.1021507548489625e-05, "loss": 66.0522, "step": 87680 }, { "epoch": 0.354278696008759, "grad_norm": 949.6201171875, "learning_rate": 4.1018827762061985e-05, "loss": 82.0665, "step": 87690 }, { "epoch": 0.35431909727412664, "grad_norm": 1565.3529052734375, "learning_rate": 4.101614766333904e-05, "loss": 102.3075, "step": 87700 }, { "epoch": 0.3543594985394943, "grad_norm": 1542.8006591796875, "learning_rate": 4.101346725237305e-05, "loss": 66.2509, "step": 87710 }, { "epoch": 0.35439989980486186, "grad_norm": 527.8439331054688, "learning_rate": 4.1010786529216284e-05, "loss": 71.4895, "step": 87720 }, { "epoch": 0.3544403010702295, "grad_norm": 1168.476318359375, "learning_rate": 4.100810549392099e-05, "loss": 85.4038, "step": 87730 }, { "epoch": 0.35448070233559714, "grad_norm": 714.1607666015625, "learning_rate": 4.100542414653943e-05, "loss": 51.9524, "step": 87740 }, { "epoch": 0.3545211036009648, "grad_norm": 2820.892578125, "learning_rate": 4.100274248712389e-05, "loss": 84.763, "step": 87750 }, { "epoch": 0.3545615048663324, "grad_norm": 1056.6544189453125, "learning_rate": 4.1000060515726647e-05, "loss": 46.4493, "step": 87760 }, { "epoch": 0.35460190613170006, "grad_norm": 637.3196411132812, "learning_rate": 4.0997378232399984e-05, "loss": 64.2221, "step": 87770 }, { "epoch": 0.3546423073970677, "grad_norm": 1249.30078125, "learning_rate": 4.09946956371962e-05, "loss": 62.3628, "step": 87780 }, { "epoch": 0.3546827086624353, "grad_norm": 562.5096435546875, "learning_rate": 4.0992012730167584e-05, "loss": 56.4057, "step": 87790 }, { "epoch": 0.3547231099278029, "grad_norm": 515.3623046875, "learning_rate": 4.098932951136645e-05, "loss": 75.1756, "step": 87800 }, { "epoch": 0.35476351119317057, "grad_norm": 1004.6653442382812, "learning_rate": 4.098664598084511e-05, "loss": 87.7658, "step": 87810 }, { "epoch": 0.3548039124585382, "grad_norm": 514.6812133789062, "learning_rate": 4.0983962138655873e-05, "loss": 63.0205, "step": 87820 }, { "epoch": 0.35484431372390585, "grad_norm": 327.2781677246094, "learning_rate": 4.0981277984851066e-05, "loss": 64.0254, "step": 87830 }, { "epoch": 0.3548847149892735, "grad_norm": 945.9813842773438, "learning_rate": 4.097859351948301e-05, "loss": 71.4724, "step": 87840 }, { "epoch": 0.35492511625464107, "grad_norm": 1146.121337890625, "learning_rate": 4.0975908742604055e-05, "loss": 74.1327, "step": 87850 }, { "epoch": 0.3549655175200087, "grad_norm": 2756.161376953125, "learning_rate": 4.097322365426653e-05, "loss": 60.8623, "step": 87860 }, { "epoch": 0.35500591878537635, "grad_norm": 616.4971923828125, "learning_rate": 4.097053825452278e-05, "loss": 74.3813, "step": 87870 }, { "epoch": 0.355046320050744, "grad_norm": 796.4453735351562, "learning_rate": 4.0967852543425175e-05, "loss": 65.7487, "step": 87880 }, { "epoch": 0.35508672131611163, "grad_norm": 377.42138671875, "learning_rate": 4.0965166521026065e-05, "loss": 46.341, "step": 87890 }, { "epoch": 0.35512712258147927, "grad_norm": 605.7762451171875, "learning_rate": 4.096248018737781e-05, "loss": 81.4626, "step": 87900 }, { "epoch": 0.3551675238468469, "grad_norm": 408.6437072753906, "learning_rate": 4.095979354253279e-05, "loss": 85.3279, "step": 87910 }, { "epoch": 0.3552079251122145, "grad_norm": 673.0602416992188, "learning_rate": 4.095710658654337e-05, "loss": 35.6571, "step": 87920 }, { "epoch": 0.35524832637758214, "grad_norm": 562.7644653320312, "learning_rate": 4.0954419319461946e-05, "loss": 68.498, "step": 87930 }, { "epoch": 0.3552887276429498, "grad_norm": 1345.530029296875, "learning_rate": 4.09517317413409e-05, "loss": 54.9481, "step": 87940 }, { "epoch": 0.3553291289083174, "grad_norm": 670.7591552734375, "learning_rate": 4.094904385223264e-05, "loss": 95.4115, "step": 87950 }, { "epoch": 0.35536953017368506, "grad_norm": 6252.8359375, "learning_rate": 4.094635565218955e-05, "loss": 85.2645, "step": 87960 }, { "epoch": 0.3554099314390527, "grad_norm": 699.56982421875, "learning_rate": 4.094366714126405e-05, "loss": 61.0998, "step": 87970 }, { "epoch": 0.3554503327044203, "grad_norm": 572.2498779296875, "learning_rate": 4.094097831950855e-05, "loss": 55.9745, "step": 87980 }, { "epoch": 0.3554907339697879, "grad_norm": 477.888671875, "learning_rate": 4.093828918697547e-05, "loss": 64.4699, "step": 87990 }, { "epoch": 0.35553113523515556, "grad_norm": 1221.31982421875, "learning_rate": 4.093559974371725e-05, "loss": 60.6648, "step": 88000 }, { "epoch": 0.3555715365005232, "grad_norm": 390.103759765625, "learning_rate": 4.09329099897863e-05, "loss": 51.6583, "step": 88010 }, { "epoch": 0.35561193776589084, "grad_norm": 1909.0634765625, "learning_rate": 4.0930219925235056e-05, "loss": 62.6641, "step": 88020 }, { "epoch": 0.3556523390312585, "grad_norm": 0.0, "learning_rate": 4.0927529550115986e-05, "loss": 54.3005, "step": 88030 }, { "epoch": 0.35569274029662606, "grad_norm": 1166.9232177734375, "learning_rate": 4.0924838864481516e-05, "loss": 77.7679, "step": 88040 }, { "epoch": 0.3557331415619937, "grad_norm": 736.1539916992188, "learning_rate": 4.092214786838413e-05, "loss": 55.0295, "step": 88050 }, { "epoch": 0.35577354282736134, "grad_norm": 957.0504150390625, "learning_rate": 4.0919456561876256e-05, "loss": 72.6961, "step": 88060 }, { "epoch": 0.355813944092729, "grad_norm": 687.1393432617188, "learning_rate": 4.091676494501039e-05, "loss": 60.3476, "step": 88070 }, { "epoch": 0.3558543453580966, "grad_norm": 1779.8883056640625, "learning_rate": 4.0914073017838996e-05, "loss": 85.0388, "step": 88080 }, { "epoch": 0.35589474662346426, "grad_norm": 1815.7283935546875, "learning_rate": 4.091138078041455e-05, "loss": 67.7276, "step": 88090 }, { "epoch": 0.3559351478888319, "grad_norm": 510.8993835449219, "learning_rate": 4.090868823278956e-05, "loss": 74.9138, "step": 88100 }, { "epoch": 0.3559755491541995, "grad_norm": 1010.1596069335938, "learning_rate": 4.090599537501649e-05, "loss": 70.1757, "step": 88110 }, { "epoch": 0.35601595041956713, "grad_norm": 806.6387329101562, "learning_rate": 4.090330220714785e-05, "loss": 64.2958, "step": 88120 }, { "epoch": 0.35605635168493477, "grad_norm": 1274.768310546875, "learning_rate": 4.090060872923615e-05, "loss": 98.0021, "step": 88130 }, { "epoch": 0.3560967529503024, "grad_norm": 819.7002563476562, "learning_rate": 4.089791494133389e-05, "loss": 52.4494, "step": 88140 }, { "epoch": 0.35613715421567005, "grad_norm": 1136.931396484375, "learning_rate": 4.0895220843493606e-05, "loss": 105.6924, "step": 88150 }, { "epoch": 0.3561775554810377, "grad_norm": 2234.421630859375, "learning_rate": 4.0892526435767795e-05, "loss": 103.7004, "step": 88160 }, { "epoch": 0.3562179567464053, "grad_norm": 841.349853515625, "learning_rate": 4.088983171820901e-05, "loss": 52.9514, "step": 88170 }, { "epoch": 0.3562583580117729, "grad_norm": 725.3030395507812, "learning_rate": 4.088713669086977e-05, "loss": 49.6578, "step": 88180 }, { "epoch": 0.35629875927714055, "grad_norm": 1026.1910400390625, "learning_rate": 4.088444135380262e-05, "loss": 63.2285, "step": 88190 }, { "epoch": 0.3563391605425082, "grad_norm": 1608.8538818359375, "learning_rate": 4.088174570706011e-05, "loss": 65.9221, "step": 88200 }, { "epoch": 0.35637956180787583, "grad_norm": 608.7267456054688, "learning_rate": 4.0879049750694795e-05, "loss": 59.0359, "step": 88210 }, { "epoch": 0.3564199630732435, "grad_norm": 3048.426513671875, "learning_rate": 4.0876353484759224e-05, "loss": 100.1788, "step": 88220 }, { "epoch": 0.35646036433861106, "grad_norm": 540.722412109375, "learning_rate": 4.087365690930597e-05, "loss": 66.9113, "step": 88230 }, { "epoch": 0.3565007656039787, "grad_norm": 781.4616088867188, "learning_rate": 4.0870960024387596e-05, "loss": 103.4365, "step": 88240 }, { "epoch": 0.35654116686934634, "grad_norm": 1404.780029296875, "learning_rate": 4.086826283005669e-05, "loss": 58.6043, "step": 88250 }, { "epoch": 0.356581568134714, "grad_norm": 440.78472900390625, "learning_rate": 4.0865565326365835e-05, "loss": 43.4884, "step": 88260 }, { "epoch": 0.3566219694000816, "grad_norm": 425.8787841796875, "learning_rate": 4.086286751336761e-05, "loss": 44.2717, "step": 88270 }, { "epoch": 0.35666237066544926, "grad_norm": 386.0218200683594, "learning_rate": 4.0860169391114625e-05, "loss": 52.1067, "step": 88280 }, { "epoch": 0.3567027719308169, "grad_norm": 421.0408935546875, "learning_rate": 4.085747095965946e-05, "loss": 63.7093, "step": 88290 }, { "epoch": 0.3567431731961845, "grad_norm": 688.8589477539062, "learning_rate": 4.085477221905474e-05, "loss": 63.6186, "step": 88300 }, { "epoch": 0.3567835744615521, "grad_norm": 2102.650634765625, "learning_rate": 4.085207316935308e-05, "loss": 120.8342, "step": 88310 }, { "epoch": 0.35682397572691976, "grad_norm": 644.7640380859375, "learning_rate": 4.084937381060708e-05, "loss": 71.6069, "step": 88320 }, { "epoch": 0.3568643769922874, "grad_norm": 718.741455078125, "learning_rate": 4.084667414286939e-05, "loss": 49.9685, "step": 88330 }, { "epoch": 0.35690477825765504, "grad_norm": 1353.2684326171875, "learning_rate": 4.0843974166192614e-05, "loss": 89.7978, "step": 88340 }, { "epoch": 0.3569451795230227, "grad_norm": 518.8037109375, "learning_rate": 4.0841273880629416e-05, "loss": 68.2225, "step": 88350 }, { "epoch": 0.35698558078839027, "grad_norm": 491.72265625, "learning_rate": 4.083857328623243e-05, "loss": 62.1392, "step": 88360 }, { "epoch": 0.3570259820537579, "grad_norm": 713.7077026367188, "learning_rate": 4.0835872383054296e-05, "loss": 59.1912, "step": 88370 }, { "epoch": 0.35706638331912555, "grad_norm": 546.5416259765625, "learning_rate": 4.083317117114768e-05, "loss": 65.1588, "step": 88380 }, { "epoch": 0.3571067845844932, "grad_norm": 749.3577270507812, "learning_rate": 4.083046965056524e-05, "loss": 65.049, "step": 88390 }, { "epoch": 0.3571471858498608, "grad_norm": 808.5428466796875, "learning_rate": 4.082776782135964e-05, "loss": 59.8705, "step": 88400 }, { "epoch": 0.35718758711522847, "grad_norm": 372.74261474609375, "learning_rate": 4.082506568358357e-05, "loss": 62.2298, "step": 88410 }, { "epoch": 0.3572279883805961, "grad_norm": 574.08447265625, "learning_rate": 4.082236323728968e-05, "loss": 56.4968, "step": 88420 }, { "epoch": 0.3572683896459637, "grad_norm": 901.8555297851562, "learning_rate": 4.0819660482530684e-05, "loss": 67.4959, "step": 88430 }, { "epoch": 0.35730879091133133, "grad_norm": 2012.2177734375, "learning_rate": 4.0816957419359264e-05, "loss": 60.3428, "step": 88440 }, { "epoch": 0.35734919217669897, "grad_norm": 434.2742614746094, "learning_rate": 4.0814254047828116e-05, "loss": 55.9326, "step": 88450 }, { "epoch": 0.3573895934420666, "grad_norm": 1581.9569091796875, "learning_rate": 4.081155036798994e-05, "loss": 80.8084, "step": 88460 }, { "epoch": 0.35742999470743425, "grad_norm": 980.014404296875, "learning_rate": 4.080884637989745e-05, "loss": 53.9677, "step": 88470 }, { "epoch": 0.3574703959728019, "grad_norm": 438.5036926269531, "learning_rate": 4.080614208360336e-05, "loss": 46.5922, "step": 88480 }, { "epoch": 0.3575107972381695, "grad_norm": 1167.0660400390625, "learning_rate": 4.080343747916039e-05, "loss": 60.1899, "step": 88490 }, { "epoch": 0.3575511985035371, "grad_norm": 1493.02734375, "learning_rate": 4.080073256662127e-05, "loss": 55.8727, "step": 88500 }, { "epoch": 0.35759159976890476, "grad_norm": 643.5103759765625, "learning_rate": 4.079802734603874e-05, "loss": 75.9446, "step": 88510 }, { "epoch": 0.3576320010342724, "grad_norm": 619.2804565429688, "learning_rate": 4.079532181746553e-05, "loss": 59.9599, "step": 88520 }, { "epoch": 0.35767240229964004, "grad_norm": 699.5809936523438, "learning_rate": 4.079261598095439e-05, "loss": 75.2107, "step": 88530 }, { "epoch": 0.3577128035650077, "grad_norm": 2605.017578125, "learning_rate": 4.078990983655807e-05, "loss": 78.6188, "step": 88540 }, { "epoch": 0.35775320483037526, "grad_norm": 483.28961181640625, "learning_rate": 4.078720338432933e-05, "loss": 52.8732, "step": 88550 }, { "epoch": 0.3577936060957429, "grad_norm": 371.9551086425781, "learning_rate": 4.078449662432093e-05, "loss": 67.858, "step": 88560 }, { "epoch": 0.35783400736111054, "grad_norm": 539.9357299804688, "learning_rate": 4.078178955658565e-05, "loss": 62.4049, "step": 88570 }, { "epoch": 0.3578744086264782, "grad_norm": 571.8364868164062, "learning_rate": 4.077908218117625e-05, "loss": 80.4749, "step": 88580 }, { "epoch": 0.3579148098918458, "grad_norm": 1833.5181884765625, "learning_rate": 4.077637449814552e-05, "loss": 59.1497, "step": 88590 }, { "epoch": 0.35795521115721346, "grad_norm": 1636.6201171875, "learning_rate": 4.077366650754624e-05, "loss": 69.4785, "step": 88600 }, { "epoch": 0.3579956124225811, "grad_norm": 2520.92822265625, "learning_rate": 4.077095820943122e-05, "loss": 92.7431, "step": 88610 }, { "epoch": 0.3580360136879487, "grad_norm": 781.56298828125, "learning_rate": 4.0768249603853245e-05, "loss": 66.4768, "step": 88620 }, { "epoch": 0.3580764149533163, "grad_norm": 462.7001037597656, "learning_rate": 4.0765540690865134e-05, "loss": 39.243, "step": 88630 }, { "epoch": 0.35811681621868396, "grad_norm": 1026.7464599609375, "learning_rate": 4.076283147051968e-05, "loss": 68.4941, "step": 88640 }, { "epoch": 0.3581572174840516, "grad_norm": 558.1005859375, "learning_rate": 4.0760121942869725e-05, "loss": 75.5133, "step": 88650 }, { "epoch": 0.35819761874941924, "grad_norm": 1109.0501708984375, "learning_rate": 4.075741210796806e-05, "loss": 89.089, "step": 88660 }, { "epoch": 0.3582380200147869, "grad_norm": 1361.9794921875, "learning_rate": 4.075470196586755e-05, "loss": 81.0406, "step": 88670 }, { "epoch": 0.35827842128015447, "grad_norm": 390.9439697265625, "learning_rate": 4.075199151662101e-05, "loss": 67.3965, "step": 88680 }, { "epoch": 0.3583188225455221, "grad_norm": 433.46759033203125, "learning_rate": 4.074928076028128e-05, "loss": 59.471, "step": 88690 }, { "epoch": 0.35835922381088975, "grad_norm": 3093.1083984375, "learning_rate": 4.074656969690122e-05, "loss": 67.4171, "step": 88700 }, { "epoch": 0.3583996250762574, "grad_norm": 650.4321899414062, "learning_rate": 4.0743858326533674e-05, "loss": 57.6206, "step": 88710 }, { "epoch": 0.35844002634162503, "grad_norm": 693.6144409179688, "learning_rate": 4.0741146649231504e-05, "loss": 69.4233, "step": 88720 }, { "epoch": 0.35848042760699267, "grad_norm": 1305.1888427734375, "learning_rate": 4.0738434665047575e-05, "loss": 66.7869, "step": 88730 }, { "epoch": 0.3585208288723603, "grad_norm": 671.9458618164062, "learning_rate": 4.0735722374034764e-05, "loss": 94.4328, "step": 88740 }, { "epoch": 0.3585612301377279, "grad_norm": 782.94873046875, "learning_rate": 4.073300977624594e-05, "loss": 59.1198, "step": 88750 }, { "epoch": 0.35860163140309553, "grad_norm": 808.3102416992188, "learning_rate": 4.073029687173399e-05, "loss": 65.3728, "step": 88760 }, { "epoch": 0.3586420326684632, "grad_norm": 2808.516357421875, "learning_rate": 4.0727583660551806e-05, "loss": 57.111, "step": 88770 }, { "epoch": 0.3586824339338308, "grad_norm": 905.2903442382812, "learning_rate": 4.0724870142752284e-05, "loss": 80.5446, "step": 88780 }, { "epoch": 0.35872283519919845, "grad_norm": 245.458251953125, "learning_rate": 4.0722156318388315e-05, "loss": 81.0946, "step": 88790 }, { "epoch": 0.3587632364645661, "grad_norm": 1166.0023193359375, "learning_rate": 4.071944218751282e-05, "loss": 62.5315, "step": 88800 }, { "epoch": 0.3588036377299337, "grad_norm": 1779.5706787109375, "learning_rate": 4.0716727750178704e-05, "loss": 73.8024, "step": 88810 }, { "epoch": 0.3588440389953013, "grad_norm": 3553.527587890625, "learning_rate": 4.071401300643889e-05, "loss": 107.6503, "step": 88820 }, { "epoch": 0.35888444026066896, "grad_norm": 990.9931030273438, "learning_rate": 4.0711297956346306e-05, "loss": 51.3191, "step": 88830 }, { "epoch": 0.3589248415260366, "grad_norm": 567.8832397460938, "learning_rate": 4.070858259995387e-05, "loss": 85.3655, "step": 88840 }, { "epoch": 0.35896524279140424, "grad_norm": 978.09130859375, "learning_rate": 4.070586693731454e-05, "loss": 86.0509, "step": 88850 }, { "epoch": 0.3590056440567719, "grad_norm": 654.3468017578125, "learning_rate": 4.0703150968481246e-05, "loss": 80.2084, "step": 88860 }, { "epoch": 0.35904604532213946, "grad_norm": 219.86631774902344, "learning_rate": 4.070043469350694e-05, "loss": 61.1951, "step": 88870 }, { "epoch": 0.3590864465875071, "grad_norm": 804.8790283203125, "learning_rate": 4.069771811244457e-05, "loss": 75.843, "step": 88880 }, { "epoch": 0.35912684785287474, "grad_norm": 546.8237915039062, "learning_rate": 4.0695001225347104e-05, "loss": 51.4436, "step": 88890 }, { "epoch": 0.3591672491182424, "grad_norm": 584.9393920898438, "learning_rate": 4.0692284032267516e-05, "loss": 50.8256, "step": 88900 }, { "epoch": 0.35920765038361, "grad_norm": 2287.378173828125, "learning_rate": 4.0689566533258765e-05, "loss": 85.8929, "step": 88910 }, { "epoch": 0.35924805164897766, "grad_norm": 798.82470703125, "learning_rate": 4.068684872837384e-05, "loss": 54.3008, "step": 88920 }, { "epoch": 0.3592884529143453, "grad_norm": 631.5094604492188, "learning_rate": 4.068413061766572e-05, "loss": 67.0688, "step": 88930 }, { "epoch": 0.3593288541797129, "grad_norm": 788.1356811523438, "learning_rate": 4.068141220118741e-05, "loss": 63.0645, "step": 88940 }, { "epoch": 0.3593692554450805, "grad_norm": 424.5524597167969, "learning_rate": 4.067869347899188e-05, "loss": 54.8084, "step": 88950 }, { "epoch": 0.35940965671044817, "grad_norm": 653.3056640625, "learning_rate": 4.067597445113216e-05, "loss": 50.6989, "step": 88960 }, { "epoch": 0.3594500579758158, "grad_norm": 343.56512451171875, "learning_rate": 4.067325511766124e-05, "loss": 82.852, "step": 88970 }, { "epoch": 0.35949045924118345, "grad_norm": 750.8988647460938, "learning_rate": 4.067053547863215e-05, "loss": 85.4746, "step": 88980 }, { "epoch": 0.3595308605065511, "grad_norm": 1303.02197265625, "learning_rate": 4.06678155340979e-05, "loss": 50.7246, "step": 88990 }, { "epoch": 0.35957126177191867, "grad_norm": 777.9240112304688, "learning_rate": 4.066509528411152e-05, "loss": 62.3671, "step": 89000 }, { "epoch": 0.3596116630372863, "grad_norm": 304.01275634765625, "learning_rate": 4.066237472872604e-05, "loss": 68.3459, "step": 89010 }, { "epoch": 0.35965206430265395, "grad_norm": 1499.3465576171875, "learning_rate": 4.0659653867994496e-05, "loss": 69.0432, "step": 89020 }, { "epoch": 0.3596924655680216, "grad_norm": 1114.2425537109375, "learning_rate": 4.065693270196995e-05, "loss": 68.1484, "step": 89030 }, { "epoch": 0.35973286683338923, "grad_norm": 738.8199462890625, "learning_rate": 4.065421123070543e-05, "loss": 59.836, "step": 89040 }, { "epoch": 0.35977326809875687, "grad_norm": 616.7838134765625, "learning_rate": 4.065148945425401e-05, "loss": 61.9072, "step": 89050 }, { "epoch": 0.3598136693641245, "grad_norm": 215.50350952148438, "learning_rate": 4.064876737266874e-05, "loss": 81.0354, "step": 89060 }, { "epoch": 0.3598540706294921, "grad_norm": 344.2589111328125, "learning_rate": 4.06460449860027e-05, "loss": 61.0841, "step": 89070 }, { "epoch": 0.35989447189485974, "grad_norm": 740.9758911132812, "learning_rate": 4.064332229430895e-05, "loss": 53.366, "step": 89080 }, { "epoch": 0.3599348731602274, "grad_norm": 717.7742919921875, "learning_rate": 4.0640599297640584e-05, "loss": 42.3575, "step": 89090 }, { "epoch": 0.359975274425595, "grad_norm": 1074.0352783203125, "learning_rate": 4.063787599605068e-05, "loss": 79.3194, "step": 89100 }, { "epoch": 0.36001567569096266, "grad_norm": 1248.140869140625, "learning_rate": 4.063515238959233e-05, "loss": 60.0265, "step": 89110 }, { "epoch": 0.3600560769563303, "grad_norm": 400.5279235839844, "learning_rate": 4.063242847831864e-05, "loss": 56.0902, "step": 89120 }, { "epoch": 0.3600964782216979, "grad_norm": 4143.07568359375, "learning_rate": 4.06297042622827e-05, "loss": 77.4453, "step": 89130 }, { "epoch": 0.3601368794870655, "grad_norm": 520.5917358398438, "learning_rate": 4.062697974153764e-05, "loss": 56.0878, "step": 89140 }, { "epoch": 0.36017728075243316, "grad_norm": 1186.0679931640625, "learning_rate": 4.062425491613656e-05, "loss": 70.4133, "step": 89150 }, { "epoch": 0.3602176820178008, "grad_norm": 972.8978271484375, "learning_rate": 4.062152978613258e-05, "loss": 47.6216, "step": 89160 }, { "epoch": 0.36025808328316844, "grad_norm": 1175.881591796875, "learning_rate": 4.061880435157884e-05, "loss": 52.5779, "step": 89170 }, { "epoch": 0.3602984845485361, "grad_norm": 1707.835205078125, "learning_rate": 4.061607861252847e-05, "loss": 49.4881, "step": 89180 }, { "epoch": 0.36033888581390366, "grad_norm": 898.6934814453125, "learning_rate": 4.0613352569034615e-05, "loss": 83.9531, "step": 89190 }, { "epoch": 0.3603792870792713, "grad_norm": 791.3284912109375, "learning_rate": 4.0610626221150394e-05, "loss": 56.8294, "step": 89200 }, { "epoch": 0.36041968834463894, "grad_norm": 439.3869323730469, "learning_rate": 4.060789956892899e-05, "loss": 78.4056, "step": 89210 }, { "epoch": 0.3604600896100066, "grad_norm": 306.3957214355469, "learning_rate": 4.060517261242355e-05, "loss": 53.2591, "step": 89220 }, { "epoch": 0.3605004908753742, "grad_norm": 862.3554077148438, "learning_rate": 4.060244535168723e-05, "loss": 57.5533, "step": 89230 }, { "epoch": 0.36054089214074186, "grad_norm": 631.755859375, "learning_rate": 4.0599717786773204e-05, "loss": 46.8937, "step": 89240 }, { "epoch": 0.3605812934061095, "grad_norm": 1259.4813232421875, "learning_rate": 4.059698991773466e-05, "loss": 62.3537, "step": 89250 }, { "epoch": 0.3606216946714771, "grad_norm": 467.6385192871094, "learning_rate": 4.059426174462476e-05, "loss": 65.602, "step": 89260 }, { "epoch": 0.36066209593684473, "grad_norm": 492.99609375, "learning_rate": 4.0591533267496694e-05, "loss": 106.2255, "step": 89270 }, { "epoch": 0.36070249720221237, "grad_norm": 893.2991943359375, "learning_rate": 4.058880448640367e-05, "loss": 96.5574, "step": 89280 }, { "epoch": 0.36074289846758, "grad_norm": 645.7114868164062, "learning_rate": 4.058607540139887e-05, "loss": 90.8627, "step": 89290 }, { "epoch": 0.36078329973294765, "grad_norm": 3922.514404296875, "learning_rate": 4.0583346012535506e-05, "loss": 111.6305, "step": 89300 }, { "epoch": 0.3608237009983153, "grad_norm": 904.625, "learning_rate": 4.058061631986679e-05, "loss": 79.8804, "step": 89310 }, { "epoch": 0.3608641022636829, "grad_norm": 384.1736755371094, "learning_rate": 4.057788632344593e-05, "loss": 60.1387, "step": 89320 }, { "epoch": 0.3609045035290505, "grad_norm": 675.1691284179688, "learning_rate": 4.0575156023326166e-05, "loss": 60.848, "step": 89330 }, { "epoch": 0.36094490479441815, "grad_norm": 963.3970947265625, "learning_rate": 4.0572425419560714e-05, "loss": 57.821, "step": 89340 }, { "epoch": 0.3609853060597858, "grad_norm": 0.0, "learning_rate": 4.056969451220282e-05, "loss": 72.7515, "step": 89350 }, { "epoch": 0.36102570732515343, "grad_norm": 610.1162719726562, "learning_rate": 4.0566963301305705e-05, "loss": 50.6369, "step": 89360 }, { "epoch": 0.3610661085905211, "grad_norm": 643.9298706054688, "learning_rate": 4.056423178692262e-05, "loss": 39.3118, "step": 89370 }, { "epoch": 0.3611065098558887, "grad_norm": 531.35693359375, "learning_rate": 4.056149996910683e-05, "loss": 75.3905, "step": 89380 }, { "epoch": 0.3611469111212563, "grad_norm": 503.90777587890625, "learning_rate": 4.05587678479116e-05, "loss": 65.8769, "step": 89390 }, { "epoch": 0.36118731238662394, "grad_norm": 1145.5985107421875, "learning_rate": 4.055603542339016e-05, "loss": 51.2441, "step": 89400 }, { "epoch": 0.3612277136519916, "grad_norm": 411.92626953125, "learning_rate": 4.055330269559581e-05, "loss": 35.5195, "step": 89410 }, { "epoch": 0.3612681149173592, "grad_norm": 745.9331665039062, "learning_rate": 4.055056966458182e-05, "loss": 58.2253, "step": 89420 }, { "epoch": 0.36130851618272686, "grad_norm": 5621.34814453125, "learning_rate": 4.054783633040146e-05, "loss": 78.9581, "step": 89430 }, { "epoch": 0.3613489174480945, "grad_norm": 5587.95263671875, "learning_rate": 4.054510269310803e-05, "loss": 83.4821, "step": 89440 }, { "epoch": 0.3613893187134621, "grad_norm": 402.0248107910156, "learning_rate": 4.0542368752754825e-05, "loss": 57.0781, "step": 89450 }, { "epoch": 0.3614297199788297, "grad_norm": 1605.563720703125, "learning_rate": 4.053963450939513e-05, "loss": 90.5012, "step": 89460 }, { "epoch": 0.36147012124419736, "grad_norm": 471.9956359863281, "learning_rate": 4.053689996308227e-05, "loss": 71.2476, "step": 89470 }, { "epoch": 0.361510522509565, "grad_norm": 0.0, "learning_rate": 4.053416511386954e-05, "loss": 45.8093, "step": 89480 }, { "epoch": 0.36155092377493264, "grad_norm": 800.1768188476562, "learning_rate": 4.0531429961810264e-05, "loss": 46.3764, "step": 89490 }, { "epoch": 0.3615913250403003, "grad_norm": 731.527587890625, "learning_rate": 4.052869450695776e-05, "loss": 78.3263, "step": 89500 }, { "epoch": 0.36163172630566787, "grad_norm": 1999.2166748046875, "learning_rate": 4.052595874936537e-05, "loss": 70.2021, "step": 89510 }, { "epoch": 0.3616721275710355, "grad_norm": 0.0, "learning_rate": 4.0523222689086414e-05, "loss": 75.0036, "step": 89520 }, { "epoch": 0.36171252883640315, "grad_norm": 896.73388671875, "learning_rate": 4.052048632617424e-05, "loss": 73.4701, "step": 89530 }, { "epoch": 0.3617529301017708, "grad_norm": 1283.2412109375, "learning_rate": 4.05177496606822e-05, "loss": 50.1073, "step": 89540 }, { "epoch": 0.3617933313671384, "grad_norm": 543.897705078125, "learning_rate": 4.0515012692663646e-05, "loss": 52.8131, "step": 89550 }, { "epoch": 0.36183373263250607, "grad_norm": 1975.94384765625, "learning_rate": 4.051227542217192e-05, "loss": 92.001, "step": 89560 }, { "epoch": 0.3618741338978737, "grad_norm": 247.90350341796875, "learning_rate": 4.0509537849260404e-05, "loss": 31.5658, "step": 89570 }, { "epoch": 0.3619145351632413, "grad_norm": 859.4539184570312, "learning_rate": 4.0506799973982465e-05, "loss": 55.6403, "step": 89580 }, { "epoch": 0.36195493642860893, "grad_norm": 1263.11474609375, "learning_rate": 4.0504061796391474e-05, "loss": 59.762, "step": 89590 }, { "epoch": 0.36199533769397657, "grad_norm": 263.7984313964844, "learning_rate": 4.050132331654082e-05, "loss": 36.6857, "step": 89600 }, { "epoch": 0.3620357389593442, "grad_norm": 599.0194702148438, "learning_rate": 4.0498584534483877e-05, "loss": 101.6762, "step": 89610 }, { "epoch": 0.36207614022471185, "grad_norm": 1161.4105224609375, "learning_rate": 4.0495845450274064e-05, "loss": 73.1191, "step": 89620 }, { "epoch": 0.3621165414900795, "grad_norm": 977.6044921875, "learning_rate": 4.0493106063964754e-05, "loss": 63.5236, "step": 89630 }, { "epoch": 0.3621569427554471, "grad_norm": 653.26708984375, "learning_rate": 4.0490366375609376e-05, "loss": 99.8486, "step": 89640 }, { "epoch": 0.3621973440208147, "grad_norm": 1127.0865478515625, "learning_rate": 4.048762638526132e-05, "loss": 51.8638, "step": 89650 }, { "epoch": 0.36223774528618236, "grad_norm": 565.7835693359375, "learning_rate": 4.048488609297402e-05, "loss": 61.2772, "step": 89660 }, { "epoch": 0.36227814655155, "grad_norm": 774.4669799804688, "learning_rate": 4.0482145498800884e-05, "loss": 61.0465, "step": 89670 }, { "epoch": 0.36231854781691764, "grad_norm": 2059.27880859375, "learning_rate": 4.047940460279537e-05, "loss": 80.3147, "step": 89680 }, { "epoch": 0.3623589490822853, "grad_norm": 890.2815551757812, "learning_rate": 4.0476663405010874e-05, "loss": 85.4335, "step": 89690 }, { "epoch": 0.3623993503476529, "grad_norm": 1310.5477294921875, "learning_rate": 4.047392190550087e-05, "loss": 51.1065, "step": 89700 }, { "epoch": 0.3624397516130205, "grad_norm": 640.0499877929688, "learning_rate": 4.047118010431879e-05, "loss": 79.5663, "step": 89710 }, { "epoch": 0.36248015287838814, "grad_norm": 773.0057373046875, "learning_rate": 4.0468438001518084e-05, "loss": 56.4114, "step": 89720 }, { "epoch": 0.3625205541437558, "grad_norm": 652.1478881835938, "learning_rate": 4.046569559715221e-05, "loss": 52.9334, "step": 89730 }, { "epoch": 0.3625609554091234, "grad_norm": 1459.3680419921875, "learning_rate": 4.0462952891274655e-05, "loss": 71.04, "step": 89740 }, { "epoch": 0.36260135667449106, "grad_norm": 1920.9996337890625, "learning_rate": 4.046020988393885e-05, "loss": 61.0197, "step": 89750 }, { "epoch": 0.3626417579398587, "grad_norm": 1488.5030517578125, "learning_rate": 4.045746657519831e-05, "loss": 76.9832, "step": 89760 }, { "epoch": 0.3626821592052263, "grad_norm": 852.4432983398438, "learning_rate": 4.04547229651065e-05, "loss": 61.3612, "step": 89770 }, { "epoch": 0.3627225604705939, "grad_norm": 748.500244140625, "learning_rate": 4.0451979053716906e-05, "loss": 61.8753, "step": 89780 }, { "epoch": 0.36276296173596156, "grad_norm": 988.9141235351562, "learning_rate": 4.044923484108303e-05, "loss": 53.8847, "step": 89790 }, { "epoch": 0.3628033630013292, "grad_norm": 1280.90673828125, "learning_rate": 4.044649032725836e-05, "loss": 63.5749, "step": 89800 }, { "epoch": 0.36284376426669684, "grad_norm": 1100.82177734375, "learning_rate": 4.044374551229641e-05, "loss": 87.8433, "step": 89810 }, { "epoch": 0.3628841655320645, "grad_norm": 936.5944213867188, "learning_rate": 4.0441000396250694e-05, "loss": 62.9258, "step": 89820 }, { "epoch": 0.36292456679743207, "grad_norm": 701.0770874023438, "learning_rate": 4.0438254979174725e-05, "loss": 50.3677, "step": 89830 }, { "epoch": 0.3629649680627997, "grad_norm": 0.0, "learning_rate": 4.043550926112203e-05, "loss": 60.1964, "step": 89840 }, { "epoch": 0.36300536932816735, "grad_norm": 546.4686889648438, "learning_rate": 4.043276324214613e-05, "loss": 53.8509, "step": 89850 }, { "epoch": 0.363045770593535, "grad_norm": 1106.510009765625, "learning_rate": 4.043001692230056e-05, "loss": 69.3517, "step": 89860 }, { "epoch": 0.36308617185890263, "grad_norm": 665.1596069335938, "learning_rate": 4.042727030163888e-05, "loss": 53.5625, "step": 89870 }, { "epoch": 0.36312657312427027, "grad_norm": 640.2094116210938, "learning_rate": 4.042452338021461e-05, "loss": 90.0176, "step": 89880 }, { "epoch": 0.3631669743896379, "grad_norm": 552.5271606445312, "learning_rate": 4.0421776158081326e-05, "loss": 39.7842, "step": 89890 }, { "epoch": 0.3632073756550055, "grad_norm": 609.4852905273438, "learning_rate": 4.041902863529256e-05, "loss": 98.3096, "step": 89900 }, { "epoch": 0.36324777692037313, "grad_norm": 1036.63671875, "learning_rate": 4.041628081190191e-05, "loss": 74.554, "step": 89910 }, { "epoch": 0.3632881781857408, "grad_norm": 296.92547607421875, "learning_rate": 4.041353268796293e-05, "loss": 75.188, "step": 89920 }, { "epoch": 0.3633285794511084, "grad_norm": 404.4271240234375, "learning_rate": 4.041078426352918e-05, "loss": 43.2436, "step": 89930 }, { "epoch": 0.36336898071647605, "grad_norm": 1445.6839599609375, "learning_rate": 4.0408035538654264e-05, "loss": 48.7288, "step": 89940 }, { "epoch": 0.3634093819818437, "grad_norm": 745.0489501953125, "learning_rate": 4.040528651339176e-05, "loss": 77.1038, "step": 89950 }, { "epoch": 0.3634497832472113, "grad_norm": 543.2232055664062, "learning_rate": 4.0402537187795274e-05, "loss": 56.8202, "step": 89960 }, { "epoch": 0.3634901845125789, "grad_norm": 557.49072265625, "learning_rate": 4.039978756191839e-05, "loss": 62.1664, "step": 89970 }, { "epoch": 0.36353058577794656, "grad_norm": 612.9962768554688, "learning_rate": 4.039703763581472e-05, "loss": 82.6836, "step": 89980 }, { "epoch": 0.3635709870433142, "grad_norm": 921.2637939453125, "learning_rate": 4.039428740953787e-05, "loss": 50.0055, "step": 89990 }, { "epoch": 0.36361138830868184, "grad_norm": 1612.236328125, "learning_rate": 4.039153688314145e-05, "loss": 85.7117, "step": 90000 }, { "epoch": 0.3636517895740495, "grad_norm": 656.6378173828125, "learning_rate": 4.038878605667912e-05, "loss": 84.2719, "step": 90010 }, { "epoch": 0.3636921908394171, "grad_norm": 1089.3055419921875, "learning_rate": 4.038603493020447e-05, "loss": 76.3893, "step": 90020 }, { "epoch": 0.3637325921047847, "grad_norm": 954.1974487304688, "learning_rate": 4.038328350377115e-05, "loss": 111.787, "step": 90030 }, { "epoch": 0.36377299337015234, "grad_norm": 725.2201538085938, "learning_rate": 4.0380531777432794e-05, "loss": 60.9843, "step": 90040 }, { "epoch": 0.36381339463552, "grad_norm": 1051.9312744140625, "learning_rate": 4.037777975124306e-05, "loss": 75.9995, "step": 90050 }, { "epoch": 0.3638537959008876, "grad_norm": 1385.2197265625, "learning_rate": 4.037502742525559e-05, "loss": 85.4595, "step": 90060 }, { "epoch": 0.36389419716625526, "grad_norm": 505.546875, "learning_rate": 4.037227479952404e-05, "loss": 56.1493, "step": 90070 }, { "epoch": 0.3639345984316229, "grad_norm": 749.1838989257812, "learning_rate": 4.036952187410208e-05, "loss": 63.6978, "step": 90080 }, { "epoch": 0.3639749996969905, "grad_norm": 534.1312866210938, "learning_rate": 4.036676864904338e-05, "loss": 57.1218, "step": 90090 }, { "epoch": 0.3640154009623581, "grad_norm": 921.06005859375, "learning_rate": 4.036401512440161e-05, "loss": 58.3411, "step": 90100 }, { "epoch": 0.36405580222772577, "grad_norm": 679.614990234375, "learning_rate": 4.0361261300230465e-05, "loss": 66.4312, "step": 90110 }, { "epoch": 0.3640962034930934, "grad_norm": 511.14501953125, "learning_rate": 4.035850717658362e-05, "loss": 106.1777, "step": 90120 }, { "epoch": 0.36413660475846105, "grad_norm": 1034.344482421875, "learning_rate": 4.035575275351476e-05, "loss": 78.6351, "step": 90130 }, { "epoch": 0.3641770060238287, "grad_norm": 880.7720947265625, "learning_rate": 4.0352998031077604e-05, "loss": 67.1489, "step": 90140 }, { "epoch": 0.36421740728919627, "grad_norm": 1041.2879638671875, "learning_rate": 4.035024300932584e-05, "loss": 59.1619, "step": 90150 }, { "epoch": 0.3642578085545639, "grad_norm": 414.6109924316406, "learning_rate": 4.0347487688313194e-05, "loss": 57.6462, "step": 90160 }, { "epoch": 0.36429820981993155, "grad_norm": 803.5387573242188, "learning_rate": 4.034473206809337e-05, "loss": 62.8699, "step": 90170 }, { "epoch": 0.3643386110852992, "grad_norm": 982.3478393554688, "learning_rate": 4.0341976148720095e-05, "loss": 97.5246, "step": 90180 }, { "epoch": 0.36437901235066683, "grad_norm": 951.2240600585938, "learning_rate": 4.03392199302471e-05, "loss": 51.8766, "step": 90190 }, { "epoch": 0.36441941361603447, "grad_norm": 754.9638061523438, "learning_rate": 4.033646341272811e-05, "loss": 59.1747, "step": 90200 }, { "epoch": 0.3644598148814021, "grad_norm": 1095.536376953125, "learning_rate": 4.033370659621687e-05, "loss": 43.2328, "step": 90210 }, { "epoch": 0.3645002161467697, "grad_norm": 915.0740966796875, "learning_rate": 4.033094948076713e-05, "loss": 77.3148, "step": 90220 }, { "epoch": 0.36454061741213734, "grad_norm": 502.77545166015625, "learning_rate": 4.032819206643263e-05, "loss": 41.8404, "step": 90230 }, { "epoch": 0.364581018677505, "grad_norm": 352.51165771484375, "learning_rate": 4.032543435326714e-05, "loss": 55.2335, "step": 90240 }, { "epoch": 0.3646214199428726, "grad_norm": 1066.30615234375, "learning_rate": 4.0322676341324415e-05, "loss": 87.1459, "step": 90250 }, { "epoch": 0.36466182120824026, "grad_norm": 486.5985107421875, "learning_rate": 4.0319918030658225e-05, "loss": 67.0294, "step": 90260 }, { "epoch": 0.3647022224736079, "grad_norm": 637.4968872070312, "learning_rate": 4.031715942132235e-05, "loss": 67.5639, "step": 90270 }, { "epoch": 0.3647426237389755, "grad_norm": 919.53759765625, "learning_rate": 4.031440051337056e-05, "loss": 88.9865, "step": 90280 }, { "epoch": 0.3647830250043431, "grad_norm": 551.5164184570312, "learning_rate": 4.031164130685665e-05, "loss": 68.1998, "step": 90290 }, { "epoch": 0.36482342626971076, "grad_norm": 1867.7833251953125, "learning_rate": 4.030888180183441e-05, "loss": 80.087, "step": 90300 }, { "epoch": 0.3648638275350784, "grad_norm": 731.1893920898438, "learning_rate": 4.030612199835764e-05, "loss": 49.1685, "step": 90310 }, { "epoch": 0.36490422880044604, "grad_norm": 1434.2115478515625, "learning_rate": 4.030336189648014e-05, "loss": 79.8181, "step": 90320 }, { "epoch": 0.3649446300658137, "grad_norm": 5654.36669921875, "learning_rate": 4.030060149625573e-05, "loss": 106.5813, "step": 90330 }, { "epoch": 0.3649850313311813, "grad_norm": 1383.1339111328125, "learning_rate": 4.02978407977382e-05, "loss": 81.1142, "step": 90340 }, { "epoch": 0.3650254325965489, "grad_norm": 1352.9986572265625, "learning_rate": 4.0295079800981395e-05, "loss": 75.8624, "step": 90350 }, { "epoch": 0.36506583386191654, "grad_norm": 821.6564331054688, "learning_rate": 4.029231850603914e-05, "loss": 87.5254, "step": 90360 }, { "epoch": 0.3651062351272842, "grad_norm": 375.5804138183594, "learning_rate": 4.028955691296526e-05, "loss": 43.4082, "step": 90370 }, { "epoch": 0.3651466363926518, "grad_norm": 2169.375244140625, "learning_rate": 4.0286795021813594e-05, "loss": 67.1946, "step": 90380 }, { "epoch": 0.36518703765801946, "grad_norm": 7678.23828125, "learning_rate": 4.0284032832637985e-05, "loss": 96.252, "step": 90390 }, { "epoch": 0.3652274389233871, "grad_norm": 1672.2906494140625, "learning_rate": 4.028127034549229e-05, "loss": 76.2553, "step": 90400 }, { "epoch": 0.3652678401887547, "grad_norm": 959.1575927734375, "learning_rate": 4.027850756043037e-05, "loss": 55.6581, "step": 90410 }, { "epoch": 0.36530824145412233, "grad_norm": 646.5990600585938, "learning_rate": 4.0275744477506074e-05, "loss": 79.772, "step": 90420 }, { "epoch": 0.36534864271948997, "grad_norm": 905.00439453125, "learning_rate": 4.027298109677327e-05, "loss": 40.6796, "step": 90430 }, { "epoch": 0.3653890439848576, "grad_norm": 800.4560546875, "learning_rate": 4.027021741828584e-05, "loss": 52.6281, "step": 90440 }, { "epoch": 0.36542944525022525, "grad_norm": 507.7906188964844, "learning_rate": 4.0267453442097664e-05, "loss": 72.2229, "step": 90450 }, { "epoch": 0.3654698465155929, "grad_norm": 371.3627624511719, "learning_rate": 4.026468916826262e-05, "loss": 65.535, "step": 90460 }, { "epoch": 0.3655102477809605, "grad_norm": 393.8462219238281, "learning_rate": 4.02619245968346e-05, "loss": 75.8037, "step": 90470 }, { "epoch": 0.3655506490463281, "grad_norm": 460.1830749511719, "learning_rate": 4.0259159727867504e-05, "loss": 82.6051, "step": 90480 }, { "epoch": 0.36559105031169575, "grad_norm": 521.975341796875, "learning_rate": 4.025639456141523e-05, "loss": 61.9708, "step": 90490 }, { "epoch": 0.3656314515770634, "grad_norm": 1583.7652587890625, "learning_rate": 4.02536290975317e-05, "loss": 64.9353, "step": 90500 }, { "epoch": 0.36567185284243103, "grad_norm": 465.25244140625, "learning_rate": 4.02508633362708e-05, "loss": 65.8302, "step": 90510 }, { "epoch": 0.3657122541077987, "grad_norm": 293.57965087890625, "learning_rate": 4.024809727768648e-05, "loss": 45.0099, "step": 90520 }, { "epoch": 0.3657526553731663, "grad_norm": 536.3772583007812, "learning_rate": 4.024533092183266e-05, "loss": 74.4069, "step": 90530 }, { "epoch": 0.3657930566385339, "grad_norm": 479.0234680175781, "learning_rate": 4.024256426876325e-05, "loss": 51.6421, "step": 90540 }, { "epoch": 0.36583345790390154, "grad_norm": 1907.06494140625, "learning_rate": 4.02397973185322e-05, "loss": 77.2531, "step": 90550 }, { "epoch": 0.3658738591692692, "grad_norm": 1270.7117919921875, "learning_rate": 4.023703007119347e-05, "loss": 59.475, "step": 90560 }, { "epoch": 0.3659142604346368, "grad_norm": 380.741943359375, "learning_rate": 4.023426252680098e-05, "loss": 59.4132, "step": 90570 }, { "epoch": 0.36595466170000446, "grad_norm": 1124.605224609375, "learning_rate": 4.023149468540871e-05, "loss": 67.3577, "step": 90580 }, { "epoch": 0.3659950629653721, "grad_norm": 1151.210205078125, "learning_rate": 4.02287265470706e-05, "loss": 63.5007, "step": 90590 }, { "epoch": 0.3660354642307397, "grad_norm": 0.0, "learning_rate": 4.022595811184064e-05, "loss": 38.7889, "step": 90600 }, { "epoch": 0.3660758654961073, "grad_norm": 531.6884155273438, "learning_rate": 4.022318937977277e-05, "loss": 70.17, "step": 90610 }, { "epoch": 0.36611626676147496, "grad_norm": 928.3363647460938, "learning_rate": 4.022042035092101e-05, "loss": 63.7609, "step": 90620 }, { "epoch": 0.3661566680268426, "grad_norm": 1050.515380859375, "learning_rate": 4.02176510253393e-05, "loss": 62.498, "step": 90630 }, { "epoch": 0.36619706929221024, "grad_norm": 1077.6502685546875, "learning_rate": 4.021488140308165e-05, "loss": 90.2313, "step": 90640 }, { "epoch": 0.3662374705575779, "grad_norm": 1099.2191162109375, "learning_rate": 4.021211148420205e-05, "loss": 65.9301, "step": 90650 }, { "epoch": 0.3662778718229455, "grad_norm": 633.58935546875, "learning_rate": 4.020934126875452e-05, "loss": 41.3643, "step": 90660 }, { "epoch": 0.3663182730883131, "grad_norm": 1048.8428955078125, "learning_rate": 4.0206570756793046e-05, "loss": 63.0074, "step": 90670 }, { "epoch": 0.36635867435368075, "grad_norm": 369.955810546875, "learning_rate": 4.020379994837164e-05, "loss": 76.6917, "step": 90680 }, { "epoch": 0.3663990756190484, "grad_norm": 654.5360717773438, "learning_rate": 4.020102884354433e-05, "loss": 71.9556, "step": 90690 }, { "epoch": 0.366439476884416, "grad_norm": 689.0057983398438, "learning_rate": 4.019825744236514e-05, "loss": 68.2348, "step": 90700 }, { "epoch": 0.36647987814978367, "grad_norm": 408.1641540527344, "learning_rate": 4.0195485744888096e-05, "loss": 99.4495, "step": 90710 }, { "epoch": 0.3665202794151513, "grad_norm": 817.236328125, "learning_rate": 4.019271375116722e-05, "loss": 72.3637, "step": 90720 }, { "epoch": 0.3665606806805189, "grad_norm": 1630.006103515625, "learning_rate": 4.018994146125659e-05, "loss": 85.6407, "step": 90730 }, { "epoch": 0.36660108194588653, "grad_norm": 1074.2464599609375, "learning_rate": 4.0187168875210216e-05, "loss": 59.7042, "step": 90740 }, { "epoch": 0.36664148321125417, "grad_norm": 380.92657470703125, "learning_rate": 4.018439599308217e-05, "loss": 81.9409, "step": 90750 }, { "epoch": 0.3666818844766218, "grad_norm": 0.0, "learning_rate": 4.0181622814926504e-05, "loss": 81.8555, "step": 90760 }, { "epoch": 0.36672228574198945, "grad_norm": 703.6498413085938, "learning_rate": 4.0178849340797285e-05, "loss": 67.2053, "step": 90770 }, { "epoch": 0.3667626870073571, "grad_norm": 706.4108276367188, "learning_rate": 4.0176075570748596e-05, "loss": 49.0458, "step": 90780 }, { "epoch": 0.3668030882727247, "grad_norm": 2707.36572265625, "learning_rate": 4.017330150483449e-05, "loss": 53.4966, "step": 90790 }, { "epoch": 0.3668434895380923, "grad_norm": 472.0765075683594, "learning_rate": 4.017052714310906e-05, "loss": 38.695, "step": 90800 }, { "epoch": 0.36688389080345996, "grad_norm": 786.6532592773438, "learning_rate": 4.0167752485626385e-05, "loss": 52.5584, "step": 90810 }, { "epoch": 0.3669242920688276, "grad_norm": 680.0988159179688, "learning_rate": 4.0164977532440584e-05, "loss": 55.2876, "step": 90820 }, { "epoch": 0.36696469333419524, "grad_norm": 1092.18359375, "learning_rate": 4.0162202283605725e-05, "loss": 55.0247, "step": 90830 }, { "epoch": 0.3670050945995629, "grad_norm": 944.074951171875, "learning_rate": 4.015942673917593e-05, "loss": 72.8051, "step": 90840 }, { "epoch": 0.3670454958649305, "grad_norm": 1217.025634765625, "learning_rate": 4.015665089920531e-05, "loss": 74.4885, "step": 90850 }, { "epoch": 0.3670858971302981, "grad_norm": 710.3255004882812, "learning_rate": 4.0153874763747976e-05, "loss": 72.1298, "step": 90860 }, { "epoch": 0.36712629839566574, "grad_norm": 494.13946533203125, "learning_rate": 4.015109833285805e-05, "loss": 66.7202, "step": 90870 }, { "epoch": 0.3671666996610334, "grad_norm": 693.8568115234375, "learning_rate": 4.0148321606589656e-05, "loss": 60.7179, "step": 90880 }, { "epoch": 0.367207100926401, "grad_norm": 548.3522338867188, "learning_rate": 4.014554458499694e-05, "loss": 47.7584, "step": 90890 }, { "epoch": 0.36724750219176866, "grad_norm": 885.053466796875, "learning_rate": 4.014276726813404e-05, "loss": 26.558, "step": 90900 }, { "epoch": 0.3672879034571363, "grad_norm": 1183.4205322265625, "learning_rate": 4.013998965605509e-05, "loss": 71.821, "step": 90910 }, { "epoch": 0.3673283047225039, "grad_norm": 1207.58203125, "learning_rate": 4.013721174881425e-05, "loss": 49.7049, "step": 90920 }, { "epoch": 0.3673687059878715, "grad_norm": 1142.948486328125, "learning_rate": 4.013443354646567e-05, "loss": 74.3073, "step": 90930 }, { "epoch": 0.36740910725323916, "grad_norm": 0.0, "learning_rate": 4.0131655049063514e-05, "loss": 50.2877, "step": 90940 }, { "epoch": 0.3674495085186068, "grad_norm": 2414.29541015625, "learning_rate": 4.012887625666195e-05, "loss": 76.8267, "step": 90950 }, { "epoch": 0.36748990978397444, "grad_norm": 682.6073608398438, "learning_rate": 4.012609716931517e-05, "loss": 46.8848, "step": 90960 }, { "epoch": 0.3675303110493421, "grad_norm": 648.0629272460938, "learning_rate": 4.012331778707732e-05, "loss": 57.106, "step": 90970 }, { "epoch": 0.3675707123147097, "grad_norm": 976.9451293945312, "learning_rate": 4.012053811000262e-05, "loss": 56.0116, "step": 90980 }, { "epoch": 0.3676111135800773, "grad_norm": 712.3170166015625, "learning_rate": 4.0117758138145235e-05, "loss": 72.1889, "step": 90990 }, { "epoch": 0.36765151484544495, "grad_norm": 579.1261596679688, "learning_rate": 4.011497787155938e-05, "loss": 54.2764, "step": 91000 }, { "epoch": 0.3676919161108126, "grad_norm": 885.5346069335938, "learning_rate": 4.0112197310299235e-05, "loss": 78.5092, "step": 91010 }, { "epoch": 0.36773231737618023, "grad_norm": 661.86572265625, "learning_rate": 4.010941645441904e-05, "loss": 68.8041, "step": 91020 }, { "epoch": 0.36777271864154787, "grad_norm": 474.86328125, "learning_rate": 4.010663530397298e-05, "loss": 62.9406, "step": 91030 }, { "epoch": 0.3678131199069155, "grad_norm": 476.5716857910156, "learning_rate": 4.01038538590153e-05, "loss": 102.7171, "step": 91040 }, { "epoch": 0.3678535211722831, "grad_norm": 696.08837890625, "learning_rate": 4.0101072119600196e-05, "loss": 97.6508, "step": 91050 }, { "epoch": 0.36789392243765073, "grad_norm": 751.9807739257812, "learning_rate": 4.009829008578192e-05, "loss": 85.6849, "step": 91060 }, { "epoch": 0.3679343237030184, "grad_norm": 505.8662109375, "learning_rate": 4.0095507757614717e-05, "loss": 54.5299, "step": 91070 }, { "epoch": 0.367974724968386, "grad_norm": 711.6263427734375, "learning_rate": 4.009272513515281e-05, "loss": 58.3999, "step": 91080 }, { "epoch": 0.36801512623375365, "grad_norm": 623.3084106445312, "learning_rate": 4.008994221845046e-05, "loss": 65.1364, "step": 91090 }, { "epoch": 0.3680555274991213, "grad_norm": 479.5476379394531, "learning_rate": 4.0087159007561916e-05, "loss": 40.831, "step": 91100 }, { "epoch": 0.3680959287644889, "grad_norm": 969.3098754882812, "learning_rate": 4.0084375502541446e-05, "loss": 64.9479, "step": 91110 }, { "epoch": 0.3681363300298565, "grad_norm": 267.7486572265625, "learning_rate": 4.00815917034433e-05, "loss": 107.4876, "step": 91120 }, { "epoch": 0.36817673129522416, "grad_norm": 1258.0924072265625, "learning_rate": 4.007880761032177e-05, "loss": 63.7383, "step": 91130 }, { "epoch": 0.3682171325605918, "grad_norm": 664.6405029296875, "learning_rate": 4.0076023223231105e-05, "loss": 65.6308, "step": 91140 }, { "epoch": 0.36825753382595944, "grad_norm": 869.2908325195312, "learning_rate": 4.007323854222562e-05, "loss": 48.834, "step": 91150 }, { "epoch": 0.3682979350913271, "grad_norm": 359.8714599609375, "learning_rate": 4.007045356735959e-05, "loss": 51.4937, "step": 91160 }, { "epoch": 0.3683383363566947, "grad_norm": 454.6327209472656, "learning_rate": 4.00676682986873e-05, "loss": 80.0302, "step": 91170 }, { "epoch": 0.3683787376220623, "grad_norm": 582.725341796875, "learning_rate": 4.006488273626307e-05, "loss": 81.7921, "step": 91180 }, { "epoch": 0.36841913888742994, "grad_norm": 998.3577880859375, "learning_rate": 4.006209688014119e-05, "loss": 40.4945, "step": 91190 }, { "epoch": 0.3684595401527976, "grad_norm": 951.7125854492188, "learning_rate": 4.005931073037596e-05, "loss": 54.5525, "step": 91200 }, { "epoch": 0.3684999414181652, "grad_norm": 517.168701171875, "learning_rate": 4.005652428702173e-05, "loss": 52.3269, "step": 91210 }, { "epoch": 0.36854034268353286, "grad_norm": 982.7772216796875, "learning_rate": 4.0053737550132816e-05, "loss": 65.8004, "step": 91220 }, { "epoch": 0.3685807439489005, "grad_norm": 0.0, "learning_rate": 4.005095051976353e-05, "loss": 58.1448, "step": 91230 }, { "epoch": 0.3686211452142681, "grad_norm": 1081.756103515625, "learning_rate": 4.0048163195968214e-05, "loss": 51.2047, "step": 91240 }, { "epoch": 0.3686615464796357, "grad_norm": 837.7747802734375, "learning_rate": 4.0045375578801214e-05, "loss": 66.0523, "step": 91250 }, { "epoch": 0.36870194774500337, "grad_norm": 316.6372375488281, "learning_rate": 4.004258766831686e-05, "loss": 64.1896, "step": 91260 }, { "epoch": 0.368742349010371, "grad_norm": 820.6416015625, "learning_rate": 4.0039799464569524e-05, "loss": 83.5785, "step": 91270 }, { "epoch": 0.36878275027573865, "grad_norm": 1047.304443359375, "learning_rate": 4.003701096761355e-05, "loss": 64.335, "step": 91280 }, { "epoch": 0.3688231515411063, "grad_norm": 882.9609375, "learning_rate": 4.0034222177503314e-05, "loss": 74.856, "step": 91290 }, { "epoch": 0.36886355280647387, "grad_norm": 758.5153198242188, "learning_rate": 4.003143309429317e-05, "loss": 57.2044, "step": 91300 }, { "epoch": 0.3689039540718415, "grad_norm": 637.7432861328125, "learning_rate": 4.0028643718037496e-05, "loss": 67.143, "step": 91310 }, { "epoch": 0.36894435533720915, "grad_norm": 982.8681640625, "learning_rate": 4.0025854048790677e-05, "loss": 56.672, "step": 91320 }, { "epoch": 0.3689847566025768, "grad_norm": 975.6199951171875, "learning_rate": 4.00230640866071e-05, "loss": 77.8675, "step": 91330 }, { "epoch": 0.36902515786794443, "grad_norm": 463.59063720703125, "learning_rate": 4.0020273831541155e-05, "loss": 47.0041, "step": 91340 }, { "epoch": 0.36906555913331207, "grad_norm": 1927.79541015625, "learning_rate": 4.001748328364724e-05, "loss": 100.6176, "step": 91350 }, { "epoch": 0.3691059603986797, "grad_norm": 614.8819580078125, "learning_rate": 4.001469244297975e-05, "loss": 63.3884, "step": 91360 }, { "epoch": 0.3691463616640473, "grad_norm": 578.9991455078125, "learning_rate": 4.00119013095931e-05, "loss": 38.7527, "step": 91370 }, { "epoch": 0.36918676292941494, "grad_norm": 424.689697265625, "learning_rate": 4.0009109883541715e-05, "loss": 71.5254, "step": 91380 }, { "epoch": 0.3692271641947826, "grad_norm": 592.7898559570312, "learning_rate": 4.000631816488001e-05, "loss": 88.0966, "step": 91390 }, { "epoch": 0.3692675654601502, "grad_norm": 699.6017456054688, "learning_rate": 4.000352615366239e-05, "loss": 65.126, "step": 91400 }, { "epoch": 0.36930796672551786, "grad_norm": 3042.03662109375, "learning_rate": 4.0000733849943313e-05, "loss": 60.7142, "step": 91410 }, { "epoch": 0.3693483679908855, "grad_norm": 799.2810668945312, "learning_rate": 3.999794125377721e-05, "loss": 36.586, "step": 91420 }, { "epoch": 0.3693887692562531, "grad_norm": 968.9716796875, "learning_rate": 3.999514836521851e-05, "loss": 69.6768, "step": 91430 }, { "epoch": 0.3694291705216207, "grad_norm": 1178.7864990234375, "learning_rate": 3.999235518432168e-05, "loss": 61.2758, "step": 91440 }, { "epoch": 0.36946957178698836, "grad_norm": 490.9415283203125, "learning_rate": 3.998956171114116e-05, "loss": 92.8936, "step": 91450 }, { "epoch": 0.369509973052356, "grad_norm": 429.26019287109375, "learning_rate": 3.998676794573142e-05, "loss": 54.8671, "step": 91460 }, { "epoch": 0.36955037431772364, "grad_norm": 556.7578125, "learning_rate": 3.998397388814693e-05, "loss": 50.8339, "step": 91470 }, { "epoch": 0.3695907755830913, "grad_norm": 745.5537719726562, "learning_rate": 3.9981179538442146e-05, "loss": 75.3068, "step": 91480 }, { "epoch": 0.3696311768484589, "grad_norm": 1237.29638671875, "learning_rate": 3.9978384896671564e-05, "loss": 71.3501, "step": 91490 }, { "epoch": 0.3696715781138265, "grad_norm": 846.2611083984375, "learning_rate": 3.997558996288965e-05, "loss": 68.3017, "step": 91500 }, { "epoch": 0.36971197937919414, "grad_norm": 1087.683837890625, "learning_rate": 3.9972794737150895e-05, "loss": 89.0834, "step": 91510 }, { "epoch": 0.3697523806445618, "grad_norm": 462.5960998535156, "learning_rate": 3.996999921950981e-05, "loss": 72.2417, "step": 91520 }, { "epoch": 0.3697927819099294, "grad_norm": 623.9275512695312, "learning_rate": 3.9967203410020875e-05, "loss": 79.5964, "step": 91530 }, { "epoch": 0.36983318317529706, "grad_norm": 2321.183837890625, "learning_rate": 3.99644073087386e-05, "loss": 100.5441, "step": 91540 }, { "epoch": 0.3698735844406647, "grad_norm": 931.0929565429688, "learning_rate": 3.9961610915717515e-05, "loss": 69.4745, "step": 91550 }, { "epoch": 0.3699139857060323, "grad_norm": 459.2430419921875, "learning_rate": 3.9958814231012115e-05, "loss": 48.2317, "step": 91560 }, { "epoch": 0.36995438697139993, "grad_norm": 2218.796630859375, "learning_rate": 3.9956017254676923e-05, "loss": 73.4239, "step": 91570 }, { "epoch": 0.36999478823676757, "grad_norm": 579.8865966796875, "learning_rate": 3.995321998676648e-05, "loss": 69.1222, "step": 91580 }, { "epoch": 0.3700351895021352, "grad_norm": 805.66943359375, "learning_rate": 3.995042242733532e-05, "loss": 40.6279, "step": 91590 }, { "epoch": 0.37007559076750285, "grad_norm": 1477.2010498046875, "learning_rate": 3.9947624576437975e-05, "loss": 54.183, "step": 91600 }, { "epoch": 0.3701159920328705, "grad_norm": 1422.080078125, "learning_rate": 3.994482643412899e-05, "loss": 44.8291, "step": 91610 }, { "epoch": 0.3701563932982381, "grad_norm": 596.1489868164062, "learning_rate": 3.994202800046292e-05, "loss": 50.5651, "step": 91620 }, { "epoch": 0.3701967945636057, "grad_norm": 697.937744140625, "learning_rate": 3.993922927549432e-05, "loss": 51.1245, "step": 91630 }, { "epoch": 0.37023719582897335, "grad_norm": 660.627197265625, "learning_rate": 3.993643025927776e-05, "loss": 73.9116, "step": 91640 }, { "epoch": 0.370277597094341, "grad_norm": 735.249267578125, "learning_rate": 3.9933630951867805e-05, "loss": 41.1959, "step": 91650 }, { "epoch": 0.37031799835970863, "grad_norm": 1717.015380859375, "learning_rate": 3.9930831353319023e-05, "loss": 66.5677, "step": 91660 }, { "epoch": 0.3703583996250763, "grad_norm": 628.260009765625, "learning_rate": 3.992803146368599e-05, "loss": 50.1199, "step": 91670 }, { "epoch": 0.3703988008904439, "grad_norm": 1197.982666015625, "learning_rate": 3.99252312830233e-05, "loss": 53.4161, "step": 91680 }, { "epoch": 0.3704392021558115, "grad_norm": 759.8359375, "learning_rate": 3.992243081138555e-05, "loss": 49.2217, "step": 91690 }, { "epoch": 0.37047960342117914, "grad_norm": 881.2600708007812, "learning_rate": 3.9919630048827314e-05, "loss": 75.7815, "step": 91700 }, { "epoch": 0.3705200046865468, "grad_norm": 356.75958251953125, "learning_rate": 3.991682899540322e-05, "loss": 67.6897, "step": 91710 }, { "epoch": 0.3705604059519144, "grad_norm": 1126.136474609375, "learning_rate": 3.9914027651167866e-05, "loss": 67.9846, "step": 91720 }, { "epoch": 0.37060080721728206, "grad_norm": 467.5633544921875, "learning_rate": 3.9911226016175866e-05, "loss": 77.1551, "step": 91730 }, { "epoch": 0.3706412084826497, "grad_norm": 590.6211547851562, "learning_rate": 3.990842409048183e-05, "loss": 52.3267, "step": 91740 }, { "epoch": 0.3706816097480173, "grad_norm": 2134.7158203125, "learning_rate": 3.99056218741404e-05, "loss": 89.5028, "step": 91750 }, { "epoch": 0.3707220110133849, "grad_norm": 545.798583984375, "learning_rate": 3.990281936720619e-05, "loss": 59.62, "step": 91760 }, { "epoch": 0.37076241227875256, "grad_norm": 2029.9058837890625, "learning_rate": 3.990001656973385e-05, "loss": 59.6643, "step": 91770 }, { "epoch": 0.3708028135441202, "grad_norm": 413.22491455078125, "learning_rate": 3.9897213481778006e-05, "loss": 48.1997, "step": 91780 }, { "epoch": 0.37084321480948784, "grad_norm": 1251.12744140625, "learning_rate": 3.9894410103393323e-05, "loss": 59.8171, "step": 91790 }, { "epoch": 0.3708836160748555, "grad_norm": 520.3812866210938, "learning_rate": 3.989160643463445e-05, "loss": 71.2307, "step": 91800 }, { "epoch": 0.3709240173402231, "grad_norm": 866.8963012695312, "learning_rate": 3.988880247555604e-05, "loss": 102.0608, "step": 91810 }, { "epoch": 0.3709644186055907, "grad_norm": 546.447998046875, "learning_rate": 3.9885998226212764e-05, "loss": 48.1409, "step": 91820 }, { "epoch": 0.37100481987095835, "grad_norm": 1045.4818115234375, "learning_rate": 3.988319368665928e-05, "loss": 96.8548, "step": 91830 }, { "epoch": 0.371045221136326, "grad_norm": 1263.72314453125, "learning_rate": 3.988038885695028e-05, "loss": 51.555, "step": 91840 }, { "epoch": 0.3710856224016936, "grad_norm": 929.8193359375, "learning_rate": 3.987758373714044e-05, "loss": 48.9912, "step": 91850 }, { "epoch": 0.37112602366706127, "grad_norm": 1874.9107666015625, "learning_rate": 3.987477832728444e-05, "loss": 48.1193, "step": 91860 }, { "epoch": 0.3711664249324289, "grad_norm": 449.92388916015625, "learning_rate": 3.987197262743697e-05, "loss": 60.4172, "step": 91870 }, { "epoch": 0.3712068261977965, "grad_norm": 3075.2314453125, "learning_rate": 3.986916663765275e-05, "loss": 66.7121, "step": 91880 }, { "epoch": 0.37124722746316413, "grad_norm": 805.412841796875, "learning_rate": 3.9866360357986467e-05, "loss": 60.1465, "step": 91890 }, { "epoch": 0.37128762872853177, "grad_norm": 925.1229248046875, "learning_rate": 3.9863553788492834e-05, "loss": 40.3686, "step": 91900 }, { "epoch": 0.3713280299938994, "grad_norm": 600.6539306640625, "learning_rate": 3.9860746929226567e-05, "loss": 66.7164, "step": 91910 }, { "epoch": 0.37136843125926705, "grad_norm": 1517.1585693359375, "learning_rate": 3.985793978024239e-05, "loss": 42.1975, "step": 91920 }, { "epoch": 0.3714088325246347, "grad_norm": 1709.787353515625, "learning_rate": 3.985513234159502e-05, "loss": 61.1718, "step": 91930 }, { "epoch": 0.3714492337900023, "grad_norm": 610.5380249023438, "learning_rate": 3.985232461333921e-05, "loss": 66.061, "step": 91940 }, { "epoch": 0.3714896350553699, "grad_norm": 1254.192138671875, "learning_rate": 3.984951659552968e-05, "loss": 61.3981, "step": 91950 }, { "epoch": 0.37153003632073756, "grad_norm": 725.9743041992188, "learning_rate": 3.984670828822118e-05, "loss": 50.754, "step": 91960 }, { "epoch": 0.3715704375861052, "grad_norm": 898.7955322265625, "learning_rate": 3.984389969146846e-05, "loss": 80.66, "step": 91970 }, { "epoch": 0.37161083885147284, "grad_norm": 1205.29345703125, "learning_rate": 3.9841090805326264e-05, "loss": 88.857, "step": 91980 }, { "epoch": 0.3716512401168405, "grad_norm": 304.8878479003906, "learning_rate": 3.983828162984937e-05, "loss": 39.1823, "step": 91990 }, { "epoch": 0.3716916413822081, "grad_norm": 474.86700439453125, "learning_rate": 3.983547216509254e-05, "loss": 55.8331, "step": 92000 }, { "epoch": 0.3717320426475757, "grad_norm": 401.64044189453125, "learning_rate": 3.9832662411110536e-05, "loss": 69.07, "step": 92010 }, { "epoch": 0.37177244391294334, "grad_norm": 338.5098571777344, "learning_rate": 3.9829852367958144e-05, "loss": 49.6506, "step": 92020 }, { "epoch": 0.371812845178311, "grad_norm": 616.5515747070312, "learning_rate": 3.9827042035690145e-05, "loss": 55.922, "step": 92030 }, { "epoch": 0.3718532464436786, "grad_norm": 696.16357421875, "learning_rate": 3.9824231414361324e-05, "loss": 56.3399, "step": 92040 }, { "epoch": 0.37189364770904626, "grad_norm": 555.5895385742188, "learning_rate": 3.982142050402649e-05, "loss": 57.3369, "step": 92050 }, { "epoch": 0.3719340489744139, "grad_norm": 777.5509033203125, "learning_rate": 3.9818609304740414e-05, "loss": 72.6481, "step": 92060 }, { "epoch": 0.3719744502397815, "grad_norm": 752.47705078125, "learning_rate": 3.981579781655794e-05, "loss": 71.8169, "step": 92070 }, { "epoch": 0.3720148515051491, "grad_norm": 973.05419921875, "learning_rate": 3.981298603953385e-05, "loss": 84.1057, "step": 92080 }, { "epoch": 0.37205525277051676, "grad_norm": 956.1757202148438, "learning_rate": 3.9810173973722974e-05, "loss": 57.5237, "step": 92090 }, { "epoch": 0.3720956540358844, "grad_norm": 606.1538696289062, "learning_rate": 3.980736161918013e-05, "loss": 51.6493, "step": 92100 }, { "epoch": 0.37213605530125204, "grad_norm": 401.66314697265625, "learning_rate": 3.980454897596014e-05, "loss": 74.039, "step": 92110 }, { "epoch": 0.3721764565666197, "grad_norm": 469.2101745605469, "learning_rate": 3.980173604411786e-05, "loss": 62.7403, "step": 92120 }, { "epoch": 0.3722168578319873, "grad_norm": 814.7521362304688, "learning_rate": 3.979892282370811e-05, "loss": 93.9603, "step": 92130 }, { "epoch": 0.3722572590973549, "grad_norm": 1050.54296875, "learning_rate": 3.979610931478574e-05, "loss": 79.703, "step": 92140 }, { "epoch": 0.37229766036272255, "grad_norm": 426.4966735839844, "learning_rate": 3.97932955174056e-05, "loss": 74.3453, "step": 92150 }, { "epoch": 0.3723380616280902, "grad_norm": 587.7415161132812, "learning_rate": 3.979048143162255e-05, "loss": 58.7905, "step": 92160 }, { "epoch": 0.37237846289345783, "grad_norm": 1028.9864501953125, "learning_rate": 3.978766705749145e-05, "loss": 49.0045, "step": 92170 }, { "epoch": 0.37241886415882547, "grad_norm": 722.3172607421875, "learning_rate": 3.9784852395067166e-05, "loss": 50.1978, "step": 92180 }, { "epoch": 0.3724592654241931, "grad_norm": 1028.6400146484375, "learning_rate": 3.978203744440457e-05, "loss": 65.8073, "step": 92190 }, { "epoch": 0.3724996666895607, "grad_norm": 350.545654296875, "learning_rate": 3.977922220555855e-05, "loss": 66.0355, "step": 92200 }, { "epoch": 0.37254006795492833, "grad_norm": 888.4639282226562, "learning_rate": 3.977640667858398e-05, "loss": 61.8959, "step": 92210 }, { "epoch": 0.372580469220296, "grad_norm": 938.3932495117188, "learning_rate": 3.977359086353576e-05, "loss": 69.4164, "step": 92220 }, { "epoch": 0.3726208704856636, "grad_norm": 1005.269775390625, "learning_rate": 3.977077476046877e-05, "loss": 65.6192, "step": 92230 }, { "epoch": 0.37266127175103125, "grad_norm": 557.5337524414062, "learning_rate": 3.976795836943793e-05, "loss": 59.2249, "step": 92240 }, { "epoch": 0.3727016730163989, "grad_norm": 2857.20703125, "learning_rate": 3.976514169049814e-05, "loss": 68.5465, "step": 92250 }, { "epoch": 0.3727420742817665, "grad_norm": 1169.2440185546875, "learning_rate": 3.97623247237043e-05, "loss": 46.9372, "step": 92260 }, { "epoch": 0.3727824755471341, "grad_norm": 561.6387329101562, "learning_rate": 3.9759507469111346e-05, "loss": 59.3901, "step": 92270 }, { "epoch": 0.37282287681250176, "grad_norm": 835.4718017578125, "learning_rate": 3.9756689926774196e-05, "loss": 85.8709, "step": 92280 }, { "epoch": 0.3728632780778694, "grad_norm": 789.12841796875, "learning_rate": 3.975387209674778e-05, "loss": 71.775, "step": 92290 }, { "epoch": 0.37290367934323704, "grad_norm": 0.0, "learning_rate": 3.9751053979087035e-05, "loss": 47.8555, "step": 92300 }, { "epoch": 0.3729440806086047, "grad_norm": 776.5151977539062, "learning_rate": 3.9748235573846894e-05, "loss": 70.7797, "step": 92310 }, { "epoch": 0.3729844818739723, "grad_norm": 987.219482421875, "learning_rate": 3.97454168810823e-05, "loss": 74.3305, "step": 92320 }, { "epoch": 0.3730248831393399, "grad_norm": 508.5974426269531, "learning_rate": 3.974259790084822e-05, "loss": 80.4925, "step": 92330 }, { "epoch": 0.37306528440470754, "grad_norm": 784.2496948242188, "learning_rate": 3.973977863319961e-05, "loss": 75.617, "step": 92340 }, { "epoch": 0.3731056856700752, "grad_norm": 418.7388916015625, "learning_rate": 3.973695907819142e-05, "loss": 49.6771, "step": 92350 }, { "epoch": 0.3731460869354428, "grad_norm": 1385.4676513671875, "learning_rate": 3.973413923587862e-05, "loss": 54.8952, "step": 92360 }, { "epoch": 0.37318648820081046, "grad_norm": 1004.26611328125, "learning_rate": 3.97313191063162e-05, "loss": 66.2516, "step": 92370 }, { "epoch": 0.3732268894661781, "grad_norm": 331.77972412109375, "learning_rate": 3.9728498689559126e-05, "loss": 62.8471, "step": 92380 }, { "epoch": 0.3732672907315457, "grad_norm": 799.3560180664062, "learning_rate": 3.972567798566238e-05, "loss": 76.5567, "step": 92390 }, { "epoch": 0.3733076919969133, "grad_norm": 733.6048583984375, "learning_rate": 3.9722856994680966e-05, "loss": 60.599, "step": 92400 }, { "epoch": 0.37334809326228097, "grad_norm": 610.0665893554688, "learning_rate": 3.9720035716669876e-05, "loss": 58.7812, "step": 92410 }, { "epoch": 0.3733884945276486, "grad_norm": 757.6383666992188, "learning_rate": 3.971721415168411e-05, "loss": 72.6537, "step": 92420 }, { "epoch": 0.37342889579301625, "grad_norm": 628.03515625, "learning_rate": 3.971439229977869e-05, "loss": 58.028, "step": 92430 }, { "epoch": 0.3734692970583839, "grad_norm": 567.295654296875, "learning_rate": 3.9711570161008596e-05, "loss": 54.5905, "step": 92440 }, { "epoch": 0.3735096983237515, "grad_norm": 453.08258056640625, "learning_rate": 3.9708747735428886e-05, "loss": 53.7484, "step": 92450 }, { "epoch": 0.3735500995891191, "grad_norm": 426.8596496582031, "learning_rate": 3.9705925023094554e-05, "loss": 73.5093, "step": 92460 }, { "epoch": 0.37359050085448675, "grad_norm": 372.20684814453125, "learning_rate": 3.970310202406064e-05, "loss": 44.4281, "step": 92470 }, { "epoch": 0.3736309021198544, "grad_norm": 714.4765014648438, "learning_rate": 3.970027873838219e-05, "loss": 78.6218, "step": 92480 }, { "epoch": 0.37367130338522203, "grad_norm": 709.8063354492188, "learning_rate": 3.969745516611424e-05, "loss": 55.8991, "step": 92490 }, { "epoch": 0.37371170465058967, "grad_norm": 840.8602294921875, "learning_rate": 3.969463130731183e-05, "loss": 68.27, "step": 92500 }, { "epoch": 0.3737521059159573, "grad_norm": 1879.0968017578125, "learning_rate": 3.969180716203002e-05, "loss": 98.6429, "step": 92510 }, { "epoch": 0.3737925071813249, "grad_norm": 925.2473754882812, "learning_rate": 3.9688982730323865e-05, "loss": 74.3842, "step": 92520 }, { "epoch": 0.37383290844669254, "grad_norm": 1487.61962890625, "learning_rate": 3.968615801224843e-05, "loss": 81.2509, "step": 92530 }, { "epoch": 0.3738733097120602, "grad_norm": 585.5298461914062, "learning_rate": 3.968333300785878e-05, "loss": 89.9086, "step": 92540 }, { "epoch": 0.3739137109774278, "grad_norm": 598.8243408203125, "learning_rate": 3.968050771720999e-05, "loss": 52.4405, "step": 92550 }, { "epoch": 0.37395411224279546, "grad_norm": 451.022705078125, "learning_rate": 3.967768214035715e-05, "loss": 71.7542, "step": 92560 }, { "epoch": 0.3739945135081631, "grad_norm": 768.9769287109375, "learning_rate": 3.967485627735534e-05, "loss": 92.9239, "step": 92570 }, { "epoch": 0.3740349147735307, "grad_norm": 854.9164428710938, "learning_rate": 3.967203012825965e-05, "loss": 71.4999, "step": 92580 }, { "epoch": 0.3740753160388983, "grad_norm": 471.7916259765625, "learning_rate": 3.966920369312518e-05, "loss": 47.2625, "step": 92590 }, { "epoch": 0.37411571730426596, "grad_norm": 478.0005187988281, "learning_rate": 3.966637697200703e-05, "loss": 56.9574, "step": 92600 }, { "epoch": 0.3741561185696336, "grad_norm": 968.0185546875, "learning_rate": 3.9663549964960314e-05, "loss": 62.3624, "step": 92610 }, { "epoch": 0.37419651983500124, "grad_norm": 867.2205810546875, "learning_rate": 3.966072267204014e-05, "loss": 33.5701, "step": 92620 }, { "epoch": 0.3742369211003689, "grad_norm": 941.1777954101562, "learning_rate": 3.965789509330163e-05, "loss": 78.525, "step": 92630 }, { "epoch": 0.3742773223657365, "grad_norm": 612.6570434570312, "learning_rate": 3.965506722879991e-05, "loss": 40.913, "step": 92640 }, { "epoch": 0.3743177236311041, "grad_norm": 127.48741149902344, "learning_rate": 3.965223907859011e-05, "loss": 63.8331, "step": 92650 }, { "epoch": 0.37435812489647174, "grad_norm": 443.2701721191406, "learning_rate": 3.964941064272736e-05, "loss": 71.8824, "step": 92660 }, { "epoch": 0.3743985261618394, "grad_norm": 431.3937072753906, "learning_rate": 3.9646581921266815e-05, "loss": 74.9664, "step": 92670 }, { "epoch": 0.374438927427207, "grad_norm": 852.2191772460938, "learning_rate": 3.964375291426361e-05, "loss": 51.0597, "step": 92680 }, { "epoch": 0.37447932869257466, "grad_norm": 493.4492492675781, "learning_rate": 3.96409236217729e-05, "loss": 39.8483, "step": 92690 }, { "epoch": 0.3745197299579423, "grad_norm": 363.88836669921875, "learning_rate": 3.963809404384985e-05, "loss": 44.7268, "step": 92700 }, { "epoch": 0.3745601312233099, "grad_norm": 2506.282470703125, "learning_rate": 3.9635264180549624e-05, "loss": 79.4526, "step": 92710 }, { "epoch": 0.37460053248867753, "grad_norm": 1399.570556640625, "learning_rate": 3.963243403192739e-05, "loss": 62.9623, "step": 92720 }, { "epoch": 0.37464093375404517, "grad_norm": 502.1692810058594, "learning_rate": 3.9629603598038314e-05, "loss": 73.1174, "step": 92730 }, { "epoch": 0.3746813350194128, "grad_norm": 478.8690185546875, "learning_rate": 3.962677287893758e-05, "loss": 66.815, "step": 92740 }, { "epoch": 0.37472173628478045, "grad_norm": 563.9537963867188, "learning_rate": 3.962394187468039e-05, "loss": 72.0108, "step": 92750 }, { "epoch": 0.3747621375501481, "grad_norm": 761.1668090820312, "learning_rate": 3.962111058532192e-05, "loss": 42.6949, "step": 92760 }, { "epoch": 0.37480253881551573, "grad_norm": 795.1704711914062, "learning_rate": 3.961827901091737e-05, "loss": 58.1634, "step": 92770 }, { "epoch": 0.3748429400808833, "grad_norm": 811.5784301757812, "learning_rate": 3.9615447151521945e-05, "loss": 75.2723, "step": 92780 }, { "epoch": 0.37488334134625095, "grad_norm": 444.902099609375, "learning_rate": 3.961261500719085e-05, "loss": 62.2803, "step": 92790 }, { "epoch": 0.3749237426116186, "grad_norm": 530.8676147460938, "learning_rate": 3.960978257797931e-05, "loss": 62.1482, "step": 92800 }, { "epoch": 0.37496414387698623, "grad_norm": 230.55746459960938, "learning_rate": 3.9606949863942526e-05, "loss": 60.5643, "step": 92810 }, { "epoch": 0.3750045451423539, "grad_norm": 656.6939697265625, "learning_rate": 3.960411686513574e-05, "loss": 49.9396, "step": 92820 }, { "epoch": 0.3750449464077215, "grad_norm": 1161.34765625, "learning_rate": 3.960128358161418e-05, "loss": 50.9719, "step": 92830 }, { "epoch": 0.3750853476730891, "grad_norm": 768.6620483398438, "learning_rate": 3.9598450013433075e-05, "loss": 68.8406, "step": 92840 }, { "epoch": 0.37512574893845674, "grad_norm": 512.8631591796875, "learning_rate": 3.9595616160647674e-05, "loss": 61.0759, "step": 92850 }, { "epoch": 0.3751661502038244, "grad_norm": 341.27178955078125, "learning_rate": 3.959278202331322e-05, "loss": 50.0951, "step": 92860 }, { "epoch": 0.375206551469192, "grad_norm": 1139.0494384765625, "learning_rate": 3.9589947601484974e-05, "loss": 72.3378, "step": 92870 }, { "epoch": 0.37524695273455966, "grad_norm": 703.4913940429688, "learning_rate": 3.9587112895218184e-05, "loss": 57.7523, "step": 92880 }, { "epoch": 0.3752873539999273, "grad_norm": 1080.4427490234375, "learning_rate": 3.958427790456811e-05, "loss": 68.2021, "step": 92890 }, { "epoch": 0.3753277552652949, "grad_norm": 669.9296264648438, "learning_rate": 3.958144262959004e-05, "loss": 71.9688, "step": 92900 }, { "epoch": 0.3753681565306625, "grad_norm": 945.6260986328125, "learning_rate": 3.9578607070339235e-05, "loss": 72.3592, "step": 92910 }, { "epoch": 0.37540855779603016, "grad_norm": 310.77203369140625, "learning_rate": 3.957577122687098e-05, "loss": 53.5487, "step": 92920 }, { "epoch": 0.3754489590613978, "grad_norm": 602.9820556640625, "learning_rate": 3.957293509924056e-05, "loss": 61.1102, "step": 92930 }, { "epoch": 0.37548936032676544, "grad_norm": 257.7418518066406, "learning_rate": 3.9570098687503274e-05, "loss": 56.4356, "step": 92940 }, { "epoch": 0.3755297615921331, "grad_norm": 896.652587890625, "learning_rate": 3.9567261991714404e-05, "loss": 63.6779, "step": 92950 }, { "epoch": 0.3755701628575007, "grad_norm": 1012.0924072265625, "learning_rate": 3.9564425011929265e-05, "loss": 68.4286, "step": 92960 }, { "epoch": 0.3756105641228683, "grad_norm": 525.5440673828125, "learning_rate": 3.956158774820316e-05, "loss": 53.852, "step": 92970 }, { "epoch": 0.37565096538823595, "grad_norm": 657.0477905273438, "learning_rate": 3.955875020059141e-05, "loss": 79.606, "step": 92980 }, { "epoch": 0.3756913666536036, "grad_norm": 1086.502197265625, "learning_rate": 3.955591236914933e-05, "loss": 46.6891, "step": 92990 }, { "epoch": 0.3757317679189712, "grad_norm": 1276.398193359375, "learning_rate": 3.955307425393224e-05, "loss": 83.8669, "step": 93000 }, { "epoch": 0.37577216918433887, "grad_norm": 1241.0797119140625, "learning_rate": 3.955023585499547e-05, "loss": 65.6787, "step": 93010 }, { "epoch": 0.3758125704497065, "grad_norm": 516.9462890625, "learning_rate": 3.954739717239437e-05, "loss": 80.84, "step": 93020 }, { "epoch": 0.3758529717150741, "grad_norm": 475.4250183105469, "learning_rate": 3.954455820618427e-05, "loss": 72.3098, "step": 93030 }, { "epoch": 0.37589337298044173, "grad_norm": 950.8383178710938, "learning_rate": 3.954171895642052e-05, "loss": 76.2437, "step": 93040 }, { "epoch": 0.37593377424580937, "grad_norm": 631.9634399414062, "learning_rate": 3.953887942315847e-05, "loss": 42.5692, "step": 93050 }, { "epoch": 0.375974175511177, "grad_norm": 942.7606201171875, "learning_rate": 3.953603960645349e-05, "loss": 79.1041, "step": 93060 }, { "epoch": 0.37601457677654465, "grad_norm": 1219.8829345703125, "learning_rate": 3.953319950636092e-05, "loss": 107.0019, "step": 93070 }, { "epoch": 0.3760549780419123, "grad_norm": 688.3053588867188, "learning_rate": 3.953035912293616e-05, "loss": 64.1869, "step": 93080 }, { "epoch": 0.37609537930727993, "grad_norm": 632.4610595703125, "learning_rate": 3.9527518456234544e-05, "loss": 57.2202, "step": 93090 }, { "epoch": 0.3761357805726475, "grad_norm": 323.7148742675781, "learning_rate": 3.95246775063115e-05, "loss": 45.6848, "step": 93100 }, { "epoch": 0.37617618183801516, "grad_norm": 789.0226440429688, "learning_rate": 3.952183627322238e-05, "loss": 76.8052, "step": 93110 }, { "epoch": 0.3762165831033828, "grad_norm": 631.6780395507812, "learning_rate": 3.951899475702259e-05, "loss": 57.1333, "step": 93120 }, { "epoch": 0.37625698436875044, "grad_norm": 1251.869140625, "learning_rate": 3.951615295776752e-05, "loss": 120.3193, "step": 93130 }, { "epoch": 0.3762973856341181, "grad_norm": 525.0264892578125, "learning_rate": 3.951331087551257e-05, "loss": 77.0742, "step": 93140 }, { "epoch": 0.3763377868994857, "grad_norm": 927.7319946289062, "learning_rate": 3.951046851031315e-05, "loss": 68.461, "step": 93150 }, { "epoch": 0.3763781881648533, "grad_norm": 388.8170471191406, "learning_rate": 3.950762586222468e-05, "loss": 33.0051, "step": 93160 }, { "epoch": 0.37641858943022094, "grad_norm": 462.57025146484375, "learning_rate": 3.950478293130258e-05, "loss": 57.6939, "step": 93170 }, { "epoch": 0.3764589906955886, "grad_norm": 818.913818359375, "learning_rate": 3.950193971760226e-05, "loss": 47.7335, "step": 93180 }, { "epoch": 0.3764993919609562, "grad_norm": 2200.9609375, "learning_rate": 3.949909622117918e-05, "loss": 64.2052, "step": 93190 }, { "epoch": 0.37653979322632386, "grad_norm": 908.902099609375, "learning_rate": 3.9496252442088733e-05, "loss": 36.8062, "step": 93200 }, { "epoch": 0.3765801944916915, "grad_norm": 740.1572265625, "learning_rate": 3.949340838038639e-05, "loss": 68.5629, "step": 93210 }, { "epoch": 0.3766205957570591, "grad_norm": 1135.758544921875, "learning_rate": 3.949056403612758e-05, "loss": 62.5657, "step": 93220 }, { "epoch": 0.3766609970224267, "grad_norm": 2026.0989990234375, "learning_rate": 3.9487719409367774e-05, "loss": 74.2239, "step": 93230 }, { "epoch": 0.37670139828779436, "grad_norm": 864.1495361328125, "learning_rate": 3.948487450016242e-05, "loss": 92.9084, "step": 93240 }, { "epoch": 0.376741799553162, "grad_norm": 810.767333984375, "learning_rate": 3.948202930856697e-05, "loss": 80.3417, "step": 93250 }, { "epoch": 0.37678220081852964, "grad_norm": 170.60748291015625, "learning_rate": 3.947918383463691e-05, "loss": 58.1501, "step": 93260 }, { "epoch": 0.3768226020838973, "grad_norm": 521.3902587890625, "learning_rate": 3.947633807842771e-05, "loss": 24.7328, "step": 93270 }, { "epoch": 0.3768630033492649, "grad_norm": 615.25048828125, "learning_rate": 3.947349203999484e-05, "loss": 69.8992, "step": 93280 }, { "epoch": 0.3769034046146325, "grad_norm": 616.8837280273438, "learning_rate": 3.9470645719393794e-05, "loss": 48.8142, "step": 93290 }, { "epoch": 0.37694380588000015, "grad_norm": 1704.8671875, "learning_rate": 3.946779911668006e-05, "loss": 62.0936, "step": 93300 }, { "epoch": 0.3769842071453678, "grad_norm": 816.0055541992188, "learning_rate": 3.9464952231909135e-05, "loss": 56.5077, "step": 93310 }, { "epoch": 0.37702460841073543, "grad_norm": 501.01641845703125, "learning_rate": 3.946210506513651e-05, "loss": 51.1529, "step": 93320 }, { "epoch": 0.37706500967610307, "grad_norm": 1191.9462890625, "learning_rate": 3.945925761641771e-05, "loss": 56.0396, "step": 93330 }, { "epoch": 0.3771054109414707, "grad_norm": 910.802734375, "learning_rate": 3.945640988580824e-05, "loss": 83.711, "step": 93340 }, { "epoch": 0.3771458122068383, "grad_norm": 1067.3336181640625, "learning_rate": 3.9453561873363615e-05, "loss": 57.1342, "step": 93350 }, { "epoch": 0.37718621347220593, "grad_norm": 894.1754150390625, "learning_rate": 3.945071357913935e-05, "loss": 53.6304, "step": 93360 }, { "epoch": 0.3772266147375736, "grad_norm": 760.7733764648438, "learning_rate": 3.9447865003191e-05, "loss": 67.4268, "step": 93370 }, { "epoch": 0.3772670160029412, "grad_norm": 771.8887939453125, "learning_rate": 3.9445016145574074e-05, "loss": 69.5092, "step": 93380 }, { "epoch": 0.37730741726830885, "grad_norm": 459.5794372558594, "learning_rate": 3.9442167006344124e-05, "loss": 46.507, "step": 93390 }, { "epoch": 0.3773478185336765, "grad_norm": 2548.902099609375, "learning_rate": 3.943931758555669e-05, "loss": 73.9357, "step": 93400 }, { "epoch": 0.37738821979904413, "grad_norm": 1343.5716552734375, "learning_rate": 3.9436467883267334e-05, "loss": 52.8962, "step": 93410 }, { "epoch": 0.3774286210644117, "grad_norm": 1176.486083984375, "learning_rate": 3.9433617899531597e-05, "loss": 71.4715, "step": 93420 }, { "epoch": 0.37746902232977936, "grad_norm": 3818.821533203125, "learning_rate": 3.943076763440505e-05, "loss": 73.5444, "step": 93430 }, { "epoch": 0.377509423595147, "grad_norm": 1363.684814453125, "learning_rate": 3.942791708794326e-05, "loss": 45.8224, "step": 93440 }, { "epoch": 0.37754982486051464, "grad_norm": 602.17138671875, "learning_rate": 3.9425066260201796e-05, "loss": 56.5135, "step": 93450 }, { "epoch": 0.3775902261258823, "grad_norm": 533.1290893554688, "learning_rate": 3.942221515123623e-05, "loss": 47.6091, "step": 93460 }, { "epoch": 0.3776306273912499, "grad_norm": 868.53125, "learning_rate": 3.941936376110217e-05, "loss": 60.5942, "step": 93470 }, { "epoch": 0.3776710286566175, "grad_norm": 581.7073364257812, "learning_rate": 3.9416512089855184e-05, "loss": 52.1139, "step": 93480 }, { "epoch": 0.37771142992198514, "grad_norm": 983.8074951171875, "learning_rate": 3.941366013755087e-05, "loss": 46.983, "step": 93490 }, { "epoch": 0.3777518311873528, "grad_norm": 1439.6392822265625, "learning_rate": 3.941080790424484e-05, "loss": 56.4244, "step": 93500 }, { "epoch": 0.3777922324527204, "grad_norm": 1822.520751953125, "learning_rate": 3.940795538999268e-05, "loss": 77.834, "step": 93510 }, { "epoch": 0.37783263371808806, "grad_norm": 437.8319396972656, "learning_rate": 3.940510259485002e-05, "loss": 47.9851, "step": 93520 }, { "epoch": 0.3778730349834557, "grad_norm": 1477.3594970703125, "learning_rate": 3.9402249518872456e-05, "loss": 104.0979, "step": 93530 }, { "epoch": 0.3779134362488233, "grad_norm": 517.4141235351562, "learning_rate": 3.939939616211563e-05, "loss": 46.596, "step": 93540 }, { "epoch": 0.3779538375141909, "grad_norm": 673.5197143554688, "learning_rate": 3.9396542524635175e-05, "loss": 61.4171, "step": 93550 }, { "epoch": 0.37799423877955857, "grad_norm": 679.6529541015625, "learning_rate": 3.939368860648669e-05, "loss": 50.3042, "step": 93560 }, { "epoch": 0.3780346400449262, "grad_norm": 1323.72314453125, "learning_rate": 3.939083440772585e-05, "loss": 52.2302, "step": 93570 }, { "epoch": 0.37807504131029385, "grad_norm": 1001.907958984375, "learning_rate": 3.938797992840828e-05, "loss": 66.2964, "step": 93580 }, { "epoch": 0.3781154425756615, "grad_norm": 615.739990234375, "learning_rate": 3.9385125168589635e-05, "loss": 72.0375, "step": 93590 }, { "epoch": 0.3781558438410291, "grad_norm": 190.5300750732422, "learning_rate": 3.938227012832557e-05, "loss": 38.2591, "step": 93600 }, { "epoch": 0.3781962451063967, "grad_norm": 4424.119140625, "learning_rate": 3.9379414807671736e-05, "loss": 86.606, "step": 93610 }, { "epoch": 0.37823664637176435, "grad_norm": 1783.222900390625, "learning_rate": 3.937655920668382e-05, "loss": 45.5719, "step": 93620 }, { "epoch": 0.378277047637132, "grad_norm": 508.1633605957031, "learning_rate": 3.937370332541747e-05, "loss": 46.5244, "step": 93630 }, { "epoch": 0.37831744890249963, "grad_norm": 1679.250732421875, "learning_rate": 3.937084716392838e-05, "loss": 105.5736, "step": 93640 }, { "epoch": 0.37835785016786727, "grad_norm": 992.9168090820312, "learning_rate": 3.936799072227222e-05, "loss": 66.7025, "step": 93650 }, { "epoch": 0.3783982514332349, "grad_norm": 709.3222045898438, "learning_rate": 3.936513400050469e-05, "loss": 49.8676, "step": 93660 }, { "epoch": 0.3784386526986025, "grad_norm": 1248.9063720703125, "learning_rate": 3.936227699868147e-05, "loss": 67.6602, "step": 93670 }, { "epoch": 0.37847905396397014, "grad_norm": 494.0985107421875, "learning_rate": 3.9359419716858274e-05, "loss": 52.3046, "step": 93680 }, { "epoch": 0.3785194552293378, "grad_norm": 317.8280334472656, "learning_rate": 3.9356562155090795e-05, "loss": 37.2544, "step": 93690 }, { "epoch": 0.3785598564947054, "grad_norm": 2666.53662109375, "learning_rate": 3.935370431343475e-05, "loss": 82.7156, "step": 93700 }, { "epoch": 0.37860025776007306, "grad_norm": 1855.157958984375, "learning_rate": 3.935084619194584e-05, "loss": 79.1015, "step": 93710 }, { "epoch": 0.3786406590254407, "grad_norm": 621.322265625, "learning_rate": 3.93479877906798e-05, "loss": 57.4279, "step": 93720 }, { "epoch": 0.37868106029080834, "grad_norm": 785.6000366210938, "learning_rate": 3.934512910969235e-05, "loss": 75.8258, "step": 93730 }, { "epoch": 0.3787214615561759, "grad_norm": 507.967041015625, "learning_rate": 3.934227014903922e-05, "loss": 63.3973, "step": 93740 }, { "epoch": 0.37876186282154356, "grad_norm": 1563.5101318359375, "learning_rate": 3.933941090877615e-05, "loss": 52.865, "step": 93750 }, { "epoch": 0.3788022640869112, "grad_norm": 571.6914672851562, "learning_rate": 3.933655138895889e-05, "loss": 61.8429, "step": 93760 }, { "epoch": 0.37884266535227884, "grad_norm": 927.9751586914062, "learning_rate": 3.9333691589643177e-05, "loss": 51.3117, "step": 93770 }, { "epoch": 0.3788830666176465, "grad_norm": 582.5885009765625, "learning_rate": 3.9330831510884755e-05, "loss": 85.4374, "step": 93780 }, { "epoch": 0.3789234678830141, "grad_norm": 681.2293701171875, "learning_rate": 3.932797115273941e-05, "loss": 75.4987, "step": 93790 }, { "epoch": 0.3789638691483817, "grad_norm": 720.52392578125, "learning_rate": 3.932511051526289e-05, "loss": 83.3338, "step": 93800 }, { "epoch": 0.37900427041374934, "grad_norm": 888.4859008789062, "learning_rate": 3.9322249598510955e-05, "loss": 67.421, "step": 93810 }, { "epoch": 0.379044671679117, "grad_norm": 1525.38134765625, "learning_rate": 3.93193884025394e-05, "loss": 85.0812, "step": 93820 }, { "epoch": 0.3790850729444846, "grad_norm": 705.2191772460938, "learning_rate": 3.931652692740399e-05, "loss": 67.6637, "step": 93830 }, { "epoch": 0.37912547420985226, "grad_norm": 1749.32568359375, "learning_rate": 3.931366517316052e-05, "loss": 68.8921, "step": 93840 }, { "epoch": 0.3791658754752199, "grad_norm": 751.9481201171875, "learning_rate": 3.9310803139864775e-05, "loss": 123.6256, "step": 93850 }, { "epoch": 0.3792062767405875, "grad_norm": 476.33636474609375, "learning_rate": 3.9307940827572555e-05, "loss": 50.916, "step": 93860 }, { "epoch": 0.37924667800595513, "grad_norm": 267.0869140625, "learning_rate": 3.9305078236339666e-05, "loss": 57.5847, "step": 93870 }, { "epoch": 0.37928707927132277, "grad_norm": 953.2073364257812, "learning_rate": 3.930221536622191e-05, "loss": 61.0358, "step": 93880 }, { "epoch": 0.3793274805366904, "grad_norm": 630.3345336914062, "learning_rate": 3.9299352217275105e-05, "loss": 70.866, "step": 93890 }, { "epoch": 0.37936788180205805, "grad_norm": 356.209228515625, "learning_rate": 3.9296488789555066e-05, "loss": 45.0516, "step": 93900 }, { "epoch": 0.3794082830674257, "grad_norm": 410.72320556640625, "learning_rate": 3.9293625083117616e-05, "loss": 73.401, "step": 93910 }, { "epoch": 0.37944868433279333, "grad_norm": 855.3485107421875, "learning_rate": 3.9290761098018585e-05, "loss": 55.5836, "step": 93920 }, { "epoch": 0.3794890855981609, "grad_norm": 787.3507690429688, "learning_rate": 3.928789683431381e-05, "loss": 55.8925, "step": 93930 }, { "epoch": 0.37952948686352855, "grad_norm": 600.7017822265625, "learning_rate": 3.928503229205913e-05, "loss": 60.9399, "step": 93940 }, { "epoch": 0.3795698881288962, "grad_norm": 561.0108032226562, "learning_rate": 3.928216747131039e-05, "loss": 69.8876, "step": 93950 }, { "epoch": 0.37961028939426383, "grad_norm": 1152.1119384765625, "learning_rate": 3.927930237212345e-05, "loss": 76.2245, "step": 93960 }, { "epoch": 0.3796506906596315, "grad_norm": 743.5120239257812, "learning_rate": 3.9276436994554144e-05, "loss": 57.824, "step": 93970 }, { "epoch": 0.3796910919249991, "grad_norm": 733.8156127929688, "learning_rate": 3.927357133865836e-05, "loss": 42.9731, "step": 93980 }, { "epoch": 0.3797314931903667, "grad_norm": 431.87158203125, "learning_rate": 3.927070540449195e-05, "loss": 63.5863, "step": 93990 }, { "epoch": 0.37977189445573434, "grad_norm": 751.9428100585938, "learning_rate": 3.92678391921108e-05, "loss": 78.9025, "step": 94000 }, { "epoch": 0.379812295721102, "grad_norm": 767.0701293945312, "learning_rate": 3.926497270157077e-05, "loss": 42.9222, "step": 94010 }, { "epoch": 0.3798526969864696, "grad_norm": 768.52197265625, "learning_rate": 3.926210593292775e-05, "loss": 48.6413, "step": 94020 }, { "epoch": 0.37989309825183726, "grad_norm": 618.26806640625, "learning_rate": 3.925923888623764e-05, "loss": 54.2861, "step": 94030 }, { "epoch": 0.3799334995172049, "grad_norm": 647.1188354492188, "learning_rate": 3.925637156155633e-05, "loss": 70.6201, "step": 94040 }, { "epoch": 0.37997390078257254, "grad_norm": 886.6661987304688, "learning_rate": 3.925350395893971e-05, "loss": 85.2603, "step": 94050 }, { "epoch": 0.3800143020479401, "grad_norm": 1664.85693359375, "learning_rate": 3.925063607844369e-05, "loss": 55.2438, "step": 94060 }, { "epoch": 0.38005470331330776, "grad_norm": 788.0062866210938, "learning_rate": 3.9247767920124176e-05, "loss": 73.1715, "step": 94070 }, { "epoch": 0.3800951045786754, "grad_norm": 435.3290710449219, "learning_rate": 3.924489948403711e-05, "loss": 70.93, "step": 94080 }, { "epoch": 0.38013550584404304, "grad_norm": 143.54469299316406, "learning_rate": 3.924203077023839e-05, "loss": 31.3308, "step": 94090 }, { "epoch": 0.3801759071094107, "grad_norm": 1035.6220703125, "learning_rate": 3.923916177878394e-05, "loss": 59.6959, "step": 94100 }, { "epoch": 0.3802163083747783, "grad_norm": 1633.117431640625, "learning_rate": 3.9236292509729697e-05, "loss": 77.1727, "step": 94110 }, { "epoch": 0.3802567096401459, "grad_norm": 421.79974365234375, "learning_rate": 3.9233422963131616e-05, "loss": 45.4822, "step": 94120 }, { "epoch": 0.38029711090551355, "grad_norm": 953.0307006835938, "learning_rate": 3.9230553139045617e-05, "loss": 98.6893, "step": 94130 }, { "epoch": 0.3803375121708812, "grad_norm": 1626.166015625, "learning_rate": 3.922768303752766e-05, "loss": 73.0103, "step": 94140 }, { "epoch": 0.3803779134362488, "grad_norm": 1201.143310546875, "learning_rate": 3.92248126586337e-05, "loss": 49.0338, "step": 94150 }, { "epoch": 0.38041831470161647, "grad_norm": 1199.5015869140625, "learning_rate": 3.922194200241969e-05, "loss": 49.4027, "step": 94160 }, { "epoch": 0.3804587159669841, "grad_norm": 948.0160522460938, "learning_rate": 3.92190710689416e-05, "loss": 47.4217, "step": 94170 }, { "epoch": 0.3804991172323517, "grad_norm": 1361.0826416015625, "learning_rate": 3.92161998582554e-05, "loss": 68.6793, "step": 94180 }, { "epoch": 0.38053951849771933, "grad_norm": 691.0305786132812, "learning_rate": 3.9213328370417065e-05, "loss": 86.5888, "step": 94190 }, { "epoch": 0.38057991976308697, "grad_norm": 448.7333984375, "learning_rate": 3.9210456605482576e-05, "loss": 45.7264, "step": 94200 }, { "epoch": 0.3806203210284546, "grad_norm": 926.4880981445312, "learning_rate": 3.920758456350792e-05, "loss": 73.8021, "step": 94210 }, { "epoch": 0.38066072229382225, "grad_norm": 542.78662109375, "learning_rate": 3.9204712244549085e-05, "loss": 65.7361, "step": 94220 }, { "epoch": 0.3807011235591899, "grad_norm": 3470.885986328125, "learning_rate": 3.9201839648662074e-05, "loss": 63.5717, "step": 94230 }, { "epoch": 0.38074152482455753, "grad_norm": 596.5708618164062, "learning_rate": 3.919896677590289e-05, "loss": 69.1542, "step": 94240 }, { "epoch": 0.3807819260899251, "grad_norm": 797.0637817382812, "learning_rate": 3.919609362632753e-05, "loss": 106.799, "step": 94250 }, { "epoch": 0.38082232735529276, "grad_norm": 641.5678100585938, "learning_rate": 3.9193220199992025e-05, "loss": 42.721, "step": 94260 }, { "epoch": 0.3808627286206604, "grad_norm": 370.0324401855469, "learning_rate": 3.919034649695238e-05, "loss": 47.1425, "step": 94270 }, { "epoch": 0.38090312988602804, "grad_norm": 1143.82861328125, "learning_rate": 3.918747251726463e-05, "loss": 51.9892, "step": 94280 }, { "epoch": 0.3809435311513957, "grad_norm": 859.6338500976562, "learning_rate": 3.9184598260984795e-05, "loss": 64.1427, "step": 94290 }, { "epoch": 0.3809839324167633, "grad_norm": 423.8070068359375, "learning_rate": 3.9181723728168916e-05, "loss": 75.3932, "step": 94300 }, { "epoch": 0.3810243336821309, "grad_norm": 945.8095703125, "learning_rate": 3.9178848918873027e-05, "loss": 47.6868, "step": 94310 }, { "epoch": 0.38106473494749854, "grad_norm": 1396.765625, "learning_rate": 3.9175973833153186e-05, "loss": 53.5173, "step": 94320 }, { "epoch": 0.3811051362128662, "grad_norm": 1253.411376953125, "learning_rate": 3.9173098471065434e-05, "loss": 48.6057, "step": 94330 }, { "epoch": 0.3811455374782338, "grad_norm": 627.4695434570312, "learning_rate": 3.9170222832665825e-05, "loss": 68.0112, "step": 94340 }, { "epoch": 0.38118593874360146, "grad_norm": 918.7919311523438, "learning_rate": 3.9167346918010425e-05, "loss": 102.3109, "step": 94350 }, { "epoch": 0.3812263400089691, "grad_norm": 1036.47265625, "learning_rate": 3.9164470727155314e-05, "loss": 77.9767, "step": 94360 }, { "epoch": 0.3812667412743367, "grad_norm": 1388.8292236328125, "learning_rate": 3.916159426015655e-05, "loss": 74.0313, "step": 94370 }, { "epoch": 0.3813071425397043, "grad_norm": 761.83203125, "learning_rate": 3.9158717517070214e-05, "loss": 58.2245, "step": 94380 }, { "epoch": 0.38134754380507196, "grad_norm": 416.04351806640625, "learning_rate": 3.915584049795239e-05, "loss": 50.2299, "step": 94390 }, { "epoch": 0.3813879450704396, "grad_norm": 1973.710205078125, "learning_rate": 3.915296320285917e-05, "loss": 56.33, "step": 94400 }, { "epoch": 0.38142834633580724, "grad_norm": 895.8132934570312, "learning_rate": 3.915008563184664e-05, "loss": 54.553, "step": 94410 }, { "epoch": 0.3814687476011749, "grad_norm": 521.8209838867188, "learning_rate": 3.9147207784970914e-05, "loss": 69.8902, "step": 94420 }, { "epoch": 0.3815091488665425, "grad_norm": 1060.228515625, "learning_rate": 3.914432966228808e-05, "loss": 72.3957, "step": 94430 }, { "epoch": 0.3815495501319101, "grad_norm": 478.51885986328125, "learning_rate": 3.914145126385426e-05, "loss": 68.0388, "step": 94440 }, { "epoch": 0.38158995139727775, "grad_norm": 855.6203002929688, "learning_rate": 3.9138572589725576e-05, "loss": 52.0944, "step": 94450 }, { "epoch": 0.3816303526626454, "grad_norm": 2904.56298828125, "learning_rate": 3.9135693639958125e-05, "loss": 50.8609, "step": 94460 }, { "epoch": 0.38167075392801303, "grad_norm": 1457.2967529296875, "learning_rate": 3.913281441460806e-05, "loss": 86.8889, "step": 94470 }, { "epoch": 0.38171115519338067, "grad_norm": 745.7070922851562, "learning_rate": 3.91299349137315e-05, "loss": 63.1558, "step": 94480 }, { "epoch": 0.3817515564587483, "grad_norm": 832.9826049804688, "learning_rate": 3.912705513738458e-05, "loss": 59.3702, "step": 94490 }, { "epoch": 0.3817919577241159, "grad_norm": 768.7559204101562, "learning_rate": 3.912417508562345e-05, "loss": 65.5276, "step": 94500 }, { "epoch": 0.38183235898948353, "grad_norm": 657.02734375, "learning_rate": 3.912129475850426e-05, "loss": 71.6129, "step": 94510 }, { "epoch": 0.3818727602548512, "grad_norm": 1496.1007080078125, "learning_rate": 3.911841415608315e-05, "loss": 83.0548, "step": 94520 }, { "epoch": 0.3819131615202188, "grad_norm": 1321.64697265625, "learning_rate": 3.911553327841629e-05, "loss": 42.8167, "step": 94530 }, { "epoch": 0.38195356278558645, "grad_norm": 434.47039794921875, "learning_rate": 3.9112652125559845e-05, "loss": 28.5604, "step": 94540 }, { "epoch": 0.3819939640509541, "grad_norm": 1304.2777099609375, "learning_rate": 3.910977069756998e-05, "loss": 54.2573, "step": 94550 }, { "epoch": 0.38203436531632173, "grad_norm": 1068.87939453125, "learning_rate": 3.9106888994502864e-05, "loss": 34.1891, "step": 94560 }, { "epoch": 0.3820747665816893, "grad_norm": 4277.7587890625, "learning_rate": 3.9104007016414695e-05, "loss": 87.1632, "step": 94570 }, { "epoch": 0.38211516784705696, "grad_norm": 96.375244140625, "learning_rate": 3.910112476336164e-05, "loss": 76.1936, "step": 94580 }, { "epoch": 0.3821555691124246, "grad_norm": 775.3291015625, "learning_rate": 3.90982422353999e-05, "loss": 74.7171, "step": 94590 }, { "epoch": 0.38219597037779224, "grad_norm": 696.175537109375, "learning_rate": 3.909535943258567e-05, "loss": 54.9566, "step": 94600 }, { "epoch": 0.3822363716431599, "grad_norm": 724.5578002929688, "learning_rate": 3.909247635497516e-05, "loss": 77.6065, "step": 94610 }, { "epoch": 0.3822767729085275, "grad_norm": 393.4891662597656, "learning_rate": 3.9089593002624555e-05, "loss": 65.3597, "step": 94620 }, { "epoch": 0.3823171741738951, "grad_norm": 1047.53662109375, "learning_rate": 3.908670937559008e-05, "loss": 63.4875, "step": 94630 }, { "epoch": 0.38235757543926274, "grad_norm": 1051.636962890625, "learning_rate": 3.908382547392796e-05, "loss": 39.8985, "step": 94640 }, { "epoch": 0.3823979767046304, "grad_norm": 705.0889892578125, "learning_rate": 3.908094129769442e-05, "loss": 52.4874, "step": 94650 }, { "epoch": 0.382438377969998, "grad_norm": 1593.9310302734375, "learning_rate": 3.907805684694566e-05, "loss": 82.5799, "step": 94660 }, { "epoch": 0.38247877923536566, "grad_norm": 462.7720947265625, "learning_rate": 3.9075172121737945e-05, "loss": 48.7856, "step": 94670 }, { "epoch": 0.3825191805007333, "grad_norm": 1333.7330322265625, "learning_rate": 3.907228712212751e-05, "loss": 88.1809, "step": 94680 }, { "epoch": 0.3825595817661009, "grad_norm": 606.8469848632812, "learning_rate": 3.906940184817057e-05, "loss": 63.5642, "step": 94690 }, { "epoch": 0.3825999830314685, "grad_norm": 280.5736389160156, "learning_rate": 3.906651629992342e-05, "loss": 54.9645, "step": 94700 }, { "epoch": 0.38264038429683617, "grad_norm": 1871.5345458984375, "learning_rate": 3.906363047744229e-05, "loss": 162.7685, "step": 94710 }, { "epoch": 0.3826807855622038, "grad_norm": 605.5426025390625, "learning_rate": 3.9060744380783435e-05, "loss": 43.6138, "step": 94720 }, { "epoch": 0.38272118682757145, "grad_norm": 2624.923583984375, "learning_rate": 3.9057858010003137e-05, "loss": 108.0628, "step": 94730 }, { "epoch": 0.3827615880929391, "grad_norm": 978.3139038085938, "learning_rate": 3.905497136515766e-05, "loss": 51.3885, "step": 94740 }, { "epoch": 0.3828019893583067, "grad_norm": 2022.71435546875, "learning_rate": 3.905208444630327e-05, "loss": 58.5064, "step": 94750 }, { "epoch": 0.3828423906236743, "grad_norm": 395.371826171875, "learning_rate": 3.9049197253496264e-05, "loss": 60.7483, "step": 94760 }, { "epoch": 0.38288279188904195, "grad_norm": 848.9832153320312, "learning_rate": 3.904630978679292e-05, "loss": 57.199, "step": 94770 }, { "epoch": 0.3829231931544096, "grad_norm": 1061.2220458984375, "learning_rate": 3.9043422046249544e-05, "loss": 64.7823, "step": 94780 }, { "epoch": 0.38296359441977723, "grad_norm": 824.2564086914062, "learning_rate": 3.904053403192242e-05, "loss": 58.263, "step": 94790 }, { "epoch": 0.38300399568514487, "grad_norm": 1173.652099609375, "learning_rate": 3.903764574386786e-05, "loss": 80.1155, "step": 94800 }, { "epoch": 0.3830443969505125, "grad_norm": 596.0165405273438, "learning_rate": 3.903475718214217e-05, "loss": 64.6889, "step": 94810 }, { "epoch": 0.3830847982158801, "grad_norm": 544.8338012695312, "learning_rate": 3.9031868346801656e-05, "loss": 90.0342, "step": 94820 }, { "epoch": 0.38312519948124774, "grad_norm": 582.4503173828125, "learning_rate": 3.902897923790265e-05, "loss": 67.4698, "step": 94830 }, { "epoch": 0.3831656007466154, "grad_norm": 1065.5283203125, "learning_rate": 3.902608985550147e-05, "loss": 64.2469, "step": 94840 }, { "epoch": 0.383206002011983, "grad_norm": 718.9613037109375, "learning_rate": 3.902320019965445e-05, "loss": 50.7413, "step": 94850 }, { "epoch": 0.38324640327735066, "grad_norm": 497.2271423339844, "learning_rate": 3.902031027041793e-05, "loss": 46.6418, "step": 94860 }, { "epoch": 0.3832868045427183, "grad_norm": 767.0538330078125, "learning_rate": 3.901742006784822e-05, "loss": 86.6283, "step": 94870 }, { "epoch": 0.38332720580808594, "grad_norm": 101.88063049316406, "learning_rate": 3.9014529592001705e-05, "loss": 80.5774, "step": 94880 }, { "epoch": 0.3833676070734535, "grad_norm": 479.095947265625, "learning_rate": 3.901163884293472e-05, "loss": 44.5601, "step": 94890 }, { "epoch": 0.38340800833882116, "grad_norm": 560.9264526367188, "learning_rate": 3.900874782070362e-05, "loss": 68.0652, "step": 94900 }, { "epoch": 0.3834484096041888, "grad_norm": 530.4874877929688, "learning_rate": 3.900585652536477e-05, "loss": 87.2366, "step": 94910 }, { "epoch": 0.38348881086955644, "grad_norm": 779.4183959960938, "learning_rate": 3.900296495697453e-05, "loss": 70.1493, "step": 94920 }, { "epoch": 0.3835292121349241, "grad_norm": 0.0, "learning_rate": 3.9000073115589286e-05, "loss": 59.7547, "step": 94930 }, { "epoch": 0.3835696134002917, "grad_norm": 516.5095825195312, "learning_rate": 3.899718100126541e-05, "loss": 50.7623, "step": 94940 }, { "epoch": 0.3836100146656593, "grad_norm": 569.387939453125, "learning_rate": 3.899428861405928e-05, "loss": 83.759, "step": 94950 }, { "epoch": 0.38365041593102694, "grad_norm": 586.040771484375, "learning_rate": 3.899139595402729e-05, "loss": 42.7253, "step": 94960 }, { "epoch": 0.3836908171963946, "grad_norm": 2201.12646484375, "learning_rate": 3.898850302122583e-05, "loss": 56.9618, "step": 94970 }, { "epoch": 0.3837312184617622, "grad_norm": 511.3911437988281, "learning_rate": 3.898560981571131e-05, "loss": 55.5062, "step": 94980 }, { "epoch": 0.38377161972712986, "grad_norm": 383.2709045410156, "learning_rate": 3.8982716337540115e-05, "loss": 60.895, "step": 94990 }, { "epoch": 0.3838120209924975, "grad_norm": 768.9598388671875, "learning_rate": 3.897982258676867e-05, "loss": 59.8323, "step": 95000 }, { "epoch": 0.3838524222578651, "grad_norm": 1006.3447875976562, "learning_rate": 3.897692856345339e-05, "loss": 43.6415, "step": 95010 }, { "epoch": 0.38389282352323273, "grad_norm": 774.2186889648438, "learning_rate": 3.897403426765069e-05, "loss": 101.709, "step": 95020 }, { "epoch": 0.38393322478860037, "grad_norm": 1448.4361572265625, "learning_rate": 3.8971139699417e-05, "loss": 91.2065, "step": 95030 }, { "epoch": 0.383973626053968, "grad_norm": 771.1356811523438, "learning_rate": 3.896824485880874e-05, "loss": 60.503, "step": 95040 }, { "epoch": 0.38401402731933565, "grad_norm": 726.0272827148438, "learning_rate": 3.8965349745882365e-05, "loss": 70.719, "step": 95050 }, { "epoch": 0.3840544285847033, "grad_norm": 791.23193359375, "learning_rate": 3.896245436069431e-05, "loss": 83.7272, "step": 95060 }, { "epoch": 0.38409482985007093, "grad_norm": 805.988037109375, "learning_rate": 3.8959558703301015e-05, "loss": 62.7216, "step": 95070 }, { "epoch": 0.3841352311154385, "grad_norm": 318.8636169433594, "learning_rate": 3.895666277375892e-05, "loss": 59.8062, "step": 95080 }, { "epoch": 0.38417563238080615, "grad_norm": 0.0, "learning_rate": 3.8953766572124515e-05, "loss": 46.7181, "step": 95090 }, { "epoch": 0.3842160336461738, "grad_norm": 616.468017578125, "learning_rate": 3.895087009845425e-05, "loss": 55.656, "step": 95100 }, { "epoch": 0.38425643491154143, "grad_norm": 872.3570556640625, "learning_rate": 3.8947973352804584e-05, "loss": 78.4981, "step": 95110 }, { "epoch": 0.3842968361769091, "grad_norm": 765.6026000976562, "learning_rate": 3.894507633523199e-05, "loss": 54.1042, "step": 95120 }, { "epoch": 0.3843372374422767, "grad_norm": 903.8804931640625, "learning_rate": 3.894217904579296e-05, "loss": 79.6192, "step": 95130 }, { "epoch": 0.3843776387076443, "grad_norm": 725.60498046875, "learning_rate": 3.8939281484543974e-05, "loss": 84.4251, "step": 95140 }, { "epoch": 0.38441803997301194, "grad_norm": 1229.2586669921875, "learning_rate": 3.893638365154152e-05, "loss": 125.7261, "step": 95150 }, { "epoch": 0.3844584412383796, "grad_norm": 727.9944458007812, "learning_rate": 3.8933485546842094e-05, "loss": 55.8314, "step": 95160 }, { "epoch": 0.3844988425037472, "grad_norm": 732.46337890625, "learning_rate": 3.893058717050218e-05, "loss": 43.0125, "step": 95170 }, { "epoch": 0.38453924376911486, "grad_norm": 948.4493408203125, "learning_rate": 3.892768852257831e-05, "loss": 48.8631, "step": 95180 }, { "epoch": 0.3845796450344825, "grad_norm": 395.610595703125, "learning_rate": 3.892478960312698e-05, "loss": 97.7245, "step": 95190 }, { "epoch": 0.38462004629985014, "grad_norm": 480.57672119140625, "learning_rate": 3.8921890412204705e-05, "loss": 67.3274, "step": 95200 }, { "epoch": 0.3846604475652177, "grad_norm": 601.1005859375, "learning_rate": 3.891899094986801e-05, "loss": 76.2722, "step": 95210 }, { "epoch": 0.38470084883058536, "grad_norm": 1527.212646484375, "learning_rate": 3.891609121617342e-05, "loss": 30.4016, "step": 95220 }, { "epoch": 0.384741250095953, "grad_norm": 598.6000366210938, "learning_rate": 3.8913191211177464e-05, "loss": 38.4764, "step": 95230 }, { "epoch": 0.38478165136132064, "grad_norm": 1061.5889892578125, "learning_rate": 3.891029093493669e-05, "loss": 57.333, "step": 95240 }, { "epoch": 0.3848220526266883, "grad_norm": 719.52392578125, "learning_rate": 3.8907390387507625e-05, "loss": 66.065, "step": 95250 }, { "epoch": 0.3848624538920559, "grad_norm": 723.9736328125, "learning_rate": 3.890448956894682e-05, "loss": 41.1686, "step": 95260 }, { "epoch": 0.3849028551574235, "grad_norm": 789.1625366210938, "learning_rate": 3.8901588479310846e-05, "loss": 112.5225, "step": 95270 }, { "epoch": 0.38494325642279115, "grad_norm": 321.8541259765625, "learning_rate": 3.889868711865624e-05, "loss": 43.6828, "step": 95280 }, { "epoch": 0.3849836576881588, "grad_norm": 449.05401611328125, "learning_rate": 3.8895785487039574e-05, "loss": 82.2265, "step": 95290 }, { "epoch": 0.3850240589535264, "grad_norm": 677.4368286132812, "learning_rate": 3.8892883584517415e-05, "loss": 52.7545, "step": 95300 }, { "epoch": 0.38506446021889407, "grad_norm": 960.1495361328125, "learning_rate": 3.888998141114634e-05, "loss": 51.1408, "step": 95310 }, { "epoch": 0.3851048614842617, "grad_norm": 3223.043701171875, "learning_rate": 3.8887078966982925e-05, "loss": 75.4026, "step": 95320 }, { "epoch": 0.3851452627496293, "grad_norm": 1701.433837890625, "learning_rate": 3.888417625208376e-05, "loss": 65.9606, "step": 95330 }, { "epoch": 0.38518566401499693, "grad_norm": 1047.423583984375, "learning_rate": 3.888127326650542e-05, "loss": 53.6032, "step": 95340 }, { "epoch": 0.38522606528036457, "grad_norm": 591.1990356445312, "learning_rate": 3.887837001030452e-05, "loss": 67.6553, "step": 95350 }, { "epoch": 0.3852664665457322, "grad_norm": 1093.99365234375, "learning_rate": 3.887546648353765e-05, "loss": 44.322, "step": 95360 }, { "epoch": 0.38530686781109985, "grad_norm": 545.5051879882812, "learning_rate": 3.887256268626142e-05, "loss": 76.8624, "step": 95370 }, { "epoch": 0.3853472690764675, "grad_norm": 1276.1842041015625, "learning_rate": 3.886965861853244e-05, "loss": 48.7556, "step": 95380 }, { "epoch": 0.38538767034183513, "grad_norm": 739.0816650390625, "learning_rate": 3.886675428040732e-05, "loss": 62.3025, "step": 95390 }, { "epoch": 0.3854280716072027, "grad_norm": 655.6229858398438, "learning_rate": 3.8863849671942685e-05, "loss": 51.2894, "step": 95400 }, { "epoch": 0.38546847287257036, "grad_norm": 835.5565795898438, "learning_rate": 3.886094479319517e-05, "loss": 63.1423, "step": 95410 }, { "epoch": 0.385508874137938, "grad_norm": 983.1041259765625, "learning_rate": 3.885803964422139e-05, "loss": 65.168, "step": 95420 }, { "epoch": 0.38554927540330564, "grad_norm": 928.8274536132812, "learning_rate": 3.885513422507799e-05, "loss": 54.2019, "step": 95430 }, { "epoch": 0.3855896766686733, "grad_norm": 716.9531860351562, "learning_rate": 3.885222853582163e-05, "loss": 69.1313, "step": 95440 }, { "epoch": 0.3856300779340409, "grad_norm": 1002.2490234375, "learning_rate": 3.8849322576508934e-05, "loss": 57.0536, "step": 95450 }, { "epoch": 0.3856704791994085, "grad_norm": 721.2386474609375, "learning_rate": 3.884641634719657e-05, "loss": 75.6969, "step": 95460 }, { "epoch": 0.38571088046477614, "grad_norm": 673.9929809570312, "learning_rate": 3.884350984794118e-05, "loss": 68.2813, "step": 95470 }, { "epoch": 0.3857512817301438, "grad_norm": 728.1707153320312, "learning_rate": 3.8840603078799445e-05, "loss": 53.9483, "step": 95480 }, { "epoch": 0.3857916829955114, "grad_norm": 823.33740234375, "learning_rate": 3.883769603982803e-05, "loss": 51.6572, "step": 95490 }, { "epoch": 0.38583208426087906, "grad_norm": 1103.5313720703125, "learning_rate": 3.883478873108361e-05, "loss": 49.7478, "step": 95500 }, { "epoch": 0.3858724855262467, "grad_norm": 1030.14404296875, "learning_rate": 3.883188115262285e-05, "loss": 52.5702, "step": 95510 }, { "epoch": 0.38591288679161434, "grad_norm": 937.8312377929688, "learning_rate": 3.8828973304502446e-05, "loss": 66.6772, "step": 95520 }, { "epoch": 0.3859532880569819, "grad_norm": 1410.5428466796875, "learning_rate": 3.88260651867791e-05, "loss": 55.7531, "step": 95530 }, { "epoch": 0.38599368932234956, "grad_norm": 480.3839111328125, "learning_rate": 3.8823156799509484e-05, "loss": 61.0244, "step": 95540 }, { "epoch": 0.3860340905877172, "grad_norm": 471.7522888183594, "learning_rate": 3.8820248142750316e-05, "loss": 86.2028, "step": 95550 }, { "epoch": 0.38607449185308484, "grad_norm": 531.3958129882812, "learning_rate": 3.881733921655829e-05, "loss": 88.4188, "step": 95560 }, { "epoch": 0.3861148931184525, "grad_norm": 878.5484008789062, "learning_rate": 3.881443002099012e-05, "loss": 64.7605, "step": 95570 }, { "epoch": 0.3861552943838201, "grad_norm": 371.7911682128906, "learning_rate": 3.8811520556102535e-05, "loss": 51.486, "step": 95580 }, { "epoch": 0.3861956956491877, "grad_norm": 529.8397216796875, "learning_rate": 3.880861082195224e-05, "loss": 72.443, "step": 95590 }, { "epoch": 0.38623609691455535, "grad_norm": 988.7250366210938, "learning_rate": 3.880570081859597e-05, "loss": 83.2871, "step": 95600 }, { "epoch": 0.386276498179923, "grad_norm": 433.3272705078125, "learning_rate": 3.880279054609045e-05, "loss": 45.9184, "step": 95610 }, { "epoch": 0.38631689944529063, "grad_norm": 1444.31787109375, "learning_rate": 3.8799880004492425e-05, "loss": 67.2129, "step": 95620 }, { "epoch": 0.38635730071065827, "grad_norm": 507.1636047363281, "learning_rate": 3.879696919385864e-05, "loss": 44.7271, "step": 95630 }, { "epoch": 0.3863977019760259, "grad_norm": 1155.373046875, "learning_rate": 3.879405811424583e-05, "loss": 66.5983, "step": 95640 }, { "epoch": 0.3864381032413935, "grad_norm": 490.9678955078125, "learning_rate": 3.879114676571076e-05, "loss": 60.9269, "step": 95650 }, { "epoch": 0.38647850450676113, "grad_norm": 0.0, "learning_rate": 3.878823514831018e-05, "loss": 56.5793, "step": 95660 }, { "epoch": 0.3865189057721288, "grad_norm": 1740.7720947265625, "learning_rate": 3.878532326210086e-05, "loss": 74.9253, "step": 95670 }, { "epoch": 0.3865593070374964, "grad_norm": 437.5474853515625, "learning_rate": 3.8782411107139564e-05, "loss": 280.9561, "step": 95680 }, { "epoch": 0.38659970830286405, "grad_norm": 1296.4512939453125, "learning_rate": 3.877949868348307e-05, "loss": 73.3886, "step": 95690 }, { "epoch": 0.3866401095682317, "grad_norm": 372.0828552246094, "learning_rate": 3.877658599118815e-05, "loss": 70.0043, "step": 95700 }, { "epoch": 0.38668051083359933, "grad_norm": 462.0464782714844, "learning_rate": 3.87736730303116e-05, "loss": 69.3731, "step": 95710 }, { "epoch": 0.3867209120989669, "grad_norm": 674.0639038085938, "learning_rate": 3.87707598009102e-05, "loss": 82.2932, "step": 95720 }, { "epoch": 0.38676131336433456, "grad_norm": 281.2317810058594, "learning_rate": 3.8767846303040746e-05, "loss": 66.2907, "step": 95730 }, { "epoch": 0.3868017146297022, "grad_norm": 531.0217895507812, "learning_rate": 3.876493253676004e-05, "loss": 68.4629, "step": 95740 }, { "epoch": 0.38684211589506984, "grad_norm": 871.7710571289062, "learning_rate": 3.8762018502124894e-05, "loss": 61.8916, "step": 95750 }, { "epoch": 0.3868825171604375, "grad_norm": 600.0103759765625, "learning_rate": 3.875910419919211e-05, "loss": 86.7091, "step": 95760 }, { "epoch": 0.3869229184258051, "grad_norm": 631.7959594726562, "learning_rate": 3.87561896280185e-05, "loss": 57.5459, "step": 95770 }, { "epoch": 0.3869633196911727, "grad_norm": 459.5212097167969, "learning_rate": 3.8753274788660894e-05, "loss": 38.7924, "step": 95780 }, { "epoch": 0.38700372095654034, "grad_norm": 722.750244140625, "learning_rate": 3.875035968117612e-05, "loss": 61.4237, "step": 95790 }, { "epoch": 0.387044122221908, "grad_norm": 1434.1351318359375, "learning_rate": 3.8747444305621e-05, "loss": 45.202, "step": 95800 }, { "epoch": 0.3870845234872756, "grad_norm": 1343.0447998046875, "learning_rate": 3.874452866205237e-05, "loss": 74.4229, "step": 95810 }, { "epoch": 0.38712492475264326, "grad_norm": 851.3370971679688, "learning_rate": 3.874161275052709e-05, "loss": 57.2729, "step": 95820 }, { "epoch": 0.3871653260180109, "grad_norm": 519.22216796875, "learning_rate": 3.873869657110198e-05, "loss": 52.2334, "step": 95830 }, { "epoch": 0.38720572728337854, "grad_norm": 1355.063720703125, "learning_rate": 3.873578012383393e-05, "loss": 80.8646, "step": 95840 }, { "epoch": 0.3872461285487461, "grad_norm": 615.1976318359375, "learning_rate": 3.873286340877975e-05, "loss": 58.7473, "step": 95850 }, { "epoch": 0.38728652981411377, "grad_norm": 780.4457397460938, "learning_rate": 3.8729946425996345e-05, "loss": 72.4027, "step": 95860 }, { "epoch": 0.3873269310794814, "grad_norm": 345.5774841308594, "learning_rate": 3.8727029175540554e-05, "loss": 61.2218, "step": 95870 }, { "epoch": 0.38736733234484905, "grad_norm": 546.84228515625, "learning_rate": 3.872411165746927e-05, "loss": 47.3672, "step": 95880 }, { "epoch": 0.3874077336102167, "grad_norm": 1022.2073364257812, "learning_rate": 3.872119387183936e-05, "loss": 74.9494, "step": 95890 }, { "epoch": 0.3874481348755843, "grad_norm": 789.552978515625, "learning_rate": 3.8718275818707715e-05, "loss": 64.7529, "step": 95900 }, { "epoch": 0.3874885361409519, "grad_norm": 446.9588623046875, "learning_rate": 3.8715357498131214e-05, "loss": 66.2357, "step": 95910 }, { "epoch": 0.38752893740631955, "grad_norm": 432.23651123046875, "learning_rate": 3.871243891016676e-05, "loss": 65.1847, "step": 95920 }, { "epoch": 0.3875693386716872, "grad_norm": 382.0699462890625, "learning_rate": 3.870952005487125e-05, "loss": 46.396, "step": 95930 }, { "epoch": 0.38760973993705483, "grad_norm": 553.678955078125, "learning_rate": 3.870660093230159e-05, "loss": 59.2287, "step": 95940 }, { "epoch": 0.38765014120242247, "grad_norm": 397.8702392578125, "learning_rate": 3.870368154251469e-05, "loss": 57.8399, "step": 95950 }, { "epoch": 0.3876905424677901, "grad_norm": 776.6229248046875, "learning_rate": 3.870076188556746e-05, "loss": 49.7638, "step": 95960 }, { "epoch": 0.3877309437331577, "grad_norm": 940.7283325195312, "learning_rate": 3.869784196151682e-05, "loss": 69.5812, "step": 95970 }, { "epoch": 0.38777134499852534, "grad_norm": 1250.880859375, "learning_rate": 3.869492177041971e-05, "loss": 86.8493, "step": 95980 }, { "epoch": 0.387811746263893, "grad_norm": 718.7807006835938, "learning_rate": 3.8692001312333036e-05, "loss": 64.5188, "step": 95990 }, { "epoch": 0.3878521475292606, "grad_norm": 1685.44921875, "learning_rate": 3.868908058731376e-05, "loss": 81.74, "step": 96000 }, { "epoch": 0.38789254879462826, "grad_norm": 1482.5496826171875, "learning_rate": 3.8686159595418805e-05, "loss": 85.64, "step": 96010 }, { "epoch": 0.3879329500599959, "grad_norm": 494.4794006347656, "learning_rate": 3.868323833670512e-05, "loss": 75.5731, "step": 96020 }, { "epoch": 0.38797335132536354, "grad_norm": 1048.5169677734375, "learning_rate": 3.868031681122966e-05, "loss": 100.7712, "step": 96030 }, { "epoch": 0.3880137525907311, "grad_norm": 431.1014099121094, "learning_rate": 3.867739501904938e-05, "loss": 41.6055, "step": 96040 }, { "epoch": 0.38805415385609876, "grad_norm": 670.1493530273438, "learning_rate": 3.867447296022124e-05, "loss": 55.9565, "step": 96050 }, { "epoch": 0.3880945551214664, "grad_norm": 826.1647338867188, "learning_rate": 3.8671550634802216e-05, "loss": 64.8045, "step": 96060 }, { "epoch": 0.38813495638683404, "grad_norm": 408.30645751953125, "learning_rate": 3.866862804284928e-05, "loss": 48.8189, "step": 96070 }, { "epoch": 0.3881753576522017, "grad_norm": 523.0665283203125, "learning_rate": 3.8665705184419386e-05, "loss": 39.3677, "step": 96080 }, { "epoch": 0.3882157589175693, "grad_norm": 1171.9281005859375, "learning_rate": 3.8662782059569546e-05, "loss": 76.4366, "step": 96090 }, { "epoch": 0.3882561601829369, "grad_norm": 688.5985717773438, "learning_rate": 3.865985866835673e-05, "loss": 54.2586, "step": 96100 }, { "epoch": 0.38829656144830454, "grad_norm": 703.115234375, "learning_rate": 3.865693501083794e-05, "loss": 41.2508, "step": 96110 }, { "epoch": 0.3883369627136722, "grad_norm": 764.5994262695312, "learning_rate": 3.865401108707017e-05, "loss": 70.6168, "step": 96120 }, { "epoch": 0.3883773639790398, "grad_norm": 653.46240234375, "learning_rate": 3.8651086897110424e-05, "loss": 95.4823, "step": 96130 }, { "epoch": 0.38841776524440746, "grad_norm": 1796.30029296875, "learning_rate": 3.864816244101571e-05, "loss": 83.386, "step": 96140 }, { "epoch": 0.3884581665097751, "grad_norm": 552.94677734375, "learning_rate": 3.8645237718843044e-05, "loss": 56.03, "step": 96150 }, { "epoch": 0.38849856777514274, "grad_norm": 776.851806640625, "learning_rate": 3.864231273064944e-05, "loss": 82.2406, "step": 96160 }, { "epoch": 0.38853896904051033, "grad_norm": 465.9934997558594, "learning_rate": 3.8639387476491926e-05, "loss": 68.4747, "step": 96170 }, { "epoch": 0.38857937030587797, "grad_norm": 2382.02685546875, "learning_rate": 3.863646195642754e-05, "loss": 96.2178, "step": 96180 }, { "epoch": 0.3886197715712456, "grad_norm": 781.694580078125, "learning_rate": 3.8633536170513296e-05, "loss": 69.0198, "step": 96190 }, { "epoch": 0.38866017283661325, "grad_norm": 822.1322631835938, "learning_rate": 3.8630610118806254e-05, "loss": 56.8671, "step": 96200 }, { "epoch": 0.3887005741019809, "grad_norm": 518.6736450195312, "learning_rate": 3.862768380136345e-05, "loss": 43.1214, "step": 96210 }, { "epoch": 0.38874097536734853, "grad_norm": 1137.239013671875, "learning_rate": 3.862475721824193e-05, "loss": 77.5319, "step": 96220 }, { "epoch": 0.3887813766327161, "grad_norm": 113.21827697753906, "learning_rate": 3.862183036949875e-05, "loss": 64.3933, "step": 96230 }, { "epoch": 0.38882177789808375, "grad_norm": 1088.9544677734375, "learning_rate": 3.861890325519098e-05, "loss": 69.0374, "step": 96240 }, { "epoch": 0.3888621791634514, "grad_norm": 1370.9381103515625, "learning_rate": 3.861597587537568e-05, "loss": 59.8645, "step": 96250 }, { "epoch": 0.38890258042881903, "grad_norm": 523.1607055664062, "learning_rate": 3.861304823010991e-05, "loss": 71.4097, "step": 96260 }, { "epoch": 0.3889429816941867, "grad_norm": 959.0990600585938, "learning_rate": 3.861012031945077e-05, "loss": 42.5465, "step": 96270 }, { "epoch": 0.3889833829595543, "grad_norm": 660.4682006835938, "learning_rate": 3.8607192143455326e-05, "loss": 55.936, "step": 96280 }, { "epoch": 0.3890237842249219, "grad_norm": 748.14990234375, "learning_rate": 3.860426370218067e-05, "loss": 43.6322, "step": 96290 }, { "epoch": 0.38906418549028954, "grad_norm": 584.9511108398438, "learning_rate": 3.860133499568387e-05, "loss": 70.2999, "step": 96300 }, { "epoch": 0.3891045867556572, "grad_norm": 1015.5634765625, "learning_rate": 3.859840602402206e-05, "loss": 109.6341, "step": 96310 }, { "epoch": 0.3891449880210248, "grad_norm": 1177.64697265625, "learning_rate": 3.859547678725231e-05, "loss": 63.7178, "step": 96320 }, { "epoch": 0.38918538928639246, "grad_norm": 480.65618896484375, "learning_rate": 3.859254728543175e-05, "loss": 62.31, "step": 96330 }, { "epoch": 0.3892257905517601, "grad_norm": 1283.1507568359375, "learning_rate": 3.8589617518617485e-05, "loss": 61.4974, "step": 96340 }, { "epoch": 0.38926619181712774, "grad_norm": 1167.147216796875, "learning_rate": 3.858668748686662e-05, "loss": 48.8035, "step": 96350 }, { "epoch": 0.3893065930824953, "grad_norm": 793.9154052734375, "learning_rate": 3.858375719023629e-05, "loss": 68.4192, "step": 96360 }, { "epoch": 0.38934699434786296, "grad_norm": 838.4378051757812, "learning_rate": 3.8580826628783625e-05, "loss": 86.5221, "step": 96370 }, { "epoch": 0.3893873956132306, "grad_norm": 797.8261108398438, "learning_rate": 3.857789580256575e-05, "loss": 63.2042, "step": 96380 }, { "epoch": 0.38942779687859824, "grad_norm": 601.6024169921875, "learning_rate": 3.857496471163981e-05, "loss": 52.494, "step": 96390 }, { "epoch": 0.3894681981439659, "grad_norm": 693.2849731445312, "learning_rate": 3.8572033356062943e-05, "loss": 88.3605, "step": 96400 }, { "epoch": 0.3895085994093335, "grad_norm": 1151.2178955078125, "learning_rate": 3.8569101735892296e-05, "loss": 47.6969, "step": 96410 }, { "epoch": 0.3895490006747011, "grad_norm": 534.7440185546875, "learning_rate": 3.856616985118502e-05, "loss": 56.3229, "step": 96420 }, { "epoch": 0.38958940194006875, "grad_norm": 883.9851684570312, "learning_rate": 3.8563237701998286e-05, "loss": 44.7732, "step": 96430 }, { "epoch": 0.3896298032054364, "grad_norm": 903.25341796875, "learning_rate": 3.856030528838925e-05, "loss": 49.8167, "step": 96440 }, { "epoch": 0.389670204470804, "grad_norm": 1717.411376953125, "learning_rate": 3.8557372610415074e-05, "loss": 56.202, "step": 96450 }, { "epoch": 0.38971060573617167, "grad_norm": 1218.4908447265625, "learning_rate": 3.8554439668132946e-05, "loss": 48.3608, "step": 96460 }, { "epoch": 0.3897510070015393, "grad_norm": 884.7052001953125, "learning_rate": 3.855150646160003e-05, "loss": 46.3305, "step": 96470 }, { "epoch": 0.38979140826690695, "grad_norm": 360.12017822265625, "learning_rate": 3.854857299087353e-05, "loss": 37.3111, "step": 96480 }, { "epoch": 0.38983180953227453, "grad_norm": 1448.187744140625, "learning_rate": 3.8545639256010625e-05, "loss": 71.6807, "step": 96490 }, { "epoch": 0.38987221079764217, "grad_norm": 935.8035278320312, "learning_rate": 3.85427052570685e-05, "loss": 60.7646, "step": 96500 }, { "epoch": 0.3899126120630098, "grad_norm": 805.13037109375, "learning_rate": 3.853977099410436e-05, "loss": 46.3225, "step": 96510 }, { "epoch": 0.38995301332837745, "grad_norm": 562.0592041015625, "learning_rate": 3.853683646717543e-05, "loss": 62.2224, "step": 96520 }, { "epoch": 0.3899934145937451, "grad_norm": 1327.85498046875, "learning_rate": 3.853390167633889e-05, "loss": 66.2915, "step": 96530 }, { "epoch": 0.39003381585911273, "grad_norm": 1803.8189697265625, "learning_rate": 3.8530966621651976e-05, "loss": 89.2004, "step": 96540 }, { "epoch": 0.3900742171244803, "grad_norm": 363.34783935546875, "learning_rate": 3.8528031303171895e-05, "loss": 38.5132, "step": 96550 }, { "epoch": 0.39011461838984796, "grad_norm": 1082.673095703125, "learning_rate": 3.852509572095588e-05, "loss": 58.43, "step": 96560 }, { "epoch": 0.3901550196552156, "grad_norm": 1270.4439697265625, "learning_rate": 3.852215987506117e-05, "loss": 55.3348, "step": 96570 }, { "epoch": 0.39019542092058324, "grad_norm": 478.66815185546875, "learning_rate": 3.851922376554499e-05, "loss": 59.5862, "step": 96580 }, { "epoch": 0.3902358221859509, "grad_norm": 772.7987670898438, "learning_rate": 3.851628739246457e-05, "loss": 53.7853, "step": 96590 }, { "epoch": 0.3902762234513185, "grad_norm": 353.50360107421875, "learning_rate": 3.851335075587718e-05, "loss": 68.1417, "step": 96600 }, { "epoch": 0.3903166247166861, "grad_norm": 3968.27587890625, "learning_rate": 3.8510413855840056e-05, "loss": 92.6137, "step": 96610 }, { "epoch": 0.39035702598205374, "grad_norm": 624.1312255859375, "learning_rate": 3.850747669241046e-05, "loss": 74.6902, "step": 96620 }, { "epoch": 0.3903974272474214, "grad_norm": 1058.5302734375, "learning_rate": 3.850453926564565e-05, "loss": 67.8454, "step": 96630 }, { "epoch": 0.390437828512789, "grad_norm": 1056.72021484375, "learning_rate": 3.85016015756029e-05, "loss": 88.1807, "step": 96640 }, { "epoch": 0.39047822977815666, "grad_norm": 1824.6705322265625, "learning_rate": 3.849866362233947e-05, "loss": 71.9311, "step": 96650 }, { "epoch": 0.3905186310435243, "grad_norm": 608.2755126953125, "learning_rate": 3.849572540591264e-05, "loss": 45.0875, "step": 96660 }, { "epoch": 0.39055903230889194, "grad_norm": 665.3267822265625, "learning_rate": 3.84927869263797e-05, "loss": 67.2472, "step": 96670 }, { "epoch": 0.3905994335742595, "grad_norm": 752.25927734375, "learning_rate": 3.848984818379793e-05, "loss": 108.7115, "step": 96680 }, { "epoch": 0.39063983483962716, "grad_norm": 393.086181640625, "learning_rate": 3.848690917822463e-05, "loss": 61.483, "step": 96690 }, { "epoch": 0.3906802361049948, "grad_norm": 1095.4296875, "learning_rate": 3.8483969909717087e-05, "loss": 57.5574, "step": 96700 }, { "epoch": 0.39072063737036244, "grad_norm": 841.97705078125, "learning_rate": 3.8481030378332614e-05, "loss": 68.264, "step": 96710 }, { "epoch": 0.3907610386357301, "grad_norm": 410.4032897949219, "learning_rate": 3.84780905841285e-05, "loss": 69.9679, "step": 96720 }, { "epoch": 0.3908014399010977, "grad_norm": 513.5309448242188, "learning_rate": 3.8475150527162085e-05, "loss": 80.5341, "step": 96730 }, { "epoch": 0.3908418411664653, "grad_norm": 451.75445556640625, "learning_rate": 3.847221020749067e-05, "loss": 45.8152, "step": 96740 }, { "epoch": 0.39088224243183295, "grad_norm": 1032.3883056640625, "learning_rate": 3.8469269625171576e-05, "loss": 84.8716, "step": 96750 }, { "epoch": 0.3909226436972006, "grad_norm": 472.958251953125, "learning_rate": 3.846632878026214e-05, "loss": 49.6663, "step": 96760 }, { "epoch": 0.39096304496256823, "grad_norm": 969.470703125, "learning_rate": 3.8463387672819696e-05, "loss": 63.3029, "step": 96770 }, { "epoch": 0.39100344622793587, "grad_norm": 1277.8431396484375, "learning_rate": 3.846044630290158e-05, "loss": 56.5187, "step": 96780 }, { "epoch": 0.3910438474933035, "grad_norm": 364.2802734375, "learning_rate": 3.845750467056511e-05, "loss": 59.6623, "step": 96790 }, { "epoch": 0.39108424875867115, "grad_norm": 982.3886108398438, "learning_rate": 3.8454562775867684e-05, "loss": 85.9636, "step": 96800 }, { "epoch": 0.39112465002403873, "grad_norm": 416.16619873046875, "learning_rate": 3.8451620618866616e-05, "loss": 84.5701, "step": 96810 }, { "epoch": 0.3911650512894064, "grad_norm": 627.9135131835938, "learning_rate": 3.844867819961928e-05, "loss": 44.0341, "step": 96820 }, { "epoch": 0.391205452554774, "grad_norm": 1361.487060546875, "learning_rate": 3.8445735518183043e-05, "loss": 48.6817, "step": 96830 }, { "epoch": 0.39124585382014165, "grad_norm": 1403.5098876953125, "learning_rate": 3.8442792574615275e-05, "loss": 49.1378, "step": 96840 }, { "epoch": 0.3912862550855093, "grad_norm": 1468.5533447265625, "learning_rate": 3.843984936897334e-05, "loss": 71.1556, "step": 96850 }, { "epoch": 0.39132665635087693, "grad_norm": 2194.382080078125, "learning_rate": 3.843690590131462e-05, "loss": 89.6682, "step": 96860 }, { "epoch": 0.3913670576162445, "grad_norm": 899.1420288085938, "learning_rate": 3.84339621716965e-05, "loss": 59.679, "step": 96870 }, { "epoch": 0.39140745888161216, "grad_norm": 726.7706298828125, "learning_rate": 3.843101818017637e-05, "loss": 81.8773, "step": 96880 }, { "epoch": 0.3914478601469798, "grad_norm": 441.8895568847656, "learning_rate": 3.8428073926811625e-05, "loss": 87.1567, "step": 96890 }, { "epoch": 0.39148826141234744, "grad_norm": 788.19921875, "learning_rate": 3.842512941165968e-05, "loss": 81.7942, "step": 96900 }, { "epoch": 0.3915286626777151, "grad_norm": 0.0, "learning_rate": 3.842218463477791e-05, "loss": 45.6026, "step": 96910 }, { "epoch": 0.3915690639430827, "grad_norm": 864.9554443359375, "learning_rate": 3.841923959622375e-05, "loss": 49.6541, "step": 96920 }, { "epoch": 0.3916094652084503, "grad_norm": 903.1978149414062, "learning_rate": 3.84162942960546e-05, "loss": 76.4913, "step": 96930 }, { "epoch": 0.39164986647381794, "grad_norm": 848.7103271484375, "learning_rate": 3.841334873432789e-05, "loss": 67.0842, "step": 96940 }, { "epoch": 0.3916902677391856, "grad_norm": 898.5715942382812, "learning_rate": 3.841040291110103e-05, "loss": 40.5335, "step": 96950 }, { "epoch": 0.3917306690045532, "grad_norm": 422.8970031738281, "learning_rate": 3.840745682643147e-05, "loss": 62.1978, "step": 96960 }, { "epoch": 0.39177107026992086, "grad_norm": 404.5113220214844, "learning_rate": 3.840451048037663e-05, "loss": 56.2681, "step": 96970 }, { "epoch": 0.3918114715352885, "grad_norm": 1311.5926513671875, "learning_rate": 3.8401563872993966e-05, "loss": 63.1407, "step": 96980 }, { "epoch": 0.39185187280065614, "grad_norm": 477.2160339355469, "learning_rate": 3.839861700434091e-05, "loss": 51.8024, "step": 96990 }, { "epoch": 0.3918922740660237, "grad_norm": 835.2833862304688, "learning_rate": 3.8395669874474915e-05, "loss": 48.2391, "step": 97000 }, { "epoch": 0.39193267533139137, "grad_norm": 906.5626831054688, "learning_rate": 3.839272248345344e-05, "loss": 61.7122, "step": 97010 }, { "epoch": 0.391973076596759, "grad_norm": 321.434326171875, "learning_rate": 3.838977483133395e-05, "loss": 66.5921, "step": 97020 }, { "epoch": 0.39201347786212665, "grad_norm": 408.02197265625, "learning_rate": 3.838682691817391e-05, "loss": 47.1528, "step": 97030 }, { "epoch": 0.3920538791274943, "grad_norm": 953.2274780273438, "learning_rate": 3.8383878744030776e-05, "loss": 47.6821, "step": 97040 }, { "epoch": 0.3920942803928619, "grad_norm": 541.6514282226562, "learning_rate": 3.8380930308962036e-05, "loss": 60.5129, "step": 97050 }, { "epoch": 0.3921346816582295, "grad_norm": 443.59527587890625, "learning_rate": 3.837798161302518e-05, "loss": 67.278, "step": 97060 }, { "epoch": 0.39217508292359715, "grad_norm": 1681.9339599609375, "learning_rate": 3.8375032656277684e-05, "loss": 59.9378, "step": 97070 }, { "epoch": 0.3922154841889648, "grad_norm": 696.7039184570312, "learning_rate": 3.837208343877703e-05, "loss": 49.9206, "step": 97080 }, { "epoch": 0.39225588545433243, "grad_norm": 885.4489135742188, "learning_rate": 3.8369133960580724e-05, "loss": 69.6553, "step": 97090 }, { "epoch": 0.39229628671970007, "grad_norm": 688.5819702148438, "learning_rate": 3.836618422174628e-05, "loss": 80.361, "step": 97100 }, { "epoch": 0.3923366879850677, "grad_norm": 757.7618408203125, "learning_rate": 3.83632342223312e-05, "loss": 49.0418, "step": 97110 }, { "epoch": 0.39237708925043535, "grad_norm": 435.7521057128906, "learning_rate": 3.836028396239297e-05, "loss": 52.3195, "step": 97120 }, { "epoch": 0.39241749051580294, "grad_norm": 622.2557983398438, "learning_rate": 3.8357333441989134e-05, "loss": 69.8973, "step": 97130 }, { "epoch": 0.3924578917811706, "grad_norm": 2009.5267333984375, "learning_rate": 3.835438266117721e-05, "loss": 63.6782, "step": 97140 }, { "epoch": 0.3924982930465382, "grad_norm": 851.5496826171875, "learning_rate": 3.835143162001472e-05, "loss": 79.4912, "step": 97150 }, { "epoch": 0.39253869431190586, "grad_norm": 496.26544189453125, "learning_rate": 3.834848031855919e-05, "loss": 53.252, "step": 97160 }, { "epoch": 0.3925790955772735, "grad_norm": 664.895263671875, "learning_rate": 3.8345528756868164e-05, "loss": 51.9402, "step": 97170 }, { "epoch": 0.39261949684264114, "grad_norm": 963.7388305664062, "learning_rate": 3.8342576934999184e-05, "loss": 57.3356, "step": 97180 }, { "epoch": 0.3926598981080087, "grad_norm": 706.6784057617188, "learning_rate": 3.83396248530098e-05, "loss": 74.5728, "step": 97190 }, { "epoch": 0.39270029937337636, "grad_norm": 486.8099365234375, "learning_rate": 3.8336672510957574e-05, "loss": 64.1127, "step": 97200 }, { "epoch": 0.392740700638744, "grad_norm": 563.300048828125, "learning_rate": 3.833371990890003e-05, "loss": 53.4073, "step": 97210 }, { "epoch": 0.39278110190411164, "grad_norm": 382.8632507324219, "learning_rate": 3.8330767046894765e-05, "loss": 44.1544, "step": 97220 }, { "epoch": 0.3928215031694793, "grad_norm": 944.963134765625, "learning_rate": 3.8327813924999326e-05, "loss": 64.7648, "step": 97230 }, { "epoch": 0.3928619044348469, "grad_norm": 428.9286193847656, "learning_rate": 3.83248605432713e-05, "loss": 57.617, "step": 97240 }, { "epoch": 0.3929023057002145, "grad_norm": 742.8692016601562, "learning_rate": 3.832190690176825e-05, "loss": 44.4798, "step": 97250 }, { "epoch": 0.39294270696558214, "grad_norm": 139.9200439453125, "learning_rate": 3.831895300054777e-05, "loss": 73.2557, "step": 97260 }, { "epoch": 0.3929831082309498, "grad_norm": 2734.72802734375, "learning_rate": 3.8315998839667445e-05, "loss": 73.5291, "step": 97270 }, { "epoch": 0.3930235094963174, "grad_norm": 1699.1915283203125, "learning_rate": 3.8313044419184873e-05, "loss": 92.5831, "step": 97280 }, { "epoch": 0.39306391076168506, "grad_norm": 1669.2913818359375, "learning_rate": 3.831008973915764e-05, "loss": 54.4022, "step": 97290 }, { "epoch": 0.3931043120270527, "grad_norm": 1141.6053466796875, "learning_rate": 3.830713479964335e-05, "loss": 85.7646, "step": 97300 }, { "epoch": 0.39314471329242034, "grad_norm": 1941.885986328125, "learning_rate": 3.8304179600699626e-05, "loss": 74.6156, "step": 97310 }, { "epoch": 0.39318511455778793, "grad_norm": 877.8158569335938, "learning_rate": 3.830122414238406e-05, "loss": 56.3947, "step": 97320 }, { "epoch": 0.39322551582315557, "grad_norm": 600.1071166992188, "learning_rate": 3.829826842475429e-05, "loss": 56.1246, "step": 97330 }, { "epoch": 0.3932659170885232, "grad_norm": 1540.666259765625, "learning_rate": 3.8295312447867924e-05, "loss": 58.5337, "step": 97340 }, { "epoch": 0.39330631835389085, "grad_norm": 620.038330078125, "learning_rate": 3.82923562117826e-05, "loss": 40.1906, "step": 97350 }, { "epoch": 0.3933467196192585, "grad_norm": 649.1076049804688, "learning_rate": 3.828939971655595e-05, "loss": 86.4342, "step": 97360 }, { "epoch": 0.39338712088462613, "grad_norm": 1530.02001953125, "learning_rate": 3.828644296224562e-05, "loss": 61.6796, "step": 97370 }, { "epoch": 0.3934275221499937, "grad_norm": 1663.7215576171875, "learning_rate": 3.8283485948909224e-05, "loss": 59.6131, "step": 97380 }, { "epoch": 0.39346792341536135, "grad_norm": 509.4885559082031, "learning_rate": 3.828052867660445e-05, "loss": 60.7145, "step": 97390 }, { "epoch": 0.393508324680729, "grad_norm": 586.28955078125, "learning_rate": 3.827757114538892e-05, "loss": 95.243, "step": 97400 }, { "epoch": 0.39354872594609663, "grad_norm": 808.3604736328125, "learning_rate": 3.82746133553203e-05, "loss": 66.3657, "step": 97410 }, { "epoch": 0.3935891272114643, "grad_norm": 307.7069396972656, "learning_rate": 3.827165530645627e-05, "loss": 69.5492, "step": 97420 }, { "epoch": 0.3936295284768319, "grad_norm": 1287.857666015625, "learning_rate": 3.8268696998854486e-05, "loss": 74.0201, "step": 97430 }, { "epoch": 0.3936699297421995, "grad_norm": 1741.658935546875, "learning_rate": 3.826573843257262e-05, "loss": 95.2432, "step": 97440 }, { "epoch": 0.39371033100756714, "grad_norm": 530.0614624023438, "learning_rate": 3.826277960766835e-05, "loss": 76.4804, "step": 97450 }, { "epoch": 0.3937507322729348, "grad_norm": 449.9317932128906, "learning_rate": 3.8259820524199374e-05, "loss": 58.2512, "step": 97460 }, { "epoch": 0.3937911335383024, "grad_norm": 273.4659729003906, "learning_rate": 3.8256861182223366e-05, "loss": 50.8024, "step": 97470 }, { "epoch": 0.39383153480367006, "grad_norm": 1517.554931640625, "learning_rate": 3.8253901581798016e-05, "loss": 65.5294, "step": 97480 }, { "epoch": 0.3938719360690377, "grad_norm": 286.39971923828125, "learning_rate": 3.825094172298104e-05, "loss": 41.5933, "step": 97490 }, { "epoch": 0.39391233733440534, "grad_norm": 1205.6583251953125, "learning_rate": 3.824798160583012e-05, "loss": 59.1793, "step": 97500 }, { "epoch": 0.3939527385997729, "grad_norm": 1654.855224609375, "learning_rate": 3.824502123040299e-05, "loss": 71.1765, "step": 97510 }, { "epoch": 0.39399313986514056, "grad_norm": 636.5582275390625, "learning_rate": 3.824206059675736e-05, "loss": 76.6066, "step": 97520 }, { "epoch": 0.3940335411305082, "grad_norm": 878.0634765625, "learning_rate": 3.823909970495092e-05, "loss": 66.1705, "step": 97530 }, { "epoch": 0.39407394239587584, "grad_norm": 733.1356811523438, "learning_rate": 3.8236138555041434e-05, "loss": 86.5831, "step": 97540 }, { "epoch": 0.3941143436612435, "grad_norm": 372.1419982910156, "learning_rate": 3.823317714708661e-05, "loss": 52.7755, "step": 97550 }, { "epoch": 0.3941547449266111, "grad_norm": 978.9861450195312, "learning_rate": 3.823021548114417e-05, "loss": 55.6883, "step": 97560 }, { "epoch": 0.3941951461919787, "grad_norm": 339.8622741699219, "learning_rate": 3.822725355727188e-05, "loss": 51.7447, "step": 97570 }, { "epoch": 0.39423554745734635, "grad_norm": 610.7191162109375, "learning_rate": 3.8224291375527464e-05, "loss": 49.7207, "step": 97580 }, { "epoch": 0.394275948722714, "grad_norm": 1472.842041015625, "learning_rate": 3.822132893596869e-05, "loss": 55.3662, "step": 97590 }, { "epoch": 0.3943163499880816, "grad_norm": 558.3397827148438, "learning_rate": 3.821836623865329e-05, "loss": 91.5254, "step": 97600 }, { "epoch": 0.39435675125344927, "grad_norm": 770.9660034179688, "learning_rate": 3.821540328363905e-05, "loss": 58.9525, "step": 97610 }, { "epoch": 0.3943971525188169, "grad_norm": 900.81982421875, "learning_rate": 3.821244007098371e-05, "loss": 49.8541, "step": 97620 }, { "epoch": 0.39443755378418455, "grad_norm": 472.5091247558594, "learning_rate": 3.820947660074504e-05, "loss": 70.776, "step": 97630 }, { "epoch": 0.39447795504955213, "grad_norm": 1419.308349609375, "learning_rate": 3.820651287298084e-05, "loss": 62.8083, "step": 97640 }, { "epoch": 0.39451835631491977, "grad_norm": 1251.2393798828125, "learning_rate": 3.8203548887748865e-05, "loss": 98.8956, "step": 97650 }, { "epoch": 0.3945587575802874, "grad_norm": 568.4578857421875, "learning_rate": 3.8200584645106904e-05, "loss": 52.2117, "step": 97660 }, { "epoch": 0.39459915884565505, "grad_norm": 1025.6453857421875, "learning_rate": 3.819762014511275e-05, "loss": 77.8451, "step": 97670 }, { "epoch": 0.3946395601110227, "grad_norm": 1376.6019287109375, "learning_rate": 3.81946553878242e-05, "loss": 63.783, "step": 97680 }, { "epoch": 0.39467996137639033, "grad_norm": 1392.58544921875, "learning_rate": 3.819169037329905e-05, "loss": 51.8016, "step": 97690 }, { "epoch": 0.3947203626417579, "grad_norm": 737.3711547851562, "learning_rate": 3.8188725101595094e-05, "loss": 44.1351, "step": 97700 }, { "epoch": 0.39476076390712556, "grad_norm": 675.7130737304688, "learning_rate": 3.818575957277016e-05, "loss": 70.7781, "step": 97710 }, { "epoch": 0.3948011651724932, "grad_norm": 522.35302734375, "learning_rate": 3.8182793786882065e-05, "loss": 54.7861, "step": 97720 }, { "epoch": 0.39484156643786084, "grad_norm": 1283.5611572265625, "learning_rate": 3.817982774398861e-05, "loss": 86.0404, "step": 97730 }, { "epoch": 0.3948819677032285, "grad_norm": 837.8106079101562, "learning_rate": 3.817686144414762e-05, "loss": 72.8473, "step": 97740 }, { "epoch": 0.3949223689685961, "grad_norm": 2973.33740234375, "learning_rate": 3.8173894887416945e-05, "loss": 72.2817, "step": 97750 }, { "epoch": 0.3949627702339637, "grad_norm": 433.107421875, "learning_rate": 3.8170928073854396e-05, "loss": 71.6905, "step": 97760 }, { "epoch": 0.39500317149933134, "grad_norm": 559.8467407226562, "learning_rate": 3.816796100351783e-05, "loss": 59.3247, "step": 97770 }, { "epoch": 0.395043572764699, "grad_norm": 676.9853515625, "learning_rate": 3.8164993676465074e-05, "loss": 59.816, "step": 97780 }, { "epoch": 0.3950839740300666, "grad_norm": 1209.71923828125, "learning_rate": 3.816202609275401e-05, "loss": 78.3244, "step": 97790 }, { "epoch": 0.39512437529543426, "grad_norm": 407.7916259765625, "learning_rate": 3.8159058252442446e-05, "loss": 40.6203, "step": 97800 }, { "epoch": 0.3951647765608019, "grad_norm": 0.0, "learning_rate": 3.815609015558829e-05, "loss": 68.3833, "step": 97810 }, { "epoch": 0.39520517782616954, "grad_norm": 235.8328094482422, "learning_rate": 3.815312180224937e-05, "loss": 47.2443, "step": 97820 }, { "epoch": 0.3952455790915371, "grad_norm": 620.359619140625, "learning_rate": 3.8150153192483566e-05, "loss": 53.1402, "step": 97830 }, { "epoch": 0.39528598035690476, "grad_norm": 1514.925537109375, "learning_rate": 3.814718432634876e-05, "loss": 83.4849, "step": 97840 }, { "epoch": 0.3953263816222724, "grad_norm": 195.32774353027344, "learning_rate": 3.8144215203902834e-05, "loss": 58.4113, "step": 97850 }, { "epoch": 0.39536678288764004, "grad_norm": 610.07568359375, "learning_rate": 3.814124582520365e-05, "loss": 47.7628, "step": 97860 }, { "epoch": 0.3954071841530077, "grad_norm": 1019.016845703125, "learning_rate": 3.813827619030913e-05, "loss": 66.7314, "step": 97870 }, { "epoch": 0.3954475854183753, "grad_norm": 340.5806884765625, "learning_rate": 3.813530629927714e-05, "loss": 37.2535, "step": 97880 }, { "epoch": 0.3954879866837429, "grad_norm": 705.3890991210938, "learning_rate": 3.81323361521656e-05, "loss": 63.5229, "step": 97890 }, { "epoch": 0.39552838794911055, "grad_norm": 911.9359741210938, "learning_rate": 3.81293657490324e-05, "loss": 107.5619, "step": 97900 }, { "epoch": 0.3955687892144782, "grad_norm": 448.5742492675781, "learning_rate": 3.812639508993545e-05, "loss": 67.1163, "step": 97910 }, { "epoch": 0.39560919047984583, "grad_norm": 292.3797607421875, "learning_rate": 3.8123424174932674e-05, "loss": 46.8763, "step": 97920 }, { "epoch": 0.39564959174521347, "grad_norm": 516.19287109375, "learning_rate": 3.812045300408199e-05, "loss": 60.135, "step": 97930 }, { "epoch": 0.3956899930105811, "grad_norm": 1726.6441650390625, "learning_rate": 3.811748157744132e-05, "loss": 111.1981, "step": 97940 }, { "epoch": 0.39573039427594875, "grad_norm": 617.226806640625, "learning_rate": 3.8114509895068586e-05, "loss": 40.3687, "step": 97950 }, { "epoch": 0.39577079554131633, "grad_norm": 1564.2078857421875, "learning_rate": 3.811153795702174e-05, "loss": 57.8685, "step": 97960 }, { "epoch": 0.395811196806684, "grad_norm": 543.6961669921875, "learning_rate": 3.81085657633587e-05, "loss": 63.6772, "step": 97970 }, { "epoch": 0.3958515980720516, "grad_norm": 619.5729370117188, "learning_rate": 3.810559331413743e-05, "loss": 53.4549, "step": 97980 }, { "epoch": 0.39589199933741925, "grad_norm": 1818.6915283203125, "learning_rate": 3.810262060941587e-05, "loss": 46.7142, "step": 97990 }, { "epoch": 0.3959324006027869, "grad_norm": 423.151611328125, "learning_rate": 3.8099647649251986e-05, "loss": 68.5805, "step": 98000 }, { "epoch": 0.39597280186815453, "grad_norm": 797.4796752929688, "learning_rate": 3.809667443370372e-05, "loss": 43.375, "step": 98010 }, { "epoch": 0.3960132031335221, "grad_norm": 478.58349609375, "learning_rate": 3.809370096282902e-05, "loss": 63.259, "step": 98020 }, { "epoch": 0.39605360439888976, "grad_norm": 477.88470458984375, "learning_rate": 3.8090727236685906e-05, "loss": 85.8146, "step": 98030 }, { "epoch": 0.3960940056642574, "grad_norm": 358.82342529296875, "learning_rate": 3.808775325533232e-05, "loss": 46.4715, "step": 98040 }, { "epoch": 0.39613440692962504, "grad_norm": 1070.005615234375, "learning_rate": 3.808477901882624e-05, "loss": 50.1758, "step": 98050 }, { "epoch": 0.3961748081949927, "grad_norm": 568.8511352539062, "learning_rate": 3.808180452722566e-05, "loss": 53.4005, "step": 98060 }, { "epoch": 0.3962152094603603, "grad_norm": 523.6063232421875, "learning_rate": 3.8078829780588564e-05, "loss": 65.6613, "step": 98070 }, { "epoch": 0.3962556107257279, "grad_norm": 1423.616943359375, "learning_rate": 3.8075854778972955e-05, "loss": 88.5826, "step": 98080 }, { "epoch": 0.39629601199109554, "grad_norm": 821.159912109375, "learning_rate": 3.807287952243682e-05, "loss": 52.51, "step": 98090 }, { "epoch": 0.3963364132564632, "grad_norm": 1087.4752197265625, "learning_rate": 3.8069904011038165e-05, "loss": 62.1011, "step": 98100 }, { "epoch": 0.3963768145218308, "grad_norm": 981.5172119140625, "learning_rate": 3.806692824483501e-05, "loss": 43.4204, "step": 98110 }, { "epoch": 0.39641721578719846, "grad_norm": 1140.0291748046875, "learning_rate": 3.806395222388536e-05, "loss": 67.1618, "step": 98120 }, { "epoch": 0.3964576170525661, "grad_norm": 863.1648559570312, "learning_rate": 3.8060975948247223e-05, "loss": 71.7122, "step": 98130 }, { "epoch": 0.39649801831793374, "grad_norm": 631.3798217773438, "learning_rate": 3.805799941797865e-05, "loss": 42.8692, "step": 98140 }, { "epoch": 0.3965384195833013, "grad_norm": 1376.2213134765625, "learning_rate": 3.805502263313765e-05, "loss": 55.8479, "step": 98150 }, { "epoch": 0.39657882084866897, "grad_norm": 476.51446533203125, "learning_rate": 3.805204559378227e-05, "loss": 57.2946, "step": 98160 }, { "epoch": 0.3966192221140366, "grad_norm": 941.9556884765625, "learning_rate": 3.804906829997053e-05, "loss": 71.0839, "step": 98170 }, { "epoch": 0.39665962337940425, "grad_norm": 764.8499755859375, "learning_rate": 3.804609075176049e-05, "loss": 61.4329, "step": 98180 }, { "epoch": 0.3967000246447719, "grad_norm": 804.7611694335938, "learning_rate": 3.8043112949210194e-05, "loss": 50.3516, "step": 98190 }, { "epoch": 0.3967404259101395, "grad_norm": 757.18798828125, "learning_rate": 3.80401348923777e-05, "loss": 65.6766, "step": 98200 }, { "epoch": 0.3967808271755071, "grad_norm": 802.1757202148438, "learning_rate": 3.803715658132105e-05, "loss": 51.4916, "step": 98210 }, { "epoch": 0.39682122844087475, "grad_norm": 677.951904296875, "learning_rate": 3.803417801609833e-05, "loss": 40.4412, "step": 98220 }, { "epoch": 0.3968616297062424, "grad_norm": 960.4522094726562, "learning_rate": 3.803119919676761e-05, "loss": 53.576, "step": 98230 }, { "epoch": 0.39690203097161003, "grad_norm": 1985.3116455078125, "learning_rate": 3.802822012338694e-05, "loss": 62.2902, "step": 98240 }, { "epoch": 0.39694243223697767, "grad_norm": 1060.7294921875, "learning_rate": 3.802524079601442e-05, "loss": 59.7074, "step": 98250 }, { "epoch": 0.3969828335023453, "grad_norm": 926.2281494140625, "learning_rate": 3.802226121470811e-05, "loss": 67.3111, "step": 98260 }, { "epoch": 0.39702323476771295, "grad_norm": 558.5460205078125, "learning_rate": 3.8019281379526114e-05, "loss": 52.2698, "step": 98270 }, { "epoch": 0.39706363603308054, "grad_norm": 1760.0323486328125, "learning_rate": 3.8016301290526534e-05, "loss": 63.7224, "step": 98280 }, { "epoch": 0.3971040372984482, "grad_norm": 717.9991455078125, "learning_rate": 3.8013320947767464e-05, "loss": 61.1942, "step": 98290 }, { "epoch": 0.3971444385638158, "grad_norm": 302.7607421875, "learning_rate": 3.8010340351306997e-05, "loss": 52.8783, "step": 98300 }, { "epoch": 0.39718483982918346, "grad_norm": 603.1386108398438, "learning_rate": 3.800735950120324e-05, "loss": 61.837, "step": 98310 }, { "epoch": 0.3972252410945511, "grad_norm": 714.36474609375, "learning_rate": 3.8004378397514315e-05, "loss": 57.4805, "step": 98320 }, { "epoch": 0.39726564235991874, "grad_norm": 578.4430541992188, "learning_rate": 3.800139704029835e-05, "loss": 44.0688, "step": 98330 }, { "epoch": 0.3973060436252863, "grad_norm": 556.3411254882812, "learning_rate": 3.7998415429613444e-05, "loss": 58.5663, "step": 98340 }, { "epoch": 0.39734644489065396, "grad_norm": 402.5372314453125, "learning_rate": 3.7995433565517735e-05, "loss": 47.3463, "step": 98350 }, { "epoch": 0.3973868461560216, "grad_norm": 652.9655151367188, "learning_rate": 3.799245144806937e-05, "loss": 77.4669, "step": 98360 }, { "epoch": 0.39742724742138924, "grad_norm": 351.4555358886719, "learning_rate": 3.7989469077326466e-05, "loss": 55.3536, "step": 98370 }, { "epoch": 0.3974676486867569, "grad_norm": 530.9819946289062, "learning_rate": 3.798648645334718e-05, "loss": 48.7565, "step": 98380 }, { "epoch": 0.3975080499521245, "grad_norm": 997.8034057617188, "learning_rate": 3.798350357618965e-05, "loss": 69.4074, "step": 98390 }, { "epoch": 0.3975484512174921, "grad_norm": 1051.143310546875, "learning_rate": 3.798052044591204e-05, "loss": 68.8657, "step": 98400 }, { "epoch": 0.39758885248285974, "grad_norm": 763.1336059570312, "learning_rate": 3.79775370625725e-05, "loss": 65.6765, "step": 98410 }, { "epoch": 0.3976292537482274, "grad_norm": 727.5631713867188, "learning_rate": 3.797455342622919e-05, "loss": 55.0416, "step": 98420 }, { "epoch": 0.397669655013595, "grad_norm": 747.4799194335938, "learning_rate": 3.797156953694028e-05, "loss": 62.9093, "step": 98430 }, { "epoch": 0.39771005627896266, "grad_norm": 568.0397338867188, "learning_rate": 3.796858539476394e-05, "loss": 57.0158, "step": 98440 }, { "epoch": 0.3977504575443303, "grad_norm": 1187.3994140625, "learning_rate": 3.7965600999758356e-05, "loss": 75.5707, "step": 98450 }, { "epoch": 0.39779085880969794, "grad_norm": 1377.0982666015625, "learning_rate": 3.796261635198171e-05, "loss": 110.1248, "step": 98460 }, { "epoch": 0.39783126007506553, "grad_norm": 541.7080078125, "learning_rate": 3.7959631451492176e-05, "loss": 113.9989, "step": 98470 }, { "epoch": 0.39787166134043317, "grad_norm": 1125.3931884765625, "learning_rate": 3.7956646298347956e-05, "loss": 53.4357, "step": 98480 }, { "epoch": 0.3979120626058008, "grad_norm": 1457.5439453125, "learning_rate": 3.795366089260725e-05, "loss": 47.1348, "step": 98490 }, { "epoch": 0.39795246387116845, "grad_norm": 639.0778198242188, "learning_rate": 3.795067523432826e-05, "loss": 53.8182, "step": 98500 }, { "epoch": 0.3979928651365361, "grad_norm": 473.65032958984375, "learning_rate": 3.794768932356918e-05, "loss": 63.5976, "step": 98510 }, { "epoch": 0.39803326640190373, "grad_norm": 1148.6119384765625, "learning_rate": 3.7944703160388234e-05, "loss": 43.6071, "step": 98520 }, { "epoch": 0.3980736676672713, "grad_norm": 2093.45849609375, "learning_rate": 3.794171674484363e-05, "loss": 60.3479, "step": 98530 }, { "epoch": 0.39811406893263895, "grad_norm": 1044.666748046875, "learning_rate": 3.793873007699361e-05, "loss": 88.364, "step": 98540 }, { "epoch": 0.3981544701980066, "grad_norm": 597.9827270507812, "learning_rate": 3.7935743156896375e-05, "loss": 54.5251, "step": 98550 }, { "epoch": 0.39819487146337423, "grad_norm": 353.5838928222656, "learning_rate": 3.793275598461017e-05, "loss": 66.4365, "step": 98560 }, { "epoch": 0.3982352727287419, "grad_norm": 399.634521484375, "learning_rate": 3.792976856019323e-05, "loss": 31.9752, "step": 98570 }, { "epoch": 0.3982756739941095, "grad_norm": 915.1165771484375, "learning_rate": 3.792678088370379e-05, "loss": 70.9123, "step": 98580 }, { "epoch": 0.39831607525947715, "grad_norm": 863.623046875, "learning_rate": 3.792379295520011e-05, "loss": 69.3178, "step": 98590 }, { "epoch": 0.39835647652484474, "grad_norm": 773.9990844726562, "learning_rate": 3.792080477474043e-05, "loss": 75.029, "step": 98600 }, { "epoch": 0.3983968777902124, "grad_norm": 889.9473876953125, "learning_rate": 3.7917816342383005e-05, "loss": 80.714, "step": 98610 }, { "epoch": 0.39843727905558, "grad_norm": 551.6591186523438, "learning_rate": 3.7914827658186103e-05, "loss": 59.7947, "step": 98620 }, { "epoch": 0.39847768032094766, "grad_norm": 457.0266418457031, "learning_rate": 3.791183872220798e-05, "loss": 64.1721, "step": 98630 }, { "epoch": 0.3985180815863153, "grad_norm": 778.4760131835938, "learning_rate": 3.790884953450692e-05, "loss": 53.5698, "step": 98640 }, { "epoch": 0.39855848285168294, "grad_norm": 628.935791015625, "learning_rate": 3.790586009514119e-05, "loss": 45.5346, "step": 98650 }, { "epoch": 0.3985988841170505, "grad_norm": 390.69134521484375, "learning_rate": 3.790287040416908e-05, "loss": 80.0498, "step": 98660 }, { "epoch": 0.39863928538241816, "grad_norm": 585.2010498046875, "learning_rate": 3.7899880461648865e-05, "loss": 45.9213, "step": 98670 }, { "epoch": 0.3986796866477858, "grad_norm": 537.3295288085938, "learning_rate": 3.789689026763883e-05, "loss": 52.588, "step": 98680 }, { "epoch": 0.39872008791315344, "grad_norm": 1386.0994873046875, "learning_rate": 3.789389982219729e-05, "loss": 69.4656, "step": 98690 }, { "epoch": 0.3987604891785211, "grad_norm": 471.1151123046875, "learning_rate": 3.789090912538253e-05, "loss": 52.704, "step": 98700 }, { "epoch": 0.3988008904438887, "grad_norm": 407.30584716796875, "learning_rate": 3.7887918177252855e-05, "loss": 86.2156, "step": 98710 }, { "epoch": 0.3988412917092563, "grad_norm": 563.2343139648438, "learning_rate": 3.788492697786658e-05, "loss": 79.5212, "step": 98720 }, { "epoch": 0.39888169297462395, "grad_norm": 3062.3173828125, "learning_rate": 3.788193552728204e-05, "loss": 67.4379, "step": 98730 }, { "epoch": 0.3989220942399916, "grad_norm": 581.1060791015625, "learning_rate": 3.7878943825557516e-05, "loss": 53.4574, "step": 98740 }, { "epoch": 0.3989624955053592, "grad_norm": 495.8964538574219, "learning_rate": 3.787595187275136e-05, "loss": 58.9977, "step": 98750 }, { "epoch": 0.39900289677072687, "grad_norm": 576.1397705078125, "learning_rate": 3.7872959668921884e-05, "loss": 87.9499, "step": 98760 }, { "epoch": 0.3990432980360945, "grad_norm": 429.25439453125, "learning_rate": 3.786996721412745e-05, "loss": 42.8542, "step": 98770 }, { "epoch": 0.39908369930146215, "grad_norm": 403.6635437011719, "learning_rate": 3.7866974508426354e-05, "loss": 43.5901, "step": 98780 }, { "epoch": 0.39912410056682973, "grad_norm": 498.6553039550781, "learning_rate": 3.786398155187698e-05, "loss": 81.6215, "step": 98790 }, { "epoch": 0.39916450183219737, "grad_norm": 779.7023315429688, "learning_rate": 3.786098834453766e-05, "loss": 47.5752, "step": 98800 }, { "epoch": 0.399204903097565, "grad_norm": 615.2477416992188, "learning_rate": 3.7857994886466755e-05, "loss": 53.3112, "step": 98810 }, { "epoch": 0.39924530436293265, "grad_norm": 2094.180908203125, "learning_rate": 3.7855001177722615e-05, "loss": 62.0935, "step": 98820 }, { "epoch": 0.3992857056283003, "grad_norm": 477.36724853515625, "learning_rate": 3.785200721836361e-05, "loss": 75.4263, "step": 98830 }, { "epoch": 0.39932610689366793, "grad_norm": 2158.408447265625, "learning_rate": 3.7849013008448115e-05, "loss": 99.4542, "step": 98840 }, { "epoch": 0.3993665081590355, "grad_norm": 1072.518310546875, "learning_rate": 3.784601854803449e-05, "loss": 77.213, "step": 98850 }, { "epoch": 0.39940690942440316, "grad_norm": 796.7136840820312, "learning_rate": 3.784302383718113e-05, "loss": 62.9405, "step": 98860 }, { "epoch": 0.3994473106897708, "grad_norm": 716.7407836914062, "learning_rate": 3.784002887594639e-05, "loss": 66.7786, "step": 98870 }, { "epoch": 0.39948771195513844, "grad_norm": 526.4265747070312, "learning_rate": 3.783703366438868e-05, "loss": 47.3223, "step": 98880 }, { "epoch": 0.3995281132205061, "grad_norm": 420.97637939453125, "learning_rate": 3.783403820256639e-05, "loss": 43.0085, "step": 98890 }, { "epoch": 0.3995685144858737, "grad_norm": 931.0668334960938, "learning_rate": 3.783104249053793e-05, "loss": 84.5414, "step": 98900 }, { "epoch": 0.39960891575124136, "grad_norm": 729.2132568359375, "learning_rate": 3.782804652836168e-05, "loss": 59.9172, "step": 98910 }, { "epoch": 0.39964931701660894, "grad_norm": 518.9829711914062, "learning_rate": 3.782505031609607e-05, "loss": 64.4889, "step": 98920 }, { "epoch": 0.3996897182819766, "grad_norm": 756.1053466796875, "learning_rate": 3.782205385379948e-05, "loss": 73.3509, "step": 98930 }, { "epoch": 0.3997301195473442, "grad_norm": 1349.8555908203125, "learning_rate": 3.781905714153037e-05, "loss": 70.6158, "step": 98940 }, { "epoch": 0.39977052081271186, "grad_norm": 573.3903198242188, "learning_rate": 3.781606017934713e-05, "loss": 48.6632, "step": 98950 }, { "epoch": 0.3998109220780795, "grad_norm": 510.5514221191406, "learning_rate": 3.78130629673082e-05, "loss": 65.3422, "step": 98960 }, { "epoch": 0.39985132334344714, "grad_norm": 676.6386108398438, "learning_rate": 3.781006550547202e-05, "loss": 49.1923, "step": 98970 }, { "epoch": 0.3998917246088147, "grad_norm": 1945.87353515625, "learning_rate": 3.780706779389701e-05, "loss": 109.869, "step": 98980 }, { "epoch": 0.39993212587418236, "grad_norm": 607.2948608398438, "learning_rate": 3.7804069832641615e-05, "loss": 59.3958, "step": 98990 }, { "epoch": 0.39997252713955, "grad_norm": 3488.2783203125, "learning_rate": 3.780107162176429e-05, "loss": 71.8975, "step": 99000 }, { "epoch": 0.40001292840491764, "grad_norm": 869.2059326171875, "learning_rate": 3.779807316132349e-05, "loss": 71.5209, "step": 99010 }, { "epoch": 0.4000533296702853, "grad_norm": 1111.3924560546875, "learning_rate": 3.779507445137766e-05, "loss": 72.5968, "step": 99020 }, { "epoch": 0.4000937309356529, "grad_norm": 590.8203125, "learning_rate": 3.779207549198527e-05, "loss": 68.4518, "step": 99030 }, { "epoch": 0.4001341322010205, "grad_norm": 1010.8024291992188, "learning_rate": 3.778907628320477e-05, "loss": 55.1932, "step": 99040 }, { "epoch": 0.40017453346638815, "grad_norm": 1730.03271484375, "learning_rate": 3.778607682509465e-05, "loss": 72.5133, "step": 99050 }, { "epoch": 0.4002149347317558, "grad_norm": 706.1375122070312, "learning_rate": 3.7783077117713386e-05, "loss": 51.3278, "step": 99060 }, { "epoch": 0.40025533599712343, "grad_norm": 491.18145751953125, "learning_rate": 3.778007716111945e-05, "loss": 57.2659, "step": 99070 }, { "epoch": 0.40029573726249107, "grad_norm": 967.4299926757812, "learning_rate": 3.777707695537133e-05, "loss": 54.0169, "step": 99080 }, { "epoch": 0.4003361385278587, "grad_norm": 586.3837280273438, "learning_rate": 3.777407650052751e-05, "loss": 72.3556, "step": 99090 }, { "epoch": 0.40037653979322635, "grad_norm": 618.3201904296875, "learning_rate": 3.77710757966465e-05, "loss": 57.029, "step": 99100 }, { "epoch": 0.40041694105859393, "grad_norm": 753.272705078125, "learning_rate": 3.7768074843786796e-05, "loss": 61.8438, "step": 99110 }, { "epoch": 0.4004573423239616, "grad_norm": 249.07408142089844, "learning_rate": 3.776507364200689e-05, "loss": 66.9332, "step": 99120 }, { "epoch": 0.4004977435893292, "grad_norm": 371.7924499511719, "learning_rate": 3.77620721913653e-05, "loss": 57.5363, "step": 99130 }, { "epoch": 0.40053814485469685, "grad_norm": 1456.956787109375, "learning_rate": 3.7759070491920544e-05, "loss": 66.1014, "step": 99140 }, { "epoch": 0.4005785461200645, "grad_norm": 700.562744140625, "learning_rate": 3.775606854373115e-05, "loss": 60.8896, "step": 99150 }, { "epoch": 0.40061894738543213, "grad_norm": 712.740966796875, "learning_rate": 3.775306634685562e-05, "loss": 57.9598, "step": 99160 }, { "epoch": 0.4006593486507997, "grad_norm": 2491.177490234375, "learning_rate": 3.7750063901352494e-05, "loss": 66.2804, "step": 99170 }, { "epoch": 0.40069974991616736, "grad_norm": 930.8803100585938, "learning_rate": 3.774706120728032e-05, "loss": 61.9584, "step": 99180 }, { "epoch": 0.400740151181535, "grad_norm": 488.9775085449219, "learning_rate": 3.774405826469762e-05, "loss": 51.8706, "step": 99190 }, { "epoch": 0.40078055244690264, "grad_norm": 1185.0855712890625, "learning_rate": 3.7741055073662946e-05, "loss": 59.6434, "step": 99200 }, { "epoch": 0.4008209537122703, "grad_norm": 1065.01025390625, "learning_rate": 3.773805163423484e-05, "loss": 90.2759, "step": 99210 }, { "epoch": 0.4008613549776379, "grad_norm": 1346.4959716796875, "learning_rate": 3.773504794647187e-05, "loss": 47.388, "step": 99220 }, { "epoch": 0.40090175624300556, "grad_norm": 672.511962890625, "learning_rate": 3.7732044010432564e-05, "loss": 43.0481, "step": 99230 }, { "epoch": 0.40094215750837314, "grad_norm": 655.6102294921875, "learning_rate": 3.772903982617552e-05, "loss": 57.7771, "step": 99240 }, { "epoch": 0.4009825587737408, "grad_norm": 1324.095703125, "learning_rate": 3.7726035393759285e-05, "loss": 57.1472, "step": 99250 }, { "epoch": 0.4010229600391084, "grad_norm": 1157.129150390625, "learning_rate": 3.772303071324244e-05, "loss": 60.1481, "step": 99260 }, { "epoch": 0.40106336130447606, "grad_norm": 784.1769409179688, "learning_rate": 3.772002578468356e-05, "loss": 101.2776, "step": 99270 }, { "epoch": 0.4011037625698437, "grad_norm": 682.7789306640625, "learning_rate": 3.771702060814123e-05, "loss": 86.0453, "step": 99280 }, { "epoch": 0.40114416383521134, "grad_norm": 3423.19921875, "learning_rate": 3.771401518367403e-05, "loss": 74.2687, "step": 99290 }, { "epoch": 0.4011845651005789, "grad_norm": 672.9019775390625, "learning_rate": 3.771100951134057e-05, "loss": 55.2397, "step": 99300 }, { "epoch": 0.40122496636594657, "grad_norm": 642.078857421875, "learning_rate": 3.770800359119943e-05, "loss": 34.1067, "step": 99310 }, { "epoch": 0.4012653676313142, "grad_norm": 986.0284423828125, "learning_rate": 3.770499742330922e-05, "loss": 64.7369, "step": 99320 }, { "epoch": 0.40130576889668185, "grad_norm": 416.7942199707031, "learning_rate": 3.770199100772853e-05, "loss": 63.9787, "step": 99330 }, { "epoch": 0.4013461701620495, "grad_norm": 1163.7266845703125, "learning_rate": 3.7698984344515997e-05, "loss": 55.3137, "step": 99340 }, { "epoch": 0.4013865714274171, "grad_norm": 591.773681640625, "learning_rate": 3.769597743373023e-05, "loss": 54.0791, "step": 99350 }, { "epoch": 0.4014269726927847, "grad_norm": 1694.82373046875, "learning_rate": 3.769297027542985e-05, "loss": 46.7332, "step": 99360 }, { "epoch": 0.40146737395815235, "grad_norm": 717.4148559570312, "learning_rate": 3.768996286967347e-05, "loss": 55.8938, "step": 99370 }, { "epoch": 0.40150777522352, "grad_norm": 591.4462890625, "learning_rate": 3.768695521651973e-05, "loss": 46.4734, "step": 99380 }, { "epoch": 0.40154817648888763, "grad_norm": 1100.54150390625, "learning_rate": 3.7683947316027276e-05, "loss": 63.5928, "step": 99390 }, { "epoch": 0.40158857775425527, "grad_norm": 421.6627197265625, "learning_rate": 3.7680939168254733e-05, "loss": 59.3417, "step": 99400 }, { "epoch": 0.4016289790196229, "grad_norm": 307.5633544921875, "learning_rate": 3.767793077326075e-05, "loss": 94.9799, "step": 99410 }, { "epoch": 0.40166938028499055, "grad_norm": 743.0738525390625, "learning_rate": 3.767492213110397e-05, "loss": 58.2898, "step": 99420 }, { "epoch": 0.40170978155035814, "grad_norm": 724.935791015625, "learning_rate": 3.767191324184308e-05, "loss": 54.7535, "step": 99430 }, { "epoch": 0.4017501828157258, "grad_norm": 1314.9210205078125, "learning_rate": 3.7668904105536706e-05, "loss": 76.4102, "step": 99440 }, { "epoch": 0.4017905840810934, "grad_norm": 920.00537109375, "learning_rate": 3.7665894722243525e-05, "loss": 58.301, "step": 99450 }, { "epoch": 0.40183098534646106, "grad_norm": 1673.596435546875, "learning_rate": 3.76628850920222e-05, "loss": 57.4322, "step": 99460 }, { "epoch": 0.4018713866118287, "grad_norm": 826.5586547851562, "learning_rate": 3.7659875214931426e-05, "loss": 69.376, "step": 99470 }, { "epoch": 0.40191178787719634, "grad_norm": 581.40283203125, "learning_rate": 3.765686509102985e-05, "loss": 36.2128, "step": 99480 }, { "epoch": 0.4019521891425639, "grad_norm": 326.0263977050781, "learning_rate": 3.765385472037618e-05, "loss": 64.3921, "step": 99490 }, { "epoch": 0.40199259040793156, "grad_norm": 729.6510009765625, "learning_rate": 3.765084410302909e-05, "loss": 63.8964, "step": 99500 }, { "epoch": 0.4020329916732992, "grad_norm": 920.1556396484375, "learning_rate": 3.76478332390473e-05, "loss": 41.994, "step": 99510 }, { "epoch": 0.40207339293866684, "grad_norm": 888.444091796875, "learning_rate": 3.764482212848948e-05, "loss": 70.7583, "step": 99520 }, { "epoch": 0.4021137942040345, "grad_norm": 909.1885375976562, "learning_rate": 3.7641810771414335e-05, "loss": 67.3313, "step": 99530 }, { "epoch": 0.4021541954694021, "grad_norm": 1571.4930419921875, "learning_rate": 3.763879916788059e-05, "loss": 66.0156, "step": 99540 }, { "epoch": 0.40219459673476976, "grad_norm": 724.2689208984375, "learning_rate": 3.763578731794695e-05, "loss": 66.495, "step": 99550 }, { "epoch": 0.40223499800013734, "grad_norm": 734.593505859375, "learning_rate": 3.7632775221672115e-05, "loss": 41.2429, "step": 99560 }, { "epoch": 0.402275399265505, "grad_norm": 1102.271728515625, "learning_rate": 3.7629762879114835e-05, "loss": 71.2054, "step": 99570 }, { "epoch": 0.4023158005308726, "grad_norm": 1053.3270263671875, "learning_rate": 3.7626750290333824e-05, "loss": 49.4548, "step": 99580 }, { "epoch": 0.40235620179624026, "grad_norm": 774.8560180664062, "learning_rate": 3.7623737455387814e-05, "loss": 53.1317, "step": 99590 }, { "epoch": 0.4023966030616079, "grad_norm": 792.8966064453125, "learning_rate": 3.762072437433555e-05, "loss": 63.3754, "step": 99600 }, { "epoch": 0.40243700432697554, "grad_norm": 648.0517578125, "learning_rate": 3.761771104723576e-05, "loss": 41.7011, "step": 99610 }, { "epoch": 0.40247740559234313, "grad_norm": 816.7391967773438, "learning_rate": 3.76146974741472e-05, "loss": 55.6581, "step": 99620 }, { "epoch": 0.40251780685771077, "grad_norm": 2065.635498046875, "learning_rate": 3.761168365512862e-05, "loss": 76.0005, "step": 99630 }, { "epoch": 0.4025582081230784, "grad_norm": 1137.087890625, "learning_rate": 3.760866959023877e-05, "loss": 58.8005, "step": 99640 }, { "epoch": 0.40259860938844605, "grad_norm": 1612.660400390625, "learning_rate": 3.760565527953641e-05, "loss": 52.3195, "step": 99650 }, { "epoch": 0.4026390106538137, "grad_norm": 1130.4898681640625, "learning_rate": 3.7602640723080315e-05, "loss": 58.5023, "step": 99660 }, { "epoch": 0.40267941191918133, "grad_norm": 1282.1605224609375, "learning_rate": 3.7599625920929254e-05, "loss": 60.2157, "step": 99670 }, { "epoch": 0.4027198131845489, "grad_norm": 626.7401733398438, "learning_rate": 3.759661087314199e-05, "loss": 61.224, "step": 99680 }, { "epoch": 0.40276021444991655, "grad_norm": 403.7524108886719, "learning_rate": 3.759359557977732e-05, "loss": 43.2286, "step": 99690 }, { "epoch": 0.4028006157152842, "grad_norm": 459.4110107421875, "learning_rate": 3.759058004089402e-05, "loss": 63.8654, "step": 99700 }, { "epoch": 0.40284101698065183, "grad_norm": 689.9656982421875, "learning_rate": 3.758756425655089e-05, "loss": 55.9281, "step": 99710 }, { "epoch": 0.4028814182460195, "grad_norm": 493.5693664550781, "learning_rate": 3.7584548226806696e-05, "loss": 46.4668, "step": 99720 }, { "epoch": 0.4029218195113871, "grad_norm": 568.452880859375, "learning_rate": 3.758153195172026e-05, "loss": 85.0147, "step": 99730 }, { "epoch": 0.40296222077675475, "grad_norm": 775.391845703125, "learning_rate": 3.7578515431350384e-05, "loss": 62.4054, "step": 99740 }, { "epoch": 0.40300262204212234, "grad_norm": 1037.2498779296875, "learning_rate": 3.757549866575588e-05, "loss": 58.9337, "step": 99750 }, { "epoch": 0.40304302330749, "grad_norm": 487.4396667480469, "learning_rate": 3.757248165499555e-05, "loss": 78.7348, "step": 99760 }, { "epoch": 0.4030834245728576, "grad_norm": 1064.0037841796875, "learning_rate": 3.7569464399128215e-05, "loss": 43.4251, "step": 99770 }, { "epoch": 0.40312382583822526, "grad_norm": 417.7305603027344, "learning_rate": 3.75664468982127e-05, "loss": 43.3443, "step": 99780 }, { "epoch": 0.4031642271035929, "grad_norm": 1124.982177734375, "learning_rate": 3.756342915230784e-05, "loss": 72.6339, "step": 99790 }, { "epoch": 0.40320462836896054, "grad_norm": 1895.08935546875, "learning_rate": 3.7560411161472456e-05, "loss": 87.87, "step": 99800 }, { "epoch": 0.4032450296343281, "grad_norm": 885.612060546875, "learning_rate": 3.755739292576539e-05, "loss": 68.6611, "step": 99810 }, { "epoch": 0.40328543089969576, "grad_norm": 1059.715576171875, "learning_rate": 3.7554374445245474e-05, "loss": 50.6557, "step": 99820 }, { "epoch": 0.4033258321650634, "grad_norm": 1289.69677734375, "learning_rate": 3.755135571997158e-05, "loss": 55.3921, "step": 99830 }, { "epoch": 0.40336623343043104, "grad_norm": 948.4808959960938, "learning_rate": 3.7548336750002544e-05, "loss": 52.4533, "step": 99840 }, { "epoch": 0.4034066346957987, "grad_norm": 768.9962768554688, "learning_rate": 3.7545317535397214e-05, "loss": 52.6331, "step": 99850 }, { "epoch": 0.4034470359611663, "grad_norm": 806.0327758789062, "learning_rate": 3.754229807621446e-05, "loss": 58.2938, "step": 99860 }, { "epoch": 0.40348743722653396, "grad_norm": 631.0634155273438, "learning_rate": 3.753927837251315e-05, "loss": 68.7876, "step": 99870 }, { "epoch": 0.40352783849190155, "grad_norm": 574.1565551757812, "learning_rate": 3.753625842435216e-05, "loss": 45.7764, "step": 99880 }, { "epoch": 0.4035682397572692, "grad_norm": 1697.2900390625, "learning_rate": 3.753323823179035e-05, "loss": 65.6436, "step": 99890 }, { "epoch": 0.4036086410226368, "grad_norm": 260.11083984375, "learning_rate": 3.7530217794886606e-05, "loss": 71.5105, "step": 99900 }, { "epoch": 0.40364904228800447, "grad_norm": 932.4937133789062, "learning_rate": 3.752719711369982e-05, "loss": 61.679, "step": 99910 }, { "epoch": 0.4036894435533721, "grad_norm": 676.4361572265625, "learning_rate": 3.752417618828888e-05, "loss": 90.9519, "step": 99920 }, { "epoch": 0.40372984481873975, "grad_norm": 6873.72802734375, "learning_rate": 3.752115501871267e-05, "loss": 99.3489, "step": 99930 }, { "epoch": 0.40377024608410733, "grad_norm": 0.0, "learning_rate": 3.75181336050301e-05, "loss": 58.3508, "step": 99940 }, { "epoch": 0.40381064734947497, "grad_norm": 619.0031127929688, "learning_rate": 3.751511194730007e-05, "loss": 58.6171, "step": 99950 }, { "epoch": 0.4038510486148426, "grad_norm": 963.3147583007812, "learning_rate": 3.751209004558149e-05, "loss": 60.5256, "step": 99960 }, { "epoch": 0.40389144988021025, "grad_norm": 720.6400756835938, "learning_rate": 3.750906789993327e-05, "loss": 59.5601, "step": 99970 }, { "epoch": 0.4039318511455779, "grad_norm": 1478.5609130859375, "learning_rate": 3.7506045510414335e-05, "loss": 46.86, "step": 99980 }, { "epoch": 0.40397225241094553, "grad_norm": 427.1527099609375, "learning_rate": 3.7503022877083606e-05, "loss": 57.4738, "step": 99990 }, { "epoch": 0.4040126536763131, "grad_norm": 967.2838134765625, "learning_rate": 3.7500000000000003e-05, "loss": 77.0952, "step": 100000 }, { "epoch": 0.40405305494168076, "grad_norm": 896.5092163085938, "learning_rate": 3.749697687922247e-05, "loss": 49.8017, "step": 100010 }, { "epoch": 0.4040934562070484, "grad_norm": 663.4006958007812, "learning_rate": 3.749395351480993e-05, "loss": 45.9336, "step": 100020 }, { "epoch": 0.40413385747241604, "grad_norm": 471.2986145019531, "learning_rate": 3.749092990682134e-05, "loss": 62.4995, "step": 100030 }, { "epoch": 0.4041742587377837, "grad_norm": 956.7597045898438, "learning_rate": 3.748790605531565e-05, "loss": 62.3297, "step": 100040 }, { "epoch": 0.4042146600031513, "grad_norm": 2078.440185546875, "learning_rate": 3.748488196035179e-05, "loss": 129.3817, "step": 100050 }, { "epoch": 0.40425506126851896, "grad_norm": 4852.7265625, "learning_rate": 3.748185762198873e-05, "loss": 86.8261, "step": 100060 }, { "epoch": 0.40429546253388654, "grad_norm": 1042.48828125, "learning_rate": 3.747883304028543e-05, "loss": 51.4865, "step": 100070 }, { "epoch": 0.4043358637992542, "grad_norm": 758.50048828125, "learning_rate": 3.7475808215300854e-05, "loss": 47.7026, "step": 100080 }, { "epoch": 0.4043762650646218, "grad_norm": 769.8150024414062, "learning_rate": 3.7472783147093985e-05, "loss": 85.2325, "step": 100090 }, { "epoch": 0.40441666632998946, "grad_norm": 777.2942504882812, "learning_rate": 3.746975783572377e-05, "loss": 67.7482, "step": 100100 }, { "epoch": 0.4044570675953571, "grad_norm": 605.9794921875, "learning_rate": 3.746673228124922e-05, "loss": 53.6268, "step": 100110 }, { "epoch": 0.40449746886072474, "grad_norm": 1080.2708740234375, "learning_rate": 3.7463706483729296e-05, "loss": 69.3192, "step": 100120 }, { "epoch": 0.4045378701260923, "grad_norm": 725.3931884765625, "learning_rate": 3.7460680443223004e-05, "loss": 42.6117, "step": 100130 }, { "epoch": 0.40457827139145996, "grad_norm": 629.57470703125, "learning_rate": 3.745765415978933e-05, "loss": 69.1875, "step": 100140 }, { "epoch": 0.4046186726568276, "grad_norm": 788.4440307617188, "learning_rate": 3.7454627633487274e-05, "loss": 86.3007, "step": 100150 }, { "epoch": 0.40465907392219524, "grad_norm": 879.8585815429688, "learning_rate": 3.7451600864375844e-05, "loss": 81.9851, "step": 100160 }, { "epoch": 0.4046994751875629, "grad_norm": 578.5433349609375, "learning_rate": 3.7448573852514035e-05, "loss": 56.7397, "step": 100170 }, { "epoch": 0.4047398764529305, "grad_norm": 731.2473754882812, "learning_rate": 3.744554659796088e-05, "loss": 54.396, "step": 100180 }, { "epoch": 0.40478027771829816, "grad_norm": 718.974609375, "learning_rate": 3.744251910077538e-05, "loss": 47.8276, "step": 100190 }, { "epoch": 0.40482067898366575, "grad_norm": 714.3526000976562, "learning_rate": 3.7439491361016564e-05, "loss": 68.6014, "step": 100200 }, { "epoch": 0.4048610802490334, "grad_norm": 681.6476440429688, "learning_rate": 3.743646337874346e-05, "loss": 68.5919, "step": 100210 }, { "epoch": 0.40490148151440103, "grad_norm": 709.8939819335938, "learning_rate": 3.743343515401511e-05, "loss": 60.53, "step": 100220 }, { "epoch": 0.40494188277976867, "grad_norm": 730.64892578125, "learning_rate": 3.743040668689053e-05, "loss": 91.2893, "step": 100230 }, { "epoch": 0.4049822840451363, "grad_norm": 1242.37451171875, "learning_rate": 3.742737797742878e-05, "loss": 50.5919, "step": 100240 }, { "epoch": 0.40502268531050395, "grad_norm": 1279.793701171875, "learning_rate": 3.742434902568889e-05, "loss": 47.2868, "step": 100250 }, { "epoch": 0.40506308657587153, "grad_norm": 446.11175537109375, "learning_rate": 3.742131983172992e-05, "loss": 43.8764, "step": 100260 }, { "epoch": 0.4051034878412392, "grad_norm": 478.0585021972656, "learning_rate": 3.741829039561092e-05, "loss": 67.3198, "step": 100270 }, { "epoch": 0.4051438891066068, "grad_norm": 1781.1514892578125, "learning_rate": 3.741526071739097e-05, "loss": 55.5198, "step": 100280 }, { "epoch": 0.40518429037197445, "grad_norm": 601.347412109375, "learning_rate": 3.741223079712911e-05, "loss": 50.489, "step": 100290 }, { "epoch": 0.4052246916373421, "grad_norm": 631.7415161132812, "learning_rate": 3.7409200634884426e-05, "loss": 52.5421, "step": 100300 }, { "epoch": 0.40526509290270973, "grad_norm": 431.8924255371094, "learning_rate": 3.740617023071598e-05, "loss": 59.2278, "step": 100310 }, { "epoch": 0.4053054941680773, "grad_norm": 1287.239501953125, "learning_rate": 3.740313958468287e-05, "loss": 63.1438, "step": 100320 }, { "epoch": 0.40534589543344496, "grad_norm": 769.814208984375, "learning_rate": 3.7400108696844156e-05, "loss": 56.3332, "step": 100330 }, { "epoch": 0.4053862966988126, "grad_norm": 372.637451171875, "learning_rate": 3.739707756725894e-05, "loss": 61.8536, "step": 100340 }, { "epoch": 0.40542669796418024, "grad_norm": 537.6292114257812, "learning_rate": 3.739404619598632e-05, "loss": 75.01, "step": 100350 }, { "epoch": 0.4054670992295479, "grad_norm": 1282.1539306640625, "learning_rate": 3.7391014583085385e-05, "loss": 42.7495, "step": 100360 }, { "epoch": 0.4055075004949155, "grad_norm": 1255.515869140625, "learning_rate": 3.738798272861525e-05, "loss": 79.5692, "step": 100370 }, { "epoch": 0.40554790176028316, "grad_norm": 1217.02294921875, "learning_rate": 3.7384950632634995e-05, "loss": 55.6681, "step": 100380 }, { "epoch": 0.40558830302565074, "grad_norm": 480.9776306152344, "learning_rate": 3.7381918295203774e-05, "loss": 43.6625, "step": 100390 }, { "epoch": 0.4056287042910184, "grad_norm": 861.3986206054688, "learning_rate": 3.7378885716380664e-05, "loss": 48.5241, "step": 100400 }, { "epoch": 0.405669105556386, "grad_norm": 490.213134765625, "learning_rate": 3.737585289622482e-05, "loss": 60.6238, "step": 100410 }, { "epoch": 0.40570950682175366, "grad_norm": 0.0, "learning_rate": 3.7372819834795335e-05, "loss": 82.8279, "step": 100420 }, { "epoch": 0.4057499080871213, "grad_norm": 3635.81689453125, "learning_rate": 3.736978653215136e-05, "loss": 79.4986, "step": 100430 }, { "epoch": 0.40579030935248894, "grad_norm": 803.3690795898438, "learning_rate": 3.736675298835203e-05, "loss": 84.2047, "step": 100440 }, { "epoch": 0.4058307106178565, "grad_norm": 1111.7283935546875, "learning_rate": 3.7363719203456495e-05, "loss": 51.9508, "step": 100450 }, { "epoch": 0.40587111188322417, "grad_norm": 472.92242431640625, "learning_rate": 3.736068517752388e-05, "loss": 47.1429, "step": 100460 }, { "epoch": 0.4059115131485918, "grad_norm": 638.3177490234375, "learning_rate": 3.735765091061334e-05, "loss": 74.3898, "step": 100470 }, { "epoch": 0.40595191441395945, "grad_norm": 877.7601318359375, "learning_rate": 3.7354616402784035e-05, "loss": 63.0587, "step": 100480 }, { "epoch": 0.4059923156793271, "grad_norm": 625.0929565429688, "learning_rate": 3.735158165409514e-05, "loss": 38.2794, "step": 100490 }, { "epoch": 0.4060327169446947, "grad_norm": 756.3575439453125, "learning_rate": 3.7348546664605777e-05, "loss": 64.2025, "step": 100500 }, { "epoch": 0.4060731182100623, "grad_norm": 1217.9346923828125, "learning_rate": 3.7345511434375145e-05, "loss": 68.1389, "step": 100510 }, { "epoch": 0.40611351947542995, "grad_norm": 873.300048828125, "learning_rate": 3.734247596346242e-05, "loss": 66.9642, "step": 100520 }, { "epoch": 0.4061539207407976, "grad_norm": 959.7666015625, "learning_rate": 3.733944025192677e-05, "loss": 66.9598, "step": 100530 }, { "epoch": 0.40619432200616523, "grad_norm": 789.1475830078125, "learning_rate": 3.733640429982738e-05, "loss": 58.7012, "step": 100540 }, { "epoch": 0.40623472327153287, "grad_norm": 480.3835754394531, "learning_rate": 3.7333368107223424e-05, "loss": 43.2679, "step": 100550 }, { "epoch": 0.4062751245369005, "grad_norm": 759.8263549804688, "learning_rate": 3.7330331674174125e-05, "loss": 66.052, "step": 100560 }, { "epoch": 0.40631552580226815, "grad_norm": 859.6227416992188, "learning_rate": 3.732729500073866e-05, "loss": 56.3151, "step": 100570 }, { "epoch": 0.40635592706763574, "grad_norm": 405.04998779296875, "learning_rate": 3.732425808697622e-05, "loss": 54.2167, "step": 100580 }, { "epoch": 0.4063963283330034, "grad_norm": 821.4298706054688, "learning_rate": 3.732122093294603e-05, "loss": 67.599, "step": 100590 }, { "epoch": 0.406436729598371, "grad_norm": 777.3472290039062, "learning_rate": 3.731818353870729e-05, "loss": 58.1797, "step": 100600 }, { "epoch": 0.40647713086373866, "grad_norm": 1343.93359375, "learning_rate": 3.731514590431922e-05, "loss": 65.9451, "step": 100610 }, { "epoch": 0.4065175321291063, "grad_norm": 800.9368896484375, "learning_rate": 3.731210802984105e-05, "loss": 67.0606, "step": 100620 }, { "epoch": 0.40655793339447394, "grad_norm": 1185.2408447265625, "learning_rate": 3.730906991533199e-05, "loss": 76.7648, "step": 100630 }, { "epoch": 0.4065983346598415, "grad_norm": 545.9066772460938, "learning_rate": 3.7306031560851275e-05, "loss": 42.591, "step": 100640 }, { "epoch": 0.40663873592520916, "grad_norm": 551.5760498046875, "learning_rate": 3.730299296645814e-05, "loss": 81.1927, "step": 100650 }, { "epoch": 0.4066791371905768, "grad_norm": 3141.09326171875, "learning_rate": 3.729995413221183e-05, "loss": 85.903, "step": 100660 }, { "epoch": 0.40671953845594444, "grad_norm": 1117.46728515625, "learning_rate": 3.7296915058171566e-05, "loss": 59.2733, "step": 100670 }, { "epoch": 0.4067599397213121, "grad_norm": 2471.260986328125, "learning_rate": 3.729387574439662e-05, "loss": 68.098, "step": 100680 }, { "epoch": 0.4068003409866797, "grad_norm": 768.341552734375, "learning_rate": 3.729083619094624e-05, "loss": 73.1918, "step": 100690 }, { "epoch": 0.40684074225204736, "grad_norm": 0.0, "learning_rate": 3.7287796397879674e-05, "loss": 28.0081, "step": 100700 }, { "epoch": 0.40688114351741494, "grad_norm": 1264.1614990234375, "learning_rate": 3.72847563652562e-05, "loss": 45.6677, "step": 100710 }, { "epoch": 0.4069215447827826, "grad_norm": 287.6547546386719, "learning_rate": 3.7281716093135063e-05, "loss": 60.4336, "step": 100720 }, { "epoch": 0.4069619460481502, "grad_norm": 565.496826171875, "learning_rate": 3.7278675581575564e-05, "loss": 49.9453, "step": 100730 }, { "epoch": 0.40700234731351786, "grad_norm": 513.9716796875, "learning_rate": 3.7275634830636957e-05, "loss": 48.138, "step": 100740 }, { "epoch": 0.4070427485788855, "grad_norm": 798.354248046875, "learning_rate": 3.727259384037852e-05, "loss": 49.801, "step": 100750 }, { "epoch": 0.40708314984425314, "grad_norm": 901.558837890625, "learning_rate": 3.726955261085956e-05, "loss": 38.1762, "step": 100760 }, { "epoch": 0.40712355110962073, "grad_norm": 808.7131958007812, "learning_rate": 3.726651114213935e-05, "loss": 63.8376, "step": 100770 }, { "epoch": 0.40716395237498837, "grad_norm": 726.5311279296875, "learning_rate": 3.726346943427719e-05, "loss": 51.7929, "step": 100780 }, { "epoch": 0.407204353640356, "grad_norm": 367.88018798828125, "learning_rate": 3.726042748733238e-05, "loss": 85.8663, "step": 100790 }, { "epoch": 0.40724475490572365, "grad_norm": 368.78338623046875, "learning_rate": 3.725738530136422e-05, "loss": 33.2297, "step": 100800 }, { "epoch": 0.4072851561710913, "grad_norm": 942.588623046875, "learning_rate": 3.7254342876432026e-05, "loss": 74.4766, "step": 100810 }, { "epoch": 0.40732555743645893, "grad_norm": 704.6798095703125, "learning_rate": 3.7251300212595106e-05, "loss": 84.4977, "step": 100820 }, { "epoch": 0.4073659587018265, "grad_norm": 411.1459045410156, "learning_rate": 3.724825730991279e-05, "loss": 67.2837, "step": 100830 }, { "epoch": 0.40740635996719415, "grad_norm": 969.4332885742188, "learning_rate": 3.7245214168444386e-05, "loss": 73.3558, "step": 100840 }, { "epoch": 0.4074467612325618, "grad_norm": 857.5194091796875, "learning_rate": 3.724217078824923e-05, "loss": 87.0812, "step": 100850 }, { "epoch": 0.40748716249792943, "grad_norm": 789.8101196289062, "learning_rate": 3.723912716938665e-05, "loss": 62.384, "step": 100860 }, { "epoch": 0.4075275637632971, "grad_norm": 963.7822265625, "learning_rate": 3.723608331191598e-05, "loss": 70.5818, "step": 100870 }, { "epoch": 0.4075679650286647, "grad_norm": 1622.1092529296875, "learning_rate": 3.723303921589657e-05, "loss": 73.844, "step": 100880 }, { "epoch": 0.40760836629403235, "grad_norm": 4206.56787109375, "learning_rate": 3.722999488138776e-05, "loss": 86.4621, "step": 100890 }, { "epoch": 0.40764876755939994, "grad_norm": 780.2285766601562, "learning_rate": 3.722695030844891e-05, "loss": 42.5627, "step": 100900 }, { "epoch": 0.4076891688247676, "grad_norm": 1180.5223388671875, "learning_rate": 3.7223905497139366e-05, "loss": 59.9004, "step": 100910 }, { "epoch": 0.4077295700901352, "grad_norm": 609.8385009765625, "learning_rate": 3.722086044751849e-05, "loss": 39.532, "step": 100920 }, { "epoch": 0.40776997135550286, "grad_norm": 0.0, "learning_rate": 3.721781515964565e-05, "loss": 33.3144, "step": 100930 }, { "epoch": 0.4078103726208705, "grad_norm": 1108.013671875, "learning_rate": 3.721476963358021e-05, "loss": 67.8884, "step": 100940 }, { "epoch": 0.40785077388623814, "grad_norm": 503.3689270019531, "learning_rate": 3.721172386938155e-05, "loss": 63.0006, "step": 100950 }, { "epoch": 0.4078911751516057, "grad_norm": 2834.478515625, "learning_rate": 3.720867786710904e-05, "loss": 74.9134, "step": 100960 }, { "epoch": 0.40793157641697336, "grad_norm": 1194.1221923828125, "learning_rate": 3.7205631626822074e-05, "loss": 47.9969, "step": 100970 }, { "epoch": 0.407971977682341, "grad_norm": 798.5789794921875, "learning_rate": 3.7202585148580036e-05, "loss": 86.6126, "step": 100980 }, { "epoch": 0.40801237894770864, "grad_norm": 505.0960998535156, "learning_rate": 3.7199538432442316e-05, "loss": 59.1577, "step": 100990 }, { "epoch": 0.4080527802130763, "grad_norm": 767.0346069335938, "learning_rate": 3.719649147846832e-05, "loss": 64.2209, "step": 101000 }, { "epoch": 0.4080931814784439, "grad_norm": 577.536865234375, "learning_rate": 3.7193444286717436e-05, "loss": 64.3384, "step": 101010 }, { "epoch": 0.40813358274381156, "grad_norm": 668.321533203125, "learning_rate": 3.719039685724909e-05, "loss": 39.2026, "step": 101020 }, { "epoch": 0.40817398400917915, "grad_norm": 211.69288635253906, "learning_rate": 3.718734919012267e-05, "loss": 51.6079, "step": 101030 }, { "epoch": 0.4082143852745468, "grad_norm": 391.42852783203125, "learning_rate": 3.71843012853976e-05, "loss": 42.6838, "step": 101040 }, { "epoch": 0.4082547865399144, "grad_norm": 441.98419189453125, "learning_rate": 3.718125314313331e-05, "loss": 40.6746, "step": 101050 }, { "epoch": 0.40829518780528207, "grad_norm": 772.721923828125, "learning_rate": 3.7178204763389216e-05, "loss": 68.107, "step": 101060 }, { "epoch": 0.4083355890706497, "grad_norm": 286.0000305175781, "learning_rate": 3.717515614622476e-05, "loss": 79.75, "step": 101070 }, { "epoch": 0.40837599033601735, "grad_norm": 966.0552368164062, "learning_rate": 3.717210729169935e-05, "loss": 72.2583, "step": 101080 }, { "epoch": 0.40841639160138493, "grad_norm": 756.3565673828125, "learning_rate": 3.7169058199872455e-05, "loss": 69.5634, "step": 101090 }, { "epoch": 0.40845679286675257, "grad_norm": 754.9552001953125, "learning_rate": 3.71660088708035e-05, "loss": 58.8931, "step": 101100 }, { "epoch": 0.4084971941321202, "grad_norm": 1555.589599609375, "learning_rate": 3.716295930455194e-05, "loss": 79.8807, "step": 101110 }, { "epoch": 0.40853759539748785, "grad_norm": 1325.0550537109375, "learning_rate": 3.7159909501177226e-05, "loss": 68.7145, "step": 101120 }, { "epoch": 0.4085779966628555, "grad_norm": 620.8428955078125, "learning_rate": 3.715685946073881e-05, "loss": 55.6151, "step": 101130 }, { "epoch": 0.40861839792822313, "grad_norm": 955.8616943359375, "learning_rate": 3.7153809183296176e-05, "loss": 66.1763, "step": 101140 }, { "epoch": 0.4086587991935907, "grad_norm": 599.90234375, "learning_rate": 3.715075866890876e-05, "loss": 65.0991, "step": 101150 }, { "epoch": 0.40869920045895836, "grad_norm": 3034.8486328125, "learning_rate": 3.7147707917636046e-05, "loss": 69.5837, "step": 101160 }, { "epoch": 0.408739601724326, "grad_norm": 0.0, "learning_rate": 3.7144656929537524e-05, "loss": 45.1975, "step": 101170 }, { "epoch": 0.40878000298969364, "grad_norm": 502.8624572753906, "learning_rate": 3.714160570467266e-05, "loss": 73.8093, "step": 101180 }, { "epoch": 0.4088204042550613, "grad_norm": 0.0, "learning_rate": 3.7138554243100934e-05, "loss": 48.2301, "step": 101190 }, { "epoch": 0.4088608055204289, "grad_norm": 648.0908203125, "learning_rate": 3.713550254488185e-05, "loss": 103.6744, "step": 101200 }, { "epoch": 0.40890120678579656, "grad_norm": 1352.32421875, "learning_rate": 3.71324506100749e-05, "loss": 82.1861, "step": 101210 }, { "epoch": 0.40894160805116414, "grad_norm": 1041.673583984375, "learning_rate": 3.712939843873957e-05, "loss": 40.6213, "step": 101220 }, { "epoch": 0.4089820093165318, "grad_norm": 763.6616821289062, "learning_rate": 3.7126346030935374e-05, "loss": 55.786, "step": 101230 }, { "epoch": 0.4090224105818994, "grad_norm": 892.0834350585938, "learning_rate": 3.712329338672182e-05, "loss": 60.7086, "step": 101240 }, { "epoch": 0.40906281184726706, "grad_norm": 788.8603515625, "learning_rate": 3.712024050615843e-05, "loss": 53.5069, "step": 101250 }, { "epoch": 0.4091032131126347, "grad_norm": 1101.8909912109375, "learning_rate": 3.71171873893047e-05, "loss": 47.3406, "step": 101260 }, { "epoch": 0.40914361437800234, "grad_norm": 296.9659729003906, "learning_rate": 3.711413403622017e-05, "loss": 55.5417, "step": 101270 }, { "epoch": 0.4091840156433699, "grad_norm": 653.2747802734375, "learning_rate": 3.711108044696436e-05, "loss": 71.033, "step": 101280 }, { "epoch": 0.40922441690873756, "grad_norm": 671.96533203125, "learning_rate": 3.710802662159679e-05, "loss": 58.3862, "step": 101290 }, { "epoch": 0.4092648181741052, "grad_norm": 519.48095703125, "learning_rate": 3.710497256017702e-05, "loss": 90.0393, "step": 101300 }, { "epoch": 0.40930521943947284, "grad_norm": 837.6402587890625, "learning_rate": 3.7101918262764576e-05, "loss": 60.9863, "step": 101310 }, { "epoch": 0.4093456207048405, "grad_norm": 907.7230224609375, "learning_rate": 3.7098863729419e-05, "loss": 80.4925, "step": 101320 }, { "epoch": 0.4093860219702081, "grad_norm": 2056.220947265625, "learning_rate": 3.709580896019985e-05, "loss": 49.3788, "step": 101330 }, { "epoch": 0.40942642323557576, "grad_norm": 531.6597900390625, "learning_rate": 3.7092753955166674e-05, "loss": 81.7104, "step": 101340 }, { "epoch": 0.40946682450094335, "grad_norm": 714.7631225585938, "learning_rate": 3.708969871437904e-05, "loss": 63.8878, "step": 101350 }, { "epoch": 0.409507225766311, "grad_norm": 1157.5352783203125, "learning_rate": 3.7086643237896504e-05, "loss": 41.3693, "step": 101360 }, { "epoch": 0.40954762703167863, "grad_norm": 531.5262451171875, "learning_rate": 3.708358752577863e-05, "loss": 71.7268, "step": 101370 }, { "epoch": 0.40958802829704627, "grad_norm": 447.32330322265625, "learning_rate": 3.7080531578085e-05, "loss": 66.6252, "step": 101380 }, { "epoch": 0.4096284295624139, "grad_norm": 983.9207153320312, "learning_rate": 3.707747539487519e-05, "loss": 62.158, "step": 101390 }, { "epoch": 0.40966883082778155, "grad_norm": 294.036376953125, "learning_rate": 3.7074418976208766e-05, "loss": 54.1539, "step": 101400 }, { "epoch": 0.40970923209314913, "grad_norm": 2036.715087890625, "learning_rate": 3.707136232214534e-05, "loss": 53.9095, "step": 101410 }, { "epoch": 0.4097496333585168, "grad_norm": 1761.55908203125, "learning_rate": 3.706830543274449e-05, "loss": 57.585, "step": 101420 }, { "epoch": 0.4097900346238844, "grad_norm": 1144.66064453125, "learning_rate": 3.706524830806581e-05, "loss": 80.4368, "step": 101430 }, { "epoch": 0.40983043588925205, "grad_norm": 457.09368896484375, "learning_rate": 3.706219094816891e-05, "loss": 80.4445, "step": 101440 }, { "epoch": 0.4098708371546197, "grad_norm": 451.6939697265625, "learning_rate": 3.705913335311338e-05, "loss": 46.0204, "step": 101450 }, { "epoch": 0.40991123841998733, "grad_norm": 675.3792724609375, "learning_rate": 3.705607552295883e-05, "loss": 29.4593, "step": 101460 }, { "epoch": 0.4099516396853549, "grad_norm": 473.2438659667969, "learning_rate": 3.7053017457764895e-05, "loss": 69.6346, "step": 101470 }, { "epoch": 0.40999204095072256, "grad_norm": 1086.820556640625, "learning_rate": 3.704995915759117e-05, "loss": 52.2497, "step": 101480 }, { "epoch": 0.4100324422160902, "grad_norm": 1838.8997802734375, "learning_rate": 3.704690062249729e-05, "loss": 67.0797, "step": 101490 }, { "epoch": 0.41007284348145784, "grad_norm": 1140.804931640625, "learning_rate": 3.704384185254288e-05, "loss": 59.0365, "step": 101500 }, { "epoch": 0.4101132447468255, "grad_norm": 967.5361938476562, "learning_rate": 3.7040782847787576e-05, "loss": 62.3368, "step": 101510 }, { "epoch": 0.4101536460121931, "grad_norm": 862.10107421875, "learning_rate": 3.7037723608291015e-05, "loss": 66.9464, "step": 101520 }, { "epoch": 0.41019404727756076, "grad_norm": 333.78411865234375, "learning_rate": 3.703466413411282e-05, "loss": 57.9339, "step": 101530 }, { "epoch": 0.41023444854292834, "grad_norm": 1024.932861328125, "learning_rate": 3.703160442531266e-05, "loss": 50.8443, "step": 101540 }, { "epoch": 0.410274849808296, "grad_norm": 1006.3261108398438, "learning_rate": 3.702854448195019e-05, "loss": 66.2163, "step": 101550 }, { "epoch": 0.4103152510736636, "grad_norm": 720.7424926757812, "learning_rate": 3.7025484304085034e-05, "loss": 61.6989, "step": 101560 }, { "epoch": 0.41035565233903126, "grad_norm": 972.9140625, "learning_rate": 3.702242389177687e-05, "loss": 58.963, "step": 101570 }, { "epoch": 0.4103960536043989, "grad_norm": 819.4244384765625, "learning_rate": 3.701936324508537e-05, "loss": 58.8996, "step": 101580 }, { "epoch": 0.41043645486976654, "grad_norm": 1408.4876708984375, "learning_rate": 3.7016302364070196e-05, "loss": 104.3692, "step": 101590 }, { "epoch": 0.4104768561351341, "grad_norm": 1001.0791625976562, "learning_rate": 3.701324124879102e-05, "loss": 76.1169, "step": 101600 }, { "epoch": 0.41051725740050177, "grad_norm": 1362.98193359375, "learning_rate": 3.701017989930752e-05, "loss": 79.0587, "step": 101610 }, { "epoch": 0.4105576586658694, "grad_norm": 494.7317199707031, "learning_rate": 3.7007118315679384e-05, "loss": 41.3256, "step": 101620 }, { "epoch": 0.41059805993123705, "grad_norm": 418.2009582519531, "learning_rate": 3.700405649796629e-05, "loss": 77.2711, "step": 101630 }, { "epoch": 0.4106384611966047, "grad_norm": 934.8997192382812, "learning_rate": 3.700099444622794e-05, "loss": 64.7147, "step": 101640 }, { "epoch": 0.4106788624619723, "grad_norm": 829.1802978515625, "learning_rate": 3.699793216052402e-05, "loss": 57.4641, "step": 101650 }, { "epoch": 0.41071926372733997, "grad_norm": 536.0474853515625, "learning_rate": 3.699486964091423e-05, "loss": 79.0541, "step": 101660 }, { "epoch": 0.41075966499270755, "grad_norm": 808.2246704101562, "learning_rate": 3.699180688745829e-05, "loss": 50.0221, "step": 101670 }, { "epoch": 0.4108000662580752, "grad_norm": 558.8209838867188, "learning_rate": 3.6988743900215894e-05, "loss": 52.1435, "step": 101680 }, { "epoch": 0.41084046752344283, "grad_norm": 1482.892578125, "learning_rate": 3.698568067924677e-05, "loss": 49.8897, "step": 101690 }, { "epoch": 0.41088086878881047, "grad_norm": 494.3254699707031, "learning_rate": 3.698261722461063e-05, "loss": 64.2092, "step": 101700 }, { "epoch": 0.4109212700541781, "grad_norm": 455.20489501953125, "learning_rate": 3.6979553536367194e-05, "loss": 38.1696, "step": 101710 }, { "epoch": 0.41096167131954575, "grad_norm": 528.5197143554688, "learning_rate": 3.69764896145762e-05, "loss": 42.2116, "step": 101720 }, { "epoch": 0.41100207258491334, "grad_norm": 935.036865234375, "learning_rate": 3.697342545929737e-05, "loss": 76.0732, "step": 101730 }, { "epoch": 0.411042473850281, "grad_norm": 1118.97216796875, "learning_rate": 3.697036107059044e-05, "loss": 37.8624, "step": 101740 }, { "epoch": 0.4110828751156486, "grad_norm": 1046.9444580078125, "learning_rate": 3.696729644851518e-05, "loss": 34.6092, "step": 101750 }, { "epoch": 0.41112327638101626, "grad_norm": 427.7893981933594, "learning_rate": 3.696423159313129e-05, "loss": 67.689, "step": 101760 }, { "epoch": 0.4111636776463839, "grad_norm": 655.9987182617188, "learning_rate": 3.696116650449856e-05, "loss": 42.4555, "step": 101770 }, { "epoch": 0.41120407891175154, "grad_norm": 911.2579956054688, "learning_rate": 3.6958101182676726e-05, "loss": 61.5867, "step": 101780 }, { "epoch": 0.4112444801771191, "grad_norm": 2679.955078125, "learning_rate": 3.6955035627725557e-05, "loss": 62.913, "step": 101790 }, { "epoch": 0.41128488144248676, "grad_norm": 736.22802734375, "learning_rate": 3.695196983970481e-05, "loss": 54.0211, "step": 101800 }, { "epoch": 0.4113252827078544, "grad_norm": 1243.8758544921875, "learning_rate": 3.694890381867425e-05, "loss": 58.7315, "step": 101810 }, { "epoch": 0.41136568397322204, "grad_norm": 828.197265625, "learning_rate": 3.6945837564693666e-05, "loss": 65.7947, "step": 101820 }, { "epoch": 0.4114060852385897, "grad_norm": 682.7947387695312, "learning_rate": 3.6942771077822835e-05, "loss": 57.709, "step": 101830 }, { "epoch": 0.4114464865039573, "grad_norm": 1457.8909912109375, "learning_rate": 3.693970435812153e-05, "loss": 53.162, "step": 101840 }, { "epoch": 0.41148688776932496, "grad_norm": 1344.3275146484375, "learning_rate": 3.693663740564953e-05, "loss": 54.1553, "step": 101850 }, { "epoch": 0.41152728903469254, "grad_norm": 0.0, "learning_rate": 3.693357022046665e-05, "loss": 47.7167, "step": 101860 }, { "epoch": 0.4115676903000602, "grad_norm": 927.4996337890625, "learning_rate": 3.693050280263268e-05, "loss": 74.7277, "step": 101870 }, { "epoch": 0.4116080915654278, "grad_norm": 401.41693115234375, "learning_rate": 3.6927435152207406e-05, "loss": 51.1262, "step": 101880 }, { "epoch": 0.41164849283079546, "grad_norm": 756.7000122070312, "learning_rate": 3.6924367269250644e-05, "loss": 43.8885, "step": 101890 }, { "epoch": 0.4116888940961631, "grad_norm": 1407.22412109375, "learning_rate": 3.69212991538222e-05, "loss": 51.1716, "step": 101900 }, { "epoch": 0.41172929536153074, "grad_norm": 1306.109619140625, "learning_rate": 3.691823080598189e-05, "loss": 56.4997, "step": 101910 }, { "epoch": 0.41176969662689833, "grad_norm": 890.89013671875, "learning_rate": 3.6915162225789546e-05, "loss": 88.45, "step": 101920 }, { "epoch": 0.41181009789226597, "grad_norm": 195.86825561523438, "learning_rate": 3.691209341330497e-05, "loss": 70.7165, "step": 101930 }, { "epoch": 0.4118504991576336, "grad_norm": 1073.44677734375, "learning_rate": 3.690902436858801e-05, "loss": 88.2749, "step": 101940 }, { "epoch": 0.41189090042300125, "grad_norm": 310.966552734375, "learning_rate": 3.690595509169848e-05, "loss": 101.5226, "step": 101950 }, { "epoch": 0.4119313016883689, "grad_norm": 515.8529663085938, "learning_rate": 3.690288558269623e-05, "loss": 55.0783, "step": 101960 }, { "epoch": 0.41197170295373653, "grad_norm": 548.2571411132812, "learning_rate": 3.68998158416411e-05, "loss": 51.7741, "step": 101970 }, { "epoch": 0.41201210421910417, "grad_norm": 712.4876708984375, "learning_rate": 3.689674586859292e-05, "loss": 55.8182, "step": 101980 }, { "epoch": 0.41205250548447175, "grad_norm": 1289.3074951171875, "learning_rate": 3.689367566361157e-05, "loss": 83.5634, "step": 101990 }, { "epoch": 0.4120929067498394, "grad_norm": 378.75164794921875, "learning_rate": 3.689060522675689e-05, "loss": 62.7789, "step": 102000 }, { "epoch": 0.41213330801520703, "grad_norm": 788.7225341796875, "learning_rate": 3.6887534558088727e-05, "loss": 51.0795, "step": 102010 }, { "epoch": 0.4121737092805747, "grad_norm": 419.6149597167969, "learning_rate": 3.688446365766696e-05, "loss": 72.9321, "step": 102020 }, { "epoch": 0.4122141105459423, "grad_norm": 424.8109436035156, "learning_rate": 3.688139252555146e-05, "loss": 51.3233, "step": 102030 }, { "epoch": 0.41225451181130995, "grad_norm": 340.1546630859375, "learning_rate": 3.6878321161802104e-05, "loss": 99.2203, "step": 102040 }, { "epoch": 0.41229491307667754, "grad_norm": 572.083984375, "learning_rate": 3.6875249566478745e-05, "loss": 62.0742, "step": 102050 }, { "epoch": 0.4123353143420452, "grad_norm": 639.38818359375, "learning_rate": 3.687217773964129e-05, "loss": 72.4136, "step": 102060 }, { "epoch": 0.4123757156074128, "grad_norm": 405.4486389160156, "learning_rate": 3.686910568134962e-05, "loss": 57.4386, "step": 102070 }, { "epoch": 0.41241611687278046, "grad_norm": 938.0454711914062, "learning_rate": 3.686603339166362e-05, "loss": 64.274, "step": 102080 }, { "epoch": 0.4124565181381481, "grad_norm": 1430.1524658203125, "learning_rate": 3.686296087064319e-05, "loss": 62.3886, "step": 102090 }, { "epoch": 0.41249691940351574, "grad_norm": 796.8814086914062, "learning_rate": 3.685988811834823e-05, "loss": 85.7702, "step": 102100 }, { "epoch": 0.4125373206688833, "grad_norm": 1625.54833984375, "learning_rate": 3.685681513483865e-05, "loss": 86.8409, "step": 102110 }, { "epoch": 0.41257772193425096, "grad_norm": 754.6946411132812, "learning_rate": 3.685374192017436e-05, "loss": 42.4442, "step": 102120 }, { "epoch": 0.4126181231996186, "grad_norm": 599.8799438476562, "learning_rate": 3.6850668474415255e-05, "loss": 42.2114, "step": 102130 }, { "epoch": 0.41265852446498624, "grad_norm": 919.1255493164062, "learning_rate": 3.684759479762127e-05, "loss": 66.864, "step": 102140 }, { "epoch": 0.4126989257303539, "grad_norm": 352.7879943847656, "learning_rate": 3.684452088985233e-05, "loss": 46.0815, "step": 102150 }, { "epoch": 0.4127393269957215, "grad_norm": 1374.6343994140625, "learning_rate": 3.6841446751168355e-05, "loss": 69.1762, "step": 102160 }, { "epoch": 0.41277972826108916, "grad_norm": 597.5450439453125, "learning_rate": 3.683837238162928e-05, "loss": 85.0557, "step": 102170 }, { "epoch": 0.41282012952645675, "grad_norm": 674.8424072265625, "learning_rate": 3.683529778129503e-05, "loss": 79.6359, "step": 102180 }, { "epoch": 0.4128605307918244, "grad_norm": 624.8746337890625, "learning_rate": 3.683222295022557e-05, "loss": 39.7842, "step": 102190 }, { "epoch": 0.412900932057192, "grad_norm": 1277.397705078125, "learning_rate": 3.682914788848083e-05, "loss": 58.8937, "step": 102200 }, { "epoch": 0.41294133332255967, "grad_norm": 722.8589477539062, "learning_rate": 3.682607259612076e-05, "loss": 80.0508, "step": 102210 }, { "epoch": 0.4129817345879273, "grad_norm": 824.6448364257812, "learning_rate": 3.682299707320532e-05, "loss": 53.1664, "step": 102220 }, { "epoch": 0.41302213585329495, "grad_norm": 234.46766662597656, "learning_rate": 3.681992131979446e-05, "loss": 66.3412, "step": 102230 }, { "epoch": 0.41306253711866253, "grad_norm": 1732.6822509765625, "learning_rate": 3.681684533594815e-05, "loss": 118.9763, "step": 102240 }, { "epoch": 0.41310293838403017, "grad_norm": 985.6689453125, "learning_rate": 3.681376912172636e-05, "loss": 48.3016, "step": 102250 }, { "epoch": 0.4131433396493978, "grad_norm": 534.2235107421875, "learning_rate": 3.6810692677189046e-05, "loss": 41.1004, "step": 102260 }, { "epoch": 0.41318374091476545, "grad_norm": 0.0, "learning_rate": 3.680761600239621e-05, "loss": 47.6259, "step": 102270 }, { "epoch": 0.4132241421801331, "grad_norm": 2476.089111328125, "learning_rate": 3.680453909740782e-05, "loss": 60.4867, "step": 102280 }, { "epoch": 0.41326454344550073, "grad_norm": 733.741455078125, "learning_rate": 3.680146196228386e-05, "loss": 58.1537, "step": 102290 }, { "epoch": 0.41330494471086837, "grad_norm": 543.4186401367188, "learning_rate": 3.6798384597084325e-05, "loss": 42.9503, "step": 102300 }, { "epoch": 0.41334534597623596, "grad_norm": 535.3422241210938, "learning_rate": 3.67953070018692e-05, "loss": 76.8054, "step": 102310 }, { "epoch": 0.4133857472416036, "grad_norm": 851.113037109375, "learning_rate": 3.679222917669851e-05, "loss": 53.6934, "step": 102320 }, { "epoch": 0.41342614850697124, "grad_norm": 3898.60693359375, "learning_rate": 3.6789151121632226e-05, "loss": 58.586, "step": 102330 }, { "epoch": 0.4134665497723389, "grad_norm": 425.6492919921875, "learning_rate": 3.678607283673037e-05, "loss": 61.4479, "step": 102340 }, { "epoch": 0.4135069510377065, "grad_norm": 877.3761596679688, "learning_rate": 3.678299432205296e-05, "loss": 73.7707, "step": 102350 }, { "epoch": 0.41354735230307416, "grad_norm": 763.9284057617188, "learning_rate": 3.6779915577660015e-05, "loss": 56.7363, "step": 102360 }, { "epoch": 0.41358775356844174, "grad_norm": 1059.4683837890625, "learning_rate": 3.677683660361155e-05, "loss": 52.8749, "step": 102370 }, { "epoch": 0.4136281548338094, "grad_norm": 408.92071533203125, "learning_rate": 3.677375739996759e-05, "loss": 53.0879, "step": 102380 }, { "epoch": 0.413668556099177, "grad_norm": 529.644775390625, "learning_rate": 3.677067796678817e-05, "loss": 53.6878, "step": 102390 }, { "epoch": 0.41370895736454466, "grad_norm": 888.7438354492188, "learning_rate": 3.6767598304133324e-05, "loss": 44.5858, "step": 102400 }, { "epoch": 0.4137493586299123, "grad_norm": 1067.380615234375, "learning_rate": 3.676451841206308e-05, "loss": 42.4786, "step": 102410 }, { "epoch": 0.41378975989527994, "grad_norm": 2619.344482421875, "learning_rate": 3.67614382906375e-05, "loss": 59.8869, "step": 102420 }, { "epoch": 0.4138301611606475, "grad_norm": 730.7283325195312, "learning_rate": 3.675835793991662e-05, "loss": 53.2614, "step": 102430 }, { "epoch": 0.41387056242601516, "grad_norm": 540.2338256835938, "learning_rate": 3.67552773599605e-05, "loss": 53.5669, "step": 102440 }, { "epoch": 0.4139109636913828, "grad_norm": 424.4781799316406, "learning_rate": 3.675219655082921e-05, "loss": 40.3122, "step": 102450 }, { "epoch": 0.41395136495675044, "grad_norm": 1885.5537109375, "learning_rate": 3.6749115512582786e-05, "loss": 59.4535, "step": 102460 }, { "epoch": 0.4139917662221181, "grad_norm": 790.8214111328125, "learning_rate": 3.674603424528131e-05, "loss": 68.5578, "step": 102470 }, { "epoch": 0.4140321674874857, "grad_norm": 788.549560546875, "learning_rate": 3.674295274898485e-05, "loss": 115.0106, "step": 102480 }, { "epoch": 0.41407256875285336, "grad_norm": 3169.2861328125, "learning_rate": 3.673987102375348e-05, "loss": 78.3599, "step": 102490 }, { "epoch": 0.41411297001822095, "grad_norm": 1180.6651611328125, "learning_rate": 3.673678906964727e-05, "loss": 69.2081, "step": 102500 }, { "epoch": 0.4141533712835886, "grad_norm": 870.7091674804688, "learning_rate": 3.673370688672632e-05, "loss": 76.4967, "step": 102510 }, { "epoch": 0.41419377254895623, "grad_norm": 535.1434326171875, "learning_rate": 3.673062447505072e-05, "loss": 37.986, "step": 102520 }, { "epoch": 0.41423417381432387, "grad_norm": 574.4082641601562, "learning_rate": 3.672754183468055e-05, "loss": 43.2866, "step": 102530 }, { "epoch": 0.4142745750796915, "grad_norm": 1545.2213134765625, "learning_rate": 3.672445896567592e-05, "loss": 84.1171, "step": 102540 }, { "epoch": 0.41431497634505915, "grad_norm": 625.9170532226562, "learning_rate": 3.6721375868096924e-05, "loss": 44.562, "step": 102550 }, { "epoch": 0.41435537761042673, "grad_norm": 948.0220336914062, "learning_rate": 3.6718292542003666e-05, "loss": 68.8121, "step": 102560 }, { "epoch": 0.4143957788757944, "grad_norm": 400.49859619140625, "learning_rate": 3.671520898745627e-05, "loss": 59.8667, "step": 102570 }, { "epoch": 0.414436180141162, "grad_norm": 329.8058776855469, "learning_rate": 3.671212520451484e-05, "loss": 76.8248, "step": 102580 }, { "epoch": 0.41447658140652965, "grad_norm": 916.3284301757812, "learning_rate": 3.670904119323949e-05, "loss": 74.5609, "step": 102590 }, { "epoch": 0.4145169826718973, "grad_norm": 561.3569946289062, "learning_rate": 3.6705956953690364e-05, "loss": 43.266, "step": 102600 }, { "epoch": 0.41455738393726493, "grad_norm": 627.5663452148438, "learning_rate": 3.670287248592758e-05, "loss": 49.7998, "step": 102610 }, { "epoch": 0.4145977852026326, "grad_norm": 919.4302368164062, "learning_rate": 3.669978779001127e-05, "loss": 72.0918, "step": 102620 }, { "epoch": 0.41463818646800016, "grad_norm": 536.8106079101562, "learning_rate": 3.669670286600157e-05, "loss": 84.2724, "step": 102630 }, { "epoch": 0.4146785877333678, "grad_norm": 926.4111328125, "learning_rate": 3.6693617713958634e-05, "loss": 49.7705, "step": 102640 }, { "epoch": 0.41471898899873544, "grad_norm": 511.818359375, "learning_rate": 3.66905323339426e-05, "loss": 46.5629, "step": 102650 }, { "epoch": 0.4147593902641031, "grad_norm": 1487.813720703125, "learning_rate": 3.668744672601361e-05, "loss": 55.226, "step": 102660 }, { "epoch": 0.4147997915294707, "grad_norm": 1999.3150634765625, "learning_rate": 3.668436089023184e-05, "loss": 84.6968, "step": 102670 }, { "epoch": 0.41484019279483836, "grad_norm": 443.5814208984375, "learning_rate": 3.668127482665743e-05, "loss": 59.7227, "step": 102680 }, { "epoch": 0.41488059406020594, "grad_norm": 798.040283203125, "learning_rate": 3.667818853535056e-05, "loss": 88.5318, "step": 102690 }, { "epoch": 0.4149209953255736, "grad_norm": 1065.25927734375, "learning_rate": 3.667510201637139e-05, "loss": 46.2704, "step": 102700 }, { "epoch": 0.4149613965909412, "grad_norm": 213.02117919921875, "learning_rate": 3.667201526978009e-05, "loss": 72.9519, "step": 102710 }, { "epoch": 0.41500179785630886, "grad_norm": 582.4226684570312, "learning_rate": 3.6668928295636854e-05, "loss": 43.031, "step": 102720 }, { "epoch": 0.4150421991216765, "grad_norm": 550.1882934570312, "learning_rate": 3.666584109400185e-05, "loss": 56.0368, "step": 102730 }, { "epoch": 0.41508260038704414, "grad_norm": 563.0902709960938, "learning_rate": 3.666275366493526e-05, "loss": 58.4682, "step": 102740 }, { "epoch": 0.4151230016524117, "grad_norm": 1416.572998046875, "learning_rate": 3.665966600849728e-05, "loss": 48.4765, "step": 102750 }, { "epoch": 0.41516340291777937, "grad_norm": 977.8565063476562, "learning_rate": 3.665657812474812e-05, "loss": 99.4884, "step": 102760 }, { "epoch": 0.415203804183147, "grad_norm": 605.6978149414062, "learning_rate": 3.6653490013747965e-05, "loss": 62.5299, "step": 102770 }, { "epoch": 0.41524420544851465, "grad_norm": 1297.1224365234375, "learning_rate": 3.665040167555702e-05, "loss": 53.5037, "step": 102780 }, { "epoch": 0.4152846067138823, "grad_norm": 823.6423950195312, "learning_rate": 3.664731311023549e-05, "loss": 50.0008, "step": 102790 }, { "epoch": 0.4153250079792499, "grad_norm": 611.2938842773438, "learning_rate": 3.664422431784361e-05, "loss": 50.3032, "step": 102800 }, { "epoch": 0.41536540924461757, "grad_norm": 713.4339599609375, "learning_rate": 3.6641135298441576e-05, "loss": 65.9606, "step": 102810 }, { "epoch": 0.41540581050998515, "grad_norm": 908.730712890625, "learning_rate": 3.6638046052089616e-05, "loss": 72.0269, "step": 102820 }, { "epoch": 0.4154462117753528, "grad_norm": 714.7061767578125, "learning_rate": 3.6634956578847954e-05, "loss": 85.8837, "step": 102830 }, { "epoch": 0.41548661304072043, "grad_norm": 490.82269287109375, "learning_rate": 3.663186687877682e-05, "loss": 57.8224, "step": 102840 }, { "epoch": 0.41552701430608807, "grad_norm": 954.6878662109375, "learning_rate": 3.662877695193646e-05, "loss": 58.0162, "step": 102850 }, { "epoch": 0.4155674155714557, "grad_norm": 220.2834930419922, "learning_rate": 3.6625686798387106e-05, "loss": 67.6011, "step": 102860 }, { "epoch": 0.41560781683682335, "grad_norm": 1146.68994140625, "learning_rate": 3.6622596418188995e-05, "loss": 86.136, "step": 102870 }, { "epoch": 0.41564821810219094, "grad_norm": 983.2677001953125, "learning_rate": 3.661950581140239e-05, "loss": 32.4779, "step": 102880 }, { "epoch": 0.4156886193675586, "grad_norm": 694.6705322265625, "learning_rate": 3.661641497808754e-05, "loss": 52.4579, "step": 102890 }, { "epoch": 0.4157290206329262, "grad_norm": 500.0966491699219, "learning_rate": 3.66133239183047e-05, "loss": 54.7675, "step": 102900 }, { "epoch": 0.41576942189829386, "grad_norm": 847.8213500976562, "learning_rate": 3.6610232632114124e-05, "loss": 55.5015, "step": 102910 }, { "epoch": 0.4158098231636615, "grad_norm": 1668.6177978515625, "learning_rate": 3.6607141119576084e-05, "loss": 98.7418, "step": 102920 }, { "epoch": 0.41585022442902914, "grad_norm": 397.88885498046875, "learning_rate": 3.6604049380750855e-05, "loss": 49.9987, "step": 102930 }, { "epoch": 0.4158906256943968, "grad_norm": 378.2391662597656, "learning_rate": 3.660095741569871e-05, "loss": 62.2791, "step": 102940 }, { "epoch": 0.41593102695976436, "grad_norm": 539.7728271484375, "learning_rate": 3.659786522447993e-05, "loss": 57.2088, "step": 102950 }, { "epoch": 0.415971428225132, "grad_norm": 925.6470336914062, "learning_rate": 3.659477280715479e-05, "loss": 54.7426, "step": 102960 }, { "epoch": 0.41601182949049964, "grad_norm": 0.0, "learning_rate": 3.659168016378359e-05, "loss": 54.7942, "step": 102970 }, { "epoch": 0.4160522307558673, "grad_norm": 1118.713623046875, "learning_rate": 3.658858729442662e-05, "loss": 45.2592, "step": 102980 }, { "epoch": 0.4160926320212349, "grad_norm": 478.5465087890625, "learning_rate": 3.658549419914417e-05, "loss": 78.469, "step": 102990 }, { "epoch": 0.41613303328660256, "grad_norm": 556.0774536132812, "learning_rate": 3.6582400877996546e-05, "loss": 56.5781, "step": 103000 }, { "epoch": 0.41617343455197015, "grad_norm": 360.80340576171875, "learning_rate": 3.6579307331044054e-05, "loss": 52.6853, "step": 103010 }, { "epoch": 0.4162138358173378, "grad_norm": 601.7576293945312, "learning_rate": 3.657621355834701e-05, "loss": 61.4476, "step": 103020 }, { "epoch": 0.4162542370827054, "grad_norm": 592.1106567382812, "learning_rate": 3.657311955996571e-05, "loss": 51.4591, "step": 103030 }, { "epoch": 0.41629463834807306, "grad_norm": 637.8342895507812, "learning_rate": 3.657002533596049e-05, "loss": 61.3845, "step": 103040 }, { "epoch": 0.4163350396134407, "grad_norm": 371.7774963378906, "learning_rate": 3.6566930886391674e-05, "loss": 74.0396, "step": 103050 }, { "epoch": 0.41637544087880834, "grad_norm": 375.3711242675781, "learning_rate": 3.656383621131959e-05, "loss": 56.7201, "step": 103060 }, { "epoch": 0.41641584214417593, "grad_norm": 538.1626586914062, "learning_rate": 3.656074131080457e-05, "loss": 51.9634, "step": 103070 }, { "epoch": 0.41645624340954357, "grad_norm": 1070.9267578125, "learning_rate": 3.655764618490692e-05, "loss": 92.4124, "step": 103080 }, { "epoch": 0.4164966446749112, "grad_norm": 656.9863891601562, "learning_rate": 3.655455083368703e-05, "loss": 65.6765, "step": 103090 }, { "epoch": 0.41653704594027885, "grad_norm": 841.900634765625, "learning_rate": 3.655145525720522e-05, "loss": 68.1387, "step": 103100 }, { "epoch": 0.4165774472056465, "grad_norm": 409.5385437011719, "learning_rate": 3.6548359455521836e-05, "loss": 41.3072, "step": 103110 }, { "epoch": 0.41661784847101413, "grad_norm": 598.8934936523438, "learning_rate": 3.654526342869724e-05, "loss": 64.1787, "step": 103120 }, { "epoch": 0.41665824973638177, "grad_norm": 938.2333374023438, "learning_rate": 3.654216717679179e-05, "loss": 63.3869, "step": 103130 }, { "epoch": 0.41669865100174935, "grad_norm": 0.0, "learning_rate": 3.6539070699865853e-05, "loss": 45.5103, "step": 103140 }, { "epoch": 0.416739052267117, "grad_norm": 555.0664672851562, "learning_rate": 3.653597399797979e-05, "loss": 78.243, "step": 103150 }, { "epoch": 0.41677945353248463, "grad_norm": 200.4738006591797, "learning_rate": 3.6532877071193974e-05, "loss": 45.6943, "step": 103160 }, { "epoch": 0.4168198547978523, "grad_norm": 773.7369384765625, "learning_rate": 3.652977991956878e-05, "loss": 76.8474, "step": 103170 }, { "epoch": 0.4168602560632199, "grad_norm": 1079.94775390625, "learning_rate": 3.6526682543164595e-05, "loss": 42.3459, "step": 103180 }, { "epoch": 0.41690065732858755, "grad_norm": 919.0646362304688, "learning_rate": 3.6523584942041794e-05, "loss": 57.7047, "step": 103190 }, { "epoch": 0.41694105859395514, "grad_norm": 891.546630859375, "learning_rate": 3.6520487116260776e-05, "loss": 73.0311, "step": 103200 }, { "epoch": 0.4169814598593228, "grad_norm": 673.0984497070312, "learning_rate": 3.6517389065881925e-05, "loss": 52.7989, "step": 103210 }, { "epoch": 0.4170218611246904, "grad_norm": 622.553955078125, "learning_rate": 3.651429079096566e-05, "loss": 61.8157, "step": 103220 }, { "epoch": 0.41706226239005806, "grad_norm": 590.778564453125, "learning_rate": 3.651119229157235e-05, "loss": 42.1001, "step": 103230 }, { "epoch": 0.4171026636554257, "grad_norm": 783.4470825195312, "learning_rate": 3.650809356776242e-05, "loss": 57.0175, "step": 103240 }, { "epoch": 0.41714306492079334, "grad_norm": 1166.0804443359375, "learning_rate": 3.6504994619596294e-05, "loss": 72.4459, "step": 103250 }, { "epoch": 0.4171834661861609, "grad_norm": 981.3162231445312, "learning_rate": 3.650189544713437e-05, "loss": 64.13, "step": 103260 }, { "epoch": 0.41722386745152856, "grad_norm": 556.0223388671875, "learning_rate": 3.649879605043707e-05, "loss": 61.0497, "step": 103270 }, { "epoch": 0.4172642687168962, "grad_norm": 1262.341064453125, "learning_rate": 3.6495696429564823e-05, "loss": 66.8657, "step": 103280 }, { "epoch": 0.41730466998226384, "grad_norm": 322.91082763671875, "learning_rate": 3.649259658457805e-05, "loss": 62.4766, "step": 103290 }, { "epoch": 0.4173450712476315, "grad_norm": 603.0479736328125, "learning_rate": 3.6489496515537204e-05, "loss": 65.0343, "step": 103300 }, { "epoch": 0.4173854725129991, "grad_norm": 724.8381958007812, "learning_rate": 3.648639622250269e-05, "loss": 71.2941, "step": 103310 }, { "epoch": 0.41742587377836676, "grad_norm": 709.285888671875, "learning_rate": 3.648329570553498e-05, "loss": 69.7722, "step": 103320 }, { "epoch": 0.41746627504373435, "grad_norm": 1008.2940673828125, "learning_rate": 3.648019496469451e-05, "loss": 61.9354, "step": 103330 }, { "epoch": 0.417506676309102, "grad_norm": 716.356689453125, "learning_rate": 3.647709400004172e-05, "loss": 102.8124, "step": 103340 }, { "epoch": 0.4175470775744696, "grad_norm": 661.27880859375, "learning_rate": 3.647399281163708e-05, "loss": 54.8302, "step": 103350 }, { "epoch": 0.41758747883983727, "grad_norm": 1043.45263671875, "learning_rate": 3.647089139954104e-05, "loss": 60.8454, "step": 103360 }, { "epoch": 0.4176278801052049, "grad_norm": 862.45263671875, "learning_rate": 3.646778976381407e-05, "loss": 50.1279, "step": 103370 }, { "epoch": 0.41766828137057255, "grad_norm": 924.0723266601562, "learning_rate": 3.646468790451663e-05, "loss": 41.2529, "step": 103380 }, { "epoch": 0.41770868263594013, "grad_norm": 597.7651977539062, "learning_rate": 3.64615858217092e-05, "loss": 57.1287, "step": 103390 }, { "epoch": 0.41774908390130777, "grad_norm": 379.25152587890625, "learning_rate": 3.645848351545225e-05, "loss": 62.9141, "step": 103400 }, { "epoch": 0.4177894851666754, "grad_norm": 953.6447143554688, "learning_rate": 3.645538098580627e-05, "loss": 53.8604, "step": 103410 }, { "epoch": 0.41782988643204305, "grad_norm": 575.6273193359375, "learning_rate": 3.6452278232831735e-05, "loss": 76.9551, "step": 103420 }, { "epoch": 0.4178702876974107, "grad_norm": 1073.8568115234375, "learning_rate": 3.644917525658914e-05, "loss": 35.6009, "step": 103430 }, { "epoch": 0.41791068896277833, "grad_norm": 1554.2855224609375, "learning_rate": 3.644607205713898e-05, "loss": 59.1008, "step": 103440 }, { "epoch": 0.41795109022814597, "grad_norm": 619.3630981445312, "learning_rate": 3.644296863454175e-05, "loss": 48.0955, "step": 103450 }, { "epoch": 0.41799149149351356, "grad_norm": 810.5908813476562, "learning_rate": 3.643986498885796e-05, "loss": 66.1217, "step": 103460 }, { "epoch": 0.4180318927588812, "grad_norm": 2242.524658203125, "learning_rate": 3.643676112014811e-05, "loss": 79.2221, "step": 103470 }, { "epoch": 0.41807229402424884, "grad_norm": 303.63385009765625, "learning_rate": 3.643365702847272e-05, "loss": 45.5608, "step": 103480 }, { "epoch": 0.4181126952896165, "grad_norm": 1422.8160400390625, "learning_rate": 3.643055271389229e-05, "loss": 66.0965, "step": 103490 }, { "epoch": 0.4181530965549841, "grad_norm": 741.1747436523438, "learning_rate": 3.642744817646736e-05, "loss": 45.8928, "step": 103500 }, { "epoch": 0.41819349782035176, "grad_norm": 506.9162902832031, "learning_rate": 3.642434341625844e-05, "loss": 67.4611, "step": 103510 }, { "epoch": 0.41823389908571934, "grad_norm": 592.9998168945312, "learning_rate": 3.642123843332606e-05, "loss": 63.5184, "step": 103520 }, { "epoch": 0.418274300351087, "grad_norm": 303.4530334472656, "learning_rate": 3.641813322773076e-05, "loss": 50.6571, "step": 103530 }, { "epoch": 0.4183147016164546, "grad_norm": 1942.93115234375, "learning_rate": 3.641502779953307e-05, "loss": 46.2943, "step": 103540 }, { "epoch": 0.41835510288182226, "grad_norm": 748.298095703125, "learning_rate": 3.6411922148793544e-05, "loss": 54.8428, "step": 103550 }, { "epoch": 0.4183955041471899, "grad_norm": 902.7544555664062, "learning_rate": 3.640881627557271e-05, "loss": 54.3055, "step": 103560 }, { "epoch": 0.41843590541255754, "grad_norm": 1064.920654296875, "learning_rate": 3.640571017993113e-05, "loss": 58.3005, "step": 103570 }, { "epoch": 0.4184763066779251, "grad_norm": 1428.6287841796875, "learning_rate": 3.6402603861929374e-05, "loss": 64.2658, "step": 103580 }, { "epoch": 0.41851670794329277, "grad_norm": 503.11871337890625, "learning_rate": 3.639949732162797e-05, "loss": 61.6714, "step": 103590 }, { "epoch": 0.4185571092086604, "grad_norm": 1219.691162109375, "learning_rate": 3.639639055908751e-05, "loss": 66.9562, "step": 103600 }, { "epoch": 0.41859751047402805, "grad_norm": 223.49790954589844, "learning_rate": 3.639328357436853e-05, "loss": 55.7522, "step": 103610 }, { "epoch": 0.4186379117393957, "grad_norm": 514.73681640625, "learning_rate": 3.639017636753163e-05, "loss": 71.0878, "step": 103620 }, { "epoch": 0.4186783130047633, "grad_norm": 383.3266906738281, "learning_rate": 3.638706893863739e-05, "loss": 71.9139, "step": 103630 }, { "epoch": 0.41871871427013097, "grad_norm": 539.1884765625, "learning_rate": 3.638396128774636e-05, "loss": 76.8819, "step": 103640 }, { "epoch": 0.41875911553549855, "grad_norm": 1771.3558349609375, "learning_rate": 3.6380853414919144e-05, "loss": 65.5004, "step": 103650 }, { "epoch": 0.4187995168008662, "grad_norm": 257.1325988769531, "learning_rate": 3.6377745320216346e-05, "loss": 84.9693, "step": 103660 }, { "epoch": 0.41883991806623383, "grad_norm": 1277.81787109375, "learning_rate": 3.6374637003698536e-05, "loss": 59.5876, "step": 103670 }, { "epoch": 0.41888031933160147, "grad_norm": 985.5901489257812, "learning_rate": 3.637152846542633e-05, "loss": 47.3202, "step": 103680 }, { "epoch": 0.4189207205969691, "grad_norm": 943.6722412109375, "learning_rate": 3.636841970546031e-05, "loss": 52.7835, "step": 103690 }, { "epoch": 0.41896112186233675, "grad_norm": 512.9838256835938, "learning_rate": 3.63653107238611e-05, "loss": 59.9564, "step": 103700 }, { "epoch": 0.41900152312770433, "grad_norm": 886.2669067382812, "learning_rate": 3.636220152068931e-05, "loss": 69.5346, "step": 103710 }, { "epoch": 0.419041924393072, "grad_norm": 866.3492431640625, "learning_rate": 3.635909209600555e-05, "loss": 68.2752, "step": 103720 }, { "epoch": 0.4190823256584396, "grad_norm": 536.70947265625, "learning_rate": 3.635598244987043e-05, "loss": 56.4688, "step": 103730 }, { "epoch": 0.41912272692380725, "grad_norm": 421.8968200683594, "learning_rate": 3.6352872582344596e-05, "loss": 63.9303, "step": 103740 }, { "epoch": 0.4191631281891749, "grad_norm": 437.35546875, "learning_rate": 3.634976249348867e-05, "loss": 29.7742, "step": 103750 }, { "epoch": 0.41920352945454253, "grad_norm": 1521.4986572265625, "learning_rate": 3.634665218336328e-05, "loss": 52.6372, "step": 103760 }, { "epoch": 0.4192439307199102, "grad_norm": 814.3853759765625, "learning_rate": 3.6343541652029064e-05, "loss": 68.0177, "step": 103770 }, { "epoch": 0.41928433198527776, "grad_norm": 495.0726318359375, "learning_rate": 3.6340430899546656e-05, "loss": 49.5452, "step": 103780 }, { "epoch": 0.4193247332506454, "grad_norm": 1200.4290771484375, "learning_rate": 3.633731992597672e-05, "loss": 81.554, "step": 103790 }, { "epoch": 0.41936513451601304, "grad_norm": 1105.92822265625, "learning_rate": 3.633420873137988e-05, "loss": 59.0107, "step": 103800 }, { "epoch": 0.4194055357813807, "grad_norm": 965.9473266601562, "learning_rate": 3.633109731581682e-05, "loss": 52.3541, "step": 103810 }, { "epoch": 0.4194459370467483, "grad_norm": 784.390625, "learning_rate": 3.632798567934817e-05, "loss": 42.5555, "step": 103820 }, { "epoch": 0.41948633831211596, "grad_norm": 1471.4720458984375, "learning_rate": 3.632487382203462e-05, "loss": 61.9829, "step": 103830 }, { "epoch": 0.41952673957748354, "grad_norm": 826.1594848632812, "learning_rate": 3.632176174393682e-05, "loss": 53.5257, "step": 103840 }, { "epoch": 0.4195671408428512, "grad_norm": 526.9407958984375, "learning_rate": 3.631864944511545e-05, "loss": 58.7132, "step": 103850 }, { "epoch": 0.4196075421082188, "grad_norm": 845.7454833984375, "learning_rate": 3.6315536925631174e-05, "loss": 44.2861, "step": 103860 }, { "epoch": 0.41964794337358646, "grad_norm": 869.853515625, "learning_rate": 3.631242418554469e-05, "loss": 78.6071, "step": 103870 }, { "epoch": 0.4196883446389541, "grad_norm": 858.0875244140625, "learning_rate": 3.630931122491666e-05, "loss": 53.3545, "step": 103880 }, { "epoch": 0.41972874590432174, "grad_norm": 472.39337158203125, "learning_rate": 3.6306198043807795e-05, "loss": 44.7906, "step": 103890 }, { "epoch": 0.4197691471696893, "grad_norm": 1292.262939453125, "learning_rate": 3.630308464227877e-05, "loss": 46.8033, "step": 103900 }, { "epoch": 0.41980954843505697, "grad_norm": 667.8062133789062, "learning_rate": 3.62999710203903e-05, "loss": 75.6769, "step": 103910 }, { "epoch": 0.4198499497004246, "grad_norm": 862.8394775390625, "learning_rate": 3.629685717820307e-05, "loss": 56.4103, "step": 103920 }, { "epoch": 0.41989035096579225, "grad_norm": 348.3255920410156, "learning_rate": 3.629374311577779e-05, "loss": 44.0832, "step": 103930 }, { "epoch": 0.4199307522311599, "grad_norm": 1450.09375, "learning_rate": 3.629062883317519e-05, "loss": 68.2507, "step": 103940 }, { "epoch": 0.4199711534965275, "grad_norm": 537.00537109375, "learning_rate": 3.628751433045596e-05, "loss": 69.9783, "step": 103950 }, { "epoch": 0.42001155476189517, "grad_norm": 581.5643310546875, "learning_rate": 3.628439960768082e-05, "loss": 63.0526, "step": 103960 }, { "epoch": 0.42005195602726275, "grad_norm": 664.759521484375, "learning_rate": 3.62812846649105e-05, "loss": 56.3519, "step": 103970 }, { "epoch": 0.4200923572926304, "grad_norm": 635.3744506835938, "learning_rate": 3.6278169502205736e-05, "loss": 61.1876, "step": 103980 }, { "epoch": 0.42013275855799803, "grad_norm": 530.5980224609375, "learning_rate": 3.627505411962724e-05, "loss": 66.6494, "step": 103990 }, { "epoch": 0.42017315982336567, "grad_norm": 758.6454467773438, "learning_rate": 3.627193851723577e-05, "loss": 58.0647, "step": 104000 }, { "epoch": 0.4202135610887333, "grad_norm": 2163.636474609375, "learning_rate": 3.6268822695092056e-05, "loss": 46.5954, "step": 104010 }, { "epoch": 0.42025396235410095, "grad_norm": 1194.97119140625, "learning_rate": 3.626570665325684e-05, "loss": 61.3154, "step": 104020 }, { "epoch": 0.42029436361946854, "grad_norm": 1891.653076171875, "learning_rate": 3.626259039179086e-05, "loss": 59.1667, "step": 104030 }, { "epoch": 0.4203347648848362, "grad_norm": 609.3426513671875, "learning_rate": 3.6259473910754904e-05, "loss": 41.8143, "step": 104040 }, { "epoch": 0.4203751661502038, "grad_norm": 1182.2841796875, "learning_rate": 3.625635721020969e-05, "loss": 64.0979, "step": 104050 }, { "epoch": 0.42041556741557146, "grad_norm": 960.9483642578125, "learning_rate": 3.6253240290216e-05, "loss": 49.4696, "step": 104060 }, { "epoch": 0.4204559686809391, "grad_norm": 292.2507629394531, "learning_rate": 3.62501231508346e-05, "loss": 71.0735, "step": 104070 }, { "epoch": 0.42049636994630674, "grad_norm": 782.7526245117188, "learning_rate": 3.624700579212626e-05, "loss": 55.9743, "step": 104080 }, { "epoch": 0.4205367712116744, "grad_norm": 1657.7484130859375, "learning_rate": 3.624388821415175e-05, "loss": 35.3792, "step": 104090 }, { "epoch": 0.42057717247704196, "grad_norm": 462.6631774902344, "learning_rate": 3.624077041697185e-05, "loss": 53.0793, "step": 104100 }, { "epoch": 0.4206175737424096, "grad_norm": 606.4874267578125, "learning_rate": 3.6237652400647345e-05, "loss": 45.0725, "step": 104110 }, { "epoch": 0.42065797500777724, "grad_norm": 842.7166137695312, "learning_rate": 3.623453416523902e-05, "loss": 67.8834, "step": 104120 }, { "epoch": 0.4206983762731449, "grad_norm": 249.5082550048828, "learning_rate": 3.623141571080766e-05, "loss": 37.8796, "step": 104130 }, { "epoch": 0.4207387775385125, "grad_norm": 343.2146911621094, "learning_rate": 3.6228297037414074e-05, "loss": 57.456, "step": 104140 }, { "epoch": 0.42077917880388016, "grad_norm": 3797.084228515625, "learning_rate": 3.622517814511906e-05, "loss": 60.7318, "step": 104150 }, { "epoch": 0.42081958006924775, "grad_norm": 617.2786254882812, "learning_rate": 3.622205903398342e-05, "loss": 59.6277, "step": 104160 }, { "epoch": 0.4208599813346154, "grad_norm": 836.6347045898438, "learning_rate": 3.6218939704067955e-05, "loss": 69.2782, "step": 104170 }, { "epoch": 0.420900382599983, "grad_norm": 988.6712646484375, "learning_rate": 3.621582015543348e-05, "loss": 71.7698, "step": 104180 }, { "epoch": 0.42094078386535067, "grad_norm": 377.2164001464844, "learning_rate": 3.621270038814083e-05, "loss": 58.6837, "step": 104190 }, { "epoch": 0.4209811851307183, "grad_norm": 498.7535095214844, "learning_rate": 3.6209580402250815e-05, "loss": 38.0926, "step": 104200 }, { "epoch": 0.42102158639608595, "grad_norm": 403.9892883300781, "learning_rate": 3.620646019782425e-05, "loss": 51.1487, "step": 104210 }, { "epoch": 0.42106198766145353, "grad_norm": 1094.6707763671875, "learning_rate": 3.6203339774921976e-05, "loss": 49.0772, "step": 104220 }, { "epoch": 0.42110238892682117, "grad_norm": 1082.783935546875, "learning_rate": 3.6200219133604816e-05, "loss": 71.491, "step": 104230 }, { "epoch": 0.4211427901921888, "grad_norm": 443.6391296386719, "learning_rate": 3.6197098273933634e-05, "loss": 57.1322, "step": 104240 }, { "epoch": 0.42118319145755645, "grad_norm": 431.85430908203125, "learning_rate": 3.619397719596924e-05, "loss": 51.8768, "step": 104250 }, { "epoch": 0.4212235927229241, "grad_norm": 2122.891845703125, "learning_rate": 3.619085589977251e-05, "loss": 62.3512, "step": 104260 }, { "epoch": 0.42126399398829173, "grad_norm": 1347.9971923828125, "learning_rate": 3.618773438540428e-05, "loss": 68.3303, "step": 104270 }, { "epoch": 0.42130439525365937, "grad_norm": 583.928955078125, "learning_rate": 3.618461265292541e-05, "loss": 52.5091, "step": 104280 }, { "epoch": 0.42134479651902695, "grad_norm": 523.10546875, "learning_rate": 3.618149070239676e-05, "loss": 55.4588, "step": 104290 }, { "epoch": 0.4213851977843946, "grad_norm": 891.4425659179688, "learning_rate": 3.617836853387918e-05, "loss": 56.3773, "step": 104300 }, { "epoch": 0.42142559904976223, "grad_norm": 534.82470703125, "learning_rate": 3.6175246147433563e-05, "loss": 64.5376, "step": 104310 }, { "epoch": 0.4214660003151299, "grad_norm": 529.0205688476562, "learning_rate": 3.617212354312076e-05, "loss": 50.4187, "step": 104320 }, { "epoch": 0.4215064015804975, "grad_norm": 303.3257141113281, "learning_rate": 3.616900072100166e-05, "loss": 42.9221, "step": 104330 }, { "epoch": 0.42154680284586515, "grad_norm": 1264.615478515625, "learning_rate": 3.6165877681137136e-05, "loss": 72.0312, "step": 104340 }, { "epoch": 0.42158720411123274, "grad_norm": 1191.180908203125, "learning_rate": 3.6162754423588085e-05, "loss": 57.8124, "step": 104350 }, { "epoch": 0.4216276053766004, "grad_norm": 366.5780944824219, "learning_rate": 3.61596309484154e-05, "loss": 56.5291, "step": 104360 }, { "epoch": 0.421668006641968, "grad_norm": 512.8042602539062, "learning_rate": 3.615650725567995e-05, "loss": 57.7641, "step": 104370 }, { "epoch": 0.42170840790733566, "grad_norm": 782.4150390625, "learning_rate": 3.615338334544265e-05, "loss": 41.4072, "step": 104380 }, { "epoch": 0.4217488091727033, "grad_norm": 1751.27587890625, "learning_rate": 3.615025921776439e-05, "loss": 49.6701, "step": 104390 }, { "epoch": 0.42178921043807094, "grad_norm": 1079.94189453125, "learning_rate": 3.614713487270611e-05, "loss": 46.7276, "step": 104400 }, { "epoch": 0.4218296117034386, "grad_norm": 1060.4190673828125, "learning_rate": 3.614401031032867e-05, "loss": 106.2965, "step": 104410 }, { "epoch": 0.42187001296880616, "grad_norm": 749.7593994140625, "learning_rate": 3.614088553069303e-05, "loss": 44.3926, "step": 104420 }, { "epoch": 0.4219104142341738, "grad_norm": 628.2740478515625, "learning_rate": 3.6137760533860074e-05, "loss": 88.6748, "step": 104430 }, { "epoch": 0.42195081549954144, "grad_norm": 461.367431640625, "learning_rate": 3.613463531989076e-05, "loss": 49.6176, "step": 104440 }, { "epoch": 0.4219912167649091, "grad_norm": 873.3343505859375, "learning_rate": 3.613150988884599e-05, "loss": 61.126, "step": 104450 }, { "epoch": 0.4220316180302767, "grad_norm": 393.72869873046875, "learning_rate": 3.612838424078671e-05, "loss": 38.0402, "step": 104460 }, { "epoch": 0.42207201929564436, "grad_norm": 850.967041015625, "learning_rate": 3.612525837577384e-05, "loss": 66.1431, "step": 104470 }, { "epoch": 0.42211242056101195, "grad_norm": 1090.09814453125, "learning_rate": 3.6122132293868335e-05, "loss": 82.8322, "step": 104480 }, { "epoch": 0.4221528218263796, "grad_norm": 956.2141723632812, "learning_rate": 3.611900599513114e-05, "loss": 57.5093, "step": 104490 }, { "epoch": 0.4221932230917472, "grad_norm": 407.1945495605469, "learning_rate": 3.611587947962319e-05, "loss": 50.0537, "step": 104500 }, { "epoch": 0.42223362435711487, "grad_norm": 486.4323425292969, "learning_rate": 3.6112752747405447e-05, "loss": 52.2107, "step": 104510 }, { "epoch": 0.4222740256224825, "grad_norm": 864.6810913085938, "learning_rate": 3.6109625798538873e-05, "loss": 50.7976, "step": 104520 }, { "epoch": 0.42231442688785015, "grad_norm": 534.1397705078125, "learning_rate": 3.6106498633084424e-05, "loss": 48.8516, "step": 104530 }, { "epoch": 0.42235482815321773, "grad_norm": 636.9712524414062, "learning_rate": 3.610337125110307e-05, "loss": 52.8733, "step": 104540 }, { "epoch": 0.42239522941858537, "grad_norm": 1049.5821533203125, "learning_rate": 3.610024365265577e-05, "loss": 56.6238, "step": 104550 }, { "epoch": 0.422435630683953, "grad_norm": 374.7794189453125, "learning_rate": 3.6097115837803505e-05, "loss": 73.0223, "step": 104560 }, { "epoch": 0.42247603194932065, "grad_norm": 321.7105712890625, "learning_rate": 3.609398780660726e-05, "loss": 51.7502, "step": 104570 }, { "epoch": 0.4225164332146883, "grad_norm": 833.6190185546875, "learning_rate": 3.6090859559128e-05, "loss": 44.2709, "step": 104580 }, { "epoch": 0.42255683448005593, "grad_norm": 465.5598449707031, "learning_rate": 3.6087731095426733e-05, "loss": 54.5164, "step": 104590 }, { "epoch": 0.42259723574542357, "grad_norm": 460.9774169921875, "learning_rate": 3.608460241556443e-05, "loss": 59.3148, "step": 104600 }, { "epoch": 0.42263763701079116, "grad_norm": 390.5638427734375, "learning_rate": 3.6081473519602105e-05, "loss": 70.8456, "step": 104610 }, { "epoch": 0.4226780382761588, "grad_norm": 1563.58642578125, "learning_rate": 3.607834440760074e-05, "loss": 57.9671, "step": 104620 }, { "epoch": 0.42271843954152644, "grad_norm": 1309.08935546875, "learning_rate": 3.607521507962136e-05, "loss": 63.8607, "step": 104630 }, { "epoch": 0.4227588408068941, "grad_norm": 2113.58447265625, "learning_rate": 3.6072085535724956e-05, "loss": 57.3463, "step": 104640 }, { "epoch": 0.4227992420722617, "grad_norm": 1004.9712524414062, "learning_rate": 3.606895577597255e-05, "loss": 67.326, "step": 104650 }, { "epoch": 0.42283964333762936, "grad_norm": 0.0, "learning_rate": 3.606582580042513e-05, "loss": 33.3827, "step": 104660 }, { "epoch": 0.42288004460299694, "grad_norm": 594.0809936523438, "learning_rate": 3.606269560914376e-05, "loss": 48.728, "step": 104670 }, { "epoch": 0.4229204458683646, "grad_norm": 891.5621948242188, "learning_rate": 3.6059565202189435e-05, "loss": 63.9774, "step": 104680 }, { "epoch": 0.4229608471337322, "grad_norm": 622.961669921875, "learning_rate": 3.605643457962319e-05, "loss": 53.7502, "step": 104690 }, { "epoch": 0.42300124839909986, "grad_norm": 899.34716796875, "learning_rate": 3.605330374150607e-05, "loss": 62.4448, "step": 104700 }, { "epoch": 0.4230416496644675, "grad_norm": 698.8828735351562, "learning_rate": 3.60501726878991e-05, "loss": 72.48, "step": 104710 }, { "epoch": 0.42308205092983514, "grad_norm": 591.2288208007812, "learning_rate": 3.604704141886332e-05, "loss": 50.3654, "step": 104720 }, { "epoch": 0.4231224521952028, "grad_norm": 1411.56640625, "learning_rate": 3.6043909934459785e-05, "loss": 68.7279, "step": 104730 }, { "epoch": 0.42316285346057037, "grad_norm": 3371.13134765625, "learning_rate": 3.604077823474954e-05, "loss": 72.1287, "step": 104740 }, { "epoch": 0.423203254725938, "grad_norm": 0.0, "learning_rate": 3.603764631979363e-05, "loss": 61.998, "step": 104750 }, { "epoch": 0.42324365599130565, "grad_norm": 704.408935546875, "learning_rate": 3.603451418965313e-05, "loss": 27.3794, "step": 104760 }, { "epoch": 0.4232840572566733, "grad_norm": 973.9273681640625, "learning_rate": 3.60313818443891e-05, "loss": 87.7188, "step": 104770 }, { "epoch": 0.4233244585220409, "grad_norm": 908.014404296875, "learning_rate": 3.602824928406259e-05, "loss": 51.2298, "step": 104780 }, { "epoch": 0.42336485978740857, "grad_norm": 693.9036254882812, "learning_rate": 3.602511650873469e-05, "loss": 97.5675, "step": 104790 }, { "epoch": 0.42340526105277615, "grad_norm": 1070.6943359375, "learning_rate": 3.602198351846647e-05, "loss": 70.0561, "step": 104800 }, { "epoch": 0.4234456623181438, "grad_norm": 970.8385620117188, "learning_rate": 3.6018850313319e-05, "loss": 71.5289, "step": 104810 }, { "epoch": 0.42348606358351143, "grad_norm": 1016.22021484375, "learning_rate": 3.6015716893353376e-05, "loss": 53.878, "step": 104820 }, { "epoch": 0.42352646484887907, "grad_norm": 532.7132568359375, "learning_rate": 3.601258325863067e-05, "loss": 62.0343, "step": 104830 }, { "epoch": 0.4235668661142467, "grad_norm": 94.07963562011719, "learning_rate": 3.600944940921199e-05, "loss": 67.1461, "step": 104840 }, { "epoch": 0.42360726737961435, "grad_norm": 921.68017578125, "learning_rate": 3.6006315345158434e-05, "loss": 65.9748, "step": 104850 }, { "epoch": 0.42364766864498193, "grad_norm": 1337.1324462890625, "learning_rate": 3.600318106653108e-05, "loss": 60.1088, "step": 104860 }, { "epoch": 0.4236880699103496, "grad_norm": 522.1008911132812, "learning_rate": 3.600004657339105e-05, "loss": 55.0299, "step": 104870 }, { "epoch": 0.4237284711757172, "grad_norm": 452.1679992675781, "learning_rate": 3.5996911865799454e-05, "loss": 42.4841, "step": 104880 }, { "epoch": 0.42376887244108485, "grad_norm": 712.0603637695312, "learning_rate": 3.59937769438174e-05, "loss": 57.7705, "step": 104890 }, { "epoch": 0.4238092737064525, "grad_norm": 486.09613037109375, "learning_rate": 3.5990641807506e-05, "loss": 39.141, "step": 104900 }, { "epoch": 0.42384967497182013, "grad_norm": 1278.7674560546875, "learning_rate": 3.598750645692638e-05, "loss": 59.3348, "step": 104910 }, { "epoch": 0.4238900762371878, "grad_norm": 1631.982666015625, "learning_rate": 3.5984370892139666e-05, "loss": 68.6455, "step": 104920 }, { "epoch": 0.42393047750255536, "grad_norm": 466.52874755859375, "learning_rate": 3.598123511320699e-05, "loss": 37.1398, "step": 104930 }, { "epoch": 0.423970878767923, "grad_norm": 1008.3381958007812, "learning_rate": 3.597809912018947e-05, "loss": 65.8413, "step": 104940 }, { "epoch": 0.42401128003329064, "grad_norm": 738.3510131835938, "learning_rate": 3.597496291314827e-05, "loss": 67.1094, "step": 104950 }, { "epoch": 0.4240516812986583, "grad_norm": 337.72796630859375, "learning_rate": 3.5971826492144504e-05, "loss": 63.2226, "step": 104960 }, { "epoch": 0.4240920825640259, "grad_norm": 295.6142272949219, "learning_rate": 3.5968689857239345e-05, "loss": 65.1638, "step": 104970 }, { "epoch": 0.42413248382939356, "grad_norm": 797.707275390625, "learning_rate": 3.596555300849392e-05, "loss": 77.6298, "step": 104980 }, { "epoch": 0.42417288509476114, "grad_norm": 476.190185546875, "learning_rate": 3.5962415945969405e-05, "loss": 69.4243, "step": 104990 }, { "epoch": 0.4242132863601288, "grad_norm": 412.16644287109375, "learning_rate": 3.5959278669726935e-05, "loss": 40.4091, "step": 105000 }, { "epoch": 0.4242536876254964, "grad_norm": 565.7656860351562, "learning_rate": 3.595614117982769e-05, "loss": 68.7301, "step": 105010 }, { "epoch": 0.42429408889086406, "grad_norm": 733.7787475585938, "learning_rate": 3.5953003476332835e-05, "loss": 54.2594, "step": 105020 }, { "epoch": 0.4243344901562317, "grad_norm": 842.4686279296875, "learning_rate": 3.5949865559303536e-05, "loss": 65.4532, "step": 105030 }, { "epoch": 0.42437489142159934, "grad_norm": 1525.9725341796875, "learning_rate": 3.594672742880097e-05, "loss": 51.1515, "step": 105040 }, { "epoch": 0.424415292686967, "grad_norm": 1515.8177490234375, "learning_rate": 3.594358908488632e-05, "loss": 42.8702, "step": 105050 }, { "epoch": 0.42445569395233457, "grad_norm": 922.44091796875, "learning_rate": 3.594045052762076e-05, "loss": 57.4078, "step": 105060 }, { "epoch": 0.4244960952177022, "grad_norm": 1049.3338623046875, "learning_rate": 3.5937311757065494e-05, "loss": 55.3803, "step": 105070 }, { "epoch": 0.42453649648306985, "grad_norm": 563.2704467773438, "learning_rate": 3.5934172773281696e-05, "loss": 28.5142, "step": 105080 }, { "epoch": 0.4245768977484375, "grad_norm": 655.0813598632812, "learning_rate": 3.593103357633058e-05, "loss": 41.6843, "step": 105090 }, { "epoch": 0.4246172990138051, "grad_norm": 684.439453125, "learning_rate": 3.592789416627332e-05, "loss": 50.5486, "step": 105100 }, { "epoch": 0.42465770027917277, "grad_norm": 765.9920654296875, "learning_rate": 3.592475454317115e-05, "loss": 68.0954, "step": 105110 }, { "epoch": 0.42469810154454035, "grad_norm": 1379.385009765625, "learning_rate": 3.592161470708526e-05, "loss": 65.1052, "step": 105120 }, { "epoch": 0.424738502809908, "grad_norm": 296.92437744140625, "learning_rate": 3.591847465807687e-05, "loss": 46.8323, "step": 105130 }, { "epoch": 0.42477890407527563, "grad_norm": 554.3091430664062, "learning_rate": 3.59153343962072e-05, "loss": 54.8523, "step": 105140 }, { "epoch": 0.42481930534064327, "grad_norm": 529.84521484375, "learning_rate": 3.5912193921537476e-05, "loss": 44.9619, "step": 105150 }, { "epoch": 0.4248597066060109, "grad_norm": 571.56591796875, "learning_rate": 3.5909053234128895e-05, "loss": 44.5782, "step": 105160 }, { "epoch": 0.42490010787137855, "grad_norm": 598.4688110351562, "learning_rate": 3.590591233404271e-05, "loss": 75.1092, "step": 105170 }, { "epoch": 0.42494050913674614, "grad_norm": 1925.43798828125, "learning_rate": 3.590277122134015e-05, "loss": 52.7716, "step": 105180 }, { "epoch": 0.4249809104021138, "grad_norm": 435.9007263183594, "learning_rate": 3.5899629896082454e-05, "loss": 35.1308, "step": 105190 }, { "epoch": 0.4250213116674814, "grad_norm": 492.9730224609375, "learning_rate": 3.5896488358330856e-05, "loss": 47.9135, "step": 105200 }, { "epoch": 0.42506171293284906, "grad_norm": 1603.56591796875, "learning_rate": 3.5893346608146607e-05, "loss": 62.7425, "step": 105210 }, { "epoch": 0.4251021141982167, "grad_norm": 715.2278442382812, "learning_rate": 3.5890204645590964e-05, "loss": 127.7718, "step": 105220 }, { "epoch": 0.42514251546358434, "grad_norm": 572.968017578125, "learning_rate": 3.588706247072518e-05, "loss": 65.1566, "step": 105230 }, { "epoch": 0.425182916728952, "grad_norm": 859.6871948242188, "learning_rate": 3.588392008361049e-05, "loss": 49.4484, "step": 105240 }, { "epoch": 0.42522331799431956, "grad_norm": 1073.6341552734375, "learning_rate": 3.588077748430819e-05, "loss": 65.2147, "step": 105250 }, { "epoch": 0.4252637192596872, "grad_norm": 802.9885864257812, "learning_rate": 3.587763467287953e-05, "loss": 44.3761, "step": 105260 }, { "epoch": 0.42530412052505484, "grad_norm": 1480.44921875, "learning_rate": 3.587449164938578e-05, "loss": 53.1393, "step": 105270 }, { "epoch": 0.4253445217904225, "grad_norm": 544.9711303710938, "learning_rate": 3.5871348413888204e-05, "loss": 64.2403, "step": 105280 }, { "epoch": 0.4253849230557901, "grad_norm": 1001.657958984375, "learning_rate": 3.586820496644811e-05, "loss": 61.8277, "step": 105290 }, { "epoch": 0.42542532432115776, "grad_norm": 1063.820068359375, "learning_rate": 3.586506130712676e-05, "loss": 47.9627, "step": 105300 }, { "epoch": 0.42546572558652535, "grad_norm": 1031.9720458984375, "learning_rate": 3.5861917435985445e-05, "loss": 58.5947, "step": 105310 }, { "epoch": 0.425506126851893, "grad_norm": 376.46630859375, "learning_rate": 3.585877335308546e-05, "loss": 62.4376, "step": 105320 }, { "epoch": 0.4255465281172606, "grad_norm": 1131.775390625, "learning_rate": 3.5855629058488095e-05, "loss": 71.483, "step": 105330 }, { "epoch": 0.42558692938262827, "grad_norm": 687.2857666015625, "learning_rate": 3.585248455225466e-05, "loss": 69.5148, "step": 105340 }, { "epoch": 0.4256273306479959, "grad_norm": 473.1686706542969, "learning_rate": 3.584933983444644e-05, "loss": 89.6963, "step": 105350 }, { "epoch": 0.42566773191336355, "grad_norm": 742.19873046875, "learning_rate": 3.5846194905124757e-05, "loss": 67.2487, "step": 105360 }, { "epoch": 0.4257081331787312, "grad_norm": 369.0582275390625, "learning_rate": 3.584304976435092e-05, "loss": 84.1498, "step": 105370 }, { "epoch": 0.42574853444409877, "grad_norm": 409.1021728515625, "learning_rate": 3.5839904412186256e-05, "loss": 63.912, "step": 105380 }, { "epoch": 0.4257889357094664, "grad_norm": 777.0023193359375, "learning_rate": 3.583675884869206e-05, "loss": 52.3509, "step": 105390 }, { "epoch": 0.42582933697483405, "grad_norm": 345.2575988769531, "learning_rate": 3.5833613073929684e-05, "loss": 37.5077, "step": 105400 }, { "epoch": 0.4258697382402017, "grad_norm": 1329.019287109375, "learning_rate": 3.583046708796043e-05, "loss": 46.7334, "step": 105410 }, { "epoch": 0.42591013950556933, "grad_norm": 574.7626953125, "learning_rate": 3.582732089084566e-05, "loss": 49.3083, "step": 105420 }, { "epoch": 0.42595054077093697, "grad_norm": 0.0, "learning_rate": 3.582417448264669e-05, "loss": 53.3416, "step": 105430 }, { "epoch": 0.42599094203630455, "grad_norm": 1714.5047607421875, "learning_rate": 3.582102786342485e-05, "loss": 67.8292, "step": 105440 }, { "epoch": 0.4260313433016722, "grad_norm": 1370.195068359375, "learning_rate": 3.581788103324152e-05, "loss": 58.7996, "step": 105450 }, { "epoch": 0.42607174456703983, "grad_norm": 1272.4110107421875, "learning_rate": 3.581473399215802e-05, "loss": 70.6126, "step": 105460 }, { "epoch": 0.4261121458324075, "grad_norm": 662.2067260742188, "learning_rate": 3.581158674023572e-05, "loss": 51.143, "step": 105470 }, { "epoch": 0.4261525470977751, "grad_norm": 803.9454956054688, "learning_rate": 3.5808439277535964e-05, "loss": 38.5995, "step": 105480 }, { "epoch": 0.42619294836314275, "grad_norm": 507.4480285644531, "learning_rate": 3.580529160412013e-05, "loss": 56.6335, "step": 105490 }, { "epoch": 0.42623334962851034, "grad_norm": 673.6394653320312, "learning_rate": 3.580214372004956e-05, "loss": 71.4622, "step": 105500 }, { "epoch": 0.426273750893878, "grad_norm": 506.5180969238281, "learning_rate": 3.579899562538564e-05, "loss": 34.8514, "step": 105510 }, { "epoch": 0.4263141521592456, "grad_norm": 912.122802734375, "learning_rate": 3.5795847320189746e-05, "loss": 57.6717, "step": 105520 }, { "epoch": 0.42635455342461326, "grad_norm": 888.0779418945312, "learning_rate": 3.5792698804523245e-05, "loss": 55.5145, "step": 105530 }, { "epoch": 0.4263949546899809, "grad_norm": 670.391845703125, "learning_rate": 3.5789550078447526e-05, "loss": 78.2021, "step": 105540 }, { "epoch": 0.42643535595534854, "grad_norm": 418.4927673339844, "learning_rate": 3.5786401142023975e-05, "loss": 73.2309, "step": 105550 }, { "epoch": 0.4264757572207162, "grad_norm": 579.839599609375, "learning_rate": 3.5783251995313985e-05, "loss": 49.0248, "step": 105560 }, { "epoch": 0.42651615848608376, "grad_norm": 1221.287841796875, "learning_rate": 3.5780102638378936e-05, "loss": 48.0422, "step": 105570 }, { "epoch": 0.4265565597514514, "grad_norm": 576.9136962890625, "learning_rate": 3.577695307128024e-05, "loss": 48.1065, "step": 105580 }, { "epoch": 0.42659696101681904, "grad_norm": 682.1599731445312, "learning_rate": 3.57738032940793e-05, "loss": 80.5689, "step": 105590 }, { "epoch": 0.4266373622821867, "grad_norm": 952.7507934570312, "learning_rate": 3.577065330683751e-05, "loss": 59.2839, "step": 105600 }, { "epoch": 0.4266777635475543, "grad_norm": 755.3192749023438, "learning_rate": 3.5767503109616296e-05, "loss": 63.4179, "step": 105610 }, { "epoch": 0.42671816481292196, "grad_norm": 528.3074340820312, "learning_rate": 3.576435270247706e-05, "loss": 49.0063, "step": 105620 }, { "epoch": 0.42675856607828955, "grad_norm": 1181.072509765625, "learning_rate": 3.5761202085481235e-05, "loss": 53.1849, "step": 105630 }, { "epoch": 0.4267989673436572, "grad_norm": 825.5591430664062, "learning_rate": 3.575805125869022e-05, "loss": 54.1307, "step": 105640 }, { "epoch": 0.4268393686090248, "grad_norm": 1657.726806640625, "learning_rate": 3.5754900222165465e-05, "loss": 84.8316, "step": 105650 }, { "epoch": 0.42687976987439247, "grad_norm": 707.2351684570312, "learning_rate": 3.5751748975968394e-05, "loss": 71.4824, "step": 105660 }, { "epoch": 0.4269201711397601, "grad_norm": 514.521484375, "learning_rate": 3.574859752016045e-05, "loss": 60.5219, "step": 105670 }, { "epoch": 0.42696057240512775, "grad_norm": 661.9301147460938, "learning_rate": 3.574544585480305e-05, "loss": 45.8945, "step": 105680 }, { "epoch": 0.4270009736704954, "grad_norm": 416.9234924316406, "learning_rate": 3.574229397995765e-05, "loss": 63.908, "step": 105690 }, { "epoch": 0.42704137493586297, "grad_norm": 835.8658447265625, "learning_rate": 3.573914189568571e-05, "loss": 77.679, "step": 105700 }, { "epoch": 0.4270817762012306, "grad_norm": 1938.5263671875, "learning_rate": 3.5735989602048665e-05, "loss": 62.1259, "step": 105710 }, { "epoch": 0.42712217746659825, "grad_norm": 825.2261962890625, "learning_rate": 3.573283709910798e-05, "loss": 50.9862, "step": 105720 }, { "epoch": 0.4271625787319659, "grad_norm": 1108.54345703125, "learning_rate": 3.572968438692509e-05, "loss": 75.7806, "step": 105730 }, { "epoch": 0.42720297999733353, "grad_norm": 338.0368347167969, "learning_rate": 3.5726531465561504e-05, "loss": 40.806, "step": 105740 }, { "epoch": 0.42724338126270117, "grad_norm": 876.4752197265625, "learning_rate": 3.572337833507865e-05, "loss": 63.9452, "step": 105750 }, { "epoch": 0.42728378252806876, "grad_norm": 429.4320983886719, "learning_rate": 3.572022499553802e-05, "loss": 79.8431, "step": 105760 }, { "epoch": 0.4273241837934364, "grad_norm": 749.8831787109375, "learning_rate": 3.5717071447001083e-05, "loss": 54.6913, "step": 105770 }, { "epoch": 0.42736458505880404, "grad_norm": 902.8178100585938, "learning_rate": 3.571391768952932e-05, "loss": 45.6217, "step": 105780 }, { "epoch": 0.4274049863241717, "grad_norm": 1094.285400390625, "learning_rate": 3.571076372318422e-05, "loss": 76.6032, "step": 105790 }, { "epoch": 0.4274453875895393, "grad_norm": 1502.871337890625, "learning_rate": 3.570760954802726e-05, "loss": 80.5316, "step": 105800 }, { "epoch": 0.42748578885490696, "grad_norm": 572.2935180664062, "learning_rate": 3.5704455164119945e-05, "loss": 64.48, "step": 105810 }, { "epoch": 0.42752619012027454, "grad_norm": 742.1455688476562, "learning_rate": 3.5701300571523755e-05, "loss": 82.0077, "step": 105820 }, { "epoch": 0.4275665913856422, "grad_norm": 520.0400390625, "learning_rate": 3.569814577030022e-05, "loss": 74.5518, "step": 105830 }, { "epoch": 0.4276069926510098, "grad_norm": 967.4630126953125, "learning_rate": 3.569499076051081e-05, "loss": 51.6548, "step": 105840 }, { "epoch": 0.42764739391637746, "grad_norm": 1097.524658203125, "learning_rate": 3.5691835542217054e-05, "loss": 66.9995, "step": 105850 }, { "epoch": 0.4276877951817451, "grad_norm": 1097.96533203125, "learning_rate": 3.5688680115480455e-05, "loss": 55.4627, "step": 105860 }, { "epoch": 0.42772819644711274, "grad_norm": 936.7929077148438, "learning_rate": 3.5685524480362543e-05, "loss": 79.1348, "step": 105870 }, { "epoch": 0.4277685977124804, "grad_norm": 427.792724609375, "learning_rate": 3.568236863692482e-05, "loss": 57.1244, "step": 105880 }, { "epoch": 0.42780899897784797, "grad_norm": 430.7917785644531, "learning_rate": 3.567921258522883e-05, "loss": 45.7751, "step": 105890 }, { "epoch": 0.4278494002432156, "grad_norm": 1268.4412841796875, "learning_rate": 3.567605632533608e-05, "loss": 51.2699, "step": 105900 }, { "epoch": 0.42788980150858325, "grad_norm": 156.56045532226562, "learning_rate": 3.5672899857308134e-05, "loss": 58.0844, "step": 105910 }, { "epoch": 0.4279302027739509, "grad_norm": 829.1176147460938, "learning_rate": 3.56697431812065e-05, "loss": 55.2921, "step": 105920 }, { "epoch": 0.4279706040393185, "grad_norm": 610.2461547851562, "learning_rate": 3.566658629709273e-05, "loss": 50.3534, "step": 105930 }, { "epoch": 0.42801100530468617, "grad_norm": 1424.289306640625, "learning_rate": 3.566342920502837e-05, "loss": 63.0704, "step": 105940 }, { "epoch": 0.42805140657005375, "grad_norm": 3496.262939453125, "learning_rate": 3.5660271905074974e-05, "loss": 59.1756, "step": 105950 }, { "epoch": 0.4280918078354214, "grad_norm": 413.6312561035156, "learning_rate": 3.565711439729408e-05, "loss": 55.2588, "step": 105960 }, { "epoch": 0.42813220910078903, "grad_norm": 632.3339233398438, "learning_rate": 3.565395668174725e-05, "loss": 69.6535, "step": 105970 }, { "epoch": 0.42817261036615667, "grad_norm": 1112.0599365234375, "learning_rate": 3.565079875849605e-05, "loss": 51.4274, "step": 105980 }, { "epoch": 0.4282130116315243, "grad_norm": 127.4210433959961, "learning_rate": 3.564764062760205e-05, "loss": 64.7097, "step": 105990 }, { "epoch": 0.42825341289689195, "grad_norm": 1343.2515869140625, "learning_rate": 3.564448228912682e-05, "loss": 50.9704, "step": 106000 }, { "epoch": 0.4282938141622596, "grad_norm": 794.98779296875, "learning_rate": 3.564132374313192e-05, "loss": 38.4331, "step": 106010 }, { "epoch": 0.4283342154276272, "grad_norm": 195.28814697265625, "learning_rate": 3.5638164989678935e-05, "loss": 61.8604, "step": 106020 }, { "epoch": 0.4283746166929948, "grad_norm": 1086.4222412109375, "learning_rate": 3.563500602882945e-05, "loss": 65.9661, "step": 106030 }, { "epoch": 0.42841501795836245, "grad_norm": 737.5653076171875, "learning_rate": 3.5631846860645044e-05, "loss": 55.4186, "step": 106040 }, { "epoch": 0.4284554192237301, "grad_norm": 864.1533203125, "learning_rate": 3.562868748518732e-05, "loss": 82.7763, "step": 106050 }, { "epoch": 0.42849582048909773, "grad_norm": 1645.748046875, "learning_rate": 3.562552790251785e-05, "loss": 70.2854, "step": 106060 }, { "epoch": 0.4285362217544654, "grad_norm": 754.142578125, "learning_rate": 3.562236811269824e-05, "loss": 55.0108, "step": 106070 }, { "epoch": 0.42857662301983296, "grad_norm": 408.4161376953125, "learning_rate": 3.56192081157901e-05, "loss": 69.7249, "step": 106080 }, { "epoch": 0.4286170242852006, "grad_norm": 308.8442077636719, "learning_rate": 3.561604791185503e-05, "loss": 54.3556, "step": 106090 }, { "epoch": 0.42865742555056824, "grad_norm": 1148.19677734375, "learning_rate": 3.561288750095465e-05, "loss": 57.7566, "step": 106100 }, { "epoch": 0.4286978268159359, "grad_norm": 589.8562622070312, "learning_rate": 3.560972688315055e-05, "loss": 65.197, "step": 106110 }, { "epoch": 0.4287382280813035, "grad_norm": 1979.934326171875, "learning_rate": 3.5606566058504375e-05, "loss": 67.4663, "step": 106120 }, { "epoch": 0.42877862934667116, "grad_norm": 638.1552124023438, "learning_rate": 3.560340502707773e-05, "loss": 40.0643, "step": 106130 }, { "epoch": 0.42881903061203874, "grad_norm": 1344.94580078125, "learning_rate": 3.560024378893224e-05, "loss": 71.7906, "step": 106140 }, { "epoch": 0.4288594318774064, "grad_norm": 541.87353515625, "learning_rate": 3.559708234412954e-05, "loss": 89.126, "step": 106150 }, { "epoch": 0.428899833142774, "grad_norm": 1129.01708984375, "learning_rate": 3.559392069273127e-05, "loss": 74.1695, "step": 106160 }, { "epoch": 0.42894023440814166, "grad_norm": 741.240234375, "learning_rate": 3.559075883479906e-05, "loss": 77.3889, "step": 106170 }, { "epoch": 0.4289806356735093, "grad_norm": 373.0411682128906, "learning_rate": 3.558759677039455e-05, "loss": 85.0204, "step": 106180 }, { "epoch": 0.42902103693887694, "grad_norm": 705.1157836914062, "learning_rate": 3.558443449957939e-05, "loss": 77.1509, "step": 106190 }, { "epoch": 0.4290614382042446, "grad_norm": 565.6890258789062, "learning_rate": 3.5581272022415244e-05, "loss": 62.9252, "step": 106200 }, { "epoch": 0.42910183946961217, "grad_norm": 2649.929443359375, "learning_rate": 3.5578109338963736e-05, "loss": 79.2853, "step": 106210 }, { "epoch": 0.4291422407349798, "grad_norm": 645.3712768554688, "learning_rate": 3.557494644928654e-05, "loss": 52.6527, "step": 106220 }, { "epoch": 0.42918264200034745, "grad_norm": 446.864013671875, "learning_rate": 3.5571783353445325e-05, "loss": 54.5988, "step": 106230 }, { "epoch": 0.4292230432657151, "grad_norm": 1004.2807006835938, "learning_rate": 3.5568620051501756e-05, "loss": 67.0789, "step": 106240 }, { "epoch": 0.4292634445310827, "grad_norm": 611.4927368164062, "learning_rate": 3.556545654351749e-05, "loss": 38.8004, "step": 106250 }, { "epoch": 0.42930384579645037, "grad_norm": 868.6104125976562, "learning_rate": 3.556229282955421e-05, "loss": 44.7674, "step": 106260 }, { "epoch": 0.42934424706181795, "grad_norm": 1725.8865966796875, "learning_rate": 3.5559128909673595e-05, "loss": 71.4952, "step": 106270 }, { "epoch": 0.4293846483271856, "grad_norm": 654.6998291015625, "learning_rate": 3.555596478393733e-05, "loss": 48.7473, "step": 106280 }, { "epoch": 0.42942504959255323, "grad_norm": 1723.461669921875, "learning_rate": 3.555280045240709e-05, "loss": 62.5021, "step": 106290 }, { "epoch": 0.42946545085792087, "grad_norm": 989.3924560546875, "learning_rate": 3.554963591514457e-05, "loss": 64.8296, "step": 106300 }, { "epoch": 0.4295058521232885, "grad_norm": 767.1257934570312, "learning_rate": 3.554647117221147e-05, "loss": 58.58, "step": 106310 }, { "epoch": 0.42954625338865615, "grad_norm": 958.0810546875, "learning_rate": 3.554330622366949e-05, "loss": 82.886, "step": 106320 }, { "epoch": 0.42958665465402374, "grad_norm": 1372.3026123046875, "learning_rate": 3.554014106958032e-05, "loss": 91.1392, "step": 106330 }, { "epoch": 0.4296270559193914, "grad_norm": 765.9713745117188, "learning_rate": 3.5536975710005677e-05, "loss": 43.6938, "step": 106340 }, { "epoch": 0.429667457184759, "grad_norm": 2553.733642578125, "learning_rate": 3.553381014500727e-05, "loss": 54.1921, "step": 106350 }, { "epoch": 0.42970785845012666, "grad_norm": 1080.475341796875, "learning_rate": 3.5530644374646815e-05, "loss": 62.0937, "step": 106360 }, { "epoch": 0.4297482597154943, "grad_norm": 2564.04638671875, "learning_rate": 3.5527478398986015e-05, "loss": 63.5857, "step": 106370 }, { "epoch": 0.42978866098086194, "grad_norm": 299.32989501953125, "learning_rate": 3.552431221808661e-05, "loss": 44.5124, "step": 106380 }, { "epoch": 0.4298290622462296, "grad_norm": 631.6810302734375, "learning_rate": 3.5521145832010314e-05, "loss": 49.5127, "step": 106390 }, { "epoch": 0.42986946351159716, "grad_norm": 1121.8922119140625, "learning_rate": 3.551797924081887e-05, "loss": 58.7079, "step": 106400 }, { "epoch": 0.4299098647769648, "grad_norm": 657.3575439453125, "learning_rate": 3.5514812444574004e-05, "loss": 50.7894, "step": 106410 }, { "epoch": 0.42995026604233244, "grad_norm": 662.6183471679688, "learning_rate": 3.551164544333745e-05, "loss": 79.3807, "step": 106420 }, { "epoch": 0.4299906673077001, "grad_norm": 593.0071411132812, "learning_rate": 3.550847823717096e-05, "loss": 65.9797, "step": 106430 }, { "epoch": 0.4300310685730677, "grad_norm": 755.3797607421875, "learning_rate": 3.5505310826136286e-05, "loss": 51.0179, "step": 106440 }, { "epoch": 0.43007146983843536, "grad_norm": 1839.5137939453125, "learning_rate": 3.5502143210295165e-05, "loss": 62.1264, "step": 106450 }, { "epoch": 0.43011187110380295, "grad_norm": 722.4067993164062, "learning_rate": 3.549897538970934e-05, "loss": 63.9707, "step": 106460 }, { "epoch": 0.4301522723691706, "grad_norm": 465.209716796875, "learning_rate": 3.54958073644406e-05, "loss": 51.9835, "step": 106470 }, { "epoch": 0.4301926736345382, "grad_norm": 348.4991760253906, "learning_rate": 3.5492639134550695e-05, "loss": 84.2273, "step": 106480 }, { "epoch": 0.43023307489990587, "grad_norm": 1531.664794921875, "learning_rate": 3.548947070010138e-05, "loss": 81.6072, "step": 106490 }, { "epoch": 0.4302734761652735, "grad_norm": 797.4585571289062, "learning_rate": 3.548630206115443e-05, "loss": 56.6595, "step": 106500 }, { "epoch": 0.43031387743064115, "grad_norm": 412.25360107421875, "learning_rate": 3.5483133217771625e-05, "loss": 57.9216, "step": 106510 }, { "epoch": 0.4303542786960088, "grad_norm": 647.94921875, "learning_rate": 3.5479964170014746e-05, "loss": 99.7013, "step": 106520 }, { "epoch": 0.43039467996137637, "grad_norm": 649.0404052734375, "learning_rate": 3.547679491794557e-05, "loss": 46.1641, "step": 106530 }, { "epoch": 0.430435081226744, "grad_norm": 434.8841552734375, "learning_rate": 3.547362546162588e-05, "loss": 61.1318, "step": 106540 }, { "epoch": 0.43047548249211165, "grad_norm": 733.9017333984375, "learning_rate": 3.547045580111746e-05, "loss": 49.06, "step": 106550 }, { "epoch": 0.4305158837574793, "grad_norm": 721.9252319335938, "learning_rate": 3.546728593648213e-05, "loss": 61.6053, "step": 106560 }, { "epoch": 0.43055628502284693, "grad_norm": 553.4278564453125, "learning_rate": 3.546411586778167e-05, "loss": 59.98, "step": 106570 }, { "epoch": 0.43059668628821457, "grad_norm": 410.0889892578125, "learning_rate": 3.546094559507787e-05, "loss": 44.391, "step": 106580 }, { "epoch": 0.43063708755358215, "grad_norm": 429.70361328125, "learning_rate": 3.5457775118432556e-05, "loss": 90.1694, "step": 106590 }, { "epoch": 0.4306774888189498, "grad_norm": 899.5925903320312, "learning_rate": 3.545460443790753e-05, "loss": 79.3271, "step": 106600 }, { "epoch": 0.43071789008431743, "grad_norm": 1410.063232421875, "learning_rate": 3.545143355356462e-05, "loss": 75.9909, "step": 106610 }, { "epoch": 0.4307582913496851, "grad_norm": 1006.2098999023438, "learning_rate": 3.544826246546563e-05, "loss": 61.4801, "step": 106620 }, { "epoch": 0.4307986926150527, "grad_norm": 618.147216796875, "learning_rate": 3.544509117367238e-05, "loss": 56.4724, "step": 106630 }, { "epoch": 0.43083909388042035, "grad_norm": 866.3955078125, "learning_rate": 3.544191967824669e-05, "loss": 47.1831, "step": 106640 }, { "epoch": 0.43087949514578794, "grad_norm": 802.1903686523438, "learning_rate": 3.543874797925042e-05, "loss": 62.7335, "step": 106650 }, { "epoch": 0.4309198964111556, "grad_norm": 722.4039306640625, "learning_rate": 3.543557607674537e-05, "loss": 57.5358, "step": 106660 }, { "epoch": 0.4309602976765232, "grad_norm": 467.41937255859375, "learning_rate": 3.543240397079339e-05, "loss": 42.7253, "step": 106670 }, { "epoch": 0.43100069894189086, "grad_norm": 421.191162109375, "learning_rate": 3.542923166145633e-05, "loss": 60.0862, "step": 106680 }, { "epoch": 0.4310411002072585, "grad_norm": 797.8417358398438, "learning_rate": 3.542605914879603e-05, "loss": 69.1785, "step": 106690 }, { "epoch": 0.43108150147262614, "grad_norm": 1393.1776123046875, "learning_rate": 3.542288643287434e-05, "loss": 42.2223, "step": 106700 }, { "epoch": 0.4311219027379938, "grad_norm": 699.2664184570312, "learning_rate": 3.5419713513753114e-05, "loss": 71.7247, "step": 106710 }, { "epoch": 0.43116230400336136, "grad_norm": 1001.225830078125, "learning_rate": 3.54165403914942e-05, "loss": 54.1351, "step": 106720 }, { "epoch": 0.431202705268729, "grad_norm": 587.420166015625, "learning_rate": 3.541336706615947e-05, "loss": 62.8683, "step": 106730 }, { "epoch": 0.43124310653409664, "grad_norm": 481.1246643066406, "learning_rate": 3.541019353781079e-05, "loss": 48.2253, "step": 106740 }, { "epoch": 0.4312835077994643, "grad_norm": 694.5806274414062, "learning_rate": 3.540701980651003e-05, "loss": 50.7718, "step": 106750 }, { "epoch": 0.4313239090648319, "grad_norm": 1048.5947265625, "learning_rate": 3.540384587231906e-05, "loss": 78.4464, "step": 106760 }, { "epoch": 0.43136431033019956, "grad_norm": 372.85406494140625, "learning_rate": 3.540067173529976e-05, "loss": 48.4107, "step": 106770 }, { "epoch": 0.43140471159556715, "grad_norm": 2308.911865234375, "learning_rate": 3.5397497395514004e-05, "loss": 73.4827, "step": 106780 }, { "epoch": 0.4314451128609348, "grad_norm": 183.2915802001953, "learning_rate": 3.5394322853023694e-05, "loss": 35.4445, "step": 106790 }, { "epoch": 0.4314855141263024, "grad_norm": 4619.44140625, "learning_rate": 3.53911481078907e-05, "loss": 50.2226, "step": 106800 }, { "epoch": 0.43152591539167007, "grad_norm": 873.1148071289062, "learning_rate": 3.5387973160176926e-05, "loss": 51.0404, "step": 106810 }, { "epoch": 0.4315663166570377, "grad_norm": 480.3205871582031, "learning_rate": 3.538479800994426e-05, "loss": 41.806, "step": 106820 }, { "epoch": 0.43160671792240535, "grad_norm": 560.2127685546875, "learning_rate": 3.538162265725462e-05, "loss": 34.7096, "step": 106830 }, { "epoch": 0.431647119187773, "grad_norm": 800.5224609375, "learning_rate": 3.5378447102169895e-05, "loss": 83.1514, "step": 106840 }, { "epoch": 0.43168752045314057, "grad_norm": 403.2615661621094, "learning_rate": 3.537527134475201e-05, "loss": 81.7792, "step": 106850 }, { "epoch": 0.4317279217185082, "grad_norm": 613.8062744140625, "learning_rate": 3.537209538506286e-05, "loss": 51.7972, "step": 106860 }, { "epoch": 0.43176832298387585, "grad_norm": 794.0146484375, "learning_rate": 3.5368919223164374e-05, "loss": 78.0091, "step": 106870 }, { "epoch": 0.4318087242492435, "grad_norm": 877.7191162109375, "learning_rate": 3.536574285911847e-05, "loss": 47.9676, "step": 106880 }, { "epoch": 0.43184912551461113, "grad_norm": 1015.1839599609375, "learning_rate": 3.5362566292987076e-05, "loss": 56.4783, "step": 106890 }, { "epoch": 0.43188952677997877, "grad_norm": 519.8533935546875, "learning_rate": 3.535938952483211e-05, "loss": 53.7836, "step": 106900 }, { "epoch": 0.43192992804534636, "grad_norm": 356.93109130859375, "learning_rate": 3.5356212554715506e-05, "loss": 63.2428, "step": 106910 }, { "epoch": 0.431970329310714, "grad_norm": 694.5162963867188, "learning_rate": 3.535303538269922e-05, "loss": 33.9129, "step": 106920 }, { "epoch": 0.43201073057608164, "grad_norm": 796.4173583984375, "learning_rate": 3.534985800884517e-05, "loss": 58.3042, "step": 106930 }, { "epoch": 0.4320511318414493, "grad_norm": 0.0, "learning_rate": 3.5346680433215316e-05, "loss": 38.0315, "step": 106940 }, { "epoch": 0.4320915331068169, "grad_norm": 570.5447387695312, "learning_rate": 3.5343502655871594e-05, "loss": 58.1119, "step": 106950 }, { "epoch": 0.43213193437218456, "grad_norm": 2234.41162109375, "learning_rate": 3.534032467687597e-05, "loss": 50.3924, "step": 106960 }, { "epoch": 0.43217233563755214, "grad_norm": 470.1455993652344, "learning_rate": 3.533714649629039e-05, "loss": 51.558, "step": 106970 }, { "epoch": 0.4322127369029198, "grad_norm": 641.6006469726562, "learning_rate": 3.533396811417682e-05, "loss": 46.6653, "step": 106980 }, { "epoch": 0.4322531381682874, "grad_norm": 1771.727294921875, "learning_rate": 3.533078953059721e-05, "loss": 42.6348, "step": 106990 }, { "epoch": 0.43229353943365506, "grad_norm": 885.59765625, "learning_rate": 3.532761074561355e-05, "loss": 58.1762, "step": 107000 }, { "epoch": 0.4323339406990227, "grad_norm": 618.5126953125, "learning_rate": 3.5324431759287796e-05, "loss": 43.6578, "step": 107010 }, { "epoch": 0.43237434196439034, "grad_norm": 740.6704711914062, "learning_rate": 3.532125257168193e-05, "loss": 49.2011, "step": 107020 }, { "epoch": 0.432414743229758, "grad_norm": 713.0264892578125, "learning_rate": 3.531807318285793e-05, "loss": 67.6269, "step": 107030 }, { "epoch": 0.43245514449512557, "grad_norm": 793.6551513671875, "learning_rate": 3.531489359287779e-05, "loss": 78.4188, "step": 107040 }, { "epoch": 0.4324955457604932, "grad_norm": 1205.0350341796875, "learning_rate": 3.531171380180348e-05, "loss": 77.0156, "step": 107050 }, { "epoch": 0.43253594702586085, "grad_norm": 1664.7984619140625, "learning_rate": 3.530853380969701e-05, "loss": 51.596, "step": 107060 }, { "epoch": 0.4325763482912285, "grad_norm": 353.55523681640625, "learning_rate": 3.5305353616620355e-05, "loss": 36.4841, "step": 107070 }, { "epoch": 0.4326167495565961, "grad_norm": 607.7645263671875, "learning_rate": 3.5302173222635524e-05, "loss": 56.0603, "step": 107080 }, { "epoch": 0.43265715082196377, "grad_norm": 688.8688354492188, "learning_rate": 3.529899262780453e-05, "loss": 60.969, "step": 107090 }, { "epoch": 0.43269755208733135, "grad_norm": 431.1716003417969, "learning_rate": 3.529581183218937e-05, "loss": 43.1526, "step": 107100 }, { "epoch": 0.432737953352699, "grad_norm": 1323.6634521484375, "learning_rate": 3.529263083585206e-05, "loss": 64.5691, "step": 107110 }, { "epoch": 0.43277835461806663, "grad_norm": 650.7156372070312, "learning_rate": 3.528944963885461e-05, "loss": 24.5781, "step": 107120 }, { "epoch": 0.43281875588343427, "grad_norm": 1326.6907958984375, "learning_rate": 3.528626824125905e-05, "loss": 55.9819, "step": 107130 }, { "epoch": 0.4328591571488019, "grad_norm": 727.043701171875, "learning_rate": 3.528308664312739e-05, "loss": 44.3257, "step": 107140 }, { "epoch": 0.43289955841416955, "grad_norm": 719.6072998046875, "learning_rate": 3.527990484452166e-05, "loss": 46.9241, "step": 107150 }, { "epoch": 0.4329399596795372, "grad_norm": 1634.4859619140625, "learning_rate": 3.527672284550389e-05, "loss": 87.1335, "step": 107160 }, { "epoch": 0.4329803609449048, "grad_norm": 281.7586975097656, "learning_rate": 3.527354064613612e-05, "loss": 70.6858, "step": 107170 }, { "epoch": 0.4330207622102724, "grad_norm": 1065.198974609375, "learning_rate": 3.5270358246480386e-05, "loss": 79.6831, "step": 107180 }, { "epoch": 0.43306116347564005, "grad_norm": 263.0101623535156, "learning_rate": 3.526717564659873e-05, "loss": 61.8488, "step": 107190 }, { "epoch": 0.4331015647410077, "grad_norm": 918.0792236328125, "learning_rate": 3.52639928465532e-05, "loss": 56.7985, "step": 107200 }, { "epoch": 0.43314196600637533, "grad_norm": 454.3590393066406, "learning_rate": 3.526080984640585e-05, "loss": 59.4921, "step": 107210 }, { "epoch": 0.433182367271743, "grad_norm": 332.873291015625, "learning_rate": 3.525762664621872e-05, "loss": 34.6081, "step": 107220 }, { "epoch": 0.43322276853711056, "grad_norm": 1424.8919677734375, "learning_rate": 3.5254443246053886e-05, "loss": 54.6382, "step": 107230 }, { "epoch": 0.4332631698024782, "grad_norm": 1493.1998291015625, "learning_rate": 3.5251259645973394e-05, "loss": 77.3529, "step": 107240 }, { "epoch": 0.43330357106784584, "grad_norm": 310.91943359375, "learning_rate": 3.524807584603932e-05, "loss": 58.498, "step": 107250 }, { "epoch": 0.4333439723332135, "grad_norm": 713.1577758789062, "learning_rate": 3.5244891846313736e-05, "loss": 48.7548, "step": 107260 }, { "epoch": 0.4333843735985811, "grad_norm": 746.4721069335938, "learning_rate": 3.5241707646858703e-05, "loss": 67.363, "step": 107270 }, { "epoch": 0.43342477486394876, "grad_norm": 1737.3177490234375, "learning_rate": 3.523852324773631e-05, "loss": 58.3859, "step": 107280 }, { "epoch": 0.43346517612931634, "grad_norm": 240.0654754638672, "learning_rate": 3.523533864900863e-05, "loss": 89.2563, "step": 107290 }, { "epoch": 0.433505577394684, "grad_norm": 697.5162353515625, "learning_rate": 3.523215385073777e-05, "loss": 66.2658, "step": 107300 }, { "epoch": 0.4335459786600516, "grad_norm": 527.6311645507812, "learning_rate": 3.52289688529858e-05, "loss": 39.8144, "step": 107310 }, { "epoch": 0.43358637992541926, "grad_norm": 1482.4761962890625, "learning_rate": 3.5225783655814796e-05, "loss": 56.7785, "step": 107320 }, { "epoch": 0.4336267811907869, "grad_norm": 1850.587646484375, "learning_rate": 3.522259825928689e-05, "loss": 69.2113, "step": 107330 }, { "epoch": 0.43366718245615454, "grad_norm": 821.7341918945312, "learning_rate": 3.5219412663464167e-05, "loss": 85.9736, "step": 107340 }, { "epoch": 0.4337075837215222, "grad_norm": 1092.7030029296875, "learning_rate": 3.521622686840873e-05, "loss": 74.6831, "step": 107350 }, { "epoch": 0.43374798498688977, "grad_norm": 633.818115234375, "learning_rate": 3.521304087418269e-05, "loss": 64.548, "step": 107360 }, { "epoch": 0.4337883862522574, "grad_norm": 994.1685791015625, "learning_rate": 3.520985468084816e-05, "loss": 31.1137, "step": 107370 }, { "epoch": 0.43382878751762505, "grad_norm": 887.774169921875, "learning_rate": 3.520666828846726e-05, "loss": 52.128, "step": 107380 }, { "epoch": 0.4338691887829927, "grad_norm": 478.0501708984375, "learning_rate": 3.52034816971021e-05, "loss": 60.6667, "step": 107390 }, { "epoch": 0.4339095900483603, "grad_norm": 1626.81884765625, "learning_rate": 3.5200294906814824e-05, "loss": 108.8241, "step": 107400 }, { "epoch": 0.43394999131372797, "grad_norm": 482.78741455078125, "learning_rate": 3.519710791766754e-05, "loss": 57.1852, "step": 107410 }, { "epoch": 0.43399039257909555, "grad_norm": 536.291259765625, "learning_rate": 3.5193920729722384e-05, "loss": 55.354, "step": 107420 }, { "epoch": 0.4340307938444632, "grad_norm": 395.05181884765625, "learning_rate": 3.51907333430415e-05, "loss": 51.6174, "step": 107430 }, { "epoch": 0.43407119510983083, "grad_norm": 504.0698547363281, "learning_rate": 3.5187545757687015e-05, "loss": 48.5033, "step": 107440 }, { "epoch": 0.43411159637519847, "grad_norm": 938.1822509765625, "learning_rate": 3.518435797372109e-05, "loss": 77.9516, "step": 107450 }, { "epoch": 0.4341519976405661, "grad_norm": 542.053955078125, "learning_rate": 3.5181169991205866e-05, "loss": 50.1242, "step": 107460 }, { "epoch": 0.43419239890593375, "grad_norm": 945.6251831054688, "learning_rate": 3.517798181020348e-05, "loss": 82.4342, "step": 107470 }, { "epoch": 0.4342328001713014, "grad_norm": 841.2509765625, "learning_rate": 3.517479343077611e-05, "loss": 48.6936, "step": 107480 }, { "epoch": 0.434273201436669, "grad_norm": 1135.741943359375, "learning_rate": 3.517160485298589e-05, "loss": 57.5404, "step": 107490 }, { "epoch": 0.4343136027020366, "grad_norm": 634.0276489257812, "learning_rate": 3.516841607689501e-05, "loss": 57.2011, "step": 107500 }, { "epoch": 0.43435400396740426, "grad_norm": 684.9427490234375, "learning_rate": 3.516522710256562e-05, "loss": 63.644, "step": 107510 }, { "epoch": 0.4343944052327719, "grad_norm": 707.3285522460938, "learning_rate": 3.516203793005989e-05, "loss": 44.6688, "step": 107520 }, { "epoch": 0.43443480649813954, "grad_norm": 765.8327026367188, "learning_rate": 3.515884855944e-05, "loss": 46.0717, "step": 107530 }, { "epoch": 0.4344752077635072, "grad_norm": 708.8120727539062, "learning_rate": 3.515565899076813e-05, "loss": 47.5356, "step": 107540 }, { "epoch": 0.43451560902887476, "grad_norm": 779.5784912109375, "learning_rate": 3.5152469224106454e-05, "loss": 50.02, "step": 107550 }, { "epoch": 0.4345560102942424, "grad_norm": 638.3084106445312, "learning_rate": 3.514927925951717e-05, "loss": 69.4422, "step": 107560 }, { "epoch": 0.43459641155961004, "grad_norm": 0.0, "learning_rate": 3.5146089097062456e-05, "loss": 48.1131, "step": 107570 }, { "epoch": 0.4346368128249777, "grad_norm": 613.4315185546875, "learning_rate": 3.514289873680451e-05, "loss": 39.7531, "step": 107580 }, { "epoch": 0.4346772140903453, "grad_norm": 596.2418212890625, "learning_rate": 3.513970817880554e-05, "loss": 46.1025, "step": 107590 }, { "epoch": 0.43471761535571296, "grad_norm": 742.696044921875, "learning_rate": 3.513651742312774e-05, "loss": 50.7197, "step": 107600 }, { "epoch": 0.43475801662108055, "grad_norm": 517.7498168945312, "learning_rate": 3.51333264698333e-05, "loss": 72.2126, "step": 107610 }, { "epoch": 0.4347984178864482, "grad_norm": 336.8522644042969, "learning_rate": 3.5130135318984456e-05, "loss": 97.5587, "step": 107620 }, { "epoch": 0.4348388191518158, "grad_norm": 1882.3516845703125, "learning_rate": 3.512694397064341e-05, "loss": 46.2149, "step": 107630 }, { "epoch": 0.43487922041718347, "grad_norm": 852.6275024414062, "learning_rate": 3.512375242487236e-05, "loss": 59.0864, "step": 107640 }, { "epoch": 0.4349196216825511, "grad_norm": 1158.6456298828125, "learning_rate": 3.512056068173356e-05, "loss": 86.1055, "step": 107650 }, { "epoch": 0.43496002294791875, "grad_norm": 1050.2406005859375, "learning_rate": 3.511736874128922e-05, "loss": 47.8806, "step": 107660 }, { "epoch": 0.4350004242132864, "grad_norm": 576.0054931640625, "learning_rate": 3.5114176603601564e-05, "loss": 44.2818, "step": 107670 }, { "epoch": 0.43504082547865397, "grad_norm": 3012.95849609375, "learning_rate": 3.511098426873283e-05, "loss": 75.0666, "step": 107680 }, { "epoch": 0.4350812267440216, "grad_norm": 1107.3419189453125, "learning_rate": 3.5107791736745244e-05, "loss": 47.5885, "step": 107690 }, { "epoch": 0.43512162800938925, "grad_norm": 486.3786926269531, "learning_rate": 3.5104599007701054e-05, "loss": 52.94, "step": 107700 }, { "epoch": 0.4351620292747569, "grad_norm": 1440.493896484375, "learning_rate": 3.510140608166251e-05, "loss": 44.7074, "step": 107710 }, { "epoch": 0.43520243054012453, "grad_norm": 665.9118041992188, "learning_rate": 3.5098212958691854e-05, "loss": 84.7358, "step": 107720 }, { "epoch": 0.43524283180549217, "grad_norm": 670.6646118164062, "learning_rate": 3.509501963885134e-05, "loss": 47.459, "step": 107730 }, { "epoch": 0.43528323307085975, "grad_norm": 1127.4244384765625, "learning_rate": 3.509182612220322e-05, "loss": 66.771, "step": 107740 }, { "epoch": 0.4353236343362274, "grad_norm": 1750.5572509765625, "learning_rate": 3.5088632408809755e-05, "loss": 100.8368, "step": 107750 }, { "epoch": 0.43536403560159503, "grad_norm": 694.012451171875, "learning_rate": 3.50854384987332e-05, "loss": 67.874, "step": 107760 }, { "epoch": 0.4354044368669627, "grad_norm": 543.4619750976562, "learning_rate": 3.508224439203583e-05, "loss": 48.8384, "step": 107770 }, { "epoch": 0.4354448381323303, "grad_norm": 1029.557373046875, "learning_rate": 3.5079050088779926e-05, "loss": 41.3493, "step": 107780 }, { "epoch": 0.43548523939769795, "grad_norm": 404.4317321777344, "learning_rate": 3.5075855589027746e-05, "loss": 59.9128, "step": 107790 }, { "epoch": 0.4355256406630656, "grad_norm": 459.72357177734375, "learning_rate": 3.507266089284157e-05, "loss": 49.0815, "step": 107800 }, { "epoch": 0.4355660419284332, "grad_norm": 1144.3963623046875, "learning_rate": 3.506946600028368e-05, "loss": 93.688, "step": 107810 }, { "epoch": 0.4356064431938008, "grad_norm": 1337.915771484375, "learning_rate": 3.5066270911416373e-05, "loss": 41.8152, "step": 107820 }, { "epoch": 0.43564684445916846, "grad_norm": 537.2900390625, "learning_rate": 3.506307562630194e-05, "loss": 33.384, "step": 107830 }, { "epoch": 0.4356872457245361, "grad_norm": 676.332275390625, "learning_rate": 3.5059880145002654e-05, "loss": 72.9059, "step": 107840 }, { "epoch": 0.43572764698990374, "grad_norm": 1162.9178466796875, "learning_rate": 3.505668446758083e-05, "loss": 46.9766, "step": 107850 }, { "epoch": 0.4357680482552714, "grad_norm": 1080.3890380859375, "learning_rate": 3.505348859409876e-05, "loss": 53.5876, "step": 107860 }, { "epoch": 0.43580844952063896, "grad_norm": 1019.6876220703125, "learning_rate": 3.5050292524618764e-05, "loss": 39.1764, "step": 107870 }, { "epoch": 0.4358488507860066, "grad_norm": 521.6009521484375, "learning_rate": 3.5047096259203135e-05, "loss": 69.4503, "step": 107880 }, { "epoch": 0.43588925205137424, "grad_norm": 1152.7193603515625, "learning_rate": 3.5043899797914187e-05, "loss": 74.8687, "step": 107890 }, { "epoch": 0.4359296533167419, "grad_norm": 993.6572265625, "learning_rate": 3.504070314081425e-05, "loss": 64.1491, "step": 107900 }, { "epoch": 0.4359700545821095, "grad_norm": 3389.17578125, "learning_rate": 3.503750628796563e-05, "loss": 62.572, "step": 107910 }, { "epoch": 0.43601045584747716, "grad_norm": 1042.504150390625, "learning_rate": 3.503430923943066e-05, "loss": 51.6757, "step": 107920 }, { "epoch": 0.43605085711284475, "grad_norm": 1820.093505859375, "learning_rate": 3.503111199527167e-05, "loss": 46.9761, "step": 107930 }, { "epoch": 0.4360912583782124, "grad_norm": 530.4893798828125, "learning_rate": 3.5027914555550976e-05, "loss": 80.2376, "step": 107940 }, { "epoch": 0.43613165964358, "grad_norm": 673.11767578125, "learning_rate": 3.502471692033094e-05, "loss": 36.759, "step": 107950 }, { "epoch": 0.43617206090894767, "grad_norm": 979.2339477539062, "learning_rate": 3.5021519089673876e-05, "loss": 53.1278, "step": 107960 }, { "epoch": 0.4362124621743153, "grad_norm": 533.0077514648438, "learning_rate": 3.501832106364213e-05, "loss": 44.0872, "step": 107970 }, { "epoch": 0.43625286343968295, "grad_norm": 2183.581298828125, "learning_rate": 3.501512284229807e-05, "loss": 74.3517, "step": 107980 }, { "epoch": 0.4362932647050506, "grad_norm": 1532.984375, "learning_rate": 3.5011924425704036e-05, "loss": 55.7305, "step": 107990 }, { "epoch": 0.43633366597041817, "grad_norm": 794.1177978515625, "learning_rate": 3.5008725813922386e-05, "loss": 39.9372, "step": 108000 }, { "epoch": 0.4363740672357858, "grad_norm": 489.5924072265625, "learning_rate": 3.5005527007015455e-05, "loss": 51.468, "step": 108010 }, { "epoch": 0.43641446850115345, "grad_norm": 1078.9996337890625, "learning_rate": 3.500232800504563e-05, "loss": 69.4806, "step": 108020 }, { "epoch": 0.4364548697665211, "grad_norm": 880.2926025390625, "learning_rate": 3.499912880807528e-05, "loss": 53.5359, "step": 108030 }, { "epoch": 0.43649527103188873, "grad_norm": 827.0285034179688, "learning_rate": 3.4995929416166756e-05, "loss": 50.6137, "step": 108040 }, { "epoch": 0.43653567229725637, "grad_norm": 720.4219970703125, "learning_rate": 3.499272982938244e-05, "loss": 35.6677, "step": 108050 }, { "epoch": 0.43657607356262396, "grad_norm": 739.00927734375, "learning_rate": 3.4989530047784716e-05, "loss": 70.8884, "step": 108060 }, { "epoch": 0.4366164748279916, "grad_norm": 387.0387878417969, "learning_rate": 3.498633007143596e-05, "loss": 47.103, "step": 108070 }, { "epoch": 0.43665687609335924, "grad_norm": 912.795654296875, "learning_rate": 3.498312990039856e-05, "loss": 56.658, "step": 108080 }, { "epoch": 0.4366972773587269, "grad_norm": 1193.864990234375, "learning_rate": 3.497992953473491e-05, "loss": 77.9862, "step": 108090 }, { "epoch": 0.4367376786240945, "grad_norm": 711.3486938476562, "learning_rate": 3.4976728974507384e-05, "loss": 31.2605, "step": 108100 }, { "epoch": 0.43677807988946216, "grad_norm": 1228.09033203125, "learning_rate": 3.497352821977839e-05, "loss": 42.339, "step": 108110 }, { "epoch": 0.4368184811548298, "grad_norm": 524.43798828125, "learning_rate": 3.497032727061034e-05, "loss": 59.911, "step": 108120 }, { "epoch": 0.4368588824201974, "grad_norm": 255.4713134765625, "learning_rate": 3.496712612706561e-05, "loss": 63.1904, "step": 108130 }, { "epoch": 0.436899283685565, "grad_norm": 261.4523620605469, "learning_rate": 3.4963924789206636e-05, "loss": 88.5668, "step": 108140 }, { "epoch": 0.43693968495093266, "grad_norm": 680.0599365234375, "learning_rate": 3.496072325709582e-05, "loss": 45.9959, "step": 108150 }, { "epoch": 0.4369800862163003, "grad_norm": 402.27239990234375, "learning_rate": 3.495752153079557e-05, "loss": 50.0955, "step": 108160 }, { "epoch": 0.43702048748166794, "grad_norm": 0.0, "learning_rate": 3.495431961036832e-05, "loss": 44.4594, "step": 108170 }, { "epoch": 0.4370608887470356, "grad_norm": 455.03326416015625, "learning_rate": 3.495111749587647e-05, "loss": 51.1414, "step": 108180 }, { "epoch": 0.43710129001240317, "grad_norm": 438.03314208984375, "learning_rate": 3.494791518738247e-05, "loss": 112.6708, "step": 108190 }, { "epoch": 0.4371416912777708, "grad_norm": 547.6517333984375, "learning_rate": 3.494471268494875e-05, "loss": 31.8104, "step": 108200 }, { "epoch": 0.43718209254313845, "grad_norm": 541.253173828125, "learning_rate": 3.494150998863772e-05, "loss": 63.8281, "step": 108210 }, { "epoch": 0.4372224938085061, "grad_norm": 710.3536376953125, "learning_rate": 3.4938307098511846e-05, "loss": 43.2936, "step": 108220 }, { "epoch": 0.4372628950738737, "grad_norm": 795.8914184570312, "learning_rate": 3.493510401463355e-05, "loss": 48.5519, "step": 108230 }, { "epoch": 0.43730329633924137, "grad_norm": 1529.8331298828125, "learning_rate": 3.493190073706529e-05, "loss": 42.2938, "step": 108240 }, { "epoch": 0.43734369760460895, "grad_norm": 1139.19189453125, "learning_rate": 3.4928697265869515e-05, "loss": 43.6591, "step": 108250 }, { "epoch": 0.4373840988699766, "grad_norm": 427.7257080078125, "learning_rate": 3.492549360110868e-05, "loss": 126.2329, "step": 108260 }, { "epoch": 0.43742450013534423, "grad_norm": 489.84521484375, "learning_rate": 3.4922289742845224e-05, "loss": 51.3288, "step": 108270 }, { "epoch": 0.43746490140071187, "grad_norm": 0.0, "learning_rate": 3.491908569114164e-05, "loss": 57.9718, "step": 108280 }, { "epoch": 0.4375053026660795, "grad_norm": 901.902099609375, "learning_rate": 3.491588144606035e-05, "loss": 58.3395, "step": 108290 }, { "epoch": 0.43754570393144715, "grad_norm": 549.3409423828125, "learning_rate": 3.491267700766386e-05, "loss": 82.0675, "step": 108300 }, { "epoch": 0.4375861051968148, "grad_norm": 490.4176025390625, "learning_rate": 3.490947237601462e-05, "loss": 52.1806, "step": 108310 }, { "epoch": 0.4376265064621824, "grad_norm": 650.8810424804688, "learning_rate": 3.4906267551175124e-05, "loss": 76.2948, "step": 108320 }, { "epoch": 0.43766690772755, "grad_norm": 959.3955688476562, "learning_rate": 3.4903062533207834e-05, "loss": 76.9859, "step": 108330 }, { "epoch": 0.43770730899291765, "grad_norm": 609.113037109375, "learning_rate": 3.489985732217525e-05, "loss": 34.4562, "step": 108340 }, { "epoch": 0.4377477102582853, "grad_norm": 2289.5068359375, "learning_rate": 3.4896651918139845e-05, "loss": 71.8435, "step": 108350 }, { "epoch": 0.43778811152365293, "grad_norm": 0.0, "learning_rate": 3.489344632116412e-05, "loss": 54.8811, "step": 108360 }, { "epoch": 0.4378285127890206, "grad_norm": 416.5489196777344, "learning_rate": 3.489024053131056e-05, "loss": 54.7843, "step": 108370 }, { "epoch": 0.43786891405438816, "grad_norm": 850.0728149414062, "learning_rate": 3.488703454864167e-05, "loss": 83.0363, "step": 108380 }, { "epoch": 0.4379093153197558, "grad_norm": 580.010498046875, "learning_rate": 3.488382837321995e-05, "loss": 45.165, "step": 108390 }, { "epoch": 0.43794971658512344, "grad_norm": 684.89111328125, "learning_rate": 3.488062200510791e-05, "loss": 78.0684, "step": 108400 }, { "epoch": 0.4379901178504911, "grad_norm": 1013.9144287109375, "learning_rate": 3.487741544436806e-05, "loss": 63.4015, "step": 108410 }, { "epoch": 0.4380305191158587, "grad_norm": 758.853759765625, "learning_rate": 3.48742086910629e-05, "loss": 71.6265, "step": 108420 }, { "epoch": 0.43807092038122636, "grad_norm": 398.92095947265625, "learning_rate": 3.487100174525498e-05, "loss": 43.4276, "step": 108430 }, { "epoch": 0.438111321646594, "grad_norm": 313.7893371582031, "learning_rate": 3.4867794607006784e-05, "loss": 48.0121, "step": 108440 }, { "epoch": 0.4381517229119616, "grad_norm": 1349.3988037109375, "learning_rate": 3.486458727638085e-05, "loss": 70.9725, "step": 108450 }, { "epoch": 0.4381921241773292, "grad_norm": 433.0930480957031, "learning_rate": 3.486137975343971e-05, "loss": 61.4865, "step": 108460 }, { "epoch": 0.43823252544269686, "grad_norm": 1083.5059814453125, "learning_rate": 3.48581720382459e-05, "loss": 54.0927, "step": 108470 }, { "epoch": 0.4382729267080645, "grad_norm": 2715.4462890625, "learning_rate": 3.485496413086195e-05, "loss": 87.5101, "step": 108480 }, { "epoch": 0.43831332797343214, "grad_norm": 566.393798828125, "learning_rate": 3.4851756031350394e-05, "loss": 56.3198, "step": 108490 }, { "epoch": 0.4383537292387998, "grad_norm": 815.7106323242188, "learning_rate": 3.484854773977378e-05, "loss": 61.3737, "step": 108500 }, { "epoch": 0.43839413050416737, "grad_norm": 807.414306640625, "learning_rate": 3.4845339256194666e-05, "loss": 44.4692, "step": 108510 }, { "epoch": 0.438434531769535, "grad_norm": 984.805419921875, "learning_rate": 3.484213058067559e-05, "loss": 75.2083, "step": 108520 }, { "epoch": 0.43847493303490265, "grad_norm": 1864.611083984375, "learning_rate": 3.483892171327911e-05, "loss": 86.4765, "step": 108530 }, { "epoch": 0.4385153343002703, "grad_norm": 1487.40869140625, "learning_rate": 3.4835712654067785e-05, "loss": 76.399, "step": 108540 }, { "epoch": 0.4385557355656379, "grad_norm": 735.3146362304688, "learning_rate": 3.483250340310418e-05, "loss": 70.6352, "step": 108550 }, { "epoch": 0.43859613683100557, "grad_norm": 807.2081298828125, "learning_rate": 3.482929396045087e-05, "loss": 48.8148, "step": 108560 }, { "epoch": 0.43863653809637315, "grad_norm": 632.7271118164062, "learning_rate": 3.48260843261704e-05, "loss": 77.7649, "step": 108570 }, { "epoch": 0.4386769393617408, "grad_norm": 897.5272216796875, "learning_rate": 3.482287450032536e-05, "loss": 51.7576, "step": 108580 }, { "epoch": 0.43871734062710843, "grad_norm": 753.651611328125, "learning_rate": 3.4819664482978325e-05, "loss": 71.9576, "step": 108590 }, { "epoch": 0.43875774189247607, "grad_norm": 524.542724609375, "learning_rate": 3.481645427419188e-05, "loss": 51.1556, "step": 108600 }, { "epoch": 0.4387981431578437, "grad_norm": 578.78173828125, "learning_rate": 3.48132438740286e-05, "loss": 42.8052, "step": 108610 }, { "epoch": 0.43883854442321135, "grad_norm": 634.4146118164062, "learning_rate": 3.481003328255108e-05, "loss": 56.4694, "step": 108620 }, { "epoch": 0.438878945688579, "grad_norm": 823.702392578125, "learning_rate": 3.480682249982191e-05, "loss": 51.0967, "step": 108630 }, { "epoch": 0.4389193469539466, "grad_norm": 625.2899169921875, "learning_rate": 3.4803611525903685e-05, "loss": 55.0041, "step": 108640 }, { "epoch": 0.4389597482193142, "grad_norm": 251.00901794433594, "learning_rate": 3.480040036085901e-05, "loss": 52.5765, "step": 108650 }, { "epoch": 0.43900014948468186, "grad_norm": 1115.88134765625, "learning_rate": 3.479718900475049e-05, "loss": 83.0873, "step": 108660 }, { "epoch": 0.4390405507500495, "grad_norm": 9209.01953125, "learning_rate": 3.479397745764071e-05, "loss": 142.5869, "step": 108670 }, { "epoch": 0.43908095201541714, "grad_norm": 704.7980346679688, "learning_rate": 3.479076571959231e-05, "loss": 47.6946, "step": 108680 }, { "epoch": 0.4391213532807848, "grad_norm": 1096.587890625, "learning_rate": 3.4787553790667896e-05, "loss": 57.3606, "step": 108690 }, { "epoch": 0.43916175454615236, "grad_norm": 685.4246215820312, "learning_rate": 3.4784341670930065e-05, "loss": 52.4217, "step": 108700 }, { "epoch": 0.43920215581152, "grad_norm": 385.501220703125, "learning_rate": 3.478112936044146e-05, "loss": 43.921, "step": 108710 }, { "epoch": 0.43924255707688764, "grad_norm": 648.174560546875, "learning_rate": 3.477791685926471e-05, "loss": 48.4695, "step": 108720 }, { "epoch": 0.4392829583422553, "grad_norm": 201.7768096923828, "learning_rate": 3.4774704167462434e-05, "loss": 63.3514, "step": 108730 }, { "epoch": 0.4393233596076229, "grad_norm": 579.0665283203125, "learning_rate": 3.477149128509727e-05, "loss": 59.8261, "step": 108740 }, { "epoch": 0.43936376087299056, "grad_norm": 727.160888671875, "learning_rate": 3.476827821223184e-05, "loss": 48.0612, "step": 108750 }, { "epoch": 0.4394041621383582, "grad_norm": 842.6546020507812, "learning_rate": 3.4765064948928814e-05, "loss": 52.5195, "step": 108760 }, { "epoch": 0.4394445634037258, "grad_norm": 680.0587158203125, "learning_rate": 3.4761851495250816e-05, "loss": 83.2483, "step": 108770 }, { "epoch": 0.4394849646690934, "grad_norm": 946.002197265625, "learning_rate": 3.475863785126049e-05, "loss": 61.0262, "step": 108780 }, { "epoch": 0.43952536593446107, "grad_norm": 1292.424072265625, "learning_rate": 3.47554240170205e-05, "loss": 117.6964, "step": 108790 }, { "epoch": 0.4395657671998287, "grad_norm": 593.932861328125, "learning_rate": 3.475220999259349e-05, "loss": 60.331, "step": 108800 }, { "epoch": 0.43960616846519635, "grad_norm": 740.8704833984375, "learning_rate": 3.4748995778042136e-05, "loss": 88.3325, "step": 108810 }, { "epoch": 0.439646569730564, "grad_norm": 495.8048400878906, "learning_rate": 3.474578137342909e-05, "loss": 46.6653, "step": 108820 }, { "epoch": 0.43968697099593157, "grad_norm": 907.8876342773438, "learning_rate": 3.474256677881701e-05, "loss": 65.5185, "step": 108830 }, { "epoch": 0.4397273722612992, "grad_norm": 1308.182861328125, "learning_rate": 3.473935199426858e-05, "loss": 109.5993, "step": 108840 }, { "epoch": 0.43976777352666685, "grad_norm": 538.4534912109375, "learning_rate": 3.4736137019846465e-05, "loss": 47.8979, "step": 108850 }, { "epoch": 0.4398081747920345, "grad_norm": 302.4723815917969, "learning_rate": 3.4732921855613355e-05, "loss": 60.7093, "step": 108860 }, { "epoch": 0.43984857605740213, "grad_norm": 1104.8883056640625, "learning_rate": 3.472970650163191e-05, "loss": 61.7136, "step": 108870 }, { "epoch": 0.43988897732276977, "grad_norm": 1319.9013671875, "learning_rate": 3.4726490957964834e-05, "loss": 71.9735, "step": 108880 }, { "epoch": 0.43992937858813735, "grad_norm": 3414.708251953125, "learning_rate": 3.472327522467481e-05, "loss": 74.4029, "step": 108890 }, { "epoch": 0.439969779853505, "grad_norm": 442.96405029296875, "learning_rate": 3.4720059301824525e-05, "loss": 74.8008, "step": 108900 }, { "epoch": 0.44001018111887263, "grad_norm": 849.4617919921875, "learning_rate": 3.4716843189476687e-05, "loss": 67.8839, "step": 108910 }, { "epoch": 0.4400505823842403, "grad_norm": 464.7969970703125, "learning_rate": 3.471362688769398e-05, "loss": 55.6521, "step": 108920 }, { "epoch": 0.4400909836496079, "grad_norm": 426.9241943359375, "learning_rate": 3.471041039653913e-05, "loss": 53.6479, "step": 108930 }, { "epoch": 0.44013138491497555, "grad_norm": 926.24853515625, "learning_rate": 3.4707193716074816e-05, "loss": 58.2524, "step": 108940 }, { "epoch": 0.4401717861803432, "grad_norm": 1493.707763671875, "learning_rate": 3.470397684636377e-05, "loss": 60.5845, "step": 108950 }, { "epoch": 0.4402121874457108, "grad_norm": 715.8504638671875, "learning_rate": 3.4700759787468695e-05, "loss": 61.8466, "step": 108960 }, { "epoch": 0.4402525887110784, "grad_norm": 539.120849609375, "learning_rate": 3.469754253945232e-05, "loss": 50.327, "step": 108970 }, { "epoch": 0.44029298997644606, "grad_norm": 644.7427978515625, "learning_rate": 3.4694325102377355e-05, "loss": 74.642, "step": 108980 }, { "epoch": 0.4403333912418137, "grad_norm": 702.2979736328125, "learning_rate": 3.469110747630653e-05, "loss": 56.1246, "step": 108990 }, { "epoch": 0.44037379250718134, "grad_norm": 1032.9031982421875, "learning_rate": 3.4687889661302576e-05, "loss": 56.0517, "step": 109000 }, { "epoch": 0.440414193772549, "grad_norm": 972.0687866210938, "learning_rate": 3.468467165742823e-05, "loss": 56.0222, "step": 109010 }, { "epoch": 0.44045459503791656, "grad_norm": 598.9658203125, "learning_rate": 3.468145346474622e-05, "loss": 57.363, "step": 109020 }, { "epoch": 0.4404949963032842, "grad_norm": 616.7764282226562, "learning_rate": 3.4678235083319296e-05, "loss": 82.0413, "step": 109030 }, { "epoch": 0.44053539756865184, "grad_norm": 318.1781005859375, "learning_rate": 3.467501651321019e-05, "loss": 62.9576, "step": 109040 }, { "epoch": 0.4405757988340195, "grad_norm": 718.73681640625, "learning_rate": 3.467179775448166e-05, "loss": 66.6093, "step": 109050 }, { "epoch": 0.4406162000993871, "grad_norm": 973.098388671875, "learning_rate": 3.466857880719645e-05, "loss": 65.0112, "step": 109060 }, { "epoch": 0.44065660136475476, "grad_norm": 1736.9281005859375, "learning_rate": 3.466535967141732e-05, "loss": 67.5672, "step": 109070 }, { "epoch": 0.4406970026301224, "grad_norm": 1171.598388671875, "learning_rate": 3.466214034720702e-05, "loss": 62.2778, "step": 109080 }, { "epoch": 0.44073740389549, "grad_norm": 981.1380004882812, "learning_rate": 3.4658920834628335e-05, "loss": 72.6023, "step": 109090 }, { "epoch": 0.4407778051608576, "grad_norm": 858.4905395507812, "learning_rate": 3.4655701133744e-05, "loss": 43.7386, "step": 109100 }, { "epoch": 0.44081820642622527, "grad_norm": 734.6194458007812, "learning_rate": 3.465248124461681e-05, "loss": 41.7183, "step": 109110 }, { "epoch": 0.4408586076915929, "grad_norm": 2607.31787109375, "learning_rate": 3.4649261167309526e-05, "loss": 105.1528, "step": 109120 }, { "epoch": 0.44089900895696055, "grad_norm": 675.8436279296875, "learning_rate": 3.464604090188493e-05, "loss": 75.2605, "step": 109130 }, { "epoch": 0.4409394102223282, "grad_norm": 472.8738098144531, "learning_rate": 3.46428204484058e-05, "loss": 50.3132, "step": 109140 }, { "epoch": 0.44097981148769577, "grad_norm": 629.1958618164062, "learning_rate": 3.463959980693492e-05, "loss": 47.585, "step": 109150 }, { "epoch": 0.4410202127530634, "grad_norm": 714.365478515625, "learning_rate": 3.4636378977535075e-05, "loss": 62.9666, "step": 109160 }, { "epoch": 0.44106061401843105, "grad_norm": 1306.74169921875, "learning_rate": 3.4633157960269056e-05, "loss": 67.3224, "step": 109170 }, { "epoch": 0.4411010152837987, "grad_norm": 810.0322265625, "learning_rate": 3.462993675519968e-05, "loss": 45.5099, "step": 109180 }, { "epoch": 0.44114141654916633, "grad_norm": 822.7820434570312, "learning_rate": 3.462671536238972e-05, "loss": 47.5919, "step": 109190 }, { "epoch": 0.44118181781453397, "grad_norm": 719.748046875, "learning_rate": 3.462349378190199e-05, "loss": 58.4106, "step": 109200 }, { "epoch": 0.44122221907990156, "grad_norm": 1387.738525390625, "learning_rate": 3.4620272013799286e-05, "loss": 55.8598, "step": 109210 }, { "epoch": 0.4412626203452692, "grad_norm": 291.135009765625, "learning_rate": 3.461705005814444e-05, "loss": 56.4821, "step": 109220 }, { "epoch": 0.44130302161063684, "grad_norm": 2201.89453125, "learning_rate": 3.4613827915000244e-05, "loss": 112.0971, "step": 109230 }, { "epoch": 0.4413434228760045, "grad_norm": 828.1795043945312, "learning_rate": 3.461060558442952e-05, "loss": 52.2993, "step": 109240 }, { "epoch": 0.4413838241413721, "grad_norm": 493.1712341308594, "learning_rate": 3.460738306649509e-05, "loss": 38.8712, "step": 109250 }, { "epoch": 0.44142422540673976, "grad_norm": 310.04742431640625, "learning_rate": 3.4604160361259796e-05, "loss": 54.1063, "step": 109260 }, { "epoch": 0.4414646266721074, "grad_norm": 1105.275634765625, "learning_rate": 3.460093746878644e-05, "loss": 57.73, "step": 109270 }, { "epoch": 0.441505027937475, "grad_norm": 422.4311828613281, "learning_rate": 3.459771438913787e-05, "loss": 76.8583, "step": 109280 }, { "epoch": 0.4415454292028426, "grad_norm": 533.1647338867188, "learning_rate": 3.459449112237691e-05, "loss": 70.399, "step": 109290 }, { "epoch": 0.44158583046821026, "grad_norm": 453.3349304199219, "learning_rate": 3.459126766856641e-05, "loss": 47.0437, "step": 109300 }, { "epoch": 0.4416262317335779, "grad_norm": 868.0079345703125, "learning_rate": 3.458804402776921e-05, "loss": 49.0908, "step": 109310 }, { "epoch": 0.44166663299894554, "grad_norm": 986.170166015625, "learning_rate": 3.458482020004815e-05, "loss": 54.6284, "step": 109320 }, { "epoch": 0.4417070342643132, "grad_norm": 447.4167785644531, "learning_rate": 3.4581596185466094e-05, "loss": 59.609, "step": 109330 }, { "epoch": 0.44174743552968077, "grad_norm": 641.2802124023438, "learning_rate": 3.457837198408588e-05, "loss": 85.7572, "step": 109340 }, { "epoch": 0.4417878367950484, "grad_norm": 808.4306640625, "learning_rate": 3.457514759597038e-05, "loss": 64.826, "step": 109350 }, { "epoch": 0.44182823806041605, "grad_norm": 202.7559051513672, "learning_rate": 3.457192302118244e-05, "loss": 52.6859, "step": 109360 }, { "epoch": 0.4418686393257837, "grad_norm": 1533.5028076171875, "learning_rate": 3.4568698259784945e-05, "loss": 71.5681, "step": 109370 }, { "epoch": 0.4419090405911513, "grad_norm": 788.40771484375, "learning_rate": 3.4565473311840735e-05, "loss": 56.7714, "step": 109380 }, { "epoch": 0.44194944185651897, "grad_norm": 659.1009521484375, "learning_rate": 3.4562248177412715e-05, "loss": 44.7925, "step": 109390 }, { "epoch": 0.44198984312188655, "grad_norm": 695.70068359375, "learning_rate": 3.455902285656373e-05, "loss": 95.6126, "step": 109400 }, { "epoch": 0.4420302443872542, "grad_norm": 536.9435424804688, "learning_rate": 3.4555797349356676e-05, "loss": 52.6949, "step": 109410 }, { "epoch": 0.44207064565262183, "grad_norm": 928.5408935546875, "learning_rate": 3.455257165585444e-05, "loss": 65.1302, "step": 109420 }, { "epoch": 0.44211104691798947, "grad_norm": 635.9898071289062, "learning_rate": 3.454934577611989e-05, "loss": 88.1777, "step": 109430 }, { "epoch": 0.4421514481833571, "grad_norm": 1163.3731689453125, "learning_rate": 3.454611971021593e-05, "loss": 75.0465, "step": 109440 }, { "epoch": 0.44219184944872475, "grad_norm": 1179.76708984375, "learning_rate": 3.454289345820546e-05, "loss": 72.196, "step": 109450 }, { "epoch": 0.4422322507140924, "grad_norm": 407.4288024902344, "learning_rate": 3.453966702015137e-05, "loss": 37.4781, "step": 109460 }, { "epoch": 0.44227265197946, "grad_norm": 1263.00732421875, "learning_rate": 3.453644039611656e-05, "loss": 55.1117, "step": 109470 }, { "epoch": 0.4423130532448276, "grad_norm": 441.8350830078125, "learning_rate": 3.453321358616393e-05, "loss": 34.3389, "step": 109480 }, { "epoch": 0.44235345451019525, "grad_norm": 813.268798828125, "learning_rate": 3.452998659035639e-05, "loss": 61.2214, "step": 109490 }, { "epoch": 0.4423938557755629, "grad_norm": 1138.2327880859375, "learning_rate": 3.452675940875686e-05, "loss": 69.0981, "step": 109500 }, { "epoch": 0.44243425704093053, "grad_norm": 1215.769287109375, "learning_rate": 3.452353204142824e-05, "loss": 52.1015, "step": 109510 }, { "epoch": 0.4424746583062982, "grad_norm": 169.15260314941406, "learning_rate": 3.452030448843347e-05, "loss": 49.8006, "step": 109520 }, { "epoch": 0.44251505957166576, "grad_norm": 372.5907287597656, "learning_rate": 3.451707674983546e-05, "loss": 67.884, "step": 109530 }, { "epoch": 0.4425554608370334, "grad_norm": 1219.9879150390625, "learning_rate": 3.451384882569714e-05, "loss": 48.5552, "step": 109540 }, { "epoch": 0.44259586210240104, "grad_norm": 1142.11865234375, "learning_rate": 3.4510620716081446e-05, "loss": 50.2349, "step": 109550 }, { "epoch": 0.4426362633677687, "grad_norm": 1006.7579956054688, "learning_rate": 3.45073924210513e-05, "loss": 100.4007, "step": 109560 }, { "epoch": 0.4426766646331363, "grad_norm": 714.3269653320312, "learning_rate": 3.4504163940669634e-05, "loss": 65.3141, "step": 109570 }, { "epoch": 0.44271706589850396, "grad_norm": 1006.07177734375, "learning_rate": 3.4500935274999413e-05, "loss": 98.7038, "step": 109580 }, { "epoch": 0.4427574671638716, "grad_norm": 1100.7425537109375, "learning_rate": 3.449770642410356e-05, "loss": 58.6847, "step": 109590 }, { "epoch": 0.4427978684292392, "grad_norm": 1185.401123046875, "learning_rate": 3.4494477388045035e-05, "loss": 56.3779, "step": 109600 }, { "epoch": 0.4428382696946068, "grad_norm": 1830.9031982421875, "learning_rate": 3.449124816688677e-05, "loss": 71.803, "step": 109610 }, { "epoch": 0.44287867095997446, "grad_norm": 639.5352172851562, "learning_rate": 3.448801876069176e-05, "loss": 59.7502, "step": 109620 }, { "epoch": 0.4429190722253421, "grad_norm": 818.8923950195312, "learning_rate": 3.4484789169522927e-05, "loss": 65.0993, "step": 109630 }, { "epoch": 0.44295947349070974, "grad_norm": 520.5197143554688, "learning_rate": 3.448155939344324e-05, "loss": 58.1046, "step": 109640 }, { "epoch": 0.4429998747560774, "grad_norm": 2226.27978515625, "learning_rate": 3.4478329432515674e-05, "loss": 49.6613, "step": 109650 }, { "epoch": 0.44304027602144497, "grad_norm": 3536.548095703125, "learning_rate": 3.44750992868032e-05, "loss": 60.4391, "step": 109660 }, { "epoch": 0.4430806772868126, "grad_norm": 922.1171875, "learning_rate": 3.447186895636879e-05, "loss": 49.5081, "step": 109670 }, { "epoch": 0.44312107855218025, "grad_norm": 485.9327697753906, "learning_rate": 3.4468638441275415e-05, "loss": 52.8482, "step": 109680 }, { "epoch": 0.4431614798175479, "grad_norm": 1202.439208984375, "learning_rate": 3.4465407741586056e-05, "loss": 51.3917, "step": 109690 }, { "epoch": 0.4432018810829155, "grad_norm": 3666.6962890625, "learning_rate": 3.4462176857363704e-05, "loss": 50.0755, "step": 109700 }, { "epoch": 0.44324228234828317, "grad_norm": 566.9150390625, "learning_rate": 3.445894578867134e-05, "loss": 77.9221, "step": 109710 }, { "epoch": 0.44328268361365075, "grad_norm": 695.4548950195312, "learning_rate": 3.445571453557196e-05, "loss": 92.9399, "step": 109720 }, { "epoch": 0.4433230848790184, "grad_norm": 1101.68212890625, "learning_rate": 3.445248309812856e-05, "loss": 67.3483, "step": 109730 }, { "epoch": 0.44336348614438603, "grad_norm": 782.0620727539062, "learning_rate": 3.4449251476404135e-05, "loss": 47.7574, "step": 109740 }, { "epoch": 0.44340388740975367, "grad_norm": 404.2615661621094, "learning_rate": 3.444601967046168e-05, "loss": 68.9839, "step": 109750 }, { "epoch": 0.4434442886751213, "grad_norm": 1344.5721435546875, "learning_rate": 3.444278768036421e-05, "loss": 51.2115, "step": 109760 }, { "epoch": 0.44348468994048895, "grad_norm": 453.09136962890625, "learning_rate": 3.443955550617474e-05, "loss": 62.8084, "step": 109770 }, { "epoch": 0.4435250912058566, "grad_norm": 690.1050415039062, "learning_rate": 3.443632314795627e-05, "loss": 37.6234, "step": 109780 }, { "epoch": 0.4435654924712242, "grad_norm": 942.2594604492188, "learning_rate": 3.443309060577182e-05, "loss": 57.9371, "step": 109790 }, { "epoch": 0.4436058937365918, "grad_norm": 2099.522216796875, "learning_rate": 3.442985787968442e-05, "loss": 96.0077, "step": 109800 }, { "epoch": 0.44364629500195946, "grad_norm": 894.1741333007812, "learning_rate": 3.4426624969757083e-05, "loss": 43.2961, "step": 109810 }, { "epoch": 0.4436866962673271, "grad_norm": 515.2462158203125, "learning_rate": 3.442339187605283e-05, "loss": 44.3043, "step": 109820 }, { "epoch": 0.44372709753269474, "grad_norm": 1050.0601806640625, "learning_rate": 3.442015859863472e-05, "loss": 80.5312, "step": 109830 }, { "epoch": 0.4437674987980624, "grad_norm": 2025.021484375, "learning_rate": 3.4416925137565754e-05, "loss": 60.7582, "step": 109840 }, { "epoch": 0.44380790006342996, "grad_norm": 1199.0517578125, "learning_rate": 3.4413691492908985e-05, "loss": 62.2759, "step": 109850 }, { "epoch": 0.4438483013287976, "grad_norm": 781.955078125, "learning_rate": 3.441045766472745e-05, "loss": 55.6269, "step": 109860 }, { "epoch": 0.44388870259416524, "grad_norm": 1648.44140625, "learning_rate": 3.440722365308421e-05, "loss": 57.5258, "step": 109870 }, { "epoch": 0.4439291038595329, "grad_norm": 1811.8358154296875, "learning_rate": 3.440398945804229e-05, "loss": 78.8444, "step": 109880 }, { "epoch": 0.4439695051249005, "grad_norm": 678.7551879882812, "learning_rate": 3.440075507966476e-05, "loss": 58.5628, "step": 109890 }, { "epoch": 0.44400990639026816, "grad_norm": 1020.2061157226562, "learning_rate": 3.439752051801467e-05, "loss": 41.0844, "step": 109900 }, { "epoch": 0.4440503076556358, "grad_norm": 1242.7587890625, "learning_rate": 3.439428577315508e-05, "loss": 50.6694, "step": 109910 }, { "epoch": 0.4440907089210034, "grad_norm": 1669.455810546875, "learning_rate": 3.439105084514905e-05, "loss": 61.132, "step": 109920 }, { "epoch": 0.444131110186371, "grad_norm": 357.2478942871094, "learning_rate": 3.4387815734059654e-05, "loss": 64.2863, "step": 109930 }, { "epoch": 0.44417151145173867, "grad_norm": 696.1216430664062, "learning_rate": 3.438458043994995e-05, "loss": 52.3681, "step": 109940 }, { "epoch": 0.4442119127171063, "grad_norm": 1389.9837646484375, "learning_rate": 3.438134496288302e-05, "loss": 59.619, "step": 109950 }, { "epoch": 0.44425231398247395, "grad_norm": 819.2481689453125, "learning_rate": 3.437810930292195e-05, "loss": 71.3323, "step": 109960 }, { "epoch": 0.4442927152478416, "grad_norm": 541.750732421875, "learning_rate": 3.43748734601298e-05, "loss": 73.0371, "step": 109970 }, { "epoch": 0.44433311651320917, "grad_norm": 923.3477172851562, "learning_rate": 3.437163743456967e-05, "loss": 50.583, "step": 109980 }, { "epoch": 0.4443735177785768, "grad_norm": 1519.56005859375, "learning_rate": 3.436840122630464e-05, "loss": 63.0997, "step": 109990 }, { "epoch": 0.44441391904394445, "grad_norm": 1023.0885620117188, "learning_rate": 3.436516483539781e-05, "loss": 35.0977, "step": 110000 }, { "epoch": 0.4444543203093121, "grad_norm": 787.899658203125, "learning_rate": 3.4361928261912254e-05, "loss": 56.4767, "step": 110010 }, { "epoch": 0.44449472157467973, "grad_norm": 0.0, "learning_rate": 3.4358691505911104e-05, "loss": 44.5502, "step": 110020 }, { "epoch": 0.44453512284004737, "grad_norm": 385.0424499511719, "learning_rate": 3.4355454567457445e-05, "loss": 70.914, "step": 110030 }, { "epoch": 0.44457552410541495, "grad_norm": 967.9429931640625, "learning_rate": 3.435221744661438e-05, "loss": 62.6088, "step": 110040 }, { "epoch": 0.4446159253707826, "grad_norm": 389.7696533203125, "learning_rate": 3.434898014344501e-05, "loss": 48.413, "step": 110050 }, { "epoch": 0.44465632663615023, "grad_norm": 1513.93017578125, "learning_rate": 3.434574265801247e-05, "loss": 50.9386, "step": 110060 }, { "epoch": 0.4446967279015179, "grad_norm": 461.18096923828125, "learning_rate": 3.4342504990379866e-05, "loss": 66.7544, "step": 110070 }, { "epoch": 0.4447371291668855, "grad_norm": 593.7630615234375, "learning_rate": 3.433926714061032e-05, "loss": 60.6121, "step": 110080 }, { "epoch": 0.44477753043225315, "grad_norm": 1564.3096923828125, "learning_rate": 3.433602910876694e-05, "loss": 70.2792, "step": 110090 }, { "epoch": 0.4448179316976208, "grad_norm": 1443.179931640625, "learning_rate": 3.433279089491288e-05, "loss": 36.5048, "step": 110100 }, { "epoch": 0.4448583329629884, "grad_norm": 822.4376220703125, "learning_rate": 3.432955249911125e-05, "loss": 78.1726, "step": 110110 }, { "epoch": 0.444898734228356, "grad_norm": 1508.205078125, "learning_rate": 3.432631392142519e-05, "loss": 66.1732, "step": 110120 }, { "epoch": 0.44493913549372366, "grad_norm": 251.31320190429688, "learning_rate": 3.432307516191783e-05, "loss": 34.4932, "step": 110130 }, { "epoch": 0.4449795367590913, "grad_norm": 1120.1636962890625, "learning_rate": 3.4319836220652335e-05, "loss": 38.0345, "step": 110140 }, { "epoch": 0.44501993802445894, "grad_norm": 439.1837158203125, "learning_rate": 3.431659709769183e-05, "loss": 56.9619, "step": 110150 }, { "epoch": 0.4450603392898266, "grad_norm": 969.8883666992188, "learning_rate": 3.431335779309947e-05, "loss": 37.3155, "step": 110160 }, { "epoch": 0.44510074055519416, "grad_norm": 813.2791137695312, "learning_rate": 3.43101183069384e-05, "loss": 82.1769, "step": 110170 }, { "epoch": 0.4451411418205618, "grad_norm": 712.302001953125, "learning_rate": 3.430687863927178e-05, "loss": 53.5758, "step": 110180 }, { "epoch": 0.44518154308592944, "grad_norm": 730.143310546875, "learning_rate": 3.4303638790162774e-05, "loss": 61.2881, "step": 110190 }, { "epoch": 0.4452219443512971, "grad_norm": 945.42041015625, "learning_rate": 3.430039875967454e-05, "loss": 62.6643, "step": 110200 }, { "epoch": 0.4452623456166647, "grad_norm": 887.8884887695312, "learning_rate": 3.429715854787024e-05, "loss": 52.4225, "step": 110210 }, { "epoch": 0.44530274688203236, "grad_norm": 778.4572143554688, "learning_rate": 3.429391815481305e-05, "loss": 80.6638, "step": 110220 }, { "epoch": 0.4453431481474, "grad_norm": 701.6489868164062, "learning_rate": 3.429067758056613e-05, "loss": 70.6435, "step": 110230 }, { "epoch": 0.4453835494127676, "grad_norm": 1139.0123291015625, "learning_rate": 3.428743682519269e-05, "loss": 78.8899, "step": 110240 }, { "epoch": 0.4454239506781352, "grad_norm": 0.0, "learning_rate": 3.428419588875588e-05, "loss": 51.6693, "step": 110250 }, { "epoch": 0.44546435194350287, "grad_norm": 919.1795654296875, "learning_rate": 3.428095477131888e-05, "loss": 56.4408, "step": 110260 }, { "epoch": 0.4455047532088705, "grad_norm": 953.95263671875, "learning_rate": 3.427771347294489e-05, "loss": 72.902, "step": 110270 }, { "epoch": 0.44554515447423815, "grad_norm": 1921.18359375, "learning_rate": 3.427447199369711e-05, "loss": 43.3139, "step": 110280 }, { "epoch": 0.4455855557396058, "grad_norm": 790.10791015625, "learning_rate": 3.4271230333638716e-05, "loss": 71.2949, "step": 110290 }, { "epoch": 0.44562595700497337, "grad_norm": 1008.7927856445312, "learning_rate": 3.426798849283291e-05, "loss": 60.3911, "step": 110300 }, { "epoch": 0.445666358270341, "grad_norm": 749.1685791015625, "learning_rate": 3.4264746471342905e-05, "loss": 78.0302, "step": 110310 }, { "epoch": 0.44570675953570865, "grad_norm": 967.6669921875, "learning_rate": 3.4261504269231904e-05, "loss": 49.5635, "step": 110320 }, { "epoch": 0.4457471608010763, "grad_norm": 1514.6182861328125, "learning_rate": 3.4258261886563104e-05, "loss": 76.0732, "step": 110330 }, { "epoch": 0.44578756206644393, "grad_norm": 1716.936279296875, "learning_rate": 3.425501932339971e-05, "loss": 133.2402, "step": 110340 }, { "epoch": 0.44582796333181157, "grad_norm": 959.5086669921875, "learning_rate": 3.425177657980496e-05, "loss": 71.294, "step": 110350 }, { "epoch": 0.44586836459717916, "grad_norm": 812.0950317382812, "learning_rate": 3.4248533655842066e-05, "loss": 57.06, "step": 110360 }, { "epoch": 0.4459087658625468, "grad_norm": 267.0820617675781, "learning_rate": 3.4245290551574237e-05, "loss": 44.4798, "step": 110370 }, { "epoch": 0.44594916712791444, "grad_norm": 1544.18310546875, "learning_rate": 3.4242047267064715e-05, "loss": 57.8316, "step": 110380 }, { "epoch": 0.4459895683932821, "grad_norm": 695.9027709960938, "learning_rate": 3.4238803802376716e-05, "loss": 56.366, "step": 110390 }, { "epoch": 0.4460299696586497, "grad_norm": 685.2529296875, "learning_rate": 3.423556015757349e-05, "loss": 57.191, "step": 110400 }, { "epoch": 0.44607037092401736, "grad_norm": 653.3543090820312, "learning_rate": 3.423231633271826e-05, "loss": 61.4118, "step": 110410 }, { "epoch": 0.446110772189385, "grad_norm": 1396.4661865234375, "learning_rate": 3.4229072327874274e-05, "loss": 53.9518, "step": 110420 }, { "epoch": 0.4461511734547526, "grad_norm": 742.2490234375, "learning_rate": 3.422582814310476e-05, "loss": 64.2797, "step": 110430 }, { "epoch": 0.4461915747201202, "grad_norm": 811.0308837890625, "learning_rate": 3.4222583778472996e-05, "loss": 34.3383, "step": 110440 }, { "epoch": 0.44623197598548786, "grad_norm": 784.7843017578125, "learning_rate": 3.421933923404219e-05, "loss": 83.5336, "step": 110450 }, { "epoch": 0.4462723772508555, "grad_norm": 368.3476257324219, "learning_rate": 3.421609450987563e-05, "loss": 43.9047, "step": 110460 }, { "epoch": 0.44631277851622314, "grad_norm": 507.54205322265625, "learning_rate": 3.421284960603657e-05, "loss": 40.9963, "step": 110470 }, { "epoch": 0.4463531797815908, "grad_norm": 1140.799560546875, "learning_rate": 3.4209604522588255e-05, "loss": 46.2735, "step": 110480 }, { "epoch": 0.44639358104695837, "grad_norm": 1338.29541015625, "learning_rate": 3.4206359259593954e-05, "loss": 37.1206, "step": 110490 }, { "epoch": 0.446433982312326, "grad_norm": 665.2498168945312, "learning_rate": 3.4203113817116957e-05, "loss": 57.0931, "step": 110500 }, { "epoch": 0.44647438357769365, "grad_norm": 734.5198974609375, "learning_rate": 3.4199868195220505e-05, "loss": 71.1633, "step": 110510 }, { "epoch": 0.4465147848430613, "grad_norm": 679.3670043945312, "learning_rate": 3.419662239396789e-05, "loss": 52.7156, "step": 110520 }, { "epoch": 0.4465551861084289, "grad_norm": 538.1387939453125, "learning_rate": 3.419337641342239e-05, "loss": 87.353, "step": 110530 }, { "epoch": 0.44659558737379657, "grad_norm": 625.4715576171875, "learning_rate": 3.419013025364727e-05, "loss": 52.2098, "step": 110540 }, { "epoch": 0.4466359886391642, "grad_norm": 1380.083740234375, "learning_rate": 3.4186883914705835e-05, "loss": 110.8045, "step": 110550 }, { "epoch": 0.4466763899045318, "grad_norm": 664.3757934570312, "learning_rate": 3.418363739666137e-05, "loss": 33.5992, "step": 110560 }, { "epoch": 0.44671679116989943, "grad_norm": 352.8798522949219, "learning_rate": 3.418039069957717e-05, "loss": 54.5655, "step": 110570 }, { "epoch": 0.44675719243526707, "grad_norm": 196.41831970214844, "learning_rate": 3.417714382351652e-05, "loss": 46.0049, "step": 110580 }, { "epoch": 0.4467975937006347, "grad_norm": 1419.017822265625, "learning_rate": 3.417389676854274e-05, "loss": 87.4692, "step": 110590 }, { "epoch": 0.44683799496600235, "grad_norm": 515.550537109375, "learning_rate": 3.417064953471911e-05, "loss": 46.8774, "step": 110600 }, { "epoch": 0.44687839623137, "grad_norm": 729.7348022460938, "learning_rate": 3.416740212210894e-05, "loss": 47.5663, "step": 110610 }, { "epoch": 0.4469187974967376, "grad_norm": 618.8557739257812, "learning_rate": 3.416415453077555e-05, "loss": 56.8461, "step": 110620 }, { "epoch": 0.4469591987621052, "grad_norm": 1495.1834716796875, "learning_rate": 3.416090676078225e-05, "loss": 71.6594, "step": 110630 }, { "epoch": 0.44699960002747285, "grad_norm": 537.9067993164062, "learning_rate": 3.415765881219236e-05, "loss": 75.8224, "step": 110640 }, { "epoch": 0.4470400012928405, "grad_norm": 1021.9192504882812, "learning_rate": 3.4154410685069196e-05, "loss": 71.1588, "step": 110650 }, { "epoch": 0.44708040255820813, "grad_norm": 1147.8583984375, "learning_rate": 3.4151162379476075e-05, "loss": 99.0359, "step": 110660 }, { "epoch": 0.4471208038235758, "grad_norm": 951.2720947265625, "learning_rate": 3.414791389547635e-05, "loss": 62.2096, "step": 110670 }, { "epoch": 0.44716120508894336, "grad_norm": 292.7242736816406, "learning_rate": 3.414466523313332e-05, "loss": 40.3493, "step": 110680 }, { "epoch": 0.447201606354311, "grad_norm": 1284.24755859375, "learning_rate": 3.414141639251033e-05, "loss": 49.9673, "step": 110690 }, { "epoch": 0.44724200761967864, "grad_norm": 1174.4100341796875, "learning_rate": 3.413816737367073e-05, "loss": 76.3328, "step": 110700 }, { "epoch": 0.4472824088850463, "grad_norm": 1334.654052734375, "learning_rate": 3.4134918176677846e-05, "loss": 68.6847, "step": 110710 }, { "epoch": 0.4473228101504139, "grad_norm": 1329.33203125, "learning_rate": 3.4131668801595027e-05, "loss": 60.1202, "step": 110720 }, { "epoch": 0.44736321141578156, "grad_norm": 472.8536071777344, "learning_rate": 3.4128419248485635e-05, "loss": 55.691, "step": 110730 }, { "epoch": 0.4474036126811492, "grad_norm": 1049.84619140625, "learning_rate": 3.4125169517413e-05, "loss": 70.7531, "step": 110740 }, { "epoch": 0.4474440139465168, "grad_norm": 527.7225952148438, "learning_rate": 3.412191960844049e-05, "loss": 48.9243, "step": 110750 }, { "epoch": 0.4474844152118844, "grad_norm": 415.2084045410156, "learning_rate": 3.411866952163146e-05, "loss": 52.9608, "step": 110760 }, { "epoch": 0.44752481647725206, "grad_norm": 1201.3553466796875, "learning_rate": 3.4115419257049286e-05, "loss": 71.9567, "step": 110770 }, { "epoch": 0.4475652177426197, "grad_norm": 245.35345458984375, "learning_rate": 3.4112168814757307e-05, "loss": 69.7229, "step": 110780 }, { "epoch": 0.44760561900798734, "grad_norm": 1845.09033203125, "learning_rate": 3.41089181948189e-05, "loss": 68.3285, "step": 110790 }, { "epoch": 0.447646020273355, "grad_norm": 874.4844360351562, "learning_rate": 3.410566739729746e-05, "loss": 42.1089, "step": 110800 }, { "epoch": 0.44768642153872257, "grad_norm": 466.1338806152344, "learning_rate": 3.410241642225633e-05, "loss": 53.4753, "step": 110810 }, { "epoch": 0.4477268228040902, "grad_norm": 391.43475341796875, "learning_rate": 3.409916526975892e-05, "loss": 56.1511, "step": 110820 }, { "epoch": 0.44776722406945785, "grad_norm": 1110.3125, "learning_rate": 3.409591393986859e-05, "loss": 56.4363, "step": 110830 }, { "epoch": 0.4478076253348255, "grad_norm": 441.3172302246094, "learning_rate": 3.409266243264874e-05, "loss": 54.6422, "step": 110840 }, { "epoch": 0.4478480266001931, "grad_norm": 868.8545532226562, "learning_rate": 3.408941074816275e-05, "loss": 56.9401, "step": 110850 }, { "epoch": 0.44788842786556077, "grad_norm": 988.2617797851562, "learning_rate": 3.408615888647402e-05, "loss": 58.8403, "step": 110860 }, { "epoch": 0.4479288291309284, "grad_norm": 1164.9776611328125, "learning_rate": 3.408290684764594e-05, "loss": 63.0026, "step": 110870 }, { "epoch": 0.447969230396296, "grad_norm": 592.5801391601562, "learning_rate": 3.407965463174192e-05, "loss": 52.5244, "step": 110880 }, { "epoch": 0.44800963166166363, "grad_norm": 647.7515258789062, "learning_rate": 3.407640223882536e-05, "loss": 29.65, "step": 110890 }, { "epoch": 0.44805003292703127, "grad_norm": 954.7613525390625, "learning_rate": 3.407314966895966e-05, "loss": 56.6861, "step": 110900 }, { "epoch": 0.4480904341923989, "grad_norm": 997.4801025390625, "learning_rate": 3.406989692220824e-05, "loss": 72.3521, "step": 110910 }, { "epoch": 0.44813083545776655, "grad_norm": 308.1066589355469, "learning_rate": 3.4066643998634505e-05, "loss": 79.9528, "step": 110920 }, { "epoch": 0.4481712367231342, "grad_norm": 1413.3211669921875, "learning_rate": 3.406339089830188e-05, "loss": 90.6204, "step": 110930 }, { "epoch": 0.4482116379885018, "grad_norm": 761.3001098632812, "learning_rate": 3.406013762127379e-05, "loss": 59.6376, "step": 110940 }, { "epoch": 0.4482520392538694, "grad_norm": 550.8245239257812, "learning_rate": 3.405688416761364e-05, "loss": 61.1043, "step": 110950 }, { "epoch": 0.44829244051923706, "grad_norm": 1125.8817138671875, "learning_rate": 3.4053630537384885e-05, "loss": 54.3687, "step": 110960 }, { "epoch": 0.4483328417846047, "grad_norm": 1198.2652587890625, "learning_rate": 3.4050376730650935e-05, "loss": 52.4602, "step": 110970 }, { "epoch": 0.44837324304997234, "grad_norm": 293.5941162109375, "learning_rate": 3.4047122747475224e-05, "loss": 48.189, "step": 110980 }, { "epoch": 0.44841364431534, "grad_norm": 1793.2451171875, "learning_rate": 3.40438685879212e-05, "loss": 58.882, "step": 110990 }, { "epoch": 0.44845404558070756, "grad_norm": 405.7001037597656, "learning_rate": 3.4040614252052305e-05, "loss": 87.0919, "step": 111000 }, { "epoch": 0.4484944468460752, "grad_norm": 609.4334716796875, "learning_rate": 3.403735973993198e-05, "loss": 88.5648, "step": 111010 }, { "epoch": 0.44853484811144284, "grad_norm": 476.433837890625, "learning_rate": 3.403410505162369e-05, "loss": 72.3557, "step": 111020 }, { "epoch": 0.4485752493768105, "grad_norm": 547.7504272460938, "learning_rate": 3.403085018719085e-05, "loss": 71.4732, "step": 111030 }, { "epoch": 0.4486156506421781, "grad_norm": 618.4702758789062, "learning_rate": 3.402759514669694e-05, "loss": 44.4472, "step": 111040 }, { "epoch": 0.44865605190754576, "grad_norm": 731.4344482421875, "learning_rate": 3.4024339930205415e-05, "loss": 53.5866, "step": 111050 }, { "epoch": 0.4486964531729134, "grad_norm": 457.8855895996094, "learning_rate": 3.402108453777974e-05, "loss": 58.6352, "step": 111060 }, { "epoch": 0.448736854438281, "grad_norm": 612.9066772460938, "learning_rate": 3.401782896948338e-05, "loss": 80.7433, "step": 111070 }, { "epoch": 0.4487772557036486, "grad_norm": 590.0531616210938, "learning_rate": 3.401457322537979e-05, "loss": 49.937, "step": 111080 }, { "epoch": 0.44881765696901627, "grad_norm": 579.5253295898438, "learning_rate": 3.401131730553247e-05, "loss": 53.8527, "step": 111090 }, { "epoch": 0.4488580582343839, "grad_norm": 761.0218505859375, "learning_rate": 3.400806121000487e-05, "loss": 57.2909, "step": 111100 }, { "epoch": 0.44889845949975155, "grad_norm": 938.6339721679688, "learning_rate": 3.400480493886048e-05, "loss": 54.5442, "step": 111110 }, { "epoch": 0.4489388607651192, "grad_norm": 800.48486328125, "learning_rate": 3.400154849216278e-05, "loss": 50.155, "step": 111120 }, { "epoch": 0.44897926203048677, "grad_norm": 1541.491943359375, "learning_rate": 3.3998291869975266e-05, "loss": 47.9149, "step": 111130 }, { "epoch": 0.4490196632958544, "grad_norm": 631.569580078125, "learning_rate": 3.399503507236141e-05, "loss": 60.8179, "step": 111140 }, { "epoch": 0.44906006456122205, "grad_norm": 233.70372009277344, "learning_rate": 3.399177809938472e-05, "loss": 33.1188, "step": 111150 }, { "epoch": 0.4491004658265897, "grad_norm": 492.7761535644531, "learning_rate": 3.398852095110868e-05, "loss": 62.9832, "step": 111160 }, { "epoch": 0.44914086709195733, "grad_norm": 691.3985595703125, "learning_rate": 3.398526362759681e-05, "loss": 58.8685, "step": 111170 }, { "epoch": 0.44918126835732497, "grad_norm": 763.817626953125, "learning_rate": 3.3982006128912584e-05, "loss": 34.1555, "step": 111180 }, { "epoch": 0.4492216696226926, "grad_norm": 794.317626953125, "learning_rate": 3.3978748455119536e-05, "loss": 71.4623, "step": 111190 }, { "epoch": 0.4492620708880602, "grad_norm": 423.894287109375, "learning_rate": 3.397549060628116e-05, "loss": 56.8908, "step": 111200 }, { "epoch": 0.44930247215342783, "grad_norm": 2169.064697265625, "learning_rate": 3.3972232582460974e-05, "loss": 52.3652, "step": 111210 }, { "epoch": 0.4493428734187955, "grad_norm": 363.28289794921875, "learning_rate": 3.3968974383722495e-05, "loss": 40.4274, "step": 111220 }, { "epoch": 0.4493832746841631, "grad_norm": 973.9365234375, "learning_rate": 3.3965716010129236e-05, "loss": 106.0774, "step": 111230 }, { "epoch": 0.44942367594953075, "grad_norm": 3008.888671875, "learning_rate": 3.396245746174473e-05, "loss": 44.22, "step": 111240 }, { "epoch": 0.4494640772148984, "grad_norm": 926.1411743164062, "learning_rate": 3.39591987386325e-05, "loss": 46.0268, "step": 111250 }, { "epoch": 0.449504478480266, "grad_norm": 177.50221252441406, "learning_rate": 3.3955939840856096e-05, "loss": 44.916, "step": 111260 }, { "epoch": 0.4495448797456336, "grad_norm": 779.60888671875, "learning_rate": 3.395268076847902e-05, "loss": 56.0308, "step": 111270 }, { "epoch": 0.44958528101100126, "grad_norm": 323.57818603515625, "learning_rate": 3.394942152156482e-05, "loss": 40.499, "step": 111280 }, { "epoch": 0.4496256822763689, "grad_norm": 513.7103881835938, "learning_rate": 3.394616210017705e-05, "loss": 44.3521, "step": 111290 }, { "epoch": 0.44966608354173654, "grad_norm": 657.2388305664062, "learning_rate": 3.3942902504379235e-05, "loss": 72.6372, "step": 111300 }, { "epoch": 0.4497064848071042, "grad_norm": 513.4599609375, "learning_rate": 3.3939642734234936e-05, "loss": 118.4347, "step": 111310 }, { "epoch": 0.44974688607247176, "grad_norm": 719.9033203125, "learning_rate": 3.39363827898077e-05, "loss": 85.1415, "step": 111320 }, { "epoch": 0.4497872873378394, "grad_norm": 1146.169677734375, "learning_rate": 3.393312267116107e-05, "loss": 60.6316, "step": 111330 }, { "epoch": 0.44982768860320704, "grad_norm": 633.3748168945312, "learning_rate": 3.392986237835863e-05, "loss": 60.0308, "step": 111340 }, { "epoch": 0.4498680898685747, "grad_norm": 436.7900390625, "learning_rate": 3.3926601911463915e-05, "loss": 54.0903, "step": 111350 }, { "epoch": 0.4499084911339423, "grad_norm": 425.1293029785156, "learning_rate": 3.392334127054051e-05, "loss": 59.7452, "step": 111360 }, { "epoch": 0.44994889239930996, "grad_norm": 1201.4161376953125, "learning_rate": 3.392008045565197e-05, "loss": 56.7192, "step": 111370 }, { "epoch": 0.4499892936646776, "grad_norm": 287.7637939453125, "learning_rate": 3.391681946686186e-05, "loss": 67.3147, "step": 111380 }, { "epoch": 0.4500296949300452, "grad_norm": 818.6224975585938, "learning_rate": 3.3913558304233776e-05, "loss": 53.3463, "step": 111390 }, { "epoch": 0.4500700961954128, "grad_norm": 385.80987548828125, "learning_rate": 3.3910296967831266e-05, "loss": 66.1127, "step": 111400 }, { "epoch": 0.45011049746078047, "grad_norm": 1808.7755126953125, "learning_rate": 3.3907035457717944e-05, "loss": 67.1233, "step": 111410 }, { "epoch": 0.4501508987261481, "grad_norm": 512.4915771484375, "learning_rate": 3.390377377395738e-05, "loss": 52.8904, "step": 111420 }, { "epoch": 0.45019129999151575, "grad_norm": 1636.7056884765625, "learning_rate": 3.3900511916613155e-05, "loss": 101.467, "step": 111430 }, { "epoch": 0.4502317012568834, "grad_norm": 682.1113891601562, "learning_rate": 3.389724988574887e-05, "loss": 47.2666, "step": 111440 }, { "epoch": 0.45027210252225097, "grad_norm": 401.15771484375, "learning_rate": 3.389398768142812e-05, "loss": 40.7815, "step": 111450 }, { "epoch": 0.4503125037876186, "grad_norm": 1251.0804443359375, "learning_rate": 3.389072530371451e-05, "loss": 49.2346, "step": 111460 }, { "epoch": 0.45035290505298625, "grad_norm": 642.7127075195312, "learning_rate": 3.388746275267162e-05, "loss": 63.2051, "step": 111470 }, { "epoch": 0.4503933063183539, "grad_norm": 605.9111938476562, "learning_rate": 3.388420002836307e-05, "loss": 52.5846, "step": 111480 }, { "epoch": 0.45043370758372153, "grad_norm": 1575.560302734375, "learning_rate": 3.3880937130852466e-05, "loss": 35.7747, "step": 111490 }, { "epoch": 0.45047410884908917, "grad_norm": 645.1834716796875, "learning_rate": 3.387767406020343e-05, "loss": 55.3005, "step": 111500 }, { "epoch": 0.4505145101144568, "grad_norm": 666.5519409179688, "learning_rate": 3.3874410816479564e-05, "loss": 48.5718, "step": 111510 }, { "epoch": 0.4505549113798244, "grad_norm": 654.6434326171875, "learning_rate": 3.387114739974448e-05, "loss": 81.415, "step": 111520 }, { "epoch": 0.45059531264519204, "grad_norm": 2661.026123046875, "learning_rate": 3.3867883810061824e-05, "loss": 84.8381, "step": 111530 }, { "epoch": 0.4506357139105597, "grad_norm": 2447.82470703125, "learning_rate": 3.38646200474952e-05, "loss": 48.1841, "step": 111540 }, { "epoch": 0.4506761151759273, "grad_norm": 708.4982299804688, "learning_rate": 3.3861356112108247e-05, "loss": 56.9643, "step": 111550 }, { "epoch": 0.45071651644129496, "grad_norm": 489.48846435546875, "learning_rate": 3.3858092003964594e-05, "loss": 57.1001, "step": 111560 }, { "epoch": 0.4507569177066626, "grad_norm": 538.01806640625, "learning_rate": 3.385482772312787e-05, "loss": 43.918, "step": 111570 }, { "epoch": 0.4507973189720302, "grad_norm": 624.0175170898438, "learning_rate": 3.3851563269661726e-05, "loss": 95.1447, "step": 111580 }, { "epoch": 0.4508377202373978, "grad_norm": 565.3558959960938, "learning_rate": 3.38482986436298e-05, "loss": 44.5088, "step": 111590 }, { "epoch": 0.45087812150276546, "grad_norm": 746.841796875, "learning_rate": 3.384503384509574e-05, "loss": 44.0926, "step": 111600 }, { "epoch": 0.4509185227681331, "grad_norm": 969.9041748046875, "learning_rate": 3.384176887412318e-05, "loss": 38.6924, "step": 111610 }, { "epoch": 0.45095892403350074, "grad_norm": 971.7702026367188, "learning_rate": 3.38385037307758e-05, "loss": 56.2464, "step": 111620 }, { "epoch": 0.4509993252988684, "grad_norm": 1089.8248291015625, "learning_rate": 3.383523841511723e-05, "loss": 89.415, "step": 111630 }, { "epoch": 0.45103972656423597, "grad_norm": 535.305419921875, "learning_rate": 3.3831972927211135e-05, "loss": 41.9581, "step": 111640 }, { "epoch": 0.4510801278296036, "grad_norm": 1719.795166015625, "learning_rate": 3.382870726712119e-05, "loss": 74.1624, "step": 111650 }, { "epoch": 0.45112052909497125, "grad_norm": 1085.8828125, "learning_rate": 3.382544143491104e-05, "loss": 81.0191, "step": 111660 }, { "epoch": 0.4511609303603389, "grad_norm": 409.8812561035156, "learning_rate": 3.382217543064438e-05, "loss": 40.052, "step": 111670 }, { "epoch": 0.4512013316257065, "grad_norm": 1443.2547607421875, "learning_rate": 3.381890925438486e-05, "loss": 37.1698, "step": 111680 }, { "epoch": 0.45124173289107417, "grad_norm": 1432.521728515625, "learning_rate": 3.3815642906196156e-05, "loss": 75.6335, "step": 111690 }, { "epoch": 0.4512821341564418, "grad_norm": 1234.38720703125, "learning_rate": 3.381237638614196e-05, "loss": 48.0366, "step": 111700 }, { "epoch": 0.4513225354218094, "grad_norm": 840.4757690429688, "learning_rate": 3.380910969428596e-05, "loss": 58.3024, "step": 111710 }, { "epoch": 0.45136293668717703, "grad_norm": 591.4329833984375, "learning_rate": 3.380584283069183e-05, "loss": 42.8441, "step": 111720 }, { "epoch": 0.45140333795254467, "grad_norm": 579.3867797851562, "learning_rate": 3.380257579542325e-05, "loss": 100.9832, "step": 111730 }, { "epoch": 0.4514437392179123, "grad_norm": 1018.6928100585938, "learning_rate": 3.379930858854392e-05, "loss": 40.5557, "step": 111740 }, { "epoch": 0.45148414048327995, "grad_norm": 475.0141906738281, "learning_rate": 3.3796041210117546e-05, "loss": 47.8023, "step": 111750 }, { "epoch": 0.4515245417486476, "grad_norm": 1127.599853515625, "learning_rate": 3.379277366020782e-05, "loss": 56.3643, "step": 111760 }, { "epoch": 0.4515649430140152, "grad_norm": 632.768310546875, "learning_rate": 3.3789505938878443e-05, "loss": 42.618, "step": 111770 }, { "epoch": 0.4516053442793828, "grad_norm": 558.4849853515625, "learning_rate": 3.378623804619313e-05, "loss": 38.2455, "step": 111780 }, { "epoch": 0.45164574554475045, "grad_norm": 1455.6197509765625, "learning_rate": 3.378296998221557e-05, "loss": 54.4919, "step": 111790 }, { "epoch": 0.4516861468101181, "grad_norm": 650.8120727539062, "learning_rate": 3.3779701747009504e-05, "loss": 68.0886, "step": 111800 }, { "epoch": 0.45172654807548573, "grad_norm": 613.3826904296875, "learning_rate": 3.377643334063862e-05, "loss": 47.3737, "step": 111810 }, { "epoch": 0.4517669493408534, "grad_norm": 814.562744140625, "learning_rate": 3.3773164763166655e-05, "loss": 70.561, "step": 111820 }, { "epoch": 0.451807350606221, "grad_norm": 785.9385986328125, "learning_rate": 3.376989601465733e-05, "loss": 38.8111, "step": 111830 }, { "epoch": 0.4518477518715886, "grad_norm": 640.478759765625, "learning_rate": 3.376662709517435e-05, "loss": 55.49, "step": 111840 }, { "epoch": 0.45188815313695624, "grad_norm": 590.5025024414062, "learning_rate": 3.3763358004781475e-05, "loss": 59.1937, "step": 111850 }, { "epoch": 0.4519285544023239, "grad_norm": 681.9274291992188, "learning_rate": 3.3760088743542424e-05, "loss": 121.1419, "step": 111860 }, { "epoch": 0.4519689556676915, "grad_norm": 1086.4017333984375, "learning_rate": 3.375681931152093e-05, "loss": 57.0334, "step": 111870 }, { "epoch": 0.45200935693305916, "grad_norm": 614.7588500976562, "learning_rate": 3.375354970878073e-05, "loss": 51.712, "step": 111880 }, { "epoch": 0.4520497581984268, "grad_norm": 1066.3997802734375, "learning_rate": 3.375027993538559e-05, "loss": 62.087, "step": 111890 }, { "epoch": 0.4520901594637944, "grad_norm": 644.5421142578125, "learning_rate": 3.374700999139923e-05, "loss": 58.5303, "step": 111900 }, { "epoch": 0.452130560729162, "grad_norm": 840.0023193359375, "learning_rate": 3.37437398768854e-05, "loss": 71.8506, "step": 111910 }, { "epoch": 0.45217096199452966, "grad_norm": 1441.012451171875, "learning_rate": 3.374046959190786e-05, "loss": 92.4958, "step": 111920 }, { "epoch": 0.4522113632598973, "grad_norm": 1386.08349609375, "learning_rate": 3.3737199136530364e-05, "loss": 51.7875, "step": 111930 }, { "epoch": 0.45225176452526494, "grad_norm": 1856.324951171875, "learning_rate": 3.373392851081668e-05, "loss": 74.6996, "step": 111940 }, { "epoch": 0.4522921657906326, "grad_norm": 371.86566162109375, "learning_rate": 3.373065771483056e-05, "loss": 53.6041, "step": 111950 }, { "epoch": 0.45233256705600017, "grad_norm": 862.8280639648438, "learning_rate": 3.372738674863577e-05, "loss": 51.361, "step": 111960 }, { "epoch": 0.4523729683213678, "grad_norm": 923.8464965820312, "learning_rate": 3.372411561229609e-05, "loss": 43.7514, "step": 111970 }, { "epoch": 0.45241336958673545, "grad_norm": 716.6856079101562, "learning_rate": 3.372084430587528e-05, "loss": 39.6764, "step": 111980 }, { "epoch": 0.4524537708521031, "grad_norm": 593.4553833007812, "learning_rate": 3.371757282943712e-05, "loss": 39.9165, "step": 111990 }, { "epoch": 0.4524941721174707, "grad_norm": 496.314208984375, "learning_rate": 3.3714301183045385e-05, "loss": 78.6274, "step": 112000 }, { "epoch": 0.45253457338283837, "grad_norm": 811.6598510742188, "learning_rate": 3.3711029366763866e-05, "loss": 34.8752, "step": 112010 }, { "epoch": 0.452574974648206, "grad_norm": 1419.599609375, "learning_rate": 3.370775738065634e-05, "loss": 81.8719, "step": 112020 }, { "epoch": 0.4526153759135736, "grad_norm": 1418.275146484375, "learning_rate": 3.370448522478661e-05, "loss": 65.6506, "step": 112030 }, { "epoch": 0.45265577717894123, "grad_norm": 8803.1748046875, "learning_rate": 3.370121289921845e-05, "loss": 112.1481, "step": 112040 }, { "epoch": 0.45269617844430887, "grad_norm": 328.6112060546875, "learning_rate": 3.369794040401567e-05, "loss": 38.9246, "step": 112050 }, { "epoch": 0.4527365797096765, "grad_norm": 576.2307739257812, "learning_rate": 3.3694667739242066e-05, "loss": 54.7131, "step": 112060 }, { "epoch": 0.45277698097504415, "grad_norm": 1210.724365234375, "learning_rate": 3.369139490496144e-05, "loss": 60.4555, "step": 112070 }, { "epoch": 0.4528173822404118, "grad_norm": 1678.520751953125, "learning_rate": 3.368812190123759e-05, "loss": 53.7549, "step": 112080 }, { "epoch": 0.4528577835057794, "grad_norm": 754.4735717773438, "learning_rate": 3.3684848728134334e-05, "loss": 55.5762, "step": 112090 }, { "epoch": 0.452898184771147, "grad_norm": 294.9254455566406, "learning_rate": 3.368157538571548e-05, "loss": 62.4506, "step": 112100 }, { "epoch": 0.45293858603651466, "grad_norm": 1608.3568115234375, "learning_rate": 3.367830187404484e-05, "loss": 55.6337, "step": 112110 }, { "epoch": 0.4529789873018823, "grad_norm": 680.6564331054688, "learning_rate": 3.367502819318624e-05, "loss": 49.9141, "step": 112120 }, { "epoch": 0.45301938856724994, "grad_norm": 1690.9805908203125, "learning_rate": 3.36717543432035e-05, "loss": 58.1772, "step": 112130 }, { "epoch": 0.4530597898326176, "grad_norm": 564.469482421875, "learning_rate": 3.366848032416045e-05, "loss": 59.8349, "step": 112140 }, { "epoch": 0.4531001910979852, "grad_norm": 725.934326171875, "learning_rate": 3.3665206136120906e-05, "loss": 72.5532, "step": 112150 }, { "epoch": 0.4531405923633528, "grad_norm": 949.9281616210938, "learning_rate": 3.3661931779148707e-05, "loss": 65.5223, "step": 112160 }, { "epoch": 0.45318099362872044, "grad_norm": 1123.207763671875, "learning_rate": 3.365865725330769e-05, "loss": 52.4678, "step": 112170 }, { "epoch": 0.4532213948940881, "grad_norm": 264.70001220703125, "learning_rate": 3.3655382558661685e-05, "loss": 64.133, "step": 112180 }, { "epoch": 0.4532617961594557, "grad_norm": 1888.4464111328125, "learning_rate": 3.3652107695274555e-05, "loss": 45.2613, "step": 112190 }, { "epoch": 0.45330219742482336, "grad_norm": 539.1635131835938, "learning_rate": 3.3648832663210124e-05, "loss": 95.8625, "step": 112200 }, { "epoch": 0.453342598690191, "grad_norm": 510.55816650390625, "learning_rate": 3.3645557462532245e-05, "loss": 39.8649, "step": 112210 }, { "epoch": 0.4533829999555586, "grad_norm": 1129.6290283203125, "learning_rate": 3.364228209330477e-05, "loss": 54.8464, "step": 112220 }, { "epoch": 0.4534234012209262, "grad_norm": 718.903564453125, "learning_rate": 3.363900655559157e-05, "loss": 70.0421, "step": 112230 }, { "epoch": 0.45346380248629387, "grad_norm": 765.8641967773438, "learning_rate": 3.363573084945648e-05, "loss": 43.2255, "step": 112240 }, { "epoch": 0.4535042037516615, "grad_norm": 305.63763427734375, "learning_rate": 3.363245497496337e-05, "loss": 55.8746, "step": 112250 }, { "epoch": 0.45354460501702915, "grad_norm": 359.51715087890625, "learning_rate": 3.362917893217611e-05, "loss": 73.9199, "step": 112260 }, { "epoch": 0.4535850062823968, "grad_norm": 516.9877319335938, "learning_rate": 3.362590272115855e-05, "loss": 67.4331, "step": 112270 }, { "epoch": 0.45362540754776437, "grad_norm": 862.2160034179688, "learning_rate": 3.3622626341974594e-05, "loss": 57.4161, "step": 112280 }, { "epoch": 0.453665808813132, "grad_norm": 1069.54150390625, "learning_rate": 3.361934979468809e-05, "loss": 57.9138, "step": 112290 }, { "epoch": 0.45370621007849965, "grad_norm": 375.27825927734375, "learning_rate": 3.3616073079362926e-05, "loss": 51.6367, "step": 112300 }, { "epoch": 0.4537466113438673, "grad_norm": 450.5061950683594, "learning_rate": 3.361279619606299e-05, "loss": 61.73, "step": 112310 }, { "epoch": 0.45378701260923493, "grad_norm": 1464.5079345703125, "learning_rate": 3.360951914485215e-05, "loss": 52.9956, "step": 112320 }, { "epoch": 0.45382741387460257, "grad_norm": 324.18194580078125, "learning_rate": 3.3606241925794295e-05, "loss": 47.1093, "step": 112330 }, { "epoch": 0.4538678151399702, "grad_norm": 836.4578857421875, "learning_rate": 3.360296453895333e-05, "loss": 56.2767, "step": 112340 }, { "epoch": 0.4539082164053378, "grad_norm": 1074.7415771484375, "learning_rate": 3.3599686984393134e-05, "loss": 50.7534, "step": 112350 }, { "epoch": 0.45394861767070543, "grad_norm": 225.61444091796875, "learning_rate": 3.359640926217763e-05, "loss": 40.0397, "step": 112360 }, { "epoch": 0.4539890189360731, "grad_norm": 1213.6234130859375, "learning_rate": 3.359313137237069e-05, "loss": 47.421, "step": 112370 }, { "epoch": 0.4540294202014407, "grad_norm": 640.165283203125, "learning_rate": 3.3589853315036225e-05, "loss": 60.4271, "step": 112380 }, { "epoch": 0.45406982146680835, "grad_norm": 354.8425598144531, "learning_rate": 3.358657509023815e-05, "loss": 58.7236, "step": 112390 }, { "epoch": 0.454110222732176, "grad_norm": 1010.51123046875, "learning_rate": 3.3583296698040384e-05, "loss": 67.7665, "step": 112400 }, { "epoch": 0.4541506239975436, "grad_norm": 313.4957580566406, "learning_rate": 3.3580018138506824e-05, "loss": 73.5613, "step": 112410 }, { "epoch": 0.4541910252629112, "grad_norm": 623.6607055664062, "learning_rate": 3.3576739411701394e-05, "loss": 58.9651, "step": 112420 }, { "epoch": 0.45423142652827886, "grad_norm": 870.5923461914062, "learning_rate": 3.357346051768801e-05, "loss": 64.04, "step": 112430 }, { "epoch": 0.4542718277936465, "grad_norm": 499.6788330078125, "learning_rate": 3.35701814565306e-05, "loss": 59.2575, "step": 112440 }, { "epoch": 0.45431222905901414, "grad_norm": 902.0311889648438, "learning_rate": 3.356690222829309e-05, "loss": 37.0818, "step": 112450 }, { "epoch": 0.4543526303243818, "grad_norm": 713.7149047851562, "learning_rate": 3.356362283303941e-05, "loss": 52.5913, "step": 112460 }, { "epoch": 0.45439303158974936, "grad_norm": 671.7296142578125, "learning_rate": 3.3560343270833495e-05, "loss": 53.5628, "step": 112470 }, { "epoch": 0.454433432855117, "grad_norm": 1461.3150634765625, "learning_rate": 3.355706354173928e-05, "loss": 78.7695, "step": 112480 }, { "epoch": 0.45447383412048464, "grad_norm": 1066.4896240234375, "learning_rate": 3.3553783645820715e-05, "loss": 74.929, "step": 112490 }, { "epoch": 0.4545142353858523, "grad_norm": 257.38531494140625, "learning_rate": 3.355050358314172e-05, "loss": 39.4283, "step": 112500 }, { "epoch": 0.4545546366512199, "grad_norm": 685.919677734375, "learning_rate": 3.354722335376626e-05, "loss": 51.0791, "step": 112510 }, { "epoch": 0.45459503791658756, "grad_norm": 1068.9114990234375, "learning_rate": 3.354394295775829e-05, "loss": 44.3589, "step": 112520 }, { "epoch": 0.4546354391819552, "grad_norm": 604.07568359375, "learning_rate": 3.354066239518174e-05, "loss": 65.293, "step": 112530 }, { "epoch": 0.4546758404473228, "grad_norm": 1334.7886962890625, "learning_rate": 3.353738166610058e-05, "loss": 52.1476, "step": 112540 }, { "epoch": 0.4547162417126904, "grad_norm": 517.1060791015625, "learning_rate": 3.353410077057877e-05, "loss": 80.8657, "step": 112550 }, { "epoch": 0.45475664297805807, "grad_norm": 479.4462890625, "learning_rate": 3.3530819708680286e-05, "loss": 54.3628, "step": 112560 }, { "epoch": 0.4547970442434257, "grad_norm": 579.92041015625, "learning_rate": 3.352753848046907e-05, "loss": 41.2108, "step": 112570 }, { "epoch": 0.45483744550879335, "grad_norm": 557.235107421875, "learning_rate": 3.3524257086009104e-05, "loss": 33.0313, "step": 112580 }, { "epoch": 0.454877846774161, "grad_norm": 929.7172241210938, "learning_rate": 3.352097552536435e-05, "loss": 40.3618, "step": 112590 }, { "epoch": 0.45491824803952857, "grad_norm": 1180.1580810546875, "learning_rate": 3.35176937985988e-05, "loss": 73.8001, "step": 112600 }, { "epoch": 0.4549586493048962, "grad_norm": 931.85009765625, "learning_rate": 3.351441190577642e-05, "loss": 45.0227, "step": 112610 }, { "epoch": 0.45499905057026385, "grad_norm": 911.3134155273438, "learning_rate": 3.3511129846961184e-05, "loss": 59.2229, "step": 112620 }, { "epoch": 0.4550394518356315, "grad_norm": 675.170654296875, "learning_rate": 3.35078476222171e-05, "loss": 80.4119, "step": 112630 }, { "epoch": 0.45507985310099913, "grad_norm": 1176.3988037109375, "learning_rate": 3.350456523160815e-05, "loss": 47.3479, "step": 112640 }, { "epoch": 0.45512025436636677, "grad_norm": 969.1943359375, "learning_rate": 3.350128267519832e-05, "loss": 81.494, "step": 112650 }, { "epoch": 0.4551606556317344, "grad_norm": 291.8066711425781, "learning_rate": 3.349799995305162e-05, "loss": 41.662, "step": 112660 }, { "epoch": 0.455201056897102, "grad_norm": 658.59765625, "learning_rate": 3.3494717065232016e-05, "loss": 61.2986, "step": 112670 }, { "epoch": 0.45524145816246964, "grad_norm": 917.7549438476562, "learning_rate": 3.349143401180354e-05, "loss": 63.2453, "step": 112680 }, { "epoch": 0.4552818594278373, "grad_norm": 453.6599426269531, "learning_rate": 3.348815079283018e-05, "loss": 59.7746, "step": 112690 }, { "epoch": 0.4553222606932049, "grad_norm": 850.1251220703125, "learning_rate": 3.3484867408375954e-05, "loss": 57.4509, "step": 112700 }, { "epoch": 0.45536266195857256, "grad_norm": 701.5401000976562, "learning_rate": 3.348158385850487e-05, "loss": 41.1296, "step": 112710 }, { "epoch": 0.4554030632239402, "grad_norm": 819.9852294921875, "learning_rate": 3.347830014328094e-05, "loss": 69.8444, "step": 112720 }, { "epoch": 0.4554434644893078, "grad_norm": 720.2810668945312, "learning_rate": 3.347501626276819e-05, "loss": 75.9919, "step": 112730 }, { "epoch": 0.4554838657546754, "grad_norm": 495.089599609375, "learning_rate": 3.3471732217030625e-05, "loss": 40.4414, "step": 112740 }, { "epoch": 0.45552426702004306, "grad_norm": 573.2144775390625, "learning_rate": 3.346844800613229e-05, "loss": 43.111, "step": 112750 }, { "epoch": 0.4555646682854107, "grad_norm": 1056.5557861328125, "learning_rate": 3.346516363013719e-05, "loss": 92.2572, "step": 112760 }, { "epoch": 0.45560506955077834, "grad_norm": 487.2084655761719, "learning_rate": 3.346187908910938e-05, "loss": 36.6314, "step": 112770 }, { "epoch": 0.455645470816146, "grad_norm": 501.5868225097656, "learning_rate": 3.345859438311287e-05, "loss": 56.4939, "step": 112780 }, { "epoch": 0.45568587208151357, "grad_norm": 792.0189819335938, "learning_rate": 3.345530951221171e-05, "loss": 94.0912, "step": 112790 }, { "epoch": 0.4557262733468812, "grad_norm": 778.9989624023438, "learning_rate": 3.3452024476469934e-05, "loss": 40.7863, "step": 112800 }, { "epoch": 0.45576667461224885, "grad_norm": 1507.0234375, "learning_rate": 3.3448739275951595e-05, "loss": 61.6816, "step": 112810 }, { "epoch": 0.4558070758776165, "grad_norm": 432.25244140625, "learning_rate": 3.344545391072073e-05, "loss": 44.9511, "step": 112820 }, { "epoch": 0.4558474771429841, "grad_norm": 655.8712158203125, "learning_rate": 3.34421683808414e-05, "loss": 48.9905, "step": 112830 }, { "epoch": 0.45588787840835177, "grad_norm": 940.5873413085938, "learning_rate": 3.343888268637765e-05, "loss": 59.8554, "step": 112840 }, { "epoch": 0.4559282796737194, "grad_norm": 1201.72412109375, "learning_rate": 3.343559682739353e-05, "loss": 85.8798, "step": 112850 }, { "epoch": 0.455968680939087, "grad_norm": 822.5703735351562, "learning_rate": 3.343231080395312e-05, "loss": 54.6579, "step": 112860 }, { "epoch": 0.45600908220445463, "grad_norm": 476.291748046875, "learning_rate": 3.342902461612045e-05, "loss": 40.1478, "step": 112870 }, { "epoch": 0.45604948346982227, "grad_norm": 1140.7523193359375, "learning_rate": 3.3425738263959615e-05, "loss": 36.0184, "step": 112880 }, { "epoch": 0.4560898847351899, "grad_norm": 627.5087280273438, "learning_rate": 3.3422451747534684e-05, "loss": 67.8078, "step": 112890 }, { "epoch": 0.45613028600055755, "grad_norm": 525.3790283203125, "learning_rate": 3.3419165066909705e-05, "loss": 47.1049, "step": 112900 }, { "epoch": 0.4561706872659252, "grad_norm": 361.6174011230469, "learning_rate": 3.3415878222148776e-05, "loss": 66.291, "step": 112910 }, { "epoch": 0.4562110885312928, "grad_norm": 1267.40966796875, "learning_rate": 3.341259121331597e-05, "loss": 39.7781, "step": 112920 }, { "epoch": 0.4562514897966604, "grad_norm": 643.3392333984375, "learning_rate": 3.340930404047537e-05, "loss": 37.3639, "step": 112930 }, { "epoch": 0.45629189106202805, "grad_norm": 481.6955871582031, "learning_rate": 3.3406016703691055e-05, "loss": 55.8988, "step": 112940 }, { "epoch": 0.4563322923273957, "grad_norm": 650.4725952148438, "learning_rate": 3.340272920302711e-05, "loss": 91.6434, "step": 112950 }, { "epoch": 0.45637269359276333, "grad_norm": 674.4364013671875, "learning_rate": 3.339944153854764e-05, "loss": 46.4506, "step": 112960 }, { "epoch": 0.456413094858131, "grad_norm": 542.6135864257812, "learning_rate": 3.3396153710316736e-05, "loss": 54.1574, "step": 112970 }, { "epoch": 0.4564534961234986, "grad_norm": 1714.6644287109375, "learning_rate": 3.339286571839848e-05, "loss": 96.3534, "step": 112980 }, { "epoch": 0.4564938973888662, "grad_norm": 525.607177734375, "learning_rate": 3.338957756285699e-05, "loss": 67.7952, "step": 112990 }, { "epoch": 0.45653429865423384, "grad_norm": 1665.643798828125, "learning_rate": 3.338628924375638e-05, "loss": 69.8561, "step": 113000 }, { "epoch": 0.4565746999196015, "grad_norm": 1218.552490234375, "learning_rate": 3.338300076116073e-05, "loss": 91.2745, "step": 113010 }, { "epoch": 0.4566151011849691, "grad_norm": 1521.2366943359375, "learning_rate": 3.337971211513417e-05, "loss": 59.6287, "step": 113020 }, { "epoch": 0.45665550245033676, "grad_norm": 898.8818969726562, "learning_rate": 3.337642330574081e-05, "loss": 54.526, "step": 113030 }, { "epoch": 0.4566959037157044, "grad_norm": 390.90435791015625, "learning_rate": 3.3373134333044756e-05, "loss": 35.1512, "step": 113040 }, { "epoch": 0.456736304981072, "grad_norm": 788.0700073242188, "learning_rate": 3.336984519711015e-05, "loss": 46.5823, "step": 113050 }, { "epoch": 0.4567767062464396, "grad_norm": 979.811767578125, "learning_rate": 3.336655589800109e-05, "loss": 58.1711, "step": 113060 }, { "epoch": 0.45681710751180726, "grad_norm": 1041.4178466796875, "learning_rate": 3.336326643578172e-05, "loss": 55.3556, "step": 113070 }, { "epoch": 0.4568575087771749, "grad_norm": 594.2887573242188, "learning_rate": 3.3359976810516164e-05, "loss": 52.3926, "step": 113080 }, { "epoch": 0.45689791004254254, "grad_norm": 731.5540771484375, "learning_rate": 3.335668702226856e-05, "loss": 107.6374, "step": 113090 }, { "epoch": 0.4569383113079102, "grad_norm": 608.2393798828125, "learning_rate": 3.3353397071103046e-05, "loss": 57.6193, "step": 113100 }, { "epoch": 0.45697871257327777, "grad_norm": 617.5512084960938, "learning_rate": 3.3350106957083744e-05, "loss": 72.52, "step": 113110 }, { "epoch": 0.4570191138386454, "grad_norm": 658.0664672851562, "learning_rate": 3.334681668027481e-05, "loss": 94.5809, "step": 113120 }, { "epoch": 0.45705951510401305, "grad_norm": 543.9719848632812, "learning_rate": 3.334352624074039e-05, "loss": 41.8438, "step": 113130 }, { "epoch": 0.4570999163693807, "grad_norm": 696.774658203125, "learning_rate": 3.334023563854463e-05, "loss": 76.3532, "step": 113140 }, { "epoch": 0.4571403176347483, "grad_norm": 2398.177734375, "learning_rate": 3.333694487375168e-05, "loss": 38.9571, "step": 113150 }, { "epoch": 0.45718071890011597, "grad_norm": 455.6088562011719, "learning_rate": 3.33336539464257e-05, "loss": 32.256, "step": 113160 }, { "epoch": 0.4572211201654836, "grad_norm": 1092.000732421875, "learning_rate": 3.3330362856630845e-05, "loss": 59.1251, "step": 113170 }, { "epoch": 0.4572615214308512, "grad_norm": 1621.383544921875, "learning_rate": 3.3327071604431275e-05, "loss": 43.2729, "step": 113180 }, { "epoch": 0.45730192269621883, "grad_norm": 680.7568969726562, "learning_rate": 3.3323780189891166e-05, "loss": 49.1451, "step": 113190 }, { "epoch": 0.45734232396158647, "grad_norm": 280.8765563964844, "learning_rate": 3.332048861307467e-05, "loss": 54.7998, "step": 113200 }, { "epoch": 0.4573827252269541, "grad_norm": 457.403564453125, "learning_rate": 3.331719687404597e-05, "loss": 91.7487, "step": 113210 }, { "epoch": 0.45742312649232175, "grad_norm": 777.8742065429688, "learning_rate": 3.331390497286922e-05, "loss": 44.5807, "step": 113220 }, { "epoch": 0.4574635277576894, "grad_norm": 1098.077880859375, "learning_rate": 3.331061290960863e-05, "loss": 90.1487, "step": 113230 }, { "epoch": 0.457503929023057, "grad_norm": 882.6914672851562, "learning_rate": 3.3307320684328354e-05, "loss": 68.7481, "step": 113240 }, { "epoch": 0.4575443302884246, "grad_norm": 1128.127197265625, "learning_rate": 3.330402829709258e-05, "loss": 65.0538, "step": 113250 }, { "epoch": 0.45758473155379226, "grad_norm": 937.3806762695312, "learning_rate": 3.3300735747965505e-05, "loss": 57.1227, "step": 113260 }, { "epoch": 0.4576251328191599, "grad_norm": 911.514404296875, "learning_rate": 3.329744303701132e-05, "loss": 85.5697, "step": 113270 }, { "epoch": 0.45766553408452754, "grad_norm": 532.0978393554688, "learning_rate": 3.3294150164294204e-05, "loss": 41.7875, "step": 113280 }, { "epoch": 0.4577059353498952, "grad_norm": 2276.76904296875, "learning_rate": 3.329085712987836e-05, "loss": 58.2938, "step": 113290 }, { "epoch": 0.4577463366152628, "grad_norm": 633.0645751953125, "learning_rate": 3.3287563933827995e-05, "loss": 45.8421, "step": 113300 }, { "epoch": 0.4577867378806304, "grad_norm": 884.1083984375, "learning_rate": 3.328427057620729e-05, "loss": 69.57, "step": 113310 }, { "epoch": 0.45782713914599804, "grad_norm": 832.331298828125, "learning_rate": 3.328097705708047e-05, "loss": 36.6988, "step": 113320 }, { "epoch": 0.4578675404113657, "grad_norm": 1298.6260986328125, "learning_rate": 3.3277683376511744e-05, "loss": 67.1316, "step": 113330 }, { "epoch": 0.4579079416767333, "grad_norm": 690.5106811523438, "learning_rate": 3.327438953456532e-05, "loss": 43.1363, "step": 113340 }, { "epoch": 0.45794834294210096, "grad_norm": 638.0509643554688, "learning_rate": 3.327109553130541e-05, "loss": 64.9828, "step": 113350 }, { "epoch": 0.4579887442074686, "grad_norm": 712.4091186523438, "learning_rate": 3.326780136679623e-05, "loss": 51.8144, "step": 113360 }, { "epoch": 0.4580291454728362, "grad_norm": 768.2869873046875, "learning_rate": 3.326450704110201e-05, "loss": 43.4623, "step": 113370 }, { "epoch": 0.4580695467382038, "grad_norm": 667.932373046875, "learning_rate": 3.3261212554286975e-05, "loss": 60.1954, "step": 113380 }, { "epoch": 0.45810994800357147, "grad_norm": 797.756591796875, "learning_rate": 3.3257917906415336e-05, "loss": 64.3577, "step": 113390 }, { "epoch": 0.4581503492689391, "grad_norm": 417.9449462890625, "learning_rate": 3.325462309755134e-05, "loss": 52.2687, "step": 113400 }, { "epoch": 0.45819075053430675, "grad_norm": 131.72840881347656, "learning_rate": 3.325132812775922e-05, "loss": 53.6158, "step": 113410 }, { "epoch": 0.4582311517996744, "grad_norm": 1277.050537109375, "learning_rate": 3.324803299710321e-05, "loss": 79.7133, "step": 113420 }, { "epoch": 0.45827155306504197, "grad_norm": 247.889404296875, "learning_rate": 3.3244737705647554e-05, "loss": 79.97, "step": 113430 }, { "epoch": 0.4583119543304096, "grad_norm": 1504.002685546875, "learning_rate": 3.324144225345649e-05, "loss": 57.3291, "step": 113440 }, { "epoch": 0.45835235559577725, "grad_norm": 3024.2744140625, "learning_rate": 3.3238146640594256e-05, "loss": 55.5882, "step": 113450 }, { "epoch": 0.4583927568611449, "grad_norm": 1827.9388427734375, "learning_rate": 3.323485086712513e-05, "loss": 68.6746, "step": 113460 }, { "epoch": 0.45843315812651253, "grad_norm": 1997.11279296875, "learning_rate": 3.323155493311334e-05, "loss": 60.3449, "step": 113470 }, { "epoch": 0.45847355939188017, "grad_norm": 1310.3616943359375, "learning_rate": 3.322825883862314e-05, "loss": 58.0651, "step": 113480 }, { "epoch": 0.4585139606572478, "grad_norm": 542.6761474609375, "learning_rate": 3.32249625837188e-05, "loss": 45.5163, "step": 113490 }, { "epoch": 0.4585543619226154, "grad_norm": 109.23764038085938, "learning_rate": 3.322166616846458e-05, "loss": 46.8832, "step": 113500 }, { "epoch": 0.45859476318798303, "grad_norm": 770.7872314453125, "learning_rate": 3.321836959292475e-05, "loss": 54.6829, "step": 113510 }, { "epoch": 0.4586351644533507, "grad_norm": 1391.15185546875, "learning_rate": 3.321507285716357e-05, "loss": 76.5766, "step": 113520 }, { "epoch": 0.4586755657187183, "grad_norm": 1156.2738037109375, "learning_rate": 3.321177596124532e-05, "loss": 44.4677, "step": 113530 }, { "epoch": 0.45871596698408595, "grad_norm": 882.244140625, "learning_rate": 3.3208478905234274e-05, "loss": 64.2383, "step": 113540 }, { "epoch": 0.4587563682494536, "grad_norm": 1060.12841796875, "learning_rate": 3.32051816891947e-05, "loss": 49.1175, "step": 113550 }, { "epoch": 0.4587967695148212, "grad_norm": 1234.741455078125, "learning_rate": 3.320188431319088e-05, "loss": 41.2987, "step": 113560 }, { "epoch": 0.4588371707801888, "grad_norm": 437.5050048828125, "learning_rate": 3.31985867772871e-05, "loss": 63.9297, "step": 113570 }, { "epoch": 0.45887757204555646, "grad_norm": 839.0947875976562, "learning_rate": 3.319528908154766e-05, "loss": 55.2399, "step": 113580 }, { "epoch": 0.4589179733109241, "grad_norm": 1059.20654296875, "learning_rate": 3.319199122603683e-05, "loss": 35.0855, "step": 113590 }, { "epoch": 0.45895837457629174, "grad_norm": 1006.2760009765625, "learning_rate": 3.318869321081892e-05, "loss": 75.6239, "step": 113600 }, { "epoch": 0.4589987758416594, "grad_norm": 1427.097412109375, "learning_rate": 3.3185395035958224e-05, "loss": 53.2945, "step": 113610 }, { "epoch": 0.459039177107027, "grad_norm": 960.8377075195312, "learning_rate": 3.318209670151904e-05, "loss": 74.3849, "step": 113620 }, { "epoch": 0.4590795783723946, "grad_norm": 780.0336303710938, "learning_rate": 3.317879820756566e-05, "loss": 59.6619, "step": 113630 }, { "epoch": 0.45911997963776224, "grad_norm": 913.7399291992188, "learning_rate": 3.31754995541624e-05, "loss": 54.3696, "step": 113640 }, { "epoch": 0.4591603809031299, "grad_norm": 884.738037109375, "learning_rate": 3.3172200741373563e-05, "loss": 87.1222, "step": 113650 }, { "epoch": 0.4592007821684975, "grad_norm": 495.6034851074219, "learning_rate": 3.3168901769263474e-05, "loss": 52.4804, "step": 113660 }, { "epoch": 0.45924118343386516, "grad_norm": 1088.6365966796875, "learning_rate": 3.316560263789643e-05, "loss": 46.7347, "step": 113670 }, { "epoch": 0.4592815846992328, "grad_norm": 724.1251831054688, "learning_rate": 3.3162303347336764e-05, "loss": 53.7025, "step": 113680 }, { "epoch": 0.4593219859646004, "grad_norm": 982.661376953125, "learning_rate": 3.315900389764879e-05, "loss": 47.1129, "step": 113690 }, { "epoch": 0.459362387229968, "grad_norm": 662.5863037109375, "learning_rate": 3.315570428889684e-05, "loss": 35.4883, "step": 113700 }, { "epoch": 0.45940278849533567, "grad_norm": 1490.281005859375, "learning_rate": 3.315240452114523e-05, "loss": 52.5399, "step": 113710 }, { "epoch": 0.4594431897607033, "grad_norm": 1237.041748046875, "learning_rate": 3.31491045944583e-05, "loss": 59.784, "step": 113720 }, { "epoch": 0.45948359102607095, "grad_norm": 700.3053588867188, "learning_rate": 3.314580450890038e-05, "loss": 70.0428, "step": 113730 }, { "epoch": 0.4595239922914386, "grad_norm": 851.7976684570312, "learning_rate": 3.3142504264535804e-05, "loss": 50.4964, "step": 113740 }, { "epoch": 0.45956439355680617, "grad_norm": 983.1741943359375, "learning_rate": 3.313920386142892e-05, "loss": 59.0461, "step": 113750 }, { "epoch": 0.4596047948221738, "grad_norm": 1348.2669677734375, "learning_rate": 3.313590329964406e-05, "loss": 59.1032, "step": 113760 }, { "epoch": 0.45964519608754145, "grad_norm": 487.41680908203125, "learning_rate": 3.313260257924558e-05, "loss": 87.3492, "step": 113770 }, { "epoch": 0.4596855973529091, "grad_norm": 345.0389099121094, "learning_rate": 3.312930170029783e-05, "loss": 52.9883, "step": 113780 }, { "epoch": 0.45972599861827673, "grad_norm": 438.7192077636719, "learning_rate": 3.3126000662865156e-05, "loss": 49.1873, "step": 113790 }, { "epoch": 0.45976639988364437, "grad_norm": 493.15130615234375, "learning_rate": 3.312269946701191e-05, "loss": 50.4528, "step": 113800 }, { "epoch": 0.459806801149012, "grad_norm": 1520.73974609375, "learning_rate": 3.311939811280246e-05, "loss": 42.8416, "step": 113810 }, { "epoch": 0.4598472024143796, "grad_norm": 146.88551330566406, "learning_rate": 3.311609660030117e-05, "loss": 52.2549, "step": 113820 }, { "epoch": 0.45988760367974724, "grad_norm": 610.1900634765625, "learning_rate": 3.311279492957239e-05, "loss": 81.4292, "step": 113830 }, { "epoch": 0.4599280049451149, "grad_norm": 478.4176330566406, "learning_rate": 3.31094931006805e-05, "loss": 59.0037, "step": 113840 }, { "epoch": 0.4599684062104825, "grad_norm": 698.8412475585938, "learning_rate": 3.310619111368986e-05, "loss": 60.8137, "step": 113850 }, { "epoch": 0.46000880747585016, "grad_norm": 719.0433349609375, "learning_rate": 3.310288896866486e-05, "loss": 40.1776, "step": 113860 }, { "epoch": 0.4600492087412178, "grad_norm": 807.5015258789062, "learning_rate": 3.309958666566986e-05, "loss": 87.1863, "step": 113870 }, { "epoch": 0.4600896100065854, "grad_norm": 773.2158813476562, "learning_rate": 3.309628420476926e-05, "loss": 65.442, "step": 113880 }, { "epoch": 0.460130011271953, "grad_norm": 1120.7901611328125, "learning_rate": 3.309298158602742e-05, "loss": 46.1566, "step": 113890 }, { "epoch": 0.46017041253732066, "grad_norm": 2974.2939453125, "learning_rate": 3.308967880950874e-05, "loss": 86.9134, "step": 113900 }, { "epoch": 0.4602108138026883, "grad_norm": 1336.4696044921875, "learning_rate": 3.308637587527761e-05, "loss": 79.6506, "step": 113910 }, { "epoch": 0.46025121506805594, "grad_norm": 1928.9310302734375, "learning_rate": 3.3083072783398416e-05, "loss": 96.6911, "step": 113920 }, { "epoch": 0.4602916163334236, "grad_norm": 538.5294799804688, "learning_rate": 3.3079769533935556e-05, "loss": 49.8037, "step": 113930 }, { "epoch": 0.4603320175987912, "grad_norm": 809.7352294921875, "learning_rate": 3.307646612695343e-05, "loss": 61.5456, "step": 113940 }, { "epoch": 0.4603724188641588, "grad_norm": 550.5420532226562, "learning_rate": 3.307316256251644e-05, "loss": 49.7282, "step": 113950 }, { "epoch": 0.46041282012952645, "grad_norm": 1422.148193359375, "learning_rate": 3.3069858840688994e-05, "loss": 71.9855, "step": 113960 }, { "epoch": 0.4604532213948941, "grad_norm": 426.9280090332031, "learning_rate": 3.3066554961535485e-05, "loss": 46.9851, "step": 113970 }, { "epoch": 0.4604936226602617, "grad_norm": 844.1886596679688, "learning_rate": 3.3063250925120334e-05, "loss": 56.5634, "step": 113980 }, { "epoch": 0.46053402392562937, "grad_norm": 590.6248779296875, "learning_rate": 3.305994673150797e-05, "loss": 58.149, "step": 113990 }, { "epoch": 0.460574425190997, "grad_norm": 722.6728515625, "learning_rate": 3.305664238076278e-05, "loss": 48.5719, "step": 114000 }, { "epoch": 0.4606148264563646, "grad_norm": 761.7833251953125, "learning_rate": 3.30533378729492e-05, "loss": 67.1143, "step": 114010 }, { "epoch": 0.46065522772173223, "grad_norm": 258.24639892578125, "learning_rate": 3.3050033208131656e-05, "loss": 50.417, "step": 114020 }, { "epoch": 0.46069562898709987, "grad_norm": 391.4142150878906, "learning_rate": 3.304672838637457e-05, "loss": 65.8437, "step": 114030 }, { "epoch": 0.4607360302524675, "grad_norm": 432.7169189453125, "learning_rate": 3.3043423407742375e-05, "loss": 60.9631, "step": 114040 }, { "epoch": 0.46077643151783515, "grad_norm": 893.4138793945312, "learning_rate": 3.3040118272299495e-05, "loss": 131.9281, "step": 114050 }, { "epoch": 0.4608168327832028, "grad_norm": 1183.3709716796875, "learning_rate": 3.303681298011037e-05, "loss": 74.6201, "step": 114060 }, { "epoch": 0.4608572340485704, "grad_norm": 327.8617248535156, "learning_rate": 3.303350753123944e-05, "loss": 38.2665, "step": 114070 }, { "epoch": 0.460897635313938, "grad_norm": 642.1771850585938, "learning_rate": 3.3030201925751145e-05, "loss": 50.6443, "step": 114080 }, { "epoch": 0.46093803657930565, "grad_norm": 1546.3465576171875, "learning_rate": 3.302689616370993e-05, "loss": 37.8828, "step": 114090 }, { "epoch": 0.4609784378446733, "grad_norm": 634.73876953125, "learning_rate": 3.302359024518024e-05, "loss": 29.7485, "step": 114100 }, { "epoch": 0.46101883911004093, "grad_norm": 658.0567626953125, "learning_rate": 3.302028417022653e-05, "loss": 45.2375, "step": 114110 }, { "epoch": 0.4610592403754086, "grad_norm": 633.6270751953125, "learning_rate": 3.301697793891324e-05, "loss": 59.6125, "step": 114120 }, { "epoch": 0.4610996416407762, "grad_norm": 1438.4256591796875, "learning_rate": 3.301367155130485e-05, "loss": 89.7295, "step": 114130 }, { "epoch": 0.4611400429061438, "grad_norm": 1383.4329833984375, "learning_rate": 3.3010365007465805e-05, "loss": 48.6195, "step": 114140 }, { "epoch": 0.46118044417151144, "grad_norm": 530.7550659179688, "learning_rate": 3.300705830746057e-05, "loss": 46.5861, "step": 114150 }, { "epoch": 0.4612208454368791, "grad_norm": 633.077880859375, "learning_rate": 3.300375145135361e-05, "loss": 60.9828, "step": 114160 }, { "epoch": 0.4612612467022467, "grad_norm": 376.91864013671875, "learning_rate": 3.3000444439209396e-05, "loss": 62.8593, "step": 114170 }, { "epoch": 0.46130164796761436, "grad_norm": 721.732666015625, "learning_rate": 3.299713727109239e-05, "loss": 84.59, "step": 114180 }, { "epoch": 0.461342049232982, "grad_norm": 473.7373352050781, "learning_rate": 3.299382994706709e-05, "loss": 94.0822, "step": 114190 }, { "epoch": 0.4613824504983496, "grad_norm": 1528.2044677734375, "learning_rate": 3.299052246719795e-05, "loss": 81.7651, "step": 114200 }, { "epoch": 0.4614228517637172, "grad_norm": 890.8794555664062, "learning_rate": 3.298721483154946e-05, "loss": 40.4416, "step": 114210 }, { "epoch": 0.46146325302908486, "grad_norm": 541.0910034179688, "learning_rate": 3.298390704018611e-05, "loss": 43.575, "step": 114220 }, { "epoch": 0.4615036542944525, "grad_norm": 915.8447875976562, "learning_rate": 3.298059909317239e-05, "loss": 70.035, "step": 114230 }, { "epoch": 0.46154405555982014, "grad_norm": 1335.07275390625, "learning_rate": 3.297729099057277e-05, "loss": 58.3776, "step": 114240 }, { "epoch": 0.4615844568251878, "grad_norm": 1571.07373046875, "learning_rate": 3.2973982732451755e-05, "loss": 72.0075, "step": 114250 }, { "epoch": 0.4616248580905554, "grad_norm": 1263.757080078125, "learning_rate": 3.297067431887384e-05, "loss": 53.4101, "step": 114260 }, { "epoch": 0.461665259355923, "grad_norm": 916.6146850585938, "learning_rate": 3.296736574990353e-05, "loss": 78.4652, "step": 114270 }, { "epoch": 0.46170566062129065, "grad_norm": 365.46142578125, "learning_rate": 3.296405702560532e-05, "loss": 80.6376, "step": 114280 }, { "epoch": 0.4617460618866583, "grad_norm": 944.3591918945312, "learning_rate": 3.2960748146043716e-05, "loss": 90.1059, "step": 114290 }, { "epoch": 0.46178646315202593, "grad_norm": 679.8099365234375, "learning_rate": 3.295743911128324e-05, "loss": 48.7843, "step": 114300 }, { "epoch": 0.46182686441739357, "grad_norm": 1261.843017578125, "learning_rate": 3.295412992138838e-05, "loss": 78.7052, "step": 114310 }, { "epoch": 0.4618672656827612, "grad_norm": 481.5779724121094, "learning_rate": 3.295082057642367e-05, "loss": 41.2433, "step": 114320 }, { "epoch": 0.4619076669481288, "grad_norm": 903.3895874023438, "learning_rate": 3.294751107645361e-05, "loss": 48.583, "step": 114330 }, { "epoch": 0.46194806821349643, "grad_norm": 730.4287109375, "learning_rate": 3.294420142154274e-05, "loss": 78.3227, "step": 114340 }, { "epoch": 0.46198846947886407, "grad_norm": 563.7612915039062, "learning_rate": 3.2940891611755564e-05, "loss": 53.6732, "step": 114350 }, { "epoch": 0.4620288707442317, "grad_norm": 1336.9444580078125, "learning_rate": 3.293758164715663e-05, "loss": 55.2327, "step": 114360 }, { "epoch": 0.46206927200959935, "grad_norm": 1316.6781005859375, "learning_rate": 3.293427152781044e-05, "loss": 39.9807, "step": 114370 }, { "epoch": 0.462109673274967, "grad_norm": 1014.697265625, "learning_rate": 3.2930961253781554e-05, "loss": 81.932, "step": 114380 }, { "epoch": 0.4621500745403346, "grad_norm": 2144.41064453125, "learning_rate": 3.292765082513449e-05, "loss": 80.4216, "step": 114390 }, { "epoch": 0.4621904758057022, "grad_norm": 509.5421447753906, "learning_rate": 3.29243402419338e-05, "loss": 40.3778, "step": 114400 }, { "epoch": 0.46223087707106986, "grad_norm": 432.00933837890625, "learning_rate": 3.2921029504244004e-05, "loss": 76.2071, "step": 114410 }, { "epoch": 0.4622712783364375, "grad_norm": 722.176025390625, "learning_rate": 3.2917718612129665e-05, "loss": 41.5058, "step": 114420 }, { "epoch": 0.46231167960180514, "grad_norm": 725.9029541015625, "learning_rate": 3.291440756565533e-05, "loss": 53.8693, "step": 114430 }, { "epoch": 0.4623520808671728, "grad_norm": 281.3984375, "learning_rate": 3.2911096364885544e-05, "loss": 80.6305, "step": 114440 }, { "epoch": 0.4623924821325404, "grad_norm": 811.3935546875, "learning_rate": 3.290778500988485e-05, "loss": 55.3654, "step": 114450 }, { "epoch": 0.462432883397908, "grad_norm": 472.8229064941406, "learning_rate": 3.2904473500717824e-05, "loss": 59.1575, "step": 114460 }, { "epoch": 0.46247328466327564, "grad_norm": 713.5052490234375, "learning_rate": 3.290116183744902e-05, "loss": 50.5053, "step": 114470 }, { "epoch": 0.4625136859286433, "grad_norm": 1182.7664794921875, "learning_rate": 3.2897850020143005e-05, "loss": 59.1632, "step": 114480 }, { "epoch": 0.4625540871940109, "grad_norm": 845.945556640625, "learning_rate": 3.289453804886433e-05, "loss": 85.3159, "step": 114490 }, { "epoch": 0.46259448845937856, "grad_norm": 406.4460754394531, "learning_rate": 3.289122592367757e-05, "loss": 39.6031, "step": 114500 }, { "epoch": 0.4626348897247462, "grad_norm": 883.4130249023438, "learning_rate": 3.288791364464729e-05, "loss": 44.7248, "step": 114510 }, { "epoch": 0.4626752909901138, "grad_norm": 447.2448425292969, "learning_rate": 3.2884601211838085e-05, "loss": 62.0368, "step": 114520 }, { "epoch": 0.4627156922554814, "grad_norm": 640.0363159179688, "learning_rate": 3.288128862531452e-05, "loss": 56.4858, "step": 114530 }, { "epoch": 0.46275609352084907, "grad_norm": 891.1563720703125, "learning_rate": 3.287797588514117e-05, "loss": 70.3198, "step": 114540 }, { "epoch": 0.4627964947862167, "grad_norm": 628.9306030273438, "learning_rate": 3.287466299138262e-05, "loss": 53.9219, "step": 114550 }, { "epoch": 0.46283689605158435, "grad_norm": 382.36456298828125, "learning_rate": 3.287134994410347e-05, "loss": 42.8895, "step": 114560 }, { "epoch": 0.462877297316952, "grad_norm": 1126.9713134765625, "learning_rate": 3.28680367433683e-05, "loss": 38.392, "step": 114570 }, { "epoch": 0.4629176985823196, "grad_norm": 2824.01025390625, "learning_rate": 3.28647233892417e-05, "loss": 77.3813, "step": 114580 }, { "epoch": 0.4629580998476872, "grad_norm": 413.0010986328125, "learning_rate": 3.286140988178826e-05, "loss": 50.3714, "step": 114590 }, { "epoch": 0.46299850111305485, "grad_norm": 952.1806030273438, "learning_rate": 3.28580962210726e-05, "loss": 55.7656, "step": 114600 }, { "epoch": 0.4630389023784225, "grad_norm": 491.023193359375, "learning_rate": 3.2854782407159305e-05, "loss": 47.8107, "step": 114610 }, { "epoch": 0.46307930364379013, "grad_norm": 413.3840026855469, "learning_rate": 3.285146844011298e-05, "loss": 57.6984, "step": 114620 }, { "epoch": 0.46311970490915777, "grad_norm": 593.4551391601562, "learning_rate": 3.2848154319998235e-05, "loss": 47.3308, "step": 114630 }, { "epoch": 0.4631601061745254, "grad_norm": 543.7266845703125, "learning_rate": 3.2844840046879686e-05, "loss": 46.1537, "step": 114640 }, { "epoch": 0.463200507439893, "grad_norm": 876.9476318359375, "learning_rate": 3.2841525620821945e-05, "loss": 39.367, "step": 114650 }, { "epoch": 0.46324090870526063, "grad_norm": 470.560791015625, "learning_rate": 3.2838211041889625e-05, "loss": 56.22, "step": 114660 }, { "epoch": 0.4632813099706283, "grad_norm": 692.4049072265625, "learning_rate": 3.2834896310147336e-05, "loss": 46.9886, "step": 114670 }, { "epoch": 0.4633217112359959, "grad_norm": 1012.37890625, "learning_rate": 3.283158142565971e-05, "loss": 77.0895, "step": 114680 }, { "epoch": 0.46336211250136355, "grad_norm": 1028.7650146484375, "learning_rate": 3.282826638849138e-05, "loss": 60.6332, "step": 114690 }, { "epoch": 0.4634025137667312, "grad_norm": 1001.2539672851562, "learning_rate": 3.2824951198706954e-05, "loss": 55.1329, "step": 114700 }, { "epoch": 0.4634429150320988, "grad_norm": 707.6488647460938, "learning_rate": 3.2821635856371086e-05, "loss": 68.9933, "step": 114710 }, { "epoch": 0.4634833162974664, "grad_norm": 640.0159912109375, "learning_rate": 3.28183203615484e-05, "loss": 56.8567, "step": 114720 }, { "epoch": 0.46352371756283406, "grad_norm": 1112.1865234375, "learning_rate": 3.281500471430353e-05, "loss": 63.7654, "step": 114730 }, { "epoch": 0.4635641188282017, "grad_norm": 871.5289306640625, "learning_rate": 3.281168891470112e-05, "loss": 64.3242, "step": 114740 }, { "epoch": 0.46360452009356934, "grad_norm": 232.17013549804688, "learning_rate": 3.2808372962805816e-05, "loss": 51.8217, "step": 114750 }, { "epoch": 0.463644921358937, "grad_norm": 653.4492797851562, "learning_rate": 3.280505685868226e-05, "loss": 32.7491, "step": 114760 }, { "epoch": 0.4636853226243046, "grad_norm": 806.7274780273438, "learning_rate": 3.2801740602395105e-05, "loss": 60.581, "step": 114770 }, { "epoch": 0.4637257238896722, "grad_norm": 698.8681640625, "learning_rate": 3.279842419400899e-05, "loss": 81.199, "step": 114780 }, { "epoch": 0.46376612515503984, "grad_norm": 1318.1488037109375, "learning_rate": 3.2795107633588586e-05, "loss": 61.5727, "step": 114790 }, { "epoch": 0.4638065264204075, "grad_norm": 2012.5032958984375, "learning_rate": 3.279179092119855e-05, "loss": 63.0752, "step": 114800 }, { "epoch": 0.4638469276857751, "grad_norm": 946.7776489257812, "learning_rate": 3.278847405690353e-05, "loss": 104.6616, "step": 114810 }, { "epoch": 0.46388732895114276, "grad_norm": 332.1543273925781, "learning_rate": 3.278515704076821e-05, "loss": 57.2958, "step": 114820 }, { "epoch": 0.4639277302165104, "grad_norm": 628.6360473632812, "learning_rate": 3.278183987285724e-05, "loss": 49.5415, "step": 114830 }, { "epoch": 0.463968131481878, "grad_norm": 407.3933410644531, "learning_rate": 3.277852255323529e-05, "loss": 45.7567, "step": 114840 }, { "epoch": 0.46400853274724563, "grad_norm": 1091.437255859375, "learning_rate": 3.277520508196705e-05, "loss": 76.9394, "step": 114850 }, { "epoch": 0.46404893401261327, "grad_norm": 540.4053344726562, "learning_rate": 3.277188745911717e-05, "loss": 42.3653, "step": 114860 }, { "epoch": 0.4640893352779809, "grad_norm": 1648.4383544921875, "learning_rate": 3.276856968475035e-05, "loss": 78.4631, "step": 114870 }, { "epoch": 0.46412973654334855, "grad_norm": 469.8823547363281, "learning_rate": 3.276525175893126e-05, "loss": 59.8111, "step": 114880 }, { "epoch": 0.4641701378087162, "grad_norm": 925.3463745117188, "learning_rate": 3.27619336817246e-05, "loss": 49.3538, "step": 114890 }, { "epoch": 0.46421053907408383, "grad_norm": 1113.8001708984375, "learning_rate": 3.2758615453195034e-05, "loss": 64.5263, "step": 114900 }, { "epoch": 0.4642509403394514, "grad_norm": 591.8302612304688, "learning_rate": 3.275529707340728e-05, "loss": 71.7116, "step": 114910 }, { "epoch": 0.46429134160481905, "grad_norm": 564.6202392578125, "learning_rate": 3.2751978542425995e-05, "loss": 58.8196, "step": 114920 }, { "epoch": 0.4643317428701867, "grad_norm": 1191.913330078125, "learning_rate": 3.2748659860315916e-05, "loss": 42.0783, "step": 114930 }, { "epoch": 0.46437214413555433, "grad_norm": 599.6019287109375, "learning_rate": 3.274534102714172e-05, "loss": 50.7316, "step": 114940 }, { "epoch": 0.46441254540092197, "grad_norm": 527.0614013671875, "learning_rate": 3.2742022042968104e-05, "loss": 51.8205, "step": 114950 }, { "epoch": 0.4644529466662896, "grad_norm": 665.9529418945312, "learning_rate": 3.273870290785979e-05, "loss": 68.3194, "step": 114960 }, { "epoch": 0.4644933479316572, "grad_norm": 515.5640258789062, "learning_rate": 3.2735383621881485e-05, "loss": 37.5466, "step": 114970 }, { "epoch": 0.46453374919702484, "grad_norm": 822.676513671875, "learning_rate": 3.273206418509788e-05, "loss": 69.2502, "step": 114980 }, { "epoch": 0.4645741504623925, "grad_norm": 1226.153076171875, "learning_rate": 3.272874459757371e-05, "loss": 50.1505, "step": 114990 }, { "epoch": 0.4646145517277601, "grad_norm": 1813.3848876953125, "learning_rate": 3.272542485937369e-05, "loss": 77.2155, "step": 115000 }, { "epoch": 0.46465495299312776, "grad_norm": 579.1246337890625, "learning_rate": 3.2722104970562525e-05, "loss": 93.8051, "step": 115010 }, { "epoch": 0.4646953542584954, "grad_norm": 0.0, "learning_rate": 3.271878493120496e-05, "loss": 37.53, "step": 115020 }, { "epoch": 0.464735755523863, "grad_norm": 285.748779296875, "learning_rate": 3.27154647413657e-05, "loss": 51.3141, "step": 115030 }, { "epoch": 0.4647761567892306, "grad_norm": 585.9220581054688, "learning_rate": 3.271214440110948e-05, "loss": 66.3542, "step": 115040 }, { "epoch": 0.46481655805459826, "grad_norm": 385.7046813964844, "learning_rate": 3.270882391050104e-05, "loss": 43.7319, "step": 115050 }, { "epoch": 0.4648569593199659, "grad_norm": 799.7272338867188, "learning_rate": 3.270550326960511e-05, "loss": 73.2882, "step": 115060 }, { "epoch": 0.46489736058533354, "grad_norm": 1642.924560546875, "learning_rate": 3.270218247848642e-05, "loss": 90.7406, "step": 115070 }, { "epoch": 0.4649377618507012, "grad_norm": 890.0045776367188, "learning_rate": 3.269886153720972e-05, "loss": 65.4913, "step": 115080 }, { "epoch": 0.4649781631160688, "grad_norm": 1087.22802734375, "learning_rate": 3.2695540445839764e-05, "loss": 66.0933, "step": 115090 }, { "epoch": 0.4650185643814364, "grad_norm": 436.0012512207031, "learning_rate": 3.269221920444127e-05, "loss": 48.0393, "step": 115100 }, { "epoch": 0.46505896564680405, "grad_norm": 821.9993286132812, "learning_rate": 3.2688897813079005e-05, "loss": 48.355, "step": 115110 }, { "epoch": 0.4650993669121717, "grad_norm": 434.6961364746094, "learning_rate": 3.2685576271817716e-05, "loss": 54.5126, "step": 115120 }, { "epoch": 0.4651397681775393, "grad_norm": 897.4439697265625, "learning_rate": 3.268225458072217e-05, "loss": 62.3177, "step": 115130 }, { "epoch": 0.46518016944290697, "grad_norm": 2384.05224609375, "learning_rate": 3.267893273985711e-05, "loss": 72.2372, "step": 115140 }, { "epoch": 0.4652205707082746, "grad_norm": 724.9418334960938, "learning_rate": 3.26756107492873e-05, "loss": 56.3772, "step": 115150 }, { "epoch": 0.4652609719736422, "grad_norm": 666.811767578125, "learning_rate": 3.267228860907751e-05, "loss": 45.4823, "step": 115160 }, { "epoch": 0.46530137323900983, "grad_norm": 846.5064086914062, "learning_rate": 3.266896631929251e-05, "loss": 49.0491, "step": 115170 }, { "epoch": 0.46534177450437747, "grad_norm": 372.02447509765625, "learning_rate": 3.2665643879997056e-05, "loss": 78.3602, "step": 115180 }, { "epoch": 0.4653821757697451, "grad_norm": 745.8118896484375, "learning_rate": 3.266232129125593e-05, "loss": 39.577, "step": 115190 }, { "epoch": 0.46542257703511275, "grad_norm": 936.6083984375, "learning_rate": 3.2658998553133895e-05, "loss": 43.7622, "step": 115200 }, { "epoch": 0.4654629783004804, "grad_norm": 458.98114013671875, "learning_rate": 3.2655675665695754e-05, "loss": 49.8332, "step": 115210 }, { "epoch": 0.46550337956584803, "grad_norm": 636.2042846679688, "learning_rate": 3.2652352629006275e-05, "loss": 63.9273, "step": 115220 }, { "epoch": 0.4655437808312156, "grad_norm": 556.1316528320312, "learning_rate": 3.264902944313023e-05, "loss": 46.3902, "step": 115230 }, { "epoch": 0.46558418209658325, "grad_norm": 724.4549560546875, "learning_rate": 3.2645706108132424e-05, "loss": 54.3436, "step": 115240 }, { "epoch": 0.4656245833619509, "grad_norm": 546.86767578125, "learning_rate": 3.264238262407764e-05, "loss": 50.2506, "step": 115250 }, { "epoch": 0.46566498462731853, "grad_norm": 784.9192504882812, "learning_rate": 3.263905899103068e-05, "loss": 40.6821, "step": 115260 }, { "epoch": 0.4657053858926862, "grad_norm": 686.7406616210938, "learning_rate": 3.263573520905633e-05, "loss": 52.2234, "step": 115270 }, { "epoch": 0.4657457871580538, "grad_norm": 709.7639770507812, "learning_rate": 3.263241127821938e-05, "loss": 57.2129, "step": 115280 }, { "epoch": 0.4657861884234214, "grad_norm": 704.1000366210938, "learning_rate": 3.262908719858466e-05, "loss": 44.5228, "step": 115290 }, { "epoch": 0.46582658968878904, "grad_norm": 444.74029541015625, "learning_rate": 3.262576297021695e-05, "loss": 49.4663, "step": 115300 }, { "epoch": 0.4658669909541567, "grad_norm": 951.1953125, "learning_rate": 3.262243859318105e-05, "loss": 60.8233, "step": 115310 }, { "epoch": 0.4659073922195243, "grad_norm": 466.8185119628906, "learning_rate": 3.2619114067541796e-05, "loss": 92.2595, "step": 115320 }, { "epoch": 0.46594779348489196, "grad_norm": 581.5831298828125, "learning_rate": 3.2615789393363995e-05, "loss": 53.4414, "step": 115330 }, { "epoch": 0.4659881947502596, "grad_norm": 1562.436767578125, "learning_rate": 3.261246457071245e-05, "loss": 61.0114, "step": 115340 }, { "epoch": 0.4660285960156272, "grad_norm": 1172.59130859375, "learning_rate": 3.260913959965201e-05, "loss": 71.2536, "step": 115350 }, { "epoch": 0.4660689972809948, "grad_norm": 751.3859252929688, "learning_rate": 3.260581448024745e-05, "loss": 53.4582, "step": 115360 }, { "epoch": 0.46610939854636246, "grad_norm": 479.7427673339844, "learning_rate": 3.260248921256364e-05, "loss": 38.3002, "step": 115370 }, { "epoch": 0.4661497998117301, "grad_norm": 562.0839233398438, "learning_rate": 3.2599163796665376e-05, "loss": 74.0789, "step": 115380 }, { "epoch": 0.46619020107709774, "grad_norm": 1441.49853515625, "learning_rate": 3.25958382326175e-05, "loss": 46.7784, "step": 115390 }, { "epoch": 0.4662306023424654, "grad_norm": 586.0812377929688, "learning_rate": 3.2592512520484856e-05, "loss": 54.3181, "step": 115400 }, { "epoch": 0.466271003607833, "grad_norm": 782.4759521484375, "learning_rate": 3.2589186660332274e-05, "loss": 59.9024, "step": 115410 }, { "epoch": 0.4663114048732006, "grad_norm": 623.7604370117188, "learning_rate": 3.2585860652224585e-05, "loss": 52.6946, "step": 115420 }, { "epoch": 0.46635180613856825, "grad_norm": 958.4971313476562, "learning_rate": 3.2582534496226644e-05, "loss": 56.1301, "step": 115430 }, { "epoch": 0.4663922074039359, "grad_norm": 803.4427490234375, "learning_rate": 3.257920819240328e-05, "loss": 56.7052, "step": 115440 }, { "epoch": 0.46643260866930353, "grad_norm": 481.8475646972656, "learning_rate": 3.2575881740819355e-05, "loss": 53.755, "step": 115450 }, { "epoch": 0.46647300993467117, "grad_norm": 542.654052734375, "learning_rate": 3.257255514153971e-05, "loss": 66.2002, "step": 115460 }, { "epoch": 0.4665134112000388, "grad_norm": 468.3689270019531, "learning_rate": 3.256922839462921e-05, "loss": 60.2367, "step": 115470 }, { "epoch": 0.4665538124654064, "grad_norm": 1403.767333984375, "learning_rate": 3.25659015001527e-05, "loss": 108.456, "step": 115480 }, { "epoch": 0.46659421373077403, "grad_norm": 708.3703002929688, "learning_rate": 3.2562574458175044e-05, "loss": 46.7288, "step": 115490 }, { "epoch": 0.4666346149961417, "grad_norm": 1207.593017578125, "learning_rate": 3.2559247268761115e-05, "loss": 63.8019, "step": 115500 }, { "epoch": 0.4666750162615093, "grad_norm": 919.4540405273438, "learning_rate": 3.2555919931975766e-05, "loss": 71.2822, "step": 115510 }, { "epoch": 0.46671541752687695, "grad_norm": 2360.607421875, "learning_rate": 3.2552592447883865e-05, "loss": 63.8303, "step": 115520 }, { "epoch": 0.4667558187922446, "grad_norm": 1612.3497314453125, "learning_rate": 3.254926481655028e-05, "loss": 45.8642, "step": 115530 }, { "epoch": 0.4667962200576122, "grad_norm": 1008.3920288085938, "learning_rate": 3.25459370380399e-05, "loss": 49.0776, "step": 115540 }, { "epoch": 0.4668366213229798, "grad_norm": 2217.866943359375, "learning_rate": 3.254260911241759e-05, "loss": 72.4225, "step": 115550 }, { "epoch": 0.46687702258834746, "grad_norm": 413.4029235839844, "learning_rate": 3.253928103974823e-05, "loss": 66.9536, "step": 115560 }, { "epoch": 0.4669174238537151, "grad_norm": 389.1944580078125, "learning_rate": 3.253595282009671e-05, "loss": 32.475, "step": 115570 }, { "epoch": 0.46695782511908274, "grad_norm": 0.0, "learning_rate": 3.253262445352791e-05, "loss": 60.644, "step": 115580 }, { "epoch": 0.4669982263844504, "grad_norm": 366.5119934082031, "learning_rate": 3.252929594010671e-05, "loss": 59.2907, "step": 115590 }, { "epoch": 0.467038627649818, "grad_norm": 882.8370971679688, "learning_rate": 3.2525967279898015e-05, "loss": 70.1194, "step": 115600 }, { "epoch": 0.4670790289151856, "grad_norm": 955.0043334960938, "learning_rate": 3.252263847296671e-05, "loss": 62.6897, "step": 115610 }, { "epoch": 0.46711943018055324, "grad_norm": 815.323486328125, "learning_rate": 3.25193095193777e-05, "loss": 30.4579, "step": 115620 }, { "epoch": 0.4671598314459209, "grad_norm": 721.734130859375, "learning_rate": 3.251598041919587e-05, "loss": 66.6569, "step": 115630 }, { "epoch": 0.4672002327112885, "grad_norm": 870.7584838867188, "learning_rate": 3.251265117248614e-05, "loss": 52.2032, "step": 115640 }, { "epoch": 0.46724063397665616, "grad_norm": 4967.96728515625, "learning_rate": 3.25093217793134e-05, "loss": 56.7999, "step": 115650 }, { "epoch": 0.4672810352420238, "grad_norm": 805.152099609375, "learning_rate": 3.250599223974258e-05, "loss": 52.8073, "step": 115660 }, { "epoch": 0.4673214365073914, "grad_norm": 614.6864624023438, "learning_rate": 3.250266255383857e-05, "loss": 54.9746, "step": 115670 }, { "epoch": 0.467361837772759, "grad_norm": 1058.3001708984375, "learning_rate": 3.249933272166629e-05, "loss": 92.6986, "step": 115680 }, { "epoch": 0.46740223903812667, "grad_norm": 1258.4139404296875, "learning_rate": 3.249600274329066e-05, "loss": 54.6076, "step": 115690 }, { "epoch": 0.4674426403034943, "grad_norm": 712.8331909179688, "learning_rate": 3.24926726187766e-05, "loss": 65.3554, "step": 115700 }, { "epoch": 0.46748304156886195, "grad_norm": 826.5805053710938, "learning_rate": 3.248934234818902e-05, "loss": 82.2117, "step": 115710 }, { "epoch": 0.4675234428342296, "grad_norm": 770.5543212890625, "learning_rate": 3.248601193159287e-05, "loss": 73.6685, "step": 115720 }, { "epoch": 0.4675638440995972, "grad_norm": 780.5651245117188, "learning_rate": 3.248268136905304e-05, "loss": 44.8846, "step": 115730 }, { "epoch": 0.4676042453649648, "grad_norm": 1183.1986083984375, "learning_rate": 3.247935066063451e-05, "loss": 57.1673, "step": 115740 }, { "epoch": 0.46764464663033245, "grad_norm": 461.81573486328125, "learning_rate": 3.247601980640217e-05, "loss": 45.7133, "step": 115750 }, { "epoch": 0.4676850478957001, "grad_norm": 720.4097290039062, "learning_rate": 3.247268880642098e-05, "loss": 44.3927, "step": 115760 }, { "epoch": 0.46772544916106773, "grad_norm": 1013.3065795898438, "learning_rate": 3.246935766075588e-05, "loss": 52.5788, "step": 115770 }, { "epoch": 0.46776585042643537, "grad_norm": 947.147216796875, "learning_rate": 3.24660263694718e-05, "loss": 93.3442, "step": 115780 }, { "epoch": 0.467806251691803, "grad_norm": 676.0074462890625, "learning_rate": 3.24626949326337e-05, "loss": 48.2604, "step": 115790 }, { "epoch": 0.4678466529571706, "grad_norm": 433.5167541503906, "learning_rate": 3.245936335030651e-05, "loss": 68.8754, "step": 115800 }, { "epoch": 0.46788705422253823, "grad_norm": 759.6268920898438, "learning_rate": 3.2456031622555197e-05, "loss": 42.3743, "step": 115810 }, { "epoch": 0.4679274554879059, "grad_norm": 650.4202880859375, "learning_rate": 3.245269974944471e-05, "loss": 83.6153, "step": 115820 }, { "epoch": 0.4679678567532735, "grad_norm": 963.0789794921875, "learning_rate": 3.2449367731039996e-05, "loss": 60.6028, "step": 115830 }, { "epoch": 0.46800825801864115, "grad_norm": 2292.56005859375, "learning_rate": 3.244603556740603e-05, "loss": 74.8116, "step": 115840 }, { "epoch": 0.4680486592840088, "grad_norm": 457.2271423339844, "learning_rate": 3.2442703258607766e-05, "loss": 37.3418, "step": 115850 }, { "epoch": 0.4680890605493764, "grad_norm": 474.98126220703125, "learning_rate": 3.243937080471017e-05, "loss": 61.6, "step": 115860 }, { "epoch": 0.468129461814744, "grad_norm": 528.2426147460938, "learning_rate": 3.243603820577822e-05, "loss": 37.5654, "step": 115870 }, { "epoch": 0.46816986308011166, "grad_norm": 885.8614501953125, "learning_rate": 3.243270546187687e-05, "loss": 39.1532, "step": 115880 }, { "epoch": 0.4682102643454793, "grad_norm": 2204.4306640625, "learning_rate": 3.242937257307109e-05, "loss": 67.4763, "step": 115890 }, { "epoch": 0.46825066561084694, "grad_norm": 1517.2777099609375, "learning_rate": 3.2426039539425876e-05, "loss": 53.94, "step": 115900 }, { "epoch": 0.4682910668762146, "grad_norm": 2055.982666015625, "learning_rate": 3.2422706361006194e-05, "loss": 58.1359, "step": 115910 }, { "epoch": 0.4683314681415822, "grad_norm": 607.13330078125, "learning_rate": 3.241937303787703e-05, "loss": 49.6527, "step": 115920 }, { "epoch": 0.4683718694069498, "grad_norm": 899.364990234375, "learning_rate": 3.2416039570103375e-05, "loss": 57.977, "step": 115930 }, { "epoch": 0.46841227067231744, "grad_norm": 654.2115478515625, "learning_rate": 3.241270595775021e-05, "loss": 73.5976, "step": 115940 }, { "epoch": 0.4684526719376851, "grad_norm": 786.1331176757812, "learning_rate": 3.240937220088253e-05, "loss": 35.1766, "step": 115950 }, { "epoch": 0.4684930732030527, "grad_norm": 1251.7491455078125, "learning_rate": 3.240603829956531e-05, "loss": 62.6786, "step": 115960 }, { "epoch": 0.46853347446842036, "grad_norm": 1378.3673095703125, "learning_rate": 3.240270425386357e-05, "loss": 75.6629, "step": 115970 }, { "epoch": 0.468573875733788, "grad_norm": 344.47747802734375, "learning_rate": 3.2399370063842294e-05, "loss": 46.953, "step": 115980 }, { "epoch": 0.4686142769991556, "grad_norm": 383.6866760253906, "learning_rate": 3.23960357295665e-05, "loss": 47.923, "step": 115990 }, { "epoch": 0.46865467826452323, "grad_norm": 471.9056091308594, "learning_rate": 3.239270125110117e-05, "loss": 77.3322, "step": 116000 }, { "epoch": 0.46869507952989087, "grad_norm": 879.043701171875, "learning_rate": 3.238936662851133e-05, "loss": 48.9826, "step": 116010 }, { "epoch": 0.4687354807952585, "grad_norm": 632.9555053710938, "learning_rate": 3.2386031861861976e-05, "loss": 31.8865, "step": 116020 }, { "epoch": 0.46877588206062615, "grad_norm": 1467.3824462890625, "learning_rate": 3.2382696951218135e-05, "loss": 95.1815, "step": 116030 }, { "epoch": 0.4688162833259938, "grad_norm": 336.1209411621094, "learning_rate": 3.2379361896644816e-05, "loss": 54.7575, "step": 116040 }, { "epoch": 0.46885668459136143, "grad_norm": 873.735595703125, "learning_rate": 3.237602669820704e-05, "loss": 63.7571, "step": 116050 }, { "epoch": 0.468897085856729, "grad_norm": 537.8552856445312, "learning_rate": 3.2372691355969816e-05, "loss": 55.9651, "step": 116060 }, { "epoch": 0.46893748712209665, "grad_norm": 353.98614501953125, "learning_rate": 3.2369355869998185e-05, "loss": 39.4995, "step": 116070 }, { "epoch": 0.4689778883874643, "grad_norm": 195.72950744628906, "learning_rate": 3.236602024035716e-05, "loss": 53.4221, "step": 116080 }, { "epoch": 0.46901828965283193, "grad_norm": 657.9056396484375, "learning_rate": 3.236268446711179e-05, "loss": 66.8073, "step": 116090 }, { "epoch": 0.4690586909181996, "grad_norm": 483.935546875, "learning_rate": 3.235934855032709e-05, "loss": 51.066, "step": 116100 }, { "epoch": 0.4690990921835672, "grad_norm": 1542.11865234375, "learning_rate": 3.23560124900681e-05, "loss": 89.0544, "step": 116110 }, { "epoch": 0.4691394934489348, "grad_norm": 838.8958740234375, "learning_rate": 3.235267628639987e-05, "loss": 60.8755, "step": 116120 }, { "epoch": 0.46917989471430244, "grad_norm": 861.8223266601562, "learning_rate": 3.234933993938742e-05, "loss": 47.4594, "step": 116130 }, { "epoch": 0.4692202959796701, "grad_norm": 550.157958984375, "learning_rate": 3.2346003449095805e-05, "loss": 53.5828, "step": 116140 }, { "epoch": 0.4692606972450377, "grad_norm": 503.9146728515625, "learning_rate": 3.234266681559007e-05, "loss": 78.4864, "step": 116150 }, { "epoch": 0.46930109851040536, "grad_norm": 333.4444885253906, "learning_rate": 3.2339330038935265e-05, "loss": 54.1187, "step": 116160 }, { "epoch": 0.469341499775773, "grad_norm": 666.8385620117188, "learning_rate": 3.233599311919644e-05, "loss": 60.4023, "step": 116170 }, { "epoch": 0.4693819010411406, "grad_norm": 641.484619140625, "learning_rate": 3.233265605643866e-05, "loss": 63.9357, "step": 116180 }, { "epoch": 0.4694223023065082, "grad_norm": 1227.126953125, "learning_rate": 3.232931885072697e-05, "loss": 61.4396, "step": 116190 }, { "epoch": 0.46946270357187586, "grad_norm": 1222.384765625, "learning_rate": 3.2325981502126433e-05, "loss": 61.0376, "step": 116200 }, { "epoch": 0.4695031048372435, "grad_norm": 706.1589965820312, "learning_rate": 3.232264401070213e-05, "loss": 58.2779, "step": 116210 }, { "epoch": 0.46954350610261114, "grad_norm": 558.5364990234375, "learning_rate": 3.231930637651909e-05, "loss": 52.5935, "step": 116220 }, { "epoch": 0.4695839073679788, "grad_norm": 928.905029296875, "learning_rate": 3.231596859964242e-05, "loss": 61.5201, "step": 116230 }, { "epoch": 0.4696243086333464, "grad_norm": 630.4097290039062, "learning_rate": 3.2312630680137175e-05, "loss": 37.5664, "step": 116240 }, { "epoch": 0.469664709898714, "grad_norm": 1384.9384765625, "learning_rate": 3.230929261806842e-05, "loss": 53.7496, "step": 116250 }, { "epoch": 0.46970511116408165, "grad_norm": 147.7491912841797, "learning_rate": 3.230595441350125e-05, "loss": 55.0247, "step": 116260 }, { "epoch": 0.4697455124294493, "grad_norm": 334.8374938964844, "learning_rate": 3.2302616066500735e-05, "loss": 46.3881, "step": 116270 }, { "epoch": 0.4697859136948169, "grad_norm": 789.24755859375, "learning_rate": 3.229927757713196e-05, "loss": 70.6806, "step": 116280 }, { "epoch": 0.46982631496018457, "grad_norm": 1363.5418701171875, "learning_rate": 3.229593894546001e-05, "loss": 58.1901, "step": 116290 }, { "epoch": 0.4698667162255522, "grad_norm": 609.9053344726562, "learning_rate": 3.229260017154997e-05, "loss": 98.0226, "step": 116300 }, { "epoch": 0.4699071174909198, "grad_norm": 865.0936279296875, "learning_rate": 3.228926125546695e-05, "loss": 61.3911, "step": 116310 }, { "epoch": 0.46994751875628743, "grad_norm": 590.865234375, "learning_rate": 3.228592219727602e-05, "loss": 86.893, "step": 116320 }, { "epoch": 0.46998792002165507, "grad_norm": 789.493408203125, "learning_rate": 3.2282582997042285e-05, "loss": 49.3227, "step": 116330 }, { "epoch": 0.4700283212870227, "grad_norm": 707.34814453125, "learning_rate": 3.2279243654830836e-05, "loss": 69.9071, "step": 116340 }, { "epoch": 0.47006872255239035, "grad_norm": 1062.26123046875, "learning_rate": 3.2275904170706797e-05, "loss": 55.3705, "step": 116350 }, { "epoch": 0.470109123817758, "grad_norm": 967.4932861328125, "learning_rate": 3.227256454473526e-05, "loss": 42.835, "step": 116360 }, { "epoch": 0.47014952508312563, "grad_norm": 645.4476318359375, "learning_rate": 3.226922477698133e-05, "loss": 57.326, "step": 116370 }, { "epoch": 0.4701899263484932, "grad_norm": 884.6676025390625, "learning_rate": 3.226588486751012e-05, "loss": 66.296, "step": 116380 }, { "epoch": 0.47023032761386085, "grad_norm": 3511.986083984375, "learning_rate": 3.2262544816386745e-05, "loss": 49.9027, "step": 116390 }, { "epoch": 0.4702707288792285, "grad_norm": 476.053955078125, "learning_rate": 3.225920462367632e-05, "loss": 68.0334, "step": 116400 }, { "epoch": 0.47031113014459613, "grad_norm": 367.8993225097656, "learning_rate": 3.225586428944396e-05, "loss": 41.9805, "step": 116410 }, { "epoch": 0.4703515314099638, "grad_norm": 500.8946533203125, "learning_rate": 3.225252381375479e-05, "loss": 65.6448, "step": 116420 }, { "epoch": 0.4703919326753314, "grad_norm": 1217.2655029296875, "learning_rate": 3.224918319667394e-05, "loss": 83.4919, "step": 116430 }, { "epoch": 0.470432333940699, "grad_norm": 662.4497680664062, "learning_rate": 3.2245842438266526e-05, "loss": 65.4877, "step": 116440 }, { "epoch": 0.47047273520606664, "grad_norm": 725.7619018554688, "learning_rate": 3.224250153859769e-05, "loss": 55.515, "step": 116450 }, { "epoch": 0.4705131364714343, "grad_norm": 910.7822265625, "learning_rate": 3.223916049773256e-05, "loss": 43.4893, "step": 116460 }, { "epoch": 0.4705535377368019, "grad_norm": 2010.1138916015625, "learning_rate": 3.223581931573625e-05, "loss": 58.6502, "step": 116470 }, { "epoch": 0.47059393900216956, "grad_norm": 715.5496826171875, "learning_rate": 3.223247799267394e-05, "loss": 84.7735, "step": 116480 }, { "epoch": 0.4706343402675372, "grad_norm": 737.7570190429688, "learning_rate": 3.2229136528610736e-05, "loss": 39.7872, "step": 116490 }, { "epoch": 0.4706747415329048, "grad_norm": 532.3171997070312, "learning_rate": 3.222579492361179e-05, "loss": 67.2663, "step": 116500 }, { "epoch": 0.4707151427982724, "grad_norm": 1556.9482421875, "learning_rate": 3.222245317774226e-05, "loss": 57.7589, "step": 116510 }, { "epoch": 0.47075554406364006, "grad_norm": 1564.6119384765625, "learning_rate": 3.221911129106728e-05, "loss": 41.327, "step": 116520 }, { "epoch": 0.4707959453290077, "grad_norm": 668.7022094726562, "learning_rate": 3.221576926365202e-05, "loss": 53.1316, "step": 116530 }, { "epoch": 0.47083634659437534, "grad_norm": 940.4009399414062, "learning_rate": 3.221242709556161e-05, "loss": 52.2775, "step": 116540 }, { "epoch": 0.470876747859743, "grad_norm": 929.6431884765625, "learning_rate": 3.220908478686123e-05, "loss": 63.8318, "step": 116550 }, { "epoch": 0.4709171491251106, "grad_norm": 770.2941284179688, "learning_rate": 3.220574233761603e-05, "loss": 50.6914, "step": 116560 }, { "epoch": 0.4709575503904782, "grad_norm": 2690.2001953125, "learning_rate": 3.220239974789117e-05, "loss": 103.2043, "step": 116570 }, { "epoch": 0.47099795165584585, "grad_norm": 744.5656127929688, "learning_rate": 3.219905701775182e-05, "loss": 40.3893, "step": 116580 }, { "epoch": 0.4710383529212135, "grad_norm": 2249.509765625, "learning_rate": 3.219571414726315e-05, "loss": 55.5555, "step": 116590 }, { "epoch": 0.47107875418658113, "grad_norm": 392.30657958984375, "learning_rate": 3.219237113649032e-05, "loss": 63.3421, "step": 116600 }, { "epoch": 0.47111915545194877, "grad_norm": 0.0, "learning_rate": 3.2189027985498514e-05, "loss": 82.4716, "step": 116610 }, { "epoch": 0.4711595567173164, "grad_norm": 626.5792846679688, "learning_rate": 3.2185684694352916e-05, "loss": 38.248, "step": 116620 }, { "epoch": 0.471199957982684, "grad_norm": 0.0, "learning_rate": 3.218234126311869e-05, "loss": 39.9608, "step": 116630 }, { "epoch": 0.47124035924805163, "grad_norm": 959.5233154296875, "learning_rate": 3.2178997691861014e-05, "loss": 56.8771, "step": 116640 }, { "epoch": 0.4712807605134193, "grad_norm": 368.611083984375, "learning_rate": 3.217565398064509e-05, "loss": 30.2148, "step": 116650 }, { "epoch": 0.4713211617787869, "grad_norm": 2147.029296875, "learning_rate": 3.2172310129536096e-05, "loss": 56.8126, "step": 116660 }, { "epoch": 0.47136156304415455, "grad_norm": 528.3900146484375, "learning_rate": 3.2168966138599225e-05, "loss": 93.9726, "step": 116670 }, { "epoch": 0.4714019643095222, "grad_norm": 1705.246337890625, "learning_rate": 3.2165622007899676e-05, "loss": 42.9779, "step": 116680 }, { "epoch": 0.47144236557488983, "grad_norm": 1022.4556884765625, "learning_rate": 3.216227773750262e-05, "loss": 40.3776, "step": 116690 }, { "epoch": 0.4714827668402574, "grad_norm": 836.2701416015625, "learning_rate": 3.215893332747328e-05, "loss": 75.0597, "step": 116700 }, { "epoch": 0.47152316810562506, "grad_norm": 493.19110107421875, "learning_rate": 3.2155588777876856e-05, "loss": 45.9303, "step": 116710 }, { "epoch": 0.4715635693709927, "grad_norm": 800.6790771484375, "learning_rate": 3.215224408877854e-05, "loss": 52.3173, "step": 116720 }, { "epoch": 0.47160397063636034, "grad_norm": 1031.7352294921875, "learning_rate": 3.2148899260243545e-05, "loss": 41.5077, "step": 116730 }, { "epoch": 0.471644371901728, "grad_norm": 524.3075561523438, "learning_rate": 3.214555429233707e-05, "loss": 43.6509, "step": 116740 }, { "epoch": 0.4716847731670956, "grad_norm": 2053.65576171875, "learning_rate": 3.214220918512434e-05, "loss": 52.429, "step": 116750 }, { "epoch": 0.4717251744324632, "grad_norm": 660.8265380859375, "learning_rate": 3.213886393867057e-05, "loss": 59.9484, "step": 116760 }, { "epoch": 0.47176557569783084, "grad_norm": 539.3421630859375, "learning_rate": 3.2135518553040964e-05, "loss": 56.5854, "step": 116770 }, { "epoch": 0.4718059769631985, "grad_norm": 461.3349304199219, "learning_rate": 3.2132173028300756e-05, "loss": 56.3017, "step": 116780 }, { "epoch": 0.4718463782285661, "grad_norm": 1058.8426513671875, "learning_rate": 3.212882736451516e-05, "loss": 47.9099, "step": 116790 }, { "epoch": 0.47188677949393376, "grad_norm": 521.5872802734375, "learning_rate": 3.21254815617494e-05, "loss": 48.9531, "step": 116800 }, { "epoch": 0.4719271807593014, "grad_norm": 789.1767578125, "learning_rate": 3.212213562006872e-05, "loss": 77.1051, "step": 116810 }, { "epoch": 0.471967582024669, "grad_norm": 710.3841552734375, "learning_rate": 3.2118789539538335e-05, "loss": 69.6694, "step": 116820 }, { "epoch": 0.4720079832900366, "grad_norm": 1419.693115234375, "learning_rate": 3.211544332022348e-05, "loss": 65.4239, "step": 116830 }, { "epoch": 0.47204838455540427, "grad_norm": 518.5504760742188, "learning_rate": 3.21120969621894e-05, "loss": 60.0513, "step": 116840 }, { "epoch": 0.4720887858207719, "grad_norm": 385.5600280761719, "learning_rate": 3.210875046550132e-05, "loss": 39.933, "step": 116850 }, { "epoch": 0.47212918708613955, "grad_norm": 850.2272338867188, "learning_rate": 3.210540383022449e-05, "loss": 81.0095, "step": 116860 }, { "epoch": 0.4721695883515072, "grad_norm": 443.19189453125, "learning_rate": 3.210205705642416e-05, "loss": 81.4084, "step": 116870 }, { "epoch": 0.4722099896168748, "grad_norm": 490.591796875, "learning_rate": 3.209871014416557e-05, "loss": 44.2058, "step": 116880 }, { "epoch": 0.4722503908822424, "grad_norm": 1418.0242919921875, "learning_rate": 3.209536309351397e-05, "loss": 80.6199, "step": 116890 }, { "epoch": 0.47229079214761005, "grad_norm": 1384.6348876953125, "learning_rate": 3.209201590453461e-05, "loss": 90.8569, "step": 116900 }, { "epoch": 0.4723311934129777, "grad_norm": 595.4918823242188, "learning_rate": 3.208866857729276e-05, "loss": 63.6417, "step": 116910 }, { "epoch": 0.47237159467834533, "grad_norm": 689.3577880859375, "learning_rate": 3.208532111185365e-05, "loss": 57.5565, "step": 116920 }, { "epoch": 0.47241199594371297, "grad_norm": 611.20751953125, "learning_rate": 3.208197350828257e-05, "loss": 80.1349, "step": 116930 }, { "epoch": 0.4724523972090806, "grad_norm": 793.7510375976562, "learning_rate": 3.207862576664477e-05, "loss": 73.2373, "step": 116940 }, { "epoch": 0.4724927984744482, "grad_norm": 777.3289184570312, "learning_rate": 3.207527788700551e-05, "loss": 74.5804, "step": 116950 }, { "epoch": 0.47253319973981583, "grad_norm": 1934.6875, "learning_rate": 3.207192986943006e-05, "loss": 78.4664, "step": 116960 }, { "epoch": 0.4725736010051835, "grad_norm": 573.094482421875, "learning_rate": 3.206858171398371e-05, "loss": 77.9668, "step": 116970 }, { "epoch": 0.4726140022705511, "grad_norm": 290.15185546875, "learning_rate": 3.206523342073172e-05, "loss": 56.8567, "step": 116980 }, { "epoch": 0.47265440353591875, "grad_norm": 603.3993530273438, "learning_rate": 3.206188498973935e-05, "loss": 62.5748, "step": 116990 }, { "epoch": 0.4726948048012864, "grad_norm": 459.7473449707031, "learning_rate": 3.205853642107192e-05, "loss": 55.5894, "step": 117000 }, { "epoch": 0.47273520606665403, "grad_norm": 2034.221923828125, "learning_rate": 3.2055187714794674e-05, "loss": 94.7735, "step": 117010 }, { "epoch": 0.4727756073320216, "grad_norm": 556.3843383789062, "learning_rate": 3.205183887097291e-05, "loss": 49.9171, "step": 117020 }, { "epoch": 0.47281600859738926, "grad_norm": 618.3563232421875, "learning_rate": 3.2048489889671915e-05, "loss": 64.9501, "step": 117030 }, { "epoch": 0.4728564098627569, "grad_norm": 1050.610107421875, "learning_rate": 3.204514077095699e-05, "loss": 45.0024, "step": 117040 }, { "epoch": 0.47289681112812454, "grad_norm": 676.1912841796875, "learning_rate": 3.2041791514893416e-05, "loss": 71.348, "step": 117050 }, { "epoch": 0.4729372123934922, "grad_norm": 338.9596252441406, "learning_rate": 3.2038442121546487e-05, "loss": 60.3322, "step": 117060 }, { "epoch": 0.4729776136588598, "grad_norm": 1104.4334716796875, "learning_rate": 3.2035092590981514e-05, "loss": 65.3458, "step": 117070 }, { "epoch": 0.4730180149242274, "grad_norm": 977.1143188476562, "learning_rate": 3.203174292326378e-05, "loss": 65.3839, "step": 117080 }, { "epoch": 0.47305841618959504, "grad_norm": 552.0875854492188, "learning_rate": 3.20283931184586e-05, "loss": 52.0979, "step": 117090 }, { "epoch": 0.4730988174549627, "grad_norm": 615.712646484375, "learning_rate": 3.202504317663128e-05, "loss": 32.2021, "step": 117100 }, { "epoch": 0.4731392187203303, "grad_norm": 550.6535034179688, "learning_rate": 3.2021693097847125e-05, "loss": 70.6825, "step": 117110 }, { "epoch": 0.47317961998569796, "grad_norm": 610.3817749023438, "learning_rate": 3.2018342882171445e-05, "loss": 36.3148, "step": 117120 }, { "epoch": 0.4732200212510656, "grad_norm": 755.9873657226562, "learning_rate": 3.2014992529669566e-05, "loss": 44.023, "step": 117130 }, { "epoch": 0.4732604225164332, "grad_norm": 847.390625, "learning_rate": 3.2011642040406784e-05, "loss": 49.9786, "step": 117140 }, { "epoch": 0.47330082378180083, "grad_norm": 644.954345703125, "learning_rate": 3.200829141444844e-05, "loss": 93.2963, "step": 117150 }, { "epoch": 0.47334122504716847, "grad_norm": 646.0951538085938, "learning_rate": 3.2004940651859844e-05, "loss": 55.8603, "step": 117160 }, { "epoch": 0.4733816263125361, "grad_norm": 1527.83447265625, "learning_rate": 3.200158975270633e-05, "loss": 69.3903, "step": 117170 }, { "epoch": 0.47342202757790375, "grad_norm": 1161.49658203125, "learning_rate": 3.1998238717053206e-05, "loss": 63.7593, "step": 117180 }, { "epoch": 0.4734624288432714, "grad_norm": 1293.619140625, "learning_rate": 3.199488754496582e-05, "loss": 63.1005, "step": 117190 }, { "epoch": 0.47350283010863903, "grad_norm": 505.3819580078125, "learning_rate": 3.19915362365095e-05, "loss": 50.2402, "step": 117200 }, { "epoch": 0.4735432313740066, "grad_norm": 648.815185546875, "learning_rate": 3.198818479174959e-05, "loss": 53.0908, "step": 117210 }, { "epoch": 0.47358363263937425, "grad_norm": 601.2451171875, "learning_rate": 3.198483321075141e-05, "loss": 56.0472, "step": 117220 }, { "epoch": 0.4736240339047419, "grad_norm": 401.19842529296875, "learning_rate": 3.198148149358031e-05, "loss": 58.7673, "step": 117230 }, { "epoch": 0.47366443517010953, "grad_norm": 942.248046875, "learning_rate": 3.197812964030164e-05, "loss": 42.9004, "step": 117240 }, { "epoch": 0.4737048364354772, "grad_norm": 1621.0218505859375, "learning_rate": 3.1974777650980735e-05, "loss": 59.7592, "step": 117250 }, { "epoch": 0.4737452377008448, "grad_norm": 1157.42431640625, "learning_rate": 3.197142552568295e-05, "loss": 53.8315, "step": 117260 }, { "epoch": 0.4737856389662124, "grad_norm": 2135.51611328125, "learning_rate": 3.196807326447363e-05, "loss": 74.3025, "step": 117270 }, { "epoch": 0.47382604023158004, "grad_norm": 982.652587890625, "learning_rate": 3.196472086741815e-05, "loss": 56.9442, "step": 117280 }, { "epoch": 0.4738664414969477, "grad_norm": 303.63623046875, "learning_rate": 3.1961368334581844e-05, "loss": 44.213, "step": 117290 }, { "epoch": 0.4739068427623153, "grad_norm": 555.4284057617188, "learning_rate": 3.195801566603007e-05, "loss": 53.6509, "step": 117300 }, { "epoch": 0.47394724402768296, "grad_norm": 946.455078125, "learning_rate": 3.1954662861828204e-05, "loss": 82.5156, "step": 117310 }, { "epoch": 0.4739876452930506, "grad_norm": 1169.52685546875, "learning_rate": 3.195130992204161e-05, "loss": 57.4077, "step": 117320 }, { "epoch": 0.47402804655841824, "grad_norm": 595.98291015625, "learning_rate": 3.1947956846735645e-05, "loss": 76.41, "step": 117330 }, { "epoch": 0.4740684478237858, "grad_norm": 463.3590393066406, "learning_rate": 3.194460363597569e-05, "loss": 44.9645, "step": 117340 }, { "epoch": 0.47410884908915346, "grad_norm": 443.7630615234375, "learning_rate": 3.1941250289827104e-05, "loss": 49.6183, "step": 117350 }, { "epoch": 0.4741492503545211, "grad_norm": 2856.91259765625, "learning_rate": 3.193789680835527e-05, "loss": 59.4347, "step": 117360 }, { "epoch": 0.47418965161988874, "grad_norm": 502.811279296875, "learning_rate": 3.193454319162557e-05, "loss": 52.4119, "step": 117370 }, { "epoch": 0.4742300528852564, "grad_norm": 867.6959228515625, "learning_rate": 3.193118943970338e-05, "loss": 90.0065, "step": 117380 }, { "epoch": 0.474270454150624, "grad_norm": 1995.4403076171875, "learning_rate": 3.192783555265408e-05, "loss": 68.9316, "step": 117390 }, { "epoch": 0.4743108554159916, "grad_norm": 522.066162109375, "learning_rate": 3.192448153054306e-05, "loss": 41.8321, "step": 117400 }, { "epoch": 0.47435125668135925, "grad_norm": 835.8606567382812, "learning_rate": 3.1921127373435714e-05, "loss": 67.2701, "step": 117410 }, { "epoch": 0.4743916579467269, "grad_norm": 628.2836303710938, "learning_rate": 3.191777308139742e-05, "loss": 57.3309, "step": 117420 }, { "epoch": 0.4744320592120945, "grad_norm": 582.8585205078125, "learning_rate": 3.1914418654493586e-05, "loss": 79.4639, "step": 117430 }, { "epoch": 0.47447246047746217, "grad_norm": 452.00830078125, "learning_rate": 3.191106409278959e-05, "loss": 57.4894, "step": 117440 }, { "epoch": 0.4745128617428298, "grad_norm": 867.4202270507812, "learning_rate": 3.1907709396350844e-05, "loss": 54.852, "step": 117450 }, { "epoch": 0.4745532630081974, "grad_norm": 375.7869567871094, "learning_rate": 3.190435456524275e-05, "loss": 42.05, "step": 117460 }, { "epoch": 0.47459366427356503, "grad_norm": 453.1134033203125, "learning_rate": 3.190099959953071e-05, "loss": 34.9451, "step": 117470 }, { "epoch": 0.47463406553893267, "grad_norm": 614.6144409179688, "learning_rate": 3.189764449928012e-05, "loss": 86.9492, "step": 117480 }, { "epoch": 0.4746744668043003, "grad_norm": 673.782470703125, "learning_rate": 3.1894289264556417e-05, "loss": 69.0601, "step": 117490 }, { "epoch": 0.47471486806966795, "grad_norm": 541.9735107421875, "learning_rate": 3.1890933895424976e-05, "loss": 46.3959, "step": 117500 }, { "epoch": 0.4747552693350356, "grad_norm": 379.82891845703125, "learning_rate": 3.188757839195125e-05, "loss": 44.2336, "step": 117510 }, { "epoch": 0.47479567060040323, "grad_norm": 680.2620849609375, "learning_rate": 3.1884222754200625e-05, "loss": 54.9478, "step": 117520 }, { "epoch": 0.4748360718657708, "grad_norm": 1045.8995361328125, "learning_rate": 3.188086698223853e-05, "loss": 62.2926, "step": 117530 }, { "epoch": 0.47487647313113845, "grad_norm": 1066.854248046875, "learning_rate": 3.1877511076130404e-05, "loss": 52.8988, "step": 117540 }, { "epoch": 0.4749168743965061, "grad_norm": 1133.3275146484375, "learning_rate": 3.187415503594166e-05, "loss": 50.2685, "step": 117550 }, { "epoch": 0.47495727566187373, "grad_norm": 561.2869873046875, "learning_rate": 3.1870798861737705e-05, "loss": 71.2999, "step": 117560 }, { "epoch": 0.4749976769272414, "grad_norm": 905.6627197265625, "learning_rate": 3.1867442553584e-05, "loss": 35.9215, "step": 117570 }, { "epoch": 0.475038078192609, "grad_norm": 2765.510009765625, "learning_rate": 3.186408611154597e-05, "loss": 62.86, "step": 117580 }, { "epoch": 0.4750784794579766, "grad_norm": 3249.31298828125, "learning_rate": 3.186072953568905e-05, "loss": 89.4793, "step": 117590 }, { "epoch": 0.47511888072334424, "grad_norm": 1029.83642578125, "learning_rate": 3.185737282607867e-05, "loss": 52.9404, "step": 117600 }, { "epoch": 0.4751592819887119, "grad_norm": 638.6604614257812, "learning_rate": 3.1854015982780275e-05, "loss": 70.1836, "step": 117610 }, { "epoch": 0.4751996832540795, "grad_norm": 866.2382202148438, "learning_rate": 3.185065900585931e-05, "loss": 60.6042, "step": 117620 }, { "epoch": 0.47524008451944716, "grad_norm": 967.8104858398438, "learning_rate": 3.184730189538122e-05, "loss": 67.1052, "step": 117630 }, { "epoch": 0.4752804857848148, "grad_norm": 990.4212646484375, "learning_rate": 3.1843944651411456e-05, "loss": 44.4866, "step": 117640 }, { "epoch": 0.47532088705018244, "grad_norm": 528.0682983398438, "learning_rate": 3.184058727401546e-05, "loss": 51.6501, "step": 117650 }, { "epoch": 0.47536128831555, "grad_norm": 936.557373046875, "learning_rate": 3.1837229763258705e-05, "loss": 97.2483, "step": 117660 }, { "epoch": 0.47540168958091766, "grad_norm": 870.8411865234375, "learning_rate": 3.183387211920663e-05, "loss": 81.8832, "step": 117670 }, { "epoch": 0.4754420908462853, "grad_norm": 628.0894165039062, "learning_rate": 3.183051434192471e-05, "loss": 44.0012, "step": 117680 }, { "epoch": 0.47548249211165294, "grad_norm": 423.3321228027344, "learning_rate": 3.1827156431478386e-05, "loss": 77.5057, "step": 117690 }, { "epoch": 0.4755228933770206, "grad_norm": 483.94091796875, "learning_rate": 3.1823798387933134e-05, "loss": 39.6376, "step": 117700 }, { "epoch": 0.4755632946423882, "grad_norm": 791.345703125, "learning_rate": 3.182044021135442e-05, "loss": 55.5527, "step": 117710 }, { "epoch": 0.4756036959077558, "grad_norm": 498.8603210449219, "learning_rate": 3.181708190180771e-05, "loss": 41.6617, "step": 117720 }, { "epoch": 0.47564409717312345, "grad_norm": 771.0953979492188, "learning_rate": 3.181372345935848e-05, "loss": 48.7883, "step": 117730 }, { "epoch": 0.4756844984384911, "grad_norm": 661.4583740234375, "learning_rate": 3.1810364884072205e-05, "loss": 72.9009, "step": 117740 }, { "epoch": 0.47572489970385873, "grad_norm": 632.1604614257812, "learning_rate": 3.180700617601436e-05, "loss": 59.3145, "step": 117750 }, { "epoch": 0.47576530096922637, "grad_norm": 806.9335327148438, "learning_rate": 3.180364733525043e-05, "loss": 41.7695, "step": 117760 }, { "epoch": 0.475805702234594, "grad_norm": 776.6650390625, "learning_rate": 3.1800288361845883e-05, "loss": 62.7163, "step": 117770 }, { "epoch": 0.4758461034999616, "grad_norm": 601.4182739257812, "learning_rate": 3.179692925586622e-05, "loss": 54.4891, "step": 117780 }, { "epoch": 0.47588650476532923, "grad_norm": 907.4159545898438, "learning_rate": 3.179357001737692e-05, "loss": 54.7433, "step": 117790 }, { "epoch": 0.4759269060306969, "grad_norm": 792.1842651367188, "learning_rate": 3.179021064644347e-05, "loss": 73.5924, "step": 117800 }, { "epoch": 0.4759673072960645, "grad_norm": 641.330810546875, "learning_rate": 3.178685114313137e-05, "loss": 35.0172, "step": 117810 }, { "epoch": 0.47600770856143215, "grad_norm": 272.5650939941406, "learning_rate": 3.178349150750612e-05, "loss": 77.2498, "step": 117820 }, { "epoch": 0.4760481098267998, "grad_norm": 917.3323974609375, "learning_rate": 3.1780131739633204e-05, "loss": 66.6369, "step": 117830 }, { "epoch": 0.47608851109216743, "grad_norm": 924.8410034179688, "learning_rate": 3.177677183957813e-05, "loss": 50.9081, "step": 117840 }, { "epoch": 0.476128912357535, "grad_norm": 539.0849609375, "learning_rate": 3.17734118074064e-05, "loss": 43.3925, "step": 117850 }, { "epoch": 0.47616931362290266, "grad_norm": 1520.1893310546875, "learning_rate": 3.177005164318353e-05, "loss": 52.2887, "step": 117860 }, { "epoch": 0.4762097148882703, "grad_norm": 0.0, "learning_rate": 3.1766691346974996e-05, "loss": 31.6033, "step": 117870 }, { "epoch": 0.47625011615363794, "grad_norm": 0.0, "learning_rate": 3.176333091884635e-05, "loss": 59.7049, "step": 117880 }, { "epoch": 0.4762905174190056, "grad_norm": 1278.6094970703125, "learning_rate": 3.175997035886307e-05, "loss": 56.4383, "step": 117890 }, { "epoch": 0.4763309186843732, "grad_norm": 1077.4521484375, "learning_rate": 3.1756609667090696e-05, "loss": 70.2687, "step": 117900 }, { "epoch": 0.4763713199497408, "grad_norm": 842.562255859375, "learning_rate": 3.175324884359474e-05, "loss": 112.2075, "step": 117910 }, { "epoch": 0.47641172121510844, "grad_norm": 310.8381042480469, "learning_rate": 3.174988788844072e-05, "loss": 48.3912, "step": 117920 }, { "epoch": 0.4764521224804761, "grad_norm": 1312.25244140625, "learning_rate": 3.1746526801694156e-05, "loss": 52.8015, "step": 117930 }, { "epoch": 0.4764925237458437, "grad_norm": 885.5872802734375, "learning_rate": 3.174316558342059e-05, "loss": 78.4678, "step": 117940 }, { "epoch": 0.47653292501121136, "grad_norm": 629.9498291015625, "learning_rate": 3.173980423368553e-05, "loss": 57.0286, "step": 117950 }, { "epoch": 0.476573326276579, "grad_norm": 622.9977416992188, "learning_rate": 3.173644275255451e-05, "loss": 61.0304, "step": 117960 }, { "epoch": 0.47661372754194664, "grad_norm": 772.329833984375, "learning_rate": 3.173308114009308e-05, "loss": 34.8157, "step": 117970 }, { "epoch": 0.4766541288073142, "grad_norm": 884.4105834960938, "learning_rate": 3.1729719396366765e-05, "loss": 44.0258, "step": 117980 }, { "epoch": 0.47669453007268187, "grad_norm": 1267.8201904296875, "learning_rate": 3.172635752144111e-05, "loss": 53.9944, "step": 117990 }, { "epoch": 0.4767349313380495, "grad_norm": 1093.1011962890625, "learning_rate": 3.172299551538164e-05, "loss": 49.2085, "step": 118000 }, { "epoch": 0.47677533260341715, "grad_norm": 345.0997009277344, "learning_rate": 3.1719633378253924e-05, "loss": 48.0018, "step": 118010 }, { "epoch": 0.4768157338687848, "grad_norm": 610.406494140625, "learning_rate": 3.171627111012349e-05, "loss": 52.8992, "step": 118020 }, { "epoch": 0.4768561351341524, "grad_norm": 1354.5128173828125, "learning_rate": 3.1712908711055897e-05, "loss": 72.9081, "step": 118030 }, { "epoch": 0.47689653639952, "grad_norm": 446.1787109375, "learning_rate": 3.170954618111669e-05, "loss": 63.6848, "step": 118040 }, { "epoch": 0.47693693766488765, "grad_norm": 704.4627075195312, "learning_rate": 3.170618352037142e-05, "loss": 58.9005, "step": 118050 }, { "epoch": 0.4769773389302553, "grad_norm": 481.2560119628906, "learning_rate": 3.170282072888566e-05, "loss": 46.0265, "step": 118060 }, { "epoch": 0.47701774019562293, "grad_norm": 560.6388549804688, "learning_rate": 3.169945780672495e-05, "loss": 42.9609, "step": 118070 }, { "epoch": 0.47705814146099057, "grad_norm": 2451.45556640625, "learning_rate": 3.169609475395486e-05, "loss": 84.6672, "step": 118080 }, { "epoch": 0.4770985427263582, "grad_norm": 458.1382141113281, "learning_rate": 3.169273157064097e-05, "loss": 59.9135, "step": 118090 }, { "epoch": 0.4771389439917258, "grad_norm": 736.26416015625, "learning_rate": 3.168936825684882e-05, "loss": 42.6383, "step": 118100 }, { "epoch": 0.47717934525709343, "grad_norm": 3540.630126953125, "learning_rate": 3.1686004812644e-05, "loss": 58.3743, "step": 118110 }, { "epoch": 0.4772197465224611, "grad_norm": 609.9805908203125, "learning_rate": 3.1682641238092064e-05, "loss": 44.9807, "step": 118120 }, { "epoch": 0.4772601477878287, "grad_norm": 864.6240234375, "learning_rate": 3.16792775332586e-05, "loss": 91.4741, "step": 118130 }, { "epoch": 0.47730054905319635, "grad_norm": 804.3956298828125, "learning_rate": 3.167591369820918e-05, "loss": 64.7296, "step": 118140 }, { "epoch": 0.477340950318564, "grad_norm": 365.2998962402344, "learning_rate": 3.1672549733009396e-05, "loss": 50.6288, "step": 118150 }, { "epoch": 0.47738135158393163, "grad_norm": 402.1658630371094, "learning_rate": 3.166918563772481e-05, "loss": 53.5217, "step": 118160 }, { "epoch": 0.4774217528492992, "grad_norm": 668.7144165039062, "learning_rate": 3.1665821412421015e-05, "loss": 54.7511, "step": 118170 }, { "epoch": 0.47746215411466686, "grad_norm": 919.4798583984375, "learning_rate": 3.1662457057163604e-05, "loss": 47.6506, "step": 118180 }, { "epoch": 0.4775025553800345, "grad_norm": 1031.98193359375, "learning_rate": 3.165909257201816e-05, "loss": 71.545, "step": 118190 }, { "epoch": 0.47754295664540214, "grad_norm": 390.0589294433594, "learning_rate": 3.1655727957050285e-05, "loss": 49.304, "step": 118200 }, { "epoch": 0.4775833579107698, "grad_norm": 1422.1484375, "learning_rate": 3.165236321232557e-05, "loss": 52.9677, "step": 118210 }, { "epoch": 0.4776237591761374, "grad_norm": 846.0404052734375, "learning_rate": 3.1648998337909594e-05, "loss": 62.2265, "step": 118220 }, { "epoch": 0.477664160441505, "grad_norm": 408.1695556640625, "learning_rate": 3.164563333386798e-05, "loss": 67.1796, "step": 118230 }, { "epoch": 0.47770456170687264, "grad_norm": 256.5215759277344, "learning_rate": 3.1642268200266317e-05, "loss": 40.5587, "step": 118240 }, { "epoch": 0.4777449629722403, "grad_norm": 1751.73486328125, "learning_rate": 3.163890293717022e-05, "loss": 52.8397, "step": 118250 }, { "epoch": 0.4777853642376079, "grad_norm": 564.2115478515625, "learning_rate": 3.1635537544645296e-05, "loss": 67.6077, "step": 118260 }, { "epoch": 0.47782576550297556, "grad_norm": 1232.5323486328125, "learning_rate": 3.163217202275715e-05, "loss": 49.1063, "step": 118270 }, { "epoch": 0.4778661667683432, "grad_norm": 3210.296142578125, "learning_rate": 3.162880637157139e-05, "loss": 69.347, "step": 118280 }, { "epoch": 0.47790656803371084, "grad_norm": 674.1715087890625, "learning_rate": 3.1625440591153645e-05, "loss": 63.0254, "step": 118290 }, { "epoch": 0.47794696929907843, "grad_norm": 369.24017333984375, "learning_rate": 3.162207468156952e-05, "loss": 44.4211, "step": 118300 }, { "epoch": 0.47798737056444607, "grad_norm": 634.0050659179688, "learning_rate": 3.161870864288464e-05, "loss": 58.934, "step": 118310 }, { "epoch": 0.4780277718298137, "grad_norm": 1877.2578125, "learning_rate": 3.1615342475164636e-05, "loss": 62.1043, "step": 118320 }, { "epoch": 0.47806817309518135, "grad_norm": 550.0879516601562, "learning_rate": 3.161197617847511e-05, "loss": 41.5093, "step": 118330 }, { "epoch": 0.478108574360549, "grad_norm": 544.1702270507812, "learning_rate": 3.160860975288171e-05, "loss": 59.2241, "step": 118340 }, { "epoch": 0.47814897562591663, "grad_norm": 316.4408264160156, "learning_rate": 3.1605243198450066e-05, "loss": 60.8573, "step": 118350 }, { "epoch": 0.4781893768912842, "grad_norm": 412.7470703125, "learning_rate": 3.16018765152458e-05, "loss": 47.1499, "step": 118360 }, { "epoch": 0.47822977815665185, "grad_norm": 998.5579833984375, "learning_rate": 3.159850970333456e-05, "loss": 62.5579, "step": 118370 }, { "epoch": 0.4782701794220195, "grad_norm": 425.9685974121094, "learning_rate": 3.159514276278197e-05, "loss": 55.9611, "step": 118380 }, { "epoch": 0.47831058068738713, "grad_norm": 393.94635009765625, "learning_rate": 3.1591775693653674e-05, "loss": 50.0992, "step": 118390 }, { "epoch": 0.4783509819527548, "grad_norm": 1880.3455810546875, "learning_rate": 3.158840849601532e-05, "loss": 65.1443, "step": 118400 }, { "epoch": 0.4783913832181224, "grad_norm": 503.0107421875, "learning_rate": 3.1585041169932545e-05, "loss": 44.3258, "step": 118410 }, { "epoch": 0.47843178448349, "grad_norm": 0.0, "learning_rate": 3.1581673715471006e-05, "loss": 53.8743, "step": 118420 }, { "epoch": 0.47847218574885764, "grad_norm": 608.0613403320312, "learning_rate": 3.157830613269635e-05, "loss": 54.61, "step": 118430 }, { "epoch": 0.4785125870142253, "grad_norm": 427.8965148925781, "learning_rate": 3.157493842167423e-05, "loss": 63.0965, "step": 118440 }, { "epoch": 0.4785529882795929, "grad_norm": 507.14190673828125, "learning_rate": 3.15715705824703e-05, "loss": 50.7432, "step": 118450 }, { "epoch": 0.47859338954496056, "grad_norm": 983.0687255859375, "learning_rate": 3.156820261515022e-05, "loss": 87.0616, "step": 118460 }, { "epoch": 0.4786337908103282, "grad_norm": 1234.0418701171875, "learning_rate": 3.1564834519779647e-05, "loss": 48.0925, "step": 118470 }, { "epoch": 0.47867419207569584, "grad_norm": 935.4703369140625, "learning_rate": 3.156146629642425e-05, "loss": 53.1847, "step": 118480 }, { "epoch": 0.4787145933410634, "grad_norm": 820.0416259765625, "learning_rate": 3.155809794514968e-05, "loss": 77.3875, "step": 118490 }, { "epoch": 0.47875499460643106, "grad_norm": 877.6055297851562, "learning_rate": 3.155472946602162e-05, "loss": 59.0853, "step": 118500 }, { "epoch": 0.4787953958717987, "grad_norm": 979.28662109375, "learning_rate": 3.155136085910573e-05, "loss": 60.1543, "step": 118510 }, { "epoch": 0.47883579713716634, "grad_norm": 702.0006713867188, "learning_rate": 3.15479921244677e-05, "loss": 44.0273, "step": 118520 }, { "epoch": 0.478876198402534, "grad_norm": 1405.5360107421875, "learning_rate": 3.1544623262173176e-05, "loss": 63.5543, "step": 118530 }, { "epoch": 0.4789165996679016, "grad_norm": 971.0574340820312, "learning_rate": 3.1541254272287865e-05, "loss": 55.8479, "step": 118540 }, { "epoch": 0.4789570009332692, "grad_norm": 1137.547119140625, "learning_rate": 3.153788515487742e-05, "loss": 66.1714, "step": 118550 }, { "epoch": 0.47899740219863685, "grad_norm": 813.9231567382812, "learning_rate": 3.153451591000756e-05, "loss": 102.5478, "step": 118560 }, { "epoch": 0.4790378034640045, "grad_norm": 870.7437133789062, "learning_rate": 3.153114653774393e-05, "loss": 81.2061, "step": 118570 }, { "epoch": 0.4790782047293721, "grad_norm": 984.029296875, "learning_rate": 3.152777703815223e-05, "loss": 64.9325, "step": 118580 }, { "epoch": 0.47911860599473977, "grad_norm": 853.7675170898438, "learning_rate": 3.152440741129817e-05, "loss": 69.4327, "step": 118590 }, { "epoch": 0.4791590072601074, "grad_norm": 1823.9547119140625, "learning_rate": 3.152103765724743e-05, "loss": 51.6635, "step": 118600 }, { "epoch": 0.479199408525475, "grad_norm": 1002.6013793945312, "learning_rate": 3.1517667776065696e-05, "loss": 53.4132, "step": 118610 }, { "epoch": 0.47923980979084263, "grad_norm": 1199.445556640625, "learning_rate": 3.151429776781868e-05, "loss": 57.5089, "step": 118620 }, { "epoch": 0.47928021105621027, "grad_norm": 545.897705078125, "learning_rate": 3.151092763257206e-05, "loss": 66.9297, "step": 118630 }, { "epoch": 0.4793206123215779, "grad_norm": 1726.234375, "learning_rate": 3.150755737039157e-05, "loss": 53.6566, "step": 118640 }, { "epoch": 0.47936101358694555, "grad_norm": 719.9454956054688, "learning_rate": 3.150418698134289e-05, "loss": 48.0167, "step": 118650 }, { "epoch": 0.4794014148523132, "grad_norm": 864.4921264648438, "learning_rate": 3.150081646549174e-05, "loss": 84.6051, "step": 118660 }, { "epoch": 0.47944181611768083, "grad_norm": 824.414306640625, "learning_rate": 3.149744582290383e-05, "loss": 53.0983, "step": 118670 }, { "epoch": 0.4794822173830484, "grad_norm": 471.1961975097656, "learning_rate": 3.149407505364486e-05, "loss": 69.1806, "step": 118680 }, { "epoch": 0.47952261864841605, "grad_norm": 684.9292602539062, "learning_rate": 3.149070415778056e-05, "loss": 43.8025, "step": 118690 }, { "epoch": 0.4795630199137837, "grad_norm": 507.1294860839844, "learning_rate": 3.148733313537664e-05, "loss": 53.6068, "step": 118700 }, { "epoch": 0.47960342117915133, "grad_norm": 1452.51953125, "learning_rate": 3.148396198649882e-05, "loss": 62.2304, "step": 118710 }, { "epoch": 0.479643822444519, "grad_norm": 568.4696655273438, "learning_rate": 3.148059071121282e-05, "loss": 69.3875, "step": 118720 }, { "epoch": 0.4796842237098866, "grad_norm": 527.501953125, "learning_rate": 3.147721930958437e-05, "loss": 53.4141, "step": 118730 }, { "epoch": 0.4797246249752542, "grad_norm": 1219.31640625, "learning_rate": 3.14738477816792e-05, "loss": 58.4269, "step": 118740 }, { "epoch": 0.47976502624062184, "grad_norm": 1039.8255615234375, "learning_rate": 3.147047612756302e-05, "loss": 39.0244, "step": 118750 }, { "epoch": 0.4798054275059895, "grad_norm": 1056.8941650390625, "learning_rate": 3.146710434730159e-05, "loss": 54.2339, "step": 118760 }, { "epoch": 0.4798458287713571, "grad_norm": 756.7150268554688, "learning_rate": 3.1463732440960625e-05, "loss": 53.7804, "step": 118770 }, { "epoch": 0.47988623003672476, "grad_norm": 990.2318725585938, "learning_rate": 3.1460360408605866e-05, "loss": 45.0402, "step": 118780 }, { "epoch": 0.4799266313020924, "grad_norm": 782.805908203125, "learning_rate": 3.145698825030307e-05, "loss": 52.3285, "step": 118790 }, { "epoch": 0.47996703256746004, "grad_norm": 781.7633666992188, "learning_rate": 3.145361596611795e-05, "loss": 53.3584, "step": 118800 }, { "epoch": 0.4800074338328276, "grad_norm": 429.9420166015625, "learning_rate": 3.1450243556116266e-05, "loss": 61.5989, "step": 118810 }, { "epoch": 0.48004783509819526, "grad_norm": 1086.17822265625, "learning_rate": 3.144687102036376e-05, "loss": 44.8307, "step": 118820 }, { "epoch": 0.4800882363635629, "grad_norm": 858.8822631835938, "learning_rate": 3.1443498358926186e-05, "loss": 63.265, "step": 118830 }, { "epoch": 0.48012863762893054, "grad_norm": 530.9268188476562, "learning_rate": 3.1440125571869306e-05, "loss": 45.8561, "step": 118840 }, { "epoch": 0.4801690388942982, "grad_norm": 426.1288146972656, "learning_rate": 3.143675265925885e-05, "loss": 52.3246, "step": 118850 }, { "epoch": 0.4802094401596658, "grad_norm": 374.76849365234375, "learning_rate": 3.1433379621160586e-05, "loss": 44.3146, "step": 118860 }, { "epoch": 0.4802498414250334, "grad_norm": 727.5894165039062, "learning_rate": 3.143000645764028e-05, "loss": 45.2602, "step": 118870 }, { "epoch": 0.48029024269040105, "grad_norm": 914.193603515625, "learning_rate": 3.142663316876368e-05, "loss": 67.587, "step": 118880 }, { "epoch": 0.4803306439557687, "grad_norm": 119.14258575439453, "learning_rate": 3.1423259754596576e-05, "loss": 48.5501, "step": 118890 }, { "epoch": 0.48037104522113633, "grad_norm": 947.4161376953125, "learning_rate": 3.1419886215204694e-05, "loss": 56.8426, "step": 118900 }, { "epoch": 0.48041144648650397, "grad_norm": 593.0452270507812, "learning_rate": 3.1416512550653835e-05, "loss": 70.3968, "step": 118910 }, { "epoch": 0.4804518477518716, "grad_norm": 757.6504516601562, "learning_rate": 3.141313876100976e-05, "loss": 59.1005, "step": 118920 }, { "epoch": 0.4804922490172392, "grad_norm": 3180.365966796875, "learning_rate": 3.1409764846338245e-05, "loss": 64.8498, "step": 118930 }, { "epoch": 0.48053265028260683, "grad_norm": 566.63671875, "learning_rate": 3.140639080670507e-05, "loss": 64.9276, "step": 118940 }, { "epoch": 0.4805730515479745, "grad_norm": 714.4104614257812, "learning_rate": 3.140301664217599e-05, "loss": 59.2069, "step": 118950 }, { "epoch": 0.4806134528133421, "grad_norm": 575.2021484375, "learning_rate": 3.139964235281682e-05, "loss": 43.4193, "step": 118960 }, { "epoch": 0.48065385407870975, "grad_norm": 1377.7861328125, "learning_rate": 3.1396267938693316e-05, "loss": 63.2737, "step": 118970 }, { "epoch": 0.4806942553440774, "grad_norm": 661.6658935546875, "learning_rate": 3.1392893399871295e-05, "loss": 60.5852, "step": 118980 }, { "epoch": 0.48073465660944503, "grad_norm": 513.0794067382812, "learning_rate": 3.1389518736416507e-05, "loss": 50.5535, "step": 118990 }, { "epoch": 0.4807750578748126, "grad_norm": 1068.8321533203125, "learning_rate": 3.138614394839476e-05, "loss": 56.0899, "step": 119000 }, { "epoch": 0.48081545914018026, "grad_norm": 346.4224548339844, "learning_rate": 3.138276903587186e-05, "loss": 62.7875, "step": 119010 }, { "epoch": 0.4808558604055479, "grad_norm": 591.9736328125, "learning_rate": 3.137939399891359e-05, "loss": 50.5829, "step": 119020 }, { "epoch": 0.48089626167091554, "grad_norm": 762.9810791015625, "learning_rate": 3.1376018837585747e-05, "loss": 66.904, "step": 119030 }, { "epoch": 0.4809366629362832, "grad_norm": 700.6834106445312, "learning_rate": 3.137264355195413e-05, "loss": 66.1691, "step": 119040 }, { "epoch": 0.4809770642016508, "grad_norm": 315.9108581542969, "learning_rate": 3.1369268142084556e-05, "loss": 49.7968, "step": 119050 }, { "epoch": 0.4810174654670184, "grad_norm": 0.0, "learning_rate": 3.136589260804282e-05, "loss": 47.6848, "step": 119060 }, { "epoch": 0.48105786673238604, "grad_norm": 395.9146423339844, "learning_rate": 3.1362516949894725e-05, "loss": 57.0576, "step": 119070 }, { "epoch": 0.4810982679977537, "grad_norm": 476.3524475097656, "learning_rate": 3.135914116770609e-05, "loss": 58.1471, "step": 119080 }, { "epoch": 0.4811386692631213, "grad_norm": 626.7835083007812, "learning_rate": 3.135576526154272e-05, "loss": 39.7096, "step": 119090 }, { "epoch": 0.48117907052848896, "grad_norm": 1119.859619140625, "learning_rate": 3.135238923147043e-05, "loss": 87.1097, "step": 119100 }, { "epoch": 0.4812194717938566, "grad_norm": 804.6915283203125, "learning_rate": 3.1349013077555045e-05, "loss": 36.7657, "step": 119110 }, { "epoch": 0.48125987305922424, "grad_norm": 769.036865234375, "learning_rate": 3.134563679986238e-05, "loss": 40.638, "step": 119120 }, { "epoch": 0.4813002743245918, "grad_norm": 191.26966857910156, "learning_rate": 3.134226039845827e-05, "loss": 50.2752, "step": 119130 }, { "epoch": 0.48134067558995947, "grad_norm": 778.8662109375, "learning_rate": 3.1338883873408516e-05, "loss": 58.7929, "step": 119140 }, { "epoch": 0.4813810768553271, "grad_norm": 514.7074584960938, "learning_rate": 3.133550722477896e-05, "loss": 66.8902, "step": 119150 }, { "epoch": 0.48142147812069475, "grad_norm": 355.43096923828125, "learning_rate": 3.133213045263543e-05, "loss": 58.968, "step": 119160 }, { "epoch": 0.4814618793860624, "grad_norm": 3042.458984375, "learning_rate": 3.132875355704376e-05, "loss": 88.0314, "step": 119170 }, { "epoch": 0.48150228065143, "grad_norm": 656.7515869140625, "learning_rate": 3.1325376538069776e-05, "loss": 68.5067, "step": 119180 }, { "epoch": 0.4815426819167976, "grad_norm": 495.6517028808594, "learning_rate": 3.132199939577932e-05, "loss": 73.5789, "step": 119190 }, { "epoch": 0.48158308318216525, "grad_norm": 1507.7938232421875, "learning_rate": 3.1318622130238236e-05, "loss": 55.6776, "step": 119200 }, { "epoch": 0.4816234844475329, "grad_norm": 802.6456298828125, "learning_rate": 3.1315244741512356e-05, "loss": 49.8764, "step": 119210 }, { "epoch": 0.48166388571290053, "grad_norm": 814.6461181640625, "learning_rate": 3.131186722966753e-05, "loss": 51.0948, "step": 119220 }, { "epoch": 0.48170428697826817, "grad_norm": 826.6341552734375, "learning_rate": 3.1308489594769605e-05, "loss": 54.8559, "step": 119230 }, { "epoch": 0.4817446882436358, "grad_norm": 536.5158081054688, "learning_rate": 3.1305111836884425e-05, "loss": 49.7287, "step": 119240 }, { "epoch": 0.4817850895090034, "grad_norm": 1080.1739501953125, "learning_rate": 3.130173395607785e-05, "loss": 96.4672, "step": 119250 }, { "epoch": 0.48182549077437103, "grad_norm": 875.5479125976562, "learning_rate": 3.129835595241571e-05, "loss": 41.3818, "step": 119260 }, { "epoch": 0.4818658920397387, "grad_norm": 977.9671020507812, "learning_rate": 3.129497782596389e-05, "loss": 44.1907, "step": 119270 }, { "epoch": 0.4819062933051063, "grad_norm": 469.0617370605469, "learning_rate": 3.129159957678824e-05, "loss": 50.5765, "step": 119280 }, { "epoch": 0.48194669457047395, "grad_norm": 851.140869140625, "learning_rate": 3.128822120495462e-05, "loss": 50.6099, "step": 119290 }, { "epoch": 0.4819870958358416, "grad_norm": 608.3344116210938, "learning_rate": 3.1284842710528876e-05, "loss": 72.0118, "step": 119300 }, { "epoch": 0.48202749710120923, "grad_norm": 380.56195068359375, "learning_rate": 3.128146409357689e-05, "loss": 83.0865, "step": 119310 }, { "epoch": 0.4820678983665768, "grad_norm": 601.8255615234375, "learning_rate": 3.127808535416454e-05, "loss": 48.3086, "step": 119320 }, { "epoch": 0.48210829963194446, "grad_norm": 949.0869140625, "learning_rate": 3.127470649235768e-05, "loss": 94.7472, "step": 119330 }, { "epoch": 0.4821487008973121, "grad_norm": 398.5122985839844, "learning_rate": 3.1271327508222174e-05, "loss": 73.9752, "step": 119340 }, { "epoch": 0.48218910216267974, "grad_norm": 683.5873413085938, "learning_rate": 3.126794840182392e-05, "loss": 65.0503, "step": 119350 }, { "epoch": 0.4822295034280474, "grad_norm": 574.1619873046875, "learning_rate": 3.126456917322878e-05, "loss": 55.5617, "step": 119360 }, { "epoch": 0.482269904693415, "grad_norm": 918.1862182617188, "learning_rate": 3.1261189822502644e-05, "loss": 48.1121, "step": 119370 }, { "epoch": 0.4823103059587826, "grad_norm": 1102.67919921875, "learning_rate": 3.125781034971139e-05, "loss": 79.4511, "step": 119380 }, { "epoch": 0.48235070722415024, "grad_norm": 865.8079223632812, "learning_rate": 3.125443075492089e-05, "loss": 58.5068, "step": 119390 }, { "epoch": 0.4823911084895179, "grad_norm": 644.5775756835938, "learning_rate": 3.1251051038197055e-05, "loss": 53.9188, "step": 119400 }, { "epoch": 0.4824315097548855, "grad_norm": 472.7004699707031, "learning_rate": 3.124767119960576e-05, "loss": 46.8185, "step": 119410 }, { "epoch": 0.48247191102025316, "grad_norm": 988.4273681640625, "learning_rate": 3.1244291239212896e-05, "loss": 64.0862, "step": 119420 }, { "epoch": 0.4825123122856208, "grad_norm": 320.8127746582031, "learning_rate": 3.124091115708436e-05, "loss": 66.8379, "step": 119430 }, { "epoch": 0.48255271355098844, "grad_norm": 703.6538696289062, "learning_rate": 3.123753095328604e-05, "loss": 74.7884, "step": 119440 }, { "epoch": 0.48259311481635603, "grad_norm": 741.8955078125, "learning_rate": 3.123415062788385e-05, "loss": 47.4477, "step": 119450 }, { "epoch": 0.48263351608172367, "grad_norm": 764.6326904296875, "learning_rate": 3.123077018094369e-05, "loss": 48.5175, "step": 119460 }, { "epoch": 0.4826739173470913, "grad_norm": 436.60516357421875, "learning_rate": 3.122738961253145e-05, "loss": 66.414, "step": 119470 }, { "epoch": 0.48271431861245895, "grad_norm": 369.33343505859375, "learning_rate": 3.1224008922713044e-05, "loss": 71.549, "step": 119480 }, { "epoch": 0.4827547198778266, "grad_norm": 921.5698852539062, "learning_rate": 3.122062811155438e-05, "loss": 50.5309, "step": 119490 }, { "epoch": 0.48279512114319423, "grad_norm": 597.4305419921875, "learning_rate": 3.121724717912138e-05, "loss": 50.5436, "step": 119500 }, { "epoch": 0.4828355224085618, "grad_norm": 1035.259765625, "learning_rate": 3.121386612547993e-05, "loss": 31.3353, "step": 119510 }, { "epoch": 0.48287592367392945, "grad_norm": 1129.0030517578125, "learning_rate": 3.121048495069596e-05, "loss": 74.6693, "step": 119520 }, { "epoch": 0.4829163249392971, "grad_norm": 1025.2454833984375, "learning_rate": 3.1207103654835394e-05, "loss": 102.4706, "step": 119530 }, { "epoch": 0.48295672620466473, "grad_norm": 379.8052062988281, "learning_rate": 3.120372223796415e-05, "loss": 49.5203, "step": 119540 }, { "epoch": 0.4829971274700324, "grad_norm": 607.2550048828125, "learning_rate": 3.120034070014814e-05, "loss": 70.4186, "step": 119550 }, { "epoch": 0.4830375287354, "grad_norm": 690.1021728515625, "learning_rate": 3.11969590414533e-05, "loss": 43.4509, "step": 119560 }, { "epoch": 0.4830779300007676, "grad_norm": 1215.8643798828125, "learning_rate": 3.119357726194556e-05, "loss": 42.1141, "step": 119570 }, { "epoch": 0.48311833126613524, "grad_norm": 259.37872314453125, "learning_rate": 3.119019536169083e-05, "loss": 58.9376, "step": 119580 }, { "epoch": 0.4831587325315029, "grad_norm": 755.9671020507812, "learning_rate": 3.118681334075506e-05, "loss": 45.3444, "step": 119590 }, { "epoch": 0.4831991337968705, "grad_norm": 1142.39892578125, "learning_rate": 3.118343119920418e-05, "loss": 58.759, "step": 119600 }, { "epoch": 0.48323953506223816, "grad_norm": 651.2678833007812, "learning_rate": 3.1180048937104114e-05, "loss": 51.0291, "step": 119610 }, { "epoch": 0.4832799363276058, "grad_norm": 555.2761840820312, "learning_rate": 3.117666655452083e-05, "loss": 48.9106, "step": 119620 }, { "epoch": 0.48332033759297344, "grad_norm": 1292.247802734375, "learning_rate": 3.117328405152024e-05, "loss": 67.4017, "step": 119630 }, { "epoch": 0.483360738858341, "grad_norm": 579.1093139648438, "learning_rate": 3.11699014281683e-05, "loss": 56.0868, "step": 119640 }, { "epoch": 0.48340114012370866, "grad_norm": 571.940673828125, "learning_rate": 3.116651868453097e-05, "loss": 57.993, "step": 119650 }, { "epoch": 0.4834415413890763, "grad_norm": 592.5040893554688, "learning_rate": 3.116313582067416e-05, "loss": 51.3045, "step": 119660 }, { "epoch": 0.48348194265444394, "grad_norm": 820.21337890625, "learning_rate": 3.115975283666386e-05, "loss": 50.0175, "step": 119670 }, { "epoch": 0.4835223439198116, "grad_norm": 1067.8109130859375, "learning_rate": 3.1156369732566006e-05, "loss": 31.4268, "step": 119680 }, { "epoch": 0.4835627451851792, "grad_norm": 589.9420776367188, "learning_rate": 3.115298650844655e-05, "loss": 85.4477, "step": 119690 }, { "epoch": 0.4836031464505468, "grad_norm": 485.7039489746094, "learning_rate": 3.114960316437145e-05, "loss": 53.188, "step": 119700 }, { "epoch": 0.48364354771591445, "grad_norm": 809.940673828125, "learning_rate": 3.1146219700406674e-05, "loss": 73.8831, "step": 119710 }, { "epoch": 0.4836839489812821, "grad_norm": 1266.80419921875, "learning_rate": 3.114283611661818e-05, "loss": 65.2104, "step": 119720 }, { "epoch": 0.4837243502466497, "grad_norm": 1203.2113037109375, "learning_rate": 3.113945241307194e-05, "loss": 64.743, "step": 119730 }, { "epoch": 0.48376475151201737, "grad_norm": 655.8514404296875, "learning_rate": 3.1136068589833914e-05, "loss": 70.6579, "step": 119740 }, { "epoch": 0.483805152777385, "grad_norm": 842.0756225585938, "learning_rate": 3.1132684646970064e-05, "loss": 41.7421, "step": 119750 }, { "epoch": 0.48384555404275265, "grad_norm": 859.2930297851562, "learning_rate": 3.1129300584546375e-05, "loss": 61.2274, "step": 119760 }, { "epoch": 0.48388595530812023, "grad_norm": 972.3688354492188, "learning_rate": 3.1125916402628814e-05, "loss": 58.9962, "step": 119770 }, { "epoch": 0.48392635657348787, "grad_norm": 983.5337524414062, "learning_rate": 3.112253210128336e-05, "loss": 57.9547, "step": 119780 }, { "epoch": 0.4839667578388555, "grad_norm": 997.2120361328125, "learning_rate": 3.111914768057599e-05, "loss": 47.62, "step": 119790 }, { "epoch": 0.48400715910422315, "grad_norm": 1432.3392333984375, "learning_rate": 3.111576314057268e-05, "loss": 48.3912, "step": 119800 }, { "epoch": 0.4840475603695908, "grad_norm": 502.978759765625, "learning_rate": 3.1112378481339425e-05, "loss": 60.9147, "step": 119810 }, { "epoch": 0.48408796163495843, "grad_norm": 881.69580078125, "learning_rate": 3.1108993702942205e-05, "loss": 63.0135, "step": 119820 }, { "epoch": 0.484128362900326, "grad_norm": 619.3832397460938, "learning_rate": 3.110560880544701e-05, "loss": 55.6975, "step": 119830 }, { "epoch": 0.48416876416569365, "grad_norm": 933.4210815429688, "learning_rate": 3.1102223788919824e-05, "loss": 45.4596, "step": 119840 }, { "epoch": 0.4842091654310613, "grad_norm": 1533.591552734375, "learning_rate": 3.1098838653426645e-05, "loss": 62.3054, "step": 119850 }, { "epoch": 0.48424956669642893, "grad_norm": 815.6768798828125, "learning_rate": 3.1095453399033466e-05, "loss": 53.5172, "step": 119860 }, { "epoch": 0.4842899679617966, "grad_norm": 1024.9600830078125, "learning_rate": 3.109206802580629e-05, "loss": 63.8872, "step": 119870 }, { "epoch": 0.4843303692271642, "grad_norm": 0.0, "learning_rate": 3.10886825338111e-05, "loss": 48.7491, "step": 119880 }, { "epoch": 0.4843707704925318, "grad_norm": 610.7237548828125, "learning_rate": 3.108529692311391e-05, "loss": 59.5073, "step": 119890 }, { "epoch": 0.48441117175789944, "grad_norm": 999.9380493164062, "learning_rate": 3.108191119378073e-05, "loss": 54.3617, "step": 119900 }, { "epoch": 0.4844515730232671, "grad_norm": 589.252685546875, "learning_rate": 3.107852534587756e-05, "loss": 70.6535, "step": 119910 }, { "epoch": 0.4844919742886347, "grad_norm": 1168.3255615234375, "learning_rate": 3.107513937947041e-05, "loss": 65.7924, "step": 119920 }, { "epoch": 0.48453237555400236, "grad_norm": 452.32415771484375, "learning_rate": 3.107175329462529e-05, "loss": 33.2393, "step": 119930 }, { "epoch": 0.48457277681937, "grad_norm": 365.3132629394531, "learning_rate": 3.106836709140821e-05, "loss": 61.387, "step": 119940 }, { "epoch": 0.48461317808473764, "grad_norm": 431.0849914550781, "learning_rate": 3.1064980769885187e-05, "loss": 49.9083, "step": 119950 }, { "epoch": 0.4846535793501052, "grad_norm": 706.0770874023438, "learning_rate": 3.1061594330122246e-05, "loss": 41.8204, "step": 119960 }, { "epoch": 0.48469398061547286, "grad_norm": 519.536865234375, "learning_rate": 3.10582077721854e-05, "loss": 39.0427, "step": 119970 }, { "epoch": 0.4847343818808405, "grad_norm": 1396.991455078125, "learning_rate": 3.1054821096140676e-05, "loss": 70.4659, "step": 119980 }, { "epoch": 0.48477478314620814, "grad_norm": 1468.1710205078125, "learning_rate": 3.10514343020541e-05, "loss": 91.4469, "step": 119990 }, { "epoch": 0.4848151844115758, "grad_norm": 483.8679504394531, "learning_rate": 3.104804738999169e-05, "loss": 31.8263, "step": 120000 }, { "epoch": 0.4848555856769434, "grad_norm": 1103.7783203125, "learning_rate": 3.10446603600195e-05, "loss": 48.717, "step": 120010 }, { "epoch": 0.484895986942311, "grad_norm": 0.0, "learning_rate": 3.104127321220353e-05, "loss": 44.1308, "step": 120020 }, { "epoch": 0.48493638820767865, "grad_norm": 2047.26025390625, "learning_rate": 3.1037885946609824e-05, "loss": 61.4208, "step": 120030 }, { "epoch": 0.4849767894730463, "grad_norm": 2711.773681640625, "learning_rate": 3.103449856330443e-05, "loss": 70.1431, "step": 120040 }, { "epoch": 0.48501719073841393, "grad_norm": 1064.7301025390625, "learning_rate": 3.1031111062353373e-05, "loss": 59.401, "step": 120050 }, { "epoch": 0.48505759200378157, "grad_norm": 1243.82763671875, "learning_rate": 3.102772344382271e-05, "loss": 78.9579, "step": 120060 }, { "epoch": 0.4850979932691492, "grad_norm": 935.9784545898438, "learning_rate": 3.102433570777847e-05, "loss": 56.5204, "step": 120070 }, { "epoch": 0.48513839453451685, "grad_norm": 583.158935546875, "learning_rate": 3.102094785428671e-05, "loss": 63.3833, "step": 120080 }, { "epoch": 0.48517879579988443, "grad_norm": 749.0221557617188, "learning_rate": 3.101755988341347e-05, "loss": 48.2709, "step": 120090 }, { "epoch": 0.4852191970652521, "grad_norm": 876.2115478515625, "learning_rate": 3.101417179522479e-05, "loss": 53.4147, "step": 120100 }, { "epoch": 0.4852595983306197, "grad_norm": 1196.973388671875, "learning_rate": 3.101078358978675e-05, "loss": 56.0996, "step": 120110 }, { "epoch": 0.48529999959598735, "grad_norm": 580.7915649414062, "learning_rate": 3.100739526716538e-05, "loss": 58.37, "step": 120120 }, { "epoch": 0.485340400861355, "grad_norm": 942.7879028320312, "learning_rate": 3.100400682742675e-05, "loss": 64.2288, "step": 120130 }, { "epoch": 0.48538080212672263, "grad_norm": 627.4921264648438, "learning_rate": 3.100061827063692e-05, "loss": 58.1287, "step": 120140 }, { "epoch": 0.4854212033920902, "grad_norm": 1478.2410888671875, "learning_rate": 3.0997229596861944e-05, "loss": 91.4508, "step": 120150 }, { "epoch": 0.48546160465745786, "grad_norm": 1782.7314453125, "learning_rate": 3.099384080616789e-05, "loss": 56.9644, "step": 120160 }, { "epoch": 0.4855020059228255, "grad_norm": 359.5166320800781, "learning_rate": 3.099045189862081e-05, "loss": 43.2427, "step": 120170 }, { "epoch": 0.48554240718819314, "grad_norm": 604.357177734375, "learning_rate": 3.0987062874286804e-05, "loss": 43.9985, "step": 120180 }, { "epoch": 0.4855828084535608, "grad_norm": 536.3892211914062, "learning_rate": 3.098367373323192e-05, "loss": 60.2531, "step": 120190 }, { "epoch": 0.4856232097189284, "grad_norm": 762.7628784179688, "learning_rate": 3.098028447552224e-05, "loss": 45.4865, "step": 120200 }, { "epoch": 0.485663610984296, "grad_norm": 621.0496215820312, "learning_rate": 3.097689510122382e-05, "loss": 49.9217, "step": 120210 }, { "epoch": 0.48570401224966364, "grad_norm": 1028.444580078125, "learning_rate": 3.0973505610402765e-05, "loss": 54.5276, "step": 120220 }, { "epoch": 0.4857444135150313, "grad_norm": 639.4829711914062, "learning_rate": 3.0970116003125146e-05, "loss": 46.6422, "step": 120230 }, { "epoch": 0.4857848147803989, "grad_norm": 955.115234375, "learning_rate": 3.0966726279457034e-05, "loss": 59.0818, "step": 120240 }, { "epoch": 0.48582521604576656, "grad_norm": 1233.369873046875, "learning_rate": 3.0963336439464526e-05, "loss": 56.1891, "step": 120250 }, { "epoch": 0.4858656173111342, "grad_norm": 3286.7587890625, "learning_rate": 3.09599464832137e-05, "loss": 70.8362, "step": 120260 }, { "epoch": 0.48590601857650184, "grad_norm": 749.9498291015625, "learning_rate": 3.0956556410770655e-05, "loss": 71.6872, "step": 120270 }, { "epoch": 0.4859464198418694, "grad_norm": 337.7810974121094, "learning_rate": 3.0953166222201476e-05, "loss": 34.0317, "step": 120280 }, { "epoch": 0.48598682110723707, "grad_norm": 969.7774047851562, "learning_rate": 3.094977591757224e-05, "loss": 39.0394, "step": 120290 }, { "epoch": 0.4860272223726047, "grad_norm": 600.6663208007812, "learning_rate": 3.094638549694908e-05, "loss": 72.5898, "step": 120300 }, { "epoch": 0.48606762363797235, "grad_norm": 388.6318664550781, "learning_rate": 3.0942994960398064e-05, "loss": 58.7893, "step": 120310 }, { "epoch": 0.48610802490334, "grad_norm": 1114.3638916015625, "learning_rate": 3.09396043079853e-05, "loss": 67.1577, "step": 120320 }, { "epoch": 0.4861484261687076, "grad_norm": 723.9693603515625, "learning_rate": 3.0936213539776895e-05, "loss": 66.5562, "step": 120330 }, { "epoch": 0.4861888274340752, "grad_norm": 608.6270141601562, "learning_rate": 3.093282265583895e-05, "loss": 61.2591, "step": 120340 }, { "epoch": 0.48622922869944285, "grad_norm": 455.3746643066406, "learning_rate": 3.092943165623758e-05, "loss": 57.6604, "step": 120350 }, { "epoch": 0.4862696299648105, "grad_norm": 494.3013610839844, "learning_rate": 3.092604054103888e-05, "loss": 42.1755, "step": 120360 }, { "epoch": 0.48631003123017813, "grad_norm": 2547.077392578125, "learning_rate": 3.092264931030897e-05, "loss": 86.2226, "step": 120370 }, { "epoch": 0.48635043249554577, "grad_norm": 671.0562133789062, "learning_rate": 3.0919257964113964e-05, "loss": 60.703, "step": 120380 }, { "epoch": 0.4863908337609134, "grad_norm": 471.1807556152344, "learning_rate": 3.0915866502519975e-05, "loss": 55.5357, "step": 120390 }, { "epoch": 0.48643123502628105, "grad_norm": 656.9327392578125, "learning_rate": 3.091247492559312e-05, "loss": 66.8196, "step": 120400 }, { "epoch": 0.48647163629164863, "grad_norm": 2174.91064453125, "learning_rate": 3.090908323339952e-05, "loss": 62.3699, "step": 120410 }, { "epoch": 0.4865120375570163, "grad_norm": 663.234619140625, "learning_rate": 3.090569142600531e-05, "loss": 58.3517, "step": 120420 }, { "epoch": 0.4865524388223839, "grad_norm": 1044.9910888671875, "learning_rate": 3.09022995034766e-05, "loss": 45.2753, "step": 120430 }, { "epoch": 0.48659284008775155, "grad_norm": 1259.57666015625, "learning_rate": 3.089890746587953e-05, "loss": 37.8244, "step": 120440 }, { "epoch": 0.4866332413531192, "grad_norm": 619.0317993164062, "learning_rate": 3.089551531328021e-05, "loss": 52.6852, "step": 120450 }, { "epoch": 0.48667364261848683, "grad_norm": 547.9066772460938, "learning_rate": 3.0892123045744785e-05, "loss": 35.2198, "step": 120460 }, { "epoch": 0.4867140438838544, "grad_norm": 538.3446044921875, "learning_rate": 3.08887306633394e-05, "loss": 49.3895, "step": 120470 }, { "epoch": 0.48675444514922206, "grad_norm": 1102.380859375, "learning_rate": 3.088533816613017e-05, "loss": 78.057, "step": 120480 }, { "epoch": 0.4867948464145897, "grad_norm": 836.9898071289062, "learning_rate": 3.0881945554183235e-05, "loss": 73.6896, "step": 120490 }, { "epoch": 0.48683524767995734, "grad_norm": 657.3259887695312, "learning_rate": 3.087855282756475e-05, "loss": 45.1531, "step": 120500 }, { "epoch": 0.486875648945325, "grad_norm": 440.7289733886719, "learning_rate": 3.087515998634085e-05, "loss": 57.9621, "step": 120510 }, { "epoch": 0.4869160502106926, "grad_norm": 955.15966796875, "learning_rate": 3.087176703057769e-05, "loss": 56.2972, "step": 120520 }, { "epoch": 0.4869564514760602, "grad_norm": 293.07012939453125, "learning_rate": 3.08683739603414e-05, "loss": 42.8358, "step": 120530 }, { "epoch": 0.48699685274142784, "grad_norm": 516.2140502929688, "learning_rate": 3.0864980775698145e-05, "loss": 92.3896, "step": 120540 }, { "epoch": 0.4870372540067955, "grad_norm": 425.08343505859375, "learning_rate": 3.086158747671406e-05, "loss": 90.2946, "step": 120550 }, { "epoch": 0.4870776552721631, "grad_norm": 302.85626220703125, "learning_rate": 3.085819406345532e-05, "loss": 73.1443, "step": 120560 }, { "epoch": 0.48711805653753076, "grad_norm": 910.6437377929688, "learning_rate": 3.0854800535988064e-05, "loss": 54.2145, "step": 120570 }, { "epoch": 0.4871584578028984, "grad_norm": 1036.3970947265625, "learning_rate": 3.085140689437846e-05, "loss": 49.4283, "step": 120580 }, { "epoch": 0.48719885906826604, "grad_norm": 1020.8902587890625, "learning_rate": 3.084801313869266e-05, "loss": 50.4306, "step": 120590 }, { "epoch": 0.48723926033363363, "grad_norm": 683.2181396484375, "learning_rate": 3.0844619268996845e-05, "loss": 52.7355, "step": 120600 }, { "epoch": 0.48727966159900127, "grad_norm": 712.2384033203125, "learning_rate": 3.084122528535717e-05, "loss": 76.377, "step": 120610 }, { "epoch": 0.4873200628643689, "grad_norm": 1040.790771484375, "learning_rate": 3.0837831187839784e-05, "loss": 66.5839, "step": 120620 }, { "epoch": 0.48736046412973655, "grad_norm": 826.3641967773438, "learning_rate": 3.083443697651088e-05, "loss": 81.0511, "step": 120630 }, { "epoch": 0.4874008653951042, "grad_norm": 1687.8861083984375, "learning_rate": 3.083104265143663e-05, "loss": 73.7149, "step": 120640 }, { "epoch": 0.48744126666047183, "grad_norm": 700.1195678710938, "learning_rate": 3.08276482126832e-05, "loss": 49.3427, "step": 120650 }, { "epoch": 0.4874816679258394, "grad_norm": 661.125244140625, "learning_rate": 3.082425366031676e-05, "loss": 37.9126, "step": 120660 }, { "epoch": 0.48752206919120705, "grad_norm": 773.9281005859375, "learning_rate": 3.08208589944035e-05, "loss": 47.0344, "step": 120670 }, { "epoch": 0.4875624704565747, "grad_norm": 1276.460205078125, "learning_rate": 3.08174642150096e-05, "loss": 53.2406, "step": 120680 }, { "epoch": 0.48760287172194233, "grad_norm": 669.7227172851562, "learning_rate": 3.081406932220123e-05, "loss": 39.3701, "step": 120690 }, { "epoch": 0.48764327298731, "grad_norm": 2131.654541015625, "learning_rate": 3.08106743160446e-05, "loss": 64.2498, "step": 120700 }, { "epoch": 0.4876836742526776, "grad_norm": 621.3330078125, "learning_rate": 3.0807279196605876e-05, "loss": 64.5872, "step": 120710 }, { "epoch": 0.48772407551804525, "grad_norm": 565.8031616210938, "learning_rate": 3.0803883963951255e-05, "loss": 59.7642, "step": 120720 }, { "epoch": 0.48776447678341284, "grad_norm": 936.9571533203125, "learning_rate": 3.080048861814693e-05, "loss": 34.2556, "step": 120730 }, { "epoch": 0.4878048780487805, "grad_norm": 361.34063720703125, "learning_rate": 3.0797093159259085e-05, "loss": 52.7916, "step": 120740 }, { "epoch": 0.4878452793141481, "grad_norm": 325.8556213378906, "learning_rate": 3.079369758735393e-05, "loss": 41.6967, "step": 120750 }, { "epoch": 0.48788568057951576, "grad_norm": 522.2239990234375, "learning_rate": 3.0790301902497666e-05, "loss": 44.1217, "step": 120760 }, { "epoch": 0.4879260818448834, "grad_norm": 1932.948486328125, "learning_rate": 3.078690610475647e-05, "loss": 70.4253, "step": 120770 }, { "epoch": 0.48796648311025104, "grad_norm": 1131.1263427734375, "learning_rate": 3.0783510194196576e-05, "loss": 101.964, "step": 120780 }, { "epoch": 0.4880068843756186, "grad_norm": 1616.0198974609375, "learning_rate": 3.078011417088416e-05, "loss": 55.159, "step": 120790 }, { "epoch": 0.48804728564098626, "grad_norm": 942.608642578125, "learning_rate": 3.0776718034885454e-05, "loss": 47.1413, "step": 120800 }, { "epoch": 0.4880876869063539, "grad_norm": 503.5368347167969, "learning_rate": 3.0773321786266644e-05, "loss": 36.5475, "step": 120810 }, { "epoch": 0.48812808817172154, "grad_norm": 844.5652465820312, "learning_rate": 3.076992542509396e-05, "loss": 52.4938, "step": 120820 }, { "epoch": 0.4881684894370892, "grad_norm": 856.3523559570312, "learning_rate": 3.07665289514336e-05, "loss": 83.7883, "step": 120830 }, { "epoch": 0.4882088907024568, "grad_norm": 400.933349609375, "learning_rate": 3.07631323653518e-05, "loss": 55.8697, "step": 120840 }, { "epoch": 0.4882492919678244, "grad_norm": 604.0474853515625, "learning_rate": 3.075973566691477e-05, "loss": 72.4179, "step": 120850 }, { "epoch": 0.48828969323319205, "grad_norm": 584.9152221679688, "learning_rate": 3.0756338856188716e-05, "loss": 59.9413, "step": 120860 }, { "epoch": 0.4883300944985597, "grad_norm": 734.94287109375, "learning_rate": 3.075294193323988e-05, "loss": 81.9922, "step": 120870 }, { "epoch": 0.4883704957639273, "grad_norm": 775.8441162109375, "learning_rate": 3.074954489813449e-05, "loss": 59.0604, "step": 120880 }, { "epoch": 0.48841089702929497, "grad_norm": 464.788330078125, "learning_rate": 3.074614775093874e-05, "loss": 77.2058, "step": 120890 }, { "epoch": 0.4884512982946626, "grad_norm": 250.41958618164062, "learning_rate": 3.074275049171889e-05, "loss": 40.2773, "step": 120900 }, { "epoch": 0.48849169956003025, "grad_norm": 608.3348388671875, "learning_rate": 3.073935312054117e-05, "loss": 34.6437, "step": 120910 }, { "epoch": 0.48853210082539783, "grad_norm": 1844.59033203125, "learning_rate": 3.0735955637471794e-05, "loss": 71.1501, "step": 120920 }, { "epoch": 0.48857250209076547, "grad_norm": 385.8973388671875, "learning_rate": 3.073255804257702e-05, "loss": 56.5991, "step": 120930 }, { "epoch": 0.4886129033561331, "grad_norm": 503.91558837890625, "learning_rate": 3.072916033592307e-05, "loss": 62.6247, "step": 120940 }, { "epoch": 0.48865330462150075, "grad_norm": 590.6072387695312, "learning_rate": 3.0725762517576195e-05, "loss": 39.7041, "step": 120950 }, { "epoch": 0.4886937058868684, "grad_norm": 369.4613952636719, "learning_rate": 3.072236458760262e-05, "loss": 37.5118, "step": 120960 }, { "epoch": 0.48873410715223603, "grad_norm": 743.9142456054688, "learning_rate": 3.071896654606862e-05, "loss": 48.3398, "step": 120970 }, { "epoch": 0.4887745084176036, "grad_norm": 611.325439453125, "learning_rate": 3.0715568393040405e-05, "loss": 82.7277, "step": 120980 }, { "epoch": 0.48881490968297125, "grad_norm": 1614.0037841796875, "learning_rate": 3.071217012858425e-05, "loss": 102.5452, "step": 120990 }, { "epoch": 0.4888553109483389, "grad_norm": 984.4487915039062, "learning_rate": 3.0708771752766394e-05, "loss": 56.9584, "step": 121000 }, { "epoch": 0.48889571221370653, "grad_norm": 1377.5753173828125, "learning_rate": 3.07053732656531e-05, "loss": 75.7293, "step": 121010 }, { "epoch": 0.4889361134790742, "grad_norm": 438.0140686035156, "learning_rate": 3.070197466731061e-05, "loss": 43.8571, "step": 121020 }, { "epoch": 0.4889765147444418, "grad_norm": 710.8450317382812, "learning_rate": 3.069857595780519e-05, "loss": 66.9329, "step": 121030 }, { "epoch": 0.48901691600980945, "grad_norm": 762.4956665039062, "learning_rate": 3.06951771372031e-05, "loss": 52.6469, "step": 121040 }, { "epoch": 0.48905731727517704, "grad_norm": 732.955322265625, "learning_rate": 3.06917782055706e-05, "loss": 84.5691, "step": 121050 }, { "epoch": 0.4890977185405447, "grad_norm": 899.1510009765625, "learning_rate": 3.0688379162973955e-05, "loss": 78.2393, "step": 121060 }, { "epoch": 0.4891381198059123, "grad_norm": 798.7470092773438, "learning_rate": 3.0684980009479424e-05, "loss": 58.7748, "step": 121070 }, { "epoch": 0.48917852107127996, "grad_norm": 897.6027221679688, "learning_rate": 3.068158074515328e-05, "loss": 52.4273, "step": 121080 }, { "epoch": 0.4892189223366476, "grad_norm": 503.83392333984375, "learning_rate": 3.0678181370061805e-05, "loss": 59.4657, "step": 121090 }, { "epoch": 0.48925932360201524, "grad_norm": 1317.728759765625, "learning_rate": 3.0674781884271254e-05, "loss": 93.741, "step": 121100 }, { "epoch": 0.4892997248673828, "grad_norm": 1355.2291259765625, "learning_rate": 3.067138228784791e-05, "loss": 84.8171, "step": 121110 }, { "epoch": 0.48934012613275046, "grad_norm": 1263.6253662109375, "learning_rate": 3.0667982580858044e-05, "loss": 65.6653, "step": 121120 }, { "epoch": 0.4893805273981181, "grad_norm": 630.547607421875, "learning_rate": 3.066458276336794e-05, "loss": 62.2357, "step": 121130 }, { "epoch": 0.48942092866348574, "grad_norm": 1021.1478271484375, "learning_rate": 3.0661182835443884e-05, "loss": 57.1426, "step": 121140 }, { "epoch": 0.4894613299288534, "grad_norm": 1333.4888916015625, "learning_rate": 3.065778279715215e-05, "loss": 49.1796, "step": 121150 }, { "epoch": 0.489501731194221, "grad_norm": 481.13165283203125, "learning_rate": 3.0654382648559026e-05, "loss": 65.8724, "step": 121160 }, { "epoch": 0.4895421324595886, "grad_norm": 359.9427795410156, "learning_rate": 3.065098238973081e-05, "loss": 77.4488, "step": 121170 }, { "epoch": 0.48958253372495625, "grad_norm": 274.55328369140625, "learning_rate": 3.064758202073377e-05, "loss": 54.833, "step": 121180 }, { "epoch": 0.4896229349903239, "grad_norm": 489.7810974121094, "learning_rate": 3.064418154163422e-05, "loss": 54.8473, "step": 121190 }, { "epoch": 0.48966333625569153, "grad_norm": 674.3745727539062, "learning_rate": 3.064078095249844e-05, "loss": 51.562, "step": 121200 }, { "epoch": 0.48970373752105917, "grad_norm": 443.5577087402344, "learning_rate": 3.0637380253392736e-05, "loss": 68.5722, "step": 121210 }, { "epoch": 0.4897441387864268, "grad_norm": 824.37158203125, "learning_rate": 3.06339794443834e-05, "loss": 46.6327, "step": 121220 }, { "epoch": 0.48978454005179445, "grad_norm": 275.5827331542969, "learning_rate": 3.063057852553674e-05, "loss": 32.1468, "step": 121230 }, { "epoch": 0.48982494131716203, "grad_norm": 573.8633422851562, "learning_rate": 3.062717749691904e-05, "loss": 58.9486, "step": 121240 }, { "epoch": 0.4898653425825297, "grad_norm": 383.3703918457031, "learning_rate": 3.062377635859663e-05, "loss": 39.3934, "step": 121250 }, { "epoch": 0.4899057438478973, "grad_norm": 942.3863525390625, "learning_rate": 3.06203751106358e-05, "loss": 49.6613, "step": 121260 }, { "epoch": 0.48994614511326495, "grad_norm": 0.0, "learning_rate": 3.0616973753102856e-05, "loss": 37.8872, "step": 121270 }, { "epoch": 0.4899865463786326, "grad_norm": 1657.1973876953125, "learning_rate": 3.0613572286064125e-05, "loss": 68.007, "step": 121280 }, { "epoch": 0.49002694764400023, "grad_norm": 647.4043579101562, "learning_rate": 3.061017070958591e-05, "loss": 92.0626, "step": 121290 }, { "epoch": 0.4900673489093678, "grad_norm": 885.7959594726562, "learning_rate": 3.0606769023734536e-05, "loss": 30.4313, "step": 121300 }, { "epoch": 0.49010775017473546, "grad_norm": 1316.2650146484375, "learning_rate": 3.060336722857631e-05, "loss": 83.0374, "step": 121310 }, { "epoch": 0.4901481514401031, "grad_norm": 1012.3235473632812, "learning_rate": 3.059996532417754e-05, "loss": 41.8788, "step": 121320 }, { "epoch": 0.49018855270547074, "grad_norm": 449.5831604003906, "learning_rate": 3.059656331060458e-05, "loss": 55.4948, "step": 121330 }, { "epoch": 0.4902289539708384, "grad_norm": 723.0786743164062, "learning_rate": 3.0593161187923736e-05, "loss": 68.319, "step": 121340 }, { "epoch": 0.490269355236206, "grad_norm": 569.5224609375, "learning_rate": 3.0589758956201327e-05, "loss": 62.8122, "step": 121350 }, { "epoch": 0.49030975650157366, "grad_norm": 572.9276733398438, "learning_rate": 3.058635661550369e-05, "loss": 60.2772, "step": 121360 }, { "epoch": 0.49035015776694124, "grad_norm": 379.9477844238281, "learning_rate": 3.058295416589716e-05, "loss": 69.8202, "step": 121370 }, { "epoch": 0.4903905590323089, "grad_norm": 534.4418334960938, "learning_rate": 3.0579551607448066e-05, "loss": 51.7835, "step": 121380 }, { "epoch": 0.4904309602976765, "grad_norm": 803.1372680664062, "learning_rate": 3.057614894022274e-05, "loss": 43.7992, "step": 121390 }, { "epoch": 0.49047136156304416, "grad_norm": 2386.29736328125, "learning_rate": 3.0572746164287514e-05, "loss": 54.0955, "step": 121400 }, { "epoch": 0.4905117628284118, "grad_norm": 997.156005859375, "learning_rate": 3.0569343279708734e-05, "loss": 65.676, "step": 121410 }, { "epoch": 0.49055216409377944, "grad_norm": 525.251953125, "learning_rate": 3.056594028655274e-05, "loss": 52.9306, "step": 121420 }, { "epoch": 0.490592565359147, "grad_norm": 1752.92236328125, "learning_rate": 3.056253718488588e-05, "loss": 56.5857, "step": 121430 }, { "epoch": 0.49063296662451467, "grad_norm": 704.6538696289062, "learning_rate": 3.055913397477448e-05, "loss": 68.621, "step": 121440 }, { "epoch": 0.4906733678898823, "grad_norm": 615.273681640625, "learning_rate": 3.0555730656284914e-05, "loss": 54.718, "step": 121450 }, { "epoch": 0.49071376915524995, "grad_norm": 676.6206665039062, "learning_rate": 3.0552327229483515e-05, "loss": 60.8578, "step": 121460 }, { "epoch": 0.4907541704206176, "grad_norm": 512.4281005859375, "learning_rate": 3.054892369443663e-05, "loss": 48.8229, "step": 121470 }, { "epoch": 0.4907945716859852, "grad_norm": 885.903076171875, "learning_rate": 3.054552005121064e-05, "loss": 83.9689, "step": 121480 }, { "epoch": 0.4908349729513528, "grad_norm": 405.67919921875, "learning_rate": 3.054211629987187e-05, "loss": 50.5087, "step": 121490 }, { "epoch": 0.49087537421672045, "grad_norm": 430.7709045410156, "learning_rate": 3.053871244048669e-05, "loss": 54.2436, "step": 121500 }, { "epoch": 0.4909157754820881, "grad_norm": 394.2481689453125, "learning_rate": 3.0535308473121455e-05, "loss": 37.6645, "step": 121510 }, { "epoch": 0.49095617674745573, "grad_norm": 2119.940673828125, "learning_rate": 3.053190439784253e-05, "loss": 85.2859, "step": 121520 }, { "epoch": 0.49099657801282337, "grad_norm": 509.3744201660156, "learning_rate": 3.052850021471629e-05, "loss": 74.4669, "step": 121530 }, { "epoch": 0.491036979278191, "grad_norm": 0.0, "learning_rate": 3.052509592380909e-05, "loss": 51.207, "step": 121540 }, { "epoch": 0.49107738054355865, "grad_norm": 615.7112426757812, "learning_rate": 3.052169152518729e-05, "loss": 37.4327, "step": 121550 }, { "epoch": 0.49111778180892623, "grad_norm": 624.2453002929688, "learning_rate": 3.051828701891729e-05, "loss": 52.5991, "step": 121560 }, { "epoch": 0.4911581830742939, "grad_norm": 558.4168090820312, "learning_rate": 3.0514882405065432e-05, "loss": 63.3106, "step": 121570 }, { "epoch": 0.4911985843396615, "grad_norm": 509.6629333496094, "learning_rate": 3.0511477683698108e-05, "loss": 63.4842, "step": 121580 }, { "epoch": 0.49123898560502915, "grad_norm": 806.631103515625, "learning_rate": 3.050807285488168e-05, "loss": 62.4659, "step": 121590 }, { "epoch": 0.4912793868703968, "grad_norm": 814.9642944335938, "learning_rate": 3.050466791868254e-05, "loss": 66.1909, "step": 121600 }, { "epoch": 0.49131978813576443, "grad_norm": 712.5091552734375, "learning_rate": 3.0501262875167063e-05, "loss": 74.5207, "step": 121610 }, { "epoch": 0.491360189401132, "grad_norm": 615.8709106445312, "learning_rate": 3.0497857724401642e-05, "loss": 79.785, "step": 121620 }, { "epoch": 0.49140059066649966, "grad_norm": 729.620361328125, "learning_rate": 3.0494452466452644e-05, "loss": 75.2466, "step": 121630 }, { "epoch": 0.4914409919318673, "grad_norm": 232.82257080078125, "learning_rate": 3.049104710138647e-05, "loss": 40.4371, "step": 121640 }, { "epoch": 0.49148139319723494, "grad_norm": 528.4829711914062, "learning_rate": 3.0487641629269516e-05, "loss": 49.6708, "step": 121650 }, { "epoch": 0.4915217944626026, "grad_norm": 2002.76416015625, "learning_rate": 3.0484236050168153e-05, "loss": 52.0956, "step": 121660 }, { "epoch": 0.4915621957279702, "grad_norm": 575.5763549804688, "learning_rate": 3.048083036414878e-05, "loss": 39.8568, "step": 121670 }, { "epoch": 0.4916025969933378, "grad_norm": 0.0, "learning_rate": 3.0477424571277807e-05, "loss": 39.302, "step": 121680 }, { "epoch": 0.49164299825870544, "grad_norm": 412.2666015625, "learning_rate": 3.047401867162162e-05, "loss": 46.3011, "step": 121690 }, { "epoch": 0.4916833995240731, "grad_norm": 749.7333374023438, "learning_rate": 3.0470612665246618e-05, "loss": 63.5483, "step": 121700 }, { "epoch": 0.4917238007894407, "grad_norm": 686.0115966796875, "learning_rate": 3.0467206552219208e-05, "loss": 37.7942, "step": 121710 }, { "epoch": 0.49176420205480836, "grad_norm": 1040.0323486328125, "learning_rate": 3.0463800332605784e-05, "loss": 69.1257, "step": 121720 }, { "epoch": 0.491804603320176, "grad_norm": 547.2194213867188, "learning_rate": 3.046039400647277e-05, "loss": 28.7517, "step": 121730 }, { "epoch": 0.49184500458554364, "grad_norm": 1607.9515380859375, "learning_rate": 3.0456987573886564e-05, "loss": 67.1605, "step": 121740 }, { "epoch": 0.49188540585091123, "grad_norm": 1343.8623046875, "learning_rate": 3.045358103491357e-05, "loss": 61.9395, "step": 121750 }, { "epoch": 0.49192580711627887, "grad_norm": 360.9034729003906, "learning_rate": 3.0450174389620205e-05, "loss": 60.9107, "step": 121760 }, { "epoch": 0.4919662083816465, "grad_norm": 658.2755126953125, "learning_rate": 3.044676763807288e-05, "loss": 44.2099, "step": 121770 }, { "epoch": 0.49200660964701415, "grad_norm": 463.0184326171875, "learning_rate": 3.044336078033803e-05, "loss": 50.352, "step": 121780 }, { "epoch": 0.4920470109123818, "grad_norm": 1313.499755859375, "learning_rate": 3.043995381648205e-05, "loss": 65.2679, "step": 121790 }, { "epoch": 0.49208741217774943, "grad_norm": 662.1671142578125, "learning_rate": 3.0436546746571372e-05, "loss": 47.1219, "step": 121800 }, { "epoch": 0.492127813443117, "grad_norm": 3354.996826171875, "learning_rate": 3.0433139570672407e-05, "loss": 87.668, "step": 121810 }, { "epoch": 0.49216821470848465, "grad_norm": 931.5249633789062, "learning_rate": 3.0429732288851603e-05, "loss": 44.4428, "step": 121820 }, { "epoch": 0.4922086159738523, "grad_norm": 798.1958618164062, "learning_rate": 3.0426324901175374e-05, "loss": 51.0042, "step": 121830 }, { "epoch": 0.49224901723921993, "grad_norm": 674.7509765625, "learning_rate": 3.0422917407710137e-05, "loss": 33.4516, "step": 121840 }, { "epoch": 0.4922894185045876, "grad_norm": 734.49169921875, "learning_rate": 3.0419509808522334e-05, "loss": 77.9193, "step": 121850 }, { "epoch": 0.4923298197699552, "grad_norm": 561.5974731445312, "learning_rate": 3.0416102103678402e-05, "loss": 52.3013, "step": 121860 }, { "epoch": 0.49237022103532285, "grad_norm": 292.3045959472656, "learning_rate": 3.041269429324477e-05, "loss": 31.1926, "step": 121870 }, { "epoch": 0.49241062230069044, "grad_norm": 613.70361328125, "learning_rate": 3.040928637728787e-05, "loss": 56.5007, "step": 121880 }, { "epoch": 0.4924510235660581, "grad_norm": 1232.8988037109375, "learning_rate": 3.040587835587415e-05, "loss": 73.1134, "step": 121890 }, { "epoch": 0.4924914248314257, "grad_norm": 831.9037475585938, "learning_rate": 3.0402470229070056e-05, "loss": 46.9302, "step": 121900 }, { "epoch": 0.49253182609679336, "grad_norm": 409.0111389160156, "learning_rate": 3.039906199694202e-05, "loss": 39.5951, "step": 121910 }, { "epoch": 0.492572227362161, "grad_norm": 646.8606567382812, "learning_rate": 3.0395653659556488e-05, "loss": 47.5818, "step": 121920 }, { "epoch": 0.49261262862752864, "grad_norm": 363.55609130859375, "learning_rate": 3.039224521697991e-05, "loss": 72.8517, "step": 121930 }, { "epoch": 0.4926530298928962, "grad_norm": 498.0539855957031, "learning_rate": 3.0388836669278738e-05, "loss": 53.5023, "step": 121940 }, { "epoch": 0.49269343115826386, "grad_norm": 943.4515991210938, "learning_rate": 3.038542801651941e-05, "loss": 59.0278, "step": 121950 }, { "epoch": 0.4927338324236315, "grad_norm": 3113.823486328125, "learning_rate": 3.0382019258768403e-05, "loss": 72.7263, "step": 121960 }, { "epoch": 0.49277423368899914, "grad_norm": 509.8355407714844, "learning_rate": 3.0378610396092154e-05, "loss": 51.3322, "step": 121970 }, { "epoch": 0.4928146349543668, "grad_norm": 791.736572265625, "learning_rate": 3.0375201428557132e-05, "loss": 48.9846, "step": 121980 }, { "epoch": 0.4928550362197344, "grad_norm": 532.8504638671875, "learning_rate": 3.0371792356229783e-05, "loss": 35.4836, "step": 121990 }, { "epoch": 0.492895437485102, "grad_norm": 419.5154113769531, "learning_rate": 3.0368383179176585e-05, "loss": 69.5128, "step": 122000 }, { "epoch": 0.49293583875046965, "grad_norm": 3489.129150390625, "learning_rate": 3.036497389746399e-05, "loss": 68.8055, "step": 122010 }, { "epoch": 0.4929762400158373, "grad_norm": 1044.6541748046875, "learning_rate": 3.0361564511158457e-05, "loss": 62.8193, "step": 122020 }, { "epoch": 0.4930166412812049, "grad_norm": 1858.55615234375, "learning_rate": 3.0358155020326477e-05, "loss": 77.7083, "step": 122030 }, { "epoch": 0.49305704254657257, "grad_norm": 682.7332153320312, "learning_rate": 3.0354745425034498e-05, "loss": 64.1814, "step": 122040 }, { "epoch": 0.4930974438119402, "grad_norm": 653.3331298828125, "learning_rate": 3.0351335725349e-05, "loss": 49.824, "step": 122050 }, { "epoch": 0.49313784507730785, "grad_norm": 859.71337890625, "learning_rate": 3.0347925921336463e-05, "loss": 54.182, "step": 122060 }, { "epoch": 0.49317824634267543, "grad_norm": 1686.3477783203125, "learning_rate": 3.0344516013063357e-05, "loss": 43.8826, "step": 122070 }, { "epoch": 0.49321864760804307, "grad_norm": 1858.2529296875, "learning_rate": 3.034110600059616e-05, "loss": 42.0115, "step": 122080 }, { "epoch": 0.4932590488734107, "grad_norm": 857.298828125, "learning_rate": 3.0337695884001343e-05, "loss": 44.892, "step": 122090 }, { "epoch": 0.49329945013877835, "grad_norm": 3229.34765625, "learning_rate": 3.0334285663345404e-05, "loss": 49.8066, "step": 122100 }, { "epoch": 0.493339851404146, "grad_norm": 782.3743896484375, "learning_rate": 3.033087533869482e-05, "loss": 46.6393, "step": 122110 }, { "epoch": 0.49338025266951363, "grad_norm": 1177.9285888671875, "learning_rate": 3.032746491011607e-05, "loss": 54.9906, "step": 122120 }, { "epoch": 0.4934206539348812, "grad_norm": 997.4578247070312, "learning_rate": 3.0324054377675654e-05, "loss": 47.8525, "step": 122130 }, { "epoch": 0.49346105520024885, "grad_norm": 1065.852294921875, "learning_rate": 3.032064374144005e-05, "loss": 58.0819, "step": 122140 }, { "epoch": 0.4935014564656165, "grad_norm": 2079.648681640625, "learning_rate": 3.031723300147577e-05, "loss": 95.1813, "step": 122150 }, { "epoch": 0.49354185773098413, "grad_norm": 759.1595458984375, "learning_rate": 3.0313822157849287e-05, "loss": 54.4963, "step": 122160 }, { "epoch": 0.4935822589963518, "grad_norm": 2073.466064453125, "learning_rate": 3.031041121062711e-05, "loss": 70.8082, "step": 122170 }, { "epoch": 0.4936226602617194, "grad_norm": 1743.0040283203125, "learning_rate": 3.030700015987573e-05, "loss": 57.2004, "step": 122180 }, { "epoch": 0.49366306152708705, "grad_norm": 714.07275390625, "learning_rate": 3.030358900566165e-05, "loss": 60.7911, "step": 122190 }, { "epoch": 0.49370346279245464, "grad_norm": 1210.780029296875, "learning_rate": 3.0300177748051373e-05, "loss": 57.1338, "step": 122200 }, { "epoch": 0.4937438640578223, "grad_norm": 815.665771484375, "learning_rate": 3.02967663871114e-05, "loss": 74.5411, "step": 122210 }, { "epoch": 0.4937842653231899, "grad_norm": 1104.837646484375, "learning_rate": 3.0293354922908235e-05, "loss": 69.4025, "step": 122220 }, { "epoch": 0.49382466658855756, "grad_norm": 693.127197265625, "learning_rate": 3.0289943355508392e-05, "loss": 63.0513, "step": 122230 }, { "epoch": 0.4938650678539252, "grad_norm": 683.1317749023438, "learning_rate": 3.028653168497838e-05, "loss": 46.7121, "step": 122240 }, { "epoch": 0.49390546911929284, "grad_norm": 317.39532470703125, "learning_rate": 3.028311991138472e-05, "loss": 63.5555, "step": 122250 }, { "epoch": 0.4939458703846604, "grad_norm": 466.8677062988281, "learning_rate": 3.0279708034793907e-05, "loss": 45.8454, "step": 122260 }, { "epoch": 0.49398627165002806, "grad_norm": 1304.4661865234375, "learning_rate": 3.027629605527248e-05, "loss": 67.0937, "step": 122270 }, { "epoch": 0.4940266729153957, "grad_norm": 341.1806335449219, "learning_rate": 3.0272883972886935e-05, "loss": 32.4989, "step": 122280 }, { "epoch": 0.49406707418076334, "grad_norm": 668.6641235351562, "learning_rate": 3.02694717877038e-05, "loss": 58.9114, "step": 122290 }, { "epoch": 0.494107475446131, "grad_norm": 1312.1033935546875, "learning_rate": 3.02660594997896e-05, "loss": 52.9595, "step": 122300 }, { "epoch": 0.4941478767114986, "grad_norm": 206.2220916748047, "learning_rate": 3.0262647109210867e-05, "loss": 42.2671, "step": 122310 }, { "epoch": 0.4941882779768662, "grad_norm": 533.5427856445312, "learning_rate": 3.0259234616034116e-05, "loss": 54.6035, "step": 122320 }, { "epoch": 0.49422867924223385, "grad_norm": 389.654296875, "learning_rate": 3.0255822020325873e-05, "loss": 46.6373, "step": 122330 }, { "epoch": 0.4942690805076015, "grad_norm": 509.89276123046875, "learning_rate": 3.025240932215268e-05, "loss": 65.848, "step": 122340 }, { "epoch": 0.49430948177296913, "grad_norm": 738.1375732421875, "learning_rate": 3.024899652158107e-05, "loss": 46.7456, "step": 122350 }, { "epoch": 0.49434988303833677, "grad_norm": 272.38751220703125, "learning_rate": 3.0245583618677558e-05, "loss": 58.7914, "step": 122360 }, { "epoch": 0.4943902843037044, "grad_norm": 599.438232421875, "learning_rate": 3.0242170613508692e-05, "loss": 57.4107, "step": 122370 }, { "epoch": 0.49443068556907205, "grad_norm": 323.33062744140625, "learning_rate": 3.0238757506141012e-05, "loss": 48.1619, "step": 122380 }, { "epoch": 0.49447108683443963, "grad_norm": 1464.4964599609375, "learning_rate": 3.0235344296641067e-05, "loss": 58.9648, "step": 122390 }, { "epoch": 0.4945114880998073, "grad_norm": 2316.94873046875, "learning_rate": 3.023193098507538e-05, "loss": 80.6663, "step": 122400 }, { "epoch": 0.4945518893651749, "grad_norm": 777.03173828125, "learning_rate": 3.0228517571510507e-05, "loss": 74.6564, "step": 122410 }, { "epoch": 0.49459229063054255, "grad_norm": 832.2376098632812, "learning_rate": 3.0225104056013e-05, "loss": 50.8063, "step": 122420 }, { "epoch": 0.4946326918959102, "grad_norm": 1374.7584228515625, "learning_rate": 3.0221690438649386e-05, "loss": 84.333, "step": 122430 }, { "epoch": 0.49467309316127783, "grad_norm": 586.2657470703125, "learning_rate": 3.0218276719486244e-05, "loss": 54.8292, "step": 122440 }, { "epoch": 0.4947134944266454, "grad_norm": 362.1689147949219, "learning_rate": 3.0214862898590095e-05, "loss": 45.3425, "step": 122450 }, { "epoch": 0.49475389569201306, "grad_norm": 740.5028076171875, "learning_rate": 3.021144897602752e-05, "loss": 65.865, "step": 122460 }, { "epoch": 0.4947942969573807, "grad_norm": 776.755615234375, "learning_rate": 3.020803495186506e-05, "loss": 45.3244, "step": 122470 }, { "epoch": 0.49483469822274834, "grad_norm": 1503.520263671875, "learning_rate": 3.020462082616928e-05, "loss": 44.012, "step": 122480 }, { "epoch": 0.494875099488116, "grad_norm": 772.3239135742188, "learning_rate": 3.0201206599006733e-05, "loss": 50.5993, "step": 122490 }, { "epoch": 0.4949155007534836, "grad_norm": 343.9445495605469, "learning_rate": 3.0197792270443982e-05, "loss": 43.6323, "step": 122500 }, { "epoch": 0.49495590201885126, "grad_norm": 765.3672485351562, "learning_rate": 3.0194377840547606e-05, "loss": 48.1465, "step": 122510 }, { "epoch": 0.49499630328421884, "grad_norm": 936.2794799804688, "learning_rate": 3.0190963309384156e-05, "loss": 58.2882, "step": 122520 }, { "epoch": 0.4950367045495865, "grad_norm": 770.3196411132812, "learning_rate": 3.01875486770202e-05, "loss": 43.9133, "step": 122530 }, { "epoch": 0.4950771058149541, "grad_norm": 411.43255615234375, "learning_rate": 3.0184133943522314e-05, "loss": 50.147, "step": 122540 }, { "epoch": 0.49511750708032176, "grad_norm": 589.0484619140625, "learning_rate": 3.0180719108957063e-05, "loss": 48.991, "step": 122550 }, { "epoch": 0.4951579083456894, "grad_norm": 317.48046875, "learning_rate": 3.0177304173391037e-05, "loss": 47.1381, "step": 122560 }, { "epoch": 0.49519830961105704, "grad_norm": 2199.10888671875, "learning_rate": 3.0173889136890786e-05, "loss": 60.0664, "step": 122570 }, { "epoch": 0.4952387108764246, "grad_norm": 2371.26025390625, "learning_rate": 3.0170473999522915e-05, "loss": 53.2513, "step": 122580 }, { "epoch": 0.49527911214179227, "grad_norm": 2204.5810546875, "learning_rate": 3.016705876135399e-05, "loss": 75.0456, "step": 122590 }, { "epoch": 0.4953195134071599, "grad_norm": 1063.3651123046875, "learning_rate": 3.016364342245059e-05, "loss": 85.0364, "step": 122600 }, { "epoch": 0.49535991467252755, "grad_norm": 1718.4310302734375, "learning_rate": 3.016022798287931e-05, "loss": 66.0194, "step": 122610 }, { "epoch": 0.4954003159378952, "grad_norm": 953.0413208007812, "learning_rate": 3.0156812442706715e-05, "loss": 54.6552, "step": 122620 }, { "epoch": 0.4954407172032628, "grad_norm": 702.347900390625, "learning_rate": 3.015339680199941e-05, "loss": 39.405, "step": 122630 }, { "epoch": 0.4954811184686304, "grad_norm": 477.78302001953125, "learning_rate": 3.0149981060823995e-05, "loss": 39.2307, "step": 122640 }, { "epoch": 0.49552151973399805, "grad_norm": 893.2791137695312, "learning_rate": 3.0146565219247036e-05, "loss": 70.0523, "step": 122650 }, { "epoch": 0.4955619209993657, "grad_norm": 493.2530517578125, "learning_rate": 3.0143149277335138e-05, "loss": 59.8175, "step": 122660 }, { "epoch": 0.49560232226473333, "grad_norm": 2252.927734375, "learning_rate": 3.01397332351549e-05, "loss": 79.7985, "step": 122670 }, { "epoch": 0.49564272353010097, "grad_norm": 639.860595703125, "learning_rate": 3.013631709277292e-05, "loss": 80.3618, "step": 122680 }, { "epoch": 0.4956831247954686, "grad_norm": 542.6490478515625, "learning_rate": 3.013290085025579e-05, "loss": 64.4751, "step": 122690 }, { "epoch": 0.49572352606083625, "grad_norm": 933.4337158203125, "learning_rate": 3.0129484507670115e-05, "loss": 57.9422, "step": 122700 }, { "epoch": 0.49576392732620383, "grad_norm": 1414.308837890625, "learning_rate": 3.0126068065082504e-05, "loss": 50.3865, "step": 122710 }, { "epoch": 0.4958043285915715, "grad_norm": 900.0178833007812, "learning_rate": 3.0122651522559553e-05, "loss": 54.273, "step": 122720 }, { "epoch": 0.4958447298569391, "grad_norm": 1027.331787109375, "learning_rate": 3.0119234880167867e-05, "loss": 46.3332, "step": 122730 }, { "epoch": 0.49588513112230675, "grad_norm": 527.3615112304688, "learning_rate": 3.0115818137974067e-05, "loss": 98.7442, "step": 122740 }, { "epoch": 0.4959255323876744, "grad_norm": 546.8230590820312, "learning_rate": 3.0112401296044757e-05, "loss": 68.4313, "step": 122750 }, { "epoch": 0.49596593365304203, "grad_norm": 835.651611328125, "learning_rate": 3.0108984354446556e-05, "loss": 45.3258, "step": 122760 }, { "epoch": 0.4960063349184096, "grad_norm": 459.29144287109375, "learning_rate": 3.0105567313246074e-05, "loss": 70.6443, "step": 122770 }, { "epoch": 0.49604673618377726, "grad_norm": 1120.5982666015625, "learning_rate": 3.010215017250993e-05, "loss": 73.4087, "step": 122780 }, { "epoch": 0.4960871374491449, "grad_norm": 815.575927734375, "learning_rate": 3.0098732932304734e-05, "loss": 51.7506, "step": 122790 }, { "epoch": 0.49612753871451254, "grad_norm": 529.2800903320312, "learning_rate": 3.0095315592697126e-05, "loss": 43.3278, "step": 122800 }, { "epoch": 0.4961679399798802, "grad_norm": 352.13128662109375, "learning_rate": 3.0091898153753705e-05, "loss": 52.9101, "step": 122810 }, { "epoch": 0.4962083412452478, "grad_norm": 2164.66064453125, "learning_rate": 3.0088480615541113e-05, "loss": 69.647, "step": 122820 }, { "epoch": 0.49624874251061546, "grad_norm": 468.87158203125, "learning_rate": 3.0085062978125967e-05, "loss": 65.0103, "step": 122830 }, { "epoch": 0.49628914377598304, "grad_norm": 771.53857421875, "learning_rate": 3.008164524157491e-05, "loss": 47.0131, "step": 122840 }, { "epoch": 0.4963295450413507, "grad_norm": 630.9580078125, "learning_rate": 3.0078227405954557e-05, "loss": 36.1967, "step": 122850 }, { "epoch": 0.4963699463067183, "grad_norm": 632.0338745117188, "learning_rate": 3.007480947133155e-05, "loss": 78.9644, "step": 122860 }, { "epoch": 0.49641034757208596, "grad_norm": 748.582763671875, "learning_rate": 3.0071391437772516e-05, "loss": 52.5742, "step": 122870 }, { "epoch": 0.4964507488374536, "grad_norm": 402.97882080078125, "learning_rate": 3.00679733053441e-05, "loss": 49.2446, "step": 122880 }, { "epoch": 0.49649115010282124, "grad_norm": 814.5822143554688, "learning_rate": 3.0064555074112927e-05, "loss": 49.1269, "step": 122890 }, { "epoch": 0.49653155136818883, "grad_norm": 484.45751953125, "learning_rate": 3.0061136744145652e-05, "loss": 46.5377, "step": 122900 }, { "epoch": 0.49657195263355647, "grad_norm": 1381.5684814453125, "learning_rate": 3.0057718315508905e-05, "loss": 56.9945, "step": 122910 }, { "epoch": 0.4966123538989241, "grad_norm": 1052.2681884765625, "learning_rate": 3.005429978826934e-05, "loss": 54.9941, "step": 122920 }, { "epoch": 0.49665275516429175, "grad_norm": 1034.386474609375, "learning_rate": 3.0050881162493593e-05, "loss": 46.4848, "step": 122930 }, { "epoch": 0.4966931564296594, "grad_norm": 919.5922241210938, "learning_rate": 3.004746243824833e-05, "loss": 56.9942, "step": 122940 }, { "epoch": 0.49673355769502703, "grad_norm": 787.20947265625, "learning_rate": 3.0044043615600175e-05, "loss": 65.2977, "step": 122950 }, { "epoch": 0.4967739589603946, "grad_norm": 0.0, "learning_rate": 3.0040624694615803e-05, "loss": 41.2134, "step": 122960 }, { "epoch": 0.49681436022576225, "grad_norm": 683.5494995117188, "learning_rate": 3.003720567536185e-05, "loss": 72.0073, "step": 122970 }, { "epoch": 0.4968547614911299, "grad_norm": 575.966796875, "learning_rate": 3.003378655790498e-05, "loss": 65.4787, "step": 122980 }, { "epoch": 0.49689516275649753, "grad_norm": 1353.0836181640625, "learning_rate": 3.0030367342311848e-05, "loss": 67.0195, "step": 122990 }, { "epoch": 0.4969355640218652, "grad_norm": 590.2861328125, "learning_rate": 3.002694802864912e-05, "loss": 53.1483, "step": 123000 }, { "epoch": 0.4969759652872328, "grad_norm": 1127.41259765625, "learning_rate": 3.002352861698345e-05, "loss": 88.6338, "step": 123010 }, { "epoch": 0.49701636655260045, "grad_norm": 902.4027099609375, "learning_rate": 3.00201091073815e-05, "loss": 39.8632, "step": 123020 }, { "epoch": 0.49705676781796804, "grad_norm": 760.8033447265625, "learning_rate": 3.0016689499909945e-05, "loss": 37.9242, "step": 123030 }, { "epoch": 0.4970971690833357, "grad_norm": 612.1544189453125, "learning_rate": 3.0013269794635446e-05, "loss": 56.6241, "step": 123040 }, { "epoch": 0.4971375703487033, "grad_norm": 777.5595092773438, "learning_rate": 3.0009849991624662e-05, "loss": 82.5385, "step": 123050 }, { "epoch": 0.49717797161407096, "grad_norm": 1702.484619140625, "learning_rate": 3.0006430090944277e-05, "loss": 59.1968, "step": 123060 }, { "epoch": 0.4972183728794386, "grad_norm": 537.38134765625, "learning_rate": 3.000301009266096e-05, "loss": 46.0222, "step": 123070 }, { "epoch": 0.49725877414480624, "grad_norm": 305.5016784667969, "learning_rate": 2.9999589996841386e-05, "loss": 57.2696, "step": 123080 }, { "epoch": 0.4972991754101738, "grad_norm": 844.5637817382812, "learning_rate": 2.9996169803552233e-05, "loss": 56.0423, "step": 123090 }, { "epoch": 0.49733957667554146, "grad_norm": 883.1146850585938, "learning_rate": 2.9992749512860173e-05, "loss": 32.5165, "step": 123100 }, { "epoch": 0.4973799779409091, "grad_norm": 1908.025634765625, "learning_rate": 2.99893291248319e-05, "loss": 110.2845, "step": 123110 }, { "epoch": 0.49742037920627674, "grad_norm": 1058.1068115234375, "learning_rate": 2.9985908639534075e-05, "loss": 67.6201, "step": 123120 }, { "epoch": 0.4974607804716444, "grad_norm": 1846.614013671875, "learning_rate": 2.998248805703341e-05, "loss": 49.1556, "step": 123130 }, { "epoch": 0.497501181737012, "grad_norm": 1011.847412109375, "learning_rate": 2.9979067377396565e-05, "loss": 25.5965, "step": 123140 }, { "epoch": 0.49754158300237966, "grad_norm": 627.6660766601562, "learning_rate": 2.9975646600690234e-05, "loss": 34.3208, "step": 123150 }, { "epoch": 0.49758198426774725, "grad_norm": 770.2060546875, "learning_rate": 2.9972225726981113e-05, "loss": 52.1218, "step": 123160 }, { "epoch": 0.4976223855331149, "grad_norm": 398.234130859375, "learning_rate": 2.99688047563359e-05, "loss": 60.0683, "step": 123170 }, { "epoch": 0.4976627867984825, "grad_norm": 1440.9393310546875, "learning_rate": 2.996538368882127e-05, "loss": 94.9321, "step": 123180 }, { "epoch": 0.49770318806385017, "grad_norm": 580.39892578125, "learning_rate": 2.9961962524503927e-05, "loss": 46.6695, "step": 123190 }, { "epoch": 0.4977435893292178, "grad_norm": 402.80419921875, "learning_rate": 2.9958541263450584e-05, "loss": 58.4956, "step": 123200 }, { "epoch": 0.49778399059458545, "grad_norm": 709.5952758789062, "learning_rate": 2.9955119905727925e-05, "loss": 54.5307, "step": 123210 }, { "epoch": 0.49782439185995303, "grad_norm": 418.1485290527344, "learning_rate": 2.995169845140264e-05, "loss": 33.9692, "step": 123220 }, { "epoch": 0.49786479312532067, "grad_norm": 555.658203125, "learning_rate": 2.994827690054145e-05, "loss": 57.7686, "step": 123230 }, { "epoch": 0.4979051943906883, "grad_norm": 1826.1614990234375, "learning_rate": 2.9944855253211052e-05, "loss": 59.2031, "step": 123240 }, { "epoch": 0.49794559565605595, "grad_norm": 728.795166015625, "learning_rate": 2.9941433509478156e-05, "loss": 64.7671, "step": 123250 }, { "epoch": 0.4979859969214236, "grad_norm": 562.8650512695312, "learning_rate": 2.993801166940947e-05, "loss": 30.4642, "step": 123260 }, { "epoch": 0.49802639818679123, "grad_norm": 718.48681640625, "learning_rate": 2.9934589733071704e-05, "loss": 43.2636, "step": 123270 }, { "epoch": 0.4980667994521588, "grad_norm": 775.666748046875, "learning_rate": 2.9931167700531578e-05, "loss": 64.5946, "step": 123280 }, { "epoch": 0.49810720071752645, "grad_norm": 941.3685302734375, "learning_rate": 2.9927745571855786e-05, "loss": 68.1941, "step": 123290 }, { "epoch": 0.4981476019828941, "grad_norm": 806.8750610351562, "learning_rate": 2.9924323347111073e-05, "loss": 75.4404, "step": 123300 }, { "epoch": 0.49818800324826173, "grad_norm": 1879.86279296875, "learning_rate": 2.992090102636413e-05, "loss": 82.7253, "step": 123310 }, { "epoch": 0.4982284045136294, "grad_norm": 500.6525573730469, "learning_rate": 2.991747860968168e-05, "loss": 59.617, "step": 123320 }, { "epoch": 0.498268805778997, "grad_norm": 576.9788818359375, "learning_rate": 2.9914056097130473e-05, "loss": 37.7997, "step": 123330 }, { "epoch": 0.49830920704436465, "grad_norm": 1132.8011474609375, "learning_rate": 2.9910633488777196e-05, "loss": 60.6151, "step": 123340 }, { "epoch": 0.49834960830973224, "grad_norm": 1793.87109375, "learning_rate": 2.99072107846886e-05, "loss": 49.606, "step": 123350 }, { "epoch": 0.4983900095750999, "grad_norm": 226.36434936523438, "learning_rate": 2.9903787984931396e-05, "loss": 29.6053, "step": 123360 }, { "epoch": 0.4984304108404675, "grad_norm": 257.8052673339844, "learning_rate": 2.9900365089572328e-05, "loss": 42.242, "step": 123370 }, { "epoch": 0.49847081210583516, "grad_norm": 719.3456420898438, "learning_rate": 2.9896942098678122e-05, "loss": 46.6997, "step": 123380 }, { "epoch": 0.4985112133712028, "grad_norm": 641.3980102539062, "learning_rate": 2.9893519012315503e-05, "loss": 67.5891, "step": 123390 }, { "epoch": 0.49855161463657044, "grad_norm": 685.2548828125, "learning_rate": 2.9890095830551207e-05, "loss": 60.4898, "step": 123400 }, { "epoch": 0.498592015901938, "grad_norm": 632.763671875, "learning_rate": 2.9886672553451985e-05, "loss": 61.0664, "step": 123410 }, { "epoch": 0.49863241716730566, "grad_norm": 0.0, "learning_rate": 2.988324918108456e-05, "loss": 56.5434, "step": 123420 }, { "epoch": 0.4986728184326733, "grad_norm": 1752.1431884765625, "learning_rate": 2.9879825713515676e-05, "loss": 105.4006, "step": 123430 }, { "epoch": 0.49871321969804094, "grad_norm": 575.599853515625, "learning_rate": 2.9876402150812078e-05, "loss": 52.491, "step": 123440 }, { "epoch": 0.4987536209634086, "grad_norm": 620.3259887695312, "learning_rate": 2.9872978493040514e-05, "loss": 48.9634, "step": 123450 }, { "epoch": 0.4987940222287762, "grad_norm": 837.0609741210938, "learning_rate": 2.9869554740267724e-05, "loss": 46.9197, "step": 123460 }, { "epoch": 0.49883442349414386, "grad_norm": 231.35792541503906, "learning_rate": 2.986613089256046e-05, "loss": 62.4719, "step": 123470 }, { "epoch": 0.49887482475951145, "grad_norm": 848.2174072265625, "learning_rate": 2.9862706949985463e-05, "loss": 63.5692, "step": 123480 }, { "epoch": 0.4989152260248791, "grad_norm": 482.8079833984375, "learning_rate": 2.9859282912609497e-05, "loss": 48.4904, "step": 123490 }, { "epoch": 0.49895562729024673, "grad_norm": 786.4293212890625, "learning_rate": 2.98558587804993e-05, "loss": 51.905, "step": 123500 }, { "epoch": 0.49899602855561437, "grad_norm": 967.361083984375, "learning_rate": 2.9852434553721642e-05, "loss": 55.1196, "step": 123510 }, { "epoch": 0.499036429820982, "grad_norm": 691.3804321289062, "learning_rate": 2.984901023234327e-05, "loss": 63.2347, "step": 123520 }, { "epoch": 0.49907683108634965, "grad_norm": 2461.232421875, "learning_rate": 2.9845585816430955e-05, "loss": 86.1027, "step": 123530 }, { "epoch": 0.49911723235171723, "grad_norm": 625.51220703125, "learning_rate": 2.9842161306051446e-05, "loss": 59.4591, "step": 123540 }, { "epoch": 0.4991576336170849, "grad_norm": 937.5235595703125, "learning_rate": 2.9838736701271514e-05, "loss": 86.4743, "step": 123550 }, { "epoch": 0.4991980348824525, "grad_norm": 719.2523803710938, "learning_rate": 2.9835312002157913e-05, "loss": 57.3074, "step": 123560 }, { "epoch": 0.49923843614782015, "grad_norm": 1146.9417724609375, "learning_rate": 2.983188720877741e-05, "loss": 59.0976, "step": 123570 }, { "epoch": 0.4992788374131878, "grad_norm": 643.1884155273438, "learning_rate": 2.9828462321196788e-05, "loss": 59.2565, "step": 123580 }, { "epoch": 0.49931923867855543, "grad_norm": 717.5197143554688, "learning_rate": 2.9825037339482804e-05, "loss": 62.1937, "step": 123590 }, { "epoch": 0.499359639943923, "grad_norm": 911.0812377929688, "learning_rate": 2.9821612263702226e-05, "loss": 53.7806, "step": 123600 }, { "epoch": 0.49940004120929066, "grad_norm": 655.2685546875, "learning_rate": 2.981818709392184e-05, "loss": 76.8916, "step": 123610 }, { "epoch": 0.4994404424746583, "grad_norm": 694.6813354492188, "learning_rate": 2.981476183020842e-05, "loss": 54.2385, "step": 123620 }, { "epoch": 0.49948084374002594, "grad_norm": 277.7393798828125, "learning_rate": 2.9811336472628737e-05, "loss": 56.6968, "step": 123630 }, { "epoch": 0.4995212450053936, "grad_norm": 2142.0654296875, "learning_rate": 2.9807911021249573e-05, "loss": 71.4143, "step": 123640 }, { "epoch": 0.4995616462707612, "grad_norm": 685.448974609375, "learning_rate": 2.9804485476137706e-05, "loss": 41.1674, "step": 123650 }, { "epoch": 0.49960204753612886, "grad_norm": 0.0, "learning_rate": 2.9801059837359925e-05, "loss": 67.2189, "step": 123660 }, { "epoch": 0.49964244880149644, "grad_norm": 563.3724975585938, "learning_rate": 2.979763410498301e-05, "loss": 42.097, "step": 123670 }, { "epoch": 0.4996828500668641, "grad_norm": 823.16259765625, "learning_rate": 2.9794208279073743e-05, "loss": 46.5123, "step": 123680 }, { "epoch": 0.4997232513322317, "grad_norm": 396.4298095703125, "learning_rate": 2.9790782359698914e-05, "loss": 38.3301, "step": 123690 }, { "epoch": 0.49976365259759936, "grad_norm": 487.17205810546875, "learning_rate": 2.9787356346925327e-05, "loss": 54.4319, "step": 123700 }, { "epoch": 0.499804053862967, "grad_norm": 1401.6353759765625, "learning_rate": 2.9783930240819758e-05, "loss": 89.3769, "step": 123710 }, { "epoch": 0.49984445512833464, "grad_norm": 401.6158447265625, "learning_rate": 2.978050404144901e-05, "loss": 39.3288, "step": 123720 }, { "epoch": 0.4998848563937022, "grad_norm": 1638.397216796875, "learning_rate": 2.977707774887987e-05, "loss": 85.3148, "step": 123730 }, { "epoch": 0.49992525765906987, "grad_norm": 525.0819702148438, "learning_rate": 2.9773651363179144e-05, "loss": 62.3121, "step": 123740 }, { "epoch": 0.4999656589244375, "grad_norm": 576.019775390625, "learning_rate": 2.9770224884413623e-05, "loss": 34.6271, "step": 123750 }, { "epoch": 0.5000060601898051, "grad_norm": 596.5224609375, "learning_rate": 2.9766798312650112e-05, "loss": 45.5462, "step": 123760 }, { "epoch": 0.5000464614551727, "grad_norm": 1210.31005859375, "learning_rate": 2.976337164795541e-05, "loss": 43.6167, "step": 123770 }, { "epoch": 0.5000868627205404, "grad_norm": 1921.7060546875, "learning_rate": 2.975994489039634e-05, "loss": 52.5438, "step": 123780 }, { "epoch": 0.500127263985908, "grad_norm": 906.3182373046875, "learning_rate": 2.9756518040039682e-05, "loss": 64.2235, "step": 123790 }, { "epoch": 0.5001676652512757, "grad_norm": 724.0418701171875, "learning_rate": 2.9753091096952255e-05, "loss": 63.5248, "step": 123800 }, { "epoch": 0.5002080665166433, "grad_norm": 1490.1466064453125, "learning_rate": 2.9749664061200877e-05, "loss": 38.7024, "step": 123810 }, { "epoch": 0.5002484677820109, "grad_norm": 262.531494140625, "learning_rate": 2.9746236932852355e-05, "loss": 66.6208, "step": 123820 }, { "epoch": 0.5002888690473786, "grad_norm": 283.681640625, "learning_rate": 2.974280971197349e-05, "loss": 45.1853, "step": 123830 }, { "epoch": 0.5003292703127462, "grad_norm": 636.9791259765625, "learning_rate": 2.973938239863111e-05, "loss": 54.2482, "step": 123840 }, { "epoch": 0.5003696715781139, "grad_norm": 1471.0166015625, "learning_rate": 2.9735954992892033e-05, "loss": 73.4811, "step": 123850 }, { "epoch": 0.5004100728434815, "grad_norm": 870.6261596679688, "learning_rate": 2.9732527494823083e-05, "loss": 69.1818, "step": 123860 }, { "epoch": 0.5004504741088491, "grad_norm": 1015.466064453125, "learning_rate": 2.9729099904491058e-05, "loss": 65.9109, "step": 123870 }, { "epoch": 0.5004908753742168, "grad_norm": 769.5751953125, "learning_rate": 2.97256722219628e-05, "loss": 41.2782, "step": 123880 }, { "epoch": 0.5005312766395843, "grad_norm": 814.8590087890625, "learning_rate": 2.9722244447305135e-05, "loss": 84.8106, "step": 123890 }, { "epoch": 0.5005716779049519, "grad_norm": 476.1192932128906, "learning_rate": 2.9718816580584884e-05, "loss": 43.7513, "step": 123900 }, { "epoch": 0.5006120791703196, "grad_norm": 729.9635620117188, "learning_rate": 2.9715388621868873e-05, "loss": 44.9804, "step": 123910 }, { "epoch": 0.5006524804356872, "grad_norm": 657.6481323242188, "learning_rate": 2.971196057122393e-05, "loss": 68.3173, "step": 123920 }, { "epoch": 0.5006928817010549, "grad_norm": 620.4969482421875, "learning_rate": 2.9708532428716883e-05, "loss": 50.9084, "step": 123930 }, { "epoch": 0.5007332829664225, "grad_norm": 840.12890625, "learning_rate": 2.9705104194414586e-05, "loss": 63.0937, "step": 123940 }, { "epoch": 0.5007736842317901, "grad_norm": 1180.94970703125, "learning_rate": 2.9701675868383848e-05, "loss": 69.3689, "step": 123950 }, { "epoch": 0.5008140854971578, "grad_norm": 630.738037109375, "learning_rate": 2.9698247450691525e-05, "loss": 56.1486, "step": 123960 }, { "epoch": 0.5008544867625254, "grad_norm": 1723.3427734375, "learning_rate": 2.9694818941404444e-05, "loss": 52.5067, "step": 123970 }, { "epoch": 0.5008948880278931, "grad_norm": 1080.7830810546875, "learning_rate": 2.9691390340589466e-05, "loss": 63.3121, "step": 123980 }, { "epoch": 0.5009352892932607, "grad_norm": 428.3757629394531, "learning_rate": 2.9687961648313405e-05, "loss": 45.4763, "step": 123990 }, { "epoch": 0.5009756905586283, "grad_norm": 585.084228515625, "learning_rate": 2.9684532864643122e-05, "loss": 47.8538, "step": 124000 }, { "epoch": 0.501016091823996, "grad_norm": 951.522705078125, "learning_rate": 2.9681103989645453e-05, "loss": 64.0775, "step": 124010 }, { "epoch": 0.5010564930893635, "grad_norm": 414.710693359375, "learning_rate": 2.9677675023387258e-05, "loss": 35.3562, "step": 124020 }, { "epoch": 0.5010968943547311, "grad_norm": 1510.4676513671875, "learning_rate": 2.9674245965935378e-05, "loss": 85.7635, "step": 124030 }, { "epoch": 0.5011372956200988, "grad_norm": 555.8351440429688, "learning_rate": 2.9670816817356668e-05, "loss": 39.8303, "step": 124040 }, { "epoch": 0.5011776968854664, "grad_norm": 976.0780639648438, "learning_rate": 2.9667387577717976e-05, "loss": 61.9712, "step": 124050 }, { "epoch": 0.5012180981508341, "grad_norm": 1387.890869140625, "learning_rate": 2.9663958247086166e-05, "loss": 58.7585, "step": 124060 }, { "epoch": 0.5012584994162017, "grad_norm": 1036.8333740234375, "learning_rate": 2.966052882552809e-05, "loss": 44.4313, "step": 124070 }, { "epoch": 0.5012989006815693, "grad_norm": 1022.4268798828125, "learning_rate": 2.9657099313110593e-05, "loss": 57.0276, "step": 124080 }, { "epoch": 0.501339301946937, "grad_norm": 1398.85546875, "learning_rate": 2.9653669709900555e-05, "loss": 58.4346, "step": 124090 }, { "epoch": 0.5013797032123046, "grad_norm": 496.8158264160156, "learning_rate": 2.9650240015964825e-05, "loss": 62.5083, "step": 124100 }, { "epoch": 0.5014201044776723, "grad_norm": 911.7360229492188, "learning_rate": 2.964681023137028e-05, "loss": 52.9003, "step": 124110 }, { "epoch": 0.5014605057430399, "grad_norm": 932.399658203125, "learning_rate": 2.9643380356183775e-05, "loss": 56.112, "step": 124120 }, { "epoch": 0.5015009070084075, "grad_norm": 0.0, "learning_rate": 2.9639950390472177e-05, "loss": 45.8005, "step": 124130 }, { "epoch": 0.5015413082737752, "grad_norm": 789.1591796875, "learning_rate": 2.9636520334302354e-05, "loss": 73.2515, "step": 124140 }, { "epoch": 0.5015817095391427, "grad_norm": 1213.167236328125, "learning_rate": 2.9633090187741185e-05, "loss": 45.1217, "step": 124150 }, { "epoch": 0.5016221108045104, "grad_norm": 1115.8275146484375, "learning_rate": 2.9629659950855544e-05, "loss": 48.8063, "step": 124160 }, { "epoch": 0.501662512069878, "grad_norm": 1056.0667724609375, "learning_rate": 2.9626229623712288e-05, "loss": 59.1212, "step": 124170 }, { "epoch": 0.5017029133352456, "grad_norm": 269.9258728027344, "learning_rate": 2.9622799206378305e-05, "loss": 83.2247, "step": 124180 }, { "epoch": 0.5017433146006133, "grad_norm": 539.0936889648438, "learning_rate": 2.961936869892048e-05, "loss": 53.4077, "step": 124190 }, { "epoch": 0.5017837158659809, "grad_norm": 3030.404052734375, "learning_rate": 2.9615938101405676e-05, "loss": 76.7048, "step": 124200 }, { "epoch": 0.5018241171313486, "grad_norm": 1794.911376953125, "learning_rate": 2.961250741390078e-05, "loss": 60.769, "step": 124210 }, { "epoch": 0.5018645183967162, "grad_norm": 1027.421875, "learning_rate": 2.960907663647268e-05, "loss": 43.8524, "step": 124220 }, { "epoch": 0.5019049196620838, "grad_norm": 726.89697265625, "learning_rate": 2.9605645769188268e-05, "loss": 57.9974, "step": 124230 }, { "epoch": 0.5019453209274515, "grad_norm": 741.7726440429688, "learning_rate": 2.9602214812114415e-05, "loss": 45.0123, "step": 124240 }, { "epoch": 0.5019857221928191, "grad_norm": 572.0324096679688, "learning_rate": 2.9598783765318007e-05, "loss": 41.7237, "step": 124250 }, { "epoch": 0.5020261234581868, "grad_norm": 905.0762329101562, "learning_rate": 2.9595352628865947e-05, "loss": 60.8909, "step": 124260 }, { "epoch": 0.5020665247235544, "grad_norm": 418.3033447265625, "learning_rate": 2.9591921402825123e-05, "loss": 56.9024, "step": 124270 }, { "epoch": 0.5021069259889219, "grad_norm": 642.4514770507812, "learning_rate": 2.958849008726242e-05, "loss": 56.9975, "step": 124280 }, { "epoch": 0.5021473272542896, "grad_norm": 545.2382202148438, "learning_rate": 2.9585058682244748e-05, "loss": 54.7912, "step": 124290 }, { "epoch": 0.5021877285196572, "grad_norm": 1271.816162109375, "learning_rate": 2.9581627187838994e-05, "loss": 67.879, "step": 124300 }, { "epoch": 0.5022281297850248, "grad_norm": 404.8785095214844, "learning_rate": 2.9578195604112064e-05, "loss": 68.5172, "step": 124310 }, { "epoch": 0.5022685310503925, "grad_norm": 1688.64599609375, "learning_rate": 2.9574763931130843e-05, "loss": 53.8717, "step": 124320 }, { "epoch": 0.5023089323157601, "grad_norm": 574.3252563476562, "learning_rate": 2.9571332168962256e-05, "loss": 66.5893, "step": 124330 }, { "epoch": 0.5023493335811278, "grad_norm": 418.29864501953125, "learning_rate": 2.956790031767319e-05, "loss": 47.6842, "step": 124340 }, { "epoch": 0.5023897348464954, "grad_norm": 1550.780029296875, "learning_rate": 2.9564468377330556e-05, "loss": 66.8126, "step": 124350 }, { "epoch": 0.502430136111863, "grad_norm": 5627.90478515625, "learning_rate": 2.956103634800126e-05, "loss": 86.4269, "step": 124360 }, { "epoch": 0.5024705373772307, "grad_norm": 2004.539306640625, "learning_rate": 2.9557604229752212e-05, "loss": 57.9298, "step": 124370 }, { "epoch": 0.5025109386425983, "grad_norm": 531.3135986328125, "learning_rate": 2.9554172022650317e-05, "loss": 53.1231, "step": 124380 }, { "epoch": 0.502551339907966, "grad_norm": 1106.7730712890625, "learning_rate": 2.9550739726762507e-05, "loss": 62.664, "step": 124390 }, { "epoch": 0.5025917411733335, "grad_norm": 1226.38818359375, "learning_rate": 2.9547307342155673e-05, "loss": 94.7886, "step": 124400 }, { "epoch": 0.5026321424387011, "grad_norm": 608.1491088867188, "learning_rate": 2.9543874868896747e-05, "loss": 71.896, "step": 124410 }, { "epoch": 0.5026725437040688, "grad_norm": 240.57858276367188, "learning_rate": 2.954044230705264e-05, "loss": 96.0706, "step": 124420 }, { "epoch": 0.5027129449694364, "grad_norm": 596.6292114257812, "learning_rate": 2.9537009656690275e-05, "loss": 50.2395, "step": 124430 }, { "epoch": 0.502753346234804, "grad_norm": 742.9195556640625, "learning_rate": 2.953357691787656e-05, "loss": 39.4977, "step": 124440 }, { "epoch": 0.5027937475001717, "grad_norm": 715.0127563476562, "learning_rate": 2.9530144090678435e-05, "loss": 40.7906, "step": 124450 }, { "epoch": 0.5028341487655393, "grad_norm": 494.5076599121094, "learning_rate": 2.952671117516282e-05, "loss": 57.1695, "step": 124460 }, { "epoch": 0.502874550030907, "grad_norm": 766.6427001953125, "learning_rate": 2.952327817139664e-05, "loss": 62.9475, "step": 124470 }, { "epoch": 0.5029149512962746, "grad_norm": 670.2885131835938, "learning_rate": 2.9519845079446823e-05, "loss": 47.9745, "step": 124480 }, { "epoch": 0.5029553525616423, "grad_norm": 636.1236572265625, "learning_rate": 2.9516411899380296e-05, "loss": 57.113, "step": 124490 }, { "epoch": 0.5029957538270099, "grad_norm": 434.45477294921875, "learning_rate": 2.9512978631264006e-05, "loss": 44.9944, "step": 124500 }, { "epoch": 0.5030361550923775, "grad_norm": 687.5653686523438, "learning_rate": 2.950954527516487e-05, "loss": 50.9721, "step": 124510 }, { "epoch": 0.5030765563577452, "grad_norm": 451.1505126953125, "learning_rate": 2.9506111831149818e-05, "loss": 44.7546, "step": 124520 }, { "epoch": 0.5031169576231127, "grad_norm": 1212.3389892578125, "learning_rate": 2.9502678299285798e-05, "loss": 44.7764, "step": 124530 }, { "epoch": 0.5031573588884803, "grad_norm": 572.8779907226562, "learning_rate": 2.949924467963975e-05, "loss": 48.557, "step": 124540 }, { "epoch": 0.503197760153848, "grad_norm": 520.7388916015625, "learning_rate": 2.949581097227861e-05, "loss": 80.5346, "step": 124550 }, { "epoch": 0.5032381614192156, "grad_norm": 713.9329833984375, "learning_rate": 2.9492377177269315e-05, "loss": 32.9715, "step": 124560 }, { "epoch": 0.5032785626845833, "grad_norm": 978.047119140625, "learning_rate": 2.9488943294678818e-05, "loss": 52.3525, "step": 124570 }, { "epoch": 0.5033189639499509, "grad_norm": 1501.214599609375, "learning_rate": 2.948550932457407e-05, "loss": 55.6765, "step": 124580 }, { "epoch": 0.5033593652153185, "grad_norm": 625.777587890625, "learning_rate": 2.9482075267021995e-05, "loss": 46.8336, "step": 124590 }, { "epoch": 0.5033997664806862, "grad_norm": 601.8237915039062, "learning_rate": 2.9478641122089562e-05, "loss": 51.4297, "step": 124600 }, { "epoch": 0.5034401677460538, "grad_norm": 924.4441528320312, "learning_rate": 2.947520688984371e-05, "loss": 60.7378, "step": 124610 }, { "epoch": 0.5034805690114215, "grad_norm": 516.689453125, "learning_rate": 2.9471772570351398e-05, "loss": 31.4356, "step": 124620 }, { "epoch": 0.5035209702767891, "grad_norm": 1193.8798828125, "learning_rate": 2.9468338163679577e-05, "loss": 60.812, "step": 124630 }, { "epoch": 0.5035613715421567, "grad_norm": 663.6973266601562, "learning_rate": 2.9464903669895205e-05, "loss": 35.3694, "step": 124640 }, { "epoch": 0.5036017728075244, "grad_norm": 1353.3345947265625, "learning_rate": 2.9461469089065234e-05, "loss": 51.9646, "step": 124650 }, { "epoch": 0.5036421740728919, "grad_norm": 529.1851806640625, "learning_rate": 2.945803442125663e-05, "loss": 48.2301, "step": 124660 }, { "epoch": 0.5036825753382596, "grad_norm": 331.9324035644531, "learning_rate": 2.9454599666536347e-05, "loss": 54.444, "step": 124670 }, { "epoch": 0.5037229766036272, "grad_norm": 620.619384765625, "learning_rate": 2.9451164824971356e-05, "loss": 92.8183, "step": 124680 }, { "epoch": 0.5037633778689948, "grad_norm": 441.77734375, "learning_rate": 2.9447729896628612e-05, "loss": 47.082, "step": 124690 }, { "epoch": 0.5038037791343625, "grad_norm": 891.2379150390625, "learning_rate": 2.944429488157508e-05, "loss": 70.2383, "step": 124700 }, { "epoch": 0.5038441803997301, "grad_norm": 1449.03955078125, "learning_rate": 2.9440859779877728e-05, "loss": 57.3963, "step": 124710 }, { "epoch": 0.5038845816650978, "grad_norm": 1024.277587890625, "learning_rate": 2.943742459160354e-05, "loss": 48.08, "step": 124720 }, { "epoch": 0.5039249829304654, "grad_norm": 272.8819885253906, "learning_rate": 2.9433989316819467e-05, "loss": 65.6629, "step": 124730 }, { "epoch": 0.503965384195833, "grad_norm": 665.0719604492188, "learning_rate": 2.943055395559249e-05, "loss": 60.4535, "step": 124740 }, { "epoch": 0.5040057854612007, "grad_norm": 1616.3466796875, "learning_rate": 2.9427118507989586e-05, "loss": 88.4401, "step": 124750 }, { "epoch": 0.5040461867265683, "grad_norm": 1536.4730224609375, "learning_rate": 2.942368297407772e-05, "loss": 67.7257, "step": 124760 }, { "epoch": 0.504086587991936, "grad_norm": 258.504638671875, "learning_rate": 2.942024735392389e-05, "loss": 45.7606, "step": 124770 }, { "epoch": 0.5041269892573036, "grad_norm": 1484.1357421875, "learning_rate": 2.9416811647595048e-05, "loss": 65.3528, "step": 124780 }, { "epoch": 0.5041673905226711, "grad_norm": 933.3783569335938, "learning_rate": 2.94133758551582e-05, "loss": 30.6998, "step": 124790 }, { "epoch": 0.5042077917880388, "grad_norm": 437.1009216308594, "learning_rate": 2.9409939976680313e-05, "loss": 73.0693, "step": 124800 }, { "epoch": 0.5042481930534064, "grad_norm": 888.6316528320312, "learning_rate": 2.9406504012228375e-05, "loss": 38.9805, "step": 124810 }, { "epoch": 0.504288594318774, "grad_norm": 454.36627197265625, "learning_rate": 2.9403067961869367e-05, "loss": 32.8442, "step": 124820 }, { "epoch": 0.5043289955841417, "grad_norm": 648.9022827148438, "learning_rate": 2.9399631825670292e-05, "loss": 40.4288, "step": 124830 }, { "epoch": 0.5043693968495093, "grad_norm": 967.1868286132812, "learning_rate": 2.939619560369813e-05, "loss": 63.7735, "step": 124840 }, { "epoch": 0.504409798114877, "grad_norm": 1663.5084228515625, "learning_rate": 2.9392759296019867e-05, "loss": 57.5468, "step": 124850 }, { "epoch": 0.5044501993802446, "grad_norm": 1367.6448974609375, "learning_rate": 2.9389322902702497e-05, "loss": 61.3237, "step": 124860 }, { "epoch": 0.5044906006456122, "grad_norm": 756.0995483398438, "learning_rate": 2.9385886423813024e-05, "loss": 65.3766, "step": 124870 }, { "epoch": 0.5045310019109799, "grad_norm": 630.2203369140625, "learning_rate": 2.938244985941844e-05, "loss": 42.1302, "step": 124880 }, { "epoch": 0.5045714031763475, "grad_norm": 451.697998046875, "learning_rate": 2.9379013209585726e-05, "loss": 47.7857, "step": 124890 }, { "epoch": 0.5046118044417152, "grad_norm": 890.1043701171875, "learning_rate": 2.9375576474381905e-05, "loss": 51.9966, "step": 124900 }, { "epoch": 0.5046522057070827, "grad_norm": 901.4338989257812, "learning_rate": 2.9372139653873958e-05, "loss": 65.1977, "step": 124910 }, { "epoch": 0.5046926069724503, "grad_norm": 1069.64208984375, "learning_rate": 2.9368702748128912e-05, "loss": 55.9412, "step": 124920 }, { "epoch": 0.504733008237818, "grad_norm": 1394.1937255859375, "learning_rate": 2.9365265757213745e-05, "loss": 66.5137, "step": 124930 }, { "epoch": 0.5047734095031856, "grad_norm": 260.0003967285156, "learning_rate": 2.9361828681195484e-05, "loss": 46.3195, "step": 124940 }, { "epoch": 0.5048138107685533, "grad_norm": 521.545654296875, "learning_rate": 2.9358391520141122e-05, "loss": 54.6765, "step": 124950 }, { "epoch": 0.5048542120339209, "grad_norm": 676.0771484375, "learning_rate": 2.935495427411768e-05, "loss": 27.8798, "step": 124960 }, { "epoch": 0.5048946132992885, "grad_norm": 554.6319580078125, "learning_rate": 2.9351516943192155e-05, "loss": 39.7234, "step": 124970 }, { "epoch": 0.5049350145646562, "grad_norm": 1820.2799072265625, "learning_rate": 2.9348079527431567e-05, "loss": 47.2992, "step": 124980 }, { "epoch": 0.5049754158300238, "grad_norm": 730.9981689453125, "learning_rate": 2.9344642026902924e-05, "loss": 58.4385, "step": 124990 }, { "epoch": 0.5050158170953915, "grad_norm": 232.58773803710938, "learning_rate": 2.9341204441673266e-05, "loss": 51.1445, "step": 125000 }, { "epoch": 0.5050562183607591, "grad_norm": 744.6976318359375, "learning_rate": 2.9337766771809577e-05, "loss": 56.7142, "step": 125010 }, { "epoch": 0.5050966196261267, "grad_norm": 807.66748046875, "learning_rate": 2.9334329017378898e-05, "loss": 66.2732, "step": 125020 }, { "epoch": 0.5051370208914944, "grad_norm": 1156.4183349609375, "learning_rate": 2.933089117844824e-05, "loss": 53.764, "step": 125030 }, { "epoch": 0.5051774221568619, "grad_norm": 1409.17626953125, "learning_rate": 2.9327453255084638e-05, "loss": 51.1774, "step": 125040 }, { "epoch": 0.5052178234222295, "grad_norm": 1206.735107421875, "learning_rate": 2.9324015247355098e-05, "loss": 50.6902, "step": 125050 }, { "epoch": 0.5052582246875972, "grad_norm": 1590.0701904296875, "learning_rate": 2.932057715532665e-05, "loss": 61.7914, "step": 125060 }, { "epoch": 0.5052986259529648, "grad_norm": 905.3733520507812, "learning_rate": 2.9317138979066327e-05, "loss": 44.9613, "step": 125070 }, { "epoch": 0.5053390272183325, "grad_norm": 747.811279296875, "learning_rate": 2.9313700718641167e-05, "loss": 65.6674, "step": 125080 }, { "epoch": 0.5053794284837001, "grad_norm": 0.0, "learning_rate": 2.9310262374118185e-05, "loss": 45.0609, "step": 125090 }, { "epoch": 0.5054198297490677, "grad_norm": 528.493408203125, "learning_rate": 2.9306823945564422e-05, "loss": 58.0582, "step": 125100 }, { "epoch": 0.5054602310144354, "grad_norm": 680.7048950195312, "learning_rate": 2.9303385433046902e-05, "loss": 36.582, "step": 125110 }, { "epoch": 0.505500632279803, "grad_norm": 767.6009521484375, "learning_rate": 2.9299946836632673e-05, "loss": 40.1557, "step": 125120 }, { "epoch": 0.5055410335451707, "grad_norm": 1023.16015625, "learning_rate": 2.929650815638877e-05, "loss": 47.9998, "step": 125130 }, { "epoch": 0.5055814348105383, "grad_norm": 588.2609252929688, "learning_rate": 2.9293069392382224e-05, "loss": 50.6491, "step": 125140 }, { "epoch": 0.5056218360759059, "grad_norm": 831.1854858398438, "learning_rate": 2.9289630544680075e-05, "loss": 68.4844, "step": 125150 }, { "epoch": 0.5056622373412736, "grad_norm": 1181.1595458984375, "learning_rate": 2.9286191613349374e-05, "loss": 84.0284, "step": 125160 }, { "epoch": 0.5057026386066411, "grad_norm": 435.8255920410156, "learning_rate": 2.9282752598457165e-05, "loss": 58.2437, "step": 125170 }, { "epoch": 0.5057430398720087, "grad_norm": 727.278564453125, "learning_rate": 2.9279313500070483e-05, "loss": 66.3914, "step": 125180 }, { "epoch": 0.5057834411373764, "grad_norm": 302.980224609375, "learning_rate": 2.927587431825639e-05, "loss": 44.221, "step": 125190 }, { "epoch": 0.505823842402744, "grad_norm": 758.2953491210938, "learning_rate": 2.9272435053081922e-05, "loss": 65.575, "step": 125200 }, { "epoch": 0.5058642436681117, "grad_norm": 181.88172912597656, "learning_rate": 2.9268995704614132e-05, "loss": 40.2345, "step": 125210 }, { "epoch": 0.5059046449334793, "grad_norm": 920.4985961914062, "learning_rate": 2.926555627292007e-05, "loss": 51.6828, "step": 125220 }, { "epoch": 0.505945046198847, "grad_norm": 1029.88330078125, "learning_rate": 2.9262116758066793e-05, "loss": 64.6292, "step": 125230 }, { "epoch": 0.5059854474642146, "grad_norm": 733.74951171875, "learning_rate": 2.9258677160121352e-05, "loss": 53.2531, "step": 125240 }, { "epoch": 0.5060258487295822, "grad_norm": 2031.1961669921875, "learning_rate": 2.9255237479150816e-05, "loss": 62.1065, "step": 125250 }, { "epoch": 0.5060662499949499, "grad_norm": 900.5405883789062, "learning_rate": 2.925179771522223e-05, "loss": 52.0671, "step": 125260 }, { "epoch": 0.5061066512603175, "grad_norm": 554.5584106445312, "learning_rate": 2.924835786840266e-05, "loss": 38.7164, "step": 125270 }, { "epoch": 0.5061470525256851, "grad_norm": 386.526123046875, "learning_rate": 2.9244917938759163e-05, "loss": 54.9804, "step": 125280 }, { "epoch": 0.5061874537910528, "grad_norm": 616.0713500976562, "learning_rate": 2.9241477926358818e-05, "loss": 56.0183, "step": 125290 }, { "epoch": 0.5062278550564203, "grad_norm": 831.8729858398438, "learning_rate": 2.923803783126866e-05, "loss": 46.269, "step": 125300 }, { "epoch": 0.506268256321788, "grad_norm": 506.3004455566406, "learning_rate": 2.923459765355578e-05, "loss": 44.9364, "step": 125310 }, { "epoch": 0.5063086575871556, "grad_norm": 662.8009033203125, "learning_rate": 2.9231157393287234e-05, "loss": 44.294, "step": 125320 }, { "epoch": 0.5063490588525232, "grad_norm": 1044.3468017578125, "learning_rate": 2.9227717050530107e-05, "loss": 59.7929, "step": 125330 }, { "epoch": 0.5063894601178909, "grad_norm": 836.830322265625, "learning_rate": 2.922427662535145e-05, "loss": 50.6092, "step": 125340 }, { "epoch": 0.5064298613832585, "grad_norm": 848.728759765625, "learning_rate": 2.9220836117818344e-05, "loss": 46.637, "step": 125350 }, { "epoch": 0.5064702626486262, "grad_norm": 456.876953125, "learning_rate": 2.9217395527997875e-05, "loss": 45.2742, "step": 125360 }, { "epoch": 0.5065106639139938, "grad_norm": 780.4397583007812, "learning_rate": 2.921395485595711e-05, "loss": 47.9569, "step": 125370 }, { "epoch": 0.5065510651793614, "grad_norm": 851.5180053710938, "learning_rate": 2.9210514101763113e-05, "loss": 74.7248, "step": 125380 }, { "epoch": 0.5065914664447291, "grad_norm": 776.1827392578125, "learning_rate": 2.9207073265482982e-05, "loss": 49.9978, "step": 125390 }, { "epoch": 0.5066318677100967, "grad_norm": 528.63330078125, "learning_rate": 2.920363234718379e-05, "loss": 73.2618, "step": 125400 }, { "epoch": 0.5066722689754644, "grad_norm": 1279.852294921875, "learning_rate": 2.9200191346932627e-05, "loss": 48.9025, "step": 125410 }, { "epoch": 0.506712670240832, "grad_norm": 1353.2442626953125, "learning_rate": 2.919675026479656e-05, "loss": 51.6571, "step": 125420 }, { "epoch": 0.5067530715061995, "grad_norm": 1191.01416015625, "learning_rate": 2.9193309100842693e-05, "loss": 46.1766, "step": 125430 }, { "epoch": 0.5067934727715672, "grad_norm": 1029.6173095703125, "learning_rate": 2.9189867855138103e-05, "loss": 58.42, "step": 125440 }, { "epoch": 0.5068338740369348, "grad_norm": 575.3421630859375, "learning_rate": 2.918642652774989e-05, "loss": 49.3698, "step": 125450 }, { "epoch": 0.5068742753023024, "grad_norm": 710.7742919921875, "learning_rate": 2.9182985118745136e-05, "loss": 46.0317, "step": 125460 }, { "epoch": 0.5069146765676701, "grad_norm": 794.8711547851562, "learning_rate": 2.9179543628190925e-05, "loss": 61.0076, "step": 125470 }, { "epoch": 0.5069550778330377, "grad_norm": 621.3440551757812, "learning_rate": 2.9176102056154363e-05, "loss": 58.3854, "step": 125480 }, { "epoch": 0.5069954790984054, "grad_norm": 617.9365844726562, "learning_rate": 2.9172660402702546e-05, "loss": 47.3791, "step": 125490 }, { "epoch": 0.507035880363773, "grad_norm": 902.1868286132812, "learning_rate": 2.916921866790256e-05, "loss": 49.7716, "step": 125500 }, { "epoch": 0.5070762816291406, "grad_norm": 882.5235595703125, "learning_rate": 2.9165776851821508e-05, "loss": 44.4464, "step": 125510 }, { "epoch": 0.5071166828945083, "grad_norm": 893.1301879882812, "learning_rate": 2.9162334954526493e-05, "loss": 40.7801, "step": 125520 }, { "epoch": 0.5071570841598759, "grad_norm": 457.0363464355469, "learning_rate": 2.915889297608462e-05, "loss": 43.9219, "step": 125530 }, { "epoch": 0.5071974854252436, "grad_norm": 504.5679626464844, "learning_rate": 2.9155450916562994e-05, "loss": 33.9357, "step": 125540 }, { "epoch": 0.5072378866906111, "grad_norm": 866.373046875, "learning_rate": 2.91520087760287e-05, "loss": 72.3561, "step": 125550 }, { "epoch": 0.5072782879559787, "grad_norm": 599.7020874023438, "learning_rate": 2.9148566554548857e-05, "loss": 53.9173, "step": 125560 }, { "epoch": 0.5073186892213464, "grad_norm": 567.0419921875, "learning_rate": 2.914512425219058e-05, "loss": 71.651, "step": 125570 }, { "epoch": 0.507359090486714, "grad_norm": 628.0565795898438, "learning_rate": 2.914168186902097e-05, "loss": 56.5678, "step": 125580 }, { "epoch": 0.5073994917520817, "grad_norm": 528.4154663085938, "learning_rate": 2.9138239405107136e-05, "loss": 42.4691, "step": 125590 }, { "epoch": 0.5074398930174493, "grad_norm": 829.2301635742188, "learning_rate": 2.9134796860516194e-05, "loss": 49.0979, "step": 125600 }, { "epoch": 0.5074802942828169, "grad_norm": 1135.746826171875, "learning_rate": 2.9131354235315268e-05, "loss": 60.1448, "step": 125610 }, { "epoch": 0.5075206955481846, "grad_norm": 402.7625427246094, "learning_rate": 2.912791152957145e-05, "loss": 69.6794, "step": 125620 }, { "epoch": 0.5075610968135522, "grad_norm": 654.6475219726562, "learning_rate": 2.9124468743351884e-05, "loss": 58.2971, "step": 125630 }, { "epoch": 0.5076014980789199, "grad_norm": 552.7247924804688, "learning_rate": 2.9121025876723674e-05, "loss": 66.4654, "step": 125640 }, { "epoch": 0.5076418993442875, "grad_norm": 887.902099609375, "learning_rate": 2.9117582929753932e-05, "loss": 76.861, "step": 125650 }, { "epoch": 0.5076823006096551, "grad_norm": 966.7318725585938, "learning_rate": 2.9114139902509807e-05, "loss": 57.0813, "step": 125660 }, { "epoch": 0.5077227018750228, "grad_norm": 1011.2301635742188, "learning_rate": 2.9110696795058394e-05, "loss": 45.7711, "step": 125670 }, { "epoch": 0.5077631031403903, "grad_norm": 557.7918090820312, "learning_rate": 2.9107253607466832e-05, "loss": 55.3871, "step": 125680 }, { "epoch": 0.5078035044057579, "grad_norm": 3118.763916015625, "learning_rate": 2.910381033980225e-05, "loss": 50.6585, "step": 125690 }, { "epoch": 0.5078439056711256, "grad_norm": 863.99658203125, "learning_rate": 2.910036699213178e-05, "loss": 68.7242, "step": 125700 }, { "epoch": 0.5078843069364932, "grad_norm": 1471.3134765625, "learning_rate": 2.909692356452254e-05, "loss": 52.9448, "step": 125710 }, { "epoch": 0.5079247082018609, "grad_norm": 553.6021118164062, "learning_rate": 2.9093480057041662e-05, "loss": 55.443, "step": 125720 }, { "epoch": 0.5079651094672285, "grad_norm": 329.5192565917969, "learning_rate": 2.9090036469756276e-05, "loss": 67.455, "step": 125730 }, { "epoch": 0.5080055107325961, "grad_norm": 1218.230712890625, "learning_rate": 2.9086592802733536e-05, "loss": 84.1644, "step": 125740 }, { "epoch": 0.5080459119979638, "grad_norm": 140.51890563964844, "learning_rate": 2.908314905604056e-05, "loss": 58.1322, "step": 125750 }, { "epoch": 0.5080863132633314, "grad_norm": 557.7669677734375, "learning_rate": 2.9079705229744493e-05, "loss": 58.6665, "step": 125760 }, { "epoch": 0.5081267145286991, "grad_norm": 357.09234619140625, "learning_rate": 2.907626132391246e-05, "loss": 33.4991, "step": 125770 }, { "epoch": 0.5081671157940667, "grad_norm": 404.6373596191406, "learning_rate": 2.9072817338611636e-05, "loss": 89.8573, "step": 125780 }, { "epoch": 0.5082075170594343, "grad_norm": 606.2252807617188, "learning_rate": 2.9069373273909123e-05, "loss": 61.2002, "step": 125790 }, { "epoch": 0.508247918324802, "grad_norm": 458.4631652832031, "learning_rate": 2.9065929129872094e-05, "loss": 46.1727, "step": 125800 }, { "epoch": 0.5082883195901695, "grad_norm": 901.8555908203125, "learning_rate": 2.906248490656768e-05, "loss": 48.2973, "step": 125810 }, { "epoch": 0.5083287208555372, "grad_norm": 676.2325439453125, "learning_rate": 2.905904060406303e-05, "loss": 57.2065, "step": 125820 }, { "epoch": 0.5083691221209048, "grad_norm": 1410.12158203125, "learning_rate": 2.905559622242529e-05, "loss": 45.1312, "step": 125830 }, { "epoch": 0.5084095233862724, "grad_norm": 386.5525207519531, "learning_rate": 2.9052151761721617e-05, "loss": 73.7721, "step": 125840 }, { "epoch": 0.5084499246516401, "grad_norm": 1108.298583984375, "learning_rate": 2.9048707222019154e-05, "loss": 66.7324, "step": 125850 }, { "epoch": 0.5084903259170077, "grad_norm": 752.5873413085938, "learning_rate": 2.904526260338507e-05, "loss": 52.5618, "step": 125860 }, { "epoch": 0.5085307271823754, "grad_norm": 393.9751281738281, "learning_rate": 2.9041817905886504e-05, "loss": 55.0743, "step": 125870 }, { "epoch": 0.508571128447743, "grad_norm": 509.97735595703125, "learning_rate": 2.9038373129590622e-05, "loss": 51.6777, "step": 125880 }, { "epoch": 0.5086115297131106, "grad_norm": 1160.9713134765625, "learning_rate": 2.903492827456457e-05, "loss": 59.5145, "step": 125890 }, { "epoch": 0.5086519309784783, "grad_norm": 510.33514404296875, "learning_rate": 2.903148334087552e-05, "loss": 34.4883, "step": 125900 }, { "epoch": 0.5086923322438459, "grad_norm": 778.451171875, "learning_rate": 2.9028038328590617e-05, "loss": 57.813, "step": 125910 }, { "epoch": 0.5087327335092136, "grad_norm": 604.14990234375, "learning_rate": 2.9024593237777037e-05, "loss": 44.8038, "step": 125920 }, { "epoch": 0.5087731347745812, "grad_norm": 1083.8634033203125, "learning_rate": 2.902114806850194e-05, "loss": 66.7281, "step": 125930 }, { "epoch": 0.5088135360399487, "grad_norm": 626.0941772460938, "learning_rate": 2.9017702820832498e-05, "loss": 64.0597, "step": 125940 }, { "epoch": 0.5088539373053164, "grad_norm": 2549.89453125, "learning_rate": 2.9014257494835862e-05, "loss": 83.0733, "step": 125950 }, { "epoch": 0.508894338570684, "grad_norm": 271.8528137207031, "learning_rate": 2.901081209057921e-05, "loss": 72.4616, "step": 125960 }, { "epoch": 0.5089347398360516, "grad_norm": 418.9285583496094, "learning_rate": 2.900736660812972e-05, "loss": 32.0242, "step": 125970 }, { "epoch": 0.5089751411014193, "grad_norm": 748.1174926757812, "learning_rate": 2.900392104755455e-05, "loss": 62.5196, "step": 125980 }, { "epoch": 0.5090155423667869, "grad_norm": 1057.87060546875, "learning_rate": 2.900047540892088e-05, "loss": 66.0844, "step": 125990 }, { "epoch": 0.5090559436321546, "grad_norm": 1053.06689453125, "learning_rate": 2.8997029692295874e-05, "loss": 59.8133, "step": 126000 }, { "epoch": 0.5090963448975222, "grad_norm": 1008.4521484375, "learning_rate": 2.8993583897746717e-05, "loss": 67.719, "step": 126010 }, { "epoch": 0.5091367461628898, "grad_norm": 400.8494567871094, "learning_rate": 2.8990138025340596e-05, "loss": 55.9787, "step": 126020 }, { "epoch": 0.5091771474282575, "grad_norm": 776.619140625, "learning_rate": 2.8986692075144673e-05, "loss": 53.733, "step": 126030 }, { "epoch": 0.5092175486936251, "grad_norm": 344.3357238769531, "learning_rate": 2.8983246047226135e-05, "loss": 49.7655, "step": 126040 }, { "epoch": 0.5092579499589928, "grad_norm": 461.5896911621094, "learning_rate": 2.897979994165217e-05, "loss": 66.4076, "step": 126050 }, { "epoch": 0.5092983512243604, "grad_norm": 0.0, "learning_rate": 2.8976353758489955e-05, "loss": 44.4132, "step": 126060 }, { "epoch": 0.5093387524897279, "grad_norm": 446.7651062011719, "learning_rate": 2.897290749780667e-05, "loss": 48.4824, "step": 126070 }, { "epoch": 0.5093791537550956, "grad_norm": 1131.740966796875, "learning_rate": 2.8969461159669513e-05, "loss": 45.0098, "step": 126080 }, { "epoch": 0.5094195550204632, "grad_norm": 1675.897705078125, "learning_rate": 2.8966014744145663e-05, "loss": 93.9438, "step": 126090 }, { "epoch": 0.5094599562858309, "grad_norm": 665.18994140625, "learning_rate": 2.8962568251302324e-05, "loss": 52.6255, "step": 126100 }, { "epoch": 0.5095003575511985, "grad_norm": 404.408203125, "learning_rate": 2.895912168120667e-05, "loss": 57.1605, "step": 126110 }, { "epoch": 0.5095407588165661, "grad_norm": 690.7555541992188, "learning_rate": 2.8955675033925895e-05, "loss": 63.0719, "step": 126120 }, { "epoch": 0.5095811600819338, "grad_norm": 937.3909912109375, "learning_rate": 2.89522283095272e-05, "loss": 61.4134, "step": 126130 }, { "epoch": 0.5096215613473014, "grad_norm": 1157.58642578125, "learning_rate": 2.8948781508077786e-05, "loss": 43.2224, "step": 126140 }, { "epoch": 0.509661962612669, "grad_norm": 766.99609375, "learning_rate": 2.894533462964485e-05, "loss": 45.9795, "step": 126150 }, { "epoch": 0.5097023638780367, "grad_norm": 1816.978515625, "learning_rate": 2.894188767429557e-05, "loss": 53.0026, "step": 126160 }, { "epoch": 0.5097427651434043, "grad_norm": 813.2989501953125, "learning_rate": 2.8938440642097164e-05, "loss": 37.4644, "step": 126170 }, { "epoch": 0.509783166408772, "grad_norm": 691.785888671875, "learning_rate": 2.893499353311683e-05, "loss": 51.4019, "step": 126180 }, { "epoch": 0.5098235676741395, "grad_norm": 1490.9388427734375, "learning_rate": 2.8931546347421773e-05, "loss": 45.6698, "step": 126190 }, { "epoch": 0.5098639689395071, "grad_norm": 548.1686401367188, "learning_rate": 2.8928099085079197e-05, "loss": 41.4219, "step": 126200 }, { "epoch": 0.5099043702048748, "grad_norm": 1110.064697265625, "learning_rate": 2.89246517461563e-05, "loss": 71.6463, "step": 126210 }, { "epoch": 0.5099447714702424, "grad_norm": 549.9130859375, "learning_rate": 2.892120433072031e-05, "loss": 45.8994, "step": 126220 }, { "epoch": 0.5099851727356101, "grad_norm": 4628.6025390625, "learning_rate": 2.8917756838838418e-05, "loss": 75.2251, "step": 126230 }, { "epoch": 0.5100255740009777, "grad_norm": 518.2777099609375, "learning_rate": 2.8914309270577834e-05, "loss": 48.5844, "step": 126240 }, { "epoch": 0.5100659752663453, "grad_norm": 677.185302734375, "learning_rate": 2.8910861626005776e-05, "loss": 61.1778, "step": 126250 }, { "epoch": 0.510106376531713, "grad_norm": 1070.945556640625, "learning_rate": 2.8907413905189456e-05, "loss": 43.0625, "step": 126260 }, { "epoch": 0.5101467777970806, "grad_norm": 1414.0343017578125, "learning_rate": 2.8903966108196096e-05, "loss": 69.5534, "step": 126270 }, { "epoch": 0.5101871790624483, "grad_norm": 1134.17041015625, "learning_rate": 2.8900518235092905e-05, "loss": 73.4212, "step": 126280 }, { "epoch": 0.5102275803278159, "grad_norm": 970.3851318359375, "learning_rate": 2.8897070285947098e-05, "loss": 58.0343, "step": 126290 }, { "epoch": 0.5102679815931835, "grad_norm": 340.81231689453125, "learning_rate": 2.8893622260825904e-05, "loss": 76.9128, "step": 126300 }, { "epoch": 0.5103083828585512, "grad_norm": 637.6676025390625, "learning_rate": 2.889017415979654e-05, "loss": 68.674, "step": 126310 }, { "epoch": 0.5103487841239187, "grad_norm": 643.8228759765625, "learning_rate": 2.8886725982926232e-05, "loss": 81.6261, "step": 126320 }, { "epoch": 0.5103891853892863, "grad_norm": 378.8822021484375, "learning_rate": 2.8883277730282194e-05, "loss": 46.2597, "step": 126330 }, { "epoch": 0.510429586654654, "grad_norm": 1168.7510986328125, "learning_rate": 2.8879829401931652e-05, "loss": 71.7926, "step": 126340 }, { "epoch": 0.5104699879200216, "grad_norm": 522.0606689453125, "learning_rate": 2.8876380997941847e-05, "loss": 54.6862, "step": 126350 }, { "epoch": 0.5105103891853893, "grad_norm": 396.95263671875, "learning_rate": 2.8872932518379997e-05, "loss": 44.9638, "step": 126360 }, { "epoch": 0.5105507904507569, "grad_norm": 794.6138305664062, "learning_rate": 2.886948396331333e-05, "loss": 76.7646, "step": 126370 }, { "epoch": 0.5105911917161245, "grad_norm": 1046.785888671875, "learning_rate": 2.8866035332809084e-05, "loss": 53.4936, "step": 126380 }, { "epoch": 0.5106315929814922, "grad_norm": 869.3177490234375, "learning_rate": 2.886258662693449e-05, "loss": 43.9115, "step": 126390 }, { "epoch": 0.5106719942468598, "grad_norm": 724.6814575195312, "learning_rate": 2.8859137845756784e-05, "loss": 45.5164, "step": 126400 }, { "epoch": 0.5107123955122275, "grad_norm": 411.1709289550781, "learning_rate": 2.8855688989343193e-05, "loss": 57.5156, "step": 126410 }, { "epoch": 0.5107527967775951, "grad_norm": 326.8053283691406, "learning_rate": 2.885224005776096e-05, "loss": 51.5223, "step": 126420 }, { "epoch": 0.5107931980429627, "grad_norm": 843.7996215820312, "learning_rate": 2.884879105107733e-05, "loss": 44.6353, "step": 126430 }, { "epoch": 0.5108335993083304, "grad_norm": 1049.7962646484375, "learning_rate": 2.884534196935953e-05, "loss": 58.139, "step": 126440 }, { "epoch": 0.5108740005736979, "grad_norm": 2668.488037109375, "learning_rate": 2.8841892812674808e-05, "loss": 37.7585, "step": 126450 }, { "epoch": 0.5109144018390656, "grad_norm": 1897.6781005859375, "learning_rate": 2.8838443581090412e-05, "loss": 50.773, "step": 126460 }, { "epoch": 0.5109548031044332, "grad_norm": 556.2504272460938, "learning_rate": 2.8834994274673582e-05, "loss": 56.0956, "step": 126470 }, { "epoch": 0.5109952043698008, "grad_norm": 292.6050109863281, "learning_rate": 2.8831544893491563e-05, "loss": 49.7071, "step": 126480 }, { "epoch": 0.5110356056351685, "grad_norm": 442.0137939453125, "learning_rate": 2.882809543761161e-05, "loss": 51.5451, "step": 126490 }, { "epoch": 0.5110760069005361, "grad_norm": 810.0142211914062, "learning_rate": 2.8824645907100954e-05, "loss": 55.3643, "step": 126500 }, { "epoch": 0.5111164081659038, "grad_norm": 1782.1546630859375, "learning_rate": 2.8821196302026863e-05, "loss": 68.3655, "step": 126510 }, { "epoch": 0.5111568094312714, "grad_norm": 900.7822875976562, "learning_rate": 2.881774662245658e-05, "loss": 48.6357, "step": 126520 }, { "epoch": 0.511197210696639, "grad_norm": 648.9537963867188, "learning_rate": 2.8814296868457364e-05, "loss": 51.299, "step": 126530 }, { "epoch": 0.5112376119620067, "grad_norm": 949.2380981445312, "learning_rate": 2.8810847040096467e-05, "loss": 66.241, "step": 126540 }, { "epoch": 0.5112780132273743, "grad_norm": 227.76019287109375, "learning_rate": 2.8807397137441145e-05, "loss": 32.5676, "step": 126550 }, { "epoch": 0.511318414492742, "grad_norm": 545.3878173828125, "learning_rate": 2.8803947160558652e-05, "loss": 77.758, "step": 126560 }, { "epoch": 0.5113588157581096, "grad_norm": 621.6892700195312, "learning_rate": 2.8800497109516263e-05, "loss": 58.1789, "step": 126570 }, { "epoch": 0.5113992170234771, "grad_norm": 1468.47509765625, "learning_rate": 2.8797046984381208e-05, "loss": 72.7109, "step": 126580 }, { "epoch": 0.5114396182888448, "grad_norm": 406.4592590332031, "learning_rate": 2.8793596785220783e-05, "loss": 64.9677, "step": 126590 }, { "epoch": 0.5114800195542124, "grad_norm": 290.8427429199219, "learning_rate": 2.879014651210223e-05, "loss": 47.5232, "step": 126600 }, { "epoch": 0.51152042081958, "grad_norm": 1523.126220703125, "learning_rate": 2.8786696165092812e-05, "loss": 42.1534, "step": 126610 }, { "epoch": 0.5115608220849477, "grad_norm": 1009.9312133789062, "learning_rate": 2.8783245744259806e-05, "loss": 61.5785, "step": 126620 }, { "epoch": 0.5116012233503153, "grad_norm": 527.927490234375, "learning_rate": 2.877979524967048e-05, "loss": 61.9816, "step": 126630 }, { "epoch": 0.511641624615683, "grad_norm": 575.1068725585938, "learning_rate": 2.8776344681392105e-05, "loss": 60.9859, "step": 126640 }, { "epoch": 0.5116820258810506, "grad_norm": 338.4177551269531, "learning_rate": 2.877289403949194e-05, "loss": 54.107, "step": 126650 }, { "epoch": 0.5117224271464182, "grad_norm": 354.56005859375, "learning_rate": 2.876944332403726e-05, "loss": 54.2842, "step": 126660 }, { "epoch": 0.5117628284117859, "grad_norm": 989.342041015625, "learning_rate": 2.8765992535095345e-05, "loss": 100.1021, "step": 126670 }, { "epoch": 0.5118032296771535, "grad_norm": 547.06982421875, "learning_rate": 2.8762541672733472e-05, "loss": 48.5291, "step": 126680 }, { "epoch": 0.5118436309425212, "grad_norm": 1068.9014892578125, "learning_rate": 2.8759090737018902e-05, "loss": 58.7899, "step": 126690 }, { "epoch": 0.5118840322078888, "grad_norm": 1199.883544921875, "learning_rate": 2.875563972801893e-05, "loss": 43.4314, "step": 126700 }, { "epoch": 0.5119244334732563, "grad_norm": 616.8649291992188, "learning_rate": 2.8752188645800822e-05, "loss": 46.0097, "step": 126710 }, { "epoch": 0.511964834738624, "grad_norm": 2177.439697265625, "learning_rate": 2.874873749043187e-05, "loss": 33.2443, "step": 126720 }, { "epoch": 0.5120052360039916, "grad_norm": 3227.6328125, "learning_rate": 2.8745286261979348e-05, "loss": 64.0699, "step": 126730 }, { "epoch": 0.5120456372693593, "grad_norm": 754.943115234375, "learning_rate": 2.874183496051055e-05, "loss": 40.789, "step": 126740 }, { "epoch": 0.5120860385347269, "grad_norm": 560.1809692382812, "learning_rate": 2.8738383586092745e-05, "loss": 42.9098, "step": 126750 }, { "epoch": 0.5121264398000945, "grad_norm": 1200.2210693359375, "learning_rate": 2.8734932138793225e-05, "loss": 58.1281, "step": 126760 }, { "epoch": 0.5121668410654622, "grad_norm": 569.3571166992188, "learning_rate": 2.8731480618679285e-05, "loss": 71.4217, "step": 126770 }, { "epoch": 0.5122072423308298, "grad_norm": 1209.4700927734375, "learning_rate": 2.8728029025818204e-05, "loss": 51.1106, "step": 126780 }, { "epoch": 0.5122476435961975, "grad_norm": 541.9495849609375, "learning_rate": 2.872457736027728e-05, "loss": 42.31, "step": 126790 }, { "epoch": 0.5122880448615651, "grad_norm": 1008.5130004882812, "learning_rate": 2.8721125622123806e-05, "loss": 101.0686, "step": 126800 }, { "epoch": 0.5123284461269327, "grad_norm": 794.3250732421875, "learning_rate": 2.8717673811425072e-05, "loss": 56.3601, "step": 126810 }, { "epoch": 0.5123688473923004, "grad_norm": 796.3021240234375, "learning_rate": 2.8714221928248368e-05, "loss": 50.3433, "step": 126820 }, { "epoch": 0.5124092486576679, "grad_norm": 536.2626953125, "learning_rate": 2.8710769972661e-05, "loss": 53.0116, "step": 126830 }, { "epoch": 0.5124496499230355, "grad_norm": 816.8209228515625, "learning_rate": 2.8707317944730268e-05, "loss": 42.8337, "step": 126840 }, { "epoch": 0.5124900511884032, "grad_norm": 1031.8271484375, "learning_rate": 2.8703865844523452e-05, "loss": 47.6963, "step": 126850 }, { "epoch": 0.5125304524537708, "grad_norm": 1931.7418212890625, "learning_rate": 2.8700413672107866e-05, "loss": 52.6271, "step": 126860 }, { "epoch": 0.5125708537191385, "grad_norm": 747.9002075195312, "learning_rate": 2.869696142755081e-05, "loss": 57.4997, "step": 126870 }, { "epoch": 0.5126112549845061, "grad_norm": 460.6169128417969, "learning_rate": 2.8693509110919598e-05, "loss": 49.3172, "step": 126880 }, { "epoch": 0.5126516562498737, "grad_norm": 395.9313659667969, "learning_rate": 2.8690056722281513e-05, "loss": 50.4048, "step": 126890 }, { "epoch": 0.5126920575152414, "grad_norm": 673.880859375, "learning_rate": 2.8686604261703875e-05, "loss": 44.8503, "step": 126900 }, { "epoch": 0.512732458780609, "grad_norm": 745.4364624023438, "learning_rate": 2.8683151729253994e-05, "loss": 50.778, "step": 126910 }, { "epoch": 0.5127728600459767, "grad_norm": 887.1217041015625, "learning_rate": 2.8679699124999166e-05, "loss": 44.0796, "step": 126920 }, { "epoch": 0.5128132613113443, "grad_norm": 565.948486328125, "learning_rate": 2.8676246449006715e-05, "loss": 79.4067, "step": 126930 }, { "epoch": 0.5128536625767119, "grad_norm": 602.2057495117188, "learning_rate": 2.8672793701343946e-05, "loss": 53.1035, "step": 126940 }, { "epoch": 0.5128940638420796, "grad_norm": 670.5474243164062, "learning_rate": 2.8669340882078166e-05, "loss": 60.5767, "step": 126950 }, { "epoch": 0.5129344651074471, "grad_norm": 447.6339111328125, "learning_rate": 2.866588799127671e-05, "loss": 67.3426, "step": 126960 }, { "epoch": 0.5129748663728148, "grad_norm": 728.9623413085938, "learning_rate": 2.8662435029006868e-05, "loss": 72.0813, "step": 126970 }, { "epoch": 0.5130152676381824, "grad_norm": 600.4077758789062, "learning_rate": 2.865898199533597e-05, "loss": 54.8805, "step": 126980 }, { "epoch": 0.51305566890355, "grad_norm": 580.8352661132812, "learning_rate": 2.865552889033134e-05, "loss": 55.582, "step": 126990 }, { "epoch": 0.5130960701689177, "grad_norm": 1138.0733642578125, "learning_rate": 2.8652075714060295e-05, "loss": 64.3382, "step": 127000 }, { "epoch": 0.5131364714342853, "grad_norm": 281.50640869140625, "learning_rate": 2.864862246659015e-05, "loss": 45.8135, "step": 127010 }, { "epoch": 0.513176872699653, "grad_norm": 1133.2386474609375, "learning_rate": 2.8645169147988226e-05, "loss": 56.7795, "step": 127020 }, { "epoch": 0.5132172739650206, "grad_norm": 846.5953979492188, "learning_rate": 2.8641715758321857e-05, "loss": 31.9575, "step": 127030 }, { "epoch": 0.5132576752303882, "grad_norm": 3238.64013671875, "learning_rate": 2.8638262297658368e-05, "loss": 78.6456, "step": 127040 }, { "epoch": 0.5132980764957559, "grad_norm": 1550.1759033203125, "learning_rate": 2.863480876606508e-05, "loss": 56.8611, "step": 127050 }, { "epoch": 0.5133384777611235, "grad_norm": 442.9187927246094, "learning_rate": 2.863135516360932e-05, "loss": 41.2573, "step": 127060 }, { "epoch": 0.5133788790264912, "grad_norm": 1137.1116943359375, "learning_rate": 2.8627901490358422e-05, "loss": 46.5267, "step": 127070 }, { "epoch": 0.5134192802918588, "grad_norm": 1043.6861572265625, "learning_rate": 2.8624447746379722e-05, "loss": 60.2835, "step": 127080 }, { "epoch": 0.5134596815572263, "grad_norm": 739.7015380859375, "learning_rate": 2.862099393174055e-05, "loss": 48.624, "step": 127090 }, { "epoch": 0.513500082822594, "grad_norm": 945.224853515625, "learning_rate": 2.861754004650823e-05, "loss": 63.0741, "step": 127100 }, { "epoch": 0.5135404840879616, "grad_norm": 425.7527160644531, "learning_rate": 2.8614086090750103e-05, "loss": 46.5734, "step": 127110 }, { "epoch": 0.5135808853533292, "grad_norm": 1409.0302734375, "learning_rate": 2.8610632064533517e-05, "loss": 47.2388, "step": 127120 }, { "epoch": 0.5136212866186969, "grad_norm": 581.88916015625, "learning_rate": 2.8607177967925792e-05, "loss": 69.2831, "step": 127130 }, { "epoch": 0.5136616878840645, "grad_norm": 478.259765625, "learning_rate": 2.8603723800994275e-05, "loss": 60.1264, "step": 127140 }, { "epoch": 0.5137020891494322, "grad_norm": 2480.73095703125, "learning_rate": 2.8600269563806302e-05, "loss": 68.6383, "step": 127150 }, { "epoch": 0.5137424904147998, "grad_norm": 673.7091064453125, "learning_rate": 2.859681525642923e-05, "loss": 65.6581, "step": 127160 }, { "epoch": 0.5137828916801674, "grad_norm": 675.0384521484375, "learning_rate": 2.8593360878930392e-05, "loss": 57.0633, "step": 127170 }, { "epoch": 0.5138232929455351, "grad_norm": 613.3917236328125, "learning_rate": 2.8589906431377134e-05, "loss": 50.5051, "step": 127180 }, { "epoch": 0.5138636942109027, "grad_norm": 358.6791687011719, "learning_rate": 2.8586451913836797e-05, "loss": 48.4163, "step": 127190 }, { "epoch": 0.5139040954762704, "grad_norm": 1500.3143310546875, "learning_rate": 2.858299732637674e-05, "loss": 69.6514, "step": 127200 }, { "epoch": 0.513944496741638, "grad_norm": 303.2386474609375, "learning_rate": 2.8579542669064296e-05, "loss": 46.2417, "step": 127210 }, { "epoch": 0.5139848980070055, "grad_norm": 936.28662109375, "learning_rate": 2.8576087941966835e-05, "loss": 59.2663, "step": 127220 }, { "epoch": 0.5140252992723732, "grad_norm": 550.0284423828125, "learning_rate": 2.857263314515169e-05, "loss": 46.5445, "step": 127230 }, { "epoch": 0.5140657005377408, "grad_norm": 1154.9842529296875, "learning_rate": 2.856917827868622e-05, "loss": 48.4179, "step": 127240 }, { "epoch": 0.5141061018031085, "grad_norm": 400.9862365722656, "learning_rate": 2.8565723342637796e-05, "loss": 84.6768, "step": 127250 }, { "epoch": 0.5141465030684761, "grad_norm": 1014.5621948242188, "learning_rate": 2.856226833707375e-05, "loss": 60.3505, "step": 127260 }, { "epoch": 0.5141869043338437, "grad_norm": 351.1904602050781, "learning_rate": 2.855881326206145e-05, "loss": 59.1501, "step": 127270 }, { "epoch": 0.5142273055992114, "grad_norm": 520.452392578125, "learning_rate": 2.855535811766825e-05, "loss": 60.3091, "step": 127280 }, { "epoch": 0.514267706864579, "grad_norm": 717.9920043945312, "learning_rate": 2.8551902903961526e-05, "loss": 40.0301, "step": 127290 }, { "epoch": 0.5143081081299467, "grad_norm": 560.2972412109375, "learning_rate": 2.854844762100861e-05, "loss": 50.7043, "step": 127300 }, { "epoch": 0.5143485093953143, "grad_norm": 1860.7342529296875, "learning_rate": 2.854499226887689e-05, "loss": 63.0763, "step": 127310 }, { "epoch": 0.5143889106606819, "grad_norm": 135.43519592285156, "learning_rate": 2.8541536847633717e-05, "loss": 45.6506, "step": 127320 }, { "epoch": 0.5144293119260496, "grad_norm": 766.4346313476562, "learning_rate": 2.8538081357346465e-05, "loss": 72.0964, "step": 127330 }, { "epoch": 0.5144697131914172, "grad_norm": 778.751220703125, "learning_rate": 2.8534625798082488e-05, "loss": 65.8823, "step": 127340 }, { "epoch": 0.5145101144567847, "grad_norm": 490.54461669921875, "learning_rate": 2.853117016990917e-05, "loss": 41.0561, "step": 127350 }, { "epoch": 0.5145505157221524, "grad_norm": 450.0224914550781, "learning_rate": 2.8527714472893862e-05, "loss": 61.406, "step": 127360 }, { "epoch": 0.51459091698752, "grad_norm": 298.4858093261719, "learning_rate": 2.8524258707103957e-05, "loss": 88.5857, "step": 127370 }, { "epoch": 0.5146313182528877, "grad_norm": 1860.1826171875, "learning_rate": 2.85208028726068e-05, "loss": 108.9013, "step": 127380 }, { "epoch": 0.5146717195182553, "grad_norm": 948.806396484375, "learning_rate": 2.8517346969469782e-05, "loss": 47.6132, "step": 127390 }, { "epoch": 0.5147121207836229, "grad_norm": 1325.517578125, "learning_rate": 2.8513890997760272e-05, "loss": 45.2896, "step": 127400 }, { "epoch": 0.5147525220489906, "grad_norm": 605.529052734375, "learning_rate": 2.851043495754566e-05, "loss": 43.9705, "step": 127410 }, { "epoch": 0.5147929233143582, "grad_norm": 426.92193603515625, "learning_rate": 2.8506978848893302e-05, "loss": 39.5349, "step": 127420 }, { "epoch": 0.5148333245797259, "grad_norm": 1157.5235595703125, "learning_rate": 2.8503522671870585e-05, "loss": 56.4158, "step": 127430 }, { "epoch": 0.5148737258450935, "grad_norm": 782.9813232421875, "learning_rate": 2.8500066426544896e-05, "loss": 85.5858, "step": 127440 }, { "epoch": 0.5149141271104611, "grad_norm": 1053.0633544921875, "learning_rate": 2.849661011298361e-05, "loss": 64.2638, "step": 127450 }, { "epoch": 0.5149545283758288, "grad_norm": 652.576171875, "learning_rate": 2.8493153731254102e-05, "loss": 43.2801, "step": 127460 }, { "epoch": 0.5149949296411963, "grad_norm": 1399.2457275390625, "learning_rate": 2.8489697281423767e-05, "loss": 57.9567, "step": 127470 }, { "epoch": 0.515035330906564, "grad_norm": 1636.6573486328125, "learning_rate": 2.8486240763559986e-05, "loss": 87.6768, "step": 127480 }, { "epoch": 0.5150757321719316, "grad_norm": 0.0, "learning_rate": 2.848278417773015e-05, "loss": 68.869, "step": 127490 }, { "epoch": 0.5151161334372992, "grad_norm": 499.08416748046875, "learning_rate": 2.8479327524001636e-05, "loss": 44.9667, "step": 127500 }, { "epoch": 0.5151565347026669, "grad_norm": 905.2075805664062, "learning_rate": 2.8475870802441844e-05, "loss": 51.4555, "step": 127510 }, { "epoch": 0.5151969359680345, "grad_norm": 567.349365234375, "learning_rate": 2.847241401311817e-05, "loss": 37.061, "step": 127520 }, { "epoch": 0.5152373372334021, "grad_norm": 499.4542541503906, "learning_rate": 2.846895715609799e-05, "loss": 46.9687, "step": 127530 }, { "epoch": 0.5152777384987698, "grad_norm": 1374.189208984375, "learning_rate": 2.8465500231448704e-05, "loss": 74.0274, "step": 127540 }, { "epoch": 0.5153181397641374, "grad_norm": 1301.90673828125, "learning_rate": 2.8462043239237707e-05, "loss": 51.4661, "step": 127550 }, { "epoch": 0.5153585410295051, "grad_norm": 290.5340576171875, "learning_rate": 2.845858617953239e-05, "loss": 49.4092, "step": 127560 }, { "epoch": 0.5153989422948727, "grad_norm": 617.203857421875, "learning_rate": 2.8455129052400166e-05, "loss": 48.6922, "step": 127570 }, { "epoch": 0.5154393435602403, "grad_norm": 877.1514282226562, "learning_rate": 2.8451671857908415e-05, "loss": 57.4519, "step": 127580 }, { "epoch": 0.515479744825608, "grad_norm": 1293.734375, "learning_rate": 2.844821459612454e-05, "loss": 97.9196, "step": 127590 }, { "epoch": 0.5155201460909755, "grad_norm": 628.9718017578125, "learning_rate": 2.844475726711595e-05, "loss": 44.7918, "step": 127600 }, { "epoch": 0.5155605473563432, "grad_norm": 645.7540283203125, "learning_rate": 2.844129987095005e-05, "loss": 42.1978, "step": 127610 }, { "epoch": 0.5156009486217108, "grad_norm": 470.0179138183594, "learning_rate": 2.8437842407694236e-05, "loss": 59.6109, "step": 127620 }, { "epoch": 0.5156413498870784, "grad_norm": 548.3775024414062, "learning_rate": 2.843438487741591e-05, "loss": 53.7288, "step": 127630 }, { "epoch": 0.5156817511524461, "grad_norm": 294.67889404296875, "learning_rate": 2.843092728018248e-05, "loss": 49.3608, "step": 127640 }, { "epoch": 0.5157221524178137, "grad_norm": 2212.821533203125, "learning_rate": 2.8427469616061364e-05, "loss": 59.0455, "step": 127650 }, { "epoch": 0.5157625536831814, "grad_norm": 849.8643798828125, "learning_rate": 2.8424011885119954e-05, "loss": 44.9502, "step": 127660 }, { "epoch": 0.515802954948549, "grad_norm": 565.5281372070312, "learning_rate": 2.842055408742567e-05, "loss": 53.8755, "step": 127670 }, { "epoch": 0.5158433562139166, "grad_norm": 874.5379638671875, "learning_rate": 2.8417096223045925e-05, "loss": 53.4064, "step": 127680 }, { "epoch": 0.5158837574792843, "grad_norm": 761.3839111328125, "learning_rate": 2.841363829204814e-05, "loss": 40.8466, "step": 127690 }, { "epoch": 0.5159241587446519, "grad_norm": 409.4591369628906, "learning_rate": 2.841018029449971e-05, "loss": 40.9471, "step": 127700 }, { "epoch": 0.5159645600100196, "grad_norm": 896.9284057617188, "learning_rate": 2.8406722230468063e-05, "loss": 69.344, "step": 127710 }, { "epoch": 0.5160049612753872, "grad_norm": 248.2589569091797, "learning_rate": 2.840326410002061e-05, "loss": 53.1701, "step": 127720 }, { "epoch": 0.5160453625407547, "grad_norm": 770.6051025390625, "learning_rate": 2.839980590322477e-05, "loss": 37.5784, "step": 127730 }, { "epoch": 0.5160857638061224, "grad_norm": 1029.7786865234375, "learning_rate": 2.8396347640147962e-05, "loss": 57.3108, "step": 127740 }, { "epoch": 0.51612616507149, "grad_norm": 450.9689636230469, "learning_rate": 2.8392889310857612e-05, "loss": 47.8311, "step": 127750 }, { "epoch": 0.5161665663368576, "grad_norm": 471.0567932128906, "learning_rate": 2.8389430915421132e-05, "loss": 58.49, "step": 127760 }, { "epoch": 0.5162069676022253, "grad_norm": 991.8955688476562, "learning_rate": 2.8385972453905958e-05, "loss": 43.4331, "step": 127770 }, { "epoch": 0.5162473688675929, "grad_norm": 0.0, "learning_rate": 2.8382513926379504e-05, "loss": 66.81, "step": 127780 }, { "epoch": 0.5162877701329606, "grad_norm": 1275.402587890625, "learning_rate": 2.837905533290921e-05, "loss": 33.7486, "step": 127790 }, { "epoch": 0.5163281713983282, "grad_norm": 921.3496704101562, "learning_rate": 2.8375596673562482e-05, "loss": 66.0726, "step": 127800 }, { "epoch": 0.5163685726636958, "grad_norm": 1107.1654052734375, "learning_rate": 2.8372137948406762e-05, "loss": 64.4679, "step": 127810 }, { "epoch": 0.5164089739290635, "grad_norm": 978.436279296875, "learning_rate": 2.8368679157509477e-05, "loss": 55.1411, "step": 127820 }, { "epoch": 0.5164493751944311, "grad_norm": 794.9774169921875, "learning_rate": 2.8365220300938055e-05, "loss": 44.3256, "step": 127830 }, { "epoch": 0.5164897764597988, "grad_norm": 2009.2308349609375, "learning_rate": 2.8361761378759934e-05, "loss": 52.7043, "step": 127840 }, { "epoch": 0.5165301777251664, "grad_norm": 989.152099609375, "learning_rate": 2.8358302391042536e-05, "loss": 37.631, "step": 127850 }, { "epoch": 0.5165705789905339, "grad_norm": 356.483642578125, "learning_rate": 2.8354843337853314e-05, "loss": 77.6594, "step": 127860 }, { "epoch": 0.5166109802559016, "grad_norm": 1363.230224609375, "learning_rate": 2.835138421925969e-05, "loss": 79.6859, "step": 127870 }, { "epoch": 0.5166513815212692, "grad_norm": 811.1392211914062, "learning_rate": 2.834792503532911e-05, "loss": 75.7948, "step": 127880 }, { "epoch": 0.5166917827866369, "grad_norm": 1689.7584228515625, "learning_rate": 2.8344465786129e-05, "loss": 79.6354, "step": 127890 }, { "epoch": 0.5167321840520045, "grad_norm": 251.05508422851562, "learning_rate": 2.8341006471726816e-05, "loss": 38.7622, "step": 127900 }, { "epoch": 0.5167725853173721, "grad_norm": 1307.02880859375, "learning_rate": 2.833754709218998e-05, "loss": 75.8891, "step": 127910 }, { "epoch": 0.5168129865827398, "grad_norm": 743.4976196289062, "learning_rate": 2.833408764758595e-05, "loss": 37.3798, "step": 127920 }, { "epoch": 0.5168533878481074, "grad_norm": 395.01214599609375, "learning_rate": 2.833062813798216e-05, "loss": 53.7051, "step": 127930 }, { "epoch": 0.516893789113475, "grad_norm": 1006.5745849609375, "learning_rate": 2.832716856344607e-05, "loss": 53.2251, "step": 127940 }, { "epoch": 0.5169341903788427, "grad_norm": 649.6722412109375, "learning_rate": 2.832370892404511e-05, "loss": 47.6598, "step": 127950 }, { "epoch": 0.5169745916442103, "grad_norm": 952.0791625976562, "learning_rate": 2.832024921984674e-05, "loss": 46.0533, "step": 127960 }, { "epoch": 0.517014992909578, "grad_norm": 663.2807006835938, "learning_rate": 2.8316789450918396e-05, "loss": 60.6055, "step": 127970 }, { "epoch": 0.5170553941749455, "grad_norm": 1854.5562744140625, "learning_rate": 2.8313329617327537e-05, "loss": 104.9597, "step": 127980 }, { "epoch": 0.5170957954403131, "grad_norm": 399.9706726074219, "learning_rate": 2.8309869719141608e-05, "loss": 73.0087, "step": 127990 }, { "epoch": 0.5171361967056808, "grad_norm": 742.0582275390625, "learning_rate": 2.8306409756428064e-05, "loss": 63.6711, "step": 128000 }, { "epoch": 0.5171765979710484, "grad_norm": 426.78814697265625, "learning_rate": 2.8302949729254358e-05, "loss": 44.5783, "step": 128010 }, { "epoch": 0.5172169992364161, "grad_norm": 537.81494140625, "learning_rate": 2.8299489637687954e-05, "loss": 46.2207, "step": 128020 }, { "epoch": 0.5172574005017837, "grad_norm": 545.3671264648438, "learning_rate": 2.8296029481796292e-05, "loss": 52.6649, "step": 128030 }, { "epoch": 0.5172978017671513, "grad_norm": 1167.154541015625, "learning_rate": 2.829256926164685e-05, "loss": 68.5006, "step": 128040 }, { "epoch": 0.517338203032519, "grad_norm": 743.8500366210938, "learning_rate": 2.8289108977307067e-05, "loss": 49.9936, "step": 128050 }, { "epoch": 0.5173786042978866, "grad_norm": 708.7972412109375, "learning_rate": 2.8285648628844413e-05, "loss": 58.3343, "step": 128060 }, { "epoch": 0.5174190055632543, "grad_norm": 952.4853515625, "learning_rate": 2.8282188216326345e-05, "loss": 51.3421, "step": 128070 }, { "epoch": 0.5174594068286219, "grad_norm": 761.9249267578125, "learning_rate": 2.8278727739820333e-05, "loss": 47.8939, "step": 128080 }, { "epoch": 0.5174998080939895, "grad_norm": 1164.4027099609375, "learning_rate": 2.827526719939383e-05, "loss": 112.3727, "step": 128090 }, { "epoch": 0.5175402093593572, "grad_norm": 1486.0279541015625, "learning_rate": 2.827180659511431e-05, "loss": 54.5274, "step": 128100 }, { "epoch": 0.5175806106247247, "grad_norm": 1370.9776611328125, "learning_rate": 2.8268345927049234e-05, "loss": 65.6802, "step": 128110 }, { "epoch": 0.5176210118900924, "grad_norm": 796.0413818359375, "learning_rate": 2.8264885195266065e-05, "loss": 58.2697, "step": 128120 }, { "epoch": 0.51766141315546, "grad_norm": 703.8768310546875, "learning_rate": 2.8261424399832293e-05, "loss": 49.1636, "step": 128130 }, { "epoch": 0.5177018144208276, "grad_norm": 419.2386779785156, "learning_rate": 2.825796354081537e-05, "loss": 39.2348, "step": 128140 }, { "epoch": 0.5177422156861953, "grad_norm": 687.80859375, "learning_rate": 2.8254502618282763e-05, "loss": 33.7805, "step": 128150 }, { "epoch": 0.5177826169515629, "grad_norm": 0.0, "learning_rate": 2.8251041632301957e-05, "loss": 54.0625, "step": 128160 }, { "epoch": 0.5178230182169306, "grad_norm": 458.79937744140625, "learning_rate": 2.8247580582940413e-05, "loss": 60.1953, "step": 128170 }, { "epoch": 0.5178634194822982, "grad_norm": 739.254638671875, "learning_rate": 2.824411947026563e-05, "loss": 67.9315, "step": 128180 }, { "epoch": 0.5179038207476658, "grad_norm": 998.814208984375, "learning_rate": 2.824065829434505e-05, "loss": 52.8274, "step": 128190 }, { "epoch": 0.5179442220130335, "grad_norm": 862.947265625, "learning_rate": 2.8237197055246172e-05, "loss": 47.5776, "step": 128200 }, { "epoch": 0.5179846232784011, "grad_norm": 0.0, "learning_rate": 2.8233735753036484e-05, "loss": 58.8695, "step": 128210 }, { "epoch": 0.5180250245437688, "grad_norm": 1318.5653076171875, "learning_rate": 2.823027438778344e-05, "loss": 46.4017, "step": 128220 }, { "epoch": 0.5180654258091364, "grad_norm": 1210.0972900390625, "learning_rate": 2.8226812959554537e-05, "loss": 61.901, "step": 128230 }, { "epoch": 0.5181058270745039, "grad_norm": 756.2396850585938, "learning_rate": 2.8223351468417254e-05, "loss": 49.0368, "step": 128240 }, { "epoch": 0.5181462283398716, "grad_norm": 419.0620422363281, "learning_rate": 2.8219889914439074e-05, "loss": 52.058, "step": 128250 }, { "epoch": 0.5181866296052392, "grad_norm": 868.4646606445312, "learning_rate": 2.821642829768748e-05, "loss": 78.654, "step": 128260 }, { "epoch": 0.5182270308706068, "grad_norm": 424.6618957519531, "learning_rate": 2.8212966618229964e-05, "loss": 94.6811, "step": 128270 }, { "epoch": 0.5182674321359745, "grad_norm": 535.59228515625, "learning_rate": 2.8209504876134007e-05, "loss": 44.2588, "step": 128280 }, { "epoch": 0.5183078334013421, "grad_norm": 578.2286987304688, "learning_rate": 2.8206043071467102e-05, "loss": 38.9842, "step": 128290 }, { "epoch": 0.5183482346667098, "grad_norm": 948.8009033203125, "learning_rate": 2.8202581204296742e-05, "loss": 66.6254, "step": 128300 }, { "epoch": 0.5183886359320774, "grad_norm": 956.4691772460938, "learning_rate": 2.819911927469041e-05, "loss": 83.3204, "step": 128310 }, { "epoch": 0.518429037197445, "grad_norm": 662.3033447265625, "learning_rate": 2.8195657282715594e-05, "loss": 33.0041, "step": 128320 }, { "epoch": 0.5184694384628127, "grad_norm": 841.9630737304688, "learning_rate": 2.81921952284398e-05, "loss": 40.0045, "step": 128330 }, { "epoch": 0.5185098397281803, "grad_norm": 644.8792114257812, "learning_rate": 2.818873311193051e-05, "loss": 49.6458, "step": 128340 }, { "epoch": 0.518550240993548, "grad_norm": 1007.34765625, "learning_rate": 2.8185270933255237e-05, "loss": 45.5391, "step": 128350 }, { "epoch": 0.5185906422589156, "grad_norm": 412.73883056640625, "learning_rate": 2.8181808692481453e-05, "loss": 45.3399, "step": 128360 }, { "epoch": 0.5186310435242831, "grad_norm": 775.2849731445312, "learning_rate": 2.817834638967668e-05, "loss": 56.8275, "step": 128370 }, { "epoch": 0.5186714447896508, "grad_norm": 880.3590698242188, "learning_rate": 2.817488402490841e-05, "loss": 62.0417, "step": 128380 }, { "epoch": 0.5187118460550184, "grad_norm": 0.0, "learning_rate": 2.8171421598244134e-05, "loss": 100.0851, "step": 128390 }, { "epoch": 0.518752247320386, "grad_norm": 757.4847412109375, "learning_rate": 2.816795910975137e-05, "loss": 81.3571, "step": 128400 }, { "epoch": 0.5187926485857537, "grad_norm": 986.7409057617188, "learning_rate": 2.8164496559497605e-05, "loss": 99.9801, "step": 128410 }, { "epoch": 0.5188330498511213, "grad_norm": 1231.8388671875, "learning_rate": 2.816103394755035e-05, "loss": 36.7881, "step": 128420 }, { "epoch": 0.518873451116489, "grad_norm": 551.5934448242188, "learning_rate": 2.8157571273977117e-05, "loss": 48.0908, "step": 128430 }, { "epoch": 0.5189138523818566, "grad_norm": 957.9308471679688, "learning_rate": 2.8154108538845404e-05, "loss": 54.7105, "step": 128440 }, { "epoch": 0.5189542536472243, "grad_norm": 746.921875, "learning_rate": 2.8150645742222714e-05, "loss": 47.9935, "step": 128450 }, { "epoch": 0.5189946549125919, "grad_norm": 817.0953369140625, "learning_rate": 2.814718288417657e-05, "loss": 34.7214, "step": 128460 }, { "epoch": 0.5190350561779595, "grad_norm": 652.5723876953125, "learning_rate": 2.814371996477448e-05, "loss": 51.5591, "step": 128470 }, { "epoch": 0.5190754574433272, "grad_norm": 1426.8663330078125, "learning_rate": 2.8140256984083947e-05, "loss": 53.6987, "step": 128480 }, { "epoch": 0.5191158587086948, "grad_norm": 1349.2745361328125, "learning_rate": 2.8136793942172483e-05, "loss": 67.8195, "step": 128490 }, { "epoch": 0.5191562599740623, "grad_norm": 1255.9248046875, "learning_rate": 2.8133330839107608e-05, "loss": 89.4671, "step": 128500 }, { "epoch": 0.51919666123943, "grad_norm": 772.8133544921875, "learning_rate": 2.8129867674956838e-05, "loss": 99.116, "step": 128510 }, { "epoch": 0.5192370625047976, "grad_norm": 799.4025268554688, "learning_rate": 2.8126404449787685e-05, "loss": 37.4048, "step": 128520 }, { "epoch": 0.5192774637701653, "grad_norm": 767.2107543945312, "learning_rate": 2.8122941163667667e-05, "loss": 53.6676, "step": 128530 }, { "epoch": 0.5193178650355329, "grad_norm": 717.8088989257812, "learning_rate": 2.8119477816664296e-05, "loss": 53.9604, "step": 128540 }, { "epoch": 0.5193582663009005, "grad_norm": 568.8512573242188, "learning_rate": 2.8116014408845116e-05, "loss": 52.336, "step": 128550 }, { "epoch": 0.5193986675662682, "grad_norm": 1033.583984375, "learning_rate": 2.8112550940277616e-05, "loss": 60.4758, "step": 128560 }, { "epoch": 0.5194390688316358, "grad_norm": 267.0958251953125, "learning_rate": 2.810908741102934e-05, "loss": 87.3894, "step": 128570 }, { "epoch": 0.5194794700970035, "grad_norm": 966.0076293945312, "learning_rate": 2.8105623821167804e-05, "loss": 64.1264, "step": 128580 }, { "epoch": 0.5195198713623711, "grad_norm": 888.2479858398438, "learning_rate": 2.810216017076053e-05, "loss": 37.3439, "step": 128590 }, { "epoch": 0.5195602726277387, "grad_norm": 1010.2241821289062, "learning_rate": 2.8098696459875046e-05, "loss": 59.4499, "step": 128600 }, { "epoch": 0.5196006738931064, "grad_norm": 672.6810302734375, "learning_rate": 2.8095232688578883e-05, "loss": 59.5572, "step": 128610 }, { "epoch": 0.5196410751584739, "grad_norm": 1322.0794677734375, "learning_rate": 2.809176885693956e-05, "loss": 75.077, "step": 128620 }, { "epoch": 0.5196814764238415, "grad_norm": 647.8616943359375, "learning_rate": 2.8088304965024614e-05, "loss": 54.4772, "step": 128630 }, { "epoch": 0.5197218776892092, "grad_norm": 895.9517211914062, "learning_rate": 2.8084841012901574e-05, "loss": 32.6104, "step": 128640 }, { "epoch": 0.5197622789545768, "grad_norm": 346.7025451660156, "learning_rate": 2.808137700063797e-05, "loss": 39.3208, "step": 128650 }, { "epoch": 0.5198026802199445, "grad_norm": 911.7136840820312, "learning_rate": 2.807791292830133e-05, "loss": 58.0953, "step": 128660 }, { "epoch": 0.5198430814853121, "grad_norm": 522.1300659179688, "learning_rate": 2.8074448795959203e-05, "loss": 73.4923, "step": 128670 }, { "epoch": 0.5198834827506797, "grad_norm": 657.18505859375, "learning_rate": 2.8070984603679107e-05, "loss": 42.6445, "step": 128680 }, { "epoch": 0.5199238840160474, "grad_norm": 389.8962707519531, "learning_rate": 2.8067520351528587e-05, "loss": 43.4139, "step": 128690 }, { "epoch": 0.519964285281415, "grad_norm": 851.1239013671875, "learning_rate": 2.806405603957517e-05, "loss": 72.5371, "step": 128700 }, { "epoch": 0.5200046865467827, "grad_norm": 3148.775634765625, "learning_rate": 2.8060591667886416e-05, "loss": 56.5808, "step": 128710 }, { "epoch": 0.5200450878121503, "grad_norm": 1202.496826171875, "learning_rate": 2.8057127236529844e-05, "loss": 46.4309, "step": 128720 }, { "epoch": 0.520085489077518, "grad_norm": 962.0496826171875, "learning_rate": 2.805366274557301e-05, "loss": 50.884, "step": 128730 }, { "epoch": 0.5201258903428856, "grad_norm": 453.456298828125, "learning_rate": 2.8050198195083444e-05, "loss": 59.2252, "step": 128740 }, { "epoch": 0.5201662916082531, "grad_norm": 239.76718139648438, "learning_rate": 2.8046733585128687e-05, "loss": 70.7534, "step": 128750 }, { "epoch": 0.5202066928736208, "grad_norm": 457.1228942871094, "learning_rate": 2.80432689157763e-05, "loss": 52.9828, "step": 128760 }, { "epoch": 0.5202470941389884, "grad_norm": 877.8587036132812, "learning_rate": 2.8039804187093816e-05, "loss": 52.8952, "step": 128770 }, { "epoch": 0.520287495404356, "grad_norm": 1153.672119140625, "learning_rate": 2.803633939914878e-05, "loss": 60.0604, "step": 128780 }, { "epoch": 0.5203278966697237, "grad_norm": 120.61048126220703, "learning_rate": 2.803287455200875e-05, "loss": 47.4182, "step": 128790 }, { "epoch": 0.5203682979350913, "grad_norm": 1153.82080078125, "learning_rate": 2.8029409645741267e-05, "loss": 59.4981, "step": 128800 }, { "epoch": 0.520408699200459, "grad_norm": 418.24920654296875, "learning_rate": 2.8025944680413878e-05, "loss": 37.4401, "step": 128810 }, { "epoch": 0.5204491004658266, "grad_norm": 781.8113403320312, "learning_rate": 2.8022479656094154e-05, "loss": 48.7074, "step": 128820 }, { "epoch": 0.5204895017311942, "grad_norm": 836.5515747070312, "learning_rate": 2.801901457284962e-05, "loss": 51.3323, "step": 128830 }, { "epoch": 0.5205299029965619, "grad_norm": 1007.6831665039062, "learning_rate": 2.8015549430747852e-05, "loss": 63.1766, "step": 128840 }, { "epoch": 0.5205703042619295, "grad_norm": 1059.2291259765625, "learning_rate": 2.8012084229856382e-05, "loss": 48.2031, "step": 128850 }, { "epoch": 0.5206107055272972, "grad_norm": 435.08837890625, "learning_rate": 2.800861897024279e-05, "loss": 61.0167, "step": 128860 }, { "epoch": 0.5206511067926648, "grad_norm": 520.9664916992188, "learning_rate": 2.8005153651974614e-05, "loss": 68.8364, "step": 128870 }, { "epoch": 0.5206915080580323, "grad_norm": 1375.3497314453125, "learning_rate": 2.8001688275119432e-05, "loss": 73.5849, "step": 128880 }, { "epoch": 0.5207319093234, "grad_norm": 570.526611328125, "learning_rate": 2.799822283974478e-05, "loss": 45.2284, "step": 128890 }, { "epoch": 0.5207723105887676, "grad_norm": 3453.25927734375, "learning_rate": 2.7994757345918244e-05, "loss": 65.3527, "step": 128900 }, { "epoch": 0.5208127118541352, "grad_norm": 807.7914428710938, "learning_rate": 2.7991291793707357e-05, "loss": 37.759, "step": 128910 }, { "epoch": 0.5208531131195029, "grad_norm": 2692.131103515625, "learning_rate": 2.7987826183179712e-05, "loss": 70.0045, "step": 128920 }, { "epoch": 0.5208935143848705, "grad_norm": 316.426025390625, "learning_rate": 2.798436051440284e-05, "loss": 50.6802, "step": 128930 }, { "epoch": 0.5209339156502382, "grad_norm": 1145.9749755859375, "learning_rate": 2.7980894787444334e-05, "loss": 44.7317, "step": 128940 }, { "epoch": 0.5209743169156058, "grad_norm": 571.5775146484375, "learning_rate": 2.7977429002371747e-05, "loss": 39.6738, "step": 128950 }, { "epoch": 0.5210147181809734, "grad_norm": 625.548828125, "learning_rate": 2.797396315925265e-05, "loss": 34.7972, "step": 128960 }, { "epoch": 0.5210551194463411, "grad_norm": 733.4710083007812, "learning_rate": 2.7970497258154603e-05, "loss": 38.4996, "step": 128970 }, { "epoch": 0.5210955207117087, "grad_norm": 1038.670654296875, "learning_rate": 2.7967031299145193e-05, "loss": 49.1648, "step": 128980 }, { "epoch": 0.5211359219770764, "grad_norm": 545.0449829101562, "learning_rate": 2.7963565282291977e-05, "loss": 48.0368, "step": 128990 }, { "epoch": 0.521176323242444, "grad_norm": 428.3600769042969, "learning_rate": 2.7960099207662532e-05, "loss": 60.4256, "step": 129000 }, { "epoch": 0.5212167245078115, "grad_norm": 714.7028198242188, "learning_rate": 2.7956633075324424e-05, "loss": 60.8093, "step": 129010 }, { "epoch": 0.5212571257731792, "grad_norm": 1208.1380615234375, "learning_rate": 2.795316688534523e-05, "loss": 74.678, "step": 129020 }, { "epoch": 0.5212975270385468, "grad_norm": 534.9729614257812, "learning_rate": 2.794970063779253e-05, "loss": 51.2989, "step": 129030 }, { "epoch": 0.5213379283039145, "grad_norm": 756.8164672851562, "learning_rate": 2.79462343327339e-05, "loss": 66.086, "step": 129040 }, { "epoch": 0.5213783295692821, "grad_norm": 575.9217529296875, "learning_rate": 2.794276797023691e-05, "loss": 49.4426, "step": 129050 }, { "epoch": 0.5214187308346497, "grad_norm": 498.71441650390625, "learning_rate": 2.7939301550369146e-05, "loss": 48.8029, "step": 129060 }, { "epoch": 0.5214591321000174, "grad_norm": 594.4951782226562, "learning_rate": 2.7935835073198192e-05, "loss": 34.5576, "step": 129070 }, { "epoch": 0.521499533365385, "grad_norm": 1014.3102416992188, "learning_rate": 2.793236853879161e-05, "loss": 58.8379, "step": 129080 }, { "epoch": 0.5215399346307527, "grad_norm": 560.2127685546875, "learning_rate": 2.7928901947217008e-05, "loss": 63.1494, "step": 129090 }, { "epoch": 0.5215803358961203, "grad_norm": 666.1370239257812, "learning_rate": 2.792543529854194e-05, "loss": 53.1794, "step": 129100 }, { "epoch": 0.5216207371614879, "grad_norm": 636.662353515625, "learning_rate": 2.7921968592834006e-05, "loss": 25.9828, "step": 129110 }, { "epoch": 0.5216611384268556, "grad_norm": 835.5521850585938, "learning_rate": 2.79185018301608e-05, "loss": 57.4081, "step": 129120 }, { "epoch": 0.5217015396922232, "grad_norm": 992.4476928710938, "learning_rate": 2.791503501058989e-05, "loss": 52.8603, "step": 129130 }, { "epoch": 0.5217419409575907, "grad_norm": 328.57794189453125, "learning_rate": 2.7911568134188875e-05, "loss": 88.1504, "step": 129140 }, { "epoch": 0.5217823422229584, "grad_norm": 1140.0035400390625, "learning_rate": 2.7908101201025337e-05, "loss": 48.8337, "step": 129150 }, { "epoch": 0.521822743488326, "grad_norm": 745.549072265625, "learning_rate": 2.7904634211166876e-05, "loss": 55.3195, "step": 129160 }, { "epoch": 0.5218631447536937, "grad_norm": 969.3555297851562, "learning_rate": 2.7901167164681073e-05, "loss": 55.3426, "step": 129170 }, { "epoch": 0.5219035460190613, "grad_norm": 726.4353637695312, "learning_rate": 2.7897700061635517e-05, "loss": 49.0015, "step": 129180 }, { "epoch": 0.5219439472844289, "grad_norm": 374.95843505859375, "learning_rate": 2.7894232902097813e-05, "loss": 46.3873, "step": 129190 }, { "epoch": 0.5219843485497966, "grad_norm": 511.00885009765625, "learning_rate": 2.7890765686135544e-05, "loss": 47.9072, "step": 129200 }, { "epoch": 0.5220247498151642, "grad_norm": 512.494873046875, "learning_rate": 2.788729841381631e-05, "loss": 29.6466, "step": 129210 }, { "epoch": 0.5220651510805319, "grad_norm": 551.0260620117188, "learning_rate": 2.7883831085207707e-05, "loss": 65.4076, "step": 129220 }, { "epoch": 0.5221055523458995, "grad_norm": 621.8154907226562, "learning_rate": 2.788036370037733e-05, "loss": 55.3526, "step": 129230 }, { "epoch": 0.5221459536112671, "grad_norm": 477.7518310546875, "learning_rate": 2.7876896259392788e-05, "loss": 70.7479, "step": 129240 }, { "epoch": 0.5221863548766348, "grad_norm": 228.69644165039062, "learning_rate": 2.787342876232167e-05, "loss": 45.7491, "step": 129250 }, { "epoch": 0.5222267561420023, "grad_norm": 406.46026611328125, "learning_rate": 2.7869961209231577e-05, "loss": 40.5636, "step": 129260 }, { "epoch": 0.52226715740737, "grad_norm": 1248.6951904296875, "learning_rate": 2.7866493600190107e-05, "loss": 43.6552, "step": 129270 }, { "epoch": 0.5223075586727376, "grad_norm": 780.5540161132812, "learning_rate": 2.7863025935264875e-05, "loss": 42.3198, "step": 129280 }, { "epoch": 0.5223479599381052, "grad_norm": 830.3927612304688, "learning_rate": 2.785955821452348e-05, "loss": 50.129, "step": 129290 }, { "epoch": 0.5223883612034729, "grad_norm": 162.7823028564453, "learning_rate": 2.7856090438033522e-05, "loss": 35.8086, "step": 129300 }, { "epoch": 0.5224287624688405, "grad_norm": 577.8681030273438, "learning_rate": 2.785262260586261e-05, "loss": 48.4552, "step": 129310 }, { "epoch": 0.5224691637342082, "grad_norm": 495.5697021484375, "learning_rate": 2.7849154718078346e-05, "loss": 41.2047, "step": 129320 }, { "epoch": 0.5225095649995758, "grad_norm": 650.5645141601562, "learning_rate": 2.784568677474836e-05, "loss": 65.8662, "step": 129330 }, { "epoch": 0.5225499662649434, "grad_norm": 276.66241455078125, "learning_rate": 2.7842218775940237e-05, "loss": 33.7441, "step": 129340 }, { "epoch": 0.5225903675303111, "grad_norm": 840.7539672851562, "learning_rate": 2.783875072172159e-05, "loss": 49.3532, "step": 129350 }, { "epoch": 0.5226307687956787, "grad_norm": 315.2930603027344, "learning_rate": 2.783528261216004e-05, "loss": 37.2258, "step": 129360 }, { "epoch": 0.5226711700610464, "grad_norm": 398.0323791503906, "learning_rate": 2.78318144473232e-05, "loss": 85.9904, "step": 129370 }, { "epoch": 0.522711571326414, "grad_norm": 779.0153198242188, "learning_rate": 2.7828346227278674e-05, "loss": 53.5858, "step": 129380 }, { "epoch": 0.5227519725917815, "grad_norm": 941.3084106445312, "learning_rate": 2.782487795209408e-05, "loss": 77.1186, "step": 129390 }, { "epoch": 0.5227923738571492, "grad_norm": 1087.8790283203125, "learning_rate": 2.782140962183704e-05, "loss": 75.8238, "step": 129400 }, { "epoch": 0.5228327751225168, "grad_norm": 836.9957275390625, "learning_rate": 2.7817941236575173e-05, "loss": 60.6787, "step": 129410 }, { "epoch": 0.5228731763878844, "grad_norm": 542.893798828125, "learning_rate": 2.781447279637608e-05, "loss": 54.1359, "step": 129420 }, { "epoch": 0.5229135776532521, "grad_norm": 706.9591064453125, "learning_rate": 2.7811004301307403e-05, "loss": 42.0238, "step": 129430 }, { "epoch": 0.5229539789186197, "grad_norm": 975.5703735351562, "learning_rate": 2.7807535751436738e-05, "loss": 42.8671, "step": 129440 }, { "epoch": 0.5229943801839874, "grad_norm": 1424.2943115234375, "learning_rate": 2.7804067146831725e-05, "loss": 69.0784, "step": 129450 }, { "epoch": 0.523034781449355, "grad_norm": 1137.7862548828125, "learning_rate": 2.7800598487559975e-05, "loss": 46.7197, "step": 129460 }, { "epoch": 0.5230751827147226, "grad_norm": 195.2952423095703, "learning_rate": 2.7797129773689118e-05, "loss": 55.5766, "step": 129470 }, { "epoch": 0.5231155839800903, "grad_norm": 810.8615112304688, "learning_rate": 2.7793661005286774e-05, "loss": 42.1326, "step": 129480 }, { "epoch": 0.5231559852454579, "grad_norm": 259.7433166503906, "learning_rate": 2.7790192182420578e-05, "loss": 40.0814, "step": 129490 }, { "epoch": 0.5231963865108256, "grad_norm": 465.7834777832031, "learning_rate": 2.7786723305158136e-05, "loss": 41.8416, "step": 129500 }, { "epoch": 0.5232367877761932, "grad_norm": 309.6441650390625, "learning_rate": 2.7783254373567103e-05, "loss": 92.4435, "step": 129510 }, { "epoch": 0.5232771890415607, "grad_norm": 689.2532348632812, "learning_rate": 2.7779785387715078e-05, "loss": 50.8395, "step": 129520 }, { "epoch": 0.5233175903069284, "grad_norm": 551.0718994140625, "learning_rate": 2.7776316347669722e-05, "loss": 53.9811, "step": 129530 }, { "epoch": 0.523357991572296, "grad_norm": 792.376953125, "learning_rate": 2.7772847253498636e-05, "loss": 38.2251, "step": 129540 }, { "epoch": 0.5233983928376637, "grad_norm": 687.2542724609375, "learning_rate": 2.7769378105269467e-05, "loss": 72.9509, "step": 129550 }, { "epoch": 0.5234387941030313, "grad_norm": 856.1763916015625, "learning_rate": 2.7765908903049848e-05, "loss": 42.2359, "step": 129560 }, { "epoch": 0.5234791953683989, "grad_norm": 865.5512084960938, "learning_rate": 2.7762439646907417e-05, "loss": 46.5808, "step": 129570 }, { "epoch": 0.5235195966337666, "grad_norm": 218.27801513671875, "learning_rate": 2.7758970336909795e-05, "loss": 58.7573, "step": 129580 }, { "epoch": 0.5235599978991342, "grad_norm": 797.5692749023438, "learning_rate": 2.7755500973124625e-05, "loss": 64.8211, "step": 129590 }, { "epoch": 0.5236003991645019, "grad_norm": 1161.782470703125, "learning_rate": 2.7752031555619555e-05, "loss": 62.3158, "step": 129600 }, { "epoch": 0.5236408004298695, "grad_norm": 495.8021545410156, "learning_rate": 2.774856208446221e-05, "loss": 34.1338, "step": 129610 }, { "epoch": 0.5236812016952371, "grad_norm": 538.5377807617188, "learning_rate": 2.7745092559720227e-05, "loss": 84.3193, "step": 129620 }, { "epoch": 0.5237216029606048, "grad_norm": 472.7118225097656, "learning_rate": 2.7741622981461253e-05, "loss": 88.2121, "step": 129630 }, { "epoch": 0.5237620042259724, "grad_norm": 680.1412353515625, "learning_rate": 2.773815334975292e-05, "loss": 61.683, "step": 129640 }, { "epoch": 0.5238024054913399, "grad_norm": 194.98385620117188, "learning_rate": 2.7734683664662892e-05, "loss": 35.9615, "step": 129650 }, { "epoch": 0.5238428067567076, "grad_norm": 664.1810913085938, "learning_rate": 2.7731213926258794e-05, "loss": 76.9602, "step": 129660 }, { "epoch": 0.5238832080220752, "grad_norm": 675.6226806640625, "learning_rate": 2.7727744134608263e-05, "loss": 49.6265, "step": 129670 }, { "epoch": 0.5239236092874429, "grad_norm": 424.91912841796875, "learning_rate": 2.7724274289778974e-05, "loss": 49.0146, "step": 129680 }, { "epoch": 0.5239640105528105, "grad_norm": 1368.484619140625, "learning_rate": 2.7720804391838544e-05, "loss": 53.2469, "step": 129690 }, { "epoch": 0.5240044118181781, "grad_norm": 988.5595092773438, "learning_rate": 2.771733444085463e-05, "loss": 72.9639, "step": 129700 }, { "epoch": 0.5240448130835458, "grad_norm": 1024.283447265625, "learning_rate": 2.771386443689489e-05, "loss": 35.8563, "step": 129710 }, { "epoch": 0.5240852143489134, "grad_norm": 2501.96142578125, "learning_rate": 2.7710394380026954e-05, "loss": 93.8633, "step": 129720 }, { "epoch": 0.5241256156142811, "grad_norm": 397.55413818359375, "learning_rate": 2.7706924270318496e-05, "loss": 51.6667, "step": 129730 }, { "epoch": 0.5241660168796487, "grad_norm": 401.48687744140625, "learning_rate": 2.770345410783715e-05, "loss": 53.5041, "step": 129740 }, { "epoch": 0.5242064181450163, "grad_norm": 561.6734619140625, "learning_rate": 2.7699983892650573e-05, "loss": 60.5054, "step": 129750 }, { "epoch": 0.524246819410384, "grad_norm": 1459.1732177734375, "learning_rate": 2.769651362482642e-05, "loss": 80.3514, "step": 129760 }, { "epoch": 0.5242872206757516, "grad_norm": 211.4439239501953, "learning_rate": 2.7693043304432354e-05, "loss": 53.1344, "step": 129770 }, { "epoch": 0.5243276219411191, "grad_norm": 535.6337890625, "learning_rate": 2.7689572931536017e-05, "loss": 59.1036, "step": 129780 }, { "epoch": 0.5243680232064868, "grad_norm": 212.34646606445312, "learning_rate": 2.7686102506205068e-05, "loss": 50.72, "step": 129790 }, { "epoch": 0.5244084244718544, "grad_norm": 1038.571044921875, "learning_rate": 2.7682632028507167e-05, "loss": 43.0176, "step": 129800 }, { "epoch": 0.5244488257372221, "grad_norm": 621.1820678710938, "learning_rate": 2.7679161498509976e-05, "loss": 25.86, "step": 129810 }, { "epoch": 0.5244892270025897, "grad_norm": 441.5834045410156, "learning_rate": 2.7675690916281156e-05, "loss": 54.9453, "step": 129820 }, { "epoch": 0.5245296282679573, "grad_norm": 317.9010925292969, "learning_rate": 2.7672220281888357e-05, "loss": 95.5096, "step": 129830 }, { "epoch": 0.524570029533325, "grad_norm": 862.2459106445312, "learning_rate": 2.766874959539925e-05, "loss": 69.8221, "step": 129840 }, { "epoch": 0.5246104307986926, "grad_norm": 531.95263671875, "learning_rate": 2.76652788568815e-05, "loss": 49.4422, "step": 129850 }, { "epoch": 0.5246508320640603, "grad_norm": 694.7820434570312, "learning_rate": 2.7661808066402767e-05, "loss": 46.5116, "step": 129860 }, { "epoch": 0.5246912333294279, "grad_norm": 1010.5025024414062, "learning_rate": 2.765833722403071e-05, "loss": 71.7905, "step": 129870 }, { "epoch": 0.5247316345947955, "grad_norm": 285.8402099609375, "learning_rate": 2.7654866329833002e-05, "loss": 37.7266, "step": 129880 }, { "epoch": 0.5247720358601632, "grad_norm": 556.6085205078125, "learning_rate": 2.7651395383877304e-05, "loss": 54.9936, "step": 129890 }, { "epoch": 0.5248124371255307, "grad_norm": 1335.543701171875, "learning_rate": 2.76479243862313e-05, "loss": 71.0658, "step": 129900 }, { "epoch": 0.5248528383908984, "grad_norm": 510.7142639160156, "learning_rate": 2.7644453336962633e-05, "loss": 86.5262, "step": 129910 }, { "epoch": 0.524893239656266, "grad_norm": 629.845458984375, "learning_rate": 2.7640982236138992e-05, "loss": 47.5349, "step": 129920 }, { "epoch": 0.5249336409216336, "grad_norm": 1113.2122802734375, "learning_rate": 2.7637511083828043e-05, "loss": 66.3521, "step": 129930 }, { "epoch": 0.5249740421870013, "grad_norm": 365.0244140625, "learning_rate": 2.763403988009746e-05, "loss": 54.905, "step": 129940 }, { "epoch": 0.5250144434523689, "grad_norm": 543.3900756835938, "learning_rate": 2.7630568625014917e-05, "loss": 48.9247, "step": 129950 }, { "epoch": 0.5250548447177366, "grad_norm": 381.6798400878906, "learning_rate": 2.7627097318648076e-05, "loss": 39.9984, "step": 129960 }, { "epoch": 0.5250952459831042, "grad_norm": 1278.4371337890625, "learning_rate": 2.7623625961064618e-05, "loss": 60.6886, "step": 129970 }, { "epoch": 0.5251356472484718, "grad_norm": 500.2679443359375, "learning_rate": 2.7620154552332232e-05, "loss": 54.9953, "step": 129980 }, { "epoch": 0.5251760485138395, "grad_norm": 482.51458740234375, "learning_rate": 2.7616683092518576e-05, "loss": 55.6544, "step": 129990 }, { "epoch": 0.5252164497792071, "grad_norm": 359.9608154296875, "learning_rate": 2.761321158169134e-05, "loss": 64.5541, "step": 130000 }, { "epoch": 0.5252568510445748, "grad_norm": 398.5904541015625, "learning_rate": 2.7609740019918197e-05, "loss": 53.5339, "step": 130010 }, { "epoch": 0.5252972523099424, "grad_norm": 698.3560791015625, "learning_rate": 2.7606268407266827e-05, "loss": 61.4191, "step": 130020 }, { "epoch": 0.5253376535753099, "grad_norm": 768.4830932617188, "learning_rate": 2.7602796743804922e-05, "loss": 47.3355, "step": 130030 }, { "epoch": 0.5253780548406776, "grad_norm": 775.7195434570312, "learning_rate": 2.7599325029600143e-05, "loss": 25.0439, "step": 130040 }, { "epoch": 0.5254184561060452, "grad_norm": 1195.4205322265625, "learning_rate": 2.7595853264720184e-05, "loss": 70.0329, "step": 130050 }, { "epoch": 0.5254588573714128, "grad_norm": 606.2637329101562, "learning_rate": 2.759238144923274e-05, "loss": 53.4816, "step": 130060 }, { "epoch": 0.5254992586367805, "grad_norm": 469.8358459472656, "learning_rate": 2.7588909583205475e-05, "loss": 49.5405, "step": 130070 }, { "epoch": 0.5255396599021481, "grad_norm": 648.6143188476562, "learning_rate": 2.7585437666706087e-05, "loss": 69.9888, "step": 130080 }, { "epoch": 0.5255800611675158, "grad_norm": 612.1096801757812, "learning_rate": 2.758196569980226e-05, "loss": 51.9348, "step": 130090 }, { "epoch": 0.5256204624328834, "grad_norm": 1045.827392578125, "learning_rate": 2.7578493682561685e-05, "loss": 75.3227, "step": 130100 }, { "epoch": 0.525660863698251, "grad_norm": 990.9442138671875, "learning_rate": 2.757502161505205e-05, "loss": 73.4528, "step": 130110 }, { "epoch": 0.5257012649636187, "grad_norm": 510.2374572753906, "learning_rate": 2.7571549497341042e-05, "loss": 60.0831, "step": 130120 }, { "epoch": 0.5257416662289863, "grad_norm": 1088.753662109375, "learning_rate": 2.756807732949635e-05, "loss": 63.8334, "step": 130130 }, { "epoch": 0.525782067494354, "grad_norm": 1749.695068359375, "learning_rate": 2.756460511158567e-05, "loss": 54.9062, "step": 130140 }, { "epoch": 0.5258224687597216, "grad_norm": 992.9537963867188, "learning_rate": 2.756113284367669e-05, "loss": 55.7644, "step": 130150 }, { "epoch": 0.5258628700250891, "grad_norm": 679.5419311523438, "learning_rate": 2.7557660525837108e-05, "loss": 51.0159, "step": 130160 }, { "epoch": 0.5259032712904568, "grad_norm": 444.13031005859375, "learning_rate": 2.7554188158134616e-05, "loss": 19.1531, "step": 130170 }, { "epoch": 0.5259436725558244, "grad_norm": 416.7135314941406, "learning_rate": 2.7550715740636917e-05, "loss": 46.4561, "step": 130180 }, { "epoch": 0.525984073821192, "grad_norm": 933.5376586914062, "learning_rate": 2.7547243273411695e-05, "loss": 66.3492, "step": 130190 }, { "epoch": 0.5260244750865597, "grad_norm": 444.9652404785156, "learning_rate": 2.754377075652666e-05, "loss": 50.9244, "step": 130200 }, { "epoch": 0.5260648763519273, "grad_norm": 652.8956298828125, "learning_rate": 2.7540298190049503e-05, "loss": 59.4898, "step": 130210 }, { "epoch": 0.526105277617295, "grad_norm": 645.0548095703125, "learning_rate": 2.7536825574047925e-05, "loss": 35.8312, "step": 130220 }, { "epoch": 0.5261456788826626, "grad_norm": 534.1641845703125, "learning_rate": 2.7533352908589622e-05, "loss": 40.8292, "step": 130230 }, { "epoch": 0.5261860801480303, "grad_norm": 420.173583984375, "learning_rate": 2.7529880193742297e-05, "loss": 30.2268, "step": 130240 }, { "epoch": 0.5262264814133979, "grad_norm": 465.4990234375, "learning_rate": 2.7526407429573657e-05, "loss": 64.3663, "step": 130250 }, { "epoch": 0.5262668826787655, "grad_norm": 511.3827819824219, "learning_rate": 2.7522934616151414e-05, "loss": 56.9852, "step": 130260 }, { "epoch": 0.5263072839441332, "grad_norm": 624.65087890625, "learning_rate": 2.751946175354325e-05, "loss": 63.1493, "step": 130270 }, { "epoch": 0.5263476852095008, "grad_norm": 751.966064453125, "learning_rate": 2.7515988841816887e-05, "loss": 69.2688, "step": 130280 }, { "epoch": 0.5263880864748683, "grad_norm": 907.3563842773438, "learning_rate": 2.7512515881040028e-05, "loss": 54.0088, "step": 130290 }, { "epoch": 0.526428487740236, "grad_norm": 893.8932495117188, "learning_rate": 2.7509042871280372e-05, "loss": 54.6191, "step": 130300 }, { "epoch": 0.5264688890056036, "grad_norm": 1132.3380126953125, "learning_rate": 2.750556981260564e-05, "loss": 51.4256, "step": 130310 }, { "epoch": 0.5265092902709713, "grad_norm": 408.7311096191406, "learning_rate": 2.7502096705083535e-05, "loss": 44.0828, "step": 130320 }, { "epoch": 0.5265496915363389, "grad_norm": 569.1919555664062, "learning_rate": 2.749862354878176e-05, "loss": 34.8458, "step": 130330 }, { "epoch": 0.5265900928017065, "grad_norm": 759.1968383789062, "learning_rate": 2.7495150343768034e-05, "loss": 59.1251, "step": 130340 }, { "epoch": 0.5266304940670742, "grad_norm": 563.669677734375, "learning_rate": 2.7491677090110076e-05, "loss": 48.0987, "step": 130350 }, { "epoch": 0.5266708953324418, "grad_norm": 1401.8712158203125, "learning_rate": 2.7488203787875577e-05, "loss": 67.7033, "step": 130360 }, { "epoch": 0.5267112965978095, "grad_norm": 496.8099060058594, "learning_rate": 2.7484730437132278e-05, "loss": 44.0139, "step": 130370 }, { "epoch": 0.5267516978631771, "grad_norm": 533.447998046875, "learning_rate": 2.7481257037947872e-05, "loss": 47.0373, "step": 130380 }, { "epoch": 0.5267920991285447, "grad_norm": 825.5570068359375, "learning_rate": 2.7477783590390082e-05, "loss": 63.7042, "step": 130390 }, { "epoch": 0.5268325003939124, "grad_norm": 577.7359619140625, "learning_rate": 2.747431009452663e-05, "loss": 43.8473, "step": 130400 }, { "epoch": 0.52687290165928, "grad_norm": 2243.215087890625, "learning_rate": 2.747083655042522e-05, "loss": 58.3573, "step": 130410 }, { "epoch": 0.5269133029246476, "grad_norm": 897.6618041992188, "learning_rate": 2.7467362958153587e-05, "loss": 85.0911, "step": 130420 }, { "epoch": 0.5269537041900152, "grad_norm": 976.666259765625, "learning_rate": 2.7463889317779446e-05, "loss": 55.5755, "step": 130430 }, { "epoch": 0.5269941054553828, "grad_norm": 1133.8804931640625, "learning_rate": 2.7460415629370508e-05, "loss": 59.8554, "step": 130440 }, { "epoch": 0.5270345067207505, "grad_norm": 757.4534912109375, "learning_rate": 2.7456941892994497e-05, "loss": 43.9233, "step": 130450 }, { "epoch": 0.5270749079861181, "grad_norm": 962.9010009765625, "learning_rate": 2.7453468108719145e-05, "loss": 65.6795, "step": 130460 }, { "epoch": 0.5271153092514858, "grad_norm": 394.7574157714844, "learning_rate": 2.744999427661217e-05, "loss": 69.4007, "step": 130470 }, { "epoch": 0.5271557105168534, "grad_norm": 762.9472045898438, "learning_rate": 2.744652039674129e-05, "loss": 49.1858, "step": 130480 }, { "epoch": 0.527196111782221, "grad_norm": 2199.121337890625, "learning_rate": 2.7443046469174237e-05, "loss": 77.0622, "step": 130490 }, { "epoch": 0.5272365130475887, "grad_norm": 370.2516784667969, "learning_rate": 2.7439572493978736e-05, "loss": 51.3727, "step": 130500 }, { "epoch": 0.5272769143129563, "grad_norm": 1177.574951171875, "learning_rate": 2.7436098471222522e-05, "loss": 47.053, "step": 130510 }, { "epoch": 0.527317315578324, "grad_norm": 1359.2659912109375, "learning_rate": 2.74326244009733e-05, "loss": 61.0058, "step": 130520 }, { "epoch": 0.5273577168436916, "grad_norm": 1003.060546875, "learning_rate": 2.7429150283298817e-05, "loss": 52.1394, "step": 130530 }, { "epoch": 0.5273981181090591, "grad_norm": 1656.4512939453125, "learning_rate": 2.7425676118266808e-05, "loss": 46.5512, "step": 130540 }, { "epoch": 0.5274385193744268, "grad_norm": 509.01220703125, "learning_rate": 2.7422201905944982e-05, "loss": 52.2351, "step": 130550 }, { "epoch": 0.5274789206397944, "grad_norm": 1386.0987548828125, "learning_rate": 2.7418727646401094e-05, "loss": 44.8062, "step": 130560 }, { "epoch": 0.527519321905162, "grad_norm": 708.5176391601562, "learning_rate": 2.741525333970285e-05, "loss": 36.5751, "step": 130570 }, { "epoch": 0.5275597231705297, "grad_norm": 3067.193115234375, "learning_rate": 2.7411778985918006e-05, "loss": 98.0832, "step": 130580 }, { "epoch": 0.5276001244358973, "grad_norm": 598.675537109375, "learning_rate": 2.7408304585114298e-05, "loss": 60.4569, "step": 130590 }, { "epoch": 0.527640525701265, "grad_norm": 821.6907348632812, "learning_rate": 2.7404830137359444e-05, "loss": 71.1968, "step": 130600 }, { "epoch": 0.5276809269666326, "grad_norm": 796.5547485351562, "learning_rate": 2.740135564272119e-05, "loss": 64.8786, "step": 130610 }, { "epoch": 0.5277213282320002, "grad_norm": 587.248291015625, "learning_rate": 2.7397881101267263e-05, "loss": 61.1327, "step": 130620 }, { "epoch": 0.5277617294973679, "grad_norm": 603.5186157226562, "learning_rate": 2.7394406513065423e-05, "loss": 54.6609, "step": 130630 }, { "epoch": 0.5278021307627355, "grad_norm": 1664.0540771484375, "learning_rate": 2.739093187818339e-05, "loss": 69.9045, "step": 130640 }, { "epoch": 0.5278425320281032, "grad_norm": 847.181640625, "learning_rate": 2.7387457196688908e-05, "loss": 60.1619, "step": 130650 }, { "epoch": 0.5278829332934708, "grad_norm": 1435.385498046875, "learning_rate": 2.7383982468649714e-05, "loss": 48.167, "step": 130660 }, { "epoch": 0.5279233345588383, "grad_norm": 884.9189453125, "learning_rate": 2.738050769413357e-05, "loss": 57.8182, "step": 130670 }, { "epoch": 0.527963735824206, "grad_norm": 568.2256469726562, "learning_rate": 2.7377032873208186e-05, "loss": 66.162, "step": 130680 }, { "epoch": 0.5280041370895736, "grad_norm": 744.9805908203125, "learning_rate": 2.737355800594133e-05, "loss": 60.4696, "step": 130690 }, { "epoch": 0.5280445383549413, "grad_norm": 907.050537109375, "learning_rate": 2.7370083092400735e-05, "loss": 43.196, "step": 130700 }, { "epoch": 0.5280849396203089, "grad_norm": 778.4954833984375, "learning_rate": 2.7366608132654154e-05, "loss": 51.7738, "step": 130710 }, { "epoch": 0.5281253408856765, "grad_norm": 405.15338134765625, "learning_rate": 2.7363133126769325e-05, "loss": 64.0481, "step": 130720 }, { "epoch": 0.5281657421510442, "grad_norm": 591.9571533203125, "learning_rate": 2.735965807481401e-05, "loss": 45.1595, "step": 130730 }, { "epoch": 0.5282061434164118, "grad_norm": 985.9591064453125, "learning_rate": 2.7356182976855934e-05, "loss": 35.7408, "step": 130740 }, { "epoch": 0.5282465446817795, "grad_norm": 460.90887451171875, "learning_rate": 2.7352707832962865e-05, "loss": 52.3368, "step": 130750 }, { "epoch": 0.5282869459471471, "grad_norm": 900.5870971679688, "learning_rate": 2.734923264320254e-05, "loss": 31.7685, "step": 130760 }, { "epoch": 0.5283273472125147, "grad_norm": 851.9434814453125, "learning_rate": 2.7345757407642714e-05, "loss": 41.7662, "step": 130770 }, { "epoch": 0.5283677484778824, "grad_norm": 2293.946533203125, "learning_rate": 2.7342282126351144e-05, "loss": 96.9833, "step": 130780 }, { "epoch": 0.52840814974325, "grad_norm": 837.9186401367188, "learning_rate": 2.7338806799395577e-05, "loss": 53.9366, "step": 130790 }, { "epoch": 0.5284485510086175, "grad_norm": 314.82135009765625, "learning_rate": 2.733533142684377e-05, "loss": 56.3716, "step": 130800 }, { "epoch": 0.5284889522739852, "grad_norm": 918.69287109375, "learning_rate": 2.7331856008763472e-05, "loss": 50.4389, "step": 130810 }, { "epoch": 0.5285293535393528, "grad_norm": 775.286865234375, "learning_rate": 2.7328380545222436e-05, "loss": 64.7548, "step": 130820 }, { "epoch": 0.5285697548047205, "grad_norm": 461.6344909667969, "learning_rate": 2.732490503628843e-05, "loss": 53.2063, "step": 130830 }, { "epoch": 0.5286101560700881, "grad_norm": 415.2874755859375, "learning_rate": 2.73214294820292e-05, "loss": 94.1432, "step": 130840 }, { "epoch": 0.5286505573354557, "grad_norm": 718.537109375, "learning_rate": 2.7317953882512504e-05, "loss": 47.0089, "step": 130850 }, { "epoch": 0.5286909586008234, "grad_norm": 524.0968017578125, "learning_rate": 2.7314478237806107e-05, "loss": 50.3915, "step": 130860 }, { "epoch": 0.528731359866191, "grad_norm": 685.7463989257812, "learning_rate": 2.7311002547977766e-05, "loss": 47.7177, "step": 130870 }, { "epoch": 0.5287717611315587, "grad_norm": 658.6242065429688, "learning_rate": 2.730752681309524e-05, "loss": 44.9115, "step": 130880 }, { "epoch": 0.5288121623969263, "grad_norm": 831.5072021484375, "learning_rate": 2.730405103322629e-05, "loss": 36.1585, "step": 130890 }, { "epoch": 0.5288525636622939, "grad_norm": 2113.5791015625, "learning_rate": 2.7300575208438683e-05, "loss": 64.7953, "step": 130900 }, { "epoch": 0.5288929649276616, "grad_norm": 1694.7403564453125, "learning_rate": 2.729709933880017e-05, "loss": 64.9504, "step": 130910 }, { "epoch": 0.5289333661930292, "grad_norm": 959.0403442382812, "learning_rate": 2.7293623424378535e-05, "loss": 62.3716, "step": 130920 }, { "epoch": 0.5289737674583967, "grad_norm": 2491.924072265625, "learning_rate": 2.7290147465241517e-05, "loss": 52.1772, "step": 130930 }, { "epoch": 0.5290141687237644, "grad_norm": 243.4221954345703, "learning_rate": 2.7286671461456897e-05, "loss": 38.3009, "step": 130940 }, { "epoch": 0.529054569989132, "grad_norm": 1660.8577880859375, "learning_rate": 2.7283195413092445e-05, "loss": 60.126, "step": 130950 }, { "epoch": 0.5290949712544997, "grad_norm": 609.1519165039062, "learning_rate": 2.7279719320215924e-05, "loss": 59.0399, "step": 130960 }, { "epoch": 0.5291353725198673, "grad_norm": 379.8237609863281, "learning_rate": 2.7276243182895094e-05, "loss": 64.3225, "step": 130970 }, { "epoch": 0.529175773785235, "grad_norm": 535.1643676757812, "learning_rate": 2.7272767001197742e-05, "loss": 37.2136, "step": 130980 }, { "epoch": 0.5292161750506026, "grad_norm": 587.9727783203125, "learning_rate": 2.726929077519162e-05, "loss": 41.9734, "step": 130990 }, { "epoch": 0.5292565763159702, "grad_norm": 652.3465576171875, "learning_rate": 2.726581450494451e-05, "loss": 83.3366, "step": 131000 }, { "epoch": 0.5292969775813379, "grad_norm": 395.1925964355469, "learning_rate": 2.7262338190524173e-05, "loss": 70.538, "step": 131010 }, { "epoch": 0.5293373788467055, "grad_norm": 602.4820556640625, "learning_rate": 2.7258861831998388e-05, "loss": 55.8564, "step": 131020 }, { "epoch": 0.5293777801120731, "grad_norm": 2387.6884765625, "learning_rate": 2.7255385429434932e-05, "loss": 77.701, "step": 131030 }, { "epoch": 0.5294181813774408, "grad_norm": 626.4151000976562, "learning_rate": 2.725190898290158e-05, "loss": 71.4265, "step": 131040 }, { "epoch": 0.5294585826428083, "grad_norm": 1227.336669921875, "learning_rate": 2.7248432492466096e-05, "loss": 63.7168, "step": 131050 }, { "epoch": 0.529498983908176, "grad_norm": 506.81475830078125, "learning_rate": 2.7244955958196265e-05, "loss": 38.2177, "step": 131060 }, { "epoch": 0.5295393851735436, "grad_norm": 550.5950317382812, "learning_rate": 2.7241479380159868e-05, "loss": 69.3767, "step": 131070 }, { "epoch": 0.5295797864389112, "grad_norm": 751.3677368164062, "learning_rate": 2.723800275842468e-05, "loss": 45.0376, "step": 131080 }, { "epoch": 0.5296201877042789, "grad_norm": 459.6380310058594, "learning_rate": 2.7234526093058464e-05, "loss": 59.1021, "step": 131090 }, { "epoch": 0.5296605889696465, "grad_norm": 463.3095703125, "learning_rate": 2.7231049384129016e-05, "loss": 37.5911, "step": 131100 }, { "epoch": 0.5297009902350142, "grad_norm": 0.0, "learning_rate": 2.7227572631704107e-05, "loss": 52.0239, "step": 131110 }, { "epoch": 0.5297413915003818, "grad_norm": 1287.0667724609375, "learning_rate": 2.7224095835851525e-05, "loss": 85.9998, "step": 131120 }, { "epoch": 0.5297817927657494, "grad_norm": 710.130615234375, "learning_rate": 2.722061899663905e-05, "loss": 41.3392, "step": 131130 }, { "epoch": 0.5298221940311171, "grad_norm": 241.11277770996094, "learning_rate": 2.7217142114134463e-05, "loss": 75.5654, "step": 131140 }, { "epoch": 0.5298625952964847, "grad_norm": 2309.976806640625, "learning_rate": 2.7213665188405556e-05, "loss": 62.305, "step": 131150 }, { "epoch": 0.5299029965618524, "grad_norm": 592.4625244140625, "learning_rate": 2.721018821952011e-05, "loss": 49.6338, "step": 131160 }, { "epoch": 0.52994339782722, "grad_norm": 3443.605224609375, "learning_rate": 2.7206711207545893e-05, "loss": 65.5017, "step": 131170 }, { "epoch": 0.5299837990925875, "grad_norm": 924.45654296875, "learning_rate": 2.7203234152550712e-05, "loss": 52.5658, "step": 131180 }, { "epoch": 0.5300242003579552, "grad_norm": 385.39910888671875, "learning_rate": 2.719975705460234e-05, "loss": 35.2366, "step": 131190 }, { "epoch": 0.5300646016233228, "grad_norm": 709.4951171875, "learning_rate": 2.7196279913768584e-05, "loss": 56.7616, "step": 131200 }, { "epoch": 0.5301050028886904, "grad_norm": 849.27294921875, "learning_rate": 2.719280273011721e-05, "loss": 62.0372, "step": 131210 }, { "epoch": 0.5301454041540581, "grad_norm": 943.2574462890625, "learning_rate": 2.7189325503716022e-05, "loss": 81.1754, "step": 131220 }, { "epoch": 0.5301858054194257, "grad_norm": 839.4765014648438, "learning_rate": 2.7185848234632803e-05, "loss": 65.2041, "step": 131230 }, { "epoch": 0.5302262066847934, "grad_norm": 879.865966796875, "learning_rate": 2.7182370922935353e-05, "loss": 57.2173, "step": 131240 }, { "epoch": 0.530266607950161, "grad_norm": 980.9358520507812, "learning_rate": 2.717889356869146e-05, "loss": 47.6376, "step": 131250 }, { "epoch": 0.5303070092155286, "grad_norm": 680.2994384765625, "learning_rate": 2.717541617196891e-05, "loss": 35.0096, "step": 131260 }, { "epoch": 0.5303474104808963, "grad_norm": 870.2879638671875, "learning_rate": 2.71719387328355e-05, "loss": 54.6478, "step": 131270 }, { "epoch": 0.5303878117462639, "grad_norm": 456.5863952636719, "learning_rate": 2.716846125135903e-05, "loss": 66.5173, "step": 131280 }, { "epoch": 0.5304282130116316, "grad_norm": 1249.5938720703125, "learning_rate": 2.716498372760729e-05, "loss": 44.1854, "step": 131290 }, { "epoch": 0.5304686142769992, "grad_norm": 364.5703430175781, "learning_rate": 2.7161506161648076e-05, "loss": 58.9766, "step": 131300 }, { "epoch": 0.5305090155423667, "grad_norm": 890.297119140625, "learning_rate": 2.7158028553549187e-05, "loss": 56.6682, "step": 131310 }, { "epoch": 0.5305494168077344, "grad_norm": 925.9620361328125, "learning_rate": 2.715455090337842e-05, "loss": 43.9666, "step": 131320 }, { "epoch": 0.530589818073102, "grad_norm": 511.55609130859375, "learning_rate": 2.715107321120358e-05, "loss": 51.9629, "step": 131330 }, { "epoch": 0.5306302193384697, "grad_norm": 547.1328125, "learning_rate": 2.7147595477092457e-05, "loss": 43.9675, "step": 131340 }, { "epoch": 0.5306706206038373, "grad_norm": 784.9970703125, "learning_rate": 2.7144117701112846e-05, "loss": 65.4148, "step": 131350 }, { "epoch": 0.5307110218692049, "grad_norm": 653.8527221679688, "learning_rate": 2.7140639883332564e-05, "loss": 64.9822, "step": 131360 }, { "epoch": 0.5307514231345726, "grad_norm": 480.8045959472656, "learning_rate": 2.713716202381941e-05, "loss": 53.8297, "step": 131370 }, { "epoch": 0.5307918243999402, "grad_norm": 251.93240356445312, "learning_rate": 2.713368412264118e-05, "loss": 55.8464, "step": 131380 }, { "epoch": 0.5308322256653079, "grad_norm": 609.9501953125, "learning_rate": 2.713020617986567e-05, "loss": 38.5014, "step": 131390 }, { "epoch": 0.5308726269306755, "grad_norm": 921.5916748046875, "learning_rate": 2.7126728195560702e-05, "loss": 52.8066, "step": 131400 }, { "epoch": 0.5309130281960431, "grad_norm": 912.7449951171875, "learning_rate": 2.7123250169794075e-05, "loss": 54.2252, "step": 131410 }, { "epoch": 0.5309534294614108, "grad_norm": 718.9556274414062, "learning_rate": 2.711977210263359e-05, "loss": 57.7856, "step": 131420 }, { "epoch": 0.5309938307267784, "grad_norm": 457.2162170410156, "learning_rate": 2.7116293994147053e-05, "loss": 64.9568, "step": 131430 }, { "epoch": 0.5310342319921459, "grad_norm": 621.4869995117188, "learning_rate": 2.711281584440228e-05, "loss": 62.9255, "step": 131440 }, { "epoch": 0.5310746332575136, "grad_norm": 792.2979125976562, "learning_rate": 2.710933765346707e-05, "loss": 47.9132, "step": 131450 }, { "epoch": 0.5311150345228812, "grad_norm": 539.8186645507812, "learning_rate": 2.710585942140924e-05, "loss": 54.2183, "step": 131460 }, { "epoch": 0.5311554357882489, "grad_norm": 558.9265747070312, "learning_rate": 2.710238114829659e-05, "loss": 49.2721, "step": 131470 }, { "epoch": 0.5311958370536165, "grad_norm": 709.4071655273438, "learning_rate": 2.7098902834196943e-05, "loss": 58.9437, "step": 131480 }, { "epoch": 0.5312362383189841, "grad_norm": 654.2186889648438, "learning_rate": 2.7095424479178106e-05, "loss": 36.4761, "step": 131490 }, { "epoch": 0.5312766395843518, "grad_norm": 889.7582397460938, "learning_rate": 2.7091946083307896e-05, "loss": 68.6755, "step": 131500 }, { "epoch": 0.5313170408497194, "grad_norm": 1404.284912109375, "learning_rate": 2.708846764665411e-05, "loss": 67.0801, "step": 131510 }, { "epoch": 0.5313574421150871, "grad_norm": 774.0541381835938, "learning_rate": 2.7084989169284568e-05, "loss": 70.2885, "step": 131520 }, { "epoch": 0.5313978433804547, "grad_norm": 676.1631469726562, "learning_rate": 2.70815106512671e-05, "loss": 57.3858, "step": 131530 }, { "epoch": 0.5314382446458223, "grad_norm": 288.2684631347656, "learning_rate": 2.7078032092669502e-05, "loss": 57.0647, "step": 131540 }, { "epoch": 0.53147864591119, "grad_norm": 692.8833618164062, "learning_rate": 2.70745534935596e-05, "loss": 52.4784, "step": 131550 }, { "epoch": 0.5315190471765576, "grad_norm": 360.534912109375, "learning_rate": 2.707107485400521e-05, "loss": 76.2931, "step": 131560 }, { "epoch": 0.5315594484419252, "grad_norm": 902.7174682617188, "learning_rate": 2.7067596174074155e-05, "loss": 66.0437, "step": 131570 }, { "epoch": 0.5315998497072928, "grad_norm": 1684.62451171875, "learning_rate": 2.7064117453834243e-05, "loss": 48.1203, "step": 131580 }, { "epoch": 0.5316402509726604, "grad_norm": 74.62276458740234, "learning_rate": 2.70606386933533e-05, "loss": 69.9313, "step": 131590 }, { "epoch": 0.5316806522380281, "grad_norm": 347.76605224609375, "learning_rate": 2.705715989269914e-05, "loss": 67.9091, "step": 131600 }, { "epoch": 0.5317210535033957, "grad_norm": 2573.411865234375, "learning_rate": 2.70536810519396e-05, "loss": 58.865, "step": 131610 }, { "epoch": 0.5317614547687634, "grad_norm": 265.0232849121094, "learning_rate": 2.705020217114248e-05, "loss": 47.0783, "step": 131620 }, { "epoch": 0.531801856034131, "grad_norm": 801.228515625, "learning_rate": 2.7046723250375617e-05, "loss": 51.892, "step": 131630 }, { "epoch": 0.5318422572994986, "grad_norm": 833.2240600585938, "learning_rate": 2.7043244289706826e-05, "loss": 49.0188, "step": 131640 }, { "epoch": 0.5318826585648663, "grad_norm": 984.8556518554688, "learning_rate": 2.7039765289203946e-05, "loss": 55.2805, "step": 131650 }, { "epoch": 0.5319230598302339, "grad_norm": 779.8922119140625, "learning_rate": 2.703628624893478e-05, "loss": 40.7662, "step": 131660 }, { "epoch": 0.5319634610956016, "grad_norm": 687.851318359375, "learning_rate": 2.703280716896717e-05, "loss": 71.506, "step": 131670 }, { "epoch": 0.5320038623609692, "grad_norm": 561.60888671875, "learning_rate": 2.702932804936894e-05, "loss": 76.0556, "step": 131680 }, { "epoch": 0.5320442636263367, "grad_norm": 597.3526611328125, "learning_rate": 2.7025848890207917e-05, "loss": 51.0563, "step": 131690 }, { "epoch": 0.5320846648917044, "grad_norm": 1162.3173828125, "learning_rate": 2.7022369691551917e-05, "loss": 67.7418, "step": 131700 }, { "epoch": 0.532125066157072, "grad_norm": 1007.927734375, "learning_rate": 2.7018890453468788e-05, "loss": 55.7545, "step": 131710 }, { "epoch": 0.5321654674224396, "grad_norm": 583.6637573242188, "learning_rate": 2.7015411176026344e-05, "loss": 59.1643, "step": 131720 }, { "epoch": 0.5322058686878073, "grad_norm": 503.95025634765625, "learning_rate": 2.7011931859292427e-05, "loss": 57.7244, "step": 131730 }, { "epoch": 0.5322462699531749, "grad_norm": 545.329345703125, "learning_rate": 2.7008452503334858e-05, "loss": 46.0308, "step": 131740 }, { "epoch": 0.5322866712185426, "grad_norm": 380.2509765625, "learning_rate": 2.7004973108221472e-05, "loss": 40.5332, "step": 131750 }, { "epoch": 0.5323270724839102, "grad_norm": 740.1156005859375, "learning_rate": 2.700149367402011e-05, "loss": 60.6691, "step": 131760 }, { "epoch": 0.5323674737492778, "grad_norm": 1089.05712890625, "learning_rate": 2.69980142007986e-05, "loss": 65.5629, "step": 131770 }, { "epoch": 0.5324078750146455, "grad_norm": 537.0314331054688, "learning_rate": 2.699453468862477e-05, "loss": 55.9601, "step": 131780 }, { "epoch": 0.5324482762800131, "grad_norm": 266.5636291503906, "learning_rate": 2.699105513756645e-05, "loss": 59.2656, "step": 131790 }, { "epoch": 0.5324886775453808, "grad_norm": 889.6873779296875, "learning_rate": 2.6987575547691497e-05, "loss": 76.0885, "step": 131800 }, { "epoch": 0.5325290788107484, "grad_norm": 1049.7486572265625, "learning_rate": 2.698409591906773e-05, "loss": 50.9139, "step": 131810 }, { "epoch": 0.5325694800761159, "grad_norm": 820.5618286132812, "learning_rate": 2.6980616251762997e-05, "loss": 36.7931, "step": 131820 }, { "epoch": 0.5326098813414836, "grad_norm": 643.490966796875, "learning_rate": 2.6977136545845122e-05, "loss": 56.8342, "step": 131830 }, { "epoch": 0.5326502826068512, "grad_norm": 853.44970703125, "learning_rate": 2.6973656801381963e-05, "loss": 60.0717, "step": 131840 }, { "epoch": 0.5326906838722189, "grad_norm": 440.1711730957031, "learning_rate": 2.697017701844134e-05, "loss": 45.1691, "step": 131850 }, { "epoch": 0.5327310851375865, "grad_norm": 468.0743408203125, "learning_rate": 2.6966697197091108e-05, "loss": 54.0596, "step": 131860 }, { "epoch": 0.5327714864029541, "grad_norm": 1050.264892578125, "learning_rate": 2.69632173373991e-05, "loss": 55.2322, "step": 131870 }, { "epoch": 0.5328118876683218, "grad_norm": 720.6689453125, "learning_rate": 2.695973743943315e-05, "loss": 55.2848, "step": 131880 }, { "epoch": 0.5328522889336894, "grad_norm": 1108.028564453125, "learning_rate": 2.6956257503261116e-05, "loss": 40.9585, "step": 131890 }, { "epoch": 0.532892690199057, "grad_norm": 1092.5072021484375, "learning_rate": 2.695277752895084e-05, "loss": 44.4573, "step": 131900 }, { "epoch": 0.5329330914644247, "grad_norm": 625.7965087890625, "learning_rate": 2.6949297516570156e-05, "loss": 28.5109, "step": 131910 }, { "epoch": 0.5329734927297923, "grad_norm": 681.8804931640625, "learning_rate": 2.6945817466186912e-05, "loss": 43.6512, "step": 131920 }, { "epoch": 0.53301389399516, "grad_norm": 756.3519897460938, "learning_rate": 2.694233737786896e-05, "loss": 76.8343, "step": 131930 }, { "epoch": 0.5330542952605276, "grad_norm": 1092.2066650390625, "learning_rate": 2.693885725168414e-05, "loss": 46.8568, "step": 131940 }, { "epoch": 0.5330946965258951, "grad_norm": 988.5387573242188, "learning_rate": 2.6935377087700297e-05, "loss": 73.0625, "step": 131950 }, { "epoch": 0.5331350977912628, "grad_norm": 940.03076171875, "learning_rate": 2.693189688598528e-05, "loss": 63.5997, "step": 131960 }, { "epoch": 0.5331754990566304, "grad_norm": 372.99151611328125, "learning_rate": 2.6928416646606936e-05, "loss": 57.4221, "step": 131970 }, { "epoch": 0.5332159003219981, "grad_norm": 1288.4371337890625, "learning_rate": 2.6924936369633125e-05, "loss": 48.3105, "step": 131980 }, { "epoch": 0.5332563015873657, "grad_norm": 2535.275634765625, "learning_rate": 2.6921456055131683e-05, "loss": 67.3763, "step": 131990 }, { "epoch": 0.5332967028527333, "grad_norm": 3868.3837890625, "learning_rate": 2.6917975703170466e-05, "loss": 65.5701, "step": 132000 }, { "epoch": 0.533337104118101, "grad_norm": 810.7564697265625, "learning_rate": 2.691449531381733e-05, "loss": 64.9525, "step": 132010 }, { "epoch": 0.5333775053834686, "grad_norm": 1662.32373046875, "learning_rate": 2.6911014887140122e-05, "loss": 57.4445, "step": 132020 }, { "epoch": 0.5334179066488363, "grad_norm": 528.4577026367188, "learning_rate": 2.6907534423206692e-05, "loss": 49.0796, "step": 132030 }, { "epoch": 0.5334583079142039, "grad_norm": 798.1748657226562, "learning_rate": 2.6904053922084895e-05, "loss": 51.6413, "step": 132040 }, { "epoch": 0.5334987091795715, "grad_norm": 589.7070922851562, "learning_rate": 2.6900573383842583e-05, "loss": 33.4644, "step": 132050 }, { "epoch": 0.5335391104449392, "grad_norm": 757.95458984375, "learning_rate": 2.689709280854762e-05, "loss": 52.9057, "step": 132060 }, { "epoch": 0.5335795117103068, "grad_norm": 567.9570922851562, "learning_rate": 2.6893612196267853e-05, "loss": 46.4571, "step": 132070 }, { "epoch": 0.5336199129756743, "grad_norm": 732.6300048828125, "learning_rate": 2.6890131547071147e-05, "loss": 66.3549, "step": 132080 }, { "epoch": 0.533660314241042, "grad_norm": 1564.1744384765625, "learning_rate": 2.6886650861025343e-05, "loss": 82.0941, "step": 132090 }, { "epoch": 0.5337007155064096, "grad_norm": 6092.22314453125, "learning_rate": 2.6883170138198323e-05, "loss": 73.8271, "step": 132100 }, { "epoch": 0.5337411167717773, "grad_norm": 3677.495361328125, "learning_rate": 2.6879689378657923e-05, "loss": 83.4729, "step": 132110 }, { "epoch": 0.5337815180371449, "grad_norm": 830.318603515625, "learning_rate": 2.6876208582472012e-05, "loss": 43.1947, "step": 132120 }, { "epoch": 0.5338219193025125, "grad_norm": 861.20849609375, "learning_rate": 2.687272774970845e-05, "loss": 79.1685, "step": 132130 }, { "epoch": 0.5338623205678802, "grad_norm": 541.7510375976562, "learning_rate": 2.6869246880435095e-05, "loss": 50.6851, "step": 132140 }, { "epoch": 0.5339027218332478, "grad_norm": 2233.982177734375, "learning_rate": 2.686576597471981e-05, "loss": 64.9713, "step": 132150 }, { "epoch": 0.5339431230986155, "grad_norm": 801.9125366210938, "learning_rate": 2.686228503263045e-05, "loss": 67.6367, "step": 132160 }, { "epoch": 0.5339835243639831, "grad_norm": 577.6140747070312, "learning_rate": 2.685880405423489e-05, "loss": 56.8992, "step": 132170 }, { "epoch": 0.5340239256293507, "grad_norm": 303.9769592285156, "learning_rate": 2.6855323039601e-05, "loss": 55.4841, "step": 132180 }, { "epoch": 0.5340643268947184, "grad_norm": 0.0, "learning_rate": 2.685184198879662e-05, "loss": 79.864, "step": 132190 }, { "epoch": 0.534104728160086, "grad_norm": 1271.53759765625, "learning_rate": 2.684836090188963e-05, "loss": 53.1909, "step": 132200 }, { "epoch": 0.5341451294254536, "grad_norm": 390.48504638671875, "learning_rate": 2.6844879778947884e-05, "loss": 45.1817, "step": 132210 }, { "epoch": 0.5341855306908212, "grad_norm": 1045.8829345703125, "learning_rate": 2.6841398620039273e-05, "loss": 57.4035, "step": 132220 }, { "epoch": 0.5342259319561888, "grad_norm": 211.4333038330078, "learning_rate": 2.6837917425231633e-05, "loss": 93.9413, "step": 132230 }, { "epoch": 0.5342663332215565, "grad_norm": 362.2946472167969, "learning_rate": 2.6834436194592853e-05, "loss": 33.1701, "step": 132240 }, { "epoch": 0.5343067344869241, "grad_norm": 875.4388427734375, "learning_rate": 2.6830954928190794e-05, "loss": 76.5484, "step": 132250 }, { "epoch": 0.5343471357522918, "grad_norm": 546.877197265625, "learning_rate": 2.682747362609333e-05, "loss": 38.9788, "step": 132260 }, { "epoch": 0.5343875370176594, "grad_norm": 674.6320190429688, "learning_rate": 2.6823992288368322e-05, "loss": 70.6211, "step": 132270 }, { "epoch": 0.534427938283027, "grad_norm": 868.1038818359375, "learning_rate": 2.6820510915083648e-05, "loss": 49.9378, "step": 132280 }, { "epoch": 0.5344683395483947, "grad_norm": 355.66741943359375, "learning_rate": 2.681702950630717e-05, "loss": 34.1495, "step": 132290 }, { "epoch": 0.5345087408137623, "grad_norm": 460.1276550292969, "learning_rate": 2.6813548062106775e-05, "loss": 64.0979, "step": 132300 }, { "epoch": 0.53454914207913, "grad_norm": 0.0, "learning_rate": 2.6810066582550324e-05, "loss": 59.0976, "step": 132310 }, { "epoch": 0.5345895433444976, "grad_norm": 310.49627685546875, "learning_rate": 2.6806585067705692e-05, "loss": 64.1751, "step": 132320 }, { "epoch": 0.5346299446098651, "grad_norm": 862.5602416992188, "learning_rate": 2.680310351764075e-05, "loss": 63.3662, "step": 132330 }, { "epoch": 0.5346703458752328, "grad_norm": 802.3466186523438, "learning_rate": 2.679962193242338e-05, "loss": 71.8971, "step": 132340 }, { "epoch": 0.5347107471406004, "grad_norm": 1650.000244140625, "learning_rate": 2.6796140312121458e-05, "loss": 59.4168, "step": 132350 }, { "epoch": 0.534751148405968, "grad_norm": 832.1773071289062, "learning_rate": 2.6792658656802856e-05, "loss": 45.569, "step": 132360 }, { "epoch": 0.5347915496713357, "grad_norm": 917.97412109375, "learning_rate": 2.6789176966535444e-05, "loss": 45.4459, "step": 132370 }, { "epoch": 0.5348319509367033, "grad_norm": 583.6998901367188, "learning_rate": 2.678569524138711e-05, "loss": 66.7321, "step": 132380 }, { "epoch": 0.534872352202071, "grad_norm": 418.53680419921875, "learning_rate": 2.678221348142573e-05, "loss": 34.0593, "step": 132390 }, { "epoch": 0.5349127534674386, "grad_norm": 402.7685241699219, "learning_rate": 2.6778731686719178e-05, "loss": 55.3928, "step": 132400 }, { "epoch": 0.5349531547328062, "grad_norm": 372.7942199707031, "learning_rate": 2.6775249857335333e-05, "loss": 30.0867, "step": 132410 }, { "epoch": 0.5349935559981739, "grad_norm": 1119.2479248046875, "learning_rate": 2.677176799334208e-05, "loss": 58.895, "step": 132420 }, { "epoch": 0.5350339572635415, "grad_norm": 407.2303771972656, "learning_rate": 2.6768286094807298e-05, "loss": 64.0234, "step": 132430 }, { "epoch": 0.5350743585289092, "grad_norm": 855.4847412109375, "learning_rate": 2.6764804161798867e-05, "loss": 89.9496, "step": 132440 }, { "epoch": 0.5351147597942768, "grad_norm": 1015.7791748046875, "learning_rate": 2.6761322194384674e-05, "loss": 60.1903, "step": 132450 }, { "epoch": 0.5351551610596443, "grad_norm": 1254.9368896484375, "learning_rate": 2.6757840192632598e-05, "loss": 61.4563, "step": 132460 }, { "epoch": 0.535195562325012, "grad_norm": 299.9359436035156, "learning_rate": 2.6754358156610525e-05, "loss": 38.3524, "step": 132470 }, { "epoch": 0.5352359635903796, "grad_norm": 811.1012573242188, "learning_rate": 2.6750876086386328e-05, "loss": 47.9699, "step": 132480 }, { "epoch": 0.5352763648557473, "grad_norm": 893.4951171875, "learning_rate": 2.6747393982027903e-05, "loss": 48.4848, "step": 132490 }, { "epoch": 0.5353167661211149, "grad_norm": 727.3240966796875, "learning_rate": 2.674391184360313e-05, "loss": 34.6639, "step": 132500 }, { "epoch": 0.5353571673864825, "grad_norm": 2415.583251953125, "learning_rate": 2.6740429671179907e-05, "loss": 73.8259, "step": 132510 }, { "epoch": 0.5353975686518502, "grad_norm": 443.5478515625, "learning_rate": 2.6736947464826108e-05, "loss": 48.6447, "step": 132520 }, { "epoch": 0.5354379699172178, "grad_norm": 1031.6845703125, "learning_rate": 2.6733465224609622e-05, "loss": 54.8037, "step": 132530 }, { "epoch": 0.5354783711825855, "grad_norm": 564.192138671875, "learning_rate": 2.6729982950598338e-05, "loss": 51.8397, "step": 132540 }, { "epoch": 0.5355187724479531, "grad_norm": 472.5278625488281, "learning_rate": 2.6726500642860154e-05, "loss": 49.6563, "step": 132550 }, { "epoch": 0.5355591737133207, "grad_norm": 551.7787475585938, "learning_rate": 2.6723018301462937e-05, "loss": 43.9799, "step": 132560 }, { "epoch": 0.5355995749786884, "grad_norm": 1193.7701416015625, "learning_rate": 2.67195359264746e-05, "loss": 64.9433, "step": 132570 }, { "epoch": 0.535639976244056, "grad_norm": 799.9347534179688, "learning_rate": 2.671605351796302e-05, "loss": 41.734, "step": 132580 }, { "epoch": 0.5356803775094235, "grad_norm": 759.9945068359375, "learning_rate": 2.67125710759961e-05, "loss": 76.4194, "step": 132590 }, { "epoch": 0.5357207787747912, "grad_norm": 647.0215454101562, "learning_rate": 2.6709088600641717e-05, "loss": 43.6254, "step": 132600 }, { "epoch": 0.5357611800401588, "grad_norm": 720.9840698242188, "learning_rate": 2.6705606091967778e-05, "loss": 49.2521, "step": 132610 }, { "epoch": 0.5358015813055265, "grad_norm": 1699.47216796875, "learning_rate": 2.670212355004217e-05, "loss": 48.1515, "step": 132620 }, { "epoch": 0.5358419825708941, "grad_norm": 912.8600463867188, "learning_rate": 2.6698640974932793e-05, "loss": 60.5469, "step": 132630 }, { "epoch": 0.5358823838362617, "grad_norm": 562.8499755859375, "learning_rate": 2.6695158366707522e-05, "loss": 53.9552, "step": 132640 }, { "epoch": 0.5359227851016294, "grad_norm": 778.0060424804688, "learning_rate": 2.6691675725434272e-05, "loss": 52.4513, "step": 132650 }, { "epoch": 0.535963186366997, "grad_norm": 773.4092407226562, "learning_rate": 2.6688193051180933e-05, "loss": 39.1972, "step": 132660 }, { "epoch": 0.5360035876323647, "grad_norm": 608.3802490234375, "learning_rate": 2.66847103440154e-05, "loss": 37.7895, "step": 132670 }, { "epoch": 0.5360439888977323, "grad_norm": 775.6356811523438, "learning_rate": 2.6681227604005576e-05, "loss": 41.3886, "step": 132680 }, { "epoch": 0.5360843901630999, "grad_norm": 1359.913818359375, "learning_rate": 2.6677744831219348e-05, "loss": 69.0232, "step": 132690 }, { "epoch": 0.5361247914284676, "grad_norm": 540.3826293945312, "learning_rate": 2.6674262025724627e-05, "loss": 32.8396, "step": 132700 }, { "epoch": 0.5361651926938352, "grad_norm": 862.3062744140625, "learning_rate": 2.66707791875893e-05, "loss": 64.9376, "step": 132710 }, { "epoch": 0.5362055939592028, "grad_norm": 557.792236328125, "learning_rate": 2.666729631688128e-05, "loss": 53.6906, "step": 132720 }, { "epoch": 0.5362459952245704, "grad_norm": 711.6216430664062, "learning_rate": 2.6663813413668455e-05, "loss": 55.813, "step": 132730 }, { "epoch": 0.536286396489938, "grad_norm": 795.2600708007812, "learning_rate": 2.6660330478018726e-05, "loss": 57.3366, "step": 132740 }, { "epoch": 0.5363267977553057, "grad_norm": 375.18975830078125, "learning_rate": 2.6656847510000012e-05, "loss": 64.0476, "step": 132750 }, { "epoch": 0.5363671990206733, "grad_norm": 765.218994140625, "learning_rate": 2.6653364509680188e-05, "loss": 52.8319, "step": 132760 }, { "epoch": 0.536407600286041, "grad_norm": 609.1416015625, "learning_rate": 2.664988147712718e-05, "loss": 34.5201, "step": 132770 }, { "epoch": 0.5364480015514086, "grad_norm": 629.1380615234375, "learning_rate": 2.664639841240888e-05, "loss": 30.9847, "step": 132780 }, { "epoch": 0.5364884028167762, "grad_norm": 0.0, "learning_rate": 2.6642915315593204e-05, "loss": 41.8538, "step": 132790 }, { "epoch": 0.5365288040821439, "grad_norm": 751.7651977539062, "learning_rate": 2.6639432186748043e-05, "loss": 53.989, "step": 132800 }, { "epoch": 0.5365692053475115, "grad_norm": 829.0548095703125, "learning_rate": 2.6635949025941303e-05, "loss": 55.0998, "step": 132810 }, { "epoch": 0.5366096066128792, "grad_norm": 1614.1934814453125, "learning_rate": 2.6632465833240893e-05, "loss": 67.0182, "step": 132820 }, { "epoch": 0.5366500078782468, "grad_norm": 1317.6927490234375, "learning_rate": 2.662898260871473e-05, "loss": 61.6694, "step": 132830 }, { "epoch": 0.5366904091436144, "grad_norm": 427.5072937011719, "learning_rate": 2.662549935243071e-05, "loss": 50.9423, "step": 132840 }, { "epoch": 0.536730810408982, "grad_norm": 1174.3800048828125, "learning_rate": 2.6622016064456738e-05, "loss": 72.6584, "step": 132850 }, { "epoch": 0.5367712116743496, "grad_norm": 379.9467468261719, "learning_rate": 2.661853274486073e-05, "loss": 64.3494, "step": 132860 }, { "epoch": 0.5368116129397172, "grad_norm": 1121.682861328125, "learning_rate": 2.6615049393710596e-05, "loss": 87.2399, "step": 132870 }, { "epoch": 0.5368520142050849, "grad_norm": 1015.9825439453125, "learning_rate": 2.661156601107424e-05, "loss": 76.0801, "step": 132880 }, { "epoch": 0.5368924154704525, "grad_norm": 364.2757568359375, "learning_rate": 2.660808259701958e-05, "loss": 44.1075, "step": 132890 }, { "epoch": 0.5369328167358202, "grad_norm": 350.0528259277344, "learning_rate": 2.6604599151614513e-05, "loss": 41.6351, "step": 132900 }, { "epoch": 0.5369732180011878, "grad_norm": 742.4920654296875, "learning_rate": 2.660111567492696e-05, "loss": 47.2528, "step": 132910 }, { "epoch": 0.5370136192665554, "grad_norm": 588.64599609375, "learning_rate": 2.6597632167024843e-05, "loss": 49.0769, "step": 132920 }, { "epoch": 0.5370540205319231, "grad_norm": 519.329345703125, "learning_rate": 2.6594148627976056e-05, "loss": 27.8517, "step": 132930 }, { "epoch": 0.5370944217972907, "grad_norm": 620.1698608398438, "learning_rate": 2.659066505784852e-05, "loss": 30.6305, "step": 132940 }, { "epoch": 0.5371348230626584, "grad_norm": 615.6215209960938, "learning_rate": 2.6587181456710153e-05, "loss": 70.2314, "step": 132950 }, { "epoch": 0.537175224328026, "grad_norm": 766.043212890625, "learning_rate": 2.6583697824628868e-05, "loss": 59.5871, "step": 132960 }, { "epoch": 0.5372156255933935, "grad_norm": 719.8123168945312, "learning_rate": 2.6580214161672577e-05, "loss": 92.2357, "step": 132970 }, { "epoch": 0.5372560268587612, "grad_norm": 0.0, "learning_rate": 2.65767304679092e-05, "loss": 56.5093, "step": 132980 }, { "epoch": 0.5372964281241288, "grad_norm": 615.7906494140625, "learning_rate": 2.6573246743406643e-05, "loss": 82.8496, "step": 132990 }, { "epoch": 0.5373368293894965, "grad_norm": 377.28985595703125, "learning_rate": 2.656976298823284e-05, "loss": 61.0799, "step": 133000 }, { "epoch": 0.5373772306548641, "grad_norm": 839.7172241210938, "learning_rate": 2.656627920245569e-05, "loss": 57.0245, "step": 133010 }, { "epoch": 0.5374176319202317, "grad_norm": 296.6985168457031, "learning_rate": 2.6562795386143126e-05, "loss": 83.4302, "step": 133020 }, { "epoch": 0.5374580331855994, "grad_norm": 615.6727294921875, "learning_rate": 2.6559311539363057e-05, "loss": 83.2225, "step": 133030 }, { "epoch": 0.537498434450967, "grad_norm": 518.0780029296875, "learning_rate": 2.6555827662183414e-05, "loss": 61.7653, "step": 133040 }, { "epoch": 0.5375388357163347, "grad_norm": 585.662841796875, "learning_rate": 2.6552343754672103e-05, "loss": 30.4691, "step": 133050 }, { "epoch": 0.5375792369817023, "grad_norm": 763.933349609375, "learning_rate": 2.654885981689706e-05, "loss": 47.9403, "step": 133060 }, { "epoch": 0.5376196382470699, "grad_norm": 4562.193359375, "learning_rate": 2.654537584892619e-05, "loss": 94.3141, "step": 133070 }, { "epoch": 0.5376600395124376, "grad_norm": 2078.553466796875, "learning_rate": 2.6541891850827427e-05, "loss": 59.5855, "step": 133080 }, { "epoch": 0.5377004407778052, "grad_norm": 882.0210571289062, "learning_rate": 2.653840782266868e-05, "loss": 48.3455, "step": 133090 }, { "epoch": 0.5377408420431727, "grad_norm": 643.2105712890625, "learning_rate": 2.653492376451789e-05, "loss": 50.6237, "step": 133100 }, { "epoch": 0.5377812433085404, "grad_norm": 468.3760986328125, "learning_rate": 2.6531439676442966e-05, "loss": 50.7116, "step": 133110 }, { "epoch": 0.537821644573908, "grad_norm": 763.0037841796875, "learning_rate": 2.6527955558511842e-05, "loss": 78.7773, "step": 133120 }, { "epoch": 0.5378620458392757, "grad_norm": 289.5901184082031, "learning_rate": 2.652447141079243e-05, "loss": 46.9934, "step": 133130 }, { "epoch": 0.5379024471046433, "grad_norm": 776.5145874023438, "learning_rate": 2.6520987233352668e-05, "loss": 65.4828, "step": 133140 }, { "epoch": 0.5379428483700109, "grad_norm": 2137.632568359375, "learning_rate": 2.6517503026260477e-05, "loss": 51.402, "step": 133150 }, { "epoch": 0.5379832496353786, "grad_norm": 1670.868896484375, "learning_rate": 2.6514018789583784e-05, "loss": 58.1956, "step": 133160 }, { "epoch": 0.5380236509007462, "grad_norm": 451.3445739746094, "learning_rate": 2.651053452339051e-05, "loss": 47.8556, "step": 133170 }, { "epoch": 0.5380640521661139, "grad_norm": 993.9606323242188, "learning_rate": 2.650705022774859e-05, "loss": 53.2049, "step": 133180 }, { "epoch": 0.5381044534314815, "grad_norm": 836.609130859375, "learning_rate": 2.6503565902725945e-05, "loss": 43.5089, "step": 133190 }, { "epoch": 0.5381448546968491, "grad_norm": 999.381103515625, "learning_rate": 2.650008154839052e-05, "loss": 61.5361, "step": 133200 }, { "epoch": 0.5381852559622168, "grad_norm": 806.6781616210938, "learning_rate": 2.6496597164810228e-05, "loss": 82.2622, "step": 133210 }, { "epoch": 0.5382256572275844, "grad_norm": 576.73974609375, "learning_rate": 2.6493112752053e-05, "loss": 32.6687, "step": 133220 }, { "epoch": 0.538266058492952, "grad_norm": 362.6260986328125, "learning_rate": 2.6489628310186777e-05, "loss": 27.9188, "step": 133230 }, { "epoch": 0.5383064597583196, "grad_norm": 422.7489013671875, "learning_rate": 2.6486143839279487e-05, "loss": 54.0248, "step": 133240 }, { "epoch": 0.5383468610236872, "grad_norm": 578.5451049804688, "learning_rate": 2.6482659339399045e-05, "loss": 60.191, "step": 133250 }, { "epoch": 0.5383872622890549, "grad_norm": 2537.281005859375, "learning_rate": 2.64791748106134e-05, "loss": 58.6423, "step": 133260 }, { "epoch": 0.5384276635544225, "grad_norm": 537.8567504882812, "learning_rate": 2.647569025299048e-05, "loss": 39.9818, "step": 133270 }, { "epoch": 0.5384680648197901, "grad_norm": 415.552001953125, "learning_rate": 2.647220566659822e-05, "loss": 41.9125, "step": 133280 }, { "epoch": 0.5385084660851578, "grad_norm": 603.8895874023438, "learning_rate": 2.6468721051504554e-05, "loss": 74.081, "step": 133290 }, { "epoch": 0.5385488673505254, "grad_norm": 918.3076171875, "learning_rate": 2.646523640777741e-05, "loss": 67.8838, "step": 133300 }, { "epoch": 0.5385892686158931, "grad_norm": 644.3486938476562, "learning_rate": 2.646175173548474e-05, "loss": 39.718, "step": 133310 }, { "epoch": 0.5386296698812607, "grad_norm": 1127.198486328125, "learning_rate": 2.6458267034694463e-05, "loss": 50.1396, "step": 133320 }, { "epoch": 0.5386700711466283, "grad_norm": 1032.7508544921875, "learning_rate": 2.645478230547451e-05, "loss": 60.5964, "step": 133330 }, { "epoch": 0.538710472411996, "grad_norm": 2308.70654296875, "learning_rate": 2.6451297547892834e-05, "loss": 86.4414, "step": 133340 }, { "epoch": 0.5387508736773636, "grad_norm": 250.09999084472656, "learning_rate": 2.644781276201736e-05, "loss": 38.5809, "step": 133350 }, { "epoch": 0.5387912749427312, "grad_norm": 892.7601318359375, "learning_rate": 2.6444327947916036e-05, "loss": 54.9328, "step": 133360 }, { "epoch": 0.5388316762080988, "grad_norm": 742.9862670898438, "learning_rate": 2.6440843105656793e-05, "loss": 53.7666, "step": 133370 }, { "epoch": 0.5388720774734664, "grad_norm": 580.2630004882812, "learning_rate": 2.6437358235307576e-05, "loss": 42.9191, "step": 133380 }, { "epoch": 0.5389124787388341, "grad_norm": 657.9290161132812, "learning_rate": 2.643387333693631e-05, "loss": 48.353, "step": 133390 }, { "epoch": 0.5389528800042017, "grad_norm": 278.8210144042969, "learning_rate": 2.6430388410610955e-05, "loss": 28.9319, "step": 133400 }, { "epoch": 0.5389932812695694, "grad_norm": 4367.41015625, "learning_rate": 2.6426903456399442e-05, "loss": 80.4687, "step": 133410 }, { "epoch": 0.539033682534937, "grad_norm": 192.4734649658203, "learning_rate": 2.6423418474369704e-05, "loss": 34.9511, "step": 133420 }, { "epoch": 0.5390740838003046, "grad_norm": 2244.14892578125, "learning_rate": 2.6419933464589695e-05, "loss": 88.4328, "step": 133430 }, { "epoch": 0.5391144850656723, "grad_norm": 520.8280639648438, "learning_rate": 2.641644842712735e-05, "loss": 60.3455, "step": 133440 }, { "epoch": 0.5391548863310399, "grad_norm": 356.8963623046875, "learning_rate": 2.6412963362050618e-05, "loss": 42.0201, "step": 133450 }, { "epoch": 0.5391952875964076, "grad_norm": 578.19873046875, "learning_rate": 2.640947826942743e-05, "loss": 80.3492, "step": 133460 }, { "epoch": 0.5392356888617752, "grad_norm": 855.69677734375, "learning_rate": 2.640599314932574e-05, "loss": 64.0816, "step": 133470 }, { "epoch": 0.5392760901271428, "grad_norm": 1241.7418212890625, "learning_rate": 2.6402508001813496e-05, "loss": 59.2113, "step": 133480 }, { "epoch": 0.5393164913925104, "grad_norm": 245.85995483398438, "learning_rate": 2.6399022826958635e-05, "loss": 40.4469, "step": 133490 }, { "epoch": 0.539356892657878, "grad_norm": 1086.880615234375, "learning_rate": 2.6395537624829096e-05, "loss": 47.158, "step": 133500 }, { "epoch": 0.5393972939232456, "grad_norm": 1283.5257568359375, "learning_rate": 2.639205239549284e-05, "loss": 75.8677, "step": 133510 }, { "epoch": 0.5394376951886133, "grad_norm": 973.451904296875, "learning_rate": 2.63885671390178e-05, "loss": 60.6037, "step": 133520 }, { "epoch": 0.5394780964539809, "grad_norm": 743.983642578125, "learning_rate": 2.6385081855471937e-05, "loss": 46.7652, "step": 133530 }, { "epoch": 0.5395184977193486, "grad_norm": 338.3520812988281, "learning_rate": 2.638159654492318e-05, "loss": 84.5076, "step": 133540 }, { "epoch": 0.5395588989847162, "grad_norm": 460.87103271484375, "learning_rate": 2.6378111207439494e-05, "loss": 51.0171, "step": 133550 }, { "epoch": 0.5395993002500838, "grad_norm": 0.0, "learning_rate": 2.637462584308881e-05, "loss": 49.7129, "step": 133560 }, { "epoch": 0.5396397015154515, "grad_norm": 1022.263427734375, "learning_rate": 2.6371140451939103e-05, "loss": 69.8698, "step": 133570 }, { "epoch": 0.5396801027808191, "grad_norm": 1827.470947265625, "learning_rate": 2.6367655034058302e-05, "loss": 79.5532, "step": 133580 }, { "epoch": 0.5397205040461868, "grad_norm": 608.9415893554688, "learning_rate": 2.6364169589514358e-05, "loss": 61.6891, "step": 133590 }, { "epoch": 0.5397609053115544, "grad_norm": 1445.496826171875, "learning_rate": 2.636068411837523e-05, "loss": 67.8332, "step": 133600 }, { "epoch": 0.5398013065769219, "grad_norm": 633.453369140625, "learning_rate": 2.6357198620708868e-05, "loss": 41.0135, "step": 133610 }, { "epoch": 0.5398417078422896, "grad_norm": 1123.755615234375, "learning_rate": 2.635371309658321e-05, "loss": 65.9337, "step": 133620 }, { "epoch": 0.5398821091076572, "grad_norm": 412.05029296875, "learning_rate": 2.6350227546066218e-05, "loss": 57.8581, "step": 133630 }, { "epoch": 0.5399225103730249, "grad_norm": 794.2384643554688, "learning_rate": 2.634674196922585e-05, "loss": 47.7789, "step": 133640 }, { "epoch": 0.5399629116383925, "grad_norm": 839.2406616210938, "learning_rate": 2.6343256366130054e-05, "loss": 49.6203, "step": 133650 }, { "epoch": 0.5400033129037601, "grad_norm": 474.2958068847656, "learning_rate": 2.633977073684679e-05, "loss": 73.9534, "step": 133660 }, { "epoch": 0.5400437141691278, "grad_norm": 611.3136596679688, "learning_rate": 2.6336285081443996e-05, "loss": 90.5097, "step": 133670 }, { "epoch": 0.5400841154344954, "grad_norm": 1800.31884765625, "learning_rate": 2.633279939998964e-05, "loss": 66.3581, "step": 133680 }, { "epoch": 0.540124516699863, "grad_norm": 437.4395751953125, "learning_rate": 2.6329313692551672e-05, "loss": 54.8113, "step": 133690 }, { "epoch": 0.5401649179652307, "grad_norm": 891.3457641601562, "learning_rate": 2.6325827959198045e-05, "loss": 59.0138, "step": 133700 }, { "epoch": 0.5402053192305983, "grad_norm": 971.3360595703125, "learning_rate": 2.6322342199996726e-05, "loss": 42.0446, "step": 133710 }, { "epoch": 0.540245720495966, "grad_norm": 1470.095703125, "learning_rate": 2.6318856415015664e-05, "loss": 67.5125, "step": 133720 }, { "epoch": 0.5402861217613336, "grad_norm": 553.43408203125, "learning_rate": 2.631537060432282e-05, "loss": 45.4572, "step": 133730 }, { "epoch": 0.5403265230267011, "grad_norm": 1151.4344482421875, "learning_rate": 2.631188476798614e-05, "loss": 41.0355, "step": 133740 }, { "epoch": 0.5403669242920688, "grad_norm": 1145.021728515625, "learning_rate": 2.63083989060736e-05, "loss": 48.7941, "step": 133750 }, { "epoch": 0.5404073255574364, "grad_norm": 640.4161987304688, "learning_rate": 2.6304913018653144e-05, "loss": 58.756, "step": 133760 }, { "epoch": 0.5404477268228041, "grad_norm": 887.5857543945312, "learning_rate": 2.630142710579274e-05, "loss": 54.0097, "step": 133770 }, { "epoch": 0.5404881280881717, "grad_norm": 817.3486328125, "learning_rate": 2.6297941167560346e-05, "loss": 45.1971, "step": 133780 }, { "epoch": 0.5405285293535393, "grad_norm": 959.50732421875, "learning_rate": 2.6294455204023915e-05, "loss": 43.2953, "step": 133790 }, { "epoch": 0.540568930618907, "grad_norm": 315.968017578125, "learning_rate": 2.6290969215251416e-05, "loss": 76.2952, "step": 133800 }, { "epoch": 0.5406093318842746, "grad_norm": 1908.3179931640625, "learning_rate": 2.628748320131081e-05, "loss": 71.9718, "step": 133810 }, { "epoch": 0.5406497331496423, "grad_norm": 869.3490600585938, "learning_rate": 2.6283997162270052e-05, "loss": 48.5477, "step": 133820 }, { "epoch": 0.5406901344150099, "grad_norm": 1390.5584716796875, "learning_rate": 2.6280511098197113e-05, "loss": 57.5297, "step": 133830 }, { "epoch": 0.5407305356803775, "grad_norm": 1634.8409423828125, "learning_rate": 2.627702500915995e-05, "loss": 56.8467, "step": 133840 }, { "epoch": 0.5407709369457452, "grad_norm": 796.09716796875, "learning_rate": 2.6273538895226522e-05, "loss": 43.6418, "step": 133850 }, { "epoch": 0.5408113382111128, "grad_norm": 676.2175903320312, "learning_rate": 2.6270052756464803e-05, "loss": 61.2231, "step": 133860 }, { "epoch": 0.5408517394764804, "grad_norm": 843.5305786132812, "learning_rate": 2.626656659294275e-05, "loss": 39.5511, "step": 133870 }, { "epoch": 0.540892140741848, "grad_norm": 584.4107055664062, "learning_rate": 2.6263080404728325e-05, "loss": 40.8211, "step": 133880 }, { "epoch": 0.5409325420072156, "grad_norm": 617.6050415039062, "learning_rate": 2.62595941918895e-05, "loss": 46.0637, "step": 133890 }, { "epoch": 0.5409729432725833, "grad_norm": 323.1057434082031, "learning_rate": 2.6256107954494242e-05, "loss": 59.272, "step": 133900 }, { "epoch": 0.5410133445379509, "grad_norm": 534.6497802734375, "learning_rate": 2.6252621692610507e-05, "loss": 47.1891, "step": 133910 }, { "epoch": 0.5410537458033186, "grad_norm": 1699.9815673828125, "learning_rate": 2.6249135406306273e-05, "loss": 34.6852, "step": 133920 }, { "epoch": 0.5410941470686862, "grad_norm": 595.4637451171875, "learning_rate": 2.6245649095649494e-05, "loss": 54.051, "step": 133930 }, { "epoch": 0.5411345483340538, "grad_norm": 538.2552490234375, "learning_rate": 2.6242162760708154e-05, "loss": 35.2021, "step": 133940 }, { "epoch": 0.5411749495994215, "grad_norm": 1009.6656494140625, "learning_rate": 2.6238676401550207e-05, "loss": 40.208, "step": 133950 }, { "epoch": 0.5412153508647891, "grad_norm": 941.228515625, "learning_rate": 2.623519001824362e-05, "loss": 48.1016, "step": 133960 }, { "epoch": 0.5412557521301568, "grad_norm": 884.5780029296875, "learning_rate": 2.6231703610856373e-05, "loss": 57.1634, "step": 133970 }, { "epoch": 0.5412961533955244, "grad_norm": 670.623291015625, "learning_rate": 2.6228217179456433e-05, "loss": 56.2344, "step": 133980 }, { "epoch": 0.541336554660892, "grad_norm": 432.82928466796875, "learning_rate": 2.6224730724111758e-05, "loss": 37.2939, "step": 133990 }, { "epoch": 0.5413769559262596, "grad_norm": 505.16387939453125, "learning_rate": 2.6221244244890336e-05, "loss": 44.5701, "step": 134000 } ], "logging_steps": 10, "max_steps": 250000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }