{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.29088911064694545, "eval_steps": 500, "global_step": 72000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.040126536763131e-05, "grad_norm": 37380620.0, "learning_rate": 2e-08, "loss": 1265640.2, "step": 10 }, { "epoch": 8.080253073526263e-05, "grad_norm": 9634068.0, "learning_rate": 4e-08, "loss": 1485744.3, "step": 20 }, { "epoch": 0.00012120379610289395, "grad_norm": 14280229.0, "learning_rate": 6e-08, "loss": 1357241.2, "step": 30 }, { "epoch": 0.00016160506147052525, "grad_norm": 47433768.0, "learning_rate": 8e-08, "loss": 1486355.4, "step": 40 }, { "epoch": 0.00020200632683815657, "grad_norm": 36763048.0, "learning_rate": 1.0000000000000001e-07, "loss": 1302607.4, "step": 50 }, { "epoch": 0.0002424075922057879, "grad_norm": 58413324.0, "learning_rate": 1.2e-07, "loss": 1164601.1, "step": 60 }, { "epoch": 0.0002828088575734192, "grad_norm": 20137730.0, "learning_rate": 1.4e-07, "loss": 1635228.2, "step": 70 }, { "epoch": 0.0003232101229410505, "grad_norm": 110655184.0, "learning_rate": 1.6e-07, "loss": 1525543.8, "step": 80 }, { "epoch": 0.0003636113883086818, "grad_norm": 14134144.0, "learning_rate": 1.8e-07, "loss": 1310257.0, "step": 90 }, { "epoch": 0.00040401265367631315, "grad_norm": 19299640.0, "learning_rate": 2.0000000000000002e-07, "loss": 851063.5, "step": 100 }, { "epoch": 0.00044441391904394446, "grad_norm": 17030668.0, "learning_rate": 2.2e-07, "loss": 1405910.8, "step": 110 }, { "epoch": 0.0004848151844115758, "grad_norm": 5774159.0, "learning_rate": 2.4e-07, "loss": 1021530.2, "step": 120 }, { "epoch": 0.000525216449779207, "grad_norm": 39249404.0, "learning_rate": 2.6e-07, "loss": 1118213.8, "step": 130 }, { "epoch": 0.0005656177151468384, "grad_norm": 3952896.25, "learning_rate": 2.8e-07, "loss": 802054.8, "step": 140 }, { "epoch": 0.0006060189805144697, "grad_norm": 17492202.0, "learning_rate": 3.0000000000000004e-07, "loss": 782337.9, "step": 150 }, { "epoch": 0.000646420245882101, "grad_norm": 14565722.0, "learning_rate": 3.2e-07, "loss": 762118.95, "step": 160 }, { "epoch": 0.0006868215112497323, "grad_norm": 12661834.0, "learning_rate": 3.4e-07, "loss": 617497.6, "step": 170 }, { "epoch": 0.0007272227766173637, "grad_norm": 4945727.5, "learning_rate": 3.6e-07, "loss": 360572.95, "step": 180 }, { "epoch": 0.000767624041984995, "grad_norm": 3134309.5, "learning_rate": 3.8e-07, "loss": 247792.15, "step": 190 }, { "epoch": 0.0008080253073526263, "grad_norm": 23188752.0, "learning_rate": 4.0000000000000003e-07, "loss": 302759.75, "step": 200 }, { "epoch": 0.0008484265727202576, "grad_norm": 2640093.75, "learning_rate": 4.2e-07, "loss": 177911.875, "step": 210 }, { "epoch": 0.0008888278380878889, "grad_norm": 3475973.0, "learning_rate": 4.4e-07, "loss": 124657.5875, "step": 220 }, { "epoch": 0.0009292291034555202, "grad_norm": 7612417.5, "learning_rate": 4.6e-07, "loss": 133348.3, "step": 230 }, { "epoch": 0.0009696303688231516, "grad_norm": 3544448.5, "learning_rate": 4.8e-07, "loss": 108178.025, "step": 240 }, { "epoch": 0.0010100316341907828, "grad_norm": 1792657.375, "learning_rate": 5.000000000000001e-07, "loss": 147530.7625, "step": 250 }, { "epoch": 0.001050432899558414, "grad_norm": 807344.25, "learning_rate": 5.2e-07, "loss": 46811.725, "step": 260 }, { "epoch": 0.0010908341649260454, "grad_norm": 9978717.0, "learning_rate": 5.4e-07, "loss": 46902.9469, "step": 270 }, { "epoch": 0.0011312354302936767, "grad_norm": 1081468.125, "learning_rate": 5.6e-07, "loss": 61798.675, "step": 280 }, { "epoch": 0.001171636695661308, "grad_norm": 836054.125, "learning_rate": 5.8e-07, "loss": 18199.025, "step": 290 }, { "epoch": 0.0012120379610289394, "grad_norm": 713206.125, "learning_rate": 6.000000000000001e-07, "loss": 16179.9375, "step": 300 }, { "epoch": 0.0012524392263965707, "grad_norm": 736075.8125, "learning_rate": 6.2e-07, "loss": 18567.8047, "step": 310 }, { "epoch": 0.001292840491764202, "grad_norm": 524556.5, "learning_rate": 6.4e-07, "loss": 18508.9297, "step": 320 }, { "epoch": 0.0013332417571318333, "grad_norm": 74135.0703125, "learning_rate": 6.6e-07, "loss": 7819.6453, "step": 330 }, { "epoch": 0.0013736430224994647, "grad_norm": 94151.4921875, "learning_rate": 6.8e-07, "loss": 6291.5781, "step": 340 }, { "epoch": 0.001414044287867096, "grad_norm": 17262.64453125, "learning_rate": 7.000000000000001e-07, "loss": 2254.5939, "step": 350 }, { "epoch": 0.0014544455532347273, "grad_norm": 6977.82470703125, "learning_rate": 7.2e-07, "loss": 806.7898, "step": 360 }, { "epoch": 0.0014948468186023586, "grad_norm": 720.22705078125, "learning_rate": 7.400000000000001e-07, "loss": 1413.3216, "step": 370 }, { "epoch": 0.00153524808396999, "grad_norm": 12234.6259765625, "learning_rate": 7.6e-07, "loss": 569.8974, "step": 380 }, { "epoch": 0.0015756493493376213, "grad_norm": 12985.419921875, "learning_rate": 7.8e-07, "loss": 645.2221, "step": 390 }, { "epoch": 0.0016160506147052526, "grad_norm": 17890.517578125, "learning_rate": 8.000000000000001e-07, "loss": 556.4382, "step": 400 }, { "epoch": 0.001656451880072884, "grad_norm": 1073.23046875, "learning_rate": 8.200000000000001e-07, "loss": 579.9454, "step": 410 }, { "epoch": 0.0016968531454405152, "grad_norm": 946.015380859375, "learning_rate": 8.4e-07, "loss": 339.6989, "step": 420 }, { "epoch": 0.0017372544108081465, "grad_norm": 919.689697265625, "learning_rate": 8.6e-07, "loss": 544.2018, "step": 430 }, { "epoch": 0.0017776556761757779, "grad_norm": 1076.61962890625, "learning_rate": 8.8e-07, "loss": 477.4546, "step": 440 }, { "epoch": 0.0018180569415434092, "grad_norm": 6896.8623046875, "learning_rate": 9e-07, "loss": 551.458, "step": 450 }, { "epoch": 0.0018584582069110405, "grad_norm": 1263.240478515625, "learning_rate": 9.2e-07, "loss": 478.671, "step": 460 }, { "epoch": 0.0018988594722786718, "grad_norm": 6494.14501953125, "learning_rate": 9.400000000000001e-07, "loss": 551.3243, "step": 470 }, { "epoch": 0.0019392607376463031, "grad_norm": 1367.335205078125, "learning_rate": 9.6e-07, "loss": 566.3516, "step": 480 }, { "epoch": 0.0019796620030139342, "grad_norm": 2059.615478515625, "learning_rate": 9.8e-07, "loss": 440.9915, "step": 490 }, { "epoch": 0.0020200632683815656, "grad_norm": 1089.5028076171875, "learning_rate": 1.0000000000000002e-06, "loss": 450.5991, "step": 500 }, { "epoch": 0.002060464533749197, "grad_norm": 1123.5267333984375, "learning_rate": 1.0200000000000002e-06, "loss": 450.4043, "step": 510 }, { "epoch": 0.002100865799116828, "grad_norm": 1164.44677734375, "learning_rate": 1.04e-06, "loss": 401.3136, "step": 520 }, { "epoch": 0.0021412670644844595, "grad_norm": 1139.4189453125, "learning_rate": 1.06e-06, "loss": 510.3279, "step": 530 }, { "epoch": 0.002181668329852091, "grad_norm": 1584.498779296875, "learning_rate": 1.08e-06, "loss": 276.7529, "step": 540 }, { "epoch": 0.002222069595219722, "grad_norm": 13004.5224609375, "learning_rate": 1.1e-06, "loss": 517.8619, "step": 550 }, { "epoch": 0.0022624708605873535, "grad_norm": 6897.6220703125, "learning_rate": 1.12e-06, "loss": 371.9735, "step": 560 }, { "epoch": 0.002302872125954985, "grad_norm": 5952.30859375, "learning_rate": 1.14e-06, "loss": 321.4458, "step": 570 }, { "epoch": 0.002343273391322616, "grad_norm": 1368.4573974609375, "learning_rate": 1.16e-06, "loss": 444.6263, "step": 580 }, { "epoch": 0.0023836746566902474, "grad_norm": 6530.33154296875, "learning_rate": 1.18e-06, "loss": 576.2317, "step": 590 }, { "epoch": 0.0024240759220578788, "grad_norm": 918.2885131835938, "learning_rate": 1.2000000000000002e-06, "loss": 319.5849, "step": 600 }, { "epoch": 0.00246447718742551, "grad_norm": 5709.6923828125, "learning_rate": 1.2200000000000002e-06, "loss": 544.9418, "step": 610 }, { "epoch": 0.0025048784527931414, "grad_norm": 767.517822265625, "learning_rate": 1.24e-06, "loss": 475.3013, "step": 620 }, { "epoch": 0.0025452797181607727, "grad_norm": 1049.8690185546875, "learning_rate": 1.26e-06, "loss": 505.3646, "step": 630 }, { "epoch": 0.002585680983528404, "grad_norm": 6113.7568359375, "learning_rate": 1.28e-06, "loss": 371.3924, "step": 640 }, { "epoch": 0.0026260822488960354, "grad_norm": 961.7456665039062, "learning_rate": 1.3e-06, "loss": 434.1364, "step": 650 }, { "epoch": 0.0026664835142636667, "grad_norm": 827.18310546875, "learning_rate": 1.32e-06, "loss": 325.7226, "step": 660 }, { "epoch": 0.002706884779631298, "grad_norm": 800.2735595703125, "learning_rate": 1.34e-06, "loss": 362.5384, "step": 670 }, { "epoch": 0.0027472860449989293, "grad_norm": 2117.720703125, "learning_rate": 1.36e-06, "loss": 459.5714, "step": 680 }, { "epoch": 0.0027876873103665606, "grad_norm": 11787.2021484375, "learning_rate": 1.3800000000000001e-06, "loss": 452.4481, "step": 690 }, { "epoch": 0.002828088575734192, "grad_norm": 1027.6024169921875, "learning_rate": 1.4000000000000001e-06, "loss": 370.987, "step": 700 }, { "epoch": 0.0028684898411018233, "grad_norm": 2432.72802734375, "learning_rate": 1.4200000000000002e-06, "loss": 396.6002, "step": 710 }, { "epoch": 0.0029088911064694546, "grad_norm": 4263.763671875, "learning_rate": 1.44e-06, "loss": 514.6806, "step": 720 }, { "epoch": 0.002949292371837086, "grad_norm": 1061.5069580078125, "learning_rate": 1.46e-06, "loss": 367.6588, "step": 730 }, { "epoch": 0.0029896936372047172, "grad_norm": 7566.0791015625, "learning_rate": 1.4800000000000002e-06, "loss": 387.3527, "step": 740 }, { "epoch": 0.0030300949025723486, "grad_norm": 1594.1578369140625, "learning_rate": 1.5e-06, "loss": 286.798, "step": 750 }, { "epoch": 0.00307049616793998, "grad_norm": 1798.3486328125, "learning_rate": 1.52e-06, "loss": 265.2009, "step": 760 }, { "epoch": 0.003110897433307611, "grad_norm": 1887.8646240234375, "learning_rate": 1.54e-06, "loss": 511.3708, "step": 770 }, { "epoch": 0.0031512986986752425, "grad_norm": 1046.9554443359375, "learning_rate": 1.56e-06, "loss": 330.1199, "step": 780 }, { "epoch": 0.003191699964042874, "grad_norm": 14393.11328125, "learning_rate": 1.5800000000000003e-06, "loss": 459.7904, "step": 790 }, { "epoch": 0.003232101229410505, "grad_norm": 1148.2391357421875, "learning_rate": 1.6000000000000001e-06, "loss": 297.0352, "step": 800 }, { "epoch": 0.0032725024947781365, "grad_norm": 1655.5443115234375, "learning_rate": 1.62e-06, "loss": 380.9321, "step": 810 }, { "epoch": 0.003312903760145768, "grad_norm": 1723.20947265625, "learning_rate": 1.6400000000000002e-06, "loss": 356.5419, "step": 820 }, { "epoch": 0.003353305025513399, "grad_norm": 962.674560546875, "learning_rate": 1.6600000000000002e-06, "loss": 244.5568, "step": 830 }, { "epoch": 0.0033937062908810304, "grad_norm": 1860.3314208984375, "learning_rate": 1.68e-06, "loss": 314.3494, "step": 840 }, { "epoch": 0.0034341075562486618, "grad_norm": 677.5274047851562, "learning_rate": 1.7000000000000002e-06, "loss": 389.2205, "step": 850 }, { "epoch": 0.003474508821616293, "grad_norm": 847.7564697265625, "learning_rate": 1.72e-06, "loss": 421.029, "step": 860 }, { "epoch": 0.0035149100869839244, "grad_norm": 1153.310791015625, "learning_rate": 1.7399999999999999e-06, "loss": 338.7876, "step": 870 }, { "epoch": 0.0035553113523515557, "grad_norm": 702.7095947265625, "learning_rate": 1.76e-06, "loss": 375.8682, "step": 880 }, { "epoch": 0.003595712617719187, "grad_norm": 898.8328857421875, "learning_rate": 1.7800000000000001e-06, "loss": 437.3662, "step": 890 }, { "epoch": 0.0036361138830868184, "grad_norm": 1392.5733642578125, "learning_rate": 1.8e-06, "loss": 327.5625, "step": 900 }, { "epoch": 0.0036765151484544497, "grad_norm": 1176.8975830078125, "learning_rate": 1.8200000000000002e-06, "loss": 331.6111, "step": 910 }, { "epoch": 0.003716916413822081, "grad_norm": 844.1708374023438, "learning_rate": 1.84e-06, "loss": 462.9411, "step": 920 }, { "epoch": 0.0037573176791897123, "grad_norm": 1110.1351318359375, "learning_rate": 1.86e-06, "loss": 291.5723, "step": 930 }, { "epoch": 0.0037977189445573436, "grad_norm": 1204.96826171875, "learning_rate": 1.8800000000000002e-06, "loss": 453.21, "step": 940 }, { "epoch": 0.003838120209924975, "grad_norm": 788.923095703125, "learning_rate": 1.9e-06, "loss": 338.4679, "step": 950 }, { "epoch": 0.0038785214752926063, "grad_norm": 828.4852294921875, "learning_rate": 1.92e-06, "loss": 304.8583, "step": 960 }, { "epoch": 0.003918922740660237, "grad_norm": 3126.61181640625, "learning_rate": 1.94e-06, "loss": 368.0714, "step": 970 }, { "epoch": 0.0039593240060278685, "grad_norm": 3534.13232421875, "learning_rate": 1.96e-06, "loss": 321.7589, "step": 980 }, { "epoch": 0.0039997252713955, "grad_norm": 676.08251953125, "learning_rate": 1.98e-06, "loss": 336.8801, "step": 990 }, { "epoch": 0.004040126536763131, "grad_norm": 1422.3154296875, "learning_rate": 2.0000000000000003e-06, "loss": 310.1541, "step": 1000 }, { "epoch": 0.0040805278021307624, "grad_norm": 754.2792358398438, "learning_rate": 2.02e-06, "loss": 294.2774, "step": 1010 }, { "epoch": 0.004120929067498394, "grad_norm": 1239.5504150390625, "learning_rate": 2.0400000000000004e-06, "loss": 379.9203, "step": 1020 }, { "epoch": 0.004161330332866025, "grad_norm": 1107.9769287109375, "learning_rate": 2.06e-06, "loss": 394.3968, "step": 1030 }, { "epoch": 0.004201731598233656, "grad_norm": 747.5755615234375, "learning_rate": 2.08e-06, "loss": 357.4806, "step": 1040 }, { "epoch": 0.004242132863601288, "grad_norm": 897.203125, "learning_rate": 2.1000000000000002e-06, "loss": 376.0113, "step": 1050 }, { "epoch": 0.004282534128968919, "grad_norm": 3561.832763671875, "learning_rate": 2.12e-06, "loss": 385.5077, "step": 1060 }, { "epoch": 0.00432293539433655, "grad_norm": 2549.35205078125, "learning_rate": 2.14e-06, "loss": 448.231, "step": 1070 }, { "epoch": 0.004363336659704182, "grad_norm": 746.3321533203125, "learning_rate": 2.16e-06, "loss": 308.5054, "step": 1080 }, { "epoch": 0.004403737925071813, "grad_norm": 4975.05615234375, "learning_rate": 2.1800000000000003e-06, "loss": 604.6913, "step": 1090 }, { "epoch": 0.004444139190439444, "grad_norm": 865.9938354492188, "learning_rate": 2.2e-06, "loss": 239.4941, "step": 1100 }, { "epoch": 0.004484540455807076, "grad_norm": 1072.074462890625, "learning_rate": 2.2200000000000003e-06, "loss": 401.7543, "step": 1110 }, { "epoch": 0.004524941721174707, "grad_norm": 1182.0032958984375, "learning_rate": 2.24e-06, "loss": 330.9987, "step": 1120 }, { "epoch": 0.004565342986542338, "grad_norm": 814.4903564453125, "learning_rate": 2.26e-06, "loss": 379.418, "step": 1130 }, { "epoch": 0.00460574425190997, "grad_norm": 2526.64501953125, "learning_rate": 2.28e-06, "loss": 389.4854, "step": 1140 }, { "epoch": 0.004646145517277601, "grad_norm": 1358.009033203125, "learning_rate": 2.3e-06, "loss": 432.4065, "step": 1150 }, { "epoch": 0.004686546782645232, "grad_norm": 1503.1463623046875, "learning_rate": 2.32e-06, "loss": 417.3679, "step": 1160 }, { "epoch": 0.0047269480480128636, "grad_norm": 2745.59619140625, "learning_rate": 2.34e-06, "loss": 339.9085, "step": 1170 }, { "epoch": 0.004767349313380495, "grad_norm": 636.4331665039062, "learning_rate": 2.36e-06, "loss": 324.7576, "step": 1180 }, { "epoch": 0.004807750578748126, "grad_norm": 1743.3262939453125, "learning_rate": 2.38e-06, "loss": 337.6445, "step": 1190 }, { "epoch": 0.0048481518441157575, "grad_norm": 912.2630004882812, "learning_rate": 2.4000000000000003e-06, "loss": 342.137, "step": 1200 }, { "epoch": 0.004888553109483389, "grad_norm": 1142.1710205078125, "learning_rate": 2.42e-06, "loss": 280.1062, "step": 1210 }, { "epoch": 0.00492895437485102, "grad_norm": 981.51806640625, "learning_rate": 2.4400000000000004e-06, "loss": 291.3894, "step": 1220 }, { "epoch": 0.0049693556402186515, "grad_norm": 1795.322265625, "learning_rate": 2.46e-06, "loss": 349.2701, "step": 1230 }, { "epoch": 0.005009756905586283, "grad_norm": 1389.8359375, "learning_rate": 2.48e-06, "loss": 363.471, "step": 1240 }, { "epoch": 0.005050158170953914, "grad_norm": 1739.7503662109375, "learning_rate": 2.5e-06, "loss": 414.4333, "step": 1250 }, { "epoch": 0.0050905594363215454, "grad_norm": 1108.7142333984375, "learning_rate": 2.52e-06, "loss": 376.2317, "step": 1260 }, { "epoch": 0.005130960701689177, "grad_norm": 1234.8914794921875, "learning_rate": 2.54e-06, "loss": 432.4711, "step": 1270 }, { "epoch": 0.005171361967056808, "grad_norm": 1600.1077880859375, "learning_rate": 2.56e-06, "loss": 273.5047, "step": 1280 }, { "epoch": 0.005211763232424439, "grad_norm": 5998.88134765625, "learning_rate": 2.5800000000000003e-06, "loss": 417.7149, "step": 1290 }, { "epoch": 0.005252164497792071, "grad_norm": 1022.2550659179688, "learning_rate": 2.6e-06, "loss": 337.5613, "step": 1300 }, { "epoch": 0.005292565763159702, "grad_norm": 821.97998046875, "learning_rate": 2.6200000000000003e-06, "loss": 355.1617, "step": 1310 }, { "epoch": 0.005332967028527333, "grad_norm": 0.0, "learning_rate": 2.64e-06, "loss": 441.1887, "step": 1320 }, { "epoch": 0.005373368293894965, "grad_norm": 1180.22607421875, "learning_rate": 2.66e-06, "loss": 349.4191, "step": 1330 }, { "epoch": 0.005413769559262596, "grad_norm": 1288.874755859375, "learning_rate": 2.68e-06, "loss": 358.9353, "step": 1340 }, { "epoch": 0.005454170824630227, "grad_norm": 858.6513671875, "learning_rate": 2.7e-06, "loss": 298.1967, "step": 1350 }, { "epoch": 0.005494572089997859, "grad_norm": 826.8380737304688, "learning_rate": 2.72e-06, "loss": 266.4493, "step": 1360 }, { "epoch": 0.00553497335536549, "grad_norm": 1527.0496826171875, "learning_rate": 2.74e-06, "loss": 385.1624, "step": 1370 }, { "epoch": 0.005575374620733121, "grad_norm": 1727.4271240234375, "learning_rate": 2.7600000000000003e-06, "loss": 409.4935, "step": 1380 }, { "epoch": 0.005615775886100753, "grad_norm": 1242.2200927734375, "learning_rate": 2.78e-06, "loss": 423.3242, "step": 1390 }, { "epoch": 0.005656177151468384, "grad_norm": 784.5136108398438, "learning_rate": 2.8000000000000003e-06, "loss": 378.621, "step": 1400 }, { "epoch": 0.005696578416836015, "grad_norm": 686.701416015625, "learning_rate": 2.82e-06, "loss": 366.9333, "step": 1410 }, { "epoch": 0.0057369796822036466, "grad_norm": 2546.25390625, "learning_rate": 2.8400000000000003e-06, "loss": 372.8342, "step": 1420 }, { "epoch": 0.005777380947571278, "grad_norm": 1681.410400390625, "learning_rate": 2.86e-06, "loss": 383.352, "step": 1430 }, { "epoch": 0.005817782212938909, "grad_norm": 1604.5748291015625, "learning_rate": 2.88e-06, "loss": 390.5337, "step": 1440 }, { "epoch": 0.0058581834783065405, "grad_norm": 642.487060546875, "learning_rate": 2.9e-06, "loss": 304.9479, "step": 1450 }, { "epoch": 0.005898584743674172, "grad_norm": 2208.190185546875, "learning_rate": 2.92e-06, "loss": 419.7739, "step": 1460 }, { "epoch": 0.005938986009041803, "grad_norm": 558.7657470703125, "learning_rate": 2.9400000000000002e-06, "loss": 243.1836, "step": 1470 }, { "epoch": 0.0059793872744094345, "grad_norm": 866.2562866210938, "learning_rate": 2.9600000000000005e-06, "loss": 254.9255, "step": 1480 }, { "epoch": 0.006019788539777066, "grad_norm": 827.721923828125, "learning_rate": 2.9800000000000003e-06, "loss": 301.6956, "step": 1490 }, { "epoch": 0.006060189805144697, "grad_norm": 1020.5379638671875, "learning_rate": 3e-06, "loss": 236.8173, "step": 1500 }, { "epoch": 0.0061005910705123284, "grad_norm": 12217.869140625, "learning_rate": 3.0200000000000003e-06, "loss": 315.658, "step": 1510 }, { "epoch": 0.00614099233587996, "grad_norm": 1178.291259765625, "learning_rate": 3.04e-06, "loss": 300.3993, "step": 1520 }, { "epoch": 0.006181393601247591, "grad_norm": 2459.315185546875, "learning_rate": 3.06e-06, "loss": 315.2213, "step": 1530 }, { "epoch": 0.006221794866615222, "grad_norm": 0.0, "learning_rate": 3.08e-06, "loss": 271.6832, "step": 1540 }, { "epoch": 0.006262196131982854, "grad_norm": 1094.0899658203125, "learning_rate": 3.1e-06, "loss": 273.5612, "step": 1550 }, { "epoch": 0.006302597397350485, "grad_norm": 1611.595703125, "learning_rate": 3.12e-06, "loss": 291.6243, "step": 1560 }, { "epoch": 0.006342998662718116, "grad_norm": 1336.66259765625, "learning_rate": 3.14e-06, "loss": 322.2612, "step": 1570 }, { "epoch": 0.006383399928085748, "grad_norm": 2437.919189453125, "learning_rate": 3.1600000000000007e-06, "loss": 281.6811, "step": 1580 }, { "epoch": 0.006423801193453379, "grad_norm": 945.635498046875, "learning_rate": 3.1800000000000005e-06, "loss": 331.3091, "step": 1590 }, { "epoch": 0.00646420245882101, "grad_norm": 1270.96826171875, "learning_rate": 3.2000000000000003e-06, "loss": 337.9903, "step": 1600 }, { "epoch": 0.006504603724188642, "grad_norm": 1054.9232177734375, "learning_rate": 3.22e-06, "loss": 371.3227, "step": 1610 }, { "epoch": 0.006545004989556273, "grad_norm": 1814.677734375, "learning_rate": 3.24e-06, "loss": 390.2153, "step": 1620 }, { "epoch": 0.006585406254923904, "grad_norm": 648.9537963867188, "learning_rate": 3.2599999999999997e-06, "loss": 368.1877, "step": 1630 }, { "epoch": 0.006625807520291536, "grad_norm": 1473.499267578125, "learning_rate": 3.2800000000000004e-06, "loss": 386.7809, "step": 1640 }, { "epoch": 0.006666208785659167, "grad_norm": 2527.870849609375, "learning_rate": 3.3e-06, "loss": 414.1417, "step": 1650 }, { "epoch": 0.006706610051026798, "grad_norm": 1029.5302734375, "learning_rate": 3.3200000000000004e-06, "loss": 258.2289, "step": 1660 }, { "epoch": 0.0067470113163944296, "grad_norm": 748.2296142578125, "learning_rate": 3.34e-06, "loss": 348.1511, "step": 1670 }, { "epoch": 0.006787412581762061, "grad_norm": 2257.800537109375, "learning_rate": 3.36e-06, "loss": 326.2408, "step": 1680 }, { "epoch": 0.006827813847129692, "grad_norm": 987.0545654296875, "learning_rate": 3.38e-06, "loss": 285.0146, "step": 1690 }, { "epoch": 0.0068682151124973235, "grad_norm": 3093.51171875, "learning_rate": 3.4000000000000005e-06, "loss": 357.8721, "step": 1700 }, { "epoch": 0.006908616377864955, "grad_norm": 769.6875610351562, "learning_rate": 3.4200000000000003e-06, "loss": 312.937, "step": 1710 }, { "epoch": 0.006949017643232586, "grad_norm": 5268.0009765625, "learning_rate": 3.44e-06, "loss": 437.4973, "step": 1720 }, { "epoch": 0.0069894189086002175, "grad_norm": 875.3474731445312, "learning_rate": 3.46e-06, "loss": 303.2312, "step": 1730 }, { "epoch": 0.007029820173967849, "grad_norm": 988.2050170898438, "learning_rate": 3.4799999999999997e-06, "loss": 358.3378, "step": 1740 }, { "epoch": 0.00707022143933548, "grad_norm": 1848.81884765625, "learning_rate": 3.5000000000000004e-06, "loss": 354.324, "step": 1750 }, { "epoch": 0.0071106227047031114, "grad_norm": 985.3126220703125, "learning_rate": 3.52e-06, "loss": 387.196, "step": 1760 }, { "epoch": 0.007151023970070743, "grad_norm": 847.1213989257812, "learning_rate": 3.5400000000000004e-06, "loss": 298.6457, "step": 1770 }, { "epoch": 0.007191425235438374, "grad_norm": 1782.748779296875, "learning_rate": 3.5600000000000002e-06, "loss": 410.3063, "step": 1780 }, { "epoch": 0.007231826500806005, "grad_norm": 731.6522216796875, "learning_rate": 3.58e-06, "loss": 233.0396, "step": 1790 }, { "epoch": 0.007272227766173637, "grad_norm": 498.69366455078125, "learning_rate": 3.6e-06, "loss": 287.9018, "step": 1800 }, { "epoch": 0.007312629031541268, "grad_norm": 906.2838134765625, "learning_rate": 3.6200000000000005e-06, "loss": 345.1769, "step": 1810 }, { "epoch": 0.007353030296908899, "grad_norm": 713.8414916992188, "learning_rate": 3.6400000000000003e-06, "loss": 291.5412, "step": 1820 }, { "epoch": 0.007393431562276531, "grad_norm": 1315.886962890625, "learning_rate": 3.66e-06, "loss": 394.6411, "step": 1830 }, { "epoch": 0.007433832827644162, "grad_norm": 1429.893798828125, "learning_rate": 3.68e-06, "loss": 247.4847, "step": 1840 }, { "epoch": 0.007474234093011793, "grad_norm": 1034.032470703125, "learning_rate": 3.7e-06, "loss": 406.1072, "step": 1850 }, { "epoch": 0.007514635358379425, "grad_norm": 955.8413696289062, "learning_rate": 3.72e-06, "loss": 392.0067, "step": 1860 }, { "epoch": 0.007555036623747056, "grad_norm": 0.0, "learning_rate": 3.7400000000000006e-06, "loss": 235.0364, "step": 1870 }, { "epoch": 0.007595437889114687, "grad_norm": 13884.802734375, "learning_rate": 3.7600000000000004e-06, "loss": 336.9673, "step": 1880 }, { "epoch": 0.007635839154482319, "grad_norm": 3753.5439453125, "learning_rate": 3.7800000000000002e-06, "loss": 362.8436, "step": 1890 }, { "epoch": 0.00767624041984995, "grad_norm": 1046.0020751953125, "learning_rate": 3.8e-06, "loss": 370.7911, "step": 1900 }, { "epoch": 0.007716641685217581, "grad_norm": 2058.227783203125, "learning_rate": 3.82e-06, "loss": 356.5814, "step": 1910 }, { "epoch": 0.0077570429505852126, "grad_norm": 833.1560668945312, "learning_rate": 3.84e-06, "loss": 362.6392, "step": 1920 }, { "epoch": 0.007797444215952844, "grad_norm": 1860.0916748046875, "learning_rate": 3.86e-06, "loss": 327.3118, "step": 1930 }, { "epoch": 0.007837845481320474, "grad_norm": 2047.00146484375, "learning_rate": 3.88e-06, "loss": 431.6941, "step": 1940 }, { "epoch": 0.007878246746688106, "grad_norm": 1433.5374755859375, "learning_rate": 3.9e-06, "loss": 283.749, "step": 1950 }, { "epoch": 0.007918648012055737, "grad_norm": 1340.44287109375, "learning_rate": 3.92e-06, "loss": 323.1868, "step": 1960 }, { "epoch": 0.007959049277423368, "grad_norm": 5642.4560546875, "learning_rate": 3.9399999999999995e-06, "loss": 428.7537, "step": 1970 }, { "epoch": 0.007999450542791, "grad_norm": 519.6063232421875, "learning_rate": 3.96e-06, "loss": 304.1046, "step": 1980 }, { "epoch": 0.008039851808158631, "grad_norm": 1227.509765625, "learning_rate": 3.98e-06, "loss": 295.7083, "step": 1990 }, { "epoch": 0.008080253073526262, "grad_norm": 788.1338500976562, "learning_rate": 4.000000000000001e-06, "loss": 383.1164, "step": 2000 }, { "epoch": 0.008120654338893894, "grad_norm": 1502.9964599609375, "learning_rate": 4.0200000000000005e-06, "loss": 312.3206, "step": 2010 }, { "epoch": 0.008161055604261525, "grad_norm": 6165.92529296875, "learning_rate": 4.04e-06, "loss": 307.8958, "step": 2020 }, { "epoch": 0.008201456869629156, "grad_norm": 915.6934814453125, "learning_rate": 4.06e-06, "loss": 374.9606, "step": 2030 }, { "epoch": 0.008241858134996788, "grad_norm": 730.7386474609375, "learning_rate": 4.080000000000001e-06, "loss": 329.4409, "step": 2040 }, { "epoch": 0.008282259400364419, "grad_norm": 1635.5831298828125, "learning_rate": 4.1000000000000006e-06, "loss": 209.305, "step": 2050 }, { "epoch": 0.00832266066573205, "grad_norm": 4476.35888671875, "learning_rate": 4.12e-06, "loss": 415.3398, "step": 2060 }, { "epoch": 0.008363061931099681, "grad_norm": 1422.126220703125, "learning_rate": 4.14e-06, "loss": 360.3677, "step": 2070 }, { "epoch": 0.008403463196467313, "grad_norm": 620.626220703125, "learning_rate": 4.16e-06, "loss": 310.8916, "step": 2080 }, { "epoch": 0.008443864461834944, "grad_norm": 1371.109130859375, "learning_rate": 4.18e-06, "loss": 339.1394, "step": 2090 }, { "epoch": 0.008484265727202575, "grad_norm": 1084.7042236328125, "learning_rate": 4.2000000000000004e-06, "loss": 282.2104, "step": 2100 }, { "epoch": 0.008524666992570207, "grad_norm": 678.981689453125, "learning_rate": 4.22e-06, "loss": 248.9129, "step": 2110 }, { "epoch": 0.008565068257937838, "grad_norm": 1121.5936279296875, "learning_rate": 4.24e-06, "loss": 333.5619, "step": 2120 }, { "epoch": 0.00860546952330547, "grad_norm": 795.3826293945312, "learning_rate": 4.26e-06, "loss": 293.3469, "step": 2130 }, { "epoch": 0.0086458707886731, "grad_norm": 1109.0904541015625, "learning_rate": 4.28e-06, "loss": 365.5112, "step": 2140 }, { "epoch": 0.008686272054040732, "grad_norm": 778.438232421875, "learning_rate": 4.2999999999999995e-06, "loss": 211.6606, "step": 2150 }, { "epoch": 0.008726673319408363, "grad_norm": 1132.38818359375, "learning_rate": 4.32e-06, "loss": 386.3405, "step": 2160 }, { "epoch": 0.008767074584775995, "grad_norm": 986.1419677734375, "learning_rate": 4.34e-06, "loss": 326.4904, "step": 2170 }, { "epoch": 0.008807475850143626, "grad_norm": 880.376220703125, "learning_rate": 4.360000000000001e-06, "loss": 308.4621, "step": 2180 }, { "epoch": 0.008847877115511257, "grad_norm": 2747.5908203125, "learning_rate": 4.38e-06, "loss": 240.2334, "step": 2190 }, { "epoch": 0.008888278380878889, "grad_norm": 1022.6881103515625, "learning_rate": 4.4e-06, "loss": 348.5623, "step": 2200 }, { "epoch": 0.00892867964624652, "grad_norm": 701.6885986328125, "learning_rate": 4.420000000000001e-06, "loss": 239.2583, "step": 2210 }, { "epoch": 0.008969080911614151, "grad_norm": 812.2908325195312, "learning_rate": 4.440000000000001e-06, "loss": 305.8652, "step": 2220 }, { "epoch": 0.009009482176981783, "grad_norm": 936.9786376953125, "learning_rate": 4.4600000000000005e-06, "loss": 406.4304, "step": 2230 }, { "epoch": 0.009049883442349414, "grad_norm": 849.5304565429688, "learning_rate": 4.48e-06, "loss": 365.7475, "step": 2240 }, { "epoch": 0.009090284707717045, "grad_norm": 1223.7852783203125, "learning_rate": 4.5e-06, "loss": 295.9342, "step": 2250 }, { "epoch": 0.009130685973084677, "grad_norm": 2563.434814453125, "learning_rate": 4.52e-06, "loss": 282.5878, "step": 2260 }, { "epoch": 0.009171087238452308, "grad_norm": 3302.397705078125, "learning_rate": 4.540000000000001e-06, "loss": 292.655, "step": 2270 }, { "epoch": 0.00921148850381994, "grad_norm": 726.690185546875, "learning_rate": 4.56e-06, "loss": 278.9063, "step": 2280 }, { "epoch": 0.00925188976918757, "grad_norm": 611.144287109375, "learning_rate": 4.58e-06, "loss": 297.2235, "step": 2290 }, { "epoch": 0.009292291034555202, "grad_norm": 962.685302734375, "learning_rate": 4.6e-06, "loss": 224.3765, "step": 2300 }, { "epoch": 0.009332692299922833, "grad_norm": 792.4270629882812, "learning_rate": 4.62e-06, "loss": 339.5073, "step": 2310 }, { "epoch": 0.009373093565290464, "grad_norm": 993.0485229492188, "learning_rate": 4.64e-06, "loss": 268.8986, "step": 2320 }, { "epoch": 0.009413494830658096, "grad_norm": 10737.74609375, "learning_rate": 4.66e-06, "loss": 290.0178, "step": 2330 }, { "epoch": 0.009453896096025727, "grad_norm": 1019.1024780273438, "learning_rate": 4.68e-06, "loss": 290.3607, "step": 2340 }, { "epoch": 0.009494297361393358, "grad_norm": 582.261962890625, "learning_rate": 4.7e-06, "loss": 263.2255, "step": 2350 }, { "epoch": 0.00953469862676099, "grad_norm": 928.1985473632812, "learning_rate": 4.72e-06, "loss": 496.6325, "step": 2360 }, { "epoch": 0.009575099892128621, "grad_norm": 1354.3148193359375, "learning_rate": 4.74e-06, "loss": 317.678, "step": 2370 }, { "epoch": 0.009615501157496252, "grad_norm": 1359.7713623046875, "learning_rate": 4.76e-06, "loss": 263.0336, "step": 2380 }, { "epoch": 0.009655902422863884, "grad_norm": 759.7086791992188, "learning_rate": 4.780000000000001e-06, "loss": 328.0415, "step": 2390 }, { "epoch": 0.009696303688231515, "grad_norm": 593.6005249023438, "learning_rate": 4.800000000000001e-06, "loss": 368.25, "step": 2400 }, { "epoch": 0.009736704953599146, "grad_norm": 732.736328125, "learning_rate": 4.8200000000000004e-06, "loss": 175.4389, "step": 2410 }, { "epoch": 0.009777106218966778, "grad_norm": 1101.7640380859375, "learning_rate": 4.84e-06, "loss": 293.6888, "step": 2420 }, { "epoch": 0.009817507484334409, "grad_norm": 4335.19873046875, "learning_rate": 4.86e-06, "loss": 348.8911, "step": 2430 }, { "epoch": 0.00985790874970204, "grad_norm": 1072.8892822265625, "learning_rate": 4.880000000000001e-06, "loss": 231.7467, "step": 2440 }, { "epoch": 0.009898310015069672, "grad_norm": 608.339111328125, "learning_rate": 4.9000000000000005e-06, "loss": 241.5925, "step": 2450 }, { "epoch": 0.009938711280437303, "grad_norm": 1088.7662353515625, "learning_rate": 4.92e-06, "loss": 185.2763, "step": 2460 }, { "epoch": 0.009979112545804934, "grad_norm": 901.8641967773438, "learning_rate": 4.94e-06, "loss": 355.3011, "step": 2470 }, { "epoch": 0.010019513811172566, "grad_norm": 1227.393798828125, "learning_rate": 4.96e-06, "loss": 310.6582, "step": 2480 }, { "epoch": 0.010059915076540197, "grad_norm": 820.9786376953125, "learning_rate": 4.98e-06, "loss": 388.506, "step": 2490 }, { "epoch": 0.010100316341907828, "grad_norm": 631.5418701171875, "learning_rate": 5e-06, "loss": 186.973, "step": 2500 }, { "epoch": 0.01014071760727546, "grad_norm": 976.1464233398438, "learning_rate": 5.02e-06, "loss": 260.0689, "step": 2510 }, { "epoch": 0.010181118872643091, "grad_norm": 537.5474853515625, "learning_rate": 5.04e-06, "loss": 245.8573, "step": 2520 }, { "epoch": 0.010221520138010722, "grad_norm": 1130.83154296875, "learning_rate": 5.06e-06, "loss": 213.0081, "step": 2530 }, { "epoch": 0.010261921403378354, "grad_norm": 1045.5367431640625, "learning_rate": 5.08e-06, "loss": 273.8134, "step": 2540 }, { "epoch": 0.010302322668745985, "grad_norm": 934.2299194335938, "learning_rate": 5.1e-06, "loss": 351.3407, "step": 2550 }, { "epoch": 0.010342723934113616, "grad_norm": 603.625, "learning_rate": 5.12e-06, "loss": 189.3821, "step": 2560 }, { "epoch": 0.010383125199481247, "grad_norm": 2282.93115234375, "learning_rate": 5.140000000000001e-06, "loss": 306.1943, "step": 2570 }, { "epoch": 0.010423526464848879, "grad_norm": 1158.8792724609375, "learning_rate": 5.1600000000000006e-06, "loss": 293.8449, "step": 2580 }, { "epoch": 0.01046392773021651, "grad_norm": 1000.369140625, "learning_rate": 5.18e-06, "loss": 252.3016, "step": 2590 }, { "epoch": 0.010504328995584141, "grad_norm": 1298.2783203125, "learning_rate": 5.2e-06, "loss": 304.2064, "step": 2600 }, { "epoch": 0.010544730260951773, "grad_norm": 2631.5205078125, "learning_rate": 5.220000000000001e-06, "loss": 272.8798, "step": 2610 }, { "epoch": 0.010585131526319404, "grad_norm": 1061.5810546875, "learning_rate": 5.240000000000001e-06, "loss": 403.6962, "step": 2620 }, { "epoch": 0.010625532791687035, "grad_norm": 835.53369140625, "learning_rate": 5.2600000000000005e-06, "loss": 367.6394, "step": 2630 }, { "epoch": 0.010665934057054667, "grad_norm": 867.9429931640625, "learning_rate": 5.28e-06, "loss": 321.1476, "step": 2640 }, { "epoch": 0.010706335322422298, "grad_norm": 3146.14013671875, "learning_rate": 5.3e-06, "loss": 266.9892, "step": 2650 }, { "epoch": 0.01074673658778993, "grad_norm": 640.1483764648438, "learning_rate": 5.32e-06, "loss": 206.9268, "step": 2660 }, { "epoch": 0.01078713785315756, "grad_norm": 842.3226928710938, "learning_rate": 5.3400000000000005e-06, "loss": 325.724, "step": 2670 }, { "epoch": 0.010827539118525192, "grad_norm": 717.4343872070312, "learning_rate": 5.36e-06, "loss": 314.9767, "step": 2680 }, { "epoch": 0.010867940383892823, "grad_norm": 5089.775390625, "learning_rate": 5.38e-06, "loss": 329.5498, "step": 2690 }, { "epoch": 0.010908341649260455, "grad_norm": 1157.4693603515625, "learning_rate": 5.4e-06, "loss": 339.251, "step": 2700 }, { "epoch": 0.010948742914628086, "grad_norm": 1268.937744140625, "learning_rate": 5.42e-06, "loss": 361.0937, "step": 2710 }, { "epoch": 0.010989144179995717, "grad_norm": 855.2471313476562, "learning_rate": 5.44e-06, "loss": 285.6735, "step": 2720 }, { "epoch": 0.011029545445363349, "grad_norm": 777.139892578125, "learning_rate": 5.46e-06, "loss": 275.3022, "step": 2730 }, { "epoch": 0.01106994671073098, "grad_norm": 2281.784423828125, "learning_rate": 5.48e-06, "loss": 264.5153, "step": 2740 }, { "epoch": 0.011110347976098611, "grad_norm": 737.9111328125, "learning_rate": 5.500000000000001e-06, "loss": 200.1999, "step": 2750 }, { "epoch": 0.011150749241466243, "grad_norm": 7024.08251953125, "learning_rate": 5.5200000000000005e-06, "loss": 316.7326, "step": 2760 }, { "epoch": 0.011191150506833874, "grad_norm": 1147.371826171875, "learning_rate": 5.54e-06, "loss": 238.9098, "step": 2770 }, { "epoch": 0.011231551772201505, "grad_norm": 873.0858154296875, "learning_rate": 5.56e-06, "loss": 249.4478, "step": 2780 }, { "epoch": 0.011271953037569137, "grad_norm": 2017.5811767578125, "learning_rate": 5.580000000000001e-06, "loss": 280.6727, "step": 2790 }, { "epoch": 0.011312354302936768, "grad_norm": 689.9805908203125, "learning_rate": 5.600000000000001e-06, "loss": 238.7173, "step": 2800 }, { "epoch": 0.0113527555683044, "grad_norm": 1248.52490234375, "learning_rate": 5.62e-06, "loss": 243.3881, "step": 2810 }, { "epoch": 0.01139315683367203, "grad_norm": 756.8421020507812, "learning_rate": 5.64e-06, "loss": 192.3346, "step": 2820 }, { "epoch": 0.011433558099039662, "grad_norm": 1675.668701171875, "learning_rate": 5.66e-06, "loss": 323.5421, "step": 2830 }, { "epoch": 0.011473959364407293, "grad_norm": 730.6026000976562, "learning_rate": 5.680000000000001e-06, "loss": 235.7116, "step": 2840 }, { "epoch": 0.011514360629774924, "grad_norm": 962.4594116210938, "learning_rate": 5.7000000000000005e-06, "loss": 284.3505, "step": 2850 }, { "epoch": 0.011554761895142556, "grad_norm": 737.1028442382812, "learning_rate": 5.72e-06, "loss": 279.0431, "step": 2860 }, { "epoch": 0.011595163160510187, "grad_norm": 1873.3394775390625, "learning_rate": 5.74e-06, "loss": 293.4966, "step": 2870 }, { "epoch": 0.011635564425877818, "grad_norm": 699.8680419921875, "learning_rate": 5.76e-06, "loss": 201.6691, "step": 2880 }, { "epoch": 0.01167596569124545, "grad_norm": 1195.6563720703125, "learning_rate": 5.78e-06, "loss": 339.2745, "step": 2890 }, { "epoch": 0.011716366956613081, "grad_norm": 1244.9783935546875, "learning_rate": 5.8e-06, "loss": 295.2553, "step": 2900 }, { "epoch": 0.011756768221980712, "grad_norm": 1545.2462158203125, "learning_rate": 5.82e-06, "loss": 380.4514, "step": 2910 }, { "epoch": 0.011797169487348344, "grad_norm": 580.0228271484375, "learning_rate": 5.84e-06, "loss": 219.7412, "step": 2920 }, { "epoch": 0.011837570752715975, "grad_norm": 741.6790771484375, "learning_rate": 5.86e-06, "loss": 223.957, "step": 2930 }, { "epoch": 0.011877972018083606, "grad_norm": 633.643798828125, "learning_rate": 5.8800000000000005e-06, "loss": 292.0679, "step": 2940 }, { "epoch": 0.011918373283451238, "grad_norm": 11992.681640625, "learning_rate": 5.9e-06, "loss": 365.1848, "step": 2950 }, { "epoch": 0.011958774548818869, "grad_norm": 2407.924072265625, "learning_rate": 5.920000000000001e-06, "loss": 284.2829, "step": 2960 }, { "epoch": 0.0119991758141865, "grad_norm": 9947.2861328125, "learning_rate": 5.940000000000001e-06, "loss": 336.0969, "step": 2970 }, { "epoch": 0.012039577079554132, "grad_norm": 1071.607177734375, "learning_rate": 5.9600000000000005e-06, "loss": 288.0666, "step": 2980 }, { "epoch": 0.012079978344921763, "grad_norm": 1607.6715087890625, "learning_rate": 5.98e-06, "loss": 334.0282, "step": 2990 }, { "epoch": 0.012120379610289394, "grad_norm": 616.33642578125, "learning_rate": 6e-06, "loss": 176.2524, "step": 3000 }, { "epoch": 0.012160780875657026, "grad_norm": 1787.1187744140625, "learning_rate": 6.02e-06, "loss": 262.5472, "step": 3010 }, { "epoch": 0.012201182141024657, "grad_norm": 680.6809692382812, "learning_rate": 6.040000000000001e-06, "loss": 195.7003, "step": 3020 }, { "epoch": 0.012241583406392288, "grad_norm": 1115.2088623046875, "learning_rate": 6.0600000000000004e-06, "loss": 291.6268, "step": 3030 }, { "epoch": 0.01228198467175992, "grad_norm": 1072.4854736328125, "learning_rate": 6.08e-06, "loss": 399.983, "step": 3040 }, { "epoch": 0.01232238593712755, "grad_norm": 1406.6270751953125, "learning_rate": 6.1e-06, "loss": 332.8581, "step": 3050 }, { "epoch": 0.012362787202495182, "grad_norm": 915.8823852539062, "learning_rate": 6.12e-06, "loss": 242.4036, "step": 3060 }, { "epoch": 0.012403188467862813, "grad_norm": 1265.71826171875, "learning_rate": 6.1400000000000005e-06, "loss": 240.312, "step": 3070 }, { "epoch": 0.012443589733230445, "grad_norm": 1575.8834228515625, "learning_rate": 6.16e-06, "loss": 207.945, "step": 3080 }, { "epoch": 0.012483990998598076, "grad_norm": 1585.046142578125, "learning_rate": 6.18e-06, "loss": 328.1433, "step": 3090 }, { "epoch": 0.012524392263965707, "grad_norm": 3717.249755859375, "learning_rate": 6.2e-06, "loss": 368.012, "step": 3100 }, { "epoch": 0.012564793529333339, "grad_norm": 784.789306640625, "learning_rate": 6.22e-06, "loss": 339.9229, "step": 3110 }, { "epoch": 0.01260519479470097, "grad_norm": 1485.326171875, "learning_rate": 6.24e-06, "loss": 345.8715, "step": 3120 }, { "epoch": 0.012645596060068601, "grad_norm": 883.3239135742188, "learning_rate": 6.26e-06, "loss": 215.2944, "step": 3130 }, { "epoch": 0.012685997325436233, "grad_norm": 665.870849609375, "learning_rate": 6.28e-06, "loss": 247.7601, "step": 3140 }, { "epoch": 0.012726398590803864, "grad_norm": 4960.85546875, "learning_rate": 6.300000000000001e-06, "loss": 272.3599, "step": 3150 }, { "epoch": 0.012766799856171495, "grad_norm": 1986.215576171875, "learning_rate": 6.320000000000001e-06, "loss": 261.7445, "step": 3160 }, { "epoch": 0.012807201121539127, "grad_norm": 689.2445678710938, "learning_rate": 6.34e-06, "loss": 186.6741, "step": 3170 }, { "epoch": 0.012847602386906758, "grad_norm": 1272.3314208984375, "learning_rate": 6.360000000000001e-06, "loss": 362.3708, "step": 3180 }, { "epoch": 0.01288800365227439, "grad_norm": 1243.4554443359375, "learning_rate": 6.38e-06, "loss": 223.126, "step": 3190 }, { "epoch": 0.01292840491764202, "grad_norm": 0.0, "learning_rate": 6.4000000000000006e-06, "loss": 220.924, "step": 3200 }, { "epoch": 0.012968806183009652, "grad_norm": 1140.0751953125, "learning_rate": 6.4199999999999995e-06, "loss": 304.4165, "step": 3210 }, { "epoch": 0.013009207448377283, "grad_norm": 865.4572143554688, "learning_rate": 6.44e-06, "loss": 225.4173, "step": 3220 }, { "epoch": 0.013049608713744915, "grad_norm": 1003.0341796875, "learning_rate": 6.460000000000001e-06, "loss": 318.8464, "step": 3230 }, { "epoch": 0.013090009979112546, "grad_norm": 3230.90966796875, "learning_rate": 6.48e-06, "loss": 266.8362, "step": 3240 }, { "epoch": 0.013130411244480177, "grad_norm": 5314.744140625, "learning_rate": 6.5000000000000004e-06, "loss": 356.5823, "step": 3250 }, { "epoch": 0.013170812509847809, "grad_norm": 1545.9931640625, "learning_rate": 6.519999999999999e-06, "loss": 267.9298, "step": 3260 }, { "epoch": 0.01321121377521544, "grad_norm": 1124.9794921875, "learning_rate": 6.54e-06, "loss": 350.8778, "step": 3270 }, { "epoch": 0.013251615040583071, "grad_norm": 1584.3470458984375, "learning_rate": 6.560000000000001e-06, "loss": 268.6283, "step": 3280 }, { "epoch": 0.013292016305950703, "grad_norm": 1276.875244140625, "learning_rate": 6.58e-06, "loss": 401.9857, "step": 3290 }, { "epoch": 0.013332417571318334, "grad_norm": 1092.377197265625, "learning_rate": 6.6e-06, "loss": 331.368, "step": 3300 }, { "epoch": 0.013372818836685965, "grad_norm": 1179.2205810546875, "learning_rate": 6.62e-06, "loss": 177.4306, "step": 3310 }, { "epoch": 0.013413220102053596, "grad_norm": 1153.752685546875, "learning_rate": 6.640000000000001e-06, "loss": 229.9581, "step": 3320 }, { "epoch": 0.013453621367421228, "grad_norm": 998.432373046875, "learning_rate": 6.660000000000001e-06, "loss": 266.7427, "step": 3330 }, { "epoch": 0.013494022632788859, "grad_norm": 1151.6092529296875, "learning_rate": 6.68e-06, "loss": 331.803, "step": 3340 }, { "epoch": 0.01353442389815649, "grad_norm": 2472.0546875, "learning_rate": 6.700000000000001e-06, "loss": 236.3656, "step": 3350 }, { "epoch": 0.013574825163524122, "grad_norm": 1243.1534423828125, "learning_rate": 6.72e-06, "loss": 180.7836, "step": 3360 }, { "epoch": 0.013615226428891753, "grad_norm": 1034.3489990234375, "learning_rate": 6.740000000000001e-06, "loss": 343.4018, "step": 3370 }, { "epoch": 0.013655627694259384, "grad_norm": 905.5031127929688, "learning_rate": 6.76e-06, "loss": 140.6687, "step": 3380 }, { "epoch": 0.013696028959627016, "grad_norm": 1340.9151611328125, "learning_rate": 6.78e-06, "loss": 289.7602, "step": 3390 }, { "epoch": 0.013736430224994647, "grad_norm": 960.6358642578125, "learning_rate": 6.800000000000001e-06, "loss": 226.2994, "step": 3400 }, { "epoch": 0.013776831490362278, "grad_norm": 1423.4761962890625, "learning_rate": 6.82e-06, "loss": 430.8338, "step": 3410 }, { "epoch": 0.01381723275572991, "grad_norm": 1304.45263671875, "learning_rate": 6.840000000000001e-06, "loss": 328.4004, "step": 3420 }, { "epoch": 0.013857634021097541, "grad_norm": 9199.7373046875, "learning_rate": 6.8599999999999995e-06, "loss": 270.6068, "step": 3430 }, { "epoch": 0.013898035286465172, "grad_norm": 776.5551147460938, "learning_rate": 6.88e-06, "loss": 176.4451, "step": 3440 }, { "epoch": 0.013938436551832804, "grad_norm": 774.5919799804688, "learning_rate": 6.900000000000001e-06, "loss": 258.9075, "step": 3450 }, { "epoch": 0.013978837817200435, "grad_norm": 1491.8228759765625, "learning_rate": 6.92e-06, "loss": 295.3933, "step": 3460 }, { "epoch": 0.014019239082568066, "grad_norm": 1491.8526611328125, "learning_rate": 6.9400000000000005e-06, "loss": 239.9352, "step": 3470 }, { "epoch": 0.014059640347935698, "grad_norm": 1934.498779296875, "learning_rate": 6.9599999999999994e-06, "loss": 275.0346, "step": 3480 }, { "epoch": 0.014100041613303329, "grad_norm": 1181.4918212890625, "learning_rate": 6.98e-06, "loss": 238.241, "step": 3490 }, { "epoch": 0.01414044287867096, "grad_norm": 1785.65478515625, "learning_rate": 7.000000000000001e-06, "loss": 163.911, "step": 3500 }, { "epoch": 0.014180844144038592, "grad_norm": 601.7042846679688, "learning_rate": 7.0200000000000006e-06, "loss": 200.9242, "step": 3510 }, { "epoch": 0.014221245409406223, "grad_norm": 1390.438720703125, "learning_rate": 7.04e-06, "loss": 225.1192, "step": 3520 }, { "epoch": 0.014261646674773854, "grad_norm": 1239.8173828125, "learning_rate": 7.06e-06, "loss": 207.541, "step": 3530 }, { "epoch": 0.014302047940141486, "grad_norm": 5047.65478515625, "learning_rate": 7.080000000000001e-06, "loss": 200.282, "step": 3540 }, { "epoch": 0.014342449205509117, "grad_norm": 684.9512939453125, "learning_rate": 7.1e-06, "loss": 200.1313, "step": 3550 }, { "epoch": 0.014382850470876748, "grad_norm": 538.3458862304688, "learning_rate": 7.1200000000000004e-06, "loss": 271.5458, "step": 3560 }, { "epoch": 0.01442325173624438, "grad_norm": 1814.7567138671875, "learning_rate": 7.140000000000001e-06, "loss": 227.9855, "step": 3570 }, { "epoch": 0.01446365300161201, "grad_norm": 617.8508911132812, "learning_rate": 7.16e-06, "loss": 152.5538, "step": 3580 }, { "epoch": 0.014504054266979642, "grad_norm": 1195.5587158203125, "learning_rate": 7.180000000000001e-06, "loss": 220.7645, "step": 3590 }, { "epoch": 0.014544455532347273, "grad_norm": 963.2885131835938, "learning_rate": 7.2e-06, "loss": 237.8198, "step": 3600 }, { "epoch": 0.014584856797714905, "grad_norm": 1143.399658203125, "learning_rate": 7.22e-06, "loss": 246.5691, "step": 3610 }, { "epoch": 0.014625258063082536, "grad_norm": 936.9075927734375, "learning_rate": 7.240000000000001e-06, "loss": 225.2431, "step": 3620 }, { "epoch": 0.014665659328450167, "grad_norm": 505.3612365722656, "learning_rate": 7.26e-06, "loss": 209.9184, "step": 3630 }, { "epoch": 0.014706060593817799, "grad_norm": 4472.5458984375, "learning_rate": 7.280000000000001e-06, "loss": 251.8955, "step": 3640 }, { "epoch": 0.01474646185918543, "grad_norm": 1152.7425537109375, "learning_rate": 7.2999999999999996e-06, "loss": 267.1611, "step": 3650 }, { "epoch": 0.014786863124553061, "grad_norm": 1535.3359375, "learning_rate": 7.32e-06, "loss": 258.2474, "step": 3660 }, { "epoch": 0.014827264389920693, "grad_norm": 788.0407104492188, "learning_rate": 7.340000000000001e-06, "loss": 308.7459, "step": 3670 }, { "epoch": 0.014867665655288324, "grad_norm": 3632.613037109375, "learning_rate": 7.36e-06, "loss": 270.6488, "step": 3680 }, { "epoch": 0.014908066920655955, "grad_norm": 1338.3414306640625, "learning_rate": 7.3800000000000005e-06, "loss": 402.1751, "step": 3690 }, { "epoch": 0.014948468186023587, "grad_norm": 2017.5732421875, "learning_rate": 7.4e-06, "loss": 335.9066, "step": 3700 }, { "epoch": 0.014988869451391218, "grad_norm": 866.8572998046875, "learning_rate": 7.420000000000001e-06, "loss": 228.7708, "step": 3710 }, { "epoch": 0.01502927071675885, "grad_norm": 2627.818359375, "learning_rate": 7.44e-06, "loss": 237.4983, "step": 3720 }, { "epoch": 0.01506967198212648, "grad_norm": 1014.5845947265625, "learning_rate": 7.4600000000000006e-06, "loss": 289.3517, "step": 3730 }, { "epoch": 0.015110073247494112, "grad_norm": 1206.6943359375, "learning_rate": 7.480000000000001e-06, "loss": 253.7986, "step": 3740 }, { "epoch": 0.015150474512861743, "grad_norm": 1855.4755859375, "learning_rate": 7.5e-06, "loss": 384.5513, "step": 3750 }, { "epoch": 0.015190875778229375, "grad_norm": 745.6865844726562, "learning_rate": 7.520000000000001e-06, "loss": 246.6602, "step": 3760 }, { "epoch": 0.015231277043597006, "grad_norm": 1637.106201171875, "learning_rate": 7.54e-06, "loss": 291.9528, "step": 3770 }, { "epoch": 0.015271678308964637, "grad_norm": 2112.7109375, "learning_rate": 7.5600000000000005e-06, "loss": 228.7163, "step": 3780 }, { "epoch": 0.015312079574332269, "grad_norm": 1741.5140380859375, "learning_rate": 7.580000000000001e-06, "loss": 238.9058, "step": 3790 }, { "epoch": 0.0153524808396999, "grad_norm": 991.5599365234375, "learning_rate": 7.6e-06, "loss": 172.588, "step": 3800 }, { "epoch": 0.015392882105067531, "grad_norm": 1450.9693603515625, "learning_rate": 7.620000000000001e-06, "loss": 298.7596, "step": 3810 }, { "epoch": 0.015433283370435162, "grad_norm": 602.7521362304688, "learning_rate": 7.64e-06, "loss": 212.8531, "step": 3820 }, { "epoch": 0.015473684635802794, "grad_norm": 1867.60546875, "learning_rate": 7.660000000000001e-06, "loss": 314.0657, "step": 3830 }, { "epoch": 0.015514085901170425, "grad_norm": 6575.3671875, "learning_rate": 7.68e-06, "loss": 181.4101, "step": 3840 }, { "epoch": 0.015554487166538056, "grad_norm": 811.929443359375, "learning_rate": 7.7e-06, "loss": 203.4754, "step": 3850 }, { "epoch": 0.015594888431905688, "grad_norm": 1337.7786865234375, "learning_rate": 7.72e-06, "loss": 251.0148, "step": 3860 }, { "epoch": 0.01563528969727332, "grad_norm": 1872.110595703125, "learning_rate": 7.74e-06, "loss": 220.2635, "step": 3870 }, { "epoch": 0.01567569096264095, "grad_norm": 421.63671875, "learning_rate": 7.76e-06, "loss": 219.4246, "step": 3880 }, { "epoch": 0.01571609222800858, "grad_norm": 901.9820556640625, "learning_rate": 7.78e-06, "loss": 253.3245, "step": 3890 }, { "epoch": 0.01575649349337621, "grad_norm": 1378.5067138671875, "learning_rate": 7.8e-06, "loss": 272.5337, "step": 3900 }, { "epoch": 0.015796894758743844, "grad_norm": 899.2632446289062, "learning_rate": 7.820000000000001e-06, "loss": 221.5219, "step": 3910 }, { "epoch": 0.015837296024111474, "grad_norm": 2511.511474609375, "learning_rate": 7.84e-06, "loss": 183.7072, "step": 3920 }, { "epoch": 0.015877697289479107, "grad_norm": 1480.0848388671875, "learning_rate": 7.860000000000001e-06, "loss": 226.5134, "step": 3930 }, { "epoch": 0.015918098554846737, "grad_norm": 1251.0631103515625, "learning_rate": 7.879999999999999e-06, "loss": 220.8395, "step": 3940 }, { "epoch": 0.01595849982021437, "grad_norm": 1017.7593994140625, "learning_rate": 7.9e-06, "loss": 299.8441, "step": 3950 }, { "epoch": 0.015998901085582, "grad_norm": 750.101318359375, "learning_rate": 7.92e-06, "loss": 256.8377, "step": 3960 }, { "epoch": 0.016039302350949632, "grad_norm": 850.2648315429688, "learning_rate": 7.94e-06, "loss": 300.5525, "step": 3970 }, { "epoch": 0.016079703616317262, "grad_norm": 688.6962280273438, "learning_rate": 7.96e-06, "loss": 238.5739, "step": 3980 }, { "epoch": 0.016120104881684895, "grad_norm": 1160.3623046875, "learning_rate": 7.98e-06, "loss": 230.5895, "step": 3990 }, { "epoch": 0.016160506147052525, "grad_norm": 768.995849609375, "learning_rate": 8.000000000000001e-06, "loss": 273.8474, "step": 4000 }, { "epoch": 0.016200907412420158, "grad_norm": 733.1216430664062, "learning_rate": 8.02e-06, "loss": 213.2722, "step": 4010 }, { "epoch": 0.016241308677787787, "grad_norm": 0.0, "learning_rate": 8.040000000000001e-06, "loss": 222.9751, "step": 4020 }, { "epoch": 0.01628170994315542, "grad_norm": 800.3453979492188, "learning_rate": 8.06e-06, "loss": 268.3867, "step": 4030 }, { "epoch": 0.01632211120852305, "grad_norm": 1095.6239013671875, "learning_rate": 8.08e-06, "loss": 321.233, "step": 4040 }, { "epoch": 0.016362512473890683, "grad_norm": 2067.449462890625, "learning_rate": 8.1e-06, "loss": 218.0929, "step": 4050 }, { "epoch": 0.016402913739258312, "grad_norm": 1188.5040283203125, "learning_rate": 8.12e-06, "loss": 326.4176, "step": 4060 }, { "epoch": 0.016443315004625945, "grad_norm": 1143.6793212890625, "learning_rate": 8.14e-06, "loss": 244.4551, "step": 4070 }, { "epoch": 0.016483716269993575, "grad_norm": 878.0779418945312, "learning_rate": 8.160000000000001e-06, "loss": 235.9173, "step": 4080 }, { "epoch": 0.016524117535361208, "grad_norm": 737.4267578125, "learning_rate": 8.18e-06, "loss": 331.0821, "step": 4090 }, { "epoch": 0.016564518800728838, "grad_norm": 1495.6761474609375, "learning_rate": 8.200000000000001e-06, "loss": 248.3052, "step": 4100 }, { "epoch": 0.01660492006609647, "grad_norm": 814.0003051757812, "learning_rate": 8.22e-06, "loss": 199.0726, "step": 4110 }, { "epoch": 0.0166453213314641, "grad_norm": 634.8568725585938, "learning_rate": 8.24e-06, "loss": 313.8576, "step": 4120 }, { "epoch": 0.016685722596831733, "grad_norm": 714.36328125, "learning_rate": 8.26e-06, "loss": 215.2515, "step": 4130 }, { "epoch": 0.016726123862199363, "grad_norm": 1302.689697265625, "learning_rate": 8.28e-06, "loss": 262.0066, "step": 4140 }, { "epoch": 0.016766525127566996, "grad_norm": 1403.1646728515625, "learning_rate": 8.3e-06, "loss": 314.0872, "step": 4150 }, { "epoch": 0.016806926392934626, "grad_norm": 944.2760009765625, "learning_rate": 8.32e-06, "loss": 276.365, "step": 4160 }, { "epoch": 0.01684732765830226, "grad_norm": 2125.658447265625, "learning_rate": 8.34e-06, "loss": 337.9024, "step": 4170 }, { "epoch": 0.016887728923669888, "grad_norm": 2500.66455078125, "learning_rate": 8.36e-06, "loss": 241.9205, "step": 4180 }, { "epoch": 0.01692813018903752, "grad_norm": 637.8106689453125, "learning_rate": 8.380000000000001e-06, "loss": 239.1331, "step": 4190 }, { "epoch": 0.01696853145440515, "grad_norm": 1099.7269287109375, "learning_rate": 8.400000000000001e-06, "loss": 170.0579, "step": 4200 }, { "epoch": 0.017008932719772784, "grad_norm": 777.8555297851562, "learning_rate": 8.42e-06, "loss": 269.5526, "step": 4210 }, { "epoch": 0.017049333985140414, "grad_norm": 624.0740356445312, "learning_rate": 8.44e-06, "loss": 180.0498, "step": 4220 }, { "epoch": 0.017089735250508047, "grad_norm": 1288.8555908203125, "learning_rate": 8.46e-06, "loss": 287.3699, "step": 4230 }, { "epoch": 0.017130136515875676, "grad_norm": 689.3584594726562, "learning_rate": 8.48e-06, "loss": 308.6741, "step": 4240 }, { "epoch": 0.01717053778124331, "grad_norm": 1398.036376953125, "learning_rate": 8.500000000000002e-06, "loss": 266.2167, "step": 4250 }, { "epoch": 0.01721093904661094, "grad_norm": 789.7576293945312, "learning_rate": 8.52e-06, "loss": 255.249, "step": 4260 }, { "epoch": 0.017251340311978572, "grad_norm": 815.0007934570312, "learning_rate": 8.540000000000001e-06, "loss": 194.3588, "step": 4270 }, { "epoch": 0.0172917415773462, "grad_norm": 1327.666259765625, "learning_rate": 8.56e-06, "loss": 329.2384, "step": 4280 }, { "epoch": 0.017332142842713835, "grad_norm": 626.3024291992188, "learning_rate": 8.580000000000001e-06, "loss": 137.7933, "step": 4290 }, { "epoch": 0.017372544108081464, "grad_norm": 1358.869384765625, "learning_rate": 8.599999999999999e-06, "loss": 234.6338, "step": 4300 }, { "epoch": 0.017412945373449097, "grad_norm": 935.4063110351562, "learning_rate": 8.62e-06, "loss": 326.7994, "step": 4310 }, { "epoch": 0.017453346638816727, "grad_norm": 1433.051025390625, "learning_rate": 8.64e-06, "loss": 242.4297, "step": 4320 }, { "epoch": 0.01749374790418436, "grad_norm": 1025.8055419921875, "learning_rate": 8.66e-06, "loss": 168.3089, "step": 4330 }, { "epoch": 0.01753414916955199, "grad_norm": 2163.64794921875, "learning_rate": 8.68e-06, "loss": 246.4337, "step": 4340 }, { "epoch": 0.017574550434919622, "grad_norm": 1108.7593994140625, "learning_rate": 8.7e-06, "loss": 211.9252, "step": 4350 }, { "epoch": 0.017614951700287252, "grad_norm": 1170.3670654296875, "learning_rate": 8.720000000000001e-06, "loss": 196.4572, "step": 4360 }, { "epoch": 0.017655352965654885, "grad_norm": 822.1085815429688, "learning_rate": 8.740000000000001e-06, "loss": 236.9191, "step": 4370 }, { "epoch": 0.017695754231022515, "grad_norm": 551.1714477539062, "learning_rate": 8.76e-06, "loss": 262.1803, "step": 4380 }, { "epoch": 0.017736155496390148, "grad_norm": 941.1051635742188, "learning_rate": 8.78e-06, "loss": 197.1894, "step": 4390 }, { "epoch": 0.017776556761757777, "grad_norm": 812.2485961914062, "learning_rate": 8.8e-06, "loss": 304.7698, "step": 4400 }, { "epoch": 0.01781695802712541, "grad_norm": 740.1641845703125, "learning_rate": 8.82e-06, "loss": 207.9336, "step": 4410 }, { "epoch": 0.01785735929249304, "grad_norm": 1118.2490234375, "learning_rate": 8.840000000000002e-06, "loss": 201.0788, "step": 4420 }, { "epoch": 0.017897760557860673, "grad_norm": 2099.23583984375, "learning_rate": 8.86e-06, "loss": 222.6882, "step": 4430 }, { "epoch": 0.017938161823228303, "grad_norm": 1029.5682373046875, "learning_rate": 8.880000000000001e-06, "loss": 194.8024, "step": 4440 }, { "epoch": 0.017978563088595936, "grad_norm": 1188.818603515625, "learning_rate": 8.9e-06, "loss": 206.5788, "step": 4450 }, { "epoch": 0.018018964353963565, "grad_norm": 820.3424682617188, "learning_rate": 8.920000000000001e-06, "loss": 222.0988, "step": 4460 }, { "epoch": 0.0180593656193312, "grad_norm": 1431.5162353515625, "learning_rate": 8.939999999999999e-06, "loss": 210.3697, "step": 4470 }, { "epoch": 0.018099766884698828, "grad_norm": 1079.22314453125, "learning_rate": 8.96e-06, "loss": 236.7236, "step": 4480 }, { "epoch": 0.01814016815006646, "grad_norm": 1381.0302734375, "learning_rate": 8.98e-06, "loss": 200.5311, "step": 4490 }, { "epoch": 0.01818056941543409, "grad_norm": 799.3221435546875, "learning_rate": 9e-06, "loss": 229.802, "step": 4500 }, { "epoch": 0.018220970680801724, "grad_norm": 1207.725830078125, "learning_rate": 9.02e-06, "loss": 214.4318, "step": 4510 }, { "epoch": 0.018261371946169353, "grad_norm": 725.3284301757812, "learning_rate": 9.04e-06, "loss": 193.9023, "step": 4520 }, { "epoch": 0.018301773211536986, "grad_norm": 1104.83642578125, "learning_rate": 9.06e-06, "loss": 220.9035, "step": 4530 }, { "epoch": 0.018342174476904616, "grad_norm": 1509.99462890625, "learning_rate": 9.080000000000001e-06, "loss": 188.4938, "step": 4540 }, { "epoch": 0.01838257574227225, "grad_norm": 1129.591064453125, "learning_rate": 9.100000000000001e-06, "loss": 308.8094, "step": 4550 }, { "epoch": 0.01842297700763988, "grad_norm": 2700.010986328125, "learning_rate": 9.12e-06, "loss": 310.8748, "step": 4560 }, { "epoch": 0.01846337827300751, "grad_norm": 1198.5032958984375, "learning_rate": 9.14e-06, "loss": 219.3821, "step": 4570 }, { "epoch": 0.01850377953837514, "grad_norm": 907.4744873046875, "learning_rate": 9.16e-06, "loss": 272.8794, "step": 4580 }, { "epoch": 0.018544180803742774, "grad_norm": 480.4779968261719, "learning_rate": 9.180000000000002e-06, "loss": 303.4508, "step": 4590 }, { "epoch": 0.018584582069110404, "grad_norm": 5315.6416015625, "learning_rate": 9.2e-06, "loss": 298.2705, "step": 4600 }, { "epoch": 0.018624983334478037, "grad_norm": 872.8356323242188, "learning_rate": 9.220000000000002e-06, "loss": 203.8982, "step": 4610 }, { "epoch": 0.018665384599845666, "grad_norm": 593.9172973632812, "learning_rate": 9.24e-06, "loss": 175.9667, "step": 4620 }, { "epoch": 0.0187057858652133, "grad_norm": 869.2783203125, "learning_rate": 9.260000000000001e-06, "loss": 180.2353, "step": 4630 }, { "epoch": 0.01874618713058093, "grad_norm": 1448.7855224609375, "learning_rate": 9.28e-06, "loss": 180.4345, "step": 4640 }, { "epoch": 0.018786588395948562, "grad_norm": 2569.0322265625, "learning_rate": 9.3e-06, "loss": 263.0333, "step": 4650 }, { "epoch": 0.01882698966131619, "grad_norm": 1611.3876953125, "learning_rate": 9.32e-06, "loss": 286.5421, "step": 4660 }, { "epoch": 0.018867390926683825, "grad_norm": 762.2740478515625, "learning_rate": 9.34e-06, "loss": 272.6539, "step": 4670 }, { "epoch": 0.018907792192051454, "grad_norm": 1819.1978759765625, "learning_rate": 9.36e-06, "loss": 197.3641, "step": 4680 }, { "epoch": 0.018948193457419087, "grad_norm": 1281.1929931640625, "learning_rate": 9.38e-06, "loss": 199.3052, "step": 4690 }, { "epoch": 0.018988594722786717, "grad_norm": 4967.193359375, "learning_rate": 9.4e-06, "loss": 230.312, "step": 4700 }, { "epoch": 0.01902899598815435, "grad_norm": 1749.0260009765625, "learning_rate": 9.420000000000001e-06, "loss": 245.5051, "step": 4710 }, { "epoch": 0.01906939725352198, "grad_norm": 0.0, "learning_rate": 9.44e-06, "loss": 190.8588, "step": 4720 }, { "epoch": 0.019109798518889613, "grad_norm": 1861.452880859375, "learning_rate": 9.460000000000001e-06, "loss": 190.6377, "step": 4730 }, { "epoch": 0.019150199784257242, "grad_norm": 2230.98876953125, "learning_rate": 9.48e-06, "loss": 252.1078, "step": 4740 }, { "epoch": 0.019190601049624875, "grad_norm": 803.5484008789062, "learning_rate": 9.5e-06, "loss": 254.6172, "step": 4750 }, { "epoch": 0.019231002314992505, "grad_norm": 765.6219482421875, "learning_rate": 9.52e-06, "loss": 249.983, "step": 4760 }, { "epoch": 0.019271403580360138, "grad_norm": 932.5681762695312, "learning_rate": 9.54e-06, "loss": 235.5316, "step": 4770 }, { "epoch": 0.019311804845727767, "grad_norm": 981.9686889648438, "learning_rate": 9.560000000000002e-06, "loss": 229.3628, "step": 4780 }, { "epoch": 0.0193522061110954, "grad_norm": 436.2134704589844, "learning_rate": 9.58e-06, "loss": 158.716, "step": 4790 }, { "epoch": 0.01939260737646303, "grad_norm": 792.7994995117188, "learning_rate": 9.600000000000001e-06, "loss": 170.3208, "step": 4800 }, { "epoch": 0.019433008641830663, "grad_norm": 844.0726318359375, "learning_rate": 9.62e-06, "loss": 224.4318, "step": 4810 }, { "epoch": 0.019473409907198293, "grad_norm": 2132.043701171875, "learning_rate": 9.640000000000001e-06, "loss": 252.1749, "step": 4820 }, { "epoch": 0.019513811172565926, "grad_norm": 2546.78271484375, "learning_rate": 9.66e-06, "loss": 207.1322, "step": 4830 }, { "epoch": 0.019554212437933555, "grad_norm": 1447.4708251953125, "learning_rate": 9.68e-06, "loss": 163.8928, "step": 4840 }, { "epoch": 0.01959461370330119, "grad_norm": 1765.4603271484375, "learning_rate": 9.7e-06, "loss": 285.0885, "step": 4850 }, { "epoch": 0.019635014968668818, "grad_norm": 385.1683349609375, "learning_rate": 9.72e-06, "loss": 159.1181, "step": 4860 }, { "epoch": 0.01967541623403645, "grad_norm": 3586.742919921875, "learning_rate": 9.74e-06, "loss": 281.9318, "step": 4870 }, { "epoch": 0.01971581749940408, "grad_norm": 1368.7694091796875, "learning_rate": 9.760000000000001e-06, "loss": 191.7896, "step": 4880 }, { "epoch": 0.019756218764771714, "grad_norm": 826.7830810546875, "learning_rate": 9.78e-06, "loss": 174.5084, "step": 4890 }, { "epoch": 0.019796620030139343, "grad_norm": 890.3446044921875, "learning_rate": 9.800000000000001e-06, "loss": 290.2659, "step": 4900 }, { "epoch": 0.019837021295506976, "grad_norm": 1318.1915283203125, "learning_rate": 9.820000000000001e-06, "loss": 272.5934, "step": 4910 }, { "epoch": 0.019877422560874606, "grad_norm": 772.6763916015625, "learning_rate": 9.84e-06, "loss": 151.6077, "step": 4920 }, { "epoch": 0.01991782382624224, "grad_norm": 1587.762451171875, "learning_rate": 9.86e-06, "loss": 248.4316, "step": 4930 }, { "epoch": 0.01995822509160987, "grad_norm": 1252.715576171875, "learning_rate": 9.88e-06, "loss": 258.2403, "step": 4940 }, { "epoch": 0.0199986263569775, "grad_norm": 1054.4132080078125, "learning_rate": 9.900000000000002e-06, "loss": 212.0352, "step": 4950 }, { "epoch": 0.02003902762234513, "grad_norm": 1351.8643798828125, "learning_rate": 9.92e-06, "loss": 135.1369, "step": 4960 }, { "epoch": 0.020079428887712764, "grad_norm": 0.0, "learning_rate": 9.940000000000001e-06, "loss": 259.982, "step": 4970 }, { "epoch": 0.020119830153080394, "grad_norm": 1335.3504638671875, "learning_rate": 9.96e-06, "loss": 232.5153, "step": 4980 }, { "epoch": 0.020160231418448027, "grad_norm": 677.4559326171875, "learning_rate": 9.980000000000001e-06, "loss": 240.9019, "step": 4990 }, { "epoch": 0.020200632683815656, "grad_norm": 1134.4188232421875, "learning_rate": 1e-05, "loss": 172.6664, "step": 5000 }, { "epoch": 0.02024103394918329, "grad_norm": 1606.3121337890625, "learning_rate": 1.002e-05, "loss": 218.7504, "step": 5010 }, { "epoch": 0.02028143521455092, "grad_norm": 1248.30615234375, "learning_rate": 1.004e-05, "loss": 143.733, "step": 5020 }, { "epoch": 0.020321836479918552, "grad_norm": 1033.4849853515625, "learning_rate": 1.006e-05, "loss": 252.8728, "step": 5030 }, { "epoch": 0.020362237745286182, "grad_norm": 1231.0445556640625, "learning_rate": 1.008e-05, "loss": 243.1658, "step": 5040 }, { "epoch": 0.020402639010653815, "grad_norm": 1193.17236328125, "learning_rate": 1.0100000000000002e-05, "loss": 181.7209, "step": 5050 }, { "epoch": 0.020443040276021444, "grad_norm": 0.0, "learning_rate": 1.012e-05, "loss": 276.8536, "step": 5060 }, { "epoch": 0.020483441541389077, "grad_norm": 1306.9161376953125, "learning_rate": 1.0140000000000001e-05, "loss": 205.8765, "step": 5070 }, { "epoch": 0.020523842806756707, "grad_norm": 3135.527099609375, "learning_rate": 1.016e-05, "loss": 227.9693, "step": 5080 }, { "epoch": 0.02056424407212434, "grad_norm": 2155.43994140625, "learning_rate": 1.018e-05, "loss": 284.9991, "step": 5090 }, { "epoch": 0.02060464533749197, "grad_norm": 528.388427734375, "learning_rate": 1.02e-05, "loss": 214.0478, "step": 5100 }, { "epoch": 0.020645046602859603, "grad_norm": 924.0242309570312, "learning_rate": 1.022e-05, "loss": 253.1813, "step": 5110 }, { "epoch": 0.020685447868227232, "grad_norm": 1019.5430297851562, "learning_rate": 1.024e-05, "loss": 193.0214, "step": 5120 }, { "epoch": 0.020725849133594865, "grad_norm": 3559.9951171875, "learning_rate": 1.026e-05, "loss": 180.7234, "step": 5130 }, { "epoch": 0.020766250398962495, "grad_norm": 686.9107055664062, "learning_rate": 1.0280000000000002e-05, "loss": 177.0602, "step": 5140 }, { "epoch": 0.020806651664330128, "grad_norm": 616.1886596679688, "learning_rate": 1.03e-05, "loss": 150.3324, "step": 5150 }, { "epoch": 0.020847052929697758, "grad_norm": 768.7440185546875, "learning_rate": 1.0320000000000001e-05, "loss": 218.8612, "step": 5160 }, { "epoch": 0.02088745419506539, "grad_norm": 927.5042724609375, "learning_rate": 1.0340000000000001e-05, "loss": 247.3814, "step": 5170 }, { "epoch": 0.02092785546043302, "grad_norm": 1545.854736328125, "learning_rate": 1.036e-05, "loss": 202.6357, "step": 5180 }, { "epoch": 0.020968256725800653, "grad_norm": 950.5018310546875, "learning_rate": 1.038e-05, "loss": 245.9979, "step": 5190 }, { "epoch": 0.021008657991168283, "grad_norm": 829.0062866210938, "learning_rate": 1.04e-05, "loss": 244.4165, "step": 5200 }, { "epoch": 0.021049059256535916, "grad_norm": 1268.8380126953125, "learning_rate": 1.042e-05, "loss": 216.124, "step": 5210 }, { "epoch": 0.021089460521903546, "grad_norm": 1645.070068359375, "learning_rate": 1.0440000000000002e-05, "loss": 224.5982, "step": 5220 }, { "epoch": 0.02112986178727118, "grad_norm": 1425.95947265625, "learning_rate": 1.046e-05, "loss": 241.4969, "step": 5230 }, { "epoch": 0.021170263052638808, "grad_norm": 1239.9669189453125, "learning_rate": 1.0480000000000001e-05, "loss": 296.2492, "step": 5240 }, { "epoch": 0.02121066431800644, "grad_norm": 3276.483154296875, "learning_rate": 1.05e-05, "loss": 257.1036, "step": 5250 }, { "epoch": 0.02125106558337407, "grad_norm": 1606.2689208984375, "learning_rate": 1.0520000000000001e-05, "loss": 273.2055, "step": 5260 }, { "epoch": 0.021291466848741704, "grad_norm": 2068.524658203125, "learning_rate": 1.0539999999999999e-05, "loss": 209.8239, "step": 5270 }, { "epoch": 0.021331868114109333, "grad_norm": 1133.3856201171875, "learning_rate": 1.056e-05, "loss": 248.1236, "step": 5280 }, { "epoch": 0.021372269379476967, "grad_norm": 1275.0018310546875, "learning_rate": 1.058e-05, "loss": 238.0178, "step": 5290 }, { "epoch": 0.021412670644844596, "grad_norm": 759.3450927734375, "learning_rate": 1.06e-05, "loss": 269.6895, "step": 5300 }, { "epoch": 0.02145307191021223, "grad_norm": 4057.62744140625, "learning_rate": 1.062e-05, "loss": 268.4212, "step": 5310 }, { "epoch": 0.02149347317557986, "grad_norm": 933.5989379882812, "learning_rate": 1.064e-05, "loss": 200.5304, "step": 5320 }, { "epoch": 0.021533874440947492, "grad_norm": 709.9401245117188, "learning_rate": 1.0660000000000001e-05, "loss": 227.5934, "step": 5330 }, { "epoch": 0.02157427570631512, "grad_norm": 1363.613525390625, "learning_rate": 1.0680000000000001e-05, "loss": 195.4376, "step": 5340 }, { "epoch": 0.021614676971682754, "grad_norm": 860.6060180664062, "learning_rate": 1.0700000000000001e-05, "loss": 220.5302, "step": 5350 }, { "epoch": 0.021655078237050384, "grad_norm": 809.3516845703125, "learning_rate": 1.072e-05, "loss": 289.8672, "step": 5360 }, { "epoch": 0.021695479502418017, "grad_norm": 688.4315185546875, "learning_rate": 1.074e-05, "loss": 231.9644, "step": 5370 }, { "epoch": 0.021735880767785647, "grad_norm": 2667.476318359375, "learning_rate": 1.076e-05, "loss": 270.5944, "step": 5380 }, { "epoch": 0.02177628203315328, "grad_norm": 1891.450927734375, "learning_rate": 1.0780000000000002e-05, "loss": 249.0208, "step": 5390 }, { "epoch": 0.02181668329852091, "grad_norm": 3481.829345703125, "learning_rate": 1.08e-05, "loss": 301.6664, "step": 5400 }, { "epoch": 0.021857084563888542, "grad_norm": 956.6407470703125, "learning_rate": 1.0820000000000001e-05, "loss": 245.2214, "step": 5410 }, { "epoch": 0.021897485829256172, "grad_norm": 4194.12841796875, "learning_rate": 1.084e-05, "loss": 260.2813, "step": 5420 }, { "epoch": 0.021937887094623805, "grad_norm": 3680.693603515625, "learning_rate": 1.0860000000000001e-05, "loss": 249.7451, "step": 5430 }, { "epoch": 0.021978288359991435, "grad_norm": 1049.5306396484375, "learning_rate": 1.088e-05, "loss": 227.2931, "step": 5440 }, { "epoch": 0.022018689625359068, "grad_norm": 10489.931640625, "learning_rate": 1.09e-05, "loss": 334.8502, "step": 5450 }, { "epoch": 0.022059090890726697, "grad_norm": 1121.204833984375, "learning_rate": 1.092e-05, "loss": 220.4895, "step": 5460 }, { "epoch": 0.02209949215609433, "grad_norm": 1375.3389892578125, "learning_rate": 1.094e-05, "loss": 212.8332, "step": 5470 }, { "epoch": 0.02213989342146196, "grad_norm": 2596.01318359375, "learning_rate": 1.096e-05, "loss": 242.3982, "step": 5480 }, { "epoch": 0.022180294686829593, "grad_norm": 1790.5255126953125, "learning_rate": 1.098e-05, "loss": 228.799, "step": 5490 }, { "epoch": 0.022220695952197222, "grad_norm": 1782.9239501953125, "learning_rate": 1.1000000000000001e-05, "loss": 247.0614, "step": 5500 }, { "epoch": 0.022261097217564856, "grad_norm": 3071.753173828125, "learning_rate": 1.1020000000000001e-05, "loss": 190.9815, "step": 5510 }, { "epoch": 0.022301498482932485, "grad_norm": 778.1385498046875, "learning_rate": 1.1040000000000001e-05, "loss": 208.1198, "step": 5520 }, { "epoch": 0.022341899748300118, "grad_norm": 4282.1689453125, "learning_rate": 1.106e-05, "loss": 187.0153, "step": 5530 }, { "epoch": 0.022382301013667748, "grad_norm": 3060.544921875, "learning_rate": 1.108e-05, "loss": 243.2019, "step": 5540 }, { "epoch": 0.02242270227903538, "grad_norm": 1174.474853515625, "learning_rate": 1.11e-05, "loss": 240.466, "step": 5550 }, { "epoch": 0.02246310354440301, "grad_norm": 835.2548828125, "learning_rate": 1.112e-05, "loss": 195.4574, "step": 5560 }, { "epoch": 0.022503504809770643, "grad_norm": 679.6596069335938, "learning_rate": 1.114e-05, "loss": 116.5087, "step": 5570 }, { "epoch": 0.022543906075138273, "grad_norm": 4872.7197265625, "learning_rate": 1.1160000000000002e-05, "loss": 208.2604, "step": 5580 }, { "epoch": 0.022584307340505906, "grad_norm": 507.1872253417969, "learning_rate": 1.118e-05, "loss": 240.8744, "step": 5590 }, { "epoch": 0.022624708605873536, "grad_norm": 1005.294189453125, "learning_rate": 1.1200000000000001e-05, "loss": 184.084, "step": 5600 }, { "epoch": 0.02266510987124117, "grad_norm": 7254.05029296875, "learning_rate": 1.122e-05, "loss": 216.7616, "step": 5610 }, { "epoch": 0.0227055111366088, "grad_norm": 1527.6485595703125, "learning_rate": 1.124e-05, "loss": 197.5231, "step": 5620 }, { "epoch": 0.02274591240197643, "grad_norm": 1460.7916259765625, "learning_rate": 1.126e-05, "loss": 280.5447, "step": 5630 }, { "epoch": 0.02278631366734406, "grad_norm": 1024.780517578125, "learning_rate": 1.128e-05, "loss": 248.3504, "step": 5640 }, { "epoch": 0.022826714932711694, "grad_norm": 1925.320068359375, "learning_rate": 1.13e-05, "loss": 226.1158, "step": 5650 }, { "epoch": 0.022867116198079324, "grad_norm": 1352.9869384765625, "learning_rate": 1.132e-05, "loss": 243.9569, "step": 5660 }, { "epoch": 0.022907517463446957, "grad_norm": 1091.9189453125, "learning_rate": 1.134e-05, "loss": 186.5363, "step": 5670 }, { "epoch": 0.022947918728814586, "grad_norm": 544.18994140625, "learning_rate": 1.1360000000000001e-05, "loss": 178.7458, "step": 5680 }, { "epoch": 0.02298831999418222, "grad_norm": 0.0, "learning_rate": 1.1380000000000001e-05, "loss": 237.9985, "step": 5690 }, { "epoch": 0.02302872125954985, "grad_norm": 1084.4674072265625, "learning_rate": 1.1400000000000001e-05, "loss": 201.7748, "step": 5700 }, { "epoch": 0.023069122524917482, "grad_norm": 1035.4659423828125, "learning_rate": 1.142e-05, "loss": 207.2124, "step": 5710 }, { "epoch": 0.02310952379028511, "grad_norm": 733.1090087890625, "learning_rate": 1.144e-05, "loss": 178.1889, "step": 5720 }, { "epoch": 0.023149925055652745, "grad_norm": 876.4588012695312, "learning_rate": 1.146e-05, "loss": 158.9155, "step": 5730 }, { "epoch": 0.023190326321020374, "grad_norm": 657.6229248046875, "learning_rate": 1.148e-05, "loss": 194.4797, "step": 5740 }, { "epoch": 0.023230727586388007, "grad_norm": 1495.9139404296875, "learning_rate": 1.1500000000000002e-05, "loss": 222.9885, "step": 5750 }, { "epoch": 0.023271128851755637, "grad_norm": 2601.6259765625, "learning_rate": 1.152e-05, "loss": 304.1291, "step": 5760 }, { "epoch": 0.02331153011712327, "grad_norm": 1693.1273193359375, "learning_rate": 1.1540000000000001e-05, "loss": 199.6243, "step": 5770 }, { "epoch": 0.0233519313824909, "grad_norm": 945.2630615234375, "learning_rate": 1.156e-05, "loss": 126.1058, "step": 5780 }, { "epoch": 0.023392332647858533, "grad_norm": 616.036865234375, "learning_rate": 1.1580000000000001e-05, "loss": 265.3511, "step": 5790 }, { "epoch": 0.023432733913226162, "grad_norm": 908.6356811523438, "learning_rate": 1.16e-05, "loss": 215.007, "step": 5800 }, { "epoch": 0.023473135178593795, "grad_norm": 1166.1875, "learning_rate": 1.162e-05, "loss": 171.8394, "step": 5810 }, { "epoch": 0.023513536443961425, "grad_norm": 936.7083740234375, "learning_rate": 1.164e-05, "loss": 209.5779, "step": 5820 }, { "epoch": 0.023553937709329058, "grad_norm": 689.0086669921875, "learning_rate": 1.166e-05, "loss": 165.4548, "step": 5830 }, { "epoch": 0.023594338974696687, "grad_norm": 1105.93212890625, "learning_rate": 1.168e-05, "loss": 275.4574, "step": 5840 }, { "epoch": 0.02363474024006432, "grad_norm": 2241.417236328125, "learning_rate": 1.1700000000000001e-05, "loss": 359.449, "step": 5850 }, { "epoch": 0.02367514150543195, "grad_norm": 1302.2642822265625, "learning_rate": 1.172e-05, "loss": 259.1196, "step": 5860 }, { "epoch": 0.023715542770799583, "grad_norm": 1307.6990966796875, "learning_rate": 1.1740000000000001e-05, "loss": 215.3923, "step": 5870 }, { "epoch": 0.023755944036167213, "grad_norm": 523.6953735351562, "learning_rate": 1.1760000000000001e-05, "loss": 189.7961, "step": 5880 }, { "epoch": 0.023796345301534846, "grad_norm": 1645.471923828125, "learning_rate": 1.178e-05, "loss": 197.7059, "step": 5890 }, { "epoch": 0.023836746566902475, "grad_norm": 4609.2265625, "learning_rate": 1.18e-05, "loss": 312.7925, "step": 5900 }, { "epoch": 0.02387714783227011, "grad_norm": 886.41015625, "learning_rate": 1.182e-05, "loss": 227.9965, "step": 5910 }, { "epoch": 0.023917549097637738, "grad_norm": 1345.154052734375, "learning_rate": 1.1840000000000002e-05, "loss": 143.6501, "step": 5920 }, { "epoch": 0.02395795036300537, "grad_norm": 2758.603515625, "learning_rate": 1.186e-05, "loss": 189.3002, "step": 5930 }, { "epoch": 0.023998351628373, "grad_norm": 933.2481079101562, "learning_rate": 1.1880000000000001e-05, "loss": 170.3514, "step": 5940 }, { "epoch": 0.024038752893740634, "grad_norm": 1366.0477294921875, "learning_rate": 1.19e-05, "loss": 204.9711, "step": 5950 }, { "epoch": 0.024079154159108263, "grad_norm": 870.7377319335938, "learning_rate": 1.1920000000000001e-05, "loss": 182.9471, "step": 5960 }, { "epoch": 0.024119555424475896, "grad_norm": 1715.831298828125, "learning_rate": 1.1940000000000001e-05, "loss": 206.0364, "step": 5970 }, { "epoch": 0.024159956689843526, "grad_norm": 1668.3841552734375, "learning_rate": 1.196e-05, "loss": 226.4282, "step": 5980 }, { "epoch": 0.02420035795521116, "grad_norm": 1048.4296875, "learning_rate": 1.198e-05, "loss": 109.3125, "step": 5990 }, { "epoch": 0.02424075922057879, "grad_norm": 879.1497192382812, "learning_rate": 1.2e-05, "loss": 201.5203, "step": 6000 }, { "epoch": 0.02428116048594642, "grad_norm": 1213.1204833984375, "learning_rate": 1.202e-05, "loss": 163.2346, "step": 6010 }, { "epoch": 0.02432156175131405, "grad_norm": 1427.281494140625, "learning_rate": 1.204e-05, "loss": 174.4642, "step": 6020 }, { "epoch": 0.024361963016681684, "grad_norm": 1276.7529296875, "learning_rate": 1.206e-05, "loss": 204.9076, "step": 6030 }, { "epoch": 0.024402364282049314, "grad_norm": 2190.304443359375, "learning_rate": 1.2080000000000001e-05, "loss": 159.9595, "step": 6040 }, { "epoch": 0.024442765547416947, "grad_norm": 785.987060546875, "learning_rate": 1.2100000000000001e-05, "loss": 209.5406, "step": 6050 }, { "epoch": 0.024483166812784576, "grad_norm": 611.1929931640625, "learning_rate": 1.2120000000000001e-05, "loss": 221.088, "step": 6060 }, { "epoch": 0.02452356807815221, "grad_norm": 1893.1925048828125, "learning_rate": 1.214e-05, "loss": 215.4067, "step": 6070 }, { "epoch": 0.02456396934351984, "grad_norm": 4432.23095703125, "learning_rate": 1.216e-05, "loss": 221.237, "step": 6080 }, { "epoch": 0.02460437060888747, "grad_norm": 1176.1290283203125, "learning_rate": 1.2180000000000002e-05, "loss": 220.3091, "step": 6090 }, { "epoch": 0.0246447718742551, "grad_norm": 1318.6954345703125, "learning_rate": 1.22e-05, "loss": 250.0935, "step": 6100 }, { "epoch": 0.02468517313962273, "grad_norm": 1313.9632568359375, "learning_rate": 1.2220000000000002e-05, "loss": 226.1311, "step": 6110 }, { "epoch": 0.024725574404990364, "grad_norm": 1210.4168701171875, "learning_rate": 1.224e-05, "loss": 348.8165, "step": 6120 }, { "epoch": 0.024765975670357994, "grad_norm": 1387.2384033203125, "learning_rate": 1.2260000000000001e-05, "loss": 208.124, "step": 6130 }, { "epoch": 0.024806376935725627, "grad_norm": 1911.8060302734375, "learning_rate": 1.2280000000000001e-05, "loss": 208.8157, "step": 6140 }, { "epoch": 0.024846778201093257, "grad_norm": 770.5685424804688, "learning_rate": 1.23e-05, "loss": 205.734, "step": 6150 }, { "epoch": 0.02488717946646089, "grad_norm": 718.35498046875, "learning_rate": 1.232e-05, "loss": 200.1725, "step": 6160 }, { "epoch": 0.02492758073182852, "grad_norm": 1146.25244140625, "learning_rate": 1.234e-05, "loss": 197.4777, "step": 6170 }, { "epoch": 0.024967981997196152, "grad_norm": 773.2630004882812, "learning_rate": 1.236e-05, "loss": 217.971, "step": 6180 }, { "epoch": 0.025008383262563782, "grad_norm": 888.3221435546875, "learning_rate": 1.238e-05, "loss": 198.487, "step": 6190 }, { "epoch": 0.025048784527931415, "grad_norm": 971.0499267578125, "learning_rate": 1.24e-05, "loss": 239.6828, "step": 6200 }, { "epoch": 0.025089185793299044, "grad_norm": 10692.5146484375, "learning_rate": 1.2420000000000001e-05, "loss": 258.4363, "step": 6210 }, { "epoch": 0.025129587058666678, "grad_norm": 791.8817138671875, "learning_rate": 1.244e-05, "loss": 213.3801, "step": 6220 }, { "epoch": 0.025169988324034307, "grad_norm": 1000.6807861328125, "learning_rate": 1.2460000000000001e-05, "loss": 181.9501, "step": 6230 }, { "epoch": 0.02521038958940194, "grad_norm": 892.7822875976562, "learning_rate": 1.248e-05, "loss": 167.6201, "step": 6240 }, { "epoch": 0.02525079085476957, "grad_norm": 537.8553466796875, "learning_rate": 1.25e-05, "loss": 242.3135, "step": 6250 }, { "epoch": 0.025291192120137203, "grad_norm": 1086.2513427734375, "learning_rate": 1.252e-05, "loss": 222.7271, "step": 6260 }, { "epoch": 0.025331593385504832, "grad_norm": 4348.87353515625, "learning_rate": 1.2540000000000002e-05, "loss": 316.0617, "step": 6270 }, { "epoch": 0.025371994650872465, "grad_norm": 1642.019287109375, "learning_rate": 1.256e-05, "loss": 202.9976, "step": 6280 }, { "epoch": 0.025412395916240095, "grad_norm": 685.2844848632812, "learning_rate": 1.258e-05, "loss": 147.7885, "step": 6290 }, { "epoch": 0.025452797181607728, "grad_norm": 2664.384765625, "learning_rate": 1.2600000000000001e-05, "loss": 178.4338, "step": 6300 }, { "epoch": 0.025493198446975358, "grad_norm": 727.255126953125, "learning_rate": 1.2620000000000001e-05, "loss": 142.211, "step": 6310 }, { "epoch": 0.02553359971234299, "grad_norm": 2616.48486328125, "learning_rate": 1.2640000000000003e-05, "loss": 215.0363, "step": 6320 }, { "epoch": 0.02557400097771062, "grad_norm": 1724.4073486328125, "learning_rate": 1.2659999999999999e-05, "loss": 268.9592, "step": 6330 }, { "epoch": 0.025614402243078253, "grad_norm": 757.4139404296875, "learning_rate": 1.268e-05, "loss": 245.0375, "step": 6340 }, { "epoch": 0.025654803508445883, "grad_norm": 804.900634765625, "learning_rate": 1.27e-05, "loss": 258.7294, "step": 6350 }, { "epoch": 0.025695204773813516, "grad_norm": 1432.1217041015625, "learning_rate": 1.2720000000000002e-05, "loss": 226.9255, "step": 6360 }, { "epoch": 0.025735606039181146, "grad_norm": 1246.9285888671875, "learning_rate": 1.2740000000000002e-05, "loss": 190.6593, "step": 6370 }, { "epoch": 0.02577600730454878, "grad_norm": 1320.610107421875, "learning_rate": 1.276e-05, "loss": 254.1634, "step": 6380 }, { "epoch": 0.025816408569916408, "grad_norm": 5738.62939453125, "learning_rate": 1.278e-05, "loss": 214.6841, "step": 6390 }, { "epoch": 0.02585680983528404, "grad_norm": 649.080810546875, "learning_rate": 1.2800000000000001e-05, "loss": 170.7529, "step": 6400 }, { "epoch": 0.02589721110065167, "grad_norm": 756.0393676757812, "learning_rate": 1.2820000000000001e-05, "loss": 186.7894, "step": 6410 }, { "epoch": 0.025937612366019304, "grad_norm": 2048.561279296875, "learning_rate": 1.2839999999999999e-05, "loss": 182.1876, "step": 6420 }, { "epoch": 0.025978013631386934, "grad_norm": 805.5757446289062, "learning_rate": 1.286e-05, "loss": 196.0959, "step": 6430 }, { "epoch": 0.026018414896754567, "grad_norm": 5938.0439453125, "learning_rate": 1.288e-05, "loss": 254.1976, "step": 6440 }, { "epoch": 0.026058816162122196, "grad_norm": 1488.987060546875, "learning_rate": 1.29e-05, "loss": 250.4606, "step": 6450 }, { "epoch": 0.02609921742748983, "grad_norm": 1574.7130126953125, "learning_rate": 1.2920000000000002e-05, "loss": 157.1144, "step": 6460 }, { "epoch": 0.02613961869285746, "grad_norm": 736.7561645507812, "learning_rate": 1.294e-05, "loss": 220.315, "step": 6470 }, { "epoch": 0.026180019958225092, "grad_norm": 972.5496826171875, "learning_rate": 1.296e-05, "loss": 257.4698, "step": 6480 }, { "epoch": 0.02622042122359272, "grad_norm": 4659.64794921875, "learning_rate": 1.2980000000000001e-05, "loss": 201.6754, "step": 6490 }, { "epoch": 0.026260822488960354, "grad_norm": 803.3303833007812, "learning_rate": 1.3000000000000001e-05, "loss": 227.2734, "step": 6500 }, { "epoch": 0.026301223754327984, "grad_norm": 900.7424926757812, "learning_rate": 1.3020000000000002e-05, "loss": 204.3565, "step": 6510 }, { "epoch": 0.026341625019695617, "grad_norm": 880.3982543945312, "learning_rate": 1.3039999999999999e-05, "loss": 213.0109, "step": 6520 }, { "epoch": 0.026382026285063247, "grad_norm": 1765.91357421875, "learning_rate": 1.306e-05, "loss": 238.4321, "step": 6530 }, { "epoch": 0.02642242755043088, "grad_norm": 2665.921875, "learning_rate": 1.308e-05, "loss": 227.712, "step": 6540 }, { "epoch": 0.02646282881579851, "grad_norm": 2605.635009765625, "learning_rate": 1.3100000000000002e-05, "loss": 162.2536, "step": 6550 }, { "epoch": 0.026503230081166142, "grad_norm": 793.8833618164062, "learning_rate": 1.3120000000000001e-05, "loss": 192.5132, "step": 6560 }, { "epoch": 0.026543631346533772, "grad_norm": 2946.634765625, "learning_rate": 1.314e-05, "loss": 159.2729, "step": 6570 }, { "epoch": 0.026584032611901405, "grad_norm": 910.0969848632812, "learning_rate": 1.316e-05, "loss": 191.1152, "step": 6580 }, { "epoch": 0.026624433877269035, "grad_norm": 808.4738159179688, "learning_rate": 1.3180000000000001e-05, "loss": 117.279, "step": 6590 }, { "epoch": 0.026664835142636668, "grad_norm": 716.1993408203125, "learning_rate": 1.32e-05, "loss": 246.6991, "step": 6600 }, { "epoch": 0.026705236408004297, "grad_norm": 1362.393310546875, "learning_rate": 1.3220000000000002e-05, "loss": 233.1123, "step": 6610 }, { "epoch": 0.02674563767337193, "grad_norm": 1083.3197021484375, "learning_rate": 1.324e-05, "loss": 169.5999, "step": 6620 }, { "epoch": 0.02678603893873956, "grad_norm": 754.7849731445312, "learning_rate": 1.326e-05, "loss": 306.0971, "step": 6630 }, { "epoch": 0.026826440204107193, "grad_norm": 968.374755859375, "learning_rate": 1.3280000000000002e-05, "loss": 159.3224, "step": 6640 }, { "epoch": 0.026866841469474823, "grad_norm": 670.0838623046875, "learning_rate": 1.3300000000000001e-05, "loss": 155.3162, "step": 6650 }, { "epoch": 0.026907242734842456, "grad_norm": 2666.84326171875, "learning_rate": 1.3320000000000001e-05, "loss": 173.0536, "step": 6660 }, { "epoch": 0.026947644000210085, "grad_norm": 1368.1116943359375, "learning_rate": 1.334e-05, "loss": 197.1025, "step": 6670 }, { "epoch": 0.026988045265577718, "grad_norm": 1038.586181640625, "learning_rate": 1.336e-05, "loss": 156.9807, "step": 6680 }, { "epoch": 0.027028446530945348, "grad_norm": 1009.5262451171875, "learning_rate": 1.338e-05, "loss": 204.5245, "step": 6690 }, { "epoch": 0.02706884779631298, "grad_norm": 1839.5418701171875, "learning_rate": 1.3400000000000002e-05, "loss": 266.4659, "step": 6700 }, { "epoch": 0.02710924906168061, "grad_norm": 829.3477783203125, "learning_rate": 1.3420000000000002e-05, "loss": 240.5496, "step": 6710 }, { "epoch": 0.027149650327048244, "grad_norm": 2405.634033203125, "learning_rate": 1.344e-05, "loss": 190.1635, "step": 6720 }, { "epoch": 0.027190051592415873, "grad_norm": 3415.678466796875, "learning_rate": 1.346e-05, "loss": 256.6028, "step": 6730 }, { "epoch": 0.027230452857783506, "grad_norm": 1140.322998046875, "learning_rate": 1.3480000000000001e-05, "loss": 213.0321, "step": 6740 }, { "epoch": 0.027270854123151136, "grad_norm": 1814.612548828125, "learning_rate": 1.3500000000000001e-05, "loss": 289.2976, "step": 6750 }, { "epoch": 0.02731125538851877, "grad_norm": 5082.0244140625, "learning_rate": 1.352e-05, "loss": 213.1429, "step": 6760 }, { "epoch": 0.0273516566538864, "grad_norm": 791.2843627929688, "learning_rate": 1.3539999999999999e-05, "loss": 174.2937, "step": 6770 }, { "epoch": 0.02739205791925403, "grad_norm": 1486.5364990234375, "learning_rate": 1.356e-05, "loss": 211.7225, "step": 6780 }, { "epoch": 0.02743245918462166, "grad_norm": 2779.84765625, "learning_rate": 1.358e-05, "loss": 206.9393, "step": 6790 }, { "epoch": 0.027472860449989294, "grad_norm": 2755.901611328125, "learning_rate": 1.3600000000000002e-05, "loss": 244.6609, "step": 6800 }, { "epoch": 0.027513261715356924, "grad_norm": 1252.9466552734375, "learning_rate": 1.362e-05, "loss": 185.0354, "step": 6810 }, { "epoch": 0.027553662980724557, "grad_norm": 803.9773559570312, "learning_rate": 1.364e-05, "loss": 163.7829, "step": 6820 }, { "epoch": 0.027594064246092186, "grad_norm": 3479.50341796875, "learning_rate": 1.3660000000000001e-05, "loss": 199.5576, "step": 6830 }, { "epoch": 0.02763446551145982, "grad_norm": 4041.821533203125, "learning_rate": 1.3680000000000001e-05, "loss": 146.2625, "step": 6840 }, { "epoch": 0.02767486677682745, "grad_norm": 951.2683715820312, "learning_rate": 1.3700000000000001e-05, "loss": 242.1677, "step": 6850 }, { "epoch": 0.027715268042195082, "grad_norm": 2521.80224609375, "learning_rate": 1.3719999999999999e-05, "loss": 162.2531, "step": 6860 }, { "epoch": 0.02775566930756271, "grad_norm": 2120.54150390625, "learning_rate": 1.374e-05, "loss": 218.3948, "step": 6870 }, { "epoch": 0.027796070572930345, "grad_norm": 890.4383544921875, "learning_rate": 1.376e-05, "loss": 237.4137, "step": 6880 }, { "epoch": 0.027836471838297974, "grad_norm": 1014.8067626953125, "learning_rate": 1.3780000000000002e-05, "loss": 271.9613, "step": 6890 }, { "epoch": 0.027876873103665607, "grad_norm": 2228.795654296875, "learning_rate": 1.3800000000000002e-05, "loss": 160.2129, "step": 6900 }, { "epoch": 0.027917274369033237, "grad_norm": 1605.4246826171875, "learning_rate": 1.382e-05, "loss": 192.7938, "step": 6910 }, { "epoch": 0.02795767563440087, "grad_norm": 726.9761352539062, "learning_rate": 1.384e-05, "loss": 166.0171, "step": 6920 }, { "epoch": 0.0279980768997685, "grad_norm": 1300.53076171875, "learning_rate": 1.3860000000000001e-05, "loss": 155.9882, "step": 6930 }, { "epoch": 0.028038478165136133, "grad_norm": 1467.7305908203125, "learning_rate": 1.3880000000000001e-05, "loss": 175.5876, "step": 6940 }, { "epoch": 0.028078879430503762, "grad_norm": 653.6290283203125, "learning_rate": 1.3900000000000002e-05, "loss": 177.4526, "step": 6950 }, { "epoch": 0.028119280695871395, "grad_norm": 1209.3408203125, "learning_rate": 1.3919999999999999e-05, "loss": 139.5799, "step": 6960 }, { "epoch": 0.028159681961239025, "grad_norm": 0.0, "learning_rate": 1.394e-05, "loss": 114.6302, "step": 6970 }, { "epoch": 0.028200083226606658, "grad_norm": 2785.091796875, "learning_rate": 1.396e-05, "loss": 197.9756, "step": 6980 }, { "epoch": 0.028240484491974287, "grad_norm": 1010.1685180664062, "learning_rate": 1.3980000000000002e-05, "loss": 194.4374, "step": 6990 }, { "epoch": 0.02828088575734192, "grad_norm": 810.1452026367188, "learning_rate": 1.4000000000000001e-05, "loss": 185.4311, "step": 7000 }, { "epoch": 0.02832128702270955, "grad_norm": 793.3577270507812, "learning_rate": 1.402e-05, "loss": 186.9826, "step": 7010 }, { "epoch": 0.028361688288077183, "grad_norm": 1589.5145263671875, "learning_rate": 1.4040000000000001e-05, "loss": 224.4251, "step": 7020 }, { "epoch": 0.028402089553444813, "grad_norm": 730.669189453125, "learning_rate": 1.4060000000000001e-05, "loss": 129.3718, "step": 7030 }, { "epoch": 0.028442490818812446, "grad_norm": 2354.304443359375, "learning_rate": 1.408e-05, "loss": 176.9131, "step": 7040 }, { "epoch": 0.028482892084180075, "grad_norm": 1103.234619140625, "learning_rate": 1.4099999999999999e-05, "loss": 198.4438, "step": 7050 }, { "epoch": 0.02852329334954771, "grad_norm": 1036.2913818359375, "learning_rate": 1.412e-05, "loss": 228.0053, "step": 7060 }, { "epoch": 0.028563694614915338, "grad_norm": 3621.711181640625, "learning_rate": 1.414e-05, "loss": 140.6286, "step": 7070 }, { "epoch": 0.02860409588028297, "grad_norm": 1558.1011962890625, "learning_rate": 1.4160000000000002e-05, "loss": 187.4032, "step": 7080 }, { "epoch": 0.0286444971456506, "grad_norm": 2425.656494140625, "learning_rate": 1.4180000000000001e-05, "loss": 198.9401, "step": 7090 }, { "epoch": 0.028684898411018234, "grad_norm": 878.8297729492188, "learning_rate": 1.42e-05, "loss": 248.0995, "step": 7100 }, { "epoch": 0.028725299676385863, "grad_norm": 791.4844360351562, "learning_rate": 1.422e-05, "loss": 223.9328, "step": 7110 }, { "epoch": 0.028765700941753496, "grad_norm": 984.3999633789062, "learning_rate": 1.4240000000000001e-05, "loss": 207.1025, "step": 7120 }, { "epoch": 0.028806102207121126, "grad_norm": 1250.86669921875, "learning_rate": 1.426e-05, "loss": 168.4282, "step": 7130 }, { "epoch": 0.02884650347248876, "grad_norm": 993.6478881835938, "learning_rate": 1.4280000000000002e-05, "loss": 204.2866, "step": 7140 }, { "epoch": 0.02888690473785639, "grad_norm": 1394.8699951171875, "learning_rate": 1.43e-05, "loss": 321.6906, "step": 7150 }, { "epoch": 0.02892730600322402, "grad_norm": 1557.498291015625, "learning_rate": 1.432e-05, "loss": 240.2873, "step": 7160 }, { "epoch": 0.02896770726859165, "grad_norm": 3150.471435546875, "learning_rate": 1.434e-05, "loss": 173.3683, "step": 7170 }, { "epoch": 0.029008108533959284, "grad_norm": 40070.46875, "learning_rate": 1.4360000000000001e-05, "loss": 300.3157, "step": 7180 }, { "epoch": 0.029048509799326914, "grad_norm": 1962.33447265625, "learning_rate": 1.4380000000000001e-05, "loss": 240.5531, "step": 7190 }, { "epoch": 0.029088911064694547, "grad_norm": 9524.1953125, "learning_rate": 1.44e-05, "loss": 197.5709, "step": 7200 }, { "epoch": 0.029129312330062176, "grad_norm": 1291.194091796875, "learning_rate": 1.4420000000000001e-05, "loss": 173.9855, "step": 7210 }, { "epoch": 0.02916971359542981, "grad_norm": 508.72991943359375, "learning_rate": 1.444e-05, "loss": 198.4389, "step": 7220 }, { "epoch": 0.02921011486079744, "grad_norm": 3161.301513671875, "learning_rate": 1.4460000000000002e-05, "loss": 232.296, "step": 7230 }, { "epoch": 0.029250516126165072, "grad_norm": 888.337646484375, "learning_rate": 1.4480000000000002e-05, "loss": 165.0789, "step": 7240 }, { "epoch": 0.029290917391532702, "grad_norm": 1378.0032958984375, "learning_rate": 1.45e-05, "loss": 199.7313, "step": 7250 }, { "epoch": 0.029331318656900335, "grad_norm": 2439.7119140625, "learning_rate": 1.452e-05, "loss": 201.1323, "step": 7260 }, { "epoch": 0.029371719922267964, "grad_norm": 0.0, "learning_rate": 1.4540000000000001e-05, "loss": 139.8308, "step": 7270 }, { "epoch": 0.029412121187635597, "grad_norm": 697.4395751953125, "learning_rate": 1.4560000000000001e-05, "loss": 215.4804, "step": 7280 }, { "epoch": 0.029452522453003227, "grad_norm": 1335.80859375, "learning_rate": 1.4580000000000003e-05, "loss": 206.551, "step": 7290 }, { "epoch": 0.02949292371837086, "grad_norm": 3926.513427734375, "learning_rate": 1.4599999999999999e-05, "loss": 161.3456, "step": 7300 }, { "epoch": 0.02953332498373849, "grad_norm": 2071.3857421875, "learning_rate": 1.462e-05, "loss": 245.7546, "step": 7310 }, { "epoch": 0.029573726249106123, "grad_norm": 1162.638916015625, "learning_rate": 1.464e-05, "loss": 247.4758, "step": 7320 }, { "epoch": 0.029614127514473752, "grad_norm": 1904.095947265625, "learning_rate": 1.4660000000000002e-05, "loss": 209.4585, "step": 7330 }, { "epoch": 0.029654528779841385, "grad_norm": 2012.481689453125, "learning_rate": 1.4680000000000002e-05, "loss": 177.5473, "step": 7340 }, { "epoch": 0.029694930045209015, "grad_norm": 827.6367797851562, "learning_rate": 1.47e-05, "loss": 132.3128, "step": 7350 }, { "epoch": 0.029735331310576648, "grad_norm": 1051.8994140625, "learning_rate": 1.472e-05, "loss": 201.6083, "step": 7360 }, { "epoch": 0.029775732575944278, "grad_norm": 2179.918701171875, "learning_rate": 1.4740000000000001e-05, "loss": 142.6359, "step": 7370 }, { "epoch": 0.02981613384131191, "grad_norm": 1439.194091796875, "learning_rate": 1.4760000000000001e-05, "loss": 170.9291, "step": 7380 }, { "epoch": 0.02985653510667954, "grad_norm": 1795.2176513671875, "learning_rate": 1.4779999999999999e-05, "loss": 205.2162, "step": 7390 }, { "epoch": 0.029896936372047173, "grad_norm": 1061.8426513671875, "learning_rate": 1.48e-05, "loss": 291.1986, "step": 7400 }, { "epoch": 0.029937337637414803, "grad_norm": 1072.127685546875, "learning_rate": 1.482e-05, "loss": 190.8879, "step": 7410 }, { "epoch": 0.029977738902782436, "grad_norm": 1368.1405029296875, "learning_rate": 1.4840000000000002e-05, "loss": 196.2706, "step": 7420 }, { "epoch": 0.030018140168150065, "grad_norm": 1639.9654541015625, "learning_rate": 1.4860000000000002e-05, "loss": 129.1992, "step": 7430 }, { "epoch": 0.0300585414335177, "grad_norm": 1488.4791259765625, "learning_rate": 1.488e-05, "loss": 151.4941, "step": 7440 }, { "epoch": 0.030098942698885328, "grad_norm": 864.4747924804688, "learning_rate": 1.49e-05, "loss": 199.6265, "step": 7450 }, { "epoch": 0.03013934396425296, "grad_norm": 1255.5771484375, "learning_rate": 1.4920000000000001e-05, "loss": 226.9639, "step": 7460 }, { "epoch": 0.03017974522962059, "grad_norm": 1635.7647705078125, "learning_rate": 1.4940000000000001e-05, "loss": 174.848, "step": 7470 }, { "epoch": 0.030220146494988224, "grad_norm": 1959.9365234375, "learning_rate": 1.4960000000000002e-05, "loss": 215.0835, "step": 7480 }, { "epoch": 0.030260547760355853, "grad_norm": 1300.870361328125, "learning_rate": 1.4979999999999999e-05, "loss": 187.4061, "step": 7490 }, { "epoch": 0.030300949025723486, "grad_norm": 5733.919921875, "learning_rate": 1.5e-05, "loss": 203.0896, "step": 7500 }, { "epoch": 0.030341350291091116, "grad_norm": 6152.53173828125, "learning_rate": 1.502e-05, "loss": 292.1695, "step": 7510 }, { "epoch": 0.03038175155645875, "grad_norm": 1118.759033203125, "learning_rate": 1.5040000000000002e-05, "loss": 179.8201, "step": 7520 }, { "epoch": 0.03042215282182638, "grad_norm": 1003.1773681640625, "learning_rate": 1.5060000000000001e-05, "loss": 168.1782, "step": 7530 }, { "epoch": 0.030462554087194012, "grad_norm": 1641.7908935546875, "learning_rate": 1.508e-05, "loss": 234.2666, "step": 7540 }, { "epoch": 0.03050295535256164, "grad_norm": 6260.85791015625, "learning_rate": 1.51e-05, "loss": 151.5416, "step": 7550 }, { "epoch": 0.030543356617929274, "grad_norm": 1593.7576904296875, "learning_rate": 1.5120000000000001e-05, "loss": 159.2535, "step": 7560 }, { "epoch": 0.030583757883296904, "grad_norm": 1710.3023681640625, "learning_rate": 1.514e-05, "loss": 128.8493, "step": 7570 }, { "epoch": 0.030624159148664537, "grad_norm": 1299.740966796875, "learning_rate": 1.5160000000000002e-05, "loss": 291.4667, "step": 7580 }, { "epoch": 0.030664560414032167, "grad_norm": 772.3851318359375, "learning_rate": 1.518e-05, "loss": 254.7446, "step": 7590 }, { "epoch": 0.0307049616793998, "grad_norm": 4841.77294921875, "learning_rate": 1.52e-05, "loss": 261.962, "step": 7600 }, { "epoch": 0.03074536294476743, "grad_norm": 1306.54296875, "learning_rate": 1.5220000000000002e-05, "loss": 251.6979, "step": 7610 }, { "epoch": 0.030785764210135062, "grad_norm": 1031.349365234375, "learning_rate": 1.5240000000000001e-05, "loss": 180.2815, "step": 7620 }, { "epoch": 0.030826165475502692, "grad_norm": 1182.68994140625, "learning_rate": 1.5260000000000003e-05, "loss": 202.774, "step": 7630 }, { "epoch": 0.030866566740870325, "grad_norm": 524.015869140625, "learning_rate": 1.528e-05, "loss": 148.2353, "step": 7640 }, { "epoch": 0.030906968006237955, "grad_norm": 1932.3292236328125, "learning_rate": 1.53e-05, "loss": 169.3441, "step": 7650 }, { "epoch": 0.030947369271605588, "grad_norm": 840.219970703125, "learning_rate": 1.5320000000000002e-05, "loss": 129.2729, "step": 7660 }, { "epoch": 0.030987770536973217, "grad_norm": 1622.7508544921875, "learning_rate": 1.5340000000000002e-05, "loss": 160.2887, "step": 7670 }, { "epoch": 0.03102817180234085, "grad_norm": 2095.8623046875, "learning_rate": 1.536e-05, "loss": 245.4731, "step": 7680 }, { "epoch": 0.03106857306770848, "grad_norm": 3175.37646484375, "learning_rate": 1.538e-05, "loss": 208.5475, "step": 7690 }, { "epoch": 0.031108974333076113, "grad_norm": 924.9345092773438, "learning_rate": 1.54e-05, "loss": 170.3805, "step": 7700 }, { "epoch": 0.031149375598443742, "grad_norm": 1739.8531494140625, "learning_rate": 1.542e-05, "loss": 194.0813, "step": 7710 }, { "epoch": 0.031189776863811376, "grad_norm": 1644.390869140625, "learning_rate": 1.544e-05, "loss": 136.579, "step": 7720 }, { "epoch": 0.031230178129179005, "grad_norm": 789.648193359375, "learning_rate": 1.546e-05, "loss": 187.856, "step": 7730 }, { "epoch": 0.03127057939454664, "grad_norm": 1162.065185546875, "learning_rate": 1.548e-05, "loss": 114.7405, "step": 7740 }, { "epoch": 0.03131098065991427, "grad_norm": 1140.320556640625, "learning_rate": 1.55e-05, "loss": 181.2212, "step": 7750 }, { "epoch": 0.0313513819252819, "grad_norm": 2704.695556640625, "learning_rate": 1.552e-05, "loss": 241.535, "step": 7760 }, { "epoch": 0.03139178319064953, "grad_norm": 939.2614135742188, "learning_rate": 1.554e-05, "loss": 151.1776, "step": 7770 }, { "epoch": 0.03143218445601716, "grad_norm": 1043.3226318359375, "learning_rate": 1.556e-05, "loss": 200.2128, "step": 7780 }, { "epoch": 0.031472585721384796, "grad_norm": 1068.1412353515625, "learning_rate": 1.558e-05, "loss": 181.7895, "step": 7790 }, { "epoch": 0.03151298698675242, "grad_norm": 1479.6229248046875, "learning_rate": 1.56e-05, "loss": 196.9196, "step": 7800 }, { "epoch": 0.031553388252120056, "grad_norm": 701.58544921875, "learning_rate": 1.5620000000000003e-05, "loss": 137.6865, "step": 7810 }, { "epoch": 0.03159378951748769, "grad_norm": 1822.54345703125, "learning_rate": 1.5640000000000003e-05, "loss": 227.0767, "step": 7820 }, { "epoch": 0.03163419078285532, "grad_norm": 546.575439453125, "learning_rate": 1.566e-05, "loss": 146.3474, "step": 7830 }, { "epoch": 0.03167459204822295, "grad_norm": 1540.7342529296875, "learning_rate": 1.568e-05, "loss": 192.144, "step": 7840 }, { "epoch": 0.03171499331359058, "grad_norm": 1766.8616943359375, "learning_rate": 1.5700000000000002e-05, "loss": 194.4416, "step": 7850 }, { "epoch": 0.031755394578958214, "grad_norm": 1534.951416015625, "learning_rate": 1.5720000000000002e-05, "loss": 158.2836, "step": 7860 }, { "epoch": 0.03179579584432585, "grad_norm": 980.215576171875, "learning_rate": 1.5740000000000002e-05, "loss": 166.7142, "step": 7870 }, { "epoch": 0.03183619710969347, "grad_norm": 5774.56396484375, "learning_rate": 1.5759999999999998e-05, "loss": 305.0332, "step": 7880 }, { "epoch": 0.031876598375061106, "grad_norm": 1329.786865234375, "learning_rate": 1.578e-05, "loss": 137.7941, "step": 7890 }, { "epoch": 0.03191699964042874, "grad_norm": 1180.7379150390625, "learning_rate": 1.58e-05, "loss": 122.4153, "step": 7900 }, { "epoch": 0.03195740090579637, "grad_norm": 867.472900390625, "learning_rate": 1.582e-05, "loss": 210.3185, "step": 7910 }, { "epoch": 0.031997802171164, "grad_norm": 1057.9029541015625, "learning_rate": 1.584e-05, "loss": 209.8075, "step": 7920 }, { "epoch": 0.03203820343653163, "grad_norm": 1288.350341796875, "learning_rate": 1.586e-05, "loss": 198.4188, "step": 7930 }, { "epoch": 0.032078604701899265, "grad_norm": 0.0, "learning_rate": 1.588e-05, "loss": 145.5795, "step": 7940 }, { "epoch": 0.0321190059672669, "grad_norm": 983.6244506835938, "learning_rate": 1.59e-05, "loss": 227.0045, "step": 7950 }, { "epoch": 0.032159407232634524, "grad_norm": 2391.579345703125, "learning_rate": 1.592e-05, "loss": 261.5729, "step": 7960 }, { "epoch": 0.03219980849800216, "grad_norm": 2793.90234375, "learning_rate": 1.594e-05, "loss": 247.0734, "step": 7970 }, { "epoch": 0.03224020976336979, "grad_norm": 1532.1849365234375, "learning_rate": 1.596e-05, "loss": 199.9248, "step": 7980 }, { "epoch": 0.03228061102873742, "grad_norm": 1343.6947021484375, "learning_rate": 1.598e-05, "loss": 167.6554, "step": 7990 }, { "epoch": 0.03232101229410505, "grad_norm": 717.1405639648438, "learning_rate": 1.6000000000000003e-05, "loss": 218.2069, "step": 8000 }, { "epoch": 0.03236141355947268, "grad_norm": 2144.71337890625, "learning_rate": 1.6020000000000002e-05, "loss": 210.7316, "step": 8010 }, { "epoch": 0.032401814824840315, "grad_norm": 2510.060546875, "learning_rate": 1.604e-05, "loss": 300.955, "step": 8020 }, { "epoch": 0.03244221609020795, "grad_norm": 1254.329833984375, "learning_rate": 1.606e-05, "loss": 224.6631, "step": 8030 }, { "epoch": 0.032482617355575574, "grad_norm": 533.80517578125, "learning_rate": 1.6080000000000002e-05, "loss": 195.5154, "step": 8040 }, { "epoch": 0.03252301862094321, "grad_norm": 932.7647705078125, "learning_rate": 1.6100000000000002e-05, "loss": 173.2508, "step": 8050 }, { "epoch": 0.03256341988631084, "grad_norm": 1513.1566162109375, "learning_rate": 1.612e-05, "loss": 252.4677, "step": 8060 }, { "epoch": 0.03260382115167847, "grad_norm": 8973.974609375, "learning_rate": 1.6139999999999998e-05, "loss": 160.9031, "step": 8070 }, { "epoch": 0.0326442224170461, "grad_norm": 1346.521728515625, "learning_rate": 1.616e-05, "loss": 190.9977, "step": 8080 }, { "epoch": 0.03268462368241373, "grad_norm": 696.8346557617188, "learning_rate": 1.618e-05, "loss": 148.2339, "step": 8090 }, { "epoch": 0.032725024947781366, "grad_norm": 2406.046142578125, "learning_rate": 1.62e-05, "loss": 129.0067, "step": 8100 }, { "epoch": 0.032765426213149, "grad_norm": 1443.768310546875, "learning_rate": 1.622e-05, "loss": 206.6445, "step": 8110 }, { "epoch": 0.032805827478516625, "grad_norm": 2201.40087890625, "learning_rate": 1.624e-05, "loss": 188.9112, "step": 8120 }, { "epoch": 0.03284622874388426, "grad_norm": 699.2227783203125, "learning_rate": 1.626e-05, "loss": 193.334, "step": 8130 }, { "epoch": 0.03288663000925189, "grad_norm": 545.5186767578125, "learning_rate": 1.628e-05, "loss": 140.6468, "step": 8140 }, { "epoch": 0.032927031274619524, "grad_norm": 611.3945922851562, "learning_rate": 1.63e-05, "loss": 131.4079, "step": 8150 }, { "epoch": 0.03296743253998715, "grad_norm": 1427.6285400390625, "learning_rate": 1.6320000000000003e-05, "loss": 186.4774, "step": 8160 }, { "epoch": 0.03300783380535478, "grad_norm": 769.5264892578125, "learning_rate": 1.634e-05, "loss": 140.9033, "step": 8170 }, { "epoch": 0.033048235070722416, "grad_norm": 988.6732788085938, "learning_rate": 1.636e-05, "loss": 193.9264, "step": 8180 }, { "epoch": 0.03308863633609005, "grad_norm": 1023.501708984375, "learning_rate": 1.6380000000000002e-05, "loss": 173.0703, "step": 8190 }, { "epoch": 0.033129037601457675, "grad_norm": 1767.6136474609375, "learning_rate": 1.6400000000000002e-05, "loss": 235.5256, "step": 8200 }, { "epoch": 0.03316943886682531, "grad_norm": 1146.1134033203125, "learning_rate": 1.6420000000000002e-05, "loss": 174.4802, "step": 8210 }, { "epoch": 0.03320984013219294, "grad_norm": 723.232666015625, "learning_rate": 1.644e-05, "loss": 126.8088, "step": 8220 }, { "epoch": 0.033250241397560575, "grad_norm": 939.8434448242188, "learning_rate": 1.646e-05, "loss": 169.1382, "step": 8230 }, { "epoch": 0.0332906426629282, "grad_norm": 986.3238525390625, "learning_rate": 1.648e-05, "loss": 111.5822, "step": 8240 }, { "epoch": 0.033331043928295834, "grad_norm": 1208.2476806640625, "learning_rate": 1.65e-05, "loss": 191.2895, "step": 8250 }, { "epoch": 0.03337144519366347, "grad_norm": 827.257568359375, "learning_rate": 1.652e-05, "loss": 175.8226, "step": 8260 }, { "epoch": 0.0334118464590311, "grad_norm": 1213.089111328125, "learning_rate": 1.654e-05, "loss": 214.4393, "step": 8270 }, { "epoch": 0.033452247724398726, "grad_norm": 985.6383666992188, "learning_rate": 1.656e-05, "loss": 240.4976, "step": 8280 }, { "epoch": 0.03349264898976636, "grad_norm": 1016.156494140625, "learning_rate": 1.658e-05, "loss": 211.4707, "step": 8290 }, { "epoch": 0.03353305025513399, "grad_norm": 590.7755737304688, "learning_rate": 1.66e-05, "loss": 173.2719, "step": 8300 }, { "epoch": 0.033573451520501625, "grad_norm": 850.2467041015625, "learning_rate": 1.662e-05, "loss": 169.5091, "step": 8310 }, { "epoch": 0.03361385278586925, "grad_norm": 6493.62109375, "learning_rate": 1.664e-05, "loss": 206.8252, "step": 8320 }, { "epoch": 0.033654254051236884, "grad_norm": 721.207275390625, "learning_rate": 1.666e-05, "loss": 172.7568, "step": 8330 }, { "epoch": 0.03369465531660452, "grad_norm": 941.2609252929688, "learning_rate": 1.668e-05, "loss": 169.4456, "step": 8340 }, { "epoch": 0.03373505658197215, "grad_norm": 2292.378173828125, "learning_rate": 1.6700000000000003e-05, "loss": 217.5546, "step": 8350 }, { "epoch": 0.033775457847339777, "grad_norm": 752.173583984375, "learning_rate": 1.672e-05, "loss": 186.4127, "step": 8360 }, { "epoch": 0.03381585911270741, "grad_norm": 1046.0406494140625, "learning_rate": 1.674e-05, "loss": 156.5079, "step": 8370 }, { "epoch": 0.03385626037807504, "grad_norm": 926.9135131835938, "learning_rate": 1.6760000000000002e-05, "loss": 145.1286, "step": 8380 }, { "epoch": 0.033896661643442676, "grad_norm": 2465.574951171875, "learning_rate": 1.6780000000000002e-05, "loss": 230.1866, "step": 8390 }, { "epoch": 0.0339370629088103, "grad_norm": 1376.998779296875, "learning_rate": 1.6800000000000002e-05, "loss": 149.4985, "step": 8400 }, { "epoch": 0.033977464174177935, "grad_norm": 1141.4237060546875, "learning_rate": 1.6819999999999998e-05, "loss": 199.0205, "step": 8410 }, { "epoch": 0.03401786543954557, "grad_norm": 823.1524047851562, "learning_rate": 1.684e-05, "loss": 160.2142, "step": 8420 }, { "epoch": 0.0340582667049132, "grad_norm": 557.1250610351562, "learning_rate": 1.686e-05, "loss": 177.5694, "step": 8430 }, { "epoch": 0.03409866797028083, "grad_norm": 841.5819702148438, "learning_rate": 1.688e-05, "loss": 165.8316, "step": 8440 }, { "epoch": 0.03413906923564846, "grad_norm": 949.8247680664062, "learning_rate": 1.69e-05, "loss": 109.1915, "step": 8450 }, { "epoch": 0.03417947050101609, "grad_norm": 1175.8765869140625, "learning_rate": 1.692e-05, "loss": 186.9094, "step": 8460 }, { "epoch": 0.034219871766383726, "grad_norm": 601.2833862304688, "learning_rate": 1.694e-05, "loss": 173.7073, "step": 8470 }, { "epoch": 0.03426027303175135, "grad_norm": 1607.2193603515625, "learning_rate": 1.696e-05, "loss": 193.047, "step": 8480 }, { "epoch": 0.034300674297118985, "grad_norm": 794.0247802734375, "learning_rate": 1.698e-05, "loss": 230.634, "step": 8490 }, { "epoch": 0.03434107556248662, "grad_norm": 506.083740234375, "learning_rate": 1.7000000000000003e-05, "loss": 194.7215, "step": 8500 }, { "epoch": 0.03438147682785425, "grad_norm": 1354.748046875, "learning_rate": 1.702e-05, "loss": 237.6669, "step": 8510 }, { "epoch": 0.03442187809322188, "grad_norm": 903.7907104492188, "learning_rate": 1.704e-05, "loss": 176.5095, "step": 8520 }, { "epoch": 0.03446227935858951, "grad_norm": 1148.167236328125, "learning_rate": 1.706e-05, "loss": 178.5096, "step": 8530 }, { "epoch": 0.034502680623957144, "grad_norm": 2201.17919921875, "learning_rate": 1.7080000000000002e-05, "loss": 149.1055, "step": 8540 }, { "epoch": 0.03454308188932478, "grad_norm": 2470.737060546875, "learning_rate": 1.7100000000000002e-05, "loss": 239.2134, "step": 8550 }, { "epoch": 0.0345834831546924, "grad_norm": 1102.5830078125, "learning_rate": 1.712e-05, "loss": 116.4302, "step": 8560 }, { "epoch": 0.034623884420060036, "grad_norm": 1073.0867919921875, "learning_rate": 1.7140000000000002e-05, "loss": 208.7902, "step": 8570 }, { "epoch": 0.03466428568542767, "grad_norm": 1288.13818359375, "learning_rate": 1.7160000000000002e-05, "loss": 177.9251, "step": 8580 }, { "epoch": 0.0347046869507953, "grad_norm": 3063.13525390625, "learning_rate": 1.718e-05, "loss": 132.933, "step": 8590 }, { "epoch": 0.03474508821616293, "grad_norm": 817.6849365234375, "learning_rate": 1.7199999999999998e-05, "loss": 329.9039, "step": 8600 }, { "epoch": 0.03478548948153056, "grad_norm": 871.9563598632812, "learning_rate": 1.722e-05, "loss": 198.9892, "step": 8610 }, { "epoch": 0.034825890746898194, "grad_norm": 2624.832275390625, "learning_rate": 1.724e-05, "loss": 134.0866, "step": 8620 }, { "epoch": 0.03486629201226583, "grad_norm": 1550.62548828125, "learning_rate": 1.726e-05, "loss": 195.2643, "step": 8630 }, { "epoch": 0.034906693277633453, "grad_norm": 2074.622314453125, "learning_rate": 1.728e-05, "loss": 202.7179, "step": 8640 }, { "epoch": 0.03494709454300109, "grad_norm": 1240.3668212890625, "learning_rate": 1.73e-05, "loss": 164.9288, "step": 8650 }, { "epoch": 0.03498749580836872, "grad_norm": 1201.8397216796875, "learning_rate": 1.732e-05, "loss": 173.0605, "step": 8660 }, { "epoch": 0.03502789707373635, "grad_norm": 1204.3741455078125, "learning_rate": 1.734e-05, "loss": 161.6967, "step": 8670 }, { "epoch": 0.03506829833910398, "grad_norm": 1461.201904296875, "learning_rate": 1.736e-05, "loss": 91.053, "step": 8680 }, { "epoch": 0.03510869960447161, "grad_norm": 669.2667846679688, "learning_rate": 1.7380000000000003e-05, "loss": 125.9261, "step": 8690 }, { "epoch": 0.035149100869839245, "grad_norm": 902.3343505859375, "learning_rate": 1.74e-05, "loss": 250.4465, "step": 8700 }, { "epoch": 0.03518950213520688, "grad_norm": 2767.30224609375, "learning_rate": 1.742e-05, "loss": 194.7519, "step": 8710 }, { "epoch": 0.035229903400574504, "grad_norm": 778.5036010742188, "learning_rate": 1.7440000000000002e-05, "loss": 205.9209, "step": 8720 }, { "epoch": 0.03527030466594214, "grad_norm": 2534.48681640625, "learning_rate": 1.7460000000000002e-05, "loss": 201.9174, "step": 8730 }, { "epoch": 0.03531070593130977, "grad_norm": 3830.944580078125, "learning_rate": 1.7480000000000002e-05, "loss": 249.3413, "step": 8740 }, { "epoch": 0.0353511071966774, "grad_norm": 1058.1517333984375, "learning_rate": 1.75e-05, "loss": 156.2294, "step": 8750 }, { "epoch": 0.03539150846204503, "grad_norm": 1754.2557373046875, "learning_rate": 1.752e-05, "loss": 169.7632, "step": 8760 }, { "epoch": 0.03543190972741266, "grad_norm": 631.4237060546875, "learning_rate": 1.754e-05, "loss": 143.3636, "step": 8770 }, { "epoch": 0.035472310992780295, "grad_norm": 1334.2506103515625, "learning_rate": 1.756e-05, "loss": 172.2922, "step": 8780 }, { "epoch": 0.03551271225814793, "grad_norm": 1203.375, "learning_rate": 1.758e-05, "loss": 174.6774, "step": 8790 }, { "epoch": 0.035553113523515555, "grad_norm": 1072.609375, "learning_rate": 1.76e-05, "loss": 177.6043, "step": 8800 }, { "epoch": 0.03559351478888319, "grad_norm": 1734.830810546875, "learning_rate": 1.762e-05, "loss": 213.749, "step": 8810 }, { "epoch": 0.03563391605425082, "grad_norm": 1706.71875, "learning_rate": 1.764e-05, "loss": 141.3769, "step": 8820 }, { "epoch": 0.035674317319618454, "grad_norm": 823.6603393554688, "learning_rate": 1.766e-05, "loss": 211.0458, "step": 8830 }, { "epoch": 0.03571471858498608, "grad_norm": 1883.2462158203125, "learning_rate": 1.7680000000000004e-05, "loss": 147.5754, "step": 8840 }, { "epoch": 0.03575511985035371, "grad_norm": 1167.08935546875, "learning_rate": 1.77e-05, "loss": 158.7962, "step": 8850 }, { "epoch": 0.035795521115721346, "grad_norm": 653.7176513671875, "learning_rate": 1.772e-05, "loss": 162.9139, "step": 8860 }, { "epoch": 0.03583592238108898, "grad_norm": 1610.84912109375, "learning_rate": 1.774e-05, "loss": 147.6589, "step": 8870 }, { "epoch": 0.035876323646456605, "grad_norm": 764.0466918945312, "learning_rate": 1.7760000000000003e-05, "loss": 188.6739, "step": 8880 }, { "epoch": 0.03591672491182424, "grad_norm": 1810.3558349609375, "learning_rate": 1.7780000000000003e-05, "loss": 205.71, "step": 8890 }, { "epoch": 0.03595712617719187, "grad_norm": 968.5712280273438, "learning_rate": 1.78e-05, "loss": 164.1197, "step": 8900 }, { "epoch": 0.035997527442559504, "grad_norm": 2819.0068359375, "learning_rate": 1.7820000000000002e-05, "loss": 162.7462, "step": 8910 }, { "epoch": 0.03603792870792713, "grad_norm": 2304.89306640625, "learning_rate": 1.7840000000000002e-05, "loss": 166.9301, "step": 8920 }, { "epoch": 0.036078329973294763, "grad_norm": 1451.4320068359375, "learning_rate": 1.7860000000000002e-05, "loss": 199.7026, "step": 8930 }, { "epoch": 0.0361187312386624, "grad_norm": 1088.584228515625, "learning_rate": 1.7879999999999998e-05, "loss": 122.1127, "step": 8940 }, { "epoch": 0.03615913250403003, "grad_norm": 1242.6817626953125, "learning_rate": 1.79e-05, "loss": 205.5091, "step": 8950 }, { "epoch": 0.036199533769397656, "grad_norm": 3576.470458984375, "learning_rate": 1.792e-05, "loss": 222.864, "step": 8960 }, { "epoch": 0.03623993503476529, "grad_norm": 7665.30419921875, "learning_rate": 1.794e-05, "loss": 177.141, "step": 8970 }, { "epoch": 0.03628033630013292, "grad_norm": 874.79052734375, "learning_rate": 1.796e-05, "loss": 151.6607, "step": 8980 }, { "epoch": 0.036320737565500555, "grad_norm": 2215.158203125, "learning_rate": 1.798e-05, "loss": 187.3096, "step": 8990 }, { "epoch": 0.03636113883086818, "grad_norm": 2184.711669921875, "learning_rate": 1.8e-05, "loss": 192.0445, "step": 9000 }, { "epoch": 0.036401540096235814, "grad_norm": 1615.7005615234375, "learning_rate": 1.802e-05, "loss": 200.9116, "step": 9010 }, { "epoch": 0.03644194136160345, "grad_norm": 1396.12109375, "learning_rate": 1.804e-05, "loss": 116.5097, "step": 9020 }, { "epoch": 0.03648234262697108, "grad_norm": 2052.5712890625, "learning_rate": 1.8060000000000003e-05, "loss": 91.6937, "step": 9030 }, { "epoch": 0.036522743892338706, "grad_norm": 1198.88232421875, "learning_rate": 1.808e-05, "loss": 162.9016, "step": 9040 }, { "epoch": 0.03656314515770634, "grad_norm": 1004.2339477539062, "learning_rate": 1.81e-05, "loss": 165.1726, "step": 9050 }, { "epoch": 0.03660354642307397, "grad_norm": 2106.733642578125, "learning_rate": 1.812e-05, "loss": 293.9893, "step": 9060 }, { "epoch": 0.036643947688441605, "grad_norm": 1582.510986328125, "learning_rate": 1.8140000000000003e-05, "loss": 190.1606, "step": 9070 }, { "epoch": 0.03668434895380923, "grad_norm": 905.4036254882812, "learning_rate": 1.8160000000000002e-05, "loss": 141.9137, "step": 9080 }, { "epoch": 0.036724750219176865, "grad_norm": 411.4434509277344, "learning_rate": 1.818e-05, "loss": 135.4074, "step": 9090 }, { "epoch": 0.0367651514845445, "grad_norm": 2590.279541015625, "learning_rate": 1.8200000000000002e-05, "loss": 237.3702, "step": 9100 }, { "epoch": 0.03680555274991213, "grad_norm": 966.46435546875, "learning_rate": 1.8220000000000002e-05, "loss": 99.4366, "step": 9110 }, { "epoch": 0.03684595401527976, "grad_norm": 1426.09375, "learning_rate": 1.824e-05, "loss": 128.6301, "step": 9120 }, { "epoch": 0.03688635528064739, "grad_norm": 661.1119384765625, "learning_rate": 1.826e-05, "loss": 119.7455, "step": 9130 }, { "epoch": 0.03692675654601502, "grad_norm": 1419.5643310546875, "learning_rate": 1.828e-05, "loss": 157.1782, "step": 9140 }, { "epoch": 0.03696715781138265, "grad_norm": 875.4746704101562, "learning_rate": 1.83e-05, "loss": 167.5964, "step": 9150 }, { "epoch": 0.03700755907675028, "grad_norm": 914.225341796875, "learning_rate": 1.832e-05, "loss": 229.4271, "step": 9160 }, { "epoch": 0.037047960342117915, "grad_norm": 1139.4866943359375, "learning_rate": 1.834e-05, "loss": 176.9276, "step": 9170 }, { "epoch": 0.03708836160748555, "grad_norm": 1400.506103515625, "learning_rate": 1.8360000000000004e-05, "loss": 137.9502, "step": 9180 }, { "epoch": 0.037128762872853174, "grad_norm": 1892.8255615234375, "learning_rate": 1.838e-05, "loss": 177.446, "step": 9190 }, { "epoch": 0.03716916413822081, "grad_norm": 6680.482421875, "learning_rate": 1.84e-05, "loss": 177.2692, "step": 9200 }, { "epoch": 0.03720956540358844, "grad_norm": 5937.34521484375, "learning_rate": 1.842e-05, "loss": 209.457, "step": 9210 }, { "epoch": 0.037249966668956074, "grad_norm": 724.9736938476562, "learning_rate": 1.8440000000000003e-05, "loss": 144.7552, "step": 9220 }, { "epoch": 0.0372903679343237, "grad_norm": 1280.418212890625, "learning_rate": 1.846e-05, "loss": 177.4063, "step": 9230 }, { "epoch": 0.03733076919969133, "grad_norm": 4052.574462890625, "learning_rate": 1.848e-05, "loss": 192.6805, "step": 9240 }, { "epoch": 0.037371170465058966, "grad_norm": 1359.7706298828125, "learning_rate": 1.85e-05, "loss": 187.3014, "step": 9250 }, { "epoch": 0.0374115717304266, "grad_norm": 3640.478515625, "learning_rate": 1.8520000000000002e-05, "loss": 134.8755, "step": 9260 }, { "epoch": 0.037451972995794225, "grad_norm": 495.39373779296875, "learning_rate": 1.8540000000000002e-05, "loss": 199.3222, "step": 9270 }, { "epoch": 0.03749237426116186, "grad_norm": 614.9301147460938, "learning_rate": 1.856e-05, "loss": 172.9295, "step": 9280 }, { "epoch": 0.03753277552652949, "grad_norm": 1786.66552734375, "learning_rate": 1.858e-05, "loss": 170.4556, "step": 9290 }, { "epoch": 0.037573176791897124, "grad_norm": 1209.83740234375, "learning_rate": 1.86e-05, "loss": 179.2815, "step": 9300 }, { "epoch": 0.03761357805726475, "grad_norm": 935.8829345703125, "learning_rate": 1.862e-05, "loss": 153.5103, "step": 9310 }, { "epoch": 0.03765397932263238, "grad_norm": 1041.441162109375, "learning_rate": 1.864e-05, "loss": 220.6227, "step": 9320 }, { "epoch": 0.037694380588000016, "grad_norm": 1657.690673828125, "learning_rate": 1.866e-05, "loss": 238.1361, "step": 9330 }, { "epoch": 0.03773478185336765, "grad_norm": 1077.7347412109375, "learning_rate": 1.868e-05, "loss": 156.283, "step": 9340 }, { "epoch": 0.037775183118735275, "grad_norm": 834.681396484375, "learning_rate": 1.87e-05, "loss": 94.2577, "step": 9350 }, { "epoch": 0.03781558438410291, "grad_norm": 1160.058349609375, "learning_rate": 1.872e-05, "loss": 231.7988, "step": 9360 }, { "epoch": 0.03785598564947054, "grad_norm": 997.937255859375, "learning_rate": 1.8740000000000004e-05, "loss": 134.2654, "step": 9370 }, { "epoch": 0.037896386914838175, "grad_norm": 882.4503784179688, "learning_rate": 1.876e-05, "loss": 193.2273, "step": 9380 }, { "epoch": 0.0379367881802058, "grad_norm": 3181.698974609375, "learning_rate": 1.878e-05, "loss": 132.2644, "step": 9390 }, { "epoch": 0.037977189445573434, "grad_norm": 725.4539184570312, "learning_rate": 1.88e-05, "loss": 153.5085, "step": 9400 }, { "epoch": 0.03801759071094107, "grad_norm": 1430.8912353515625, "learning_rate": 1.8820000000000003e-05, "loss": 196.1231, "step": 9410 }, { "epoch": 0.0380579919763087, "grad_norm": 1368.989501953125, "learning_rate": 1.8840000000000003e-05, "loss": 139.1376, "step": 9420 }, { "epoch": 0.038098393241676326, "grad_norm": 838.9828491210938, "learning_rate": 1.886e-05, "loss": 131.1992, "step": 9430 }, { "epoch": 0.03813879450704396, "grad_norm": 3527.458251953125, "learning_rate": 1.888e-05, "loss": 187.7699, "step": 9440 }, { "epoch": 0.03817919577241159, "grad_norm": 2998.174072265625, "learning_rate": 1.8900000000000002e-05, "loss": 175.4584, "step": 9450 }, { "epoch": 0.038219597037779225, "grad_norm": 1297.8941650390625, "learning_rate": 1.8920000000000002e-05, "loss": 210.6279, "step": 9460 }, { "epoch": 0.03825999830314685, "grad_norm": 3371.50244140625, "learning_rate": 1.894e-05, "loss": 217.5112, "step": 9470 }, { "epoch": 0.038300399568514484, "grad_norm": 1407.463623046875, "learning_rate": 1.896e-05, "loss": 192.8969, "step": 9480 }, { "epoch": 0.03834080083388212, "grad_norm": 1015.240966796875, "learning_rate": 1.898e-05, "loss": 173.7605, "step": 9490 }, { "epoch": 0.03838120209924975, "grad_norm": 731.1430053710938, "learning_rate": 1.9e-05, "loss": 191.5719, "step": 9500 }, { "epoch": 0.03842160336461738, "grad_norm": 849.3814086914062, "learning_rate": 1.902e-05, "loss": 91.8381, "step": 9510 }, { "epoch": 0.03846200462998501, "grad_norm": 1715.9556884765625, "learning_rate": 1.904e-05, "loss": 175.3195, "step": 9520 }, { "epoch": 0.03850240589535264, "grad_norm": 7559.9091796875, "learning_rate": 1.906e-05, "loss": 175.7841, "step": 9530 }, { "epoch": 0.038542807160720276, "grad_norm": 689.8999633789062, "learning_rate": 1.908e-05, "loss": 116.3339, "step": 9540 }, { "epoch": 0.0385832084260879, "grad_norm": 873.10009765625, "learning_rate": 1.91e-05, "loss": 140.4007, "step": 9550 }, { "epoch": 0.038623609691455535, "grad_norm": 2358.655517578125, "learning_rate": 1.9120000000000003e-05, "loss": 168.361, "step": 9560 }, { "epoch": 0.03866401095682317, "grad_norm": 5055.4892578125, "learning_rate": 1.914e-05, "loss": 174.0588, "step": 9570 }, { "epoch": 0.0387044122221908, "grad_norm": 897.6202392578125, "learning_rate": 1.916e-05, "loss": 153.3665, "step": 9580 }, { "epoch": 0.03874481348755843, "grad_norm": 729.1337280273438, "learning_rate": 1.918e-05, "loss": 131.9806, "step": 9590 }, { "epoch": 0.03878521475292606, "grad_norm": 795.0892944335938, "learning_rate": 1.9200000000000003e-05, "loss": 166.5195, "step": 9600 }, { "epoch": 0.03882561601829369, "grad_norm": 644.804443359375, "learning_rate": 1.9220000000000002e-05, "loss": 129.34, "step": 9610 }, { "epoch": 0.038866017283661326, "grad_norm": 1547.818359375, "learning_rate": 1.924e-05, "loss": 165.855, "step": 9620 }, { "epoch": 0.03890641854902895, "grad_norm": 0.0, "learning_rate": 1.9260000000000002e-05, "loss": 181.7364, "step": 9630 }, { "epoch": 0.038946819814396585, "grad_norm": 2220.852294921875, "learning_rate": 1.9280000000000002e-05, "loss": 154.0527, "step": 9640 }, { "epoch": 0.03898722107976422, "grad_norm": 1048.51123046875, "learning_rate": 1.93e-05, "loss": 160.1465, "step": 9650 }, { "epoch": 0.03902762234513185, "grad_norm": 0.0, "learning_rate": 1.932e-05, "loss": 149.3159, "step": 9660 }, { "epoch": 0.03906802361049948, "grad_norm": 1180.42919921875, "learning_rate": 1.934e-05, "loss": 145.5787, "step": 9670 }, { "epoch": 0.03910842487586711, "grad_norm": 1122.5113525390625, "learning_rate": 1.936e-05, "loss": 124.5202, "step": 9680 }, { "epoch": 0.039148826141234744, "grad_norm": 1447.1502685546875, "learning_rate": 1.938e-05, "loss": 178.7178, "step": 9690 }, { "epoch": 0.03918922740660238, "grad_norm": 1735.78125, "learning_rate": 1.94e-05, "loss": 184.0263, "step": 9700 }, { "epoch": 0.03922962867197, "grad_norm": 660.9119873046875, "learning_rate": 1.942e-05, "loss": 127.7493, "step": 9710 }, { "epoch": 0.039270029937337636, "grad_norm": 1763.235107421875, "learning_rate": 1.944e-05, "loss": 176.0283, "step": 9720 }, { "epoch": 0.03931043120270527, "grad_norm": 2768.470947265625, "learning_rate": 1.946e-05, "loss": 107.1719, "step": 9730 }, { "epoch": 0.0393508324680729, "grad_norm": 600.2669067382812, "learning_rate": 1.948e-05, "loss": 162.8209, "step": 9740 }, { "epoch": 0.03939123373344053, "grad_norm": 3563.5947265625, "learning_rate": 1.9500000000000003e-05, "loss": 197.7512, "step": 9750 }, { "epoch": 0.03943163499880816, "grad_norm": 2130.047607421875, "learning_rate": 1.9520000000000003e-05, "loss": 160.2494, "step": 9760 }, { "epoch": 0.039472036264175794, "grad_norm": 2377.900146484375, "learning_rate": 1.954e-05, "loss": 171.3171, "step": 9770 }, { "epoch": 0.03951243752954343, "grad_norm": 1052.0291748046875, "learning_rate": 1.956e-05, "loss": 113.5263, "step": 9780 }, { "epoch": 0.039552838794911054, "grad_norm": 2078.976318359375, "learning_rate": 1.9580000000000002e-05, "loss": 191.9397, "step": 9790 }, { "epoch": 0.03959324006027869, "grad_norm": 773.3158569335938, "learning_rate": 1.9600000000000002e-05, "loss": 126.11, "step": 9800 }, { "epoch": 0.03963364132564632, "grad_norm": 1939.7193603515625, "learning_rate": 1.9620000000000002e-05, "loss": 210.1035, "step": 9810 }, { "epoch": 0.03967404259101395, "grad_norm": 531.8984985351562, "learning_rate": 1.9640000000000002e-05, "loss": 161.9592, "step": 9820 }, { "epoch": 0.03971444385638158, "grad_norm": 441.3695983886719, "learning_rate": 1.966e-05, "loss": 131.7898, "step": 9830 }, { "epoch": 0.03975484512174921, "grad_norm": 2530.7509765625, "learning_rate": 1.968e-05, "loss": 190.4342, "step": 9840 }, { "epoch": 0.039795246387116845, "grad_norm": 1077.1409912109375, "learning_rate": 1.97e-05, "loss": 141.0169, "step": 9850 }, { "epoch": 0.03983564765248448, "grad_norm": 1837.5992431640625, "learning_rate": 1.972e-05, "loss": 197.5706, "step": 9860 }, { "epoch": 0.039876048917852104, "grad_norm": 1183.7669677734375, "learning_rate": 1.974e-05, "loss": 186.1175, "step": 9870 }, { "epoch": 0.03991645018321974, "grad_norm": 584.4487915039062, "learning_rate": 1.976e-05, "loss": 128.3029, "step": 9880 }, { "epoch": 0.03995685144858737, "grad_norm": 731.9237060546875, "learning_rate": 1.978e-05, "loss": 97.8851, "step": 9890 }, { "epoch": 0.039997252713955, "grad_norm": 734.3873901367188, "learning_rate": 1.9800000000000004e-05, "loss": 170.2385, "step": 9900 }, { "epoch": 0.04003765397932263, "grad_norm": 530.0155029296875, "learning_rate": 1.982e-05, "loss": 140.1505, "step": 9910 }, { "epoch": 0.04007805524469026, "grad_norm": 1368.81298828125, "learning_rate": 1.984e-05, "loss": 188.7803, "step": 9920 }, { "epoch": 0.040118456510057895, "grad_norm": 2284.725341796875, "learning_rate": 1.986e-05, "loss": 213.4412, "step": 9930 }, { "epoch": 0.04015885777542553, "grad_norm": 953.0332641601562, "learning_rate": 1.9880000000000003e-05, "loss": 81.9998, "step": 9940 }, { "epoch": 0.040199259040793155, "grad_norm": 2036.2686767578125, "learning_rate": 1.9900000000000003e-05, "loss": 124.487, "step": 9950 }, { "epoch": 0.04023966030616079, "grad_norm": 1137.9842529296875, "learning_rate": 1.992e-05, "loss": 166.7126, "step": 9960 }, { "epoch": 0.04028006157152842, "grad_norm": 906.6007080078125, "learning_rate": 1.994e-05, "loss": 188.0849, "step": 9970 }, { "epoch": 0.040320462836896054, "grad_norm": 1181.5977783203125, "learning_rate": 1.9960000000000002e-05, "loss": 168.7636, "step": 9980 }, { "epoch": 0.04036086410226368, "grad_norm": 1022.9138793945312, "learning_rate": 1.9980000000000002e-05, "loss": 168.0031, "step": 9990 }, { "epoch": 0.04040126536763131, "grad_norm": 717.4224243164062, "learning_rate": 2e-05, "loss": 122.0701, "step": 10000 }, { "epoch": 0.040441666632998946, "grad_norm": 595.6052856445312, "learning_rate": 2.002e-05, "loss": 143.9377, "step": 10010 }, { "epoch": 0.04048206789836658, "grad_norm": 1366.42431640625, "learning_rate": 2.004e-05, "loss": 167.5448, "step": 10020 }, { "epoch": 0.040522469163734205, "grad_norm": 1101.7801513671875, "learning_rate": 2.006e-05, "loss": 192.1521, "step": 10030 }, { "epoch": 0.04056287042910184, "grad_norm": 1699.7630615234375, "learning_rate": 2.008e-05, "loss": 154.8807, "step": 10040 }, { "epoch": 0.04060327169446947, "grad_norm": 1256.3902587890625, "learning_rate": 2.01e-05, "loss": 134.1945, "step": 10050 }, { "epoch": 0.040643672959837104, "grad_norm": 0.0, "learning_rate": 2.012e-05, "loss": 131.3899, "step": 10060 }, { "epoch": 0.04068407422520473, "grad_norm": 1447.4774169921875, "learning_rate": 2.014e-05, "loss": 159.1337, "step": 10070 }, { "epoch": 0.040724475490572364, "grad_norm": 4435.76904296875, "learning_rate": 2.016e-05, "loss": 198.5379, "step": 10080 }, { "epoch": 0.04076487675594, "grad_norm": 852.9769287109375, "learning_rate": 2.0180000000000003e-05, "loss": 160.13, "step": 10090 }, { "epoch": 0.04080527802130763, "grad_norm": 1151.4747314453125, "learning_rate": 2.0200000000000003e-05, "loss": 162.3118, "step": 10100 }, { "epoch": 0.040845679286675256, "grad_norm": 1879.0950927734375, "learning_rate": 2.022e-05, "loss": 172.1667, "step": 10110 }, { "epoch": 0.04088608055204289, "grad_norm": 953.0318603515625, "learning_rate": 2.024e-05, "loss": 152.1001, "step": 10120 }, { "epoch": 0.04092648181741052, "grad_norm": 834.5413208007812, "learning_rate": 2.0260000000000003e-05, "loss": 166.7469, "step": 10130 }, { "epoch": 0.040966883082778155, "grad_norm": 1954.07080078125, "learning_rate": 2.0280000000000002e-05, "loss": 170.3608, "step": 10140 }, { "epoch": 0.04100728434814578, "grad_norm": 1222.4248046875, "learning_rate": 2.0300000000000002e-05, "loss": 168.8605, "step": 10150 }, { "epoch": 0.041047685613513414, "grad_norm": 2559.77099609375, "learning_rate": 2.032e-05, "loss": 169.7865, "step": 10160 }, { "epoch": 0.04108808687888105, "grad_norm": 2988.0048828125, "learning_rate": 2.0340000000000002e-05, "loss": 175.7436, "step": 10170 }, { "epoch": 0.04112848814424868, "grad_norm": 647.4199829101562, "learning_rate": 2.036e-05, "loss": 161.4738, "step": 10180 }, { "epoch": 0.041168889409616306, "grad_norm": 1833.9415283203125, "learning_rate": 2.038e-05, "loss": 198.7381, "step": 10190 }, { "epoch": 0.04120929067498394, "grad_norm": 734.6459350585938, "learning_rate": 2.04e-05, "loss": 145.9311, "step": 10200 }, { "epoch": 0.04124969194035157, "grad_norm": 1672.8916015625, "learning_rate": 2.042e-05, "loss": 149.8653, "step": 10210 }, { "epoch": 0.041290093205719205, "grad_norm": 1029.32421875, "learning_rate": 2.044e-05, "loss": 178.9316, "step": 10220 }, { "epoch": 0.04133049447108683, "grad_norm": 714.8513793945312, "learning_rate": 2.046e-05, "loss": 112.8458, "step": 10230 }, { "epoch": 0.041370895736454465, "grad_norm": 1445.0704345703125, "learning_rate": 2.048e-05, "loss": 164.2769, "step": 10240 }, { "epoch": 0.0414112970018221, "grad_norm": 2015.292236328125, "learning_rate": 2.05e-05, "loss": 258.4405, "step": 10250 }, { "epoch": 0.04145169826718973, "grad_norm": 820.0475463867188, "learning_rate": 2.052e-05, "loss": 139.5321, "step": 10260 }, { "epoch": 0.04149209953255736, "grad_norm": 886.0709838867188, "learning_rate": 2.054e-05, "loss": 148.4284, "step": 10270 }, { "epoch": 0.04153250079792499, "grad_norm": 1072.3577880859375, "learning_rate": 2.0560000000000003e-05, "loss": 192.8738, "step": 10280 }, { "epoch": 0.04157290206329262, "grad_norm": 747.4786987304688, "learning_rate": 2.0580000000000003e-05, "loss": 101.708, "step": 10290 }, { "epoch": 0.041613303328660256, "grad_norm": 5367.84521484375, "learning_rate": 2.06e-05, "loss": 194.0073, "step": 10300 }, { "epoch": 0.04165370459402788, "grad_norm": 1179.1737060546875, "learning_rate": 2.062e-05, "loss": 121.6645, "step": 10310 }, { "epoch": 0.041694105859395515, "grad_norm": 779.1863403320312, "learning_rate": 2.0640000000000002e-05, "loss": 156.9918, "step": 10320 }, { "epoch": 0.04173450712476315, "grad_norm": 609.2747192382812, "learning_rate": 2.0660000000000002e-05, "loss": 121.481, "step": 10330 }, { "epoch": 0.04177490839013078, "grad_norm": 856.9229736328125, "learning_rate": 2.0680000000000002e-05, "loss": 126.45, "step": 10340 }, { "epoch": 0.04181530965549841, "grad_norm": 3409.32568359375, "learning_rate": 2.07e-05, "loss": 135.8212, "step": 10350 }, { "epoch": 0.04185571092086604, "grad_norm": 2779.28369140625, "learning_rate": 2.072e-05, "loss": 180.5799, "step": 10360 }, { "epoch": 0.041896112186233674, "grad_norm": 2200.98583984375, "learning_rate": 2.074e-05, "loss": 228.0418, "step": 10370 }, { "epoch": 0.04193651345160131, "grad_norm": 864.7232055664062, "learning_rate": 2.076e-05, "loss": 200.8099, "step": 10380 }, { "epoch": 0.04197691471696893, "grad_norm": 373.90771484375, "learning_rate": 2.078e-05, "loss": 123.1325, "step": 10390 }, { "epoch": 0.042017315982336566, "grad_norm": 865.7789916992188, "learning_rate": 2.08e-05, "loss": 177.7623, "step": 10400 }, { "epoch": 0.0420577172477042, "grad_norm": 1141.6817626953125, "learning_rate": 2.082e-05, "loss": 150.2677, "step": 10410 }, { "epoch": 0.04209811851307183, "grad_norm": 720.0677490234375, "learning_rate": 2.084e-05, "loss": 146.5872, "step": 10420 }, { "epoch": 0.04213851977843946, "grad_norm": 1222.44775390625, "learning_rate": 2.086e-05, "loss": 162.8006, "step": 10430 }, { "epoch": 0.04217892104380709, "grad_norm": 909.986083984375, "learning_rate": 2.0880000000000003e-05, "loss": 193.938, "step": 10440 }, { "epoch": 0.042219322309174724, "grad_norm": 1559.7752685546875, "learning_rate": 2.09e-05, "loss": 167.0518, "step": 10450 }, { "epoch": 0.04225972357454236, "grad_norm": 2482.876953125, "learning_rate": 2.092e-05, "loss": 138.0267, "step": 10460 }, { "epoch": 0.04230012483990998, "grad_norm": 1091.695556640625, "learning_rate": 2.0940000000000003e-05, "loss": 103.3883, "step": 10470 }, { "epoch": 0.042340526105277616, "grad_norm": 806.2134399414062, "learning_rate": 2.0960000000000003e-05, "loss": 124.8749, "step": 10480 }, { "epoch": 0.04238092737064525, "grad_norm": 1276.008544921875, "learning_rate": 2.098e-05, "loss": 163.3479, "step": 10490 }, { "epoch": 0.04242132863601288, "grad_norm": 4489.3603515625, "learning_rate": 2.1e-05, "loss": 171.6303, "step": 10500 }, { "epoch": 0.04246172990138051, "grad_norm": 838.2408447265625, "learning_rate": 2.1020000000000002e-05, "loss": 150.1332, "step": 10510 }, { "epoch": 0.04250213116674814, "grad_norm": 1553.7894287109375, "learning_rate": 2.1040000000000002e-05, "loss": 161.144, "step": 10520 }, { "epoch": 0.042542532432115775, "grad_norm": 1126.568115234375, "learning_rate": 2.106e-05, "loss": 152.1566, "step": 10530 }, { "epoch": 0.04258293369748341, "grad_norm": 832.1701049804688, "learning_rate": 2.1079999999999998e-05, "loss": 139.9062, "step": 10540 }, { "epoch": 0.042623334962851034, "grad_norm": 519.9462890625, "learning_rate": 2.11e-05, "loss": 108.9907, "step": 10550 }, { "epoch": 0.04266373622821867, "grad_norm": 2303.817626953125, "learning_rate": 2.112e-05, "loss": 150.1262, "step": 10560 }, { "epoch": 0.0427041374935863, "grad_norm": 1118.6610107421875, "learning_rate": 2.114e-05, "loss": 127.0093, "step": 10570 }, { "epoch": 0.04274453875895393, "grad_norm": 1355.4498291015625, "learning_rate": 2.116e-05, "loss": 165.2866, "step": 10580 }, { "epoch": 0.04278494002432156, "grad_norm": 2492.5751953125, "learning_rate": 2.118e-05, "loss": 167.1242, "step": 10590 }, { "epoch": 0.04282534128968919, "grad_norm": 3182.501953125, "learning_rate": 2.12e-05, "loss": 146.6266, "step": 10600 }, { "epoch": 0.042865742555056825, "grad_norm": 1109.6849365234375, "learning_rate": 2.122e-05, "loss": 195.9611, "step": 10610 }, { "epoch": 0.04290614382042446, "grad_norm": 2110.990234375, "learning_rate": 2.124e-05, "loss": 199.2886, "step": 10620 }, { "epoch": 0.042946545085792084, "grad_norm": 1315.0323486328125, "learning_rate": 2.1260000000000003e-05, "loss": 136.6857, "step": 10630 }, { "epoch": 0.04298694635115972, "grad_norm": 1708.447998046875, "learning_rate": 2.128e-05, "loss": 173.377, "step": 10640 }, { "epoch": 0.04302734761652735, "grad_norm": 1813.0264892578125, "learning_rate": 2.13e-05, "loss": 166.6691, "step": 10650 }, { "epoch": 0.043067748881894984, "grad_norm": 1098.9163818359375, "learning_rate": 2.1320000000000003e-05, "loss": 187.8283, "step": 10660 }, { "epoch": 0.04310815014726261, "grad_norm": 608.4671630859375, "learning_rate": 2.1340000000000002e-05, "loss": 141.6912, "step": 10670 }, { "epoch": 0.04314855141263024, "grad_norm": 830.4765625, "learning_rate": 2.1360000000000002e-05, "loss": 142.6794, "step": 10680 }, { "epoch": 0.043188952677997876, "grad_norm": 600.3438110351562, "learning_rate": 2.138e-05, "loss": 130.3432, "step": 10690 }, { "epoch": 0.04322935394336551, "grad_norm": 1370.6168212890625, "learning_rate": 2.1400000000000002e-05, "loss": 209.9451, "step": 10700 }, { "epoch": 0.043269755208733135, "grad_norm": 622.1041870117188, "learning_rate": 2.142e-05, "loss": 169.206, "step": 10710 }, { "epoch": 0.04331015647410077, "grad_norm": 2445.648681640625, "learning_rate": 2.144e-05, "loss": 200.1026, "step": 10720 }, { "epoch": 0.0433505577394684, "grad_norm": 1902.2943115234375, "learning_rate": 2.146e-05, "loss": 187.5802, "step": 10730 }, { "epoch": 0.043390959004836034, "grad_norm": 1008.3157348632812, "learning_rate": 2.148e-05, "loss": 143.9581, "step": 10740 }, { "epoch": 0.04343136027020366, "grad_norm": 594.2481079101562, "learning_rate": 2.15e-05, "loss": 186.3464, "step": 10750 }, { "epoch": 0.04347176153557129, "grad_norm": 13106.4541015625, "learning_rate": 2.152e-05, "loss": 218.3369, "step": 10760 }, { "epoch": 0.043512162800938926, "grad_norm": 2128.98291015625, "learning_rate": 2.154e-05, "loss": 170.2695, "step": 10770 }, { "epoch": 0.04355256406630656, "grad_norm": 1214.33642578125, "learning_rate": 2.1560000000000004e-05, "loss": 147.123, "step": 10780 }, { "epoch": 0.043592965331674186, "grad_norm": 1065.6256103515625, "learning_rate": 2.158e-05, "loss": 168.7387, "step": 10790 }, { "epoch": 0.04363336659704182, "grad_norm": 1667.8955078125, "learning_rate": 2.16e-05, "loss": 152.4974, "step": 10800 }, { "epoch": 0.04367376786240945, "grad_norm": 1360.904052734375, "learning_rate": 2.162e-05, "loss": 254.4096, "step": 10810 }, { "epoch": 0.043714169127777085, "grad_norm": 1592.132080078125, "learning_rate": 2.1640000000000003e-05, "loss": 174.9611, "step": 10820 }, { "epoch": 0.04375457039314471, "grad_norm": 776.6605834960938, "learning_rate": 2.166e-05, "loss": 184.3046, "step": 10830 }, { "epoch": 0.043794971658512344, "grad_norm": 2525.784423828125, "learning_rate": 2.168e-05, "loss": 164.588, "step": 10840 }, { "epoch": 0.04383537292387998, "grad_norm": 1608.6549072265625, "learning_rate": 2.1700000000000002e-05, "loss": 196.8271, "step": 10850 }, { "epoch": 0.04387577418924761, "grad_norm": 892.496826171875, "learning_rate": 2.1720000000000002e-05, "loss": 148.6461, "step": 10860 }, { "epoch": 0.043916175454615236, "grad_norm": 1432.486572265625, "learning_rate": 2.1740000000000002e-05, "loss": 182.0831, "step": 10870 }, { "epoch": 0.04395657671998287, "grad_norm": 1320.6700439453125, "learning_rate": 2.176e-05, "loss": 180.9173, "step": 10880 }, { "epoch": 0.0439969779853505, "grad_norm": 1471.670166015625, "learning_rate": 2.178e-05, "loss": 178.3413, "step": 10890 }, { "epoch": 0.044037379250718135, "grad_norm": 1130.7198486328125, "learning_rate": 2.18e-05, "loss": 148.5332, "step": 10900 }, { "epoch": 0.04407778051608576, "grad_norm": 1092.3216552734375, "learning_rate": 2.182e-05, "loss": 114.7338, "step": 10910 }, { "epoch": 0.044118181781453394, "grad_norm": 1079.041259765625, "learning_rate": 2.184e-05, "loss": 132.0338, "step": 10920 }, { "epoch": 0.04415858304682103, "grad_norm": 2612.80517578125, "learning_rate": 2.186e-05, "loss": 138.605, "step": 10930 }, { "epoch": 0.04419898431218866, "grad_norm": 1243.3662109375, "learning_rate": 2.188e-05, "loss": 157.8075, "step": 10940 }, { "epoch": 0.04423938557755629, "grad_norm": 1165.291015625, "learning_rate": 2.19e-05, "loss": 123.9359, "step": 10950 }, { "epoch": 0.04427978684292392, "grad_norm": 970.86083984375, "learning_rate": 2.192e-05, "loss": 99.9449, "step": 10960 }, { "epoch": 0.04432018810829155, "grad_norm": 282.0602722167969, "learning_rate": 2.1940000000000003e-05, "loss": 126.6935, "step": 10970 }, { "epoch": 0.044360589373659186, "grad_norm": 2742.96044921875, "learning_rate": 2.196e-05, "loss": 216.6111, "step": 10980 }, { "epoch": 0.04440099063902681, "grad_norm": 725.5316772460938, "learning_rate": 2.198e-05, "loss": 283.1829, "step": 10990 }, { "epoch": 0.044441391904394445, "grad_norm": 1572.414794921875, "learning_rate": 2.2000000000000003e-05, "loss": 171.7036, "step": 11000 }, { "epoch": 0.04448179316976208, "grad_norm": 0.0, "learning_rate": 2.2020000000000003e-05, "loss": 158.3066, "step": 11010 }, { "epoch": 0.04452219443512971, "grad_norm": 1373.786376953125, "learning_rate": 2.2040000000000002e-05, "loss": 125.9849, "step": 11020 }, { "epoch": 0.04456259570049734, "grad_norm": 1483.981689453125, "learning_rate": 2.206e-05, "loss": 176.718, "step": 11030 }, { "epoch": 0.04460299696586497, "grad_norm": 997.575927734375, "learning_rate": 2.2080000000000002e-05, "loss": 164.0399, "step": 11040 }, { "epoch": 0.0446433982312326, "grad_norm": 550.5761108398438, "learning_rate": 2.2100000000000002e-05, "loss": 135.1882, "step": 11050 }, { "epoch": 0.044683799496600236, "grad_norm": 733.0974731445312, "learning_rate": 2.212e-05, "loss": 112.1112, "step": 11060 }, { "epoch": 0.04472420076196786, "grad_norm": 856.7572021484375, "learning_rate": 2.214e-05, "loss": 113.8155, "step": 11070 }, { "epoch": 0.044764602027335496, "grad_norm": 1171.519287109375, "learning_rate": 2.216e-05, "loss": 144.4537, "step": 11080 }, { "epoch": 0.04480500329270313, "grad_norm": 1287.0703125, "learning_rate": 2.218e-05, "loss": 122.8268, "step": 11090 }, { "epoch": 0.04484540455807076, "grad_norm": 1202.9896240234375, "learning_rate": 2.22e-05, "loss": 206.9014, "step": 11100 }, { "epoch": 0.04488580582343839, "grad_norm": 1963.7164306640625, "learning_rate": 2.222e-05, "loss": 128.2789, "step": 11110 }, { "epoch": 0.04492620708880602, "grad_norm": 2504.82958984375, "learning_rate": 2.224e-05, "loss": 124.5799, "step": 11120 }, { "epoch": 0.044966608354173654, "grad_norm": 1163.331298828125, "learning_rate": 2.226e-05, "loss": 91.1706, "step": 11130 }, { "epoch": 0.04500700961954129, "grad_norm": 10312.1435546875, "learning_rate": 2.228e-05, "loss": 200.2307, "step": 11140 }, { "epoch": 0.04504741088490891, "grad_norm": 1264.3104248046875, "learning_rate": 2.23e-05, "loss": 161.8118, "step": 11150 }, { "epoch": 0.045087812150276546, "grad_norm": 8854.8544921875, "learning_rate": 2.2320000000000003e-05, "loss": 162.5124, "step": 11160 }, { "epoch": 0.04512821341564418, "grad_norm": 4647.8427734375, "learning_rate": 2.234e-05, "loss": 137.7809, "step": 11170 }, { "epoch": 0.04516861468101181, "grad_norm": 1007.0010375976562, "learning_rate": 2.236e-05, "loss": 128.0276, "step": 11180 }, { "epoch": 0.04520901594637944, "grad_norm": 2929.648681640625, "learning_rate": 2.2380000000000003e-05, "loss": 132.0453, "step": 11190 }, { "epoch": 0.04524941721174707, "grad_norm": 2017.7265625, "learning_rate": 2.2400000000000002e-05, "loss": 242.607, "step": 11200 }, { "epoch": 0.045289818477114704, "grad_norm": 783.3958129882812, "learning_rate": 2.2420000000000002e-05, "loss": 146.9575, "step": 11210 }, { "epoch": 0.04533021974248234, "grad_norm": 1278.9561767578125, "learning_rate": 2.244e-05, "loss": 183.9778, "step": 11220 }, { "epoch": 0.045370621007849964, "grad_norm": 879.9774169921875, "learning_rate": 2.2460000000000002e-05, "loss": 139.0227, "step": 11230 }, { "epoch": 0.0454110222732176, "grad_norm": 1722.348876953125, "learning_rate": 2.248e-05, "loss": 229.9668, "step": 11240 }, { "epoch": 0.04545142353858523, "grad_norm": 652.4034423828125, "learning_rate": 2.25e-05, "loss": 129.4687, "step": 11250 }, { "epoch": 0.04549182480395286, "grad_norm": 1721.231689453125, "learning_rate": 2.252e-05, "loss": 168.8096, "step": 11260 }, { "epoch": 0.04553222606932049, "grad_norm": 1650.2803955078125, "learning_rate": 2.254e-05, "loss": 162.424, "step": 11270 }, { "epoch": 0.04557262733468812, "grad_norm": 954.37890625, "learning_rate": 2.256e-05, "loss": 197.3434, "step": 11280 }, { "epoch": 0.045613028600055755, "grad_norm": 4419.76123046875, "learning_rate": 2.258e-05, "loss": 210.9756, "step": 11290 }, { "epoch": 0.04565342986542339, "grad_norm": 1498.54296875, "learning_rate": 2.26e-05, "loss": 134.9217, "step": 11300 }, { "epoch": 0.045693831130791014, "grad_norm": 987.8052368164062, "learning_rate": 2.2620000000000004e-05, "loss": 163.9031, "step": 11310 }, { "epoch": 0.04573423239615865, "grad_norm": 1421.3912353515625, "learning_rate": 2.264e-05, "loss": 175.6454, "step": 11320 }, { "epoch": 0.04577463366152628, "grad_norm": 4952.677734375, "learning_rate": 2.266e-05, "loss": 191.8143, "step": 11330 }, { "epoch": 0.04581503492689391, "grad_norm": 1982.3878173828125, "learning_rate": 2.268e-05, "loss": 163.27, "step": 11340 }, { "epoch": 0.04585543619226154, "grad_norm": 1355.781005859375, "learning_rate": 2.2700000000000003e-05, "loss": 169.8846, "step": 11350 }, { "epoch": 0.04589583745762917, "grad_norm": 1866.7637939453125, "learning_rate": 2.2720000000000003e-05, "loss": 195.6275, "step": 11360 }, { "epoch": 0.045936238722996806, "grad_norm": 1269.4019775390625, "learning_rate": 2.274e-05, "loss": 136.8505, "step": 11370 }, { "epoch": 0.04597663998836444, "grad_norm": 712.604736328125, "learning_rate": 2.2760000000000002e-05, "loss": 142.4693, "step": 11380 }, { "epoch": 0.046017041253732065, "grad_norm": 730.568115234375, "learning_rate": 2.2780000000000002e-05, "loss": 123.7075, "step": 11390 }, { "epoch": 0.0460574425190997, "grad_norm": 748.4280395507812, "learning_rate": 2.2800000000000002e-05, "loss": 143.0049, "step": 11400 }, { "epoch": 0.04609784378446733, "grad_norm": 1697.1856689453125, "learning_rate": 2.282e-05, "loss": 126.3755, "step": 11410 }, { "epoch": 0.046138245049834964, "grad_norm": 2124.771240234375, "learning_rate": 2.284e-05, "loss": 172.3426, "step": 11420 }, { "epoch": 0.04617864631520259, "grad_norm": 1425.5914306640625, "learning_rate": 2.286e-05, "loss": 152.4624, "step": 11430 }, { "epoch": 0.04621904758057022, "grad_norm": 1039.5628662109375, "learning_rate": 2.288e-05, "loss": 168.2912, "step": 11440 }, { "epoch": 0.046259448845937856, "grad_norm": 956.6918334960938, "learning_rate": 2.29e-05, "loss": 153.5313, "step": 11450 }, { "epoch": 0.04629985011130549, "grad_norm": 1411.84423828125, "learning_rate": 2.292e-05, "loss": 180.7848, "step": 11460 }, { "epoch": 0.046340251376673115, "grad_norm": 1310.616455078125, "learning_rate": 2.294e-05, "loss": 156.0493, "step": 11470 }, { "epoch": 0.04638065264204075, "grad_norm": 1483.5418701171875, "learning_rate": 2.296e-05, "loss": 194.3713, "step": 11480 }, { "epoch": 0.04642105390740838, "grad_norm": 1165.32763671875, "learning_rate": 2.298e-05, "loss": 143.955, "step": 11490 }, { "epoch": 0.046461455172776014, "grad_norm": 1113.7464599609375, "learning_rate": 2.3000000000000003e-05, "loss": 151.1558, "step": 11500 }, { "epoch": 0.04650185643814364, "grad_norm": 591.7924194335938, "learning_rate": 2.302e-05, "loss": 147.8064, "step": 11510 }, { "epoch": 0.046542257703511274, "grad_norm": 375.6275329589844, "learning_rate": 2.304e-05, "loss": 147.5092, "step": 11520 }, { "epoch": 0.04658265896887891, "grad_norm": 1058.679443359375, "learning_rate": 2.306e-05, "loss": 129.6008, "step": 11530 }, { "epoch": 0.04662306023424654, "grad_norm": 2693.062255859375, "learning_rate": 2.3080000000000003e-05, "loss": 168.9607, "step": 11540 }, { "epoch": 0.046663461499614166, "grad_norm": 1006.8905029296875, "learning_rate": 2.3100000000000002e-05, "loss": 186.7646, "step": 11550 }, { "epoch": 0.0467038627649818, "grad_norm": 1659.6192626953125, "learning_rate": 2.312e-05, "loss": 245.8637, "step": 11560 }, { "epoch": 0.04674426403034943, "grad_norm": 1359.0103759765625, "learning_rate": 2.3140000000000002e-05, "loss": 150.9513, "step": 11570 }, { "epoch": 0.046784665295717065, "grad_norm": 627.9051513671875, "learning_rate": 2.3160000000000002e-05, "loss": 96.2488, "step": 11580 }, { "epoch": 0.04682506656108469, "grad_norm": 1331.7342529296875, "learning_rate": 2.318e-05, "loss": 139.2599, "step": 11590 }, { "epoch": 0.046865467826452324, "grad_norm": 1107.905517578125, "learning_rate": 2.32e-05, "loss": 127.3733, "step": 11600 }, { "epoch": 0.04690586909181996, "grad_norm": 1476.7293701171875, "learning_rate": 2.322e-05, "loss": 88.5844, "step": 11610 }, { "epoch": 0.04694627035718759, "grad_norm": 1438.66748046875, "learning_rate": 2.324e-05, "loss": 146.773, "step": 11620 }, { "epoch": 0.046986671622555216, "grad_norm": 1060.174560546875, "learning_rate": 2.326e-05, "loss": 185.9057, "step": 11630 }, { "epoch": 0.04702707288792285, "grad_norm": 1116.5933837890625, "learning_rate": 2.328e-05, "loss": 161.711, "step": 11640 }, { "epoch": 0.04706747415329048, "grad_norm": 1230.9957275390625, "learning_rate": 2.3300000000000004e-05, "loss": 161.603, "step": 11650 }, { "epoch": 0.047107875418658116, "grad_norm": 1050.322265625, "learning_rate": 2.332e-05, "loss": 87.1483, "step": 11660 }, { "epoch": 0.04714827668402574, "grad_norm": 561.3192138671875, "learning_rate": 2.334e-05, "loss": 72.6746, "step": 11670 }, { "epoch": 0.047188677949393375, "grad_norm": 768.3349609375, "learning_rate": 2.336e-05, "loss": 245.4826, "step": 11680 }, { "epoch": 0.04722907921476101, "grad_norm": 1956.8682861328125, "learning_rate": 2.3380000000000003e-05, "loss": 246.9039, "step": 11690 }, { "epoch": 0.04726948048012864, "grad_norm": 1342.6962890625, "learning_rate": 2.3400000000000003e-05, "loss": 143.455, "step": 11700 }, { "epoch": 0.04730988174549627, "grad_norm": 1271.6512451171875, "learning_rate": 2.342e-05, "loss": 119.4163, "step": 11710 }, { "epoch": 0.0473502830108639, "grad_norm": 973.38427734375, "learning_rate": 2.344e-05, "loss": 142.8418, "step": 11720 }, { "epoch": 0.04739068427623153, "grad_norm": 725.7868041992188, "learning_rate": 2.3460000000000002e-05, "loss": 151.6831, "step": 11730 }, { "epoch": 0.047431085541599166, "grad_norm": 1243.775146484375, "learning_rate": 2.3480000000000002e-05, "loss": 129.1701, "step": 11740 }, { "epoch": 0.04747148680696679, "grad_norm": 1194.3792724609375, "learning_rate": 2.35e-05, "loss": 179.6937, "step": 11750 }, { "epoch": 0.047511888072334425, "grad_norm": 836.063720703125, "learning_rate": 2.3520000000000002e-05, "loss": 96.7801, "step": 11760 }, { "epoch": 0.04755228933770206, "grad_norm": 6927.7421875, "learning_rate": 2.354e-05, "loss": 170.788, "step": 11770 }, { "epoch": 0.04759269060306969, "grad_norm": 1191.6163330078125, "learning_rate": 2.356e-05, "loss": 95.2363, "step": 11780 }, { "epoch": 0.04763309186843732, "grad_norm": 4237.19384765625, "learning_rate": 2.358e-05, "loss": 149.3932, "step": 11790 }, { "epoch": 0.04767349313380495, "grad_norm": 770.5933227539062, "learning_rate": 2.36e-05, "loss": 163.007, "step": 11800 }, { "epoch": 0.047713894399172584, "grad_norm": 393.3005065917969, "learning_rate": 2.362e-05, "loss": 169.8844, "step": 11810 }, { "epoch": 0.04775429566454022, "grad_norm": 1076.961181640625, "learning_rate": 2.364e-05, "loss": 144.8054, "step": 11820 }, { "epoch": 0.04779469692990784, "grad_norm": 881.7391967773438, "learning_rate": 2.366e-05, "loss": 157.4215, "step": 11830 }, { "epoch": 0.047835098195275476, "grad_norm": 1243.5374755859375, "learning_rate": 2.3680000000000004e-05, "loss": 159.1609, "step": 11840 }, { "epoch": 0.04787549946064311, "grad_norm": 1366.5406494140625, "learning_rate": 2.37e-05, "loss": 132.554, "step": 11850 }, { "epoch": 0.04791590072601074, "grad_norm": 1306.282470703125, "learning_rate": 2.372e-05, "loss": 133.127, "step": 11860 }, { "epoch": 0.04795630199137837, "grad_norm": 1843.0394287109375, "learning_rate": 2.374e-05, "loss": 167.0897, "step": 11870 }, { "epoch": 0.047996703256746, "grad_norm": 1241.4002685546875, "learning_rate": 2.3760000000000003e-05, "loss": 195.5516, "step": 11880 }, { "epoch": 0.048037104522113634, "grad_norm": 1342.593017578125, "learning_rate": 2.3780000000000003e-05, "loss": 188.283, "step": 11890 }, { "epoch": 0.04807750578748127, "grad_norm": 6172.35498046875, "learning_rate": 2.38e-05, "loss": 202.552, "step": 11900 }, { "epoch": 0.04811790705284889, "grad_norm": 979.6281127929688, "learning_rate": 2.3820000000000002e-05, "loss": 126.3583, "step": 11910 }, { "epoch": 0.048158308318216526, "grad_norm": 3780.92236328125, "learning_rate": 2.3840000000000002e-05, "loss": 125.3818, "step": 11920 }, { "epoch": 0.04819870958358416, "grad_norm": 1145.655029296875, "learning_rate": 2.3860000000000002e-05, "loss": 135.4183, "step": 11930 }, { "epoch": 0.04823911084895179, "grad_norm": 3658.722412109375, "learning_rate": 2.3880000000000002e-05, "loss": 180.5494, "step": 11940 }, { "epoch": 0.04827951211431942, "grad_norm": 1273.83740234375, "learning_rate": 2.39e-05, "loss": 208.1243, "step": 11950 }, { "epoch": 0.04831991337968705, "grad_norm": 701.6416015625, "learning_rate": 2.392e-05, "loss": 119.6717, "step": 11960 }, { "epoch": 0.048360314645054685, "grad_norm": 598.38818359375, "learning_rate": 2.394e-05, "loss": 106.7641, "step": 11970 }, { "epoch": 0.04840071591042232, "grad_norm": 697.3867797851562, "learning_rate": 2.396e-05, "loss": 157.0546, "step": 11980 }, { "epoch": 0.048441117175789944, "grad_norm": 2313.9013671875, "learning_rate": 2.398e-05, "loss": 199.3208, "step": 11990 }, { "epoch": 0.04848151844115758, "grad_norm": 712.7728881835938, "learning_rate": 2.4e-05, "loss": 163.3351, "step": 12000 }, { "epoch": 0.04852191970652521, "grad_norm": 2287.64794921875, "learning_rate": 2.402e-05, "loss": 156.7126, "step": 12010 }, { "epoch": 0.04856232097189284, "grad_norm": 1171.4212646484375, "learning_rate": 2.404e-05, "loss": 154.6671, "step": 12020 }, { "epoch": 0.04860272223726047, "grad_norm": 928.2921142578125, "learning_rate": 2.4060000000000003e-05, "loss": 189.5635, "step": 12030 }, { "epoch": 0.0486431235026281, "grad_norm": 2594.64794921875, "learning_rate": 2.408e-05, "loss": 122.9152, "step": 12040 }, { "epoch": 0.048683524767995735, "grad_norm": 625.3611450195312, "learning_rate": 2.41e-05, "loss": 157.0619, "step": 12050 }, { "epoch": 0.04872392603336337, "grad_norm": 1715.5015869140625, "learning_rate": 2.412e-05, "loss": 146.0929, "step": 12060 }, { "epoch": 0.048764327298730994, "grad_norm": 1481.7435302734375, "learning_rate": 2.4140000000000003e-05, "loss": 150.0942, "step": 12070 }, { "epoch": 0.04880472856409863, "grad_norm": 1766.0885009765625, "learning_rate": 2.4160000000000002e-05, "loss": 122.9469, "step": 12080 }, { "epoch": 0.04884512982946626, "grad_norm": 1821.820556640625, "learning_rate": 2.418e-05, "loss": 236.4261, "step": 12090 }, { "epoch": 0.048885531094833894, "grad_norm": 1121.892333984375, "learning_rate": 2.4200000000000002e-05, "loss": 194.8495, "step": 12100 }, { "epoch": 0.04892593236020152, "grad_norm": 2687.8505859375, "learning_rate": 2.4220000000000002e-05, "loss": 199.661, "step": 12110 }, { "epoch": 0.04896633362556915, "grad_norm": 1251.5821533203125, "learning_rate": 2.4240000000000002e-05, "loss": 188.0581, "step": 12120 }, { "epoch": 0.049006734890936786, "grad_norm": 1508.91455078125, "learning_rate": 2.426e-05, "loss": 155.5656, "step": 12130 }, { "epoch": 0.04904713615630442, "grad_norm": 1015.0963745117188, "learning_rate": 2.428e-05, "loss": 156.3022, "step": 12140 }, { "epoch": 0.049087537421672045, "grad_norm": 992.3895874023438, "learning_rate": 2.43e-05, "loss": 173.4455, "step": 12150 }, { "epoch": 0.04912793868703968, "grad_norm": 5113.12451171875, "learning_rate": 2.432e-05, "loss": 201.295, "step": 12160 }, { "epoch": 0.04916833995240731, "grad_norm": 1270.9464111328125, "learning_rate": 2.434e-05, "loss": 185.7967, "step": 12170 }, { "epoch": 0.04920874121777494, "grad_norm": 1565.5892333984375, "learning_rate": 2.4360000000000004e-05, "loss": 170.4479, "step": 12180 }, { "epoch": 0.04924914248314257, "grad_norm": 1304.619140625, "learning_rate": 2.438e-05, "loss": 173.9784, "step": 12190 }, { "epoch": 0.0492895437485102, "grad_norm": 1918.267822265625, "learning_rate": 2.44e-05, "loss": 100.1017, "step": 12200 }, { "epoch": 0.049329945013877836, "grad_norm": 3872.838134765625, "learning_rate": 2.442e-05, "loss": 128.8565, "step": 12210 }, { "epoch": 0.04937034627924546, "grad_norm": 1211.8031005859375, "learning_rate": 2.4440000000000003e-05, "loss": 130.2835, "step": 12220 }, { "epoch": 0.049410747544613096, "grad_norm": 1228.3138427734375, "learning_rate": 2.4460000000000003e-05, "loss": 191.7344, "step": 12230 }, { "epoch": 0.04945114880998073, "grad_norm": 931.3762817382812, "learning_rate": 2.448e-05, "loss": 188.0092, "step": 12240 }, { "epoch": 0.04949155007534836, "grad_norm": 3841.882568359375, "learning_rate": 2.45e-05, "loss": 176.4354, "step": 12250 }, { "epoch": 0.04953195134071599, "grad_norm": 868.33935546875, "learning_rate": 2.4520000000000002e-05, "loss": 211.7608, "step": 12260 }, { "epoch": 0.04957235260608362, "grad_norm": 1211.6044921875, "learning_rate": 2.4540000000000002e-05, "loss": 130.6806, "step": 12270 }, { "epoch": 0.049612753871451254, "grad_norm": 1216.9310302734375, "learning_rate": 2.4560000000000002e-05, "loss": 163.6193, "step": 12280 }, { "epoch": 0.04965315513681889, "grad_norm": 745.621337890625, "learning_rate": 2.4580000000000002e-05, "loss": 108.5664, "step": 12290 }, { "epoch": 0.04969355640218651, "grad_norm": 737.6925048828125, "learning_rate": 2.46e-05, "loss": 163.5928, "step": 12300 }, { "epoch": 0.049733957667554146, "grad_norm": 1154.443115234375, "learning_rate": 2.462e-05, "loss": 114.6066, "step": 12310 }, { "epoch": 0.04977435893292178, "grad_norm": 1484.0311279296875, "learning_rate": 2.464e-05, "loss": 141.8092, "step": 12320 }, { "epoch": 0.04981476019828941, "grad_norm": 1149.4248046875, "learning_rate": 2.466e-05, "loss": 142.7456, "step": 12330 }, { "epoch": 0.04985516146365704, "grad_norm": 1364.304931640625, "learning_rate": 2.468e-05, "loss": 207.4074, "step": 12340 }, { "epoch": 0.04989556272902467, "grad_norm": 802.7201538085938, "learning_rate": 2.47e-05, "loss": 162.2597, "step": 12350 }, { "epoch": 0.049935963994392304, "grad_norm": 1634.9937744140625, "learning_rate": 2.472e-05, "loss": 158.4684, "step": 12360 }, { "epoch": 0.04997636525975994, "grad_norm": 2859.273681640625, "learning_rate": 2.4740000000000004e-05, "loss": 171.345, "step": 12370 }, { "epoch": 0.050016766525127564, "grad_norm": 1695.544189453125, "learning_rate": 2.476e-05, "loss": 153.0457, "step": 12380 }, { "epoch": 0.0500571677904952, "grad_norm": 1252.4989013671875, "learning_rate": 2.478e-05, "loss": 223.5171, "step": 12390 }, { "epoch": 0.05009756905586283, "grad_norm": 0.0, "learning_rate": 2.48e-05, "loss": 124.2808, "step": 12400 }, { "epoch": 0.05013797032123046, "grad_norm": 1869.185546875, "learning_rate": 2.4820000000000003e-05, "loss": 176.1387, "step": 12410 }, { "epoch": 0.05017837158659809, "grad_norm": 2657.873291015625, "learning_rate": 2.4840000000000003e-05, "loss": 136.5703, "step": 12420 }, { "epoch": 0.05021877285196572, "grad_norm": 1434.21337890625, "learning_rate": 2.486e-05, "loss": 185.8033, "step": 12430 }, { "epoch": 0.050259174117333355, "grad_norm": 4622.36328125, "learning_rate": 2.488e-05, "loss": 238.0834, "step": 12440 }, { "epoch": 0.05029957538270099, "grad_norm": 2024.818359375, "learning_rate": 2.4900000000000002e-05, "loss": 134.9255, "step": 12450 }, { "epoch": 0.050339976648068614, "grad_norm": 2336.821044921875, "learning_rate": 2.4920000000000002e-05, "loss": 112.3334, "step": 12460 }, { "epoch": 0.05038037791343625, "grad_norm": 617.8527221679688, "learning_rate": 2.4940000000000002e-05, "loss": 69.1087, "step": 12470 }, { "epoch": 0.05042077917880388, "grad_norm": 871.863037109375, "learning_rate": 2.496e-05, "loss": 201.0841, "step": 12480 }, { "epoch": 0.05046118044417151, "grad_norm": 1000.557373046875, "learning_rate": 2.498e-05, "loss": 167.9705, "step": 12490 }, { "epoch": 0.05050158170953914, "grad_norm": 1009.2303466796875, "learning_rate": 2.5e-05, "loss": 119.5033, "step": 12500 }, { "epoch": 0.05054198297490677, "grad_norm": 2307.47607421875, "learning_rate": 2.5019999999999998e-05, "loss": 138.7849, "step": 12510 }, { "epoch": 0.050582384240274406, "grad_norm": 2979.96533203125, "learning_rate": 2.504e-05, "loss": 132.3597, "step": 12520 }, { "epoch": 0.05062278550564204, "grad_norm": 1333.7869873046875, "learning_rate": 2.506e-05, "loss": 114.2453, "step": 12530 }, { "epoch": 0.050663186771009665, "grad_norm": 2323.83935546875, "learning_rate": 2.5080000000000004e-05, "loss": 199.8365, "step": 12540 }, { "epoch": 0.0507035880363773, "grad_norm": 988.7455444335938, "learning_rate": 2.51e-05, "loss": 158.2298, "step": 12550 }, { "epoch": 0.05074398930174493, "grad_norm": 1229.513427734375, "learning_rate": 2.512e-05, "loss": 177.6321, "step": 12560 }, { "epoch": 0.050784390567112564, "grad_norm": 598.7608032226562, "learning_rate": 2.5140000000000003e-05, "loss": 205.6968, "step": 12570 }, { "epoch": 0.05082479183248019, "grad_norm": 476.6854553222656, "learning_rate": 2.516e-05, "loss": 135.5016, "step": 12580 }, { "epoch": 0.05086519309784782, "grad_norm": 441.1506652832031, "learning_rate": 2.5180000000000003e-05, "loss": 183.0943, "step": 12590 }, { "epoch": 0.050905594363215456, "grad_norm": 861.925537109375, "learning_rate": 2.5200000000000003e-05, "loss": 110.7362, "step": 12600 }, { "epoch": 0.05094599562858309, "grad_norm": 1135.808837890625, "learning_rate": 2.522e-05, "loss": 157.3768, "step": 12610 }, { "epoch": 0.050986396893950715, "grad_norm": 764.6640014648438, "learning_rate": 2.5240000000000002e-05, "loss": 197.2682, "step": 12620 }, { "epoch": 0.05102679815931835, "grad_norm": 1185.298583984375, "learning_rate": 2.526e-05, "loss": 154.5542, "step": 12630 }, { "epoch": 0.05106719942468598, "grad_norm": 572.9112548828125, "learning_rate": 2.5280000000000005e-05, "loss": 185.382, "step": 12640 }, { "epoch": 0.051107600690053615, "grad_norm": 0.0, "learning_rate": 2.5300000000000002e-05, "loss": 155.8, "step": 12650 }, { "epoch": 0.05114800195542124, "grad_norm": 1676.220703125, "learning_rate": 2.5319999999999998e-05, "loss": 153.2075, "step": 12660 }, { "epoch": 0.051188403220788874, "grad_norm": 3056.052001953125, "learning_rate": 2.534e-05, "loss": 160.0424, "step": 12670 }, { "epoch": 0.05122880448615651, "grad_norm": 3853.407958984375, "learning_rate": 2.536e-05, "loss": 142.0041, "step": 12680 }, { "epoch": 0.05126920575152414, "grad_norm": 1022.783447265625, "learning_rate": 2.5380000000000004e-05, "loss": 173.1114, "step": 12690 }, { "epoch": 0.051309607016891766, "grad_norm": 609.7467041015625, "learning_rate": 2.54e-05, "loss": 124.7601, "step": 12700 }, { "epoch": 0.0513500082822594, "grad_norm": 871.4917602539062, "learning_rate": 2.542e-05, "loss": 107.3807, "step": 12710 }, { "epoch": 0.05139040954762703, "grad_norm": 1300.36279296875, "learning_rate": 2.5440000000000004e-05, "loss": 128.8927, "step": 12720 }, { "epoch": 0.051430810812994665, "grad_norm": 1378.396484375, "learning_rate": 2.546e-05, "loss": 179.4028, "step": 12730 }, { "epoch": 0.05147121207836229, "grad_norm": 5043.6845703125, "learning_rate": 2.5480000000000003e-05, "loss": 145.2417, "step": 12740 }, { "epoch": 0.051511613343729924, "grad_norm": 956.9037475585938, "learning_rate": 2.5500000000000003e-05, "loss": 122.089, "step": 12750 }, { "epoch": 0.05155201460909756, "grad_norm": 655.8726806640625, "learning_rate": 2.552e-05, "loss": 172.3547, "step": 12760 }, { "epoch": 0.05159241587446519, "grad_norm": 6090.908203125, "learning_rate": 2.5540000000000003e-05, "loss": 200.5886, "step": 12770 }, { "epoch": 0.051632817139832816, "grad_norm": 760.5556640625, "learning_rate": 2.556e-05, "loss": 168.6275, "step": 12780 }, { "epoch": 0.05167321840520045, "grad_norm": 1935.444091796875, "learning_rate": 2.5580000000000002e-05, "loss": 173.0487, "step": 12790 }, { "epoch": 0.05171361967056808, "grad_norm": 1355.29931640625, "learning_rate": 2.5600000000000002e-05, "loss": 169.9729, "step": 12800 }, { "epoch": 0.051754020935935716, "grad_norm": 1155.65380859375, "learning_rate": 2.562e-05, "loss": 230.3313, "step": 12810 }, { "epoch": 0.05179442220130334, "grad_norm": 1020.41064453125, "learning_rate": 2.5640000000000002e-05, "loss": 151.9028, "step": 12820 }, { "epoch": 0.051834823466670975, "grad_norm": 1026.9921875, "learning_rate": 2.566e-05, "loss": 194.6138, "step": 12830 }, { "epoch": 0.05187522473203861, "grad_norm": 859.4483032226562, "learning_rate": 2.5679999999999998e-05, "loss": 137.587, "step": 12840 }, { "epoch": 0.05191562599740624, "grad_norm": 1088.9954833984375, "learning_rate": 2.57e-05, "loss": 137.7595, "step": 12850 }, { "epoch": 0.05195602726277387, "grad_norm": 576.4170532226562, "learning_rate": 2.572e-05, "loss": 112.6303, "step": 12860 }, { "epoch": 0.0519964285281415, "grad_norm": 1509.91943359375, "learning_rate": 2.5740000000000004e-05, "loss": 152.7388, "step": 12870 }, { "epoch": 0.05203682979350913, "grad_norm": 1924.8671875, "learning_rate": 2.576e-05, "loss": 150.297, "step": 12880 }, { "epoch": 0.052077231058876766, "grad_norm": 4206.75048828125, "learning_rate": 2.5779999999999997e-05, "loss": 153.3789, "step": 12890 }, { "epoch": 0.05211763232424439, "grad_norm": 732.2725219726562, "learning_rate": 2.58e-05, "loss": 146.1245, "step": 12900 }, { "epoch": 0.052158033589612025, "grad_norm": 804.0054321289062, "learning_rate": 2.582e-05, "loss": 155.7175, "step": 12910 }, { "epoch": 0.05219843485497966, "grad_norm": 1622.7066650390625, "learning_rate": 2.5840000000000003e-05, "loss": 181.8288, "step": 12920 }, { "epoch": 0.05223883612034729, "grad_norm": 1736.3221435546875, "learning_rate": 2.586e-05, "loss": 156.0094, "step": 12930 }, { "epoch": 0.05227923738571492, "grad_norm": 1899.74072265625, "learning_rate": 2.588e-05, "loss": 161.6015, "step": 12940 }, { "epoch": 0.05231963865108255, "grad_norm": 2343.95556640625, "learning_rate": 2.5900000000000003e-05, "loss": 170.2185, "step": 12950 }, { "epoch": 0.052360039916450184, "grad_norm": 1888.6707763671875, "learning_rate": 2.592e-05, "loss": 204.9056, "step": 12960 }, { "epoch": 0.05240044118181782, "grad_norm": 498.68780517578125, "learning_rate": 2.5940000000000002e-05, "loss": 142.0052, "step": 12970 }, { "epoch": 0.05244084244718544, "grad_norm": 838.1019287109375, "learning_rate": 2.5960000000000002e-05, "loss": 113.003, "step": 12980 }, { "epoch": 0.052481243712553076, "grad_norm": 636.7835083007812, "learning_rate": 2.598e-05, "loss": 96.236, "step": 12990 }, { "epoch": 0.05252164497792071, "grad_norm": 1207.0146484375, "learning_rate": 2.6000000000000002e-05, "loss": 128.6797, "step": 13000 }, { "epoch": 0.05256204624328834, "grad_norm": 2183.191650390625, "learning_rate": 2.602e-05, "loss": 135.0814, "step": 13010 }, { "epoch": 0.05260244750865597, "grad_norm": 1018.5341796875, "learning_rate": 2.6040000000000005e-05, "loss": 83.9305, "step": 13020 }, { "epoch": 0.0526428487740236, "grad_norm": 437.8675537109375, "learning_rate": 2.606e-05, "loss": 141.287, "step": 13030 }, { "epoch": 0.052683250039391234, "grad_norm": 2055.59228515625, "learning_rate": 2.6079999999999998e-05, "loss": 131.6824, "step": 13040 }, { "epoch": 0.05272365130475887, "grad_norm": 1526.861572265625, "learning_rate": 2.61e-05, "loss": 104.1964, "step": 13050 }, { "epoch": 0.05276405257012649, "grad_norm": 1107.270263671875, "learning_rate": 2.612e-05, "loss": 119.3528, "step": 13060 }, { "epoch": 0.052804453835494126, "grad_norm": 0.0, "learning_rate": 2.6140000000000004e-05, "loss": 209.7389, "step": 13070 }, { "epoch": 0.05284485510086176, "grad_norm": 887.7672729492188, "learning_rate": 2.616e-05, "loss": 134.3163, "step": 13080 }, { "epoch": 0.05288525636622939, "grad_norm": 600.7372436523438, "learning_rate": 2.618e-05, "loss": 120.8824, "step": 13090 }, { "epoch": 0.05292565763159702, "grad_norm": 2771.222900390625, "learning_rate": 2.6200000000000003e-05, "loss": 116.1366, "step": 13100 }, { "epoch": 0.05296605889696465, "grad_norm": 1079.399658203125, "learning_rate": 2.622e-05, "loss": 120.0783, "step": 13110 }, { "epoch": 0.053006460162332285, "grad_norm": 3635.591796875, "learning_rate": 2.6240000000000003e-05, "loss": 114.0672, "step": 13120 }, { "epoch": 0.05304686142769992, "grad_norm": 967.0631713867188, "learning_rate": 2.6260000000000003e-05, "loss": 197.697, "step": 13130 }, { "epoch": 0.053087262693067544, "grad_norm": 540.1041259765625, "learning_rate": 2.628e-05, "loss": 169.7453, "step": 13140 }, { "epoch": 0.05312766395843518, "grad_norm": 1378.034912109375, "learning_rate": 2.6300000000000002e-05, "loss": 152.1042, "step": 13150 }, { "epoch": 0.05316806522380281, "grad_norm": 2139.15234375, "learning_rate": 2.632e-05, "loss": 219.7844, "step": 13160 }, { "epoch": 0.05320846648917044, "grad_norm": 2143.552734375, "learning_rate": 2.6340000000000002e-05, "loss": 152.3353, "step": 13170 }, { "epoch": 0.05324886775453807, "grad_norm": 1372.249755859375, "learning_rate": 2.6360000000000002e-05, "loss": 216.3558, "step": 13180 }, { "epoch": 0.0532892690199057, "grad_norm": 723.7531127929688, "learning_rate": 2.6379999999999998e-05, "loss": 106.8604, "step": 13190 }, { "epoch": 0.053329670285273335, "grad_norm": 1488.490234375, "learning_rate": 2.64e-05, "loss": 128.0357, "step": 13200 }, { "epoch": 0.05337007155064097, "grad_norm": 962.85498046875, "learning_rate": 2.642e-05, "loss": 214.9135, "step": 13210 }, { "epoch": 0.053410472816008595, "grad_norm": 916.577392578125, "learning_rate": 2.6440000000000004e-05, "loss": 100.3119, "step": 13220 }, { "epoch": 0.05345087408137623, "grad_norm": 520.3284912109375, "learning_rate": 2.646e-05, "loss": 159.4224, "step": 13230 }, { "epoch": 0.05349127534674386, "grad_norm": 1644.9588623046875, "learning_rate": 2.648e-05, "loss": 153.6036, "step": 13240 }, { "epoch": 0.053531676612111494, "grad_norm": 834.4176025390625, "learning_rate": 2.6500000000000004e-05, "loss": 179.9941, "step": 13250 }, { "epoch": 0.05357207787747912, "grad_norm": 753.1478881835938, "learning_rate": 2.652e-05, "loss": 114.1454, "step": 13260 }, { "epoch": 0.05361247914284675, "grad_norm": 1375.6973876953125, "learning_rate": 2.6540000000000003e-05, "loss": 121.5828, "step": 13270 }, { "epoch": 0.053652880408214386, "grad_norm": 837.9177856445312, "learning_rate": 2.6560000000000003e-05, "loss": 162.0316, "step": 13280 }, { "epoch": 0.05369328167358202, "grad_norm": 1957.6124267578125, "learning_rate": 2.658e-05, "loss": 123.4925, "step": 13290 }, { "epoch": 0.053733682938949645, "grad_norm": 2115.5908203125, "learning_rate": 2.6600000000000003e-05, "loss": 163.0786, "step": 13300 }, { "epoch": 0.05377408420431728, "grad_norm": 767.2696533203125, "learning_rate": 2.662e-05, "loss": 99.3506, "step": 13310 }, { "epoch": 0.05381448546968491, "grad_norm": 1096.5877685546875, "learning_rate": 2.6640000000000002e-05, "loss": 130.1019, "step": 13320 }, { "epoch": 0.053854886735052544, "grad_norm": 990.4303588867188, "learning_rate": 2.6660000000000002e-05, "loss": 218.8272, "step": 13330 }, { "epoch": 0.05389528800042017, "grad_norm": 1289.1722412109375, "learning_rate": 2.668e-05, "loss": 181.8528, "step": 13340 }, { "epoch": 0.0539356892657878, "grad_norm": 779.4696655273438, "learning_rate": 2.6700000000000002e-05, "loss": 129.5975, "step": 13350 }, { "epoch": 0.053976090531155436, "grad_norm": 928.5990600585938, "learning_rate": 2.672e-05, "loss": 186.8758, "step": 13360 }, { "epoch": 0.05401649179652307, "grad_norm": 3074.220947265625, "learning_rate": 2.6740000000000005e-05, "loss": 182.7136, "step": 13370 }, { "epoch": 0.054056893061890696, "grad_norm": 1353.44970703125, "learning_rate": 2.676e-05, "loss": 115.3092, "step": 13380 }, { "epoch": 0.05409729432725833, "grad_norm": 1833.181396484375, "learning_rate": 2.678e-05, "loss": 136.1055, "step": 13390 }, { "epoch": 0.05413769559262596, "grad_norm": 671.771484375, "learning_rate": 2.6800000000000004e-05, "loss": 101.5414, "step": 13400 }, { "epoch": 0.054178096857993595, "grad_norm": 1407.6787109375, "learning_rate": 2.682e-05, "loss": 140.1277, "step": 13410 }, { "epoch": 0.05421849812336122, "grad_norm": 999.6378784179688, "learning_rate": 2.6840000000000004e-05, "loss": 173.1913, "step": 13420 }, { "epoch": 0.054258899388728854, "grad_norm": 1126.044189453125, "learning_rate": 2.686e-05, "loss": 100.9075, "step": 13430 }, { "epoch": 0.05429930065409649, "grad_norm": 995.6368408203125, "learning_rate": 2.688e-05, "loss": 152.3883, "step": 13440 }, { "epoch": 0.05433970191946412, "grad_norm": 1857.345947265625, "learning_rate": 2.6900000000000003e-05, "loss": 164.2901, "step": 13450 }, { "epoch": 0.054380103184831746, "grad_norm": 1917.1810302734375, "learning_rate": 2.692e-05, "loss": 117.7432, "step": 13460 }, { "epoch": 0.05442050445019938, "grad_norm": 1812.2779541015625, "learning_rate": 2.694e-05, "loss": 158.6927, "step": 13470 }, { "epoch": 0.05446090571556701, "grad_norm": 823.9198608398438, "learning_rate": 2.6960000000000003e-05, "loss": 147.1588, "step": 13480 }, { "epoch": 0.054501306980934645, "grad_norm": 1157.1685791015625, "learning_rate": 2.698e-05, "loss": 167.8175, "step": 13490 }, { "epoch": 0.05454170824630227, "grad_norm": 1123.951416015625, "learning_rate": 2.7000000000000002e-05, "loss": 194.7915, "step": 13500 }, { "epoch": 0.054582109511669905, "grad_norm": 1402.1549072265625, "learning_rate": 2.7020000000000002e-05, "loss": 137.6079, "step": 13510 }, { "epoch": 0.05462251077703754, "grad_norm": 786.1826782226562, "learning_rate": 2.704e-05, "loss": 113.2719, "step": 13520 }, { "epoch": 0.05466291204240517, "grad_norm": 1365.093017578125, "learning_rate": 2.7060000000000002e-05, "loss": 104.7612, "step": 13530 }, { "epoch": 0.0547033133077728, "grad_norm": 1461.1785888671875, "learning_rate": 2.7079999999999998e-05, "loss": 168.2114, "step": 13540 }, { "epoch": 0.05474371457314043, "grad_norm": 1377.26806640625, "learning_rate": 2.7100000000000005e-05, "loss": 184.7103, "step": 13550 }, { "epoch": 0.05478411583850806, "grad_norm": 1345.474853515625, "learning_rate": 2.712e-05, "loss": 137.9565, "step": 13560 }, { "epoch": 0.054824517103875696, "grad_norm": 1087.7122802734375, "learning_rate": 2.7139999999999998e-05, "loss": 152.1091, "step": 13570 }, { "epoch": 0.05486491836924332, "grad_norm": 0.0, "learning_rate": 2.716e-05, "loss": 82.2364, "step": 13580 }, { "epoch": 0.054905319634610955, "grad_norm": 730.9200439453125, "learning_rate": 2.718e-05, "loss": 165.8833, "step": 13590 }, { "epoch": 0.05494572089997859, "grad_norm": 1368.674072265625, "learning_rate": 2.7200000000000004e-05, "loss": 121.1773, "step": 13600 }, { "epoch": 0.05498612216534622, "grad_norm": 1869.849853515625, "learning_rate": 2.722e-05, "loss": 231.0817, "step": 13610 }, { "epoch": 0.05502652343071385, "grad_norm": 976.0435180664062, "learning_rate": 2.724e-05, "loss": 111.1706, "step": 13620 }, { "epoch": 0.05506692469608148, "grad_norm": 1996.230224609375, "learning_rate": 2.7260000000000003e-05, "loss": 189.7502, "step": 13630 }, { "epoch": 0.05510732596144911, "grad_norm": 971.2109985351562, "learning_rate": 2.728e-05, "loss": 117.1644, "step": 13640 }, { "epoch": 0.055147727226816746, "grad_norm": 1315.8665771484375, "learning_rate": 2.7300000000000003e-05, "loss": 185.0801, "step": 13650 }, { "epoch": 0.05518812849218437, "grad_norm": 600.6631469726562, "learning_rate": 2.7320000000000003e-05, "loss": 157.1564, "step": 13660 }, { "epoch": 0.055228529757552006, "grad_norm": 888.3346557617188, "learning_rate": 2.734e-05, "loss": 117.6915, "step": 13670 }, { "epoch": 0.05526893102291964, "grad_norm": 808.2348022460938, "learning_rate": 2.7360000000000002e-05, "loss": 99.7357, "step": 13680 }, { "epoch": 0.05530933228828727, "grad_norm": 1063.2569580078125, "learning_rate": 2.738e-05, "loss": 143.7845, "step": 13690 }, { "epoch": 0.0553497335536549, "grad_norm": 1069.0965576171875, "learning_rate": 2.7400000000000002e-05, "loss": 155.6282, "step": 13700 }, { "epoch": 0.05539013481902253, "grad_norm": 1055.5029296875, "learning_rate": 2.7420000000000002e-05, "loss": 173.6117, "step": 13710 }, { "epoch": 0.055430536084390164, "grad_norm": 940.4775390625, "learning_rate": 2.7439999999999998e-05, "loss": 120.3455, "step": 13720 }, { "epoch": 0.0554709373497578, "grad_norm": 1010.4285888671875, "learning_rate": 2.746e-05, "loss": 142.2922, "step": 13730 }, { "epoch": 0.05551133861512542, "grad_norm": 1600.6968994140625, "learning_rate": 2.748e-05, "loss": 157.9479, "step": 13740 }, { "epoch": 0.055551739880493056, "grad_norm": 1101.0772705078125, "learning_rate": 2.7500000000000004e-05, "loss": 137.5087, "step": 13750 }, { "epoch": 0.05559214114586069, "grad_norm": 2070.27978515625, "learning_rate": 2.752e-05, "loss": 125.7709, "step": 13760 }, { "epoch": 0.05563254241122832, "grad_norm": 869.279052734375, "learning_rate": 2.754e-05, "loss": 135.6122, "step": 13770 }, { "epoch": 0.05567294367659595, "grad_norm": 635.1331787109375, "learning_rate": 2.7560000000000004e-05, "loss": 120.2631, "step": 13780 }, { "epoch": 0.05571334494196358, "grad_norm": 712.9791870117188, "learning_rate": 2.758e-05, "loss": 132.9366, "step": 13790 }, { "epoch": 0.055753746207331215, "grad_norm": 1173.3760986328125, "learning_rate": 2.7600000000000003e-05, "loss": 174.3566, "step": 13800 }, { "epoch": 0.05579414747269885, "grad_norm": 1058.1973876953125, "learning_rate": 2.762e-05, "loss": 101.473, "step": 13810 }, { "epoch": 0.055834548738066474, "grad_norm": 1767.0400390625, "learning_rate": 2.764e-05, "loss": 149.575, "step": 13820 }, { "epoch": 0.05587495000343411, "grad_norm": 438.71697998046875, "learning_rate": 2.7660000000000003e-05, "loss": 150.794, "step": 13830 }, { "epoch": 0.05591535126880174, "grad_norm": 1495.567626953125, "learning_rate": 2.768e-05, "loss": 116.9854, "step": 13840 }, { "epoch": 0.05595575253416937, "grad_norm": 1169.4012451171875, "learning_rate": 2.7700000000000002e-05, "loss": 158.6589, "step": 13850 }, { "epoch": 0.055996153799537, "grad_norm": 1633.1995849609375, "learning_rate": 2.7720000000000002e-05, "loss": 126.1669, "step": 13860 }, { "epoch": 0.05603655506490463, "grad_norm": 1052.2532958984375, "learning_rate": 2.774e-05, "loss": 180.1025, "step": 13870 }, { "epoch": 0.056076956330272265, "grad_norm": 1021.2620239257812, "learning_rate": 2.7760000000000002e-05, "loss": 136.4397, "step": 13880 }, { "epoch": 0.0561173575956399, "grad_norm": 886.8902587890625, "learning_rate": 2.778e-05, "loss": 91.0113, "step": 13890 }, { "epoch": 0.056157758861007524, "grad_norm": 1001.5513305664062, "learning_rate": 2.7800000000000005e-05, "loss": 110.4852, "step": 13900 }, { "epoch": 0.05619816012637516, "grad_norm": 668.9522094726562, "learning_rate": 2.782e-05, "loss": 126.04, "step": 13910 }, { "epoch": 0.05623856139174279, "grad_norm": 3181.50146484375, "learning_rate": 2.7839999999999998e-05, "loss": 175.8362, "step": 13920 }, { "epoch": 0.05627896265711042, "grad_norm": 1067.25732421875, "learning_rate": 2.7860000000000004e-05, "loss": 124.6091, "step": 13930 }, { "epoch": 0.05631936392247805, "grad_norm": 758.8274536132812, "learning_rate": 2.788e-05, "loss": 129.1508, "step": 13940 }, { "epoch": 0.05635976518784568, "grad_norm": 845.1571044921875, "learning_rate": 2.7900000000000004e-05, "loss": 164.5396, "step": 13950 }, { "epoch": 0.056400166453213316, "grad_norm": 812.546875, "learning_rate": 2.792e-05, "loss": 144.2103, "step": 13960 }, { "epoch": 0.05644056771858095, "grad_norm": 1005.88720703125, "learning_rate": 2.794e-05, "loss": 131.6613, "step": 13970 }, { "epoch": 0.056480968983948575, "grad_norm": 2054.029541015625, "learning_rate": 2.7960000000000003e-05, "loss": 127.4749, "step": 13980 }, { "epoch": 0.05652137024931621, "grad_norm": 466.14068603515625, "learning_rate": 2.798e-05, "loss": 163.5435, "step": 13990 }, { "epoch": 0.05656177151468384, "grad_norm": 599.2301025390625, "learning_rate": 2.8000000000000003e-05, "loss": 107.817, "step": 14000 }, { "epoch": 0.056602172780051474, "grad_norm": 2105.31640625, "learning_rate": 2.8020000000000003e-05, "loss": 197.3428, "step": 14010 }, { "epoch": 0.0566425740454191, "grad_norm": 953.76025390625, "learning_rate": 2.804e-05, "loss": 147.2875, "step": 14020 }, { "epoch": 0.05668297531078673, "grad_norm": 495.1427001953125, "learning_rate": 2.8060000000000002e-05, "loss": 132.651, "step": 14030 }, { "epoch": 0.056723376576154366, "grad_norm": 1009.431884765625, "learning_rate": 2.8080000000000002e-05, "loss": 145.7106, "step": 14040 }, { "epoch": 0.056763777841522, "grad_norm": 939.3583374023438, "learning_rate": 2.8100000000000005e-05, "loss": 139.3796, "step": 14050 }, { "epoch": 0.056804179106889625, "grad_norm": 1285.1646728515625, "learning_rate": 2.8120000000000002e-05, "loss": 142.4715, "step": 14060 }, { "epoch": 0.05684458037225726, "grad_norm": 1945.31494140625, "learning_rate": 2.8139999999999998e-05, "loss": 181.1451, "step": 14070 }, { "epoch": 0.05688498163762489, "grad_norm": 811.0607299804688, "learning_rate": 2.816e-05, "loss": 126.362, "step": 14080 }, { "epoch": 0.056925382902992525, "grad_norm": 4073.4345703125, "learning_rate": 2.818e-05, "loss": 116.4024, "step": 14090 }, { "epoch": 0.05696578416836015, "grad_norm": 3792.585205078125, "learning_rate": 2.8199999999999998e-05, "loss": 192.2604, "step": 14100 }, { "epoch": 0.057006185433727784, "grad_norm": 2066.12060546875, "learning_rate": 2.822e-05, "loss": 119.7598, "step": 14110 }, { "epoch": 0.05704658669909542, "grad_norm": 938.1848754882812, "learning_rate": 2.824e-05, "loss": 127.1674, "step": 14120 }, { "epoch": 0.05708698796446305, "grad_norm": 1010.5850830078125, "learning_rate": 2.8260000000000004e-05, "loss": 144.3951, "step": 14130 }, { "epoch": 0.057127389229830676, "grad_norm": 2251.332275390625, "learning_rate": 2.828e-05, "loss": 202.1318, "step": 14140 }, { "epoch": 0.05716779049519831, "grad_norm": 701.1483154296875, "learning_rate": 2.83e-05, "loss": 138.087, "step": 14150 }, { "epoch": 0.05720819176056594, "grad_norm": 646.7645263671875, "learning_rate": 2.8320000000000003e-05, "loss": 114.6259, "step": 14160 }, { "epoch": 0.057248593025933575, "grad_norm": 1258.2159423828125, "learning_rate": 2.834e-05, "loss": 134.9526, "step": 14170 }, { "epoch": 0.0572889942913012, "grad_norm": 994.9586181640625, "learning_rate": 2.8360000000000003e-05, "loss": 116.3534, "step": 14180 }, { "epoch": 0.057329395556668834, "grad_norm": 1028.46240234375, "learning_rate": 2.8380000000000003e-05, "loss": 140.937, "step": 14190 }, { "epoch": 0.05736979682203647, "grad_norm": 814.48681640625, "learning_rate": 2.84e-05, "loss": 117.722, "step": 14200 }, { "epoch": 0.0574101980874041, "grad_norm": 1542.7169189453125, "learning_rate": 2.8420000000000002e-05, "loss": 115.8539, "step": 14210 }, { "epoch": 0.057450599352771727, "grad_norm": 576.6505737304688, "learning_rate": 2.844e-05, "loss": 111.9559, "step": 14220 }, { "epoch": 0.05749100061813936, "grad_norm": 1681.9095458984375, "learning_rate": 2.8460000000000002e-05, "loss": 145.2461, "step": 14230 }, { "epoch": 0.05753140188350699, "grad_norm": 1178.3514404296875, "learning_rate": 2.8480000000000002e-05, "loss": 178.6759, "step": 14240 }, { "epoch": 0.057571803148874626, "grad_norm": 1500.439208984375, "learning_rate": 2.8499999999999998e-05, "loss": 131.998, "step": 14250 }, { "epoch": 0.05761220441424225, "grad_norm": 1533.505126953125, "learning_rate": 2.852e-05, "loss": 147.4367, "step": 14260 }, { "epoch": 0.057652605679609885, "grad_norm": 495.7720947265625, "learning_rate": 2.854e-05, "loss": 115.4109, "step": 14270 }, { "epoch": 0.05769300694497752, "grad_norm": 561.2579956054688, "learning_rate": 2.8560000000000004e-05, "loss": 147.6145, "step": 14280 }, { "epoch": 0.05773340821034515, "grad_norm": 571.2040405273438, "learning_rate": 2.858e-05, "loss": 197.6198, "step": 14290 }, { "epoch": 0.05777380947571278, "grad_norm": 2524.6904296875, "learning_rate": 2.86e-05, "loss": 171.4188, "step": 14300 }, { "epoch": 0.05781421074108041, "grad_norm": 2208.06396484375, "learning_rate": 2.8620000000000004e-05, "loss": 124.7192, "step": 14310 }, { "epoch": 0.05785461200644804, "grad_norm": 1544.6385498046875, "learning_rate": 2.864e-05, "loss": 145.8727, "step": 14320 }, { "epoch": 0.057895013271815676, "grad_norm": 953.2141723632812, "learning_rate": 2.8660000000000003e-05, "loss": 98.7387, "step": 14330 }, { "epoch": 0.0579354145371833, "grad_norm": 497.53021240234375, "learning_rate": 2.868e-05, "loss": 167.9052, "step": 14340 }, { "epoch": 0.057975815802550935, "grad_norm": 441.49951171875, "learning_rate": 2.87e-05, "loss": 166.814, "step": 14350 }, { "epoch": 0.05801621706791857, "grad_norm": 2393.28271484375, "learning_rate": 2.8720000000000003e-05, "loss": 155.3981, "step": 14360 }, { "epoch": 0.0580566183332862, "grad_norm": 5085.328125, "learning_rate": 2.874e-05, "loss": 126.6876, "step": 14370 }, { "epoch": 0.05809701959865383, "grad_norm": 1251.988525390625, "learning_rate": 2.8760000000000002e-05, "loss": 161.9475, "step": 14380 }, { "epoch": 0.05813742086402146, "grad_norm": 2122.52392578125, "learning_rate": 2.8780000000000002e-05, "loss": 177.3616, "step": 14390 }, { "epoch": 0.058177822129389094, "grad_norm": 886.3876342773438, "learning_rate": 2.88e-05, "loss": 102.448, "step": 14400 }, { "epoch": 0.05821822339475673, "grad_norm": 1496.39697265625, "learning_rate": 2.8820000000000002e-05, "loss": 177.4485, "step": 14410 }, { "epoch": 0.05825862466012435, "grad_norm": 1387.0819091796875, "learning_rate": 2.8840000000000002e-05, "loss": 134.0211, "step": 14420 }, { "epoch": 0.058299025925491986, "grad_norm": 571.4561157226562, "learning_rate": 2.8860000000000005e-05, "loss": 109.2302, "step": 14430 }, { "epoch": 0.05833942719085962, "grad_norm": 679.628173828125, "learning_rate": 2.888e-05, "loss": 124.9287, "step": 14440 }, { "epoch": 0.05837982845622725, "grad_norm": 1030.914306640625, "learning_rate": 2.8899999999999998e-05, "loss": 141.9723, "step": 14450 }, { "epoch": 0.05842022972159488, "grad_norm": 1213.34033203125, "learning_rate": 2.8920000000000004e-05, "loss": 133.645, "step": 14460 }, { "epoch": 0.05846063098696251, "grad_norm": 867.3865356445312, "learning_rate": 2.894e-05, "loss": 109.0037, "step": 14470 }, { "epoch": 0.058501032252330144, "grad_norm": 1019.241943359375, "learning_rate": 2.8960000000000004e-05, "loss": 179.6844, "step": 14480 }, { "epoch": 0.05854143351769778, "grad_norm": 3133.170166015625, "learning_rate": 2.898e-05, "loss": 141.274, "step": 14490 }, { "epoch": 0.058581834783065403, "grad_norm": 675.6682739257812, "learning_rate": 2.9e-05, "loss": 116.3539, "step": 14500 }, { "epoch": 0.05862223604843304, "grad_norm": 915.9588012695312, "learning_rate": 2.9020000000000003e-05, "loss": 95.1154, "step": 14510 }, { "epoch": 0.05866263731380067, "grad_norm": 1290.10546875, "learning_rate": 2.904e-05, "loss": 179.5605, "step": 14520 }, { "epoch": 0.0587030385791683, "grad_norm": 712.0923461914062, "learning_rate": 2.9060000000000003e-05, "loss": 146.6162, "step": 14530 }, { "epoch": 0.05874343984453593, "grad_norm": 893.6064453125, "learning_rate": 2.9080000000000003e-05, "loss": 104.5347, "step": 14540 }, { "epoch": 0.05878384110990356, "grad_norm": 1025.2301025390625, "learning_rate": 2.91e-05, "loss": 104.5477, "step": 14550 }, { "epoch": 0.058824242375271195, "grad_norm": 2338.9072265625, "learning_rate": 2.9120000000000002e-05, "loss": 142.9934, "step": 14560 }, { "epoch": 0.05886464364063883, "grad_norm": 633.3606567382812, "learning_rate": 2.9140000000000002e-05, "loss": 98.545, "step": 14570 }, { "epoch": 0.058905044906006454, "grad_norm": 1361.783935546875, "learning_rate": 2.9160000000000005e-05, "loss": 113.2945, "step": 14580 }, { "epoch": 0.05894544617137409, "grad_norm": 1828.3387451171875, "learning_rate": 2.9180000000000002e-05, "loss": 119.5862, "step": 14590 }, { "epoch": 0.05898584743674172, "grad_norm": 2555.387939453125, "learning_rate": 2.9199999999999998e-05, "loss": 177.2158, "step": 14600 }, { "epoch": 0.05902624870210935, "grad_norm": 2425.751220703125, "learning_rate": 2.922e-05, "loss": 118.8401, "step": 14610 }, { "epoch": 0.05906664996747698, "grad_norm": 758.7974243164062, "learning_rate": 2.924e-05, "loss": 136.7834, "step": 14620 }, { "epoch": 0.05910705123284461, "grad_norm": 2093.38720703125, "learning_rate": 2.9260000000000004e-05, "loss": 91.4101, "step": 14630 }, { "epoch": 0.059147452498212245, "grad_norm": 1140.317138671875, "learning_rate": 2.928e-05, "loss": 142.2483, "step": 14640 }, { "epoch": 0.05918785376357988, "grad_norm": 764.8568115234375, "learning_rate": 2.93e-05, "loss": 105.0373, "step": 14650 }, { "epoch": 0.059228255028947505, "grad_norm": 562.0804443359375, "learning_rate": 2.9320000000000004e-05, "loss": 140.6483, "step": 14660 }, { "epoch": 0.05926865629431514, "grad_norm": 1717.2381591796875, "learning_rate": 2.934e-05, "loss": 120.7989, "step": 14670 }, { "epoch": 0.05930905755968277, "grad_norm": 1295.689208984375, "learning_rate": 2.9360000000000003e-05, "loss": 145.4503, "step": 14680 }, { "epoch": 0.059349458825050404, "grad_norm": 1297.196533203125, "learning_rate": 2.9380000000000003e-05, "loss": 127.0184, "step": 14690 }, { "epoch": 0.05938986009041803, "grad_norm": 483.24420166015625, "learning_rate": 2.94e-05, "loss": 153.2117, "step": 14700 }, { "epoch": 0.05943026135578566, "grad_norm": 777.18408203125, "learning_rate": 2.9420000000000003e-05, "loss": 126.1312, "step": 14710 }, { "epoch": 0.059470662621153296, "grad_norm": 1387.6092529296875, "learning_rate": 2.944e-05, "loss": 158.3849, "step": 14720 }, { "epoch": 0.05951106388652093, "grad_norm": 3654.914306640625, "learning_rate": 2.946e-05, "loss": 177.4717, "step": 14730 }, { "epoch": 0.059551465151888555, "grad_norm": 2103.89599609375, "learning_rate": 2.9480000000000002e-05, "loss": 133.8275, "step": 14740 }, { "epoch": 0.05959186641725619, "grad_norm": 506.49200439453125, "learning_rate": 2.95e-05, "loss": 121.0761, "step": 14750 }, { "epoch": 0.05963226768262382, "grad_norm": 1055.931396484375, "learning_rate": 2.9520000000000002e-05, "loss": 143.0577, "step": 14760 }, { "epoch": 0.059672668947991454, "grad_norm": 745.9114379882812, "learning_rate": 2.9540000000000002e-05, "loss": 140.8658, "step": 14770 }, { "epoch": 0.05971307021335908, "grad_norm": 809.5819702148438, "learning_rate": 2.9559999999999998e-05, "loss": 104.4415, "step": 14780 }, { "epoch": 0.059753471478726713, "grad_norm": 1688.5380859375, "learning_rate": 2.958e-05, "loss": 152.457, "step": 14790 }, { "epoch": 0.05979387274409435, "grad_norm": 910.4462280273438, "learning_rate": 2.96e-05, "loss": 149.1814, "step": 14800 }, { "epoch": 0.05983427400946198, "grad_norm": 753.3027954101562, "learning_rate": 2.9620000000000004e-05, "loss": 142.1045, "step": 14810 }, { "epoch": 0.059874675274829606, "grad_norm": 545.16552734375, "learning_rate": 2.964e-05, "loss": 120.4504, "step": 14820 }, { "epoch": 0.05991507654019724, "grad_norm": 1327.6309814453125, "learning_rate": 2.9659999999999997e-05, "loss": 109.4865, "step": 14830 }, { "epoch": 0.05995547780556487, "grad_norm": 867.6209106445312, "learning_rate": 2.9680000000000004e-05, "loss": 128.9373, "step": 14840 }, { "epoch": 0.059995879070932505, "grad_norm": 552.6983032226562, "learning_rate": 2.97e-05, "loss": 148.3395, "step": 14850 }, { "epoch": 0.06003628033630013, "grad_norm": 1947.0716552734375, "learning_rate": 2.9720000000000003e-05, "loss": 115.2734, "step": 14860 }, { "epoch": 0.060076681601667764, "grad_norm": 793.457763671875, "learning_rate": 2.974e-05, "loss": 112.6054, "step": 14870 }, { "epoch": 0.0601170828670354, "grad_norm": 849.2798461914062, "learning_rate": 2.976e-05, "loss": 126.2524, "step": 14880 }, { "epoch": 0.06015748413240303, "grad_norm": 1418.7293701171875, "learning_rate": 2.9780000000000003e-05, "loss": 135.3141, "step": 14890 }, { "epoch": 0.060197885397770656, "grad_norm": 787.8895263671875, "learning_rate": 2.98e-05, "loss": 150.2098, "step": 14900 }, { "epoch": 0.06023828666313829, "grad_norm": 1818.129150390625, "learning_rate": 2.9820000000000002e-05, "loss": 142.2445, "step": 14910 }, { "epoch": 0.06027868792850592, "grad_norm": 910.6742553710938, "learning_rate": 2.9840000000000002e-05, "loss": 178.141, "step": 14920 }, { "epoch": 0.060319089193873555, "grad_norm": 2399.4091796875, "learning_rate": 2.986e-05, "loss": 175.9365, "step": 14930 }, { "epoch": 0.06035949045924118, "grad_norm": 1148.7041015625, "learning_rate": 2.9880000000000002e-05, "loss": 145.5652, "step": 14940 }, { "epoch": 0.060399891724608815, "grad_norm": 1392.08740234375, "learning_rate": 2.9900000000000002e-05, "loss": 139.3894, "step": 14950 }, { "epoch": 0.06044029298997645, "grad_norm": 878.8516235351562, "learning_rate": 2.9920000000000005e-05, "loss": 65.7636, "step": 14960 }, { "epoch": 0.06048069425534408, "grad_norm": 2386.329833984375, "learning_rate": 2.994e-05, "loss": 186.3342, "step": 14970 }, { "epoch": 0.06052109552071171, "grad_norm": 365.4548645019531, "learning_rate": 2.9959999999999998e-05, "loss": 117.0936, "step": 14980 }, { "epoch": 0.06056149678607934, "grad_norm": 557.1009521484375, "learning_rate": 2.998e-05, "loss": 98.8324, "step": 14990 }, { "epoch": 0.06060189805144697, "grad_norm": 1404.343017578125, "learning_rate": 3e-05, "loss": 129.4244, "step": 15000 }, { "epoch": 0.060642299316814606, "grad_norm": 1246.394775390625, "learning_rate": 3.0020000000000004e-05, "loss": 126.248, "step": 15010 }, { "epoch": 0.06068270058218223, "grad_norm": 1066.50048828125, "learning_rate": 3.004e-05, "loss": 124.5184, "step": 15020 }, { "epoch": 0.060723101847549865, "grad_norm": 705.3773803710938, "learning_rate": 3.006e-05, "loss": 133.4564, "step": 15030 }, { "epoch": 0.0607635031129175, "grad_norm": 874.49072265625, "learning_rate": 3.0080000000000003e-05, "loss": 111.9202, "step": 15040 }, { "epoch": 0.06080390437828513, "grad_norm": 725.5115356445312, "learning_rate": 3.01e-05, "loss": 116.5958, "step": 15050 }, { "epoch": 0.06084430564365276, "grad_norm": 1046.1822509765625, "learning_rate": 3.0120000000000003e-05, "loss": 108.4798, "step": 15060 }, { "epoch": 0.06088470690902039, "grad_norm": 1189.979248046875, "learning_rate": 3.0140000000000003e-05, "loss": 141.0663, "step": 15070 }, { "epoch": 0.060925108174388024, "grad_norm": 3112.551513671875, "learning_rate": 3.016e-05, "loss": 172.9219, "step": 15080 }, { "epoch": 0.06096550943975566, "grad_norm": 2147.462890625, "learning_rate": 3.0180000000000002e-05, "loss": 146.7627, "step": 15090 }, { "epoch": 0.06100591070512328, "grad_norm": 1260.1737060546875, "learning_rate": 3.02e-05, "loss": 151.8984, "step": 15100 }, { "epoch": 0.061046311970490916, "grad_norm": 1158.15771484375, "learning_rate": 3.0220000000000005e-05, "loss": 116.2214, "step": 15110 }, { "epoch": 0.06108671323585855, "grad_norm": 3242.16259765625, "learning_rate": 3.0240000000000002e-05, "loss": 153.6299, "step": 15120 }, { "epoch": 0.06112711450122618, "grad_norm": 837.7076416015625, "learning_rate": 3.0259999999999998e-05, "loss": 84.5595, "step": 15130 }, { "epoch": 0.06116751576659381, "grad_norm": 2217.368408203125, "learning_rate": 3.028e-05, "loss": 154.1678, "step": 15140 }, { "epoch": 0.06120791703196144, "grad_norm": 753.1808471679688, "learning_rate": 3.03e-05, "loss": 130.1928, "step": 15150 }, { "epoch": 0.061248318297329074, "grad_norm": 1219.47607421875, "learning_rate": 3.0320000000000004e-05, "loss": 169.753, "step": 15160 }, { "epoch": 0.06128871956269671, "grad_norm": 6474.40478515625, "learning_rate": 3.034e-05, "loss": 149.4557, "step": 15170 }, { "epoch": 0.06132912082806433, "grad_norm": 5858.99658203125, "learning_rate": 3.036e-05, "loss": 191.529, "step": 15180 }, { "epoch": 0.061369522093431966, "grad_norm": 1600.056396484375, "learning_rate": 3.0380000000000004e-05, "loss": 130.8423, "step": 15190 }, { "epoch": 0.0614099233587996, "grad_norm": 814.158935546875, "learning_rate": 3.04e-05, "loss": 148.7461, "step": 15200 }, { "epoch": 0.061450324624167225, "grad_norm": 844.9783935546875, "learning_rate": 3.0420000000000004e-05, "loss": 148.264, "step": 15210 }, { "epoch": 0.06149072588953486, "grad_norm": 1373.4136962890625, "learning_rate": 3.0440000000000003e-05, "loss": 172.1749, "step": 15220 }, { "epoch": 0.06153112715490249, "grad_norm": 897.79248046875, "learning_rate": 3.046e-05, "loss": 113.2348, "step": 15230 }, { "epoch": 0.061571528420270125, "grad_norm": 1343.7520751953125, "learning_rate": 3.0480000000000003e-05, "loss": 98.1409, "step": 15240 }, { "epoch": 0.06161192968563775, "grad_norm": 828.697998046875, "learning_rate": 3.05e-05, "loss": 162.0212, "step": 15250 }, { "epoch": 0.061652330951005384, "grad_norm": 366.743896484375, "learning_rate": 3.0520000000000006e-05, "loss": 75.6308, "step": 15260 }, { "epoch": 0.06169273221637302, "grad_norm": 2019.9920654296875, "learning_rate": 3.054e-05, "loss": 237.3679, "step": 15270 }, { "epoch": 0.06173313348174065, "grad_norm": 1361.7977294921875, "learning_rate": 3.056e-05, "loss": 108.2089, "step": 15280 }, { "epoch": 0.061773534747108276, "grad_norm": 1101.6763916015625, "learning_rate": 3.058e-05, "loss": 90.1339, "step": 15290 }, { "epoch": 0.06181393601247591, "grad_norm": 1785.1817626953125, "learning_rate": 3.06e-05, "loss": 142.6872, "step": 15300 }, { "epoch": 0.06185433727784354, "grad_norm": 1439.0386962890625, "learning_rate": 3.062e-05, "loss": 135.7678, "step": 15310 }, { "epoch": 0.061894738543211175, "grad_norm": 1499.1324462890625, "learning_rate": 3.0640000000000005e-05, "loss": 123.4259, "step": 15320 }, { "epoch": 0.0619351398085788, "grad_norm": 1074.73681640625, "learning_rate": 3.066e-05, "loss": 98.1504, "step": 15330 }, { "epoch": 0.061975541073946434, "grad_norm": 1967.539306640625, "learning_rate": 3.0680000000000004e-05, "loss": 151.5911, "step": 15340 }, { "epoch": 0.06201594233931407, "grad_norm": 485.7192077636719, "learning_rate": 3.07e-05, "loss": 137.3458, "step": 15350 }, { "epoch": 0.0620563436046817, "grad_norm": 753.7666015625, "learning_rate": 3.072e-05, "loss": 103.7652, "step": 15360 }, { "epoch": 0.06209674487004933, "grad_norm": 1353.95556640625, "learning_rate": 3.074e-05, "loss": 117.3775, "step": 15370 }, { "epoch": 0.06213714613541696, "grad_norm": 438.0009765625, "learning_rate": 3.076e-05, "loss": 115.8443, "step": 15380 }, { "epoch": 0.06217754740078459, "grad_norm": 1873.119140625, "learning_rate": 3.078e-05, "loss": 134.3721, "step": 15390 }, { "epoch": 0.062217948666152226, "grad_norm": 1559.0321044921875, "learning_rate": 3.08e-05, "loss": 146.9015, "step": 15400 }, { "epoch": 0.06225834993151985, "grad_norm": 953.1947021484375, "learning_rate": 3.082e-05, "loss": 146.0465, "step": 15410 }, { "epoch": 0.062298751196887485, "grad_norm": 1465.51806640625, "learning_rate": 3.084e-05, "loss": 127.4744, "step": 15420 }, { "epoch": 0.06233915246225512, "grad_norm": 871.5877685546875, "learning_rate": 3.086e-05, "loss": 110.5899, "step": 15430 }, { "epoch": 0.06237955372762275, "grad_norm": 2501.28564453125, "learning_rate": 3.088e-05, "loss": 202.4409, "step": 15440 }, { "epoch": 0.06241995499299038, "grad_norm": 1591.2879638671875, "learning_rate": 3.09e-05, "loss": 129.12, "step": 15450 }, { "epoch": 0.06246035625835801, "grad_norm": 838.3685302734375, "learning_rate": 3.092e-05, "loss": 102.4406, "step": 15460 }, { "epoch": 0.06250075752372564, "grad_norm": 1001.7224731445312, "learning_rate": 3.0940000000000005e-05, "loss": 108.1954, "step": 15470 }, { "epoch": 0.06254115878909328, "grad_norm": 1513.2293701171875, "learning_rate": 3.096e-05, "loss": 103.5236, "step": 15480 }, { "epoch": 0.0625815600544609, "grad_norm": 1613.0614013671875, "learning_rate": 3.0980000000000005e-05, "loss": 159.1191, "step": 15490 }, { "epoch": 0.06262196131982854, "grad_norm": 1024.2308349609375, "learning_rate": 3.1e-05, "loss": 131.2479, "step": 15500 }, { "epoch": 0.06266236258519617, "grad_norm": 2692.8251953125, "learning_rate": 3.102e-05, "loss": 125.2973, "step": 15510 }, { "epoch": 0.0627027638505638, "grad_norm": 1083.9881591796875, "learning_rate": 3.104e-05, "loss": 121.1962, "step": 15520 }, { "epoch": 0.06274316511593143, "grad_norm": 1520.9947509765625, "learning_rate": 3.106e-05, "loss": 127.8408, "step": 15530 }, { "epoch": 0.06278356638129906, "grad_norm": 871.9234008789062, "learning_rate": 3.108e-05, "loss": 107.3415, "step": 15540 }, { "epoch": 0.06282396764666669, "grad_norm": 1313.199462890625, "learning_rate": 3.1100000000000004e-05, "loss": 124.2825, "step": 15550 }, { "epoch": 0.06286436891203433, "grad_norm": 997.6826782226562, "learning_rate": 3.112e-05, "loss": 116.8165, "step": 15560 }, { "epoch": 0.06290477017740195, "grad_norm": 713.2454833984375, "learning_rate": 3.1140000000000003e-05, "loss": 142.1327, "step": 15570 }, { "epoch": 0.06294517144276959, "grad_norm": 841.623779296875, "learning_rate": 3.116e-05, "loss": 136.2796, "step": 15580 }, { "epoch": 0.06298557270813722, "grad_norm": 1948.3192138671875, "learning_rate": 3.118e-05, "loss": 192.9212, "step": 15590 }, { "epoch": 0.06302597397350485, "grad_norm": 1726.7506103515625, "learning_rate": 3.12e-05, "loss": 172.2863, "step": 15600 }, { "epoch": 0.06306637523887249, "grad_norm": 1170.4307861328125, "learning_rate": 3.122e-05, "loss": 130.6975, "step": 15610 }, { "epoch": 0.06310677650424011, "grad_norm": 1774.3511962890625, "learning_rate": 3.1240000000000006e-05, "loss": 137.2435, "step": 15620 }, { "epoch": 0.06314717776960774, "grad_norm": 4738.48193359375, "learning_rate": 3.126e-05, "loss": 140.0143, "step": 15630 }, { "epoch": 0.06318757903497538, "grad_norm": 793.2022094726562, "learning_rate": 3.1280000000000005e-05, "loss": 163.8472, "step": 15640 }, { "epoch": 0.063227980300343, "grad_norm": 0.0, "learning_rate": 3.13e-05, "loss": 113.3165, "step": 15650 }, { "epoch": 0.06326838156571064, "grad_norm": 1195.0482177734375, "learning_rate": 3.132e-05, "loss": 151.147, "step": 15660 }, { "epoch": 0.06330878283107827, "grad_norm": 1180.14453125, "learning_rate": 3.134e-05, "loss": 100.6629, "step": 15670 }, { "epoch": 0.0633491840964459, "grad_norm": 926.8880615234375, "learning_rate": 3.136e-05, "loss": 161.5127, "step": 15680 }, { "epoch": 0.06338958536181354, "grad_norm": 1517.244873046875, "learning_rate": 3.138e-05, "loss": 164.7377, "step": 15690 }, { "epoch": 0.06342998662718116, "grad_norm": 942.4938354492188, "learning_rate": 3.1400000000000004e-05, "loss": 127.0913, "step": 15700 }, { "epoch": 0.06347038789254879, "grad_norm": 519.3255004882812, "learning_rate": 3.142e-05, "loss": 110.9436, "step": 15710 }, { "epoch": 0.06351078915791643, "grad_norm": 931.0354614257812, "learning_rate": 3.1440000000000004e-05, "loss": 155.0716, "step": 15720 }, { "epoch": 0.06355119042328405, "grad_norm": 2942.558837890625, "learning_rate": 3.146e-05, "loss": 106.2107, "step": 15730 }, { "epoch": 0.0635915916886517, "grad_norm": 1383.4794921875, "learning_rate": 3.1480000000000004e-05, "loss": 146.2436, "step": 15740 }, { "epoch": 0.06363199295401932, "grad_norm": 3386.990966796875, "learning_rate": 3.15e-05, "loss": 148.2886, "step": 15750 }, { "epoch": 0.06367239421938695, "grad_norm": 1014.3907470703125, "learning_rate": 3.1519999999999996e-05, "loss": 120.5053, "step": 15760 }, { "epoch": 0.06371279548475459, "grad_norm": 762.3274536132812, "learning_rate": 3.154e-05, "loss": 139.9023, "step": 15770 }, { "epoch": 0.06375319675012221, "grad_norm": 1421.413330078125, "learning_rate": 3.156e-05, "loss": 146.9265, "step": 15780 }, { "epoch": 0.06379359801548984, "grad_norm": 1463.6063232421875, "learning_rate": 3.1580000000000006e-05, "loss": 134.0383, "step": 15790 }, { "epoch": 0.06383399928085748, "grad_norm": 1137.2342529296875, "learning_rate": 3.16e-05, "loss": 161.4233, "step": 15800 }, { "epoch": 0.0638744005462251, "grad_norm": 1729.1817626953125, "learning_rate": 3.162e-05, "loss": 109.8035, "step": 15810 }, { "epoch": 0.06391480181159274, "grad_norm": 757.51025390625, "learning_rate": 3.164e-05, "loss": 83.0763, "step": 15820 }, { "epoch": 0.06395520307696037, "grad_norm": 5324.74169921875, "learning_rate": 3.166e-05, "loss": 132.4072, "step": 15830 }, { "epoch": 0.063995604342328, "grad_norm": 1629.523681640625, "learning_rate": 3.168e-05, "loss": 84.4823, "step": 15840 }, { "epoch": 0.06403600560769564, "grad_norm": 1089.279296875, "learning_rate": 3.1700000000000005e-05, "loss": 130.4876, "step": 15850 }, { "epoch": 0.06407640687306326, "grad_norm": 1170.0994873046875, "learning_rate": 3.172e-05, "loss": 144.4939, "step": 15860 }, { "epoch": 0.06411680813843089, "grad_norm": 1094.1689453125, "learning_rate": 3.1740000000000004e-05, "loss": 124.1748, "step": 15870 }, { "epoch": 0.06415720940379853, "grad_norm": 742.95703125, "learning_rate": 3.176e-05, "loss": 85.3714, "step": 15880 }, { "epoch": 0.06419761066916616, "grad_norm": 1239.1507568359375, "learning_rate": 3.1780000000000004e-05, "loss": 215.5378, "step": 15890 }, { "epoch": 0.0642380119345338, "grad_norm": 734.71484375, "learning_rate": 3.18e-05, "loss": 109.2592, "step": 15900 }, { "epoch": 0.06427841319990142, "grad_norm": 1894.3482666015625, "learning_rate": 3.182e-05, "loss": 115.1967, "step": 15910 }, { "epoch": 0.06431881446526905, "grad_norm": 626.6444091796875, "learning_rate": 3.184e-05, "loss": 85.9778, "step": 15920 }, { "epoch": 0.06435921573063669, "grad_norm": 1511.507568359375, "learning_rate": 3.186e-05, "loss": 182.5304, "step": 15930 }, { "epoch": 0.06439961699600431, "grad_norm": 693.2582397460938, "learning_rate": 3.188e-05, "loss": 90.9457, "step": 15940 }, { "epoch": 0.06444001826137194, "grad_norm": 683.79150390625, "learning_rate": 3.19e-05, "loss": 98.913, "step": 15950 }, { "epoch": 0.06448041952673958, "grad_norm": 1719.1845703125, "learning_rate": 3.192e-05, "loss": 129.3339, "step": 15960 }, { "epoch": 0.0645208207921072, "grad_norm": 1166.0206298828125, "learning_rate": 3.194e-05, "loss": 153.576, "step": 15970 }, { "epoch": 0.06456122205747485, "grad_norm": 1580.4215087890625, "learning_rate": 3.196e-05, "loss": 170.2588, "step": 15980 }, { "epoch": 0.06460162332284247, "grad_norm": 984.2314453125, "learning_rate": 3.198e-05, "loss": 156.6156, "step": 15990 }, { "epoch": 0.0646420245882101, "grad_norm": 1595.7958984375, "learning_rate": 3.2000000000000005e-05, "loss": 174.3226, "step": 16000 }, { "epoch": 0.06468242585357774, "grad_norm": 12306.052734375, "learning_rate": 3.202e-05, "loss": 203.855, "step": 16010 }, { "epoch": 0.06472282711894536, "grad_norm": 2370.59765625, "learning_rate": 3.2040000000000005e-05, "loss": 144.5134, "step": 16020 }, { "epoch": 0.06476322838431299, "grad_norm": 1784.508056640625, "learning_rate": 3.206e-05, "loss": 159.992, "step": 16030 }, { "epoch": 0.06480362964968063, "grad_norm": 894.4352416992188, "learning_rate": 3.208e-05, "loss": 162.522, "step": 16040 }, { "epoch": 0.06484403091504826, "grad_norm": 1397.84814453125, "learning_rate": 3.21e-05, "loss": 113.1159, "step": 16050 }, { "epoch": 0.0648844321804159, "grad_norm": 2406.892578125, "learning_rate": 3.212e-05, "loss": 104.4846, "step": 16060 }, { "epoch": 0.06492483344578352, "grad_norm": 1434.818115234375, "learning_rate": 3.214e-05, "loss": 142.6461, "step": 16070 }, { "epoch": 0.06496523471115115, "grad_norm": 3077.646240234375, "learning_rate": 3.2160000000000004e-05, "loss": 101.5211, "step": 16080 }, { "epoch": 0.06500563597651879, "grad_norm": 1358.7418212890625, "learning_rate": 3.218e-05, "loss": 129.6278, "step": 16090 }, { "epoch": 0.06504603724188641, "grad_norm": 11825.310546875, "learning_rate": 3.2200000000000003e-05, "loss": 145.8424, "step": 16100 }, { "epoch": 0.06508643850725404, "grad_norm": 1621.6112060546875, "learning_rate": 3.222e-05, "loss": 117.4145, "step": 16110 }, { "epoch": 0.06512683977262168, "grad_norm": 1896.356689453125, "learning_rate": 3.224e-05, "loss": 103.7541, "step": 16120 }, { "epoch": 0.0651672410379893, "grad_norm": 8822.8671875, "learning_rate": 3.226e-05, "loss": 174.6907, "step": 16130 }, { "epoch": 0.06520764230335695, "grad_norm": 1795.896728515625, "learning_rate": 3.2279999999999996e-05, "loss": 187.9359, "step": 16140 }, { "epoch": 0.06524804356872457, "grad_norm": 870.3344116210938, "learning_rate": 3.2300000000000006e-05, "loss": 113.2782, "step": 16150 }, { "epoch": 0.0652884448340922, "grad_norm": 1710.718505859375, "learning_rate": 3.232e-05, "loss": 114.0318, "step": 16160 }, { "epoch": 0.06532884609945984, "grad_norm": 879.309326171875, "learning_rate": 3.2340000000000005e-05, "loss": 159.0129, "step": 16170 }, { "epoch": 0.06536924736482747, "grad_norm": 1179.8787841796875, "learning_rate": 3.236e-05, "loss": 108.0792, "step": 16180 }, { "epoch": 0.06540964863019509, "grad_norm": 1053.493896484375, "learning_rate": 3.238e-05, "loss": 162.0999, "step": 16190 }, { "epoch": 0.06545004989556273, "grad_norm": 1169.240478515625, "learning_rate": 3.24e-05, "loss": 117.8543, "step": 16200 }, { "epoch": 0.06549045116093036, "grad_norm": 1077.4576416015625, "learning_rate": 3.242e-05, "loss": 161.1472, "step": 16210 }, { "epoch": 0.065530852426298, "grad_norm": 827.2643432617188, "learning_rate": 3.244e-05, "loss": 122.5282, "step": 16220 }, { "epoch": 0.06557125369166562, "grad_norm": 1683.487060546875, "learning_rate": 3.2460000000000004e-05, "loss": 163.201, "step": 16230 }, { "epoch": 0.06561165495703325, "grad_norm": 1506.306884765625, "learning_rate": 3.248e-05, "loss": 123.3369, "step": 16240 }, { "epoch": 0.06565205622240089, "grad_norm": 410.9195861816406, "learning_rate": 3.2500000000000004e-05, "loss": 102.7201, "step": 16250 }, { "epoch": 0.06569245748776852, "grad_norm": 484.0295104980469, "learning_rate": 3.252e-05, "loss": 191.6438, "step": 16260 }, { "epoch": 0.06573285875313614, "grad_norm": 649.029541015625, "learning_rate": 3.2540000000000004e-05, "loss": 139.1572, "step": 16270 }, { "epoch": 0.06577326001850378, "grad_norm": 2727.947509765625, "learning_rate": 3.256e-05, "loss": 198.9751, "step": 16280 }, { "epoch": 0.06581366128387141, "grad_norm": 879.7364501953125, "learning_rate": 3.2579999999999996e-05, "loss": 119.9532, "step": 16290 }, { "epoch": 0.06585406254923905, "grad_norm": 2567.528564453125, "learning_rate": 3.26e-05, "loss": 182.2825, "step": 16300 }, { "epoch": 0.06589446381460667, "grad_norm": 1746.7139892578125, "learning_rate": 3.262e-05, "loss": 109.7434, "step": 16310 }, { "epoch": 0.0659348650799743, "grad_norm": 1511.91845703125, "learning_rate": 3.2640000000000006e-05, "loss": 117.7299, "step": 16320 }, { "epoch": 0.06597526634534194, "grad_norm": 626.0679321289062, "learning_rate": 3.266e-05, "loss": 154.5593, "step": 16330 }, { "epoch": 0.06601566761070957, "grad_norm": 1950.873779296875, "learning_rate": 3.268e-05, "loss": 134.659, "step": 16340 }, { "epoch": 0.06605606887607719, "grad_norm": 2815.42919921875, "learning_rate": 3.27e-05, "loss": 189.5417, "step": 16350 }, { "epoch": 0.06609647014144483, "grad_norm": 791.9197387695312, "learning_rate": 3.272e-05, "loss": 104.6571, "step": 16360 }, { "epoch": 0.06613687140681246, "grad_norm": 961.1111450195312, "learning_rate": 3.274e-05, "loss": 110.3847, "step": 16370 }, { "epoch": 0.0661772726721801, "grad_norm": 1224.700927734375, "learning_rate": 3.2760000000000005e-05, "loss": 129.7902, "step": 16380 }, { "epoch": 0.06621767393754772, "grad_norm": 892.2734985351562, "learning_rate": 3.278e-05, "loss": 131.0276, "step": 16390 }, { "epoch": 0.06625807520291535, "grad_norm": 2211.9521484375, "learning_rate": 3.2800000000000004e-05, "loss": 102.1123, "step": 16400 }, { "epoch": 0.06629847646828299, "grad_norm": 2223.035888671875, "learning_rate": 3.282e-05, "loss": 130.2351, "step": 16410 }, { "epoch": 0.06633887773365062, "grad_norm": 896.9553833007812, "learning_rate": 3.2840000000000004e-05, "loss": 113.7288, "step": 16420 }, { "epoch": 0.06637927899901824, "grad_norm": 0.0, "learning_rate": 3.286e-05, "loss": 70.063, "step": 16430 }, { "epoch": 0.06641968026438588, "grad_norm": 665.3837890625, "learning_rate": 3.288e-05, "loss": 102.8864, "step": 16440 }, { "epoch": 0.06646008152975351, "grad_norm": 529.5450439453125, "learning_rate": 3.29e-05, "loss": 107.0171, "step": 16450 }, { "epoch": 0.06650048279512115, "grad_norm": 445.30950927734375, "learning_rate": 3.292e-05, "loss": 118.4387, "step": 16460 }, { "epoch": 0.06654088406048878, "grad_norm": 1200.6650390625, "learning_rate": 3.2940000000000006e-05, "loss": 115.6011, "step": 16470 }, { "epoch": 0.0665812853258564, "grad_norm": 997.8995971679688, "learning_rate": 3.296e-05, "loss": 101.1121, "step": 16480 }, { "epoch": 0.06662168659122404, "grad_norm": 1181.9188232421875, "learning_rate": 3.298e-05, "loss": 94.8608, "step": 16490 }, { "epoch": 0.06666208785659167, "grad_norm": 816.2405395507812, "learning_rate": 3.3e-05, "loss": 125.1284, "step": 16500 }, { "epoch": 0.0667024891219593, "grad_norm": 703.4663696289062, "learning_rate": 3.302e-05, "loss": 123.1175, "step": 16510 }, { "epoch": 0.06674289038732693, "grad_norm": 671.1204833984375, "learning_rate": 3.304e-05, "loss": 133.0177, "step": 16520 }, { "epoch": 0.06678329165269456, "grad_norm": 635.96728515625, "learning_rate": 3.3060000000000005e-05, "loss": 117.4494, "step": 16530 }, { "epoch": 0.0668236929180622, "grad_norm": 0.0, "learning_rate": 3.308e-05, "loss": 84.0806, "step": 16540 }, { "epoch": 0.06686409418342983, "grad_norm": 1110.686279296875, "learning_rate": 3.3100000000000005e-05, "loss": 149.0371, "step": 16550 }, { "epoch": 0.06690449544879745, "grad_norm": 1048.002197265625, "learning_rate": 3.312e-05, "loss": 118.8765, "step": 16560 }, { "epoch": 0.06694489671416509, "grad_norm": 532.6653442382812, "learning_rate": 3.314e-05, "loss": 115.488, "step": 16570 }, { "epoch": 0.06698529797953272, "grad_norm": 1357.1689453125, "learning_rate": 3.316e-05, "loss": 121.7834, "step": 16580 }, { "epoch": 0.06702569924490034, "grad_norm": 3220.8486328125, "learning_rate": 3.318e-05, "loss": 146.6197, "step": 16590 }, { "epoch": 0.06706610051026798, "grad_norm": 809.4137573242188, "learning_rate": 3.32e-05, "loss": 157.1161, "step": 16600 }, { "epoch": 0.06710650177563561, "grad_norm": 479.6476135253906, "learning_rate": 3.3220000000000004e-05, "loss": 123.665, "step": 16610 }, { "epoch": 0.06714690304100325, "grad_norm": 1147.013916015625, "learning_rate": 3.324e-05, "loss": 151.3696, "step": 16620 }, { "epoch": 0.06718730430637088, "grad_norm": 1325.5657958984375, "learning_rate": 3.3260000000000003e-05, "loss": 228.0247, "step": 16630 }, { "epoch": 0.0672277055717385, "grad_norm": 1421.17578125, "learning_rate": 3.328e-05, "loss": 174.1318, "step": 16640 }, { "epoch": 0.06726810683710614, "grad_norm": 872.6542358398438, "learning_rate": 3.33e-05, "loss": 118.2037, "step": 16650 }, { "epoch": 0.06730850810247377, "grad_norm": 936.040283203125, "learning_rate": 3.332e-05, "loss": 113.6235, "step": 16660 }, { "epoch": 0.0673489093678414, "grad_norm": 829.9392700195312, "learning_rate": 3.3339999999999996e-05, "loss": 124.7012, "step": 16670 }, { "epoch": 0.06738931063320903, "grad_norm": 829.8602294921875, "learning_rate": 3.336e-05, "loss": 122.1225, "step": 16680 }, { "epoch": 0.06742971189857666, "grad_norm": 591.7499389648438, "learning_rate": 3.338e-05, "loss": 117.5648, "step": 16690 }, { "epoch": 0.0674701131639443, "grad_norm": 585.739013671875, "learning_rate": 3.3400000000000005e-05, "loss": 120.4557, "step": 16700 }, { "epoch": 0.06751051442931193, "grad_norm": 1151.9459228515625, "learning_rate": 3.342e-05, "loss": 122.6646, "step": 16710 }, { "epoch": 0.06755091569467955, "grad_norm": 1100.1302490234375, "learning_rate": 3.344e-05, "loss": 124.8112, "step": 16720 }, { "epoch": 0.06759131696004719, "grad_norm": 1049.39501953125, "learning_rate": 3.346e-05, "loss": 131.1703, "step": 16730 }, { "epoch": 0.06763171822541482, "grad_norm": 976.2811889648438, "learning_rate": 3.348e-05, "loss": 104.992, "step": 16740 }, { "epoch": 0.06767211949078245, "grad_norm": 1059.683837890625, "learning_rate": 3.35e-05, "loss": 109.1284, "step": 16750 }, { "epoch": 0.06771252075615009, "grad_norm": 1005.8837280273438, "learning_rate": 3.3520000000000004e-05, "loss": 107.3349, "step": 16760 }, { "epoch": 0.06775292202151771, "grad_norm": 850.0352783203125, "learning_rate": 3.354e-05, "loss": 126.588, "step": 16770 }, { "epoch": 0.06779332328688535, "grad_norm": 1083.05712890625, "learning_rate": 3.3560000000000004e-05, "loss": 139.6427, "step": 16780 }, { "epoch": 0.06783372455225298, "grad_norm": 1554.07080078125, "learning_rate": 3.358e-05, "loss": 145.7689, "step": 16790 }, { "epoch": 0.0678741258176206, "grad_norm": 3450.517333984375, "learning_rate": 3.3600000000000004e-05, "loss": 140.9184, "step": 16800 }, { "epoch": 0.06791452708298824, "grad_norm": 1706.325439453125, "learning_rate": 3.362e-05, "loss": 199.2792, "step": 16810 }, { "epoch": 0.06795492834835587, "grad_norm": 1185.1922607421875, "learning_rate": 3.3639999999999996e-05, "loss": 140.3882, "step": 16820 }, { "epoch": 0.0679953296137235, "grad_norm": 1169.992919921875, "learning_rate": 3.366e-05, "loss": 123.5538, "step": 16830 }, { "epoch": 0.06803573087909114, "grad_norm": 1750.4180908203125, "learning_rate": 3.368e-05, "loss": 159.0448, "step": 16840 }, { "epoch": 0.06807613214445876, "grad_norm": 1771.9908447265625, "learning_rate": 3.3700000000000006e-05, "loss": 153.4333, "step": 16850 }, { "epoch": 0.0681165334098264, "grad_norm": 2022.061279296875, "learning_rate": 3.372e-05, "loss": 139.1972, "step": 16860 }, { "epoch": 0.06815693467519403, "grad_norm": 1393.0201416015625, "learning_rate": 3.374e-05, "loss": 152.1366, "step": 16870 }, { "epoch": 0.06819733594056165, "grad_norm": 2371.81494140625, "learning_rate": 3.376e-05, "loss": 142.447, "step": 16880 }, { "epoch": 0.0682377372059293, "grad_norm": 1049.164794921875, "learning_rate": 3.378e-05, "loss": 115.6586, "step": 16890 }, { "epoch": 0.06827813847129692, "grad_norm": 1043.5615234375, "learning_rate": 3.38e-05, "loss": 174.1537, "step": 16900 }, { "epoch": 0.06831853973666455, "grad_norm": 1559.866455078125, "learning_rate": 3.3820000000000005e-05, "loss": 108.5037, "step": 16910 }, { "epoch": 0.06835894100203219, "grad_norm": 2484.740478515625, "learning_rate": 3.384e-05, "loss": 184.837, "step": 16920 }, { "epoch": 0.06839934226739981, "grad_norm": 775.7838745117188, "learning_rate": 3.3860000000000004e-05, "loss": 126.9209, "step": 16930 }, { "epoch": 0.06843974353276745, "grad_norm": 471.7488708496094, "learning_rate": 3.388e-05, "loss": 135.7176, "step": 16940 }, { "epoch": 0.06848014479813508, "grad_norm": 969.9213256835938, "learning_rate": 3.3900000000000004e-05, "loss": 95.2846, "step": 16950 }, { "epoch": 0.0685205460635027, "grad_norm": 1296.1826171875, "learning_rate": 3.392e-05, "loss": 159.3641, "step": 16960 }, { "epoch": 0.06856094732887034, "grad_norm": 2220.822021484375, "learning_rate": 3.394e-05, "loss": 171.076, "step": 16970 }, { "epoch": 0.06860134859423797, "grad_norm": 764.9531860351562, "learning_rate": 3.396e-05, "loss": 139.6084, "step": 16980 }, { "epoch": 0.0686417498596056, "grad_norm": 2453.2900390625, "learning_rate": 3.398e-05, "loss": 162.632, "step": 16990 }, { "epoch": 0.06868215112497324, "grad_norm": 1016.1632690429688, "learning_rate": 3.4000000000000007e-05, "loss": 236.2789, "step": 17000 }, { "epoch": 0.06872255239034086, "grad_norm": 1268.531494140625, "learning_rate": 3.402e-05, "loss": 119.6548, "step": 17010 }, { "epoch": 0.0687629536557085, "grad_norm": 0.0, "learning_rate": 3.404e-05, "loss": 115.3437, "step": 17020 }, { "epoch": 0.06880335492107613, "grad_norm": 867.4444580078125, "learning_rate": 3.406e-05, "loss": 173.6102, "step": 17030 }, { "epoch": 0.06884375618644376, "grad_norm": 1306.5975341796875, "learning_rate": 3.408e-05, "loss": 147.4384, "step": 17040 }, { "epoch": 0.0688841574518114, "grad_norm": 701.0490112304688, "learning_rate": 3.41e-05, "loss": 138.4343, "step": 17050 }, { "epoch": 0.06892455871717902, "grad_norm": 1092.9249267578125, "learning_rate": 3.412e-05, "loss": 112.9486, "step": 17060 }, { "epoch": 0.06896495998254665, "grad_norm": 1403.239990234375, "learning_rate": 3.414e-05, "loss": 106.8123, "step": 17070 }, { "epoch": 0.06900536124791429, "grad_norm": 2368.7109375, "learning_rate": 3.4160000000000005e-05, "loss": 90.9559, "step": 17080 }, { "epoch": 0.06904576251328191, "grad_norm": 723.0379028320312, "learning_rate": 3.418e-05, "loss": 92.075, "step": 17090 }, { "epoch": 0.06908616377864955, "grad_norm": 1454.345703125, "learning_rate": 3.4200000000000005e-05, "loss": 122.138, "step": 17100 }, { "epoch": 0.06912656504401718, "grad_norm": 849.4160766601562, "learning_rate": 3.422e-05, "loss": 132.3787, "step": 17110 }, { "epoch": 0.0691669663093848, "grad_norm": 1275.969482421875, "learning_rate": 3.424e-05, "loss": 80.5995, "step": 17120 }, { "epoch": 0.06920736757475245, "grad_norm": 977.9779052734375, "learning_rate": 3.426e-05, "loss": 126.0984, "step": 17130 }, { "epoch": 0.06924776884012007, "grad_norm": 1691.71826171875, "learning_rate": 3.4280000000000004e-05, "loss": 123.841, "step": 17140 }, { "epoch": 0.0692881701054877, "grad_norm": 3872.140869140625, "learning_rate": 3.430000000000001e-05, "loss": 148.4945, "step": 17150 }, { "epoch": 0.06932857137085534, "grad_norm": 3100.976318359375, "learning_rate": 3.4320000000000003e-05, "loss": 117.4309, "step": 17160 }, { "epoch": 0.06936897263622296, "grad_norm": 1637.591552734375, "learning_rate": 3.434e-05, "loss": 119.0649, "step": 17170 }, { "epoch": 0.0694093739015906, "grad_norm": 8878.0693359375, "learning_rate": 3.436e-05, "loss": 140.0768, "step": 17180 }, { "epoch": 0.06944977516695823, "grad_norm": 959.3743286132812, "learning_rate": 3.438e-05, "loss": 172.569, "step": 17190 }, { "epoch": 0.06949017643232586, "grad_norm": 520.9686889648438, "learning_rate": 3.4399999999999996e-05, "loss": 93.1335, "step": 17200 }, { "epoch": 0.0695305776976935, "grad_norm": 1169.4296875, "learning_rate": 3.442e-05, "loss": 109.4803, "step": 17210 }, { "epoch": 0.06957097896306112, "grad_norm": 2471.82568359375, "learning_rate": 3.444e-05, "loss": 149.8675, "step": 17220 }, { "epoch": 0.06961138022842875, "grad_norm": 1193.904541015625, "learning_rate": 3.4460000000000005e-05, "loss": 137.5065, "step": 17230 }, { "epoch": 0.06965178149379639, "grad_norm": 796.9132080078125, "learning_rate": 3.448e-05, "loss": 89.8475, "step": 17240 }, { "epoch": 0.06969218275916401, "grad_norm": 688.8414916992188, "learning_rate": 3.45e-05, "loss": 125.9576, "step": 17250 }, { "epoch": 0.06973258402453165, "grad_norm": 480.16009521484375, "learning_rate": 3.452e-05, "loss": 78.7464, "step": 17260 }, { "epoch": 0.06977298528989928, "grad_norm": 783.6867065429688, "learning_rate": 3.454e-05, "loss": 108.8815, "step": 17270 }, { "epoch": 0.06981338655526691, "grad_norm": 0.0, "learning_rate": 3.456e-05, "loss": 116.7318, "step": 17280 }, { "epoch": 0.06985378782063455, "grad_norm": 972.9898681640625, "learning_rate": 3.4580000000000004e-05, "loss": 163.7325, "step": 17290 }, { "epoch": 0.06989418908600217, "grad_norm": 2305.32470703125, "learning_rate": 3.46e-05, "loss": 102.6796, "step": 17300 }, { "epoch": 0.0699345903513698, "grad_norm": 985.5175170898438, "learning_rate": 3.4620000000000004e-05, "loss": 133.6453, "step": 17310 }, { "epoch": 0.06997499161673744, "grad_norm": 966.200439453125, "learning_rate": 3.464e-05, "loss": 133.9981, "step": 17320 }, { "epoch": 0.07001539288210507, "grad_norm": 582.17041015625, "learning_rate": 3.4660000000000004e-05, "loss": 119.5202, "step": 17330 }, { "epoch": 0.0700557941474727, "grad_norm": 841.1046752929688, "learning_rate": 3.468e-05, "loss": 133.1583, "step": 17340 }, { "epoch": 0.07009619541284033, "grad_norm": 675.8413696289062, "learning_rate": 3.4699999999999996e-05, "loss": 139.8855, "step": 17350 }, { "epoch": 0.07013659667820796, "grad_norm": 957.8898315429688, "learning_rate": 3.472e-05, "loss": 137.3781, "step": 17360 }, { "epoch": 0.0701769979435756, "grad_norm": 3106.9208984375, "learning_rate": 3.474e-05, "loss": 167.9274, "step": 17370 }, { "epoch": 0.07021739920894322, "grad_norm": 1493.3551025390625, "learning_rate": 3.4760000000000006e-05, "loss": 163.2431, "step": 17380 }, { "epoch": 0.07025780047431085, "grad_norm": 993.3938598632812, "learning_rate": 3.478e-05, "loss": 114.7312, "step": 17390 }, { "epoch": 0.07029820173967849, "grad_norm": 1034.116943359375, "learning_rate": 3.48e-05, "loss": 70.717, "step": 17400 }, { "epoch": 0.07033860300504612, "grad_norm": 921.6234741210938, "learning_rate": 3.482e-05, "loss": 185.619, "step": 17410 }, { "epoch": 0.07037900427041376, "grad_norm": 1703.0343017578125, "learning_rate": 3.484e-05, "loss": 142.7865, "step": 17420 }, { "epoch": 0.07041940553578138, "grad_norm": 1368.336181640625, "learning_rate": 3.486e-05, "loss": 116.2519, "step": 17430 }, { "epoch": 0.07045980680114901, "grad_norm": 1559.697265625, "learning_rate": 3.4880000000000005e-05, "loss": 110.2219, "step": 17440 }, { "epoch": 0.07050020806651665, "grad_norm": 3143.30517578125, "learning_rate": 3.49e-05, "loss": 91.7808, "step": 17450 }, { "epoch": 0.07054060933188427, "grad_norm": 1277.235595703125, "learning_rate": 3.4920000000000004e-05, "loss": 143.4617, "step": 17460 }, { "epoch": 0.0705810105972519, "grad_norm": 2107.630615234375, "learning_rate": 3.494e-05, "loss": 141.62, "step": 17470 }, { "epoch": 0.07062141186261954, "grad_norm": 916.9269409179688, "learning_rate": 3.4960000000000004e-05, "loss": 124.4125, "step": 17480 }, { "epoch": 0.07066181312798717, "grad_norm": 495.5371398925781, "learning_rate": 3.498e-05, "loss": 113.1787, "step": 17490 }, { "epoch": 0.0707022143933548, "grad_norm": 722.7784423828125, "learning_rate": 3.5e-05, "loss": 113.6147, "step": 17500 }, { "epoch": 0.07074261565872243, "grad_norm": 8005.68017578125, "learning_rate": 3.502e-05, "loss": 160.4929, "step": 17510 }, { "epoch": 0.07078301692409006, "grad_norm": 836.4596557617188, "learning_rate": 3.504e-05, "loss": 93.0578, "step": 17520 }, { "epoch": 0.0708234181894577, "grad_norm": 1631.076171875, "learning_rate": 3.5060000000000007e-05, "loss": 126.6171, "step": 17530 }, { "epoch": 0.07086381945482532, "grad_norm": 639.418212890625, "learning_rate": 3.508e-05, "loss": 105.7042, "step": 17540 }, { "epoch": 0.07090422072019295, "grad_norm": 1241.2578125, "learning_rate": 3.51e-05, "loss": 201.5257, "step": 17550 }, { "epoch": 0.07094462198556059, "grad_norm": 835.3104248046875, "learning_rate": 3.512e-05, "loss": 159.556, "step": 17560 }, { "epoch": 0.07098502325092822, "grad_norm": 565.4751586914062, "learning_rate": 3.514e-05, "loss": 93.872, "step": 17570 }, { "epoch": 0.07102542451629586, "grad_norm": 599.105712890625, "learning_rate": 3.516e-05, "loss": 132.7931, "step": 17580 }, { "epoch": 0.07106582578166348, "grad_norm": 1342.3175048828125, "learning_rate": 3.518e-05, "loss": 121.4404, "step": 17590 }, { "epoch": 0.07110622704703111, "grad_norm": 853.9281616210938, "learning_rate": 3.52e-05, "loss": 185.0158, "step": 17600 }, { "epoch": 0.07114662831239875, "grad_norm": 1526.38427734375, "learning_rate": 3.5220000000000005e-05, "loss": 130.5014, "step": 17610 }, { "epoch": 0.07118702957776638, "grad_norm": 0.0, "learning_rate": 3.524e-05, "loss": 155.2073, "step": 17620 }, { "epoch": 0.071227430843134, "grad_norm": 722.3214111328125, "learning_rate": 3.5260000000000005e-05, "loss": 98.7197, "step": 17630 }, { "epoch": 0.07126783210850164, "grad_norm": 769.6561889648438, "learning_rate": 3.528e-05, "loss": 98.4344, "step": 17640 }, { "epoch": 0.07130823337386927, "grad_norm": 1916.5062255859375, "learning_rate": 3.53e-05, "loss": 169.392, "step": 17650 }, { "epoch": 0.07134863463923691, "grad_norm": 790.8156127929688, "learning_rate": 3.532e-05, "loss": 112.2699, "step": 17660 }, { "epoch": 0.07138903590460453, "grad_norm": 0.0, "learning_rate": 3.5340000000000004e-05, "loss": 153.1781, "step": 17670 }, { "epoch": 0.07142943716997216, "grad_norm": 1370.72216796875, "learning_rate": 3.536000000000001e-05, "loss": 170.2639, "step": 17680 }, { "epoch": 0.0714698384353398, "grad_norm": 466.6986999511719, "learning_rate": 3.5380000000000003e-05, "loss": 140.3446, "step": 17690 }, { "epoch": 0.07151023970070743, "grad_norm": 1512.586181640625, "learning_rate": 3.54e-05, "loss": 123.2379, "step": 17700 }, { "epoch": 0.07155064096607505, "grad_norm": 1187.423828125, "learning_rate": 3.542e-05, "loss": 121.9325, "step": 17710 }, { "epoch": 0.07159104223144269, "grad_norm": 1491.8526611328125, "learning_rate": 3.544e-05, "loss": 97.88, "step": 17720 }, { "epoch": 0.07163144349681032, "grad_norm": 819.60400390625, "learning_rate": 3.546e-05, "loss": 136.5916, "step": 17730 }, { "epoch": 0.07167184476217796, "grad_norm": 650.4725952148438, "learning_rate": 3.548e-05, "loss": 100.4026, "step": 17740 }, { "epoch": 0.07171224602754558, "grad_norm": 822.773681640625, "learning_rate": 3.55e-05, "loss": 103.9474, "step": 17750 }, { "epoch": 0.07175264729291321, "grad_norm": 879.6776123046875, "learning_rate": 3.5520000000000006e-05, "loss": 90.2404, "step": 17760 }, { "epoch": 0.07179304855828085, "grad_norm": 1487.169921875, "learning_rate": 3.554e-05, "loss": 172.1061, "step": 17770 }, { "epoch": 0.07183344982364848, "grad_norm": 953.589111328125, "learning_rate": 3.5560000000000005e-05, "loss": 150.0098, "step": 17780 }, { "epoch": 0.0718738510890161, "grad_norm": 688.1337890625, "learning_rate": 3.558e-05, "loss": 78.3569, "step": 17790 }, { "epoch": 0.07191425235438374, "grad_norm": 789.90283203125, "learning_rate": 3.56e-05, "loss": 106.5888, "step": 17800 }, { "epoch": 0.07195465361975137, "grad_norm": 934.8272094726562, "learning_rate": 3.562e-05, "loss": 111.0842, "step": 17810 }, { "epoch": 0.07199505488511901, "grad_norm": 336.4629211425781, "learning_rate": 3.5640000000000004e-05, "loss": 99.3697, "step": 17820 }, { "epoch": 0.07203545615048663, "grad_norm": 3090.107666015625, "learning_rate": 3.566e-05, "loss": 97.4285, "step": 17830 }, { "epoch": 0.07207585741585426, "grad_norm": 1428.7044677734375, "learning_rate": 3.5680000000000004e-05, "loss": 118.1064, "step": 17840 }, { "epoch": 0.0721162586812219, "grad_norm": 2355.272705078125, "learning_rate": 3.57e-05, "loss": 151.7737, "step": 17850 }, { "epoch": 0.07215665994658953, "grad_norm": 1052.816650390625, "learning_rate": 3.5720000000000004e-05, "loss": 86.3124, "step": 17860 }, { "epoch": 0.07219706121195715, "grad_norm": 683.0614624023438, "learning_rate": 3.574e-05, "loss": 120.8411, "step": 17870 }, { "epoch": 0.0722374624773248, "grad_norm": 1161.9776611328125, "learning_rate": 3.5759999999999996e-05, "loss": 145.4579, "step": 17880 }, { "epoch": 0.07227786374269242, "grad_norm": 1213.32861328125, "learning_rate": 3.578e-05, "loss": 160.1318, "step": 17890 }, { "epoch": 0.07231826500806006, "grad_norm": 1079.72119140625, "learning_rate": 3.58e-05, "loss": 125.1568, "step": 17900 }, { "epoch": 0.07235866627342769, "grad_norm": 960.3634643554688, "learning_rate": 3.5820000000000006e-05, "loss": 116.5516, "step": 17910 }, { "epoch": 0.07239906753879531, "grad_norm": 1030.158447265625, "learning_rate": 3.584e-05, "loss": 78.2271, "step": 17920 }, { "epoch": 0.07243946880416295, "grad_norm": 812.413818359375, "learning_rate": 3.586e-05, "loss": 122.26, "step": 17930 }, { "epoch": 0.07247987006953058, "grad_norm": 2242.89794921875, "learning_rate": 3.588e-05, "loss": 150.4362, "step": 17940 }, { "epoch": 0.0725202713348982, "grad_norm": 1707.64306640625, "learning_rate": 3.59e-05, "loss": 107.4575, "step": 17950 }, { "epoch": 0.07256067260026584, "grad_norm": 1091.1968994140625, "learning_rate": 3.592e-05, "loss": 176.9033, "step": 17960 }, { "epoch": 0.07260107386563347, "grad_norm": 660.7951049804688, "learning_rate": 3.594e-05, "loss": 120.4594, "step": 17970 }, { "epoch": 0.07264147513100111, "grad_norm": 1081.6932373046875, "learning_rate": 3.596e-05, "loss": 101.9581, "step": 17980 }, { "epoch": 0.07268187639636874, "grad_norm": 1247.1197509765625, "learning_rate": 3.5980000000000004e-05, "loss": 90.3047, "step": 17990 }, { "epoch": 0.07272227766173636, "grad_norm": 2352.89208984375, "learning_rate": 3.6e-05, "loss": 121.5598, "step": 18000 }, { "epoch": 0.072762678927104, "grad_norm": 774.28466796875, "learning_rate": 3.6020000000000004e-05, "loss": 126.1789, "step": 18010 }, { "epoch": 0.07280308019247163, "grad_norm": 821.19384765625, "learning_rate": 3.604e-05, "loss": 100.4537, "step": 18020 }, { "epoch": 0.07284348145783925, "grad_norm": 919.6599731445312, "learning_rate": 3.606e-05, "loss": 85.4199, "step": 18030 }, { "epoch": 0.0728838827232069, "grad_norm": 805.6433715820312, "learning_rate": 3.608e-05, "loss": 76.5743, "step": 18040 }, { "epoch": 0.07292428398857452, "grad_norm": 1283.3468017578125, "learning_rate": 3.61e-05, "loss": 128.92, "step": 18050 }, { "epoch": 0.07296468525394216, "grad_norm": 2695.972900390625, "learning_rate": 3.6120000000000007e-05, "loss": 131.2778, "step": 18060 }, { "epoch": 0.07300508651930979, "grad_norm": 2971.26904296875, "learning_rate": 3.614e-05, "loss": 103.6507, "step": 18070 }, { "epoch": 0.07304548778467741, "grad_norm": 1631.7698974609375, "learning_rate": 3.616e-05, "loss": 101.8744, "step": 18080 }, { "epoch": 0.07308588905004505, "grad_norm": 707.8176879882812, "learning_rate": 3.618e-05, "loss": 106.0584, "step": 18090 }, { "epoch": 0.07312629031541268, "grad_norm": 497.0185546875, "learning_rate": 3.62e-05, "loss": 74.8409, "step": 18100 }, { "epoch": 0.0731666915807803, "grad_norm": 1669.726806640625, "learning_rate": 3.622e-05, "loss": 105.9762, "step": 18110 }, { "epoch": 0.07320709284614794, "grad_norm": 557.0554809570312, "learning_rate": 3.624e-05, "loss": 116.4344, "step": 18120 }, { "epoch": 0.07324749411151557, "grad_norm": 2284.02099609375, "learning_rate": 3.626e-05, "loss": 122.7274, "step": 18130 }, { "epoch": 0.07328789537688321, "grad_norm": 1483.209228515625, "learning_rate": 3.6280000000000005e-05, "loss": 100.2296, "step": 18140 }, { "epoch": 0.07332829664225084, "grad_norm": 2553.381591796875, "learning_rate": 3.63e-05, "loss": 134.3823, "step": 18150 }, { "epoch": 0.07336869790761846, "grad_norm": 465.40289306640625, "learning_rate": 3.6320000000000005e-05, "loss": 104.473, "step": 18160 }, { "epoch": 0.0734090991729861, "grad_norm": 2143.05419921875, "learning_rate": 3.634e-05, "loss": 133.1543, "step": 18170 }, { "epoch": 0.07344950043835373, "grad_norm": 854.1031494140625, "learning_rate": 3.636e-05, "loss": 139.8475, "step": 18180 }, { "epoch": 0.07348990170372136, "grad_norm": 812.4703979492188, "learning_rate": 3.638e-05, "loss": 124.2388, "step": 18190 }, { "epoch": 0.073530302969089, "grad_norm": 693.6560668945312, "learning_rate": 3.6400000000000004e-05, "loss": 125.4126, "step": 18200 }, { "epoch": 0.07357070423445662, "grad_norm": 1367.3717041015625, "learning_rate": 3.642000000000001e-05, "loss": 141.6495, "step": 18210 }, { "epoch": 0.07361110549982426, "grad_norm": 1077.287353515625, "learning_rate": 3.6440000000000003e-05, "loss": 130.6979, "step": 18220 }, { "epoch": 0.07365150676519189, "grad_norm": 2508.4072265625, "learning_rate": 3.646e-05, "loss": 125.5967, "step": 18230 }, { "epoch": 0.07369190803055951, "grad_norm": 2781.412841796875, "learning_rate": 3.648e-05, "loss": 149.0504, "step": 18240 }, { "epoch": 0.07373230929592715, "grad_norm": 1022.107177734375, "learning_rate": 3.65e-05, "loss": 128.4223, "step": 18250 }, { "epoch": 0.07377271056129478, "grad_norm": 700.0909423828125, "learning_rate": 3.652e-05, "loss": 95.3364, "step": 18260 }, { "epoch": 0.0738131118266624, "grad_norm": 992.6243896484375, "learning_rate": 3.654e-05, "loss": 130.7981, "step": 18270 }, { "epoch": 0.07385351309203005, "grad_norm": 1268.7449951171875, "learning_rate": 3.656e-05, "loss": 116.5302, "step": 18280 }, { "epoch": 0.07389391435739767, "grad_norm": 915.7352905273438, "learning_rate": 3.6580000000000006e-05, "loss": 141.5618, "step": 18290 }, { "epoch": 0.0739343156227653, "grad_norm": 3412.534912109375, "learning_rate": 3.66e-05, "loss": 188.1361, "step": 18300 }, { "epoch": 0.07397471688813294, "grad_norm": 1475.6259765625, "learning_rate": 3.6620000000000005e-05, "loss": 125.9999, "step": 18310 }, { "epoch": 0.07401511815350056, "grad_norm": 838.94140625, "learning_rate": 3.664e-05, "loss": 142.8819, "step": 18320 }, { "epoch": 0.0740555194188682, "grad_norm": 2300.423583984375, "learning_rate": 3.666e-05, "loss": 168.7538, "step": 18330 }, { "epoch": 0.07409592068423583, "grad_norm": 2023.9940185546875, "learning_rate": 3.668e-05, "loss": 126.4141, "step": 18340 }, { "epoch": 0.07413632194960346, "grad_norm": 884.4683227539062, "learning_rate": 3.6700000000000004e-05, "loss": 64.7707, "step": 18350 }, { "epoch": 0.0741767232149711, "grad_norm": 598.0726928710938, "learning_rate": 3.672000000000001e-05, "loss": 90.2243, "step": 18360 }, { "epoch": 0.07421712448033872, "grad_norm": 852.4734497070312, "learning_rate": 3.6740000000000004e-05, "loss": 193.19, "step": 18370 }, { "epoch": 0.07425752574570635, "grad_norm": 926.2811889648438, "learning_rate": 3.676e-05, "loss": 113.1817, "step": 18380 }, { "epoch": 0.07429792701107399, "grad_norm": 840.8629760742188, "learning_rate": 3.6780000000000004e-05, "loss": 128.7378, "step": 18390 }, { "epoch": 0.07433832827644161, "grad_norm": 1284.182373046875, "learning_rate": 3.68e-05, "loss": 119.9407, "step": 18400 }, { "epoch": 0.07437872954180925, "grad_norm": 3427.597900390625, "learning_rate": 3.682e-05, "loss": 168.6104, "step": 18410 }, { "epoch": 0.07441913080717688, "grad_norm": 1045.6478271484375, "learning_rate": 3.684e-05, "loss": 92.3087, "step": 18420 }, { "epoch": 0.07445953207254451, "grad_norm": 1453.0216064453125, "learning_rate": 3.686e-05, "loss": 128.912, "step": 18430 }, { "epoch": 0.07449993333791215, "grad_norm": 1257.60986328125, "learning_rate": 3.6880000000000006e-05, "loss": 67.0141, "step": 18440 }, { "epoch": 0.07454033460327977, "grad_norm": 900.5206298828125, "learning_rate": 3.69e-05, "loss": 94.6233, "step": 18450 }, { "epoch": 0.0745807358686474, "grad_norm": 769.1461181640625, "learning_rate": 3.692e-05, "loss": 128.7582, "step": 18460 }, { "epoch": 0.07462113713401504, "grad_norm": 1010.8888549804688, "learning_rate": 3.694e-05, "loss": 115.287, "step": 18470 }, { "epoch": 0.07466153839938267, "grad_norm": 974.634765625, "learning_rate": 3.696e-05, "loss": 97.2538, "step": 18480 }, { "epoch": 0.0747019396647503, "grad_norm": 998.1504516601562, "learning_rate": 3.698e-05, "loss": 123.2764, "step": 18490 }, { "epoch": 0.07474234093011793, "grad_norm": 1043.8447265625, "learning_rate": 3.7e-05, "loss": 105.7509, "step": 18500 }, { "epoch": 0.07478274219548556, "grad_norm": 551.1446533203125, "learning_rate": 3.702e-05, "loss": 92.3631, "step": 18510 }, { "epoch": 0.0748231434608532, "grad_norm": 677.5746459960938, "learning_rate": 3.7040000000000005e-05, "loss": 92.7502, "step": 18520 }, { "epoch": 0.07486354472622082, "grad_norm": 935.2184448242188, "learning_rate": 3.706e-05, "loss": 94.4942, "step": 18530 }, { "epoch": 0.07490394599158845, "grad_norm": 4195.58837890625, "learning_rate": 3.7080000000000004e-05, "loss": 180.4082, "step": 18540 }, { "epoch": 0.07494434725695609, "grad_norm": 553.2954711914062, "learning_rate": 3.71e-05, "loss": 136.8525, "step": 18550 }, { "epoch": 0.07498474852232372, "grad_norm": 1196.5914306640625, "learning_rate": 3.712e-05, "loss": 87.8535, "step": 18560 }, { "epoch": 0.07502514978769136, "grad_norm": 925.3016967773438, "learning_rate": 3.714e-05, "loss": 99.2028, "step": 18570 }, { "epoch": 0.07506555105305898, "grad_norm": 743.458984375, "learning_rate": 3.716e-05, "loss": 115.926, "step": 18580 }, { "epoch": 0.07510595231842661, "grad_norm": 452.0498962402344, "learning_rate": 3.7180000000000007e-05, "loss": 72.2173, "step": 18590 }, { "epoch": 0.07514635358379425, "grad_norm": 1277.8033447265625, "learning_rate": 3.72e-05, "loss": 196.1923, "step": 18600 }, { "epoch": 0.07518675484916187, "grad_norm": 613.0692138671875, "learning_rate": 3.722e-05, "loss": 165.0734, "step": 18610 }, { "epoch": 0.0752271561145295, "grad_norm": 707.7522583007812, "learning_rate": 3.724e-05, "loss": 146.3568, "step": 18620 }, { "epoch": 0.07526755737989714, "grad_norm": 892.3648681640625, "learning_rate": 3.726e-05, "loss": 119.203, "step": 18630 }, { "epoch": 0.07530795864526477, "grad_norm": 763.5734252929688, "learning_rate": 3.728e-05, "loss": 154.0482, "step": 18640 }, { "epoch": 0.0753483599106324, "grad_norm": 779.2640991210938, "learning_rate": 3.73e-05, "loss": 73.6996, "step": 18650 }, { "epoch": 0.07538876117600003, "grad_norm": 2393.424560546875, "learning_rate": 3.732e-05, "loss": 129.6301, "step": 18660 }, { "epoch": 0.07542916244136766, "grad_norm": 2221.367919921875, "learning_rate": 3.7340000000000005e-05, "loss": 132.7365, "step": 18670 }, { "epoch": 0.0754695637067353, "grad_norm": 1009.402587890625, "learning_rate": 3.736e-05, "loss": 110.5086, "step": 18680 }, { "epoch": 0.07550996497210292, "grad_norm": 5788.82275390625, "learning_rate": 3.7380000000000005e-05, "loss": 93.0837, "step": 18690 }, { "epoch": 0.07555036623747055, "grad_norm": 989.4839477539062, "learning_rate": 3.74e-05, "loss": 120.5923, "step": 18700 }, { "epoch": 0.07559076750283819, "grad_norm": 1187.691650390625, "learning_rate": 3.742e-05, "loss": 191.3571, "step": 18710 }, { "epoch": 0.07563116876820582, "grad_norm": 1042.4420166015625, "learning_rate": 3.744e-05, "loss": 123.3321, "step": 18720 }, { "epoch": 0.07567157003357346, "grad_norm": 593.2315063476562, "learning_rate": 3.7460000000000004e-05, "loss": 124.6118, "step": 18730 }, { "epoch": 0.07571197129894108, "grad_norm": 1148.8316650390625, "learning_rate": 3.748000000000001e-05, "loss": 129.3611, "step": 18740 }, { "epoch": 0.07575237256430871, "grad_norm": 721.3247680664062, "learning_rate": 3.7500000000000003e-05, "loss": 115.121, "step": 18750 }, { "epoch": 0.07579277382967635, "grad_norm": 1153.5718994140625, "learning_rate": 3.752e-05, "loss": 96.7754, "step": 18760 }, { "epoch": 0.07583317509504398, "grad_norm": 559.9420166015625, "learning_rate": 3.754e-05, "loss": 117.4526, "step": 18770 }, { "epoch": 0.0758735763604116, "grad_norm": 1740.1900634765625, "learning_rate": 3.756e-05, "loss": 103.682, "step": 18780 }, { "epoch": 0.07591397762577924, "grad_norm": 0.0, "learning_rate": 3.758e-05, "loss": 74.3376, "step": 18790 }, { "epoch": 0.07595437889114687, "grad_norm": 1305.6412353515625, "learning_rate": 3.76e-05, "loss": 114.3259, "step": 18800 }, { "epoch": 0.07599478015651451, "grad_norm": 1425.0380859375, "learning_rate": 3.762e-05, "loss": 171.961, "step": 18810 }, { "epoch": 0.07603518142188213, "grad_norm": 1266.49072265625, "learning_rate": 3.7640000000000006e-05, "loss": 166.5889, "step": 18820 }, { "epoch": 0.07607558268724976, "grad_norm": 1901.879638671875, "learning_rate": 3.766e-05, "loss": 132.9882, "step": 18830 }, { "epoch": 0.0761159839526174, "grad_norm": 690.49951171875, "learning_rate": 3.7680000000000005e-05, "loss": 84.5623, "step": 18840 }, { "epoch": 0.07615638521798503, "grad_norm": 650.4954833984375, "learning_rate": 3.77e-05, "loss": 135.0686, "step": 18850 }, { "epoch": 0.07619678648335265, "grad_norm": 2350.57373046875, "learning_rate": 3.772e-05, "loss": 133.9776, "step": 18860 }, { "epoch": 0.07623718774872029, "grad_norm": 1563.426513671875, "learning_rate": 3.774e-05, "loss": 101.9517, "step": 18870 }, { "epoch": 0.07627758901408792, "grad_norm": 797.4249267578125, "learning_rate": 3.776e-05, "loss": 120.9917, "step": 18880 }, { "epoch": 0.07631799027945556, "grad_norm": 1120.25732421875, "learning_rate": 3.778000000000001e-05, "loss": 115.9363, "step": 18890 }, { "epoch": 0.07635839154482318, "grad_norm": 1334.6461181640625, "learning_rate": 3.7800000000000004e-05, "loss": 147.4801, "step": 18900 }, { "epoch": 0.07639879281019081, "grad_norm": 841.9830322265625, "learning_rate": 3.782e-05, "loss": 127.5146, "step": 18910 }, { "epoch": 0.07643919407555845, "grad_norm": 951.7286987304688, "learning_rate": 3.7840000000000004e-05, "loss": 222.0843, "step": 18920 }, { "epoch": 0.07647959534092608, "grad_norm": 1344.3798828125, "learning_rate": 3.786e-05, "loss": 118.1746, "step": 18930 }, { "epoch": 0.0765199966062937, "grad_norm": 763.527587890625, "learning_rate": 3.788e-05, "loss": 74.0978, "step": 18940 }, { "epoch": 0.07656039787166134, "grad_norm": 800.6107177734375, "learning_rate": 3.79e-05, "loss": 105.7953, "step": 18950 }, { "epoch": 0.07660079913702897, "grad_norm": 1480.68505859375, "learning_rate": 3.792e-05, "loss": 127.3082, "step": 18960 }, { "epoch": 0.07664120040239661, "grad_norm": 334.6526184082031, "learning_rate": 3.7940000000000006e-05, "loss": 85.6625, "step": 18970 }, { "epoch": 0.07668160166776423, "grad_norm": 1548.9031982421875, "learning_rate": 3.796e-05, "loss": 131.035, "step": 18980 }, { "epoch": 0.07672200293313186, "grad_norm": 1739.5743408203125, "learning_rate": 3.7980000000000006e-05, "loss": 172.2465, "step": 18990 }, { "epoch": 0.0767624041984995, "grad_norm": 3091.291015625, "learning_rate": 3.8e-05, "loss": 160.8068, "step": 19000 }, { "epoch": 0.07680280546386713, "grad_norm": 1500.0257568359375, "learning_rate": 3.802e-05, "loss": 76.5043, "step": 19010 }, { "epoch": 0.07684320672923475, "grad_norm": 1217.782958984375, "learning_rate": 3.804e-05, "loss": 102.2421, "step": 19020 }, { "epoch": 0.0768836079946024, "grad_norm": 1387.4803466796875, "learning_rate": 3.806e-05, "loss": 94.5826, "step": 19030 }, { "epoch": 0.07692400925997002, "grad_norm": 1300.2745361328125, "learning_rate": 3.808e-05, "loss": 130.5162, "step": 19040 }, { "epoch": 0.07696441052533766, "grad_norm": 1954.7742919921875, "learning_rate": 3.8100000000000005e-05, "loss": 157.192, "step": 19050 }, { "epoch": 0.07700481179070529, "grad_norm": 2402.571533203125, "learning_rate": 3.812e-05, "loss": 181.6468, "step": 19060 }, { "epoch": 0.07704521305607291, "grad_norm": 1889.8359375, "learning_rate": 3.8140000000000004e-05, "loss": 112.0516, "step": 19070 }, { "epoch": 0.07708561432144055, "grad_norm": 834.8114624023438, "learning_rate": 3.816e-05, "loss": 104.1592, "step": 19080 }, { "epoch": 0.07712601558680818, "grad_norm": 1373.436279296875, "learning_rate": 3.818e-05, "loss": 121.8585, "step": 19090 }, { "epoch": 0.0771664168521758, "grad_norm": 1009.5123901367188, "learning_rate": 3.82e-05, "loss": 156.3473, "step": 19100 }, { "epoch": 0.07720681811754344, "grad_norm": 986.4491577148438, "learning_rate": 3.822e-05, "loss": 123.8396, "step": 19110 }, { "epoch": 0.07724721938291107, "grad_norm": 690.7348022460938, "learning_rate": 3.8240000000000007e-05, "loss": 81.2586, "step": 19120 }, { "epoch": 0.07728762064827871, "grad_norm": 603.4257202148438, "learning_rate": 3.826e-05, "loss": 148.1277, "step": 19130 }, { "epoch": 0.07732802191364634, "grad_norm": 610.265869140625, "learning_rate": 3.828e-05, "loss": 107.6773, "step": 19140 }, { "epoch": 0.07736842317901396, "grad_norm": 596.2022094726562, "learning_rate": 3.83e-05, "loss": 81.1043, "step": 19150 }, { "epoch": 0.0774088244443816, "grad_norm": 630.5447387695312, "learning_rate": 3.832e-05, "loss": 75.0851, "step": 19160 }, { "epoch": 0.07744922570974923, "grad_norm": 967.669921875, "learning_rate": 3.834e-05, "loss": 68.293, "step": 19170 }, { "epoch": 0.07748962697511685, "grad_norm": 7127.17236328125, "learning_rate": 3.836e-05, "loss": 117.3339, "step": 19180 }, { "epoch": 0.0775300282404845, "grad_norm": 978.7310180664062, "learning_rate": 3.838e-05, "loss": 169.6352, "step": 19190 }, { "epoch": 0.07757042950585212, "grad_norm": 1465.735595703125, "learning_rate": 3.8400000000000005e-05, "loss": 133.1634, "step": 19200 }, { "epoch": 0.07761083077121976, "grad_norm": 1282.474609375, "learning_rate": 3.842e-05, "loss": 142.1115, "step": 19210 }, { "epoch": 0.07765123203658739, "grad_norm": 576.253173828125, "learning_rate": 3.8440000000000005e-05, "loss": 167.8867, "step": 19220 }, { "epoch": 0.07769163330195501, "grad_norm": 911.6886596679688, "learning_rate": 3.846e-05, "loss": 106.331, "step": 19230 }, { "epoch": 0.07773203456732265, "grad_norm": 1108.574462890625, "learning_rate": 3.848e-05, "loss": 159.7625, "step": 19240 }, { "epoch": 0.07777243583269028, "grad_norm": 1084.5352783203125, "learning_rate": 3.85e-05, "loss": 102.5427, "step": 19250 }, { "epoch": 0.0778128370980579, "grad_norm": 2015.353515625, "learning_rate": 3.8520000000000004e-05, "loss": 152.4607, "step": 19260 }, { "epoch": 0.07785323836342554, "grad_norm": 2406.21484375, "learning_rate": 3.854000000000001e-05, "loss": 115.7677, "step": 19270 }, { "epoch": 0.07789363962879317, "grad_norm": 1180.568115234375, "learning_rate": 3.8560000000000004e-05, "loss": 87.3702, "step": 19280 }, { "epoch": 0.07793404089416081, "grad_norm": 546.580810546875, "learning_rate": 3.858e-05, "loss": 71.9833, "step": 19290 }, { "epoch": 0.07797444215952844, "grad_norm": 1000.6842651367188, "learning_rate": 3.86e-05, "loss": 127.7328, "step": 19300 }, { "epoch": 0.07801484342489606, "grad_norm": 717.3246459960938, "learning_rate": 3.862e-05, "loss": 95.6857, "step": 19310 }, { "epoch": 0.0780552446902637, "grad_norm": 4112.51220703125, "learning_rate": 3.864e-05, "loss": 199.442, "step": 19320 }, { "epoch": 0.07809564595563133, "grad_norm": 972.7607421875, "learning_rate": 3.866e-05, "loss": 168.3066, "step": 19330 }, { "epoch": 0.07813604722099896, "grad_norm": 1725.4652099609375, "learning_rate": 3.868e-05, "loss": 125.3838, "step": 19340 }, { "epoch": 0.0781764484863666, "grad_norm": 991.9950561523438, "learning_rate": 3.8700000000000006e-05, "loss": 110.0063, "step": 19350 }, { "epoch": 0.07821684975173422, "grad_norm": 570.45751953125, "learning_rate": 3.872e-05, "loss": 148.7821, "step": 19360 }, { "epoch": 0.07825725101710186, "grad_norm": 998.0093994140625, "learning_rate": 3.8740000000000005e-05, "loss": 106.5317, "step": 19370 }, { "epoch": 0.07829765228246949, "grad_norm": 2785.006103515625, "learning_rate": 3.876e-05, "loss": 174.5811, "step": 19380 }, { "epoch": 0.07833805354783711, "grad_norm": 1203.6185302734375, "learning_rate": 3.878e-05, "loss": 159.747, "step": 19390 }, { "epoch": 0.07837845481320475, "grad_norm": 1574.8548583984375, "learning_rate": 3.88e-05, "loss": 161.0951, "step": 19400 }, { "epoch": 0.07841885607857238, "grad_norm": 3679.686279296875, "learning_rate": 3.882e-05, "loss": 140.7081, "step": 19410 }, { "epoch": 0.07845925734394, "grad_norm": 3366.86669921875, "learning_rate": 3.884e-05, "loss": 129.2223, "step": 19420 }, { "epoch": 0.07849965860930765, "grad_norm": 979.6845703125, "learning_rate": 3.8860000000000004e-05, "loss": 123.001, "step": 19430 }, { "epoch": 0.07854005987467527, "grad_norm": 706.135498046875, "learning_rate": 3.888e-05, "loss": 107.5821, "step": 19440 }, { "epoch": 0.07858046114004291, "grad_norm": 3030.436279296875, "learning_rate": 3.8900000000000004e-05, "loss": 136.2499, "step": 19450 }, { "epoch": 0.07862086240541054, "grad_norm": 341.5601501464844, "learning_rate": 3.892e-05, "loss": 87.6859, "step": 19460 }, { "epoch": 0.07866126367077816, "grad_norm": 890.729248046875, "learning_rate": 3.894e-05, "loss": 130.7491, "step": 19470 }, { "epoch": 0.0787016649361458, "grad_norm": 2620.10400390625, "learning_rate": 3.896e-05, "loss": 100.2728, "step": 19480 }, { "epoch": 0.07874206620151343, "grad_norm": 2374.006103515625, "learning_rate": 3.898e-05, "loss": 102.7446, "step": 19490 }, { "epoch": 0.07878246746688106, "grad_norm": 661.5973510742188, "learning_rate": 3.9000000000000006e-05, "loss": 103.1776, "step": 19500 }, { "epoch": 0.0788228687322487, "grad_norm": 496.9710693359375, "learning_rate": 3.902e-05, "loss": 104.6827, "step": 19510 }, { "epoch": 0.07886326999761632, "grad_norm": 950.9691772460938, "learning_rate": 3.9040000000000006e-05, "loss": 104.4576, "step": 19520 }, { "epoch": 0.07890367126298396, "grad_norm": 1480.8165283203125, "learning_rate": 3.906e-05, "loss": 123.9244, "step": 19530 }, { "epoch": 0.07894407252835159, "grad_norm": 1668.803955078125, "learning_rate": 3.908e-05, "loss": 114.0788, "step": 19540 }, { "epoch": 0.07898447379371921, "grad_norm": 998.9266357421875, "learning_rate": 3.91e-05, "loss": 116.9195, "step": 19550 }, { "epoch": 0.07902487505908685, "grad_norm": 685.7608032226562, "learning_rate": 3.912e-05, "loss": 120.8969, "step": 19560 }, { "epoch": 0.07906527632445448, "grad_norm": 1155.7894287109375, "learning_rate": 3.914e-05, "loss": 121.0664, "step": 19570 }, { "epoch": 0.07910567758982211, "grad_norm": 959.234130859375, "learning_rate": 3.9160000000000005e-05, "loss": 96.3062, "step": 19580 }, { "epoch": 0.07914607885518975, "grad_norm": 1889.412109375, "learning_rate": 3.918e-05, "loss": 81.9331, "step": 19590 }, { "epoch": 0.07918648012055737, "grad_norm": 966.55078125, "learning_rate": 3.9200000000000004e-05, "loss": 164.067, "step": 19600 }, { "epoch": 0.07922688138592501, "grad_norm": 645.4921875, "learning_rate": 3.922e-05, "loss": 126.4012, "step": 19610 }, { "epoch": 0.07926728265129264, "grad_norm": 1248.7474365234375, "learning_rate": 3.9240000000000004e-05, "loss": 109.2031, "step": 19620 }, { "epoch": 0.07930768391666027, "grad_norm": 2941.3408203125, "learning_rate": 3.926e-05, "loss": 100.5554, "step": 19630 }, { "epoch": 0.0793480851820279, "grad_norm": 1487.9503173828125, "learning_rate": 3.9280000000000003e-05, "loss": 120.6536, "step": 19640 }, { "epoch": 0.07938848644739553, "grad_norm": 956.5473022460938, "learning_rate": 3.9300000000000007e-05, "loss": 112.047, "step": 19650 }, { "epoch": 0.07942888771276316, "grad_norm": 675.1141967773438, "learning_rate": 3.932e-05, "loss": 121.3393, "step": 19660 }, { "epoch": 0.0794692889781308, "grad_norm": 1062.9857177734375, "learning_rate": 3.9340000000000006e-05, "loss": 118.6263, "step": 19670 }, { "epoch": 0.07950969024349842, "grad_norm": 796.38525390625, "learning_rate": 3.936e-05, "loss": 93.5364, "step": 19680 }, { "epoch": 0.07955009150886606, "grad_norm": 712.4339599609375, "learning_rate": 3.938e-05, "loss": 169.9139, "step": 19690 }, { "epoch": 0.07959049277423369, "grad_norm": 348.9084777832031, "learning_rate": 3.94e-05, "loss": 98.5714, "step": 19700 }, { "epoch": 0.07963089403960132, "grad_norm": 1002.7638549804688, "learning_rate": 3.942e-05, "loss": 101.6811, "step": 19710 }, { "epoch": 0.07967129530496896, "grad_norm": 1141.4864501953125, "learning_rate": 3.944e-05, "loss": 99.0602, "step": 19720 }, { "epoch": 0.07971169657033658, "grad_norm": 1789.052001953125, "learning_rate": 3.9460000000000005e-05, "loss": 146.9852, "step": 19730 }, { "epoch": 0.07975209783570421, "grad_norm": 1004.76611328125, "learning_rate": 3.948e-05, "loss": 108.5757, "step": 19740 }, { "epoch": 0.07979249910107185, "grad_norm": 1779.8487548828125, "learning_rate": 3.9500000000000005e-05, "loss": 92.2654, "step": 19750 }, { "epoch": 0.07983290036643947, "grad_norm": 1891.206787109375, "learning_rate": 3.952e-05, "loss": 130.2949, "step": 19760 }, { "epoch": 0.07987330163180711, "grad_norm": 1210.8375244140625, "learning_rate": 3.954e-05, "loss": 149.1875, "step": 19770 }, { "epoch": 0.07991370289717474, "grad_norm": 1035.046630859375, "learning_rate": 3.956e-05, "loss": 90.7728, "step": 19780 }, { "epoch": 0.07995410416254237, "grad_norm": 1166.8427734375, "learning_rate": 3.958e-05, "loss": 113.0428, "step": 19790 }, { "epoch": 0.07999450542791, "grad_norm": 1342.7369384765625, "learning_rate": 3.960000000000001e-05, "loss": 84.0289, "step": 19800 }, { "epoch": 0.08003490669327763, "grad_norm": 930.572509765625, "learning_rate": 3.9620000000000004e-05, "loss": 137.8586, "step": 19810 }, { "epoch": 0.08007530795864526, "grad_norm": 537.0020141601562, "learning_rate": 3.964e-05, "loss": 136.4454, "step": 19820 }, { "epoch": 0.0801157092240129, "grad_norm": 8671.7001953125, "learning_rate": 3.966e-05, "loss": 194.5568, "step": 19830 }, { "epoch": 0.08015611048938052, "grad_norm": 1121.135986328125, "learning_rate": 3.968e-05, "loss": 171.3159, "step": 19840 }, { "epoch": 0.08019651175474816, "grad_norm": 875.9639282226562, "learning_rate": 3.97e-05, "loss": 138.4204, "step": 19850 }, { "epoch": 0.08023691302011579, "grad_norm": 1031.24462890625, "learning_rate": 3.972e-05, "loss": 70.7438, "step": 19860 }, { "epoch": 0.08027731428548342, "grad_norm": 844.333251953125, "learning_rate": 3.974e-05, "loss": 86.4556, "step": 19870 }, { "epoch": 0.08031771555085106, "grad_norm": 775.6649780273438, "learning_rate": 3.9760000000000006e-05, "loss": 141.9916, "step": 19880 }, { "epoch": 0.08035811681621868, "grad_norm": 579.8970947265625, "learning_rate": 3.978e-05, "loss": 131.3396, "step": 19890 }, { "epoch": 0.08039851808158631, "grad_norm": 0.0, "learning_rate": 3.9800000000000005e-05, "loss": 98.0002, "step": 19900 }, { "epoch": 0.08043891934695395, "grad_norm": 1350.4500732421875, "learning_rate": 3.982e-05, "loss": 221.3071, "step": 19910 }, { "epoch": 0.08047932061232158, "grad_norm": 809.1064453125, "learning_rate": 3.984e-05, "loss": 82.145, "step": 19920 }, { "epoch": 0.08051972187768922, "grad_norm": 1032.8472900390625, "learning_rate": 3.986e-05, "loss": 90.5167, "step": 19930 }, { "epoch": 0.08056012314305684, "grad_norm": 548.9085693359375, "learning_rate": 3.988e-05, "loss": 82.506, "step": 19940 }, { "epoch": 0.08060052440842447, "grad_norm": 828.6427001953125, "learning_rate": 3.99e-05, "loss": 103.581, "step": 19950 }, { "epoch": 0.08064092567379211, "grad_norm": 1131.4522705078125, "learning_rate": 3.9920000000000004e-05, "loss": 124.3697, "step": 19960 }, { "epoch": 0.08068132693915973, "grad_norm": 3095.481689453125, "learning_rate": 3.994e-05, "loss": 180.1047, "step": 19970 }, { "epoch": 0.08072172820452736, "grad_norm": 1326.9815673828125, "learning_rate": 3.9960000000000004e-05, "loss": 111.703, "step": 19980 }, { "epoch": 0.080762129469895, "grad_norm": 563.6680297851562, "learning_rate": 3.998e-05, "loss": 86.8286, "step": 19990 }, { "epoch": 0.08080253073526263, "grad_norm": 867.2445068359375, "learning_rate": 4e-05, "loss": 120.7213, "step": 20000 }, { "epoch": 0.08084293200063027, "grad_norm": 3826.01513671875, "learning_rate": 4.002e-05, "loss": 188.3878, "step": 20010 }, { "epoch": 0.08088333326599789, "grad_norm": 626.7747192382812, "learning_rate": 4.004e-05, "loss": 102.346, "step": 20020 }, { "epoch": 0.08092373453136552, "grad_norm": 447.5019836425781, "learning_rate": 4.0060000000000006e-05, "loss": 112.2855, "step": 20030 }, { "epoch": 0.08096413579673316, "grad_norm": 1387.30029296875, "learning_rate": 4.008e-05, "loss": 144.4808, "step": 20040 }, { "epoch": 0.08100453706210078, "grad_norm": 711.4352416992188, "learning_rate": 4.0100000000000006e-05, "loss": 123.1025, "step": 20050 }, { "epoch": 0.08104493832746841, "grad_norm": 696.3189086914062, "learning_rate": 4.012e-05, "loss": 125.8491, "step": 20060 }, { "epoch": 0.08108533959283605, "grad_norm": 1703.4642333984375, "learning_rate": 4.014e-05, "loss": 180.9456, "step": 20070 }, { "epoch": 0.08112574085820368, "grad_norm": 931.850341796875, "learning_rate": 4.016e-05, "loss": 157.8773, "step": 20080 }, { "epoch": 0.08116614212357132, "grad_norm": 1737.6793212890625, "learning_rate": 4.018e-05, "loss": 108.9433, "step": 20090 }, { "epoch": 0.08120654338893894, "grad_norm": 1597.1063232421875, "learning_rate": 4.02e-05, "loss": 116.3655, "step": 20100 }, { "epoch": 0.08124694465430657, "grad_norm": 1292.120849609375, "learning_rate": 4.0220000000000005e-05, "loss": 128.3085, "step": 20110 }, { "epoch": 0.08128734591967421, "grad_norm": 784.8877563476562, "learning_rate": 4.024e-05, "loss": 142.1472, "step": 20120 }, { "epoch": 0.08132774718504183, "grad_norm": 518.2711181640625, "learning_rate": 4.0260000000000004e-05, "loss": 138.0721, "step": 20130 }, { "epoch": 0.08136814845040946, "grad_norm": 3041.567138671875, "learning_rate": 4.028e-05, "loss": 143.0653, "step": 20140 }, { "epoch": 0.0814085497157771, "grad_norm": 393.4328918457031, "learning_rate": 4.0300000000000004e-05, "loss": 123.1964, "step": 20150 }, { "epoch": 0.08144895098114473, "grad_norm": 433.4530334472656, "learning_rate": 4.032e-05, "loss": 108.3037, "step": 20160 }, { "epoch": 0.08148935224651237, "grad_norm": 1056.2037353515625, "learning_rate": 4.034e-05, "loss": 104.124, "step": 20170 }, { "epoch": 0.08152975351188, "grad_norm": 1283.22607421875, "learning_rate": 4.0360000000000007e-05, "loss": 146.2523, "step": 20180 }, { "epoch": 0.08157015477724762, "grad_norm": 1303.0574951171875, "learning_rate": 4.038e-05, "loss": 108.7274, "step": 20190 }, { "epoch": 0.08161055604261526, "grad_norm": 748.68115234375, "learning_rate": 4.0400000000000006e-05, "loss": 118.8553, "step": 20200 }, { "epoch": 0.08165095730798289, "grad_norm": 559.082763671875, "learning_rate": 4.042e-05, "loss": 104.5537, "step": 20210 }, { "epoch": 0.08169135857335051, "grad_norm": 883.0214233398438, "learning_rate": 4.044e-05, "loss": 119.6957, "step": 20220 }, { "epoch": 0.08173175983871815, "grad_norm": 3064.196533203125, "learning_rate": 4.046e-05, "loss": 167.4422, "step": 20230 }, { "epoch": 0.08177216110408578, "grad_norm": 0.0, "learning_rate": 4.048e-05, "loss": 128.549, "step": 20240 }, { "epoch": 0.08181256236945342, "grad_norm": 755.3316040039062, "learning_rate": 4.05e-05, "loss": 108.4741, "step": 20250 }, { "epoch": 0.08185296363482104, "grad_norm": 1562.85498046875, "learning_rate": 4.0520000000000005e-05, "loss": 111.5061, "step": 20260 }, { "epoch": 0.08189336490018867, "grad_norm": 871.8521118164062, "learning_rate": 4.054e-05, "loss": 110.9914, "step": 20270 }, { "epoch": 0.08193376616555631, "grad_norm": 772.6279907226562, "learning_rate": 4.0560000000000005e-05, "loss": 145.3599, "step": 20280 }, { "epoch": 0.08197416743092394, "grad_norm": 529.8480224609375, "learning_rate": 4.058e-05, "loss": 101.3806, "step": 20290 }, { "epoch": 0.08201456869629156, "grad_norm": 2998.20556640625, "learning_rate": 4.0600000000000004e-05, "loss": 111.4782, "step": 20300 }, { "epoch": 0.0820549699616592, "grad_norm": 1405.43505859375, "learning_rate": 4.062e-05, "loss": 163.5577, "step": 20310 }, { "epoch": 0.08209537122702683, "grad_norm": 1239.15087890625, "learning_rate": 4.064e-05, "loss": 94.2629, "step": 20320 }, { "epoch": 0.08213577249239447, "grad_norm": 1255.966552734375, "learning_rate": 4.066e-05, "loss": 126.8316, "step": 20330 }, { "epoch": 0.0821761737577621, "grad_norm": 737.30126953125, "learning_rate": 4.0680000000000004e-05, "loss": 113.6555, "step": 20340 }, { "epoch": 0.08221657502312972, "grad_norm": 2296.812744140625, "learning_rate": 4.07e-05, "loss": 109.0417, "step": 20350 }, { "epoch": 0.08225697628849736, "grad_norm": 1186.671875, "learning_rate": 4.072e-05, "loss": 86.3001, "step": 20360 }, { "epoch": 0.08229737755386499, "grad_norm": 1619.2152099609375, "learning_rate": 4.074e-05, "loss": 148.3378, "step": 20370 }, { "epoch": 0.08233777881923261, "grad_norm": 3074.595703125, "learning_rate": 4.076e-05, "loss": 112.1198, "step": 20380 }, { "epoch": 0.08237818008460025, "grad_norm": 2610.1181640625, "learning_rate": 4.078e-05, "loss": 118.642, "step": 20390 }, { "epoch": 0.08241858134996788, "grad_norm": 944.0482788085938, "learning_rate": 4.08e-05, "loss": 118.4644, "step": 20400 }, { "epoch": 0.08245898261533552, "grad_norm": 2997.534423828125, "learning_rate": 4.0820000000000006e-05, "loss": 95.5434, "step": 20410 }, { "epoch": 0.08249938388070314, "grad_norm": 1543.2197265625, "learning_rate": 4.084e-05, "loss": 155.0003, "step": 20420 }, { "epoch": 0.08253978514607077, "grad_norm": 749.4830932617188, "learning_rate": 4.0860000000000005e-05, "loss": 98.9934, "step": 20430 }, { "epoch": 0.08258018641143841, "grad_norm": 2779.212646484375, "learning_rate": 4.088e-05, "loss": 115.3504, "step": 20440 }, { "epoch": 0.08262058767680604, "grad_norm": 2938.130615234375, "learning_rate": 4.09e-05, "loss": 121.3548, "step": 20450 }, { "epoch": 0.08266098894217366, "grad_norm": 364.253173828125, "learning_rate": 4.092e-05, "loss": 118.8423, "step": 20460 }, { "epoch": 0.0827013902075413, "grad_norm": 956.0880737304688, "learning_rate": 4.094e-05, "loss": 162.3844, "step": 20470 }, { "epoch": 0.08274179147290893, "grad_norm": 651.1348266601562, "learning_rate": 4.096e-05, "loss": 101.0112, "step": 20480 }, { "epoch": 0.08278219273827657, "grad_norm": 1472.0135498046875, "learning_rate": 4.0980000000000004e-05, "loss": 140.2291, "step": 20490 }, { "epoch": 0.0828225940036442, "grad_norm": 1709.4990234375, "learning_rate": 4.1e-05, "loss": 156.1177, "step": 20500 }, { "epoch": 0.08286299526901182, "grad_norm": 1078.7275390625, "learning_rate": 4.1020000000000004e-05, "loss": 149.2032, "step": 20510 }, { "epoch": 0.08290339653437946, "grad_norm": 1183.4559326171875, "learning_rate": 4.104e-05, "loss": 85.2116, "step": 20520 }, { "epoch": 0.08294379779974709, "grad_norm": 787.4295043945312, "learning_rate": 4.106e-05, "loss": 143.7207, "step": 20530 }, { "epoch": 0.08298419906511471, "grad_norm": 497.1183776855469, "learning_rate": 4.108e-05, "loss": 89.7213, "step": 20540 }, { "epoch": 0.08302460033048235, "grad_norm": 1972.8258056640625, "learning_rate": 4.11e-05, "loss": 115.1941, "step": 20550 }, { "epoch": 0.08306500159584998, "grad_norm": 969.597900390625, "learning_rate": 4.1120000000000006e-05, "loss": 102.9652, "step": 20560 }, { "epoch": 0.08310540286121762, "grad_norm": 698.786376953125, "learning_rate": 4.114e-05, "loss": 63.844, "step": 20570 }, { "epoch": 0.08314580412658525, "grad_norm": 1212.814697265625, "learning_rate": 4.1160000000000006e-05, "loss": 149.4642, "step": 20580 }, { "epoch": 0.08318620539195287, "grad_norm": 552.6875610351562, "learning_rate": 4.118e-05, "loss": 140.7715, "step": 20590 }, { "epoch": 0.08322660665732051, "grad_norm": 1048.22607421875, "learning_rate": 4.12e-05, "loss": 154.9678, "step": 20600 }, { "epoch": 0.08326700792268814, "grad_norm": 1216.959716796875, "learning_rate": 4.122e-05, "loss": 167.41, "step": 20610 }, { "epoch": 0.08330740918805576, "grad_norm": 2889.177490234375, "learning_rate": 4.124e-05, "loss": 142.2961, "step": 20620 }, { "epoch": 0.0833478104534234, "grad_norm": 1590.9716796875, "learning_rate": 4.126e-05, "loss": 158.046, "step": 20630 }, { "epoch": 0.08338821171879103, "grad_norm": 1097.50146484375, "learning_rate": 4.1280000000000005e-05, "loss": 132.7002, "step": 20640 }, { "epoch": 0.08342861298415867, "grad_norm": 2981.930908203125, "learning_rate": 4.13e-05, "loss": 95.1602, "step": 20650 }, { "epoch": 0.0834690142495263, "grad_norm": 2784.6552734375, "learning_rate": 4.1320000000000004e-05, "loss": 173.26, "step": 20660 }, { "epoch": 0.08350941551489392, "grad_norm": 1329.2828369140625, "learning_rate": 4.134e-05, "loss": 162.6604, "step": 20670 }, { "epoch": 0.08354981678026156, "grad_norm": 808.767822265625, "learning_rate": 4.1360000000000004e-05, "loss": 112.3144, "step": 20680 }, { "epoch": 0.08359021804562919, "grad_norm": 1342.0172119140625, "learning_rate": 4.138e-05, "loss": 131.2333, "step": 20690 }, { "epoch": 0.08363061931099681, "grad_norm": 848.967529296875, "learning_rate": 4.14e-05, "loss": 147.4289, "step": 20700 }, { "epoch": 0.08367102057636445, "grad_norm": 566.413818359375, "learning_rate": 4.142000000000001e-05, "loss": 150.0216, "step": 20710 }, { "epoch": 0.08371142184173208, "grad_norm": 707.4857788085938, "learning_rate": 4.144e-05, "loss": 113.268, "step": 20720 }, { "epoch": 0.08375182310709972, "grad_norm": 851.73779296875, "learning_rate": 4.1460000000000006e-05, "loss": 128.7113, "step": 20730 }, { "epoch": 0.08379222437246735, "grad_norm": 28336.75390625, "learning_rate": 4.148e-05, "loss": 211.0017, "step": 20740 }, { "epoch": 0.08383262563783497, "grad_norm": 1380.7489013671875, "learning_rate": 4.15e-05, "loss": 72.6195, "step": 20750 }, { "epoch": 0.08387302690320261, "grad_norm": 1030.243408203125, "learning_rate": 4.152e-05, "loss": 127.0354, "step": 20760 }, { "epoch": 0.08391342816857024, "grad_norm": 7085.2041015625, "learning_rate": 4.154e-05, "loss": 119.6303, "step": 20770 }, { "epoch": 0.08395382943393787, "grad_norm": 783.239013671875, "learning_rate": 4.156e-05, "loss": 123.3007, "step": 20780 }, { "epoch": 0.0839942306993055, "grad_norm": 1146.0455322265625, "learning_rate": 4.1580000000000005e-05, "loss": 111.6334, "step": 20790 }, { "epoch": 0.08403463196467313, "grad_norm": 3808.298583984375, "learning_rate": 4.16e-05, "loss": 155.187, "step": 20800 }, { "epoch": 0.08407503323004077, "grad_norm": 2816.82275390625, "learning_rate": 4.1620000000000005e-05, "loss": 146.4871, "step": 20810 }, { "epoch": 0.0841154344954084, "grad_norm": 1327.615966796875, "learning_rate": 4.164e-05, "loss": 99.5535, "step": 20820 }, { "epoch": 0.08415583576077602, "grad_norm": 759.77490234375, "learning_rate": 4.1660000000000004e-05, "loss": 135.0864, "step": 20830 }, { "epoch": 0.08419623702614366, "grad_norm": 1578.577392578125, "learning_rate": 4.168e-05, "loss": 134.005, "step": 20840 }, { "epoch": 0.08423663829151129, "grad_norm": 845.4844970703125, "learning_rate": 4.17e-05, "loss": 72.3538, "step": 20850 }, { "epoch": 0.08427703955687892, "grad_norm": 681.817626953125, "learning_rate": 4.172e-05, "loss": 89.2945, "step": 20860 }, { "epoch": 0.08431744082224656, "grad_norm": 610.7493896484375, "learning_rate": 4.1740000000000004e-05, "loss": 88.0488, "step": 20870 }, { "epoch": 0.08435784208761418, "grad_norm": 1551.756103515625, "learning_rate": 4.176000000000001e-05, "loss": 144.3311, "step": 20880 }, { "epoch": 0.08439824335298182, "grad_norm": 941.2823486328125, "learning_rate": 4.178e-05, "loss": 108.5786, "step": 20890 }, { "epoch": 0.08443864461834945, "grad_norm": 2806.956787109375, "learning_rate": 4.18e-05, "loss": 139.1996, "step": 20900 }, { "epoch": 0.08447904588371707, "grad_norm": 2764.781982421875, "learning_rate": 4.182e-05, "loss": 202.5234, "step": 20910 }, { "epoch": 0.08451944714908471, "grad_norm": 4884.42431640625, "learning_rate": 4.184e-05, "loss": 113.7894, "step": 20920 }, { "epoch": 0.08455984841445234, "grad_norm": 752.3662719726562, "learning_rate": 4.186e-05, "loss": 108.6684, "step": 20930 }, { "epoch": 0.08460024967981997, "grad_norm": 997.4862060546875, "learning_rate": 4.1880000000000006e-05, "loss": 119.868, "step": 20940 }, { "epoch": 0.0846406509451876, "grad_norm": 1489.1739501953125, "learning_rate": 4.19e-05, "loss": 118.152, "step": 20950 }, { "epoch": 0.08468105221055523, "grad_norm": 509.3271789550781, "learning_rate": 4.1920000000000005e-05, "loss": 95.0759, "step": 20960 }, { "epoch": 0.08472145347592287, "grad_norm": 1403.126708984375, "learning_rate": 4.194e-05, "loss": 130.0431, "step": 20970 }, { "epoch": 0.0847618547412905, "grad_norm": 2763.188232421875, "learning_rate": 4.196e-05, "loss": 82.064, "step": 20980 }, { "epoch": 0.08480225600665812, "grad_norm": 643.8973388671875, "learning_rate": 4.198e-05, "loss": 120.0969, "step": 20990 }, { "epoch": 0.08484265727202576, "grad_norm": 953.9916381835938, "learning_rate": 4.2e-05, "loss": 99.3082, "step": 21000 }, { "epoch": 0.08488305853739339, "grad_norm": 7278.77392578125, "learning_rate": 4.202e-05, "loss": 185.1893, "step": 21010 }, { "epoch": 0.08492345980276102, "grad_norm": 4063.332763671875, "learning_rate": 4.2040000000000004e-05, "loss": 172.6903, "step": 21020 }, { "epoch": 0.08496386106812866, "grad_norm": 355.11962890625, "learning_rate": 4.206e-05, "loss": 97.8671, "step": 21030 }, { "epoch": 0.08500426233349628, "grad_norm": 803.3870239257812, "learning_rate": 4.2080000000000004e-05, "loss": 90.9247, "step": 21040 }, { "epoch": 0.08504466359886392, "grad_norm": 1197.3392333984375, "learning_rate": 4.21e-05, "loss": 111.2428, "step": 21050 }, { "epoch": 0.08508506486423155, "grad_norm": 1060.21728515625, "learning_rate": 4.212e-05, "loss": 80.9763, "step": 21060 }, { "epoch": 0.08512546612959918, "grad_norm": 553.6648559570312, "learning_rate": 4.214e-05, "loss": 164.6878, "step": 21070 }, { "epoch": 0.08516586739496682, "grad_norm": 1732.870361328125, "learning_rate": 4.2159999999999996e-05, "loss": 127.5546, "step": 21080 }, { "epoch": 0.08520626866033444, "grad_norm": 3130.48828125, "learning_rate": 4.2180000000000006e-05, "loss": 125.8488, "step": 21090 }, { "epoch": 0.08524666992570207, "grad_norm": 1232.9683837890625, "learning_rate": 4.22e-05, "loss": 110.2355, "step": 21100 }, { "epoch": 0.08528707119106971, "grad_norm": 1068.6566162109375, "learning_rate": 4.2220000000000006e-05, "loss": 116.9769, "step": 21110 }, { "epoch": 0.08532747245643733, "grad_norm": 687.2569580078125, "learning_rate": 4.224e-05, "loss": 124.6508, "step": 21120 }, { "epoch": 0.08536787372180497, "grad_norm": 680.8721313476562, "learning_rate": 4.226e-05, "loss": 123.4505, "step": 21130 }, { "epoch": 0.0854082749871726, "grad_norm": 1629.7750244140625, "learning_rate": 4.228e-05, "loss": 165.3711, "step": 21140 }, { "epoch": 0.08544867625254023, "grad_norm": 1123.4427490234375, "learning_rate": 4.23e-05, "loss": 104.645, "step": 21150 }, { "epoch": 0.08548907751790787, "grad_norm": 684.88134765625, "learning_rate": 4.232e-05, "loss": 87.5831, "step": 21160 }, { "epoch": 0.08552947878327549, "grad_norm": 1038.5843505859375, "learning_rate": 4.2340000000000005e-05, "loss": 111.1774, "step": 21170 }, { "epoch": 0.08556988004864312, "grad_norm": 441.3775634765625, "learning_rate": 4.236e-05, "loss": 97.0562, "step": 21180 }, { "epoch": 0.08561028131401076, "grad_norm": 1054.5010986328125, "learning_rate": 4.2380000000000004e-05, "loss": 137.0658, "step": 21190 }, { "epoch": 0.08565068257937838, "grad_norm": 2301.26708984375, "learning_rate": 4.24e-05, "loss": 121.703, "step": 21200 }, { "epoch": 0.08569108384474602, "grad_norm": 1787.3651123046875, "learning_rate": 4.2420000000000004e-05, "loss": 168.9554, "step": 21210 }, { "epoch": 0.08573148511011365, "grad_norm": 3590.007568359375, "learning_rate": 4.244e-05, "loss": 162.3379, "step": 21220 }, { "epoch": 0.08577188637548128, "grad_norm": 1609.46142578125, "learning_rate": 4.246e-05, "loss": 124.5303, "step": 21230 }, { "epoch": 0.08581228764084892, "grad_norm": 558.2980346679688, "learning_rate": 4.248e-05, "loss": 95.647, "step": 21240 }, { "epoch": 0.08585268890621654, "grad_norm": 1266.0831298828125, "learning_rate": 4.25e-05, "loss": 95.554, "step": 21250 }, { "epoch": 0.08589309017158417, "grad_norm": 1427.5911865234375, "learning_rate": 4.2520000000000006e-05, "loss": 136.6243, "step": 21260 }, { "epoch": 0.08593349143695181, "grad_norm": 1910.5550537109375, "learning_rate": 4.254e-05, "loss": 104.6323, "step": 21270 }, { "epoch": 0.08597389270231943, "grad_norm": 2682.699951171875, "learning_rate": 4.256e-05, "loss": 123.6776, "step": 21280 }, { "epoch": 0.08601429396768706, "grad_norm": 7804.3984375, "learning_rate": 4.258e-05, "loss": 165.5531, "step": 21290 }, { "epoch": 0.0860546952330547, "grad_norm": 790.6614990234375, "learning_rate": 4.26e-05, "loss": 114.8768, "step": 21300 }, { "epoch": 0.08609509649842233, "grad_norm": 1748.3701171875, "learning_rate": 4.262e-05, "loss": 120.4578, "step": 21310 }, { "epoch": 0.08613549776378997, "grad_norm": 948.3206176757812, "learning_rate": 4.2640000000000005e-05, "loss": 100.5346, "step": 21320 }, { "epoch": 0.0861758990291576, "grad_norm": 695.2459106445312, "learning_rate": 4.266e-05, "loss": 97.4773, "step": 21330 }, { "epoch": 0.08621630029452522, "grad_norm": 2172.08837890625, "learning_rate": 4.2680000000000005e-05, "loss": 132.5773, "step": 21340 }, { "epoch": 0.08625670155989286, "grad_norm": 867.505859375, "learning_rate": 4.27e-05, "loss": 104.199, "step": 21350 }, { "epoch": 0.08629710282526049, "grad_norm": 1258.5723876953125, "learning_rate": 4.2720000000000004e-05, "loss": 99.1206, "step": 21360 }, { "epoch": 0.08633750409062811, "grad_norm": 974.3964233398438, "learning_rate": 4.274e-05, "loss": 98.0185, "step": 21370 }, { "epoch": 0.08637790535599575, "grad_norm": 1084.6614990234375, "learning_rate": 4.276e-05, "loss": 95.9101, "step": 21380 }, { "epoch": 0.08641830662136338, "grad_norm": 1883.6488037109375, "learning_rate": 4.278e-05, "loss": 125.3519, "step": 21390 }, { "epoch": 0.08645870788673102, "grad_norm": 3283.783935546875, "learning_rate": 4.2800000000000004e-05, "loss": 166.7042, "step": 21400 }, { "epoch": 0.08649910915209864, "grad_norm": 942.7132568359375, "learning_rate": 4.282000000000001e-05, "loss": 154.2377, "step": 21410 }, { "epoch": 0.08653951041746627, "grad_norm": 1186.5281982421875, "learning_rate": 4.284e-05, "loss": 104.8457, "step": 21420 }, { "epoch": 0.08657991168283391, "grad_norm": 1021.5708618164062, "learning_rate": 4.286e-05, "loss": 100.3719, "step": 21430 }, { "epoch": 0.08662031294820154, "grad_norm": 1118.336669921875, "learning_rate": 4.288e-05, "loss": 145.7167, "step": 21440 }, { "epoch": 0.08666071421356916, "grad_norm": 686.393310546875, "learning_rate": 4.29e-05, "loss": 107.4845, "step": 21450 }, { "epoch": 0.0867011154789368, "grad_norm": 10137.349609375, "learning_rate": 4.292e-05, "loss": 131.5519, "step": 21460 }, { "epoch": 0.08674151674430443, "grad_norm": 4428.59521484375, "learning_rate": 4.2940000000000006e-05, "loss": 163.3146, "step": 21470 }, { "epoch": 0.08678191800967207, "grad_norm": 816.3525390625, "learning_rate": 4.296e-05, "loss": 69.2457, "step": 21480 }, { "epoch": 0.0868223192750397, "grad_norm": 1754.2156982421875, "learning_rate": 4.2980000000000005e-05, "loss": 163.8832, "step": 21490 }, { "epoch": 0.08686272054040732, "grad_norm": 1116.2850341796875, "learning_rate": 4.3e-05, "loss": 86.4771, "step": 21500 }, { "epoch": 0.08690312180577496, "grad_norm": 542.83984375, "learning_rate": 4.3020000000000005e-05, "loss": 121.8374, "step": 21510 }, { "epoch": 0.08694352307114259, "grad_norm": 1956.363037109375, "learning_rate": 4.304e-05, "loss": 108.1872, "step": 21520 }, { "epoch": 0.08698392433651021, "grad_norm": 1243.8492431640625, "learning_rate": 4.306e-05, "loss": 115.3768, "step": 21530 }, { "epoch": 0.08702432560187785, "grad_norm": 1239.7283935546875, "learning_rate": 4.308e-05, "loss": 81.8431, "step": 21540 }, { "epoch": 0.08706472686724548, "grad_norm": 759.3988037109375, "learning_rate": 4.3100000000000004e-05, "loss": 128.8443, "step": 21550 }, { "epoch": 0.08710512813261312, "grad_norm": 1724.4061279296875, "learning_rate": 4.312000000000001e-05, "loss": 132.2185, "step": 21560 }, { "epoch": 0.08714552939798074, "grad_norm": 815.0044555664062, "learning_rate": 4.3140000000000004e-05, "loss": 76.6797, "step": 21570 }, { "epoch": 0.08718593066334837, "grad_norm": 458.9404602050781, "learning_rate": 4.316e-05, "loss": 115.9924, "step": 21580 }, { "epoch": 0.08722633192871601, "grad_norm": 553.5025024414062, "learning_rate": 4.318e-05, "loss": 121.1555, "step": 21590 }, { "epoch": 0.08726673319408364, "grad_norm": 1283.981689453125, "learning_rate": 4.32e-05, "loss": 134.989, "step": 21600 }, { "epoch": 0.08730713445945126, "grad_norm": 438.9365234375, "learning_rate": 4.3219999999999996e-05, "loss": 96.8193, "step": 21610 }, { "epoch": 0.0873475357248189, "grad_norm": 963.8663940429688, "learning_rate": 4.324e-05, "loss": 109.2355, "step": 21620 }, { "epoch": 0.08738793699018653, "grad_norm": 389.078369140625, "learning_rate": 4.326e-05, "loss": 179.7937, "step": 21630 }, { "epoch": 0.08742833825555417, "grad_norm": 1283.707763671875, "learning_rate": 4.3280000000000006e-05, "loss": 93.5443, "step": 21640 }, { "epoch": 0.0874687395209218, "grad_norm": 1297.939208984375, "learning_rate": 4.33e-05, "loss": 119.5012, "step": 21650 }, { "epoch": 0.08750914078628942, "grad_norm": 1461.7098388671875, "learning_rate": 4.332e-05, "loss": 112.2618, "step": 21660 }, { "epoch": 0.08754954205165706, "grad_norm": 370.33056640625, "learning_rate": 4.334e-05, "loss": 118.6215, "step": 21670 }, { "epoch": 0.08758994331702469, "grad_norm": 1230.4083251953125, "learning_rate": 4.336e-05, "loss": 83.1036, "step": 21680 }, { "epoch": 0.08763034458239231, "grad_norm": 713.216796875, "learning_rate": 4.338e-05, "loss": 116.6274, "step": 21690 }, { "epoch": 0.08767074584775995, "grad_norm": 333.86663818359375, "learning_rate": 4.3400000000000005e-05, "loss": 138.7234, "step": 21700 }, { "epoch": 0.08771114711312758, "grad_norm": 1813.9105224609375, "learning_rate": 4.342e-05, "loss": 124.1437, "step": 21710 }, { "epoch": 0.08775154837849522, "grad_norm": 1551.122802734375, "learning_rate": 4.3440000000000004e-05, "loss": 117.1614, "step": 21720 }, { "epoch": 0.08779194964386285, "grad_norm": 556.9833984375, "learning_rate": 4.346e-05, "loss": 85.8124, "step": 21730 }, { "epoch": 0.08783235090923047, "grad_norm": 1249.522705078125, "learning_rate": 4.3480000000000004e-05, "loss": 82.4492, "step": 21740 }, { "epoch": 0.08787275217459811, "grad_norm": 644.7138671875, "learning_rate": 4.35e-05, "loss": 103.3423, "step": 21750 }, { "epoch": 0.08791315343996574, "grad_norm": 893.0758056640625, "learning_rate": 4.352e-05, "loss": 140.0829, "step": 21760 }, { "epoch": 0.08795355470533336, "grad_norm": 527.2681274414062, "learning_rate": 4.354e-05, "loss": 99.5734, "step": 21770 }, { "epoch": 0.087993955970701, "grad_norm": 623.2860717773438, "learning_rate": 4.356e-05, "loss": 75.2396, "step": 21780 }, { "epoch": 0.08803435723606863, "grad_norm": 590.4302978515625, "learning_rate": 4.3580000000000006e-05, "loss": 117.1983, "step": 21790 }, { "epoch": 0.08807475850143627, "grad_norm": 945.4894409179688, "learning_rate": 4.36e-05, "loss": 156.6693, "step": 21800 }, { "epoch": 0.0881151597668039, "grad_norm": 892.6826782226562, "learning_rate": 4.362e-05, "loss": 128.0761, "step": 21810 }, { "epoch": 0.08815556103217152, "grad_norm": 1329.8853759765625, "learning_rate": 4.364e-05, "loss": 81.0603, "step": 21820 }, { "epoch": 0.08819596229753916, "grad_norm": 3241.863525390625, "learning_rate": 4.366e-05, "loss": 114.7735, "step": 21830 }, { "epoch": 0.08823636356290679, "grad_norm": 621.0053100585938, "learning_rate": 4.368e-05, "loss": 99.9302, "step": 21840 }, { "epoch": 0.08827676482827441, "grad_norm": 571.9194946289062, "learning_rate": 4.3700000000000005e-05, "loss": 60.8999, "step": 21850 }, { "epoch": 0.08831716609364205, "grad_norm": 1223.1873779296875, "learning_rate": 4.372e-05, "loss": 115.3964, "step": 21860 }, { "epoch": 0.08835756735900968, "grad_norm": 1025.819580078125, "learning_rate": 4.3740000000000005e-05, "loss": 115.1883, "step": 21870 }, { "epoch": 0.08839796862437732, "grad_norm": 1899.594970703125, "learning_rate": 4.376e-05, "loss": 118.3364, "step": 21880 }, { "epoch": 0.08843836988974495, "grad_norm": 1690.5460205078125, "learning_rate": 4.3780000000000004e-05, "loss": 88.396, "step": 21890 }, { "epoch": 0.08847877115511257, "grad_norm": 1255.8304443359375, "learning_rate": 4.38e-05, "loss": 95.6884, "step": 21900 }, { "epoch": 0.08851917242048021, "grad_norm": 2578.5478515625, "learning_rate": 4.382e-05, "loss": 143.0776, "step": 21910 }, { "epoch": 0.08855957368584784, "grad_norm": 4696.71826171875, "learning_rate": 4.384e-05, "loss": 95.2831, "step": 21920 }, { "epoch": 0.08859997495121547, "grad_norm": 3200.905517578125, "learning_rate": 4.3860000000000004e-05, "loss": 98.7196, "step": 21930 }, { "epoch": 0.0886403762165831, "grad_norm": 771.8877563476562, "learning_rate": 4.388000000000001e-05, "loss": 109.9122, "step": 21940 }, { "epoch": 0.08868077748195073, "grad_norm": 2375.28515625, "learning_rate": 4.39e-05, "loss": 97.5147, "step": 21950 }, { "epoch": 0.08872117874731837, "grad_norm": 1200.1995849609375, "learning_rate": 4.392e-05, "loss": 163.8434, "step": 21960 }, { "epoch": 0.088761580012686, "grad_norm": 743.7747192382812, "learning_rate": 4.394e-05, "loss": 152.2877, "step": 21970 }, { "epoch": 0.08880198127805362, "grad_norm": 748.3530883789062, "learning_rate": 4.396e-05, "loss": 102.9261, "step": 21980 }, { "epoch": 0.08884238254342126, "grad_norm": 0.0, "learning_rate": 4.398e-05, "loss": 86.7398, "step": 21990 }, { "epoch": 0.08888278380878889, "grad_norm": 641.658935546875, "learning_rate": 4.4000000000000006e-05, "loss": 115.4044, "step": 22000 }, { "epoch": 0.08892318507415652, "grad_norm": 757.2327880859375, "learning_rate": 4.402e-05, "loss": 106.9586, "step": 22010 }, { "epoch": 0.08896358633952416, "grad_norm": 1741.6318359375, "learning_rate": 4.4040000000000005e-05, "loss": 61.3795, "step": 22020 }, { "epoch": 0.08900398760489178, "grad_norm": 3420.192138671875, "learning_rate": 4.406e-05, "loss": 150.3777, "step": 22030 }, { "epoch": 0.08904438887025942, "grad_norm": 785.1651000976562, "learning_rate": 4.4080000000000005e-05, "loss": 112.2344, "step": 22040 }, { "epoch": 0.08908479013562705, "grad_norm": 2430.542236328125, "learning_rate": 4.41e-05, "loss": 84.5093, "step": 22050 }, { "epoch": 0.08912519140099467, "grad_norm": 1095.4105224609375, "learning_rate": 4.412e-05, "loss": 129.3121, "step": 22060 }, { "epoch": 0.08916559266636231, "grad_norm": 654.0252075195312, "learning_rate": 4.414e-05, "loss": 105.0435, "step": 22070 }, { "epoch": 0.08920599393172994, "grad_norm": 6289.986328125, "learning_rate": 4.4160000000000004e-05, "loss": 141.9109, "step": 22080 }, { "epoch": 0.08924639519709757, "grad_norm": 3365.2255859375, "learning_rate": 4.418000000000001e-05, "loss": 164.1694, "step": 22090 }, { "epoch": 0.0892867964624652, "grad_norm": 1369.172607421875, "learning_rate": 4.4200000000000004e-05, "loss": 133.5227, "step": 22100 }, { "epoch": 0.08932719772783283, "grad_norm": 2439.97265625, "learning_rate": 4.422e-05, "loss": 126.4849, "step": 22110 }, { "epoch": 0.08936759899320047, "grad_norm": 1391.3780517578125, "learning_rate": 4.424e-05, "loss": 103.1816, "step": 22120 }, { "epoch": 0.0894080002585681, "grad_norm": 2001.207275390625, "learning_rate": 4.426e-05, "loss": 124.9317, "step": 22130 }, { "epoch": 0.08944840152393572, "grad_norm": 1090.6702880859375, "learning_rate": 4.428e-05, "loss": 86.8185, "step": 22140 }, { "epoch": 0.08948880278930336, "grad_norm": 1352.2353515625, "learning_rate": 4.43e-05, "loss": 123.4582, "step": 22150 }, { "epoch": 0.08952920405467099, "grad_norm": 1216.326171875, "learning_rate": 4.432e-05, "loss": 77.8633, "step": 22160 }, { "epoch": 0.08956960532003862, "grad_norm": 805.3572998046875, "learning_rate": 4.4340000000000006e-05, "loss": 112.9714, "step": 22170 }, { "epoch": 0.08961000658540626, "grad_norm": 558.133544921875, "learning_rate": 4.436e-05, "loss": 89.9262, "step": 22180 }, { "epoch": 0.08965040785077388, "grad_norm": 2130.873046875, "learning_rate": 4.438e-05, "loss": 112.3018, "step": 22190 }, { "epoch": 0.08969080911614152, "grad_norm": 1761.7283935546875, "learning_rate": 4.44e-05, "loss": 111.0985, "step": 22200 }, { "epoch": 0.08973121038150915, "grad_norm": 915.045166015625, "learning_rate": 4.442e-05, "loss": 107.6649, "step": 22210 }, { "epoch": 0.08977161164687678, "grad_norm": 1191.688232421875, "learning_rate": 4.444e-05, "loss": 165.1702, "step": 22220 }, { "epoch": 0.08981201291224442, "grad_norm": 496.5788879394531, "learning_rate": 4.4460000000000005e-05, "loss": 82.1613, "step": 22230 }, { "epoch": 0.08985241417761204, "grad_norm": 500.4427185058594, "learning_rate": 4.448e-05, "loss": 154.052, "step": 22240 }, { "epoch": 0.08989281544297967, "grad_norm": 1123.475341796875, "learning_rate": 4.4500000000000004e-05, "loss": 103.6876, "step": 22250 }, { "epoch": 0.08993321670834731, "grad_norm": 763.8755493164062, "learning_rate": 4.452e-05, "loss": 105.7942, "step": 22260 }, { "epoch": 0.08997361797371493, "grad_norm": 659.6030883789062, "learning_rate": 4.4540000000000004e-05, "loss": 111.4345, "step": 22270 }, { "epoch": 0.09001401923908257, "grad_norm": 649.8712158203125, "learning_rate": 4.456e-05, "loss": 78.0137, "step": 22280 }, { "epoch": 0.0900544205044502, "grad_norm": 510.55853271484375, "learning_rate": 4.458e-05, "loss": 142.013, "step": 22290 }, { "epoch": 0.09009482176981783, "grad_norm": 3399.17236328125, "learning_rate": 4.46e-05, "loss": 159.2714, "step": 22300 }, { "epoch": 0.09013522303518547, "grad_norm": 998.1200561523438, "learning_rate": 4.462e-05, "loss": 100.7152, "step": 22310 }, { "epoch": 0.09017562430055309, "grad_norm": 531.780517578125, "learning_rate": 4.4640000000000006e-05, "loss": 168.3419, "step": 22320 }, { "epoch": 0.09021602556592072, "grad_norm": 910.6287841796875, "learning_rate": 4.466e-05, "loss": 98.8065, "step": 22330 }, { "epoch": 0.09025642683128836, "grad_norm": 1744.2012939453125, "learning_rate": 4.468e-05, "loss": 105.1678, "step": 22340 }, { "epoch": 0.09029682809665598, "grad_norm": 597.9017944335938, "learning_rate": 4.47e-05, "loss": 112.0376, "step": 22350 }, { "epoch": 0.09033722936202362, "grad_norm": 769.6776733398438, "learning_rate": 4.472e-05, "loss": 94.9958, "step": 22360 }, { "epoch": 0.09037763062739125, "grad_norm": 1181.265625, "learning_rate": 4.474e-05, "loss": 110.0878, "step": 22370 }, { "epoch": 0.09041803189275888, "grad_norm": 757.1105346679688, "learning_rate": 4.4760000000000005e-05, "loss": 100.3439, "step": 22380 }, { "epoch": 0.09045843315812652, "grad_norm": 717.3568115234375, "learning_rate": 4.478e-05, "loss": 134.2481, "step": 22390 }, { "epoch": 0.09049883442349414, "grad_norm": 892.8013305664062, "learning_rate": 4.4800000000000005e-05, "loss": 101.8901, "step": 22400 }, { "epoch": 0.09053923568886177, "grad_norm": 796.65380859375, "learning_rate": 4.482e-05, "loss": 107.8212, "step": 22410 }, { "epoch": 0.09057963695422941, "grad_norm": 599.0886840820312, "learning_rate": 4.4840000000000004e-05, "loss": 112.4707, "step": 22420 }, { "epoch": 0.09062003821959703, "grad_norm": 384.7815246582031, "learning_rate": 4.486e-05, "loss": 103.7676, "step": 22430 }, { "epoch": 0.09066043948496467, "grad_norm": 0.0, "learning_rate": 4.488e-05, "loss": 83.413, "step": 22440 }, { "epoch": 0.0907008407503323, "grad_norm": 1596.82373046875, "learning_rate": 4.49e-05, "loss": 125.9112, "step": 22450 }, { "epoch": 0.09074124201569993, "grad_norm": 8902.1123046875, "learning_rate": 4.4920000000000004e-05, "loss": 158.18, "step": 22460 }, { "epoch": 0.09078164328106757, "grad_norm": 809.1474609375, "learning_rate": 4.494000000000001e-05, "loss": 129.158, "step": 22470 }, { "epoch": 0.0908220445464352, "grad_norm": 657.6831665039062, "learning_rate": 4.496e-05, "loss": 121.9886, "step": 22480 }, { "epoch": 0.09086244581180282, "grad_norm": 866.2117309570312, "learning_rate": 4.498e-05, "loss": 123.8151, "step": 22490 }, { "epoch": 0.09090284707717046, "grad_norm": 585.983642578125, "learning_rate": 4.5e-05, "loss": 74.8665, "step": 22500 }, { "epoch": 0.09094324834253809, "grad_norm": 1273.0323486328125, "learning_rate": 4.502e-05, "loss": 96.7427, "step": 22510 }, { "epoch": 0.09098364960790573, "grad_norm": 1038.0701904296875, "learning_rate": 4.504e-05, "loss": 85.5748, "step": 22520 }, { "epoch": 0.09102405087327335, "grad_norm": 1610.0634765625, "learning_rate": 4.506e-05, "loss": 92.2955, "step": 22530 }, { "epoch": 0.09106445213864098, "grad_norm": 2549.61572265625, "learning_rate": 4.508e-05, "loss": 147.1345, "step": 22540 }, { "epoch": 0.09110485340400862, "grad_norm": 931.28173828125, "learning_rate": 4.5100000000000005e-05, "loss": 82.7087, "step": 22550 }, { "epoch": 0.09114525466937624, "grad_norm": 765.2784423828125, "learning_rate": 4.512e-05, "loss": 135.216, "step": 22560 }, { "epoch": 0.09118565593474387, "grad_norm": 597.312255859375, "learning_rate": 4.5140000000000005e-05, "loss": 147.5036, "step": 22570 }, { "epoch": 0.09122605720011151, "grad_norm": 756.5013427734375, "learning_rate": 4.516e-05, "loss": 128.549, "step": 22580 }, { "epoch": 0.09126645846547914, "grad_norm": 719.6322631835938, "learning_rate": 4.518e-05, "loss": 80.9833, "step": 22590 }, { "epoch": 0.09130685973084678, "grad_norm": 721.7781982421875, "learning_rate": 4.52e-05, "loss": 144.421, "step": 22600 }, { "epoch": 0.0913472609962144, "grad_norm": 1193.63232421875, "learning_rate": 4.5220000000000004e-05, "loss": 99.2124, "step": 22610 }, { "epoch": 0.09138766226158203, "grad_norm": 886.3824462890625, "learning_rate": 4.524000000000001e-05, "loss": 117.6559, "step": 22620 }, { "epoch": 0.09142806352694967, "grad_norm": 869.9031372070312, "learning_rate": 4.5260000000000004e-05, "loss": 128.7948, "step": 22630 }, { "epoch": 0.0914684647923173, "grad_norm": 323.6915283203125, "learning_rate": 4.528e-05, "loss": 103.8211, "step": 22640 }, { "epoch": 0.09150886605768492, "grad_norm": 648.2799072265625, "learning_rate": 4.53e-05, "loss": 124.0231, "step": 22650 }, { "epoch": 0.09154926732305256, "grad_norm": 3163.934814453125, "learning_rate": 4.532e-05, "loss": 133.5165, "step": 22660 }, { "epoch": 0.09158966858842019, "grad_norm": 694.5257568359375, "learning_rate": 4.534e-05, "loss": 117.9862, "step": 22670 }, { "epoch": 0.09163006985378783, "grad_norm": 1048.819091796875, "learning_rate": 4.536e-05, "loss": 110.8099, "step": 22680 }, { "epoch": 0.09167047111915545, "grad_norm": 760.2977905273438, "learning_rate": 4.538e-05, "loss": 93.8862, "step": 22690 }, { "epoch": 0.09171087238452308, "grad_norm": 1749.94580078125, "learning_rate": 4.5400000000000006e-05, "loss": 141.0014, "step": 22700 }, { "epoch": 0.09175127364989072, "grad_norm": 0.0, "learning_rate": 4.542e-05, "loss": 100.4336, "step": 22710 }, { "epoch": 0.09179167491525834, "grad_norm": 520.8677978515625, "learning_rate": 4.5440000000000005e-05, "loss": 85.8024, "step": 22720 }, { "epoch": 0.09183207618062597, "grad_norm": 1674.249755859375, "learning_rate": 4.546e-05, "loss": 138.2154, "step": 22730 }, { "epoch": 0.09187247744599361, "grad_norm": 917.3933715820312, "learning_rate": 4.548e-05, "loss": 103.3672, "step": 22740 }, { "epoch": 0.09191287871136124, "grad_norm": 1019.2123413085938, "learning_rate": 4.55e-05, "loss": 56.5289, "step": 22750 }, { "epoch": 0.09195327997672888, "grad_norm": 2317.489501953125, "learning_rate": 4.5520000000000005e-05, "loss": 155.0266, "step": 22760 }, { "epoch": 0.0919936812420965, "grad_norm": 819.0970458984375, "learning_rate": 4.554000000000001e-05, "loss": 116.7189, "step": 22770 }, { "epoch": 0.09203408250746413, "grad_norm": 1225.271240234375, "learning_rate": 4.5560000000000004e-05, "loss": 111.6631, "step": 22780 }, { "epoch": 0.09207448377283177, "grad_norm": 1159.3927001953125, "learning_rate": 4.558e-05, "loss": 66.4148, "step": 22790 }, { "epoch": 0.0921148850381994, "grad_norm": 1292.975341796875, "learning_rate": 4.5600000000000004e-05, "loss": 105.9983, "step": 22800 }, { "epoch": 0.09215528630356702, "grad_norm": 1175.0252685546875, "learning_rate": 4.562e-05, "loss": 143.92, "step": 22810 }, { "epoch": 0.09219568756893466, "grad_norm": 759.6773681640625, "learning_rate": 4.564e-05, "loss": 90.7138, "step": 22820 }, { "epoch": 0.09223608883430229, "grad_norm": 857.7040405273438, "learning_rate": 4.566e-05, "loss": 92.1247, "step": 22830 }, { "epoch": 0.09227649009966993, "grad_norm": 1580.8248291015625, "learning_rate": 4.568e-05, "loss": 128.0727, "step": 22840 }, { "epoch": 0.09231689136503755, "grad_norm": 567.8139038085938, "learning_rate": 4.5700000000000006e-05, "loss": 97.4495, "step": 22850 }, { "epoch": 0.09235729263040518, "grad_norm": 1576.912841796875, "learning_rate": 4.572e-05, "loss": 128.2519, "step": 22860 }, { "epoch": 0.09239769389577282, "grad_norm": 716.1073608398438, "learning_rate": 4.574e-05, "loss": 94.8568, "step": 22870 }, { "epoch": 0.09243809516114045, "grad_norm": 638.1775512695312, "learning_rate": 4.576e-05, "loss": 66.2251, "step": 22880 }, { "epoch": 0.09247849642650807, "grad_norm": 2102.469482421875, "learning_rate": 4.578e-05, "loss": 128.2732, "step": 22890 }, { "epoch": 0.09251889769187571, "grad_norm": 682.6776123046875, "learning_rate": 4.58e-05, "loss": 102.548, "step": 22900 }, { "epoch": 0.09255929895724334, "grad_norm": 586.2388916015625, "learning_rate": 4.5820000000000005e-05, "loss": 148.1805, "step": 22910 }, { "epoch": 0.09259970022261098, "grad_norm": 1975.264404296875, "learning_rate": 4.584e-05, "loss": 143.5229, "step": 22920 }, { "epoch": 0.0926401014879786, "grad_norm": 758.1207885742188, "learning_rate": 4.5860000000000005e-05, "loss": 128.5342, "step": 22930 }, { "epoch": 0.09268050275334623, "grad_norm": 2084.8974609375, "learning_rate": 4.588e-05, "loss": 94.417, "step": 22940 }, { "epoch": 0.09272090401871387, "grad_norm": 830.3482055664062, "learning_rate": 4.5900000000000004e-05, "loss": 97.3431, "step": 22950 }, { "epoch": 0.0927613052840815, "grad_norm": 562.5604858398438, "learning_rate": 4.592e-05, "loss": 115.6859, "step": 22960 }, { "epoch": 0.09280170654944912, "grad_norm": 1079.0665283203125, "learning_rate": 4.594e-05, "loss": 118.9365, "step": 22970 }, { "epoch": 0.09284210781481676, "grad_norm": 1732.04638671875, "learning_rate": 4.596e-05, "loss": 86.5984, "step": 22980 }, { "epoch": 0.09288250908018439, "grad_norm": 1780.00537109375, "learning_rate": 4.5980000000000004e-05, "loss": 113.6839, "step": 22990 }, { "epoch": 0.09292291034555203, "grad_norm": 1192.3056640625, "learning_rate": 4.600000000000001e-05, "loss": 89.4596, "step": 23000 }, { "epoch": 0.09296331161091966, "grad_norm": 1134.75830078125, "learning_rate": 4.602e-05, "loss": 87.713, "step": 23010 }, { "epoch": 0.09300371287628728, "grad_norm": 1336.7357177734375, "learning_rate": 4.604e-05, "loss": 65.8475, "step": 23020 }, { "epoch": 0.09304411414165492, "grad_norm": 815.8203125, "learning_rate": 4.606e-05, "loss": 148.7902, "step": 23030 }, { "epoch": 0.09308451540702255, "grad_norm": 1082.975830078125, "learning_rate": 4.608e-05, "loss": 114.2601, "step": 23040 }, { "epoch": 0.09312491667239017, "grad_norm": 778.52587890625, "learning_rate": 4.61e-05, "loss": 284.198, "step": 23050 }, { "epoch": 0.09316531793775781, "grad_norm": 591.0042114257812, "learning_rate": 4.612e-05, "loss": 106.8146, "step": 23060 }, { "epoch": 0.09320571920312544, "grad_norm": 2606.027099609375, "learning_rate": 4.614e-05, "loss": 109.8302, "step": 23070 }, { "epoch": 0.09324612046849308, "grad_norm": 1184.9754638671875, "learning_rate": 4.6160000000000005e-05, "loss": 118.0777, "step": 23080 }, { "epoch": 0.0932865217338607, "grad_norm": 1047.3983154296875, "learning_rate": 4.618e-05, "loss": 119.2703, "step": 23090 }, { "epoch": 0.09332692299922833, "grad_norm": 522.4287719726562, "learning_rate": 4.6200000000000005e-05, "loss": 115.0015, "step": 23100 }, { "epoch": 0.09336732426459597, "grad_norm": 387.2696838378906, "learning_rate": 4.622e-05, "loss": 112.657, "step": 23110 }, { "epoch": 0.0934077255299636, "grad_norm": 680.420166015625, "learning_rate": 4.624e-05, "loss": 81.0697, "step": 23120 }, { "epoch": 0.09344812679533122, "grad_norm": 1640.8831787109375, "learning_rate": 4.626e-05, "loss": 128.7615, "step": 23130 }, { "epoch": 0.09348852806069886, "grad_norm": 666.759765625, "learning_rate": 4.6280000000000004e-05, "loss": 86.7895, "step": 23140 }, { "epoch": 0.09352892932606649, "grad_norm": 2630.477783203125, "learning_rate": 4.630000000000001e-05, "loss": 174.0495, "step": 23150 }, { "epoch": 0.09356933059143413, "grad_norm": 2171.40185546875, "learning_rate": 4.6320000000000004e-05, "loss": 126.4433, "step": 23160 }, { "epoch": 0.09360973185680176, "grad_norm": 952.8081665039062, "learning_rate": 4.634e-05, "loss": 60.8379, "step": 23170 }, { "epoch": 0.09365013312216938, "grad_norm": 1052.6181640625, "learning_rate": 4.636e-05, "loss": 155.2496, "step": 23180 }, { "epoch": 0.09369053438753702, "grad_norm": 1012.393310546875, "learning_rate": 4.638e-05, "loss": 137.2208, "step": 23190 }, { "epoch": 0.09373093565290465, "grad_norm": 1177.2757568359375, "learning_rate": 4.64e-05, "loss": 123.9954, "step": 23200 }, { "epoch": 0.09377133691827227, "grad_norm": 1205.6624755859375, "learning_rate": 4.642e-05, "loss": 133.0356, "step": 23210 }, { "epoch": 0.09381173818363991, "grad_norm": 790.0210571289062, "learning_rate": 4.644e-05, "loss": 134.6905, "step": 23220 }, { "epoch": 0.09385213944900754, "grad_norm": 1255.626953125, "learning_rate": 4.6460000000000006e-05, "loss": 182.1293, "step": 23230 }, { "epoch": 0.09389254071437518, "grad_norm": 705.6256103515625, "learning_rate": 4.648e-05, "loss": 80.2685, "step": 23240 }, { "epoch": 0.0939329419797428, "grad_norm": 1400.9781494140625, "learning_rate": 4.6500000000000005e-05, "loss": 65.442, "step": 23250 }, { "epoch": 0.09397334324511043, "grad_norm": 475.5975646972656, "learning_rate": 4.652e-05, "loss": 104.4814, "step": 23260 }, { "epoch": 0.09401374451047807, "grad_norm": 656.1465454101562, "learning_rate": 4.654e-05, "loss": 64.6497, "step": 23270 }, { "epoch": 0.0940541457758457, "grad_norm": 870.1063232421875, "learning_rate": 4.656e-05, "loss": 106.1441, "step": 23280 }, { "epoch": 0.09409454704121333, "grad_norm": 461.961181640625, "learning_rate": 4.6580000000000005e-05, "loss": 77.1417, "step": 23290 }, { "epoch": 0.09413494830658097, "grad_norm": 677.691162109375, "learning_rate": 4.660000000000001e-05, "loss": 99.122, "step": 23300 }, { "epoch": 0.09417534957194859, "grad_norm": 651.6929931640625, "learning_rate": 4.6620000000000004e-05, "loss": 80.5311, "step": 23310 }, { "epoch": 0.09421575083731623, "grad_norm": 4944.45703125, "learning_rate": 4.664e-05, "loss": 122.0651, "step": 23320 }, { "epoch": 0.09425615210268386, "grad_norm": 585.9901733398438, "learning_rate": 4.6660000000000004e-05, "loss": 105.5626, "step": 23330 }, { "epoch": 0.09429655336805148, "grad_norm": 869.3453979492188, "learning_rate": 4.668e-05, "loss": 107.5253, "step": 23340 }, { "epoch": 0.09433695463341912, "grad_norm": 1403.1729736328125, "learning_rate": 4.6700000000000003e-05, "loss": 82.5326, "step": 23350 }, { "epoch": 0.09437735589878675, "grad_norm": 1053.485595703125, "learning_rate": 4.672e-05, "loss": 110.7292, "step": 23360 }, { "epoch": 0.09441775716415438, "grad_norm": 495.44793701171875, "learning_rate": 4.674e-05, "loss": 85.2537, "step": 23370 }, { "epoch": 0.09445815842952202, "grad_norm": 1976.839599609375, "learning_rate": 4.6760000000000006e-05, "loss": 122.28, "step": 23380 }, { "epoch": 0.09449855969488964, "grad_norm": 840.1267700195312, "learning_rate": 4.678e-05, "loss": 61.5691, "step": 23390 }, { "epoch": 0.09453896096025728, "grad_norm": 1114.9774169921875, "learning_rate": 4.6800000000000006e-05, "loss": 179.376, "step": 23400 }, { "epoch": 0.09457936222562491, "grad_norm": 2026.411376953125, "learning_rate": 4.682e-05, "loss": 117.7227, "step": 23410 }, { "epoch": 0.09461976349099253, "grad_norm": 1031.648193359375, "learning_rate": 4.684e-05, "loss": 95.3115, "step": 23420 }, { "epoch": 0.09466016475636017, "grad_norm": 774.4593505859375, "learning_rate": 4.686e-05, "loss": 74.8853, "step": 23430 }, { "epoch": 0.0947005660217278, "grad_norm": 799.5897216796875, "learning_rate": 4.688e-05, "loss": 106.8804, "step": 23440 }, { "epoch": 0.09474096728709543, "grad_norm": 801.3352661132812, "learning_rate": 4.69e-05, "loss": 117.8924, "step": 23450 }, { "epoch": 0.09478136855246307, "grad_norm": 562.31494140625, "learning_rate": 4.6920000000000005e-05, "loss": 70.9862, "step": 23460 }, { "epoch": 0.09482176981783069, "grad_norm": 1088.81103515625, "learning_rate": 4.694e-05, "loss": 105.8252, "step": 23470 }, { "epoch": 0.09486217108319833, "grad_norm": 773.8255004882812, "learning_rate": 4.6960000000000004e-05, "loss": 74.9372, "step": 23480 }, { "epoch": 0.09490257234856596, "grad_norm": 2185.90771484375, "learning_rate": 4.698e-05, "loss": 137.0428, "step": 23490 }, { "epoch": 0.09494297361393358, "grad_norm": 1338.7786865234375, "learning_rate": 4.7e-05, "loss": 138.3532, "step": 23500 }, { "epoch": 0.09498337487930122, "grad_norm": 828.383544921875, "learning_rate": 4.702e-05, "loss": 103.3043, "step": 23510 }, { "epoch": 0.09502377614466885, "grad_norm": 1320.6363525390625, "learning_rate": 4.7040000000000004e-05, "loss": 112.5485, "step": 23520 }, { "epoch": 0.09506417741003648, "grad_norm": 1088.3404541015625, "learning_rate": 4.706000000000001e-05, "loss": 107.0843, "step": 23530 }, { "epoch": 0.09510457867540412, "grad_norm": 697.0968627929688, "learning_rate": 4.708e-05, "loss": 86.7037, "step": 23540 }, { "epoch": 0.09514497994077174, "grad_norm": 1503.7210693359375, "learning_rate": 4.71e-05, "loss": 113.7888, "step": 23550 }, { "epoch": 0.09518538120613938, "grad_norm": 840.5352783203125, "learning_rate": 4.712e-05, "loss": 107.902, "step": 23560 }, { "epoch": 0.09522578247150701, "grad_norm": 1638.9781494140625, "learning_rate": 4.714e-05, "loss": 102.9792, "step": 23570 }, { "epoch": 0.09526618373687464, "grad_norm": 518.2499389648438, "learning_rate": 4.716e-05, "loss": 78.3873, "step": 23580 }, { "epoch": 0.09530658500224228, "grad_norm": 837.2974853515625, "learning_rate": 4.718e-05, "loss": 71.646, "step": 23590 }, { "epoch": 0.0953469862676099, "grad_norm": 560.1998291015625, "learning_rate": 4.72e-05, "loss": 131.5562, "step": 23600 }, { "epoch": 0.09538738753297753, "grad_norm": 789.7793579101562, "learning_rate": 4.7220000000000005e-05, "loss": 95.4459, "step": 23610 }, { "epoch": 0.09542778879834517, "grad_norm": 836.2322998046875, "learning_rate": 4.724e-05, "loss": 118.2009, "step": 23620 }, { "epoch": 0.0954681900637128, "grad_norm": 1441.4818115234375, "learning_rate": 4.7260000000000005e-05, "loss": 164.6346, "step": 23630 }, { "epoch": 0.09550859132908043, "grad_norm": 965.8220825195312, "learning_rate": 4.728e-05, "loss": 109.1539, "step": 23640 }, { "epoch": 0.09554899259444806, "grad_norm": 1859.1063232421875, "learning_rate": 4.73e-05, "loss": 180.398, "step": 23650 }, { "epoch": 0.09558939385981569, "grad_norm": 823.5313110351562, "learning_rate": 4.732e-05, "loss": 95.1897, "step": 23660 }, { "epoch": 0.09562979512518333, "grad_norm": 880.113037109375, "learning_rate": 4.7340000000000004e-05, "loss": 107.1814, "step": 23670 }, { "epoch": 0.09567019639055095, "grad_norm": 1045.581298828125, "learning_rate": 4.736000000000001e-05, "loss": 151.5835, "step": 23680 }, { "epoch": 0.09571059765591858, "grad_norm": 1099.427978515625, "learning_rate": 4.7380000000000004e-05, "loss": 93.5232, "step": 23690 }, { "epoch": 0.09575099892128622, "grad_norm": 640.9761962890625, "learning_rate": 4.74e-05, "loss": 117.26, "step": 23700 }, { "epoch": 0.09579140018665384, "grad_norm": 337.58795166015625, "learning_rate": 4.742e-05, "loss": 114.4822, "step": 23710 }, { "epoch": 0.09583180145202148, "grad_norm": 1089.921875, "learning_rate": 4.744e-05, "loss": 104.9418, "step": 23720 }, { "epoch": 0.09587220271738911, "grad_norm": 2395.615234375, "learning_rate": 4.746e-05, "loss": 99.8505, "step": 23730 }, { "epoch": 0.09591260398275674, "grad_norm": 1333.4490966796875, "learning_rate": 4.748e-05, "loss": 136.7725, "step": 23740 }, { "epoch": 0.09595300524812438, "grad_norm": 1200.8509521484375, "learning_rate": 4.75e-05, "loss": 108.3065, "step": 23750 }, { "epoch": 0.095993406513492, "grad_norm": 674.5408325195312, "learning_rate": 4.7520000000000006e-05, "loss": 104.4726, "step": 23760 }, { "epoch": 0.09603380777885963, "grad_norm": 789.4915161132812, "learning_rate": 4.754e-05, "loss": 145.3513, "step": 23770 }, { "epoch": 0.09607420904422727, "grad_norm": 797.5672607421875, "learning_rate": 4.7560000000000005e-05, "loss": 99.0135, "step": 23780 }, { "epoch": 0.0961146103095949, "grad_norm": 1080.6739501953125, "learning_rate": 4.758e-05, "loss": 128.2187, "step": 23790 }, { "epoch": 0.09615501157496253, "grad_norm": 1180.974365234375, "learning_rate": 4.76e-05, "loss": 112.952, "step": 23800 }, { "epoch": 0.09619541284033016, "grad_norm": 782.664794921875, "learning_rate": 4.762e-05, "loss": 88.9206, "step": 23810 }, { "epoch": 0.09623581410569779, "grad_norm": 731.3271484375, "learning_rate": 4.7640000000000005e-05, "loss": 109.8621, "step": 23820 }, { "epoch": 0.09627621537106543, "grad_norm": 1363.64501953125, "learning_rate": 4.766000000000001e-05, "loss": 151.2704, "step": 23830 }, { "epoch": 0.09631661663643305, "grad_norm": 802.5873413085938, "learning_rate": 4.7680000000000004e-05, "loss": 126.6234, "step": 23840 }, { "epoch": 0.09635701790180068, "grad_norm": 1359.4644775390625, "learning_rate": 4.77e-05, "loss": 73.3318, "step": 23850 }, { "epoch": 0.09639741916716832, "grad_norm": 733.4736328125, "learning_rate": 4.7720000000000004e-05, "loss": 115.1329, "step": 23860 }, { "epoch": 0.09643782043253595, "grad_norm": 620.9826049804688, "learning_rate": 4.774e-05, "loss": 86.756, "step": 23870 }, { "epoch": 0.09647822169790359, "grad_norm": 508.6841125488281, "learning_rate": 4.7760000000000004e-05, "loss": 90.5046, "step": 23880 }, { "epoch": 0.09651862296327121, "grad_norm": 756.0390014648438, "learning_rate": 4.778e-05, "loss": 125.9883, "step": 23890 }, { "epoch": 0.09655902422863884, "grad_norm": 531.0929565429688, "learning_rate": 4.78e-05, "loss": 152.4438, "step": 23900 }, { "epoch": 0.09659942549400648, "grad_norm": 2243.71630859375, "learning_rate": 4.7820000000000006e-05, "loss": 155.722, "step": 23910 }, { "epoch": 0.0966398267593741, "grad_norm": 1023.2913818359375, "learning_rate": 4.784e-05, "loss": 76.3717, "step": 23920 }, { "epoch": 0.09668022802474173, "grad_norm": 896.178955078125, "learning_rate": 4.7860000000000006e-05, "loss": 114.2663, "step": 23930 }, { "epoch": 0.09672062929010937, "grad_norm": 656.4266967773438, "learning_rate": 4.788e-05, "loss": 122.8116, "step": 23940 }, { "epoch": 0.096761030555477, "grad_norm": 1333.90234375, "learning_rate": 4.79e-05, "loss": 119.1918, "step": 23950 }, { "epoch": 0.09680143182084464, "grad_norm": 929.0885620117188, "learning_rate": 4.792e-05, "loss": 101.0765, "step": 23960 }, { "epoch": 0.09684183308621226, "grad_norm": 783.88134765625, "learning_rate": 4.794e-05, "loss": 123.106, "step": 23970 }, { "epoch": 0.09688223435157989, "grad_norm": 495.6070861816406, "learning_rate": 4.796e-05, "loss": 77.067, "step": 23980 }, { "epoch": 0.09692263561694753, "grad_norm": 757.9783325195312, "learning_rate": 4.7980000000000005e-05, "loss": 108.6014, "step": 23990 }, { "epoch": 0.09696303688231515, "grad_norm": 1557.5858154296875, "learning_rate": 4.8e-05, "loss": 132.5803, "step": 24000 }, { "epoch": 0.09700343814768278, "grad_norm": 1372.1710205078125, "learning_rate": 4.8020000000000004e-05, "loss": 140.0741, "step": 24010 }, { "epoch": 0.09704383941305042, "grad_norm": 708.5230712890625, "learning_rate": 4.804e-05, "loss": 136.9571, "step": 24020 }, { "epoch": 0.09708424067841805, "grad_norm": 949.0543823242188, "learning_rate": 4.8060000000000004e-05, "loss": 119.4232, "step": 24030 }, { "epoch": 0.09712464194378569, "grad_norm": 779.013916015625, "learning_rate": 4.808e-05, "loss": 132.5883, "step": 24040 }, { "epoch": 0.09716504320915331, "grad_norm": 1494.10595703125, "learning_rate": 4.8100000000000004e-05, "loss": 101.6414, "step": 24050 }, { "epoch": 0.09720544447452094, "grad_norm": 628.3251342773438, "learning_rate": 4.812000000000001e-05, "loss": 100.0648, "step": 24060 }, { "epoch": 0.09724584573988858, "grad_norm": 518.7921752929688, "learning_rate": 4.814e-05, "loss": 55.6692, "step": 24070 }, { "epoch": 0.0972862470052562, "grad_norm": 1588.790771484375, "learning_rate": 4.816e-05, "loss": 133.6212, "step": 24080 }, { "epoch": 0.09732664827062383, "grad_norm": 843.473388671875, "learning_rate": 4.818e-05, "loss": 87.3461, "step": 24090 }, { "epoch": 0.09736704953599147, "grad_norm": 659.4796752929688, "learning_rate": 4.82e-05, "loss": 117.1845, "step": 24100 }, { "epoch": 0.0974074508013591, "grad_norm": 584.1058959960938, "learning_rate": 4.822e-05, "loss": 103.56, "step": 24110 }, { "epoch": 0.09744785206672674, "grad_norm": 1724.538330078125, "learning_rate": 4.824e-05, "loss": 89.6239, "step": 24120 }, { "epoch": 0.09748825333209436, "grad_norm": 693.462646484375, "learning_rate": 4.826e-05, "loss": 93.3504, "step": 24130 }, { "epoch": 0.09752865459746199, "grad_norm": 1428.2559814453125, "learning_rate": 4.8280000000000005e-05, "loss": 112.4596, "step": 24140 }, { "epoch": 0.09756905586282963, "grad_norm": 1738.0125732421875, "learning_rate": 4.83e-05, "loss": 107.2036, "step": 24150 }, { "epoch": 0.09760945712819726, "grad_norm": 590.5738525390625, "learning_rate": 4.8320000000000005e-05, "loss": 95.526, "step": 24160 }, { "epoch": 0.09764985839356488, "grad_norm": 1145.53515625, "learning_rate": 4.834e-05, "loss": 82.7261, "step": 24170 }, { "epoch": 0.09769025965893252, "grad_norm": 1240.5450439453125, "learning_rate": 4.836e-05, "loss": 138.7817, "step": 24180 }, { "epoch": 0.09773066092430015, "grad_norm": 1606.5479736328125, "learning_rate": 4.838e-05, "loss": 101.9048, "step": 24190 }, { "epoch": 0.09777106218966779, "grad_norm": 1542.466796875, "learning_rate": 4.8400000000000004e-05, "loss": 106.2202, "step": 24200 }, { "epoch": 0.09781146345503541, "grad_norm": 889.3588256835938, "learning_rate": 4.842000000000001e-05, "loss": 144.4749, "step": 24210 }, { "epoch": 0.09785186472040304, "grad_norm": 639.1673583984375, "learning_rate": 4.8440000000000004e-05, "loss": 103.4213, "step": 24220 }, { "epoch": 0.09789226598577068, "grad_norm": 3171.987060546875, "learning_rate": 4.846e-05, "loss": 156.6668, "step": 24230 }, { "epoch": 0.0979326672511383, "grad_norm": 709.1254272460938, "learning_rate": 4.8480000000000003e-05, "loss": 80.9064, "step": 24240 }, { "epoch": 0.09797306851650593, "grad_norm": 561.7057495117188, "learning_rate": 4.85e-05, "loss": 171.3163, "step": 24250 }, { "epoch": 0.09801346978187357, "grad_norm": 929.6841430664062, "learning_rate": 4.852e-05, "loss": 157.7511, "step": 24260 }, { "epoch": 0.0980538710472412, "grad_norm": 1814.5821533203125, "learning_rate": 4.854e-05, "loss": 91.1438, "step": 24270 }, { "epoch": 0.09809427231260884, "grad_norm": 4481.90673828125, "learning_rate": 4.856e-05, "loss": 116.3378, "step": 24280 }, { "epoch": 0.09813467357797646, "grad_norm": 1093.2393798828125, "learning_rate": 4.8580000000000006e-05, "loss": 95.4637, "step": 24290 }, { "epoch": 0.09817507484334409, "grad_norm": 559.2593383789062, "learning_rate": 4.86e-05, "loss": 80.5582, "step": 24300 }, { "epoch": 0.09821547610871173, "grad_norm": 538.036865234375, "learning_rate": 4.8620000000000005e-05, "loss": 91.296, "step": 24310 }, { "epoch": 0.09825587737407936, "grad_norm": 1021.3819580078125, "learning_rate": 4.864e-05, "loss": 96.4481, "step": 24320 }, { "epoch": 0.09829627863944698, "grad_norm": 905.1778564453125, "learning_rate": 4.866e-05, "loss": 93.2619, "step": 24330 }, { "epoch": 0.09833667990481462, "grad_norm": 604.1958618164062, "learning_rate": 4.868e-05, "loss": 62.2251, "step": 24340 }, { "epoch": 0.09837708117018225, "grad_norm": 707.2637939453125, "learning_rate": 4.87e-05, "loss": 81.4563, "step": 24350 }, { "epoch": 0.09841748243554987, "grad_norm": 1935.6922607421875, "learning_rate": 4.872000000000001e-05, "loss": 150.9556, "step": 24360 }, { "epoch": 0.09845788370091751, "grad_norm": 529.8268432617188, "learning_rate": 4.8740000000000004e-05, "loss": 95.0914, "step": 24370 }, { "epoch": 0.09849828496628514, "grad_norm": 1129.89599609375, "learning_rate": 4.876e-05, "loss": 128.5505, "step": 24380 }, { "epoch": 0.09853868623165278, "grad_norm": 2489.7001953125, "learning_rate": 4.8780000000000004e-05, "loss": 138.6563, "step": 24390 }, { "epoch": 0.0985790874970204, "grad_norm": 1026.3582763671875, "learning_rate": 4.88e-05, "loss": 126.4132, "step": 24400 }, { "epoch": 0.09861948876238803, "grad_norm": 2506.63916015625, "learning_rate": 4.8820000000000004e-05, "loss": 151.2192, "step": 24410 }, { "epoch": 0.09865989002775567, "grad_norm": 1200.955810546875, "learning_rate": 4.884e-05, "loss": 131.9323, "step": 24420 }, { "epoch": 0.0987002912931233, "grad_norm": 888.474365234375, "learning_rate": 4.886e-05, "loss": 80.7957, "step": 24430 }, { "epoch": 0.09874069255849093, "grad_norm": 853.1484375, "learning_rate": 4.8880000000000006e-05, "loss": 152.3066, "step": 24440 }, { "epoch": 0.09878109382385857, "grad_norm": 931.940673828125, "learning_rate": 4.89e-05, "loss": 124.5411, "step": 24450 }, { "epoch": 0.09882149508922619, "grad_norm": 755.48828125, "learning_rate": 4.8920000000000006e-05, "loss": 113.8801, "step": 24460 }, { "epoch": 0.09886189635459383, "grad_norm": 1995.7860107421875, "learning_rate": 4.894e-05, "loss": 101.5088, "step": 24470 }, { "epoch": 0.09890229761996146, "grad_norm": 645.9288940429688, "learning_rate": 4.896e-05, "loss": 75.8798, "step": 24480 }, { "epoch": 0.09894269888532908, "grad_norm": 1073.67626953125, "learning_rate": 4.898e-05, "loss": 161.3777, "step": 24490 }, { "epoch": 0.09898310015069672, "grad_norm": 1047.078125, "learning_rate": 4.9e-05, "loss": 136.8098, "step": 24500 }, { "epoch": 0.09902350141606435, "grad_norm": 1193.5416259765625, "learning_rate": 4.902e-05, "loss": 112.7242, "step": 24510 }, { "epoch": 0.09906390268143198, "grad_norm": 2082.607421875, "learning_rate": 4.9040000000000005e-05, "loss": 104.1718, "step": 24520 }, { "epoch": 0.09910430394679962, "grad_norm": 657.3272705078125, "learning_rate": 4.906e-05, "loss": 94.1763, "step": 24530 }, { "epoch": 0.09914470521216724, "grad_norm": 822.24462890625, "learning_rate": 4.9080000000000004e-05, "loss": 120.4415, "step": 24540 }, { "epoch": 0.09918510647753488, "grad_norm": 1214.71142578125, "learning_rate": 4.91e-05, "loss": 95.7002, "step": 24550 }, { "epoch": 0.09922550774290251, "grad_norm": 974.8594360351562, "learning_rate": 4.9120000000000004e-05, "loss": 122.8078, "step": 24560 }, { "epoch": 0.09926590900827013, "grad_norm": 2204.4921875, "learning_rate": 4.914e-05, "loss": 181.9215, "step": 24570 }, { "epoch": 0.09930631027363777, "grad_norm": 667.4011840820312, "learning_rate": 4.9160000000000004e-05, "loss": 80.917, "step": 24580 }, { "epoch": 0.0993467115390054, "grad_norm": 896.2610473632812, "learning_rate": 4.918000000000001e-05, "loss": 144.0085, "step": 24590 }, { "epoch": 0.09938711280437303, "grad_norm": 0.0, "learning_rate": 4.92e-05, "loss": 132.3478, "step": 24600 }, { "epoch": 0.09942751406974067, "grad_norm": 941.837646484375, "learning_rate": 4.9220000000000006e-05, "loss": 98.3554, "step": 24610 }, { "epoch": 0.09946791533510829, "grad_norm": 975.3572387695312, "learning_rate": 4.924e-05, "loss": 71.2832, "step": 24620 }, { "epoch": 0.09950831660047593, "grad_norm": 467.11871337890625, "learning_rate": 4.926e-05, "loss": 84.4188, "step": 24630 }, { "epoch": 0.09954871786584356, "grad_norm": 1425.18603515625, "learning_rate": 4.928e-05, "loss": 117.7631, "step": 24640 }, { "epoch": 0.09958911913121118, "grad_norm": 992.929443359375, "learning_rate": 4.93e-05, "loss": 74.3671, "step": 24650 }, { "epoch": 0.09962952039657882, "grad_norm": 1589.87646484375, "learning_rate": 4.932e-05, "loss": 127.9026, "step": 24660 }, { "epoch": 0.09966992166194645, "grad_norm": 726.9409790039062, "learning_rate": 4.9340000000000005e-05, "loss": 127.6771, "step": 24670 }, { "epoch": 0.09971032292731408, "grad_norm": 1687.093017578125, "learning_rate": 4.936e-05, "loss": 112.6813, "step": 24680 }, { "epoch": 0.09975072419268172, "grad_norm": 891.7119140625, "learning_rate": 4.9380000000000005e-05, "loss": 131.5728, "step": 24690 }, { "epoch": 0.09979112545804934, "grad_norm": 601.4475708007812, "learning_rate": 4.94e-05, "loss": 80.0845, "step": 24700 }, { "epoch": 0.09983152672341698, "grad_norm": 1217.7177734375, "learning_rate": 4.942e-05, "loss": 108.1615, "step": 24710 }, { "epoch": 0.09987192798878461, "grad_norm": 1115.55517578125, "learning_rate": 4.944e-05, "loss": 86.1, "step": 24720 }, { "epoch": 0.09991232925415224, "grad_norm": 623.4889526367188, "learning_rate": 4.946e-05, "loss": 99.014, "step": 24730 }, { "epoch": 0.09995273051951988, "grad_norm": 3404.5966796875, "learning_rate": 4.948000000000001e-05, "loss": 111.524, "step": 24740 }, { "epoch": 0.0999931317848875, "grad_norm": 1838.208251953125, "learning_rate": 4.9500000000000004e-05, "loss": 162.1484, "step": 24750 }, { "epoch": 0.10003353305025513, "grad_norm": 371.3464660644531, "learning_rate": 4.952e-05, "loss": 85.2928, "step": 24760 }, { "epoch": 0.10007393431562277, "grad_norm": 718.6541748046875, "learning_rate": 4.9540000000000003e-05, "loss": 86.9755, "step": 24770 }, { "epoch": 0.1001143355809904, "grad_norm": 1240.49169921875, "learning_rate": 4.956e-05, "loss": 72.236, "step": 24780 }, { "epoch": 0.10015473684635803, "grad_norm": 2078.4365234375, "learning_rate": 4.958e-05, "loss": 130.2423, "step": 24790 }, { "epoch": 0.10019513811172566, "grad_norm": 1719.6787109375, "learning_rate": 4.96e-05, "loss": 147.8617, "step": 24800 }, { "epoch": 0.10023553937709329, "grad_norm": 1234.83203125, "learning_rate": 4.962e-05, "loss": 116.7895, "step": 24810 }, { "epoch": 0.10027594064246093, "grad_norm": 1845.6007080078125, "learning_rate": 4.9640000000000006e-05, "loss": 123.7702, "step": 24820 }, { "epoch": 0.10031634190782855, "grad_norm": 1477.3031005859375, "learning_rate": 4.966e-05, "loss": 175.7533, "step": 24830 }, { "epoch": 0.10035674317319618, "grad_norm": 753.8565063476562, "learning_rate": 4.9680000000000005e-05, "loss": 137.9543, "step": 24840 }, { "epoch": 0.10039714443856382, "grad_norm": 458.0389709472656, "learning_rate": 4.97e-05, "loss": 99.559, "step": 24850 }, { "epoch": 0.10043754570393144, "grad_norm": 1369.2882080078125, "learning_rate": 4.972e-05, "loss": 151.6343, "step": 24860 }, { "epoch": 0.10047794696929908, "grad_norm": 2486.626220703125, "learning_rate": 4.974e-05, "loss": 118.1052, "step": 24870 }, { "epoch": 0.10051834823466671, "grad_norm": 1189.0513916015625, "learning_rate": 4.976e-05, "loss": 115.5461, "step": 24880 }, { "epoch": 0.10055874950003434, "grad_norm": 3800.11083984375, "learning_rate": 4.978e-05, "loss": 113.4657, "step": 24890 }, { "epoch": 0.10059915076540198, "grad_norm": 588.5828857421875, "learning_rate": 4.9800000000000004e-05, "loss": 116.0871, "step": 24900 }, { "epoch": 0.1006395520307696, "grad_norm": 748.7584228515625, "learning_rate": 4.982e-05, "loss": 96.397, "step": 24910 }, { "epoch": 0.10067995329613723, "grad_norm": 883.9682006835938, "learning_rate": 4.9840000000000004e-05, "loss": 73.1233, "step": 24920 }, { "epoch": 0.10072035456150487, "grad_norm": 747.5608520507812, "learning_rate": 4.986e-05, "loss": 101.5735, "step": 24930 }, { "epoch": 0.1007607558268725, "grad_norm": 1152.760009765625, "learning_rate": 4.9880000000000004e-05, "loss": 106.2813, "step": 24940 }, { "epoch": 0.10080115709224013, "grad_norm": 1556.455322265625, "learning_rate": 4.99e-05, "loss": 92.2315, "step": 24950 }, { "epoch": 0.10084155835760776, "grad_norm": 564.4570922851562, "learning_rate": 4.992e-05, "loss": 105.7234, "step": 24960 }, { "epoch": 0.10088195962297539, "grad_norm": 582.0642700195312, "learning_rate": 4.9940000000000006e-05, "loss": 85.4253, "step": 24970 }, { "epoch": 0.10092236088834303, "grad_norm": 2132.204833984375, "learning_rate": 4.996e-05, "loss": 145.5456, "step": 24980 }, { "epoch": 0.10096276215371065, "grad_norm": 1085.25048828125, "learning_rate": 4.9980000000000006e-05, "loss": 105.3973, "step": 24990 }, { "epoch": 0.10100316341907828, "grad_norm": 482.8312072753906, "learning_rate": 5e-05, "loss": 107.6187, "step": 25000 }, { "epoch": 0.10104356468444592, "grad_norm": 12273.1533203125, "learning_rate": 4.999999975630607e-05, "loss": 166.6157, "step": 25010 }, { "epoch": 0.10108396594981355, "grad_norm": 1100.8514404296875, "learning_rate": 4.999999902522426e-05, "loss": 170.2824, "step": 25020 }, { "epoch": 0.10112436721518119, "grad_norm": 1492.0712890625, "learning_rate": 4.9999997806754614e-05, "loss": 105.2635, "step": 25030 }, { "epoch": 0.10116476848054881, "grad_norm": 2506.78173828125, "learning_rate": 4.9999996100897126e-05, "loss": 76.4485, "step": 25040 }, { "epoch": 0.10120516974591644, "grad_norm": 2876.141845703125, "learning_rate": 4.999999390765185e-05, "loss": 115.6973, "step": 25050 }, { "epoch": 0.10124557101128408, "grad_norm": 1318.9537353515625, "learning_rate": 4.999999122701883e-05, "loss": 120.0407, "step": 25060 }, { "epoch": 0.1012859722766517, "grad_norm": 598.3362426757812, "learning_rate": 4.99999880589981e-05, "loss": 99.9749, "step": 25070 }, { "epoch": 0.10132637354201933, "grad_norm": 680.303466796875, "learning_rate": 4.999998440358973e-05, "loss": 86.1489, "step": 25080 }, { "epoch": 0.10136677480738697, "grad_norm": 1665.12548828125, "learning_rate": 4.99999802607938e-05, "loss": 125.203, "step": 25090 }, { "epoch": 0.1014071760727546, "grad_norm": 4897.3798828125, "learning_rate": 4.999997563061038e-05, "loss": 99.004, "step": 25100 }, { "epoch": 0.10144757733812224, "grad_norm": 561.159423828125, "learning_rate": 4.999997051303956e-05, "loss": 128.6623, "step": 25110 }, { "epoch": 0.10148797860348986, "grad_norm": 407.36114501953125, "learning_rate": 4.9999964908081455e-05, "loss": 107.5606, "step": 25120 }, { "epoch": 0.10152837986885749, "grad_norm": 602.0693359375, "learning_rate": 4.999995881573616e-05, "loss": 99.1402, "step": 25130 }, { "epoch": 0.10156878113422513, "grad_norm": 428.0013427734375, "learning_rate": 4.999995223600379e-05, "loss": 176.0661, "step": 25140 }, { "epoch": 0.10160918239959275, "grad_norm": 1730.8555908203125, "learning_rate": 4.999994516888449e-05, "loss": 143.4497, "step": 25150 }, { "epoch": 0.10164958366496038, "grad_norm": 1302.3773193359375, "learning_rate": 4.999993761437838e-05, "loss": 137.5996, "step": 25160 }, { "epoch": 0.10168998493032802, "grad_norm": 3554.90771484375, "learning_rate": 4.9999929572485616e-05, "loss": 139.0566, "step": 25170 }, { "epoch": 0.10173038619569565, "grad_norm": 1487.5889892578125, "learning_rate": 4.999992104320636e-05, "loss": 108.4844, "step": 25180 }, { "epoch": 0.10177078746106329, "grad_norm": 1060.8238525390625, "learning_rate": 4.999991202654076e-05, "loss": 110.4729, "step": 25190 }, { "epoch": 0.10181118872643091, "grad_norm": 899.1284790039062, "learning_rate": 4.9999902522489015e-05, "loss": 117.7502, "step": 25200 }, { "epoch": 0.10185158999179854, "grad_norm": 824.8314819335938, "learning_rate": 4.99998925310513e-05, "loss": 96.4199, "step": 25210 }, { "epoch": 0.10189199125716618, "grad_norm": 729.9413452148438, "learning_rate": 4.999988205222781e-05, "loss": 58.8153, "step": 25220 }, { "epoch": 0.1019323925225338, "grad_norm": 1475.010498046875, "learning_rate": 4.999987108601874e-05, "loss": 118.004, "step": 25230 }, { "epoch": 0.10197279378790143, "grad_norm": 3924.0517578125, "learning_rate": 4.999985963242432e-05, "loss": 161.4323, "step": 25240 }, { "epoch": 0.10201319505326907, "grad_norm": 684.8850708007812, "learning_rate": 4.999984769144476e-05, "loss": 93.1785, "step": 25250 }, { "epoch": 0.1020535963186367, "grad_norm": 617.2882690429688, "learning_rate": 4.99998352630803e-05, "loss": 108.2454, "step": 25260 }, { "epoch": 0.10209399758400434, "grad_norm": 713.0025634765625, "learning_rate": 4.999982234733118e-05, "loss": 128.2708, "step": 25270 }, { "epoch": 0.10213439884937196, "grad_norm": 830.2299194335938, "learning_rate": 4.9999808944197666e-05, "loss": 93.2193, "step": 25280 }, { "epoch": 0.10217480011473959, "grad_norm": 635.0062255859375, "learning_rate": 4.999979505367999e-05, "loss": 95.8657, "step": 25290 }, { "epoch": 0.10221520138010723, "grad_norm": 681.8064575195312, "learning_rate": 4.999978067577844e-05, "loss": 81.1595, "step": 25300 }, { "epoch": 0.10225560264547486, "grad_norm": 1729.1885986328125, "learning_rate": 4.999976581049331e-05, "loss": 104.3257, "step": 25310 }, { "epoch": 0.10229600391084248, "grad_norm": 499.2225341796875, "learning_rate": 4.999975045782486e-05, "loss": 150.2334, "step": 25320 }, { "epoch": 0.10233640517621012, "grad_norm": 1919.6650390625, "learning_rate": 4.9999734617773405e-05, "loss": 93.5073, "step": 25330 }, { "epoch": 0.10237680644157775, "grad_norm": 628.6290283203125, "learning_rate": 4.9999718290339256e-05, "loss": 138.5961, "step": 25340 }, { "epoch": 0.10241720770694539, "grad_norm": 877.5479125976562, "learning_rate": 4.999970147552272e-05, "loss": 106.7546, "step": 25350 }, { "epoch": 0.10245760897231301, "grad_norm": 426.84027099609375, "learning_rate": 4.999968417332415e-05, "loss": 102.9967, "step": 25360 }, { "epoch": 0.10249801023768064, "grad_norm": 1005.803466796875, "learning_rate": 4.9999666383743854e-05, "loss": 133.2389, "step": 25370 }, { "epoch": 0.10253841150304828, "grad_norm": 973.8045654296875, "learning_rate": 4.999964810678219e-05, "loss": 127.7309, "step": 25380 }, { "epoch": 0.1025788127684159, "grad_norm": 1207.312255859375, "learning_rate": 4.9999629342439524e-05, "loss": 91.1231, "step": 25390 }, { "epoch": 0.10261921403378353, "grad_norm": 2072.83837890625, "learning_rate": 4.999961009071621e-05, "loss": 154.6031, "step": 25400 }, { "epoch": 0.10265961529915117, "grad_norm": 547.6760864257812, "learning_rate": 4.999959035161263e-05, "loss": 112.7586, "step": 25410 }, { "epoch": 0.1027000165645188, "grad_norm": 784.879150390625, "learning_rate": 4.999957012512916e-05, "loss": 104.8253, "step": 25420 }, { "epoch": 0.10274041782988644, "grad_norm": 625.8154296875, "learning_rate": 4.99995494112662e-05, "loss": 80.9909, "step": 25430 }, { "epoch": 0.10278081909525406, "grad_norm": 976.3950805664062, "learning_rate": 4.999952821002415e-05, "loss": 121.5834, "step": 25440 }, { "epoch": 0.10282122036062169, "grad_norm": 658.44140625, "learning_rate": 4.999950652140343e-05, "loss": 152.7022, "step": 25450 }, { "epoch": 0.10286162162598933, "grad_norm": 948.6815795898438, "learning_rate": 4.999948434540446e-05, "loss": 119.2311, "step": 25460 }, { "epoch": 0.10290202289135696, "grad_norm": 2785.1484375, "learning_rate": 4.999946168202767e-05, "loss": 123.5097, "step": 25470 }, { "epoch": 0.10294242415672458, "grad_norm": 2336.2509765625, "learning_rate": 4.999943853127351e-05, "loss": 92.8932, "step": 25480 }, { "epoch": 0.10298282542209222, "grad_norm": 324.7106018066406, "learning_rate": 4.9999414893142425e-05, "loss": 84.0734, "step": 25490 }, { "epoch": 0.10302322668745985, "grad_norm": 1529.5069580078125, "learning_rate": 4.999939076763487e-05, "loss": 151.1412, "step": 25500 }, { "epoch": 0.10306362795282749, "grad_norm": 3243.043701171875, "learning_rate": 4.999936615475133e-05, "loss": 98.3063, "step": 25510 }, { "epoch": 0.10310402921819511, "grad_norm": 656.1243286132812, "learning_rate": 4.9999341054492265e-05, "loss": 132.48, "step": 25520 }, { "epoch": 0.10314443048356274, "grad_norm": 358.87744140625, "learning_rate": 4.999931546685819e-05, "loss": 160.6029, "step": 25530 }, { "epoch": 0.10318483174893038, "grad_norm": 1219.5975341796875, "learning_rate": 4.999928939184958e-05, "loss": 122.454, "step": 25540 }, { "epoch": 0.103225233014298, "grad_norm": 1476.14208984375, "learning_rate": 4.999926282946695e-05, "loss": 121.1642, "step": 25550 }, { "epoch": 0.10326563427966563, "grad_norm": 737.3023071289062, "learning_rate": 4.9999235779710826e-05, "loss": 68.9884, "step": 25560 }, { "epoch": 0.10330603554503327, "grad_norm": 0.0, "learning_rate": 4.999920824258173e-05, "loss": 70.6594, "step": 25570 }, { "epoch": 0.1033464368104009, "grad_norm": 585.1388549804688, "learning_rate": 4.999918021808019e-05, "loss": 82.4569, "step": 25580 }, { "epoch": 0.10338683807576854, "grad_norm": 1436.7415771484375, "learning_rate": 4.999915170620677e-05, "loss": 135.1959, "step": 25590 }, { "epoch": 0.10342723934113617, "grad_norm": 1880.6636962890625, "learning_rate": 4.999912270696202e-05, "loss": 110.9196, "step": 25600 }, { "epoch": 0.10346764060650379, "grad_norm": 435.46234130859375, "learning_rate": 4.9999093220346495e-05, "loss": 101.5219, "step": 25610 }, { "epoch": 0.10350804187187143, "grad_norm": 1327.9521484375, "learning_rate": 4.9999063246360786e-05, "loss": 92.5202, "step": 25620 }, { "epoch": 0.10354844313723906, "grad_norm": 1072.19775390625, "learning_rate": 4.9999032785005464e-05, "loss": 104.1919, "step": 25630 }, { "epoch": 0.10358884440260668, "grad_norm": 1694.939697265625, "learning_rate": 4.999900183628112e-05, "loss": 165.591, "step": 25640 }, { "epoch": 0.10362924566797432, "grad_norm": 773.9560546875, "learning_rate": 4.999897040018837e-05, "loss": 104.817, "step": 25650 }, { "epoch": 0.10366964693334195, "grad_norm": 1010.9199829101562, "learning_rate": 4.9998938476727826e-05, "loss": 114.004, "step": 25660 }, { "epoch": 0.10371004819870959, "grad_norm": 2661.3837890625, "learning_rate": 4.99989060659001e-05, "loss": 99.0087, "step": 25670 }, { "epoch": 0.10375044946407722, "grad_norm": 553.9918823242188, "learning_rate": 4.999887316770584e-05, "loss": 72.9581, "step": 25680 }, { "epoch": 0.10379085072944484, "grad_norm": 580.9917602539062, "learning_rate": 4.999883978214567e-05, "loss": 103.5152, "step": 25690 }, { "epoch": 0.10383125199481248, "grad_norm": 4983.98974609375, "learning_rate": 4.999880590922025e-05, "loss": 135.9269, "step": 25700 }, { "epoch": 0.10387165326018011, "grad_norm": 1303.5921630859375, "learning_rate": 4.999877154893023e-05, "loss": 125.694, "step": 25710 }, { "epoch": 0.10391205452554773, "grad_norm": 416.0110778808594, "learning_rate": 4.9998736701276295e-05, "loss": 61.3954, "step": 25720 }, { "epoch": 0.10395245579091537, "grad_norm": 1457.8369140625, "learning_rate": 4.999870136625912e-05, "loss": 148.0761, "step": 25730 }, { "epoch": 0.103992857056283, "grad_norm": 758.3458251953125, "learning_rate": 4.999866554387939e-05, "loss": 94.0594, "step": 25740 }, { "epoch": 0.10403325832165064, "grad_norm": 1217.02978515625, "learning_rate": 4.999862923413781e-05, "loss": 101.6771, "step": 25750 }, { "epoch": 0.10407365958701827, "grad_norm": 1469.3179931640625, "learning_rate": 4.9998592437035076e-05, "loss": 119.159, "step": 25760 }, { "epoch": 0.10411406085238589, "grad_norm": 2354.025390625, "learning_rate": 4.9998555152571914e-05, "loss": 115.2768, "step": 25770 }, { "epoch": 0.10415446211775353, "grad_norm": 789.3027954101562, "learning_rate": 4.999851738074904e-05, "loss": 73.4068, "step": 25780 }, { "epoch": 0.10419486338312116, "grad_norm": 841.7200317382812, "learning_rate": 4.9998479121567214e-05, "loss": 134.448, "step": 25790 }, { "epoch": 0.10423526464848878, "grad_norm": 431.4704895019531, "learning_rate": 4.9998440375027166e-05, "loss": 150.1897, "step": 25800 }, { "epoch": 0.10427566591385642, "grad_norm": 508.6786804199219, "learning_rate": 4.999840114112965e-05, "loss": 82.7051, "step": 25810 }, { "epoch": 0.10431606717922405, "grad_norm": 818.539794921875, "learning_rate": 4.999836141987543e-05, "loss": 138.6107, "step": 25820 }, { "epoch": 0.10435646844459169, "grad_norm": 503.4788513183594, "learning_rate": 4.999832121126529e-05, "loss": 119.86, "step": 25830 }, { "epoch": 0.10439686970995932, "grad_norm": 838.92041015625, "learning_rate": 4.99982805153e-05, "loss": 121.616, "step": 25840 }, { "epoch": 0.10443727097532694, "grad_norm": 631.1499633789062, "learning_rate": 4.9998239331980366e-05, "loss": 85.7237, "step": 25850 }, { "epoch": 0.10447767224069458, "grad_norm": 566.1675415039062, "learning_rate": 4.999819766130719e-05, "loss": 91.9205, "step": 25860 }, { "epoch": 0.10451807350606221, "grad_norm": 0.0, "learning_rate": 4.999815550328128e-05, "loss": 88.1384, "step": 25870 }, { "epoch": 0.10455847477142984, "grad_norm": 735.2154541015625, "learning_rate": 4.9998112857903454e-05, "loss": 114.0636, "step": 25880 }, { "epoch": 0.10459887603679748, "grad_norm": 1965.4488525390625, "learning_rate": 4.9998069725174546e-05, "loss": 117.463, "step": 25890 }, { "epoch": 0.1046392773021651, "grad_norm": 954.0660400390625, "learning_rate": 4.9998026105095405e-05, "loss": 110.6166, "step": 25900 }, { "epoch": 0.10467967856753274, "grad_norm": 2468.442626953125, "learning_rate": 4.9997981997666874e-05, "loss": 118.2137, "step": 25910 }, { "epoch": 0.10472007983290037, "grad_norm": 1254.0142822265625, "learning_rate": 4.999793740288982e-05, "loss": 74.752, "step": 25920 }, { "epoch": 0.104760481098268, "grad_norm": 625.595458984375, "learning_rate": 4.999789232076509e-05, "loss": 89.1264, "step": 25930 }, { "epoch": 0.10480088236363563, "grad_norm": 579.2570190429688, "learning_rate": 4.999784675129359e-05, "loss": 128.8163, "step": 25940 }, { "epoch": 0.10484128362900326, "grad_norm": 4238.0439453125, "learning_rate": 4.999780069447619e-05, "loss": 135.1213, "step": 25950 }, { "epoch": 0.10488168489437089, "grad_norm": 2267.60693359375, "learning_rate": 4.9997754150313815e-05, "loss": 106.0803, "step": 25960 }, { "epoch": 0.10492208615973853, "grad_norm": 1275.7972412109375, "learning_rate": 4.999770711880734e-05, "loss": 114.9827, "step": 25970 }, { "epoch": 0.10496248742510615, "grad_norm": 1259.9520263671875, "learning_rate": 4.999765959995769e-05, "loss": 122.4034, "step": 25980 }, { "epoch": 0.10500288869047379, "grad_norm": 1249.4486083984375, "learning_rate": 4.99976115937658e-05, "loss": 113.4047, "step": 25990 }, { "epoch": 0.10504328995584142, "grad_norm": 829.5191650390625, "learning_rate": 4.999756310023261e-05, "loss": 100.0737, "step": 26000 }, { "epoch": 0.10508369122120904, "grad_norm": 1288.53515625, "learning_rate": 4.999751411935905e-05, "loss": 86.3596, "step": 26010 }, { "epoch": 0.10512409248657668, "grad_norm": 1140.1302490234375, "learning_rate": 4.999746465114609e-05, "loss": 62.8034, "step": 26020 }, { "epoch": 0.10516449375194431, "grad_norm": 694.5878295898438, "learning_rate": 4.999741469559468e-05, "loss": 92.5841, "step": 26030 }, { "epoch": 0.10520489501731194, "grad_norm": 1500.5892333984375, "learning_rate": 4.99973642527058e-05, "loss": 110.6927, "step": 26040 }, { "epoch": 0.10524529628267958, "grad_norm": 874.5784301757812, "learning_rate": 4.999731332248044e-05, "loss": 63.3448, "step": 26050 }, { "epoch": 0.1052856975480472, "grad_norm": 932.64501953125, "learning_rate": 4.999726190491958e-05, "loss": 107.8189, "step": 26060 }, { "epoch": 0.10532609881341484, "grad_norm": 736.0410766601562, "learning_rate": 4.9997210000024236e-05, "loss": 99.0618, "step": 26070 }, { "epoch": 0.10536650007878247, "grad_norm": 1410.8321533203125, "learning_rate": 4.999715760779541e-05, "loss": 126.3873, "step": 26080 }, { "epoch": 0.1054069013441501, "grad_norm": 1724.4503173828125, "learning_rate": 4.999710472823414e-05, "loss": 104.2871, "step": 26090 }, { "epoch": 0.10544730260951773, "grad_norm": 881.2642822265625, "learning_rate": 4.9997051361341425e-05, "loss": 67.8849, "step": 26100 }, { "epoch": 0.10548770387488536, "grad_norm": 1165.669921875, "learning_rate": 4.999699750711833e-05, "loss": 99.132, "step": 26110 }, { "epoch": 0.10552810514025299, "grad_norm": 994.9390869140625, "learning_rate": 4.9996943165565905e-05, "loss": 62.6341, "step": 26120 }, { "epoch": 0.10556850640562063, "grad_norm": 1422.3941650390625, "learning_rate": 4.99968883366852e-05, "loss": 85.9586, "step": 26130 }, { "epoch": 0.10560890767098825, "grad_norm": 791.3880615234375, "learning_rate": 4.9996833020477285e-05, "loss": 76.5841, "step": 26140 }, { "epoch": 0.10564930893635589, "grad_norm": 1224.0389404296875, "learning_rate": 4.999677721694325e-05, "loss": 161.3394, "step": 26150 }, { "epoch": 0.10568971020172352, "grad_norm": 562.48486328125, "learning_rate": 4.9996720926084164e-05, "loss": 117.8859, "step": 26160 }, { "epoch": 0.10573011146709115, "grad_norm": 1088.6715087890625, "learning_rate": 4.999666414790113e-05, "loss": 119.7692, "step": 26170 }, { "epoch": 0.10577051273245879, "grad_norm": 716.9673461914062, "learning_rate": 4.999660688239527e-05, "loss": 85.0265, "step": 26180 }, { "epoch": 0.10581091399782641, "grad_norm": 469.45477294921875, "learning_rate": 4.999654912956769e-05, "loss": 75.7188, "step": 26190 }, { "epoch": 0.10585131526319404, "grad_norm": 434.962890625, "learning_rate": 4.9996490889419514e-05, "loss": 92.6741, "step": 26200 }, { "epoch": 0.10589171652856168, "grad_norm": 1787.8798828125, "learning_rate": 4.9996432161951875e-05, "loss": 101.0907, "step": 26210 }, { "epoch": 0.1059321177939293, "grad_norm": 619.8673706054688, "learning_rate": 4.999637294716593e-05, "loss": 144.3787, "step": 26220 }, { "epoch": 0.10597251905929694, "grad_norm": 1517.8831787109375, "learning_rate": 4.9996313245062823e-05, "loss": 106.3331, "step": 26230 }, { "epoch": 0.10601292032466457, "grad_norm": 7921.58203125, "learning_rate": 4.999625305564371e-05, "loss": 83.1559, "step": 26240 }, { "epoch": 0.1060533215900322, "grad_norm": 839.67822265625, "learning_rate": 4.9996192378909786e-05, "loss": 57.181, "step": 26250 }, { "epoch": 0.10609372285539984, "grad_norm": 696.2799682617188, "learning_rate": 4.999613121486222e-05, "loss": 49.9272, "step": 26260 }, { "epoch": 0.10613412412076746, "grad_norm": 1080.623291015625, "learning_rate": 4.99960695635022e-05, "loss": 120.3179, "step": 26270 }, { "epoch": 0.10617452538613509, "grad_norm": 499.45166015625, "learning_rate": 4.999600742483094e-05, "loss": 85.3591, "step": 26280 }, { "epoch": 0.10621492665150273, "grad_norm": 938.3174438476562, "learning_rate": 4.999594479884965e-05, "loss": 136.4883, "step": 26290 }, { "epoch": 0.10625532791687035, "grad_norm": 1555.7998046875, "learning_rate": 4.999588168555954e-05, "loss": 122.8322, "step": 26300 }, { "epoch": 0.106295729182238, "grad_norm": 4008.9033203125, "learning_rate": 4.999581808496185e-05, "loss": 91.5676, "step": 26310 }, { "epoch": 0.10633613044760562, "grad_norm": 2325.766845703125, "learning_rate": 4.999575399705783e-05, "loss": 85.4827, "step": 26320 }, { "epoch": 0.10637653171297325, "grad_norm": 954.0802612304688, "learning_rate": 4.999568942184871e-05, "loss": 161.0085, "step": 26330 }, { "epoch": 0.10641693297834089, "grad_norm": 1845.4427490234375, "learning_rate": 4.999562435933575e-05, "loss": 106.469, "step": 26340 }, { "epoch": 0.10645733424370851, "grad_norm": 1453.1446533203125, "learning_rate": 4.999555880952023e-05, "loss": 142.5144, "step": 26350 }, { "epoch": 0.10649773550907614, "grad_norm": 824.1065673828125, "learning_rate": 4.999549277240342e-05, "loss": 84.602, "step": 26360 }, { "epoch": 0.10653813677444378, "grad_norm": 3219.528564453125, "learning_rate": 4.999542624798661e-05, "loss": 79.4629, "step": 26370 }, { "epoch": 0.1065785380398114, "grad_norm": 655.1124267578125, "learning_rate": 4.999535923627109e-05, "loss": 103.1578, "step": 26380 }, { "epoch": 0.10661893930517904, "grad_norm": 840.5703125, "learning_rate": 4.999529173725819e-05, "loss": 146.742, "step": 26390 }, { "epoch": 0.10665934057054667, "grad_norm": 1991.2275390625, "learning_rate": 4.999522375094919e-05, "loss": 88.6063, "step": 26400 }, { "epoch": 0.1066997418359143, "grad_norm": 1580.2342529296875, "learning_rate": 4.999515527734545e-05, "loss": 113.3611, "step": 26410 }, { "epoch": 0.10674014310128194, "grad_norm": 1060.3555908203125, "learning_rate": 4.9995086316448284e-05, "loss": 176.4278, "step": 26420 }, { "epoch": 0.10678054436664956, "grad_norm": 2521.33447265625, "learning_rate": 4.999501686825904e-05, "loss": 104.2664, "step": 26430 }, { "epoch": 0.10682094563201719, "grad_norm": 1635.3087158203125, "learning_rate": 4.999494693277907e-05, "loss": 129.5747, "step": 26440 }, { "epoch": 0.10686134689738483, "grad_norm": 0.0, "learning_rate": 4.999487651000975e-05, "loss": 83.0078, "step": 26450 }, { "epoch": 0.10690174816275246, "grad_norm": 1445.0679931640625, "learning_rate": 4.9994805599952445e-05, "loss": 115.5746, "step": 26460 }, { "epoch": 0.1069421494281201, "grad_norm": 861.307861328125, "learning_rate": 4.999473420260853e-05, "loss": 63.1311, "step": 26470 }, { "epoch": 0.10698255069348772, "grad_norm": 2009.19189453125, "learning_rate": 4.999466231797941e-05, "loss": 119.9433, "step": 26480 }, { "epoch": 0.10702295195885535, "grad_norm": 1041.7818603515625, "learning_rate": 4.9994589946066475e-05, "loss": 134.1364, "step": 26490 }, { "epoch": 0.10706335322422299, "grad_norm": 717.532470703125, "learning_rate": 4.999451708687114e-05, "loss": 99.0042, "step": 26500 }, { "epoch": 0.10710375448959061, "grad_norm": 1365.530517578125, "learning_rate": 4.999444374039483e-05, "loss": 126.3179, "step": 26510 }, { "epoch": 0.10714415575495824, "grad_norm": 570.315673828125, "learning_rate": 4.999436990663897e-05, "loss": 75.446, "step": 26520 }, { "epoch": 0.10718455702032588, "grad_norm": 1172.0423583984375, "learning_rate": 4.9994295585605e-05, "loss": 117.2075, "step": 26530 }, { "epoch": 0.1072249582856935, "grad_norm": 773.590087890625, "learning_rate": 4.9994220777294364e-05, "loss": 90.5786, "step": 26540 }, { "epoch": 0.10726535955106115, "grad_norm": 983.9734497070312, "learning_rate": 4.999414548170853e-05, "loss": 89.9236, "step": 26550 }, { "epoch": 0.10730576081642877, "grad_norm": 1194.6666259765625, "learning_rate": 4.999406969884897e-05, "loss": 64.2603, "step": 26560 }, { "epoch": 0.1073461620817964, "grad_norm": 1597.0323486328125, "learning_rate": 4.9993993428717144e-05, "loss": 130.4407, "step": 26570 }, { "epoch": 0.10738656334716404, "grad_norm": 1673.6080322265625, "learning_rate": 4.999391667131455e-05, "loss": 163.3066, "step": 26580 }, { "epoch": 0.10742696461253166, "grad_norm": 663.078857421875, "learning_rate": 4.9993839426642685e-05, "loss": 130.6437, "step": 26590 }, { "epoch": 0.10746736587789929, "grad_norm": 1640.7069091796875, "learning_rate": 4.999376169470306e-05, "loss": 131.2924, "step": 26600 }, { "epoch": 0.10750776714326693, "grad_norm": 986.1610717773438, "learning_rate": 4.9993683475497174e-05, "loss": 115.7689, "step": 26610 }, { "epoch": 0.10754816840863456, "grad_norm": 322.355224609375, "learning_rate": 4.999360476902656e-05, "loss": 75.3167, "step": 26620 }, { "epoch": 0.1075885696740022, "grad_norm": 507.4998779296875, "learning_rate": 4.999352557529275e-05, "loss": 96.3537, "step": 26630 }, { "epoch": 0.10762897093936982, "grad_norm": 1944.9493408203125, "learning_rate": 4.99934458942973e-05, "loss": 145.2764, "step": 26640 }, { "epoch": 0.10766937220473745, "grad_norm": 2324.57470703125, "learning_rate": 4.999336572604175e-05, "loss": 90.6306, "step": 26650 }, { "epoch": 0.10770977347010509, "grad_norm": 1668.3519287109375, "learning_rate": 4.999328507052768e-05, "loss": 104.2174, "step": 26660 }, { "epoch": 0.10775017473547271, "grad_norm": 3942.664794921875, "learning_rate": 4.999320392775663e-05, "loss": 126.4141, "step": 26670 }, { "epoch": 0.10779057600084034, "grad_norm": 1391.0018310546875, "learning_rate": 4.999312229773022e-05, "loss": 83.7744, "step": 26680 }, { "epoch": 0.10783097726620798, "grad_norm": 1609.7559814453125, "learning_rate": 4.999304018045001e-05, "loss": 100.6681, "step": 26690 }, { "epoch": 0.1078713785315756, "grad_norm": 1184.3446044921875, "learning_rate": 4.999295757591762e-05, "loss": 95.7704, "step": 26700 }, { "epoch": 0.10791177979694325, "grad_norm": 2358.531494140625, "learning_rate": 4.9992874484134653e-05, "loss": 74.604, "step": 26710 }, { "epoch": 0.10795218106231087, "grad_norm": 576.525146484375, "learning_rate": 4.9992790905102734e-05, "loss": 114.726, "step": 26720 }, { "epoch": 0.1079925823276785, "grad_norm": 3263.18701171875, "learning_rate": 4.999270683882349e-05, "loss": 147.2892, "step": 26730 }, { "epoch": 0.10803298359304614, "grad_norm": 882.455322265625, "learning_rate": 4.999262228529855e-05, "loss": 127.5431, "step": 26740 }, { "epoch": 0.10807338485841377, "grad_norm": 1100.7166748046875, "learning_rate": 4.999253724452958e-05, "loss": 106.7672, "step": 26750 }, { "epoch": 0.10811378612378139, "grad_norm": 687.6595458984375, "learning_rate": 4.999245171651823e-05, "loss": 89.8419, "step": 26760 }, { "epoch": 0.10815418738914903, "grad_norm": 443.3103942871094, "learning_rate": 4.999236570126616e-05, "loss": 125.1742, "step": 26770 }, { "epoch": 0.10819458865451666, "grad_norm": 1187.34912109375, "learning_rate": 4.999227919877506e-05, "loss": 127.4198, "step": 26780 }, { "epoch": 0.1082349899198843, "grad_norm": 496.15240478515625, "learning_rate": 4.9992192209046603e-05, "loss": 117.4117, "step": 26790 }, { "epoch": 0.10827539118525192, "grad_norm": 1453.9019775390625, "learning_rate": 4.99921047320825e-05, "loss": 93.6351, "step": 26800 }, { "epoch": 0.10831579245061955, "grad_norm": 773.209228515625, "learning_rate": 4.999201676788445e-05, "loss": 105.4083, "step": 26810 }, { "epoch": 0.10835619371598719, "grad_norm": 1077.73193359375, "learning_rate": 4.999192831645416e-05, "loss": 113.7033, "step": 26820 }, { "epoch": 0.10839659498135482, "grad_norm": 3824.103515625, "learning_rate": 4.999183937779336e-05, "loss": 128.3056, "step": 26830 }, { "epoch": 0.10843699624672244, "grad_norm": 1111.69189453125, "learning_rate": 4.999174995190379e-05, "loss": 69.169, "step": 26840 }, { "epoch": 0.10847739751209008, "grad_norm": 355.9375, "learning_rate": 4.999166003878718e-05, "loss": 95.577, "step": 26850 }, { "epoch": 0.10851779877745771, "grad_norm": 566.212158203125, "learning_rate": 4.99915696384453e-05, "loss": 92.3288, "step": 26860 }, { "epoch": 0.10855820004282535, "grad_norm": 433.31170654296875, "learning_rate": 4.99914787508799e-05, "loss": 77.2623, "step": 26870 }, { "epoch": 0.10859860130819297, "grad_norm": 1091.27734375, "learning_rate": 4.999138737609276e-05, "loss": 97.2431, "step": 26880 }, { "epoch": 0.1086390025735606, "grad_norm": 779.114501953125, "learning_rate": 4.9991295514085644e-05, "loss": 90.7876, "step": 26890 }, { "epoch": 0.10867940383892824, "grad_norm": 1826.2213134765625, "learning_rate": 4.9991203164860365e-05, "loss": 164.2726, "step": 26900 }, { "epoch": 0.10871980510429587, "grad_norm": 1381.0306396484375, "learning_rate": 4.999111032841871e-05, "loss": 163.8954, "step": 26910 }, { "epoch": 0.10876020636966349, "grad_norm": 535.5016479492188, "learning_rate": 4.9991017004762496e-05, "loss": 85.5764, "step": 26920 }, { "epoch": 0.10880060763503113, "grad_norm": 822.9321899414062, "learning_rate": 4.999092319389354e-05, "loss": 117.8981, "step": 26930 }, { "epoch": 0.10884100890039876, "grad_norm": 800.758544921875, "learning_rate": 4.999082889581367e-05, "loss": 76.8381, "step": 26940 }, { "epoch": 0.1088814101657664, "grad_norm": 557.4238891601562, "learning_rate": 4.999073411052472e-05, "loss": 130.8072, "step": 26950 }, { "epoch": 0.10892181143113402, "grad_norm": 1034.1680908203125, "learning_rate": 4.9990638838028546e-05, "loss": 75.0051, "step": 26960 }, { "epoch": 0.10896221269650165, "grad_norm": 460.87469482421875, "learning_rate": 4.9990543078327e-05, "loss": 97.5543, "step": 26970 }, { "epoch": 0.10900261396186929, "grad_norm": 726.0296630859375, "learning_rate": 4.9990446831421955e-05, "loss": 97.844, "step": 26980 }, { "epoch": 0.10904301522723692, "grad_norm": 857.0488891601562, "learning_rate": 4.9990350097315275e-05, "loss": 110.277, "step": 26990 }, { "epoch": 0.10908341649260454, "grad_norm": 494.9609375, "learning_rate": 4.999025287600886e-05, "loss": 78.5458, "step": 27000 }, { "epoch": 0.10912381775797218, "grad_norm": 893.1571655273438, "learning_rate": 4.99901551675046e-05, "loss": 109.9289, "step": 27010 }, { "epoch": 0.10916421902333981, "grad_norm": 970.8618774414062, "learning_rate": 4.99900569718044e-05, "loss": 116.1883, "step": 27020 }, { "epoch": 0.10920462028870745, "grad_norm": 745.3575439453125, "learning_rate": 4.9989958288910164e-05, "loss": 161.3541, "step": 27030 }, { "epoch": 0.10924502155407508, "grad_norm": 804.6139526367188, "learning_rate": 4.998985911882384e-05, "loss": 105.6759, "step": 27040 }, { "epoch": 0.1092854228194427, "grad_norm": 988.0171508789062, "learning_rate": 4.998975946154734e-05, "loss": 167.7433, "step": 27050 }, { "epoch": 0.10932582408481034, "grad_norm": 570.0489501953125, "learning_rate": 4.998965931708261e-05, "loss": 102.1875, "step": 27060 }, { "epoch": 0.10936622535017797, "grad_norm": 677.1341552734375, "learning_rate": 4.998955868543161e-05, "loss": 128.7201, "step": 27070 }, { "epoch": 0.1094066266155456, "grad_norm": 905.8904418945312, "learning_rate": 4.99894575665963e-05, "loss": 107.9805, "step": 27080 }, { "epoch": 0.10944702788091323, "grad_norm": 737.1869506835938, "learning_rate": 4.9989355960578645e-05, "loss": 89.4114, "step": 27090 }, { "epoch": 0.10948742914628086, "grad_norm": 670.5479125976562, "learning_rate": 4.998925386738063e-05, "loss": 175.9301, "step": 27100 }, { "epoch": 0.1095278304116485, "grad_norm": 1188.797607421875, "learning_rate": 4.9989151287004244e-05, "loss": 90.327, "step": 27110 }, { "epoch": 0.10956823167701613, "grad_norm": 1030.21533203125, "learning_rate": 4.9989048219451495e-05, "loss": 63.2848, "step": 27120 }, { "epoch": 0.10960863294238375, "grad_norm": 539.4580688476562, "learning_rate": 4.998894466472438e-05, "loss": 104.4276, "step": 27130 }, { "epoch": 0.10964903420775139, "grad_norm": 826.8709106445312, "learning_rate": 4.998884062282492e-05, "loss": 92.5154, "step": 27140 }, { "epoch": 0.10968943547311902, "grad_norm": 916.9140014648438, "learning_rate": 4.998873609375516e-05, "loss": 104.0753, "step": 27150 }, { "epoch": 0.10972983673848664, "grad_norm": 478.9193115234375, "learning_rate": 4.998863107751711e-05, "loss": 117.0698, "step": 27160 }, { "epoch": 0.10977023800385428, "grad_norm": 944.5336303710938, "learning_rate": 4.9988525574112846e-05, "loss": 151.4209, "step": 27170 }, { "epoch": 0.10981063926922191, "grad_norm": 552.6795654296875, "learning_rate": 4.99884195835444e-05, "loss": 88.4628, "step": 27180 }, { "epoch": 0.10985104053458955, "grad_norm": 954.2455444335938, "learning_rate": 4.9988313105813856e-05, "loss": 112.3373, "step": 27190 }, { "epoch": 0.10989144179995718, "grad_norm": 936.270751953125, "learning_rate": 4.998820614092328e-05, "loss": 75.6448, "step": 27200 }, { "epoch": 0.1099318430653248, "grad_norm": 777.1414184570312, "learning_rate": 4.9988098688874763e-05, "loss": 96.6067, "step": 27210 }, { "epoch": 0.10997224433069244, "grad_norm": 1599.469482421875, "learning_rate": 4.9987990749670395e-05, "loss": 126.3036, "step": 27220 }, { "epoch": 0.11001264559606007, "grad_norm": 398.20196533203125, "learning_rate": 4.9987882323312287e-05, "loss": 86.396, "step": 27230 }, { "epoch": 0.1100530468614277, "grad_norm": 1596.5191650390625, "learning_rate": 4.998777340980254e-05, "loss": 93.4679, "step": 27240 }, { "epoch": 0.11009344812679533, "grad_norm": 807.2232055664062, "learning_rate": 4.998766400914329e-05, "loss": 75.585, "step": 27250 }, { "epoch": 0.11013384939216296, "grad_norm": 1436.22216796875, "learning_rate": 4.9987554121336666e-05, "loss": 102.0856, "step": 27260 }, { "epoch": 0.1101742506575306, "grad_norm": 1310.2745361328125, "learning_rate": 4.998744374638481e-05, "loss": 102.2359, "step": 27270 }, { "epoch": 0.11021465192289823, "grad_norm": 1259.0927734375, "learning_rate": 4.998733288428987e-05, "loss": 82.0602, "step": 27280 }, { "epoch": 0.11025505318826585, "grad_norm": 2409.748046875, "learning_rate": 4.998722153505402e-05, "loss": 95.3449, "step": 27290 }, { "epoch": 0.11029545445363349, "grad_norm": 1046.286865234375, "learning_rate": 4.998710969867942e-05, "loss": 113.0733, "step": 27300 }, { "epoch": 0.11033585571900112, "grad_norm": 630.7343139648438, "learning_rate": 4.9986997375168246e-05, "loss": 68.2742, "step": 27310 }, { "epoch": 0.11037625698436875, "grad_norm": 553.9161987304688, "learning_rate": 4.9986884564522696e-05, "loss": 145.594, "step": 27320 }, { "epoch": 0.11041665824973639, "grad_norm": 823.818603515625, "learning_rate": 4.998677126674497e-05, "loss": 106.8122, "step": 27330 }, { "epoch": 0.11045705951510401, "grad_norm": 1561.4769287109375, "learning_rate": 4.9986657481837277e-05, "loss": 154.897, "step": 27340 }, { "epoch": 0.11049746078047164, "grad_norm": 1459.7176513671875, "learning_rate": 4.9986543209801825e-05, "loss": 140.0131, "step": 27350 }, { "epoch": 0.11053786204583928, "grad_norm": 797.2266235351562, "learning_rate": 4.998642845064086e-05, "loss": 117.8721, "step": 27360 }, { "epoch": 0.1105782633112069, "grad_norm": 902.2604370117188, "learning_rate": 4.9986313204356594e-05, "loss": 169.9797, "step": 27370 }, { "epoch": 0.11061866457657454, "grad_norm": 504.2903137207031, "learning_rate": 4.998619747095129e-05, "loss": 89.4714, "step": 27380 }, { "epoch": 0.11065906584194217, "grad_norm": 713.7076416015625, "learning_rate": 4.998608125042721e-05, "loss": 97.6478, "step": 27390 }, { "epoch": 0.1106994671073098, "grad_norm": 1151.92822265625, "learning_rate": 4.9985964542786614e-05, "loss": 115.0625, "step": 27400 }, { "epoch": 0.11073986837267744, "grad_norm": 1444.137451171875, "learning_rate": 4.9985847348031764e-05, "loss": 119.7467, "step": 27410 }, { "epoch": 0.11078026963804506, "grad_norm": 3067.8701171875, "learning_rate": 4.998572966616496e-05, "loss": 119.108, "step": 27420 }, { "epoch": 0.11082067090341269, "grad_norm": 759.7105712890625, "learning_rate": 4.99856114971885e-05, "loss": 99.5433, "step": 27430 }, { "epoch": 0.11086107216878033, "grad_norm": 1036.834716796875, "learning_rate": 4.998549284110468e-05, "loss": 85.0693, "step": 27440 }, { "epoch": 0.11090147343414795, "grad_norm": 1072.163818359375, "learning_rate": 4.998537369791581e-05, "loss": 122.0357, "step": 27450 }, { "epoch": 0.1109418746995156, "grad_norm": 1213.7109375, "learning_rate": 4.9985254067624215e-05, "loss": 81.71, "step": 27460 }, { "epoch": 0.11098227596488322, "grad_norm": 641.5134887695312, "learning_rate": 4.998513395023223e-05, "loss": 111.3763, "step": 27470 }, { "epoch": 0.11102267723025085, "grad_norm": 1474.4072265625, "learning_rate": 4.99850133457422e-05, "loss": 103.8446, "step": 27480 }, { "epoch": 0.11106307849561849, "grad_norm": 837.8353271484375, "learning_rate": 4.9984892254156465e-05, "loss": 85.3186, "step": 27490 }, { "epoch": 0.11110347976098611, "grad_norm": 714.3938598632812, "learning_rate": 4.99847706754774e-05, "loss": 116.3188, "step": 27500 }, { "epoch": 0.11114388102635374, "grad_norm": 1514.875, "learning_rate": 4.998464860970736e-05, "loss": 61.2618, "step": 27510 }, { "epoch": 0.11118428229172138, "grad_norm": 122.57615661621094, "learning_rate": 4.998452605684874e-05, "loss": 103.8598, "step": 27520 }, { "epoch": 0.111224683557089, "grad_norm": 700.09521484375, "learning_rate": 4.9984403016903915e-05, "loss": 96.256, "step": 27530 }, { "epoch": 0.11126508482245664, "grad_norm": 2113.217041015625, "learning_rate": 4.998427948987528e-05, "loss": 94.1343, "step": 27540 }, { "epoch": 0.11130548608782427, "grad_norm": 710.7130126953125, "learning_rate": 4.998415547576527e-05, "loss": 134.276, "step": 27550 }, { "epoch": 0.1113458873531919, "grad_norm": 974.3263549804688, "learning_rate": 4.9984030974576285e-05, "loss": 70.3129, "step": 27560 }, { "epoch": 0.11138628861855954, "grad_norm": 1219.5396728515625, "learning_rate": 4.998390598631075e-05, "loss": 119.3183, "step": 27570 }, { "epoch": 0.11142668988392716, "grad_norm": 2306.385498046875, "learning_rate": 4.998378051097111e-05, "loss": 96.3586, "step": 27580 }, { "epoch": 0.11146709114929479, "grad_norm": 676.72998046875, "learning_rate": 4.99836545485598e-05, "loss": 87.2172, "step": 27590 }, { "epoch": 0.11150749241466243, "grad_norm": 1688.67578125, "learning_rate": 4.998352809907928e-05, "loss": 158.3624, "step": 27600 }, { "epoch": 0.11154789368003006, "grad_norm": 3076.226806640625, "learning_rate": 4.9983401162532025e-05, "loss": 105.7579, "step": 27610 }, { "epoch": 0.1115882949453977, "grad_norm": 789.8070678710938, "learning_rate": 4.99832737389205e-05, "loss": 114.1961, "step": 27620 }, { "epoch": 0.11162869621076532, "grad_norm": 1299.20166015625, "learning_rate": 4.998314582824719e-05, "loss": 85.1794, "step": 27630 }, { "epoch": 0.11166909747613295, "grad_norm": 611.26123046875, "learning_rate": 4.998301743051459e-05, "loss": 63.9051, "step": 27640 }, { "epoch": 0.11170949874150059, "grad_norm": 967.321044921875, "learning_rate": 4.99828885457252e-05, "loss": 127.1219, "step": 27650 }, { "epoch": 0.11174990000686821, "grad_norm": 1000.7772216796875, "learning_rate": 4.998275917388154e-05, "loss": 103.4082, "step": 27660 }, { "epoch": 0.11179030127223584, "grad_norm": 627.480224609375, "learning_rate": 4.9982629314986126e-05, "loss": 96.7553, "step": 27670 }, { "epoch": 0.11183070253760348, "grad_norm": 1001.6890869140625, "learning_rate": 4.998249896904149e-05, "loss": 92.7391, "step": 27680 }, { "epoch": 0.1118711038029711, "grad_norm": 781.9347534179688, "learning_rate": 4.998236813605017e-05, "loss": 86.0648, "step": 27690 }, { "epoch": 0.11191150506833875, "grad_norm": 2122.2900390625, "learning_rate": 4.998223681601473e-05, "loss": 106.2797, "step": 27700 }, { "epoch": 0.11195190633370637, "grad_norm": 2083.68798828125, "learning_rate": 4.9982105008937726e-05, "loss": 91.295, "step": 27710 }, { "epoch": 0.111992307599074, "grad_norm": 1193.2694091796875, "learning_rate": 4.998197271482171e-05, "loss": 69.5858, "step": 27720 }, { "epoch": 0.11203270886444164, "grad_norm": 2355.148193359375, "learning_rate": 4.998183993366928e-05, "loss": 133.6512, "step": 27730 }, { "epoch": 0.11207311012980926, "grad_norm": 745.9656372070312, "learning_rate": 4.998170666548302e-05, "loss": 99.9143, "step": 27740 }, { "epoch": 0.11211351139517689, "grad_norm": 559.490478515625, "learning_rate": 4.998157291026553e-05, "loss": 116.1021, "step": 27750 }, { "epoch": 0.11215391266054453, "grad_norm": 613.9931030273438, "learning_rate": 4.998143866801942e-05, "loss": 79.9714, "step": 27760 }, { "epoch": 0.11219431392591216, "grad_norm": 838.7278442382812, "learning_rate": 4.9981303938747286e-05, "loss": 74.1118, "step": 27770 }, { "epoch": 0.1122347151912798, "grad_norm": 731.7330322265625, "learning_rate": 4.9981168722451776e-05, "loss": 85.4063, "step": 27780 }, { "epoch": 0.11227511645664742, "grad_norm": 826.3333129882812, "learning_rate": 4.998103301913552e-05, "loss": 83.0917, "step": 27790 }, { "epoch": 0.11231551772201505, "grad_norm": 1546.168701171875, "learning_rate": 4.998089682880117e-05, "loss": 128.3176, "step": 27800 }, { "epoch": 0.11235591898738269, "grad_norm": 1162.286865234375, "learning_rate": 4.998076015145138e-05, "loss": 122.4255, "step": 27810 }, { "epoch": 0.11239632025275031, "grad_norm": 1132.9892578125, "learning_rate": 4.9980622987088795e-05, "loss": 98.7629, "step": 27820 }, { "epoch": 0.11243672151811794, "grad_norm": 1079.5322265625, "learning_rate": 4.9980485335716114e-05, "loss": 119.6111, "step": 27830 }, { "epoch": 0.11247712278348558, "grad_norm": 1292.2762451171875, "learning_rate": 4.9980347197336005e-05, "loss": 72.3516, "step": 27840 }, { "epoch": 0.1125175240488532, "grad_norm": 767.2022094726562, "learning_rate": 4.998020857195117e-05, "loss": 144.7648, "step": 27850 }, { "epoch": 0.11255792531422085, "grad_norm": 679.2119750976562, "learning_rate": 4.998006945956431e-05, "loss": 117.3672, "step": 27860 }, { "epoch": 0.11259832657958847, "grad_norm": 1033.4124755859375, "learning_rate": 4.997992986017813e-05, "loss": 91.8273, "step": 27870 }, { "epoch": 0.1126387278449561, "grad_norm": 1781.468505859375, "learning_rate": 4.997978977379536e-05, "loss": 108.8957, "step": 27880 }, { "epoch": 0.11267912911032374, "grad_norm": 875.4254150390625, "learning_rate": 4.9979649200418735e-05, "loss": 65.4817, "step": 27890 }, { "epoch": 0.11271953037569137, "grad_norm": 760.1170654296875, "learning_rate": 4.997950814005098e-05, "loss": 105.1915, "step": 27900 }, { "epoch": 0.11275993164105899, "grad_norm": 816.7872924804688, "learning_rate": 4.997936659269486e-05, "loss": 72.9768, "step": 27910 }, { "epoch": 0.11280033290642663, "grad_norm": 1037.8052978515625, "learning_rate": 4.997922455835311e-05, "loss": 84.1713, "step": 27920 }, { "epoch": 0.11284073417179426, "grad_norm": 599.27197265625, "learning_rate": 4.9979082037028535e-05, "loss": 89.3494, "step": 27930 }, { "epoch": 0.1128811354371619, "grad_norm": 2486.75830078125, "learning_rate": 4.9978939028723894e-05, "loss": 96.0437, "step": 27940 }, { "epoch": 0.11292153670252952, "grad_norm": 933.3982543945312, "learning_rate": 4.9978795533441966e-05, "loss": 159.8028, "step": 27950 }, { "epoch": 0.11296193796789715, "grad_norm": 783.9051513671875, "learning_rate": 4.997865155118557e-05, "loss": 80.7255, "step": 27960 }, { "epoch": 0.11300233923326479, "grad_norm": 1356.3843994140625, "learning_rate": 4.9978507081957494e-05, "loss": 135.2147, "step": 27970 }, { "epoch": 0.11304274049863242, "grad_norm": 2129.216796875, "learning_rate": 4.997836212576057e-05, "loss": 73.888, "step": 27980 }, { "epoch": 0.11308314176400004, "grad_norm": 959.6722412109375, "learning_rate": 4.9978216682597614e-05, "loss": 159.4311, "step": 27990 }, { "epoch": 0.11312354302936768, "grad_norm": 2303.733642578125, "learning_rate": 4.997807075247146e-05, "loss": 98.653, "step": 28000 }, { "epoch": 0.11316394429473531, "grad_norm": 906.4110717773438, "learning_rate": 4.997792433538496e-05, "loss": 97.6867, "step": 28010 }, { "epoch": 0.11320434556010295, "grad_norm": 814.7039184570312, "learning_rate": 4.997777743134097e-05, "loss": 91.2434, "step": 28020 }, { "epoch": 0.11324474682547057, "grad_norm": 496.1783752441406, "learning_rate": 4.9977630040342346e-05, "loss": 109.5629, "step": 28030 }, { "epoch": 0.1132851480908382, "grad_norm": 1374.5599365234375, "learning_rate": 4.997748216239196e-05, "loss": 115.9625, "step": 28040 }, { "epoch": 0.11332554935620584, "grad_norm": 1610.67529296875, "learning_rate": 4.997733379749271e-05, "loss": 138.7809, "step": 28050 }, { "epoch": 0.11336595062157347, "grad_norm": 655.5691528320312, "learning_rate": 4.9977184945647473e-05, "loss": 72.344, "step": 28060 }, { "epoch": 0.11340635188694109, "grad_norm": 1008.762939453125, "learning_rate": 4.9977035606859156e-05, "loss": 170.6503, "step": 28070 }, { "epoch": 0.11344675315230873, "grad_norm": 692.5789184570312, "learning_rate": 4.9976885781130665e-05, "loss": 79.409, "step": 28080 }, { "epoch": 0.11348715441767636, "grad_norm": 2432.585693359375, "learning_rate": 4.9976735468464935e-05, "loss": 181.1055, "step": 28090 }, { "epoch": 0.113527555683044, "grad_norm": 1090.251220703125, "learning_rate": 4.997658466886489e-05, "loss": 114.0751, "step": 28100 }, { "epoch": 0.11356795694841162, "grad_norm": 861.4591674804688, "learning_rate": 4.997643338233346e-05, "loss": 97.7165, "step": 28110 }, { "epoch": 0.11360835821377925, "grad_norm": 1879.217529296875, "learning_rate": 4.997628160887361e-05, "loss": 99.4693, "step": 28120 }, { "epoch": 0.11364875947914689, "grad_norm": 617.5592651367188, "learning_rate": 4.997612934848829e-05, "loss": 98.8888, "step": 28130 }, { "epoch": 0.11368916074451452, "grad_norm": 671.50341796875, "learning_rate": 4.997597660118046e-05, "loss": 96.949, "step": 28140 }, { "epoch": 0.11372956200988214, "grad_norm": 800.4517211914062, "learning_rate": 4.9975823366953124e-05, "loss": 71.7921, "step": 28150 }, { "epoch": 0.11376996327524978, "grad_norm": 889.1356201171875, "learning_rate": 4.9975669645809244e-05, "loss": 124.3523, "step": 28160 }, { "epoch": 0.11381036454061741, "grad_norm": 1013.8363037109375, "learning_rate": 4.997551543775182e-05, "loss": 111.264, "step": 28170 }, { "epoch": 0.11385076580598505, "grad_norm": 1395.8243408203125, "learning_rate": 4.997536074278387e-05, "loss": 112.4192, "step": 28180 }, { "epoch": 0.11389116707135268, "grad_norm": 1430.334228515625, "learning_rate": 4.997520556090841e-05, "loss": 121.1184, "step": 28190 }, { "epoch": 0.1139315683367203, "grad_norm": 1214.85400390625, "learning_rate": 4.9975049892128455e-05, "loss": 87.6032, "step": 28200 }, { "epoch": 0.11397196960208794, "grad_norm": 867.681396484375, "learning_rate": 4.9974893736447045e-05, "loss": 90.5741, "step": 28210 }, { "epoch": 0.11401237086745557, "grad_norm": 759.8204345703125, "learning_rate": 4.997473709386722e-05, "loss": 79.1436, "step": 28220 }, { "epoch": 0.1140527721328232, "grad_norm": 1119.670654296875, "learning_rate": 4.997457996439204e-05, "loss": 89.3735, "step": 28230 }, { "epoch": 0.11409317339819083, "grad_norm": 843.7247924804688, "learning_rate": 4.997442234802456e-05, "loss": 111.084, "step": 28240 }, { "epoch": 0.11413357466355846, "grad_norm": 775.5316772460938, "learning_rate": 4.997426424476787e-05, "loss": 96.1073, "step": 28250 }, { "epoch": 0.1141739759289261, "grad_norm": 1531.5697021484375, "learning_rate": 4.9974105654625036e-05, "loss": 82.1885, "step": 28260 }, { "epoch": 0.11421437719429373, "grad_norm": 693.6162719726562, "learning_rate": 4.997394657759915e-05, "loss": 119.7303, "step": 28270 }, { "epoch": 0.11425477845966135, "grad_norm": 4390.083984375, "learning_rate": 4.997378701369332e-05, "loss": 156.4863, "step": 28280 }, { "epoch": 0.11429517972502899, "grad_norm": 781.9625854492188, "learning_rate": 4.9973626962910656e-05, "loss": 90.4073, "step": 28290 }, { "epoch": 0.11433558099039662, "grad_norm": 1085.052978515625, "learning_rate": 4.9973466425254286e-05, "loss": 74.8492, "step": 28300 }, { "epoch": 0.11437598225576424, "grad_norm": 975.8035278320312, "learning_rate": 4.997330540072732e-05, "loss": 108.0216, "step": 28310 }, { "epoch": 0.11441638352113188, "grad_norm": 664.2997436523438, "learning_rate": 4.997314388933291e-05, "loss": 96.9389, "step": 28320 }, { "epoch": 0.11445678478649951, "grad_norm": 395.10052490234375, "learning_rate": 4.997298189107421e-05, "loss": 114.4039, "step": 28330 }, { "epoch": 0.11449718605186715, "grad_norm": 894.0714721679688, "learning_rate": 4.9972819405954366e-05, "loss": 66.7973, "step": 28340 }, { "epoch": 0.11453758731723478, "grad_norm": 3636.150390625, "learning_rate": 4.997265643397654e-05, "loss": 137.2116, "step": 28350 }, { "epoch": 0.1145779885826024, "grad_norm": 543.2451782226562, "learning_rate": 4.997249297514394e-05, "loss": 72.1923, "step": 28360 }, { "epoch": 0.11461838984797004, "grad_norm": 1202.521484375, "learning_rate": 4.997232902945971e-05, "loss": 101.5618, "step": 28370 }, { "epoch": 0.11465879111333767, "grad_norm": 1286.06494140625, "learning_rate": 4.997216459692709e-05, "loss": 99.6231, "step": 28380 }, { "epoch": 0.1146991923787053, "grad_norm": 747.28271484375, "learning_rate": 4.997199967754925e-05, "loss": 101.1043, "step": 28390 }, { "epoch": 0.11473959364407293, "grad_norm": 397.4785461425781, "learning_rate": 4.997183427132943e-05, "loss": 99.9764, "step": 28400 }, { "epoch": 0.11477999490944056, "grad_norm": 1810.4814453125, "learning_rate": 4.997166837827084e-05, "loss": 107.5269, "step": 28410 }, { "epoch": 0.1148203961748082, "grad_norm": 1796.510009765625, "learning_rate": 4.997150199837671e-05, "loss": 98.7808, "step": 28420 }, { "epoch": 0.11486079744017583, "grad_norm": 997.3446044921875, "learning_rate": 4.997133513165031e-05, "loss": 101.5585, "step": 28430 }, { "epoch": 0.11490119870554345, "grad_norm": 575.0126342773438, "learning_rate": 4.9971167778094863e-05, "loss": 115.6277, "step": 28440 }, { "epoch": 0.11494159997091109, "grad_norm": 1271.26953125, "learning_rate": 4.997099993771365e-05, "loss": 60.656, "step": 28450 }, { "epoch": 0.11498200123627872, "grad_norm": 1329.1561279296875, "learning_rate": 4.997083161050994e-05, "loss": 83.6766, "step": 28460 }, { "epoch": 0.11502240250164635, "grad_norm": 527.1378784179688, "learning_rate": 4.9970662796487e-05, "loss": 87.6013, "step": 28470 }, { "epoch": 0.11506280376701399, "grad_norm": 888.7230224609375, "learning_rate": 4.997049349564814e-05, "loss": 86.2665, "step": 28480 }, { "epoch": 0.11510320503238161, "grad_norm": 338.1375732421875, "learning_rate": 4.997032370799666e-05, "loss": 130.7292, "step": 28490 }, { "epoch": 0.11514360629774925, "grad_norm": 2081.712158203125, "learning_rate": 4.997015343353585e-05, "loss": 128.4502, "step": 28500 }, { "epoch": 0.11518400756311688, "grad_norm": 1699.38623046875, "learning_rate": 4.996998267226905e-05, "loss": 115.8591, "step": 28510 }, { "epoch": 0.1152244088284845, "grad_norm": 467.2063293457031, "learning_rate": 4.996981142419959e-05, "loss": 73.2754, "step": 28520 }, { "epoch": 0.11526481009385214, "grad_norm": 804.551025390625, "learning_rate": 4.996963968933079e-05, "loss": 67.7496, "step": 28530 }, { "epoch": 0.11530521135921977, "grad_norm": 1599.342529296875, "learning_rate": 4.996946746766601e-05, "loss": 91.9091, "step": 28540 }, { "epoch": 0.1153456126245874, "grad_norm": 1292.8143310546875, "learning_rate": 4.996929475920862e-05, "loss": 113.546, "step": 28550 }, { "epoch": 0.11538601388995504, "grad_norm": 1679.7252197265625, "learning_rate": 4.9969121563961956e-05, "loss": 83.8834, "step": 28560 }, { "epoch": 0.11542641515532266, "grad_norm": 1275.5130615234375, "learning_rate": 4.9968947881929414e-05, "loss": 115.3006, "step": 28570 }, { "epoch": 0.1154668164206903, "grad_norm": 959.7946166992188, "learning_rate": 4.996877371311439e-05, "loss": 87.6582, "step": 28580 }, { "epoch": 0.11550721768605793, "grad_norm": 1284.703369140625, "learning_rate": 4.996859905752026e-05, "loss": 132.2889, "step": 28590 }, { "epoch": 0.11554761895142555, "grad_norm": 0.0, "learning_rate": 4.996842391515044e-05, "loss": 91.88, "step": 28600 }, { "epoch": 0.1155880202167932, "grad_norm": 1002.761962890625, "learning_rate": 4.996824828600834e-05, "loss": 125.265, "step": 28610 }, { "epoch": 0.11562842148216082, "grad_norm": 665.4733276367188, "learning_rate": 4.996807217009738e-05, "loss": 99.0348, "step": 28620 }, { "epoch": 0.11566882274752845, "grad_norm": 1002.7938232421875, "learning_rate": 4.996789556742101e-05, "loss": 75.1195, "step": 28630 }, { "epoch": 0.11570922401289609, "grad_norm": 858.1141357421875, "learning_rate": 4.996771847798265e-05, "loss": 73.2454, "step": 28640 }, { "epoch": 0.11574962527826371, "grad_norm": 1226.3555908203125, "learning_rate": 4.996754090178577e-05, "loss": 146.9862, "step": 28650 }, { "epoch": 0.11579002654363135, "grad_norm": 1391.651123046875, "learning_rate": 4.996736283883382e-05, "loss": 142.1579, "step": 28660 }, { "epoch": 0.11583042780899898, "grad_norm": 1489.2674560546875, "learning_rate": 4.9967184289130286e-05, "loss": 115.9638, "step": 28670 }, { "epoch": 0.1158708290743666, "grad_norm": 1078.4761962890625, "learning_rate": 4.9967005252678634e-05, "loss": 118.9085, "step": 28680 }, { "epoch": 0.11591123033973424, "grad_norm": 2236.838623046875, "learning_rate": 4.9966825729482364e-05, "loss": 130.9481, "step": 28690 }, { "epoch": 0.11595163160510187, "grad_norm": 862.959716796875, "learning_rate": 4.996664571954497e-05, "loss": 111.0184, "step": 28700 }, { "epoch": 0.1159920328704695, "grad_norm": 1136.712646484375, "learning_rate": 4.996646522286997e-05, "loss": 103.9295, "step": 28710 }, { "epoch": 0.11603243413583714, "grad_norm": 1675.60791015625, "learning_rate": 4.996628423946087e-05, "loss": 109.6175, "step": 28720 }, { "epoch": 0.11607283540120476, "grad_norm": 1874.5843505859375, "learning_rate": 4.996610276932121e-05, "loss": 85.4252, "step": 28730 }, { "epoch": 0.1161132366665724, "grad_norm": 1308.438232421875, "learning_rate": 4.996592081245451e-05, "loss": 115.684, "step": 28740 }, { "epoch": 0.11615363793194003, "grad_norm": 428.2576904296875, "learning_rate": 4.996573836886435e-05, "loss": 57.5053, "step": 28750 }, { "epoch": 0.11619403919730766, "grad_norm": 1469.609619140625, "learning_rate": 4.9965555438554254e-05, "loss": 77.9164, "step": 28760 }, { "epoch": 0.1162344404626753, "grad_norm": 2238.658447265625, "learning_rate": 4.9965372021527814e-05, "loss": 129.5649, "step": 28770 }, { "epoch": 0.11627484172804292, "grad_norm": 1731.619140625, "learning_rate": 4.996518811778858e-05, "loss": 92.2746, "step": 28780 }, { "epoch": 0.11631524299341055, "grad_norm": 2262.8583984375, "learning_rate": 4.996500372734015e-05, "loss": 126.2437, "step": 28790 }, { "epoch": 0.11635564425877819, "grad_norm": 1330.55908203125, "learning_rate": 4.9964818850186135e-05, "loss": 118.3177, "step": 28800 }, { "epoch": 0.11639604552414581, "grad_norm": 1293.2772216796875, "learning_rate": 4.9964633486330116e-05, "loss": 131.9247, "step": 28810 }, { "epoch": 0.11643644678951345, "grad_norm": 626.7706298828125, "learning_rate": 4.9964447635775714e-05, "loss": 80.6821, "step": 28820 }, { "epoch": 0.11647684805488108, "grad_norm": 680.4353637695312, "learning_rate": 4.996426129852655e-05, "loss": 79.4926, "step": 28830 }, { "epoch": 0.1165172493202487, "grad_norm": 1769.197998046875, "learning_rate": 4.996407447458626e-05, "loss": 105.4986, "step": 28840 }, { "epoch": 0.11655765058561635, "grad_norm": 1792.2196044921875, "learning_rate": 4.996388716395848e-05, "loss": 108.3764, "step": 28850 }, { "epoch": 0.11659805185098397, "grad_norm": 582.6719970703125, "learning_rate": 4.996369936664688e-05, "loss": 79.2789, "step": 28860 }, { "epoch": 0.1166384531163516, "grad_norm": 1305.8157958984375, "learning_rate": 4.99635110826551e-05, "loss": 112.6347, "step": 28870 }, { "epoch": 0.11667885438171924, "grad_norm": 1331.4168701171875, "learning_rate": 4.996332231198683e-05, "loss": 76.5, "step": 28880 }, { "epoch": 0.11671925564708686, "grad_norm": 1037.36767578125, "learning_rate": 4.996313305464573e-05, "loss": 159.2613, "step": 28890 }, { "epoch": 0.1167596569124545, "grad_norm": 1378.345458984375, "learning_rate": 4.99629433106355e-05, "loss": 157.8604, "step": 28900 }, { "epoch": 0.11680005817782213, "grad_norm": 846.559814453125, "learning_rate": 4.9962753079959836e-05, "loss": 112.2688, "step": 28910 }, { "epoch": 0.11684045944318976, "grad_norm": 1050.1434326171875, "learning_rate": 4.996256236262245e-05, "loss": 88.0946, "step": 28920 }, { "epoch": 0.1168808607085574, "grad_norm": 410.0029296875, "learning_rate": 4.996237115862706e-05, "loss": 59.4956, "step": 28930 }, { "epoch": 0.11692126197392502, "grad_norm": 573.1680908203125, "learning_rate": 4.99621794679774e-05, "loss": 111.6136, "step": 28940 }, { "epoch": 0.11696166323929265, "grad_norm": 869.5213012695312, "learning_rate": 4.996198729067719e-05, "loss": 146.8108, "step": 28950 }, { "epoch": 0.11700206450466029, "grad_norm": 634.8200073242188, "learning_rate": 4.99617946267302e-05, "loss": 119.8773, "step": 28960 }, { "epoch": 0.11704246577002791, "grad_norm": 1196.7987060546875, "learning_rate": 4.996160147614016e-05, "loss": 71.1213, "step": 28970 }, { "epoch": 0.11708286703539555, "grad_norm": 1217.557861328125, "learning_rate": 4.996140783891085e-05, "loss": 121.3136, "step": 28980 }, { "epoch": 0.11712326830076318, "grad_norm": 1255.81396484375, "learning_rate": 4.9961213715046045e-05, "loss": 116.6792, "step": 28990 }, { "epoch": 0.11716366956613081, "grad_norm": 627.504150390625, "learning_rate": 4.996101910454953e-05, "loss": 75.7639, "step": 29000 }, { "epoch": 0.11720407083149845, "grad_norm": 529.0521240234375, "learning_rate": 4.996082400742509e-05, "loss": 139.7255, "step": 29010 }, { "epoch": 0.11724447209686607, "grad_norm": 1349.41357421875, "learning_rate": 4.996062842367654e-05, "loss": 138.5896, "step": 29020 }, { "epoch": 0.1172848733622337, "grad_norm": 1360.37255859375, "learning_rate": 4.9960432353307686e-05, "loss": 80.0865, "step": 29030 }, { "epoch": 0.11732527462760134, "grad_norm": 1388.444091796875, "learning_rate": 4.996023579632236e-05, "loss": 112.3245, "step": 29040 }, { "epoch": 0.11736567589296897, "grad_norm": 1930.4471435546875, "learning_rate": 4.996003875272438e-05, "loss": 114.5048, "step": 29050 }, { "epoch": 0.1174060771583366, "grad_norm": 569.8203125, "learning_rate": 4.9959841222517596e-05, "loss": 62.5382, "step": 29060 }, { "epoch": 0.11744647842370423, "grad_norm": 455.6988830566406, "learning_rate": 4.9959643205705854e-05, "loss": 100.6463, "step": 29070 }, { "epoch": 0.11748687968907186, "grad_norm": 908.3660888671875, "learning_rate": 4.995944470229302e-05, "loss": 114.3609, "step": 29080 }, { "epoch": 0.1175272809544395, "grad_norm": 755.6206665039062, "learning_rate": 4.9959245712282966e-05, "loss": 95.5638, "step": 29090 }, { "epoch": 0.11756768221980712, "grad_norm": 0.0, "learning_rate": 4.9959046235679565e-05, "loss": 64.4007, "step": 29100 }, { "epoch": 0.11760808348517475, "grad_norm": 808.4530639648438, "learning_rate": 4.9958846272486704e-05, "loss": 112.1131, "step": 29110 }, { "epoch": 0.11764848475054239, "grad_norm": 1141.5194091796875, "learning_rate": 4.9958645822708285e-05, "loss": 87.7617, "step": 29120 }, { "epoch": 0.11768888601591002, "grad_norm": 653.6248779296875, "learning_rate": 4.995844488634822e-05, "loss": 70.7442, "step": 29130 }, { "epoch": 0.11772928728127766, "grad_norm": 1141.1375732421875, "learning_rate": 4.9958243463410414e-05, "loss": 100.08, "step": 29140 }, { "epoch": 0.11776968854664528, "grad_norm": 675.2451171875, "learning_rate": 4.995804155389881e-05, "loss": 113.5737, "step": 29150 }, { "epoch": 0.11781008981201291, "grad_norm": 822.73486328125, "learning_rate": 4.995783915781734e-05, "loss": 78.6838, "step": 29160 }, { "epoch": 0.11785049107738055, "grad_norm": 1152.279541015625, "learning_rate": 4.995763627516994e-05, "loss": 124.2494, "step": 29170 }, { "epoch": 0.11789089234274817, "grad_norm": 696.2450561523438, "learning_rate": 4.995743290596057e-05, "loss": 85.5477, "step": 29180 }, { "epoch": 0.1179312936081158, "grad_norm": 669.2838745117188, "learning_rate": 4.9957229050193197e-05, "loss": 158.3468, "step": 29190 }, { "epoch": 0.11797169487348344, "grad_norm": 629.5454711914062, "learning_rate": 4.9957024707871806e-05, "loss": 72.3909, "step": 29200 }, { "epoch": 0.11801209613885107, "grad_norm": 2218.03564453125, "learning_rate": 4.995681987900036e-05, "loss": 131.4396, "step": 29210 }, { "epoch": 0.1180524974042187, "grad_norm": 1187.0064697265625, "learning_rate": 4.995661456358286e-05, "loss": 91.9417, "step": 29220 }, { "epoch": 0.11809289866958633, "grad_norm": 539.1826171875, "learning_rate": 4.995640876162332e-05, "loss": 119.5583, "step": 29230 }, { "epoch": 0.11813329993495396, "grad_norm": 1318.2589111328125, "learning_rate": 4.9956202473125736e-05, "loss": 115.7666, "step": 29240 }, { "epoch": 0.1181737012003216, "grad_norm": 466.1606750488281, "learning_rate": 4.995599569809414e-05, "loss": 118.0604, "step": 29250 }, { "epoch": 0.11821410246568922, "grad_norm": 797.7738647460938, "learning_rate": 4.9955788436532545e-05, "loss": 72.8938, "step": 29260 }, { "epoch": 0.11825450373105685, "grad_norm": 1671.8890380859375, "learning_rate": 4.995558068844503e-05, "loss": 96.0493, "step": 29270 }, { "epoch": 0.11829490499642449, "grad_norm": 934.203125, "learning_rate": 4.99553724538356e-05, "loss": 97.1457, "step": 29280 }, { "epoch": 0.11833530626179212, "grad_norm": 1040.1793212890625, "learning_rate": 4.9955163732708346e-05, "loss": 138.6697, "step": 29290 }, { "epoch": 0.11837570752715976, "grad_norm": 2738.3046875, "learning_rate": 4.9954954525067334e-05, "loss": 120.2477, "step": 29300 }, { "epoch": 0.11841610879252738, "grad_norm": 543.4052124023438, "learning_rate": 4.995474483091662e-05, "loss": 129.9302, "step": 29310 }, { "epoch": 0.11845651005789501, "grad_norm": 534.146240234375, "learning_rate": 4.995453465026032e-05, "loss": 67.6277, "step": 29320 }, { "epoch": 0.11849691132326265, "grad_norm": 1281.582275390625, "learning_rate": 4.995432398310252e-05, "loss": 117.349, "step": 29330 }, { "epoch": 0.11853731258863028, "grad_norm": 3242.07568359375, "learning_rate": 4.995411282944732e-05, "loss": 136.7699, "step": 29340 }, { "epoch": 0.1185777138539979, "grad_norm": 1517.0467529296875, "learning_rate": 4.9953901189298845e-05, "loss": 127.2586, "step": 29350 }, { "epoch": 0.11861811511936554, "grad_norm": 689.41015625, "learning_rate": 4.9953689062661226e-05, "loss": 108.8368, "step": 29360 }, { "epoch": 0.11865851638473317, "grad_norm": 663.7586669921875, "learning_rate": 4.995347644953858e-05, "loss": 86.3188, "step": 29370 }, { "epoch": 0.11869891765010081, "grad_norm": 1621.5384521484375, "learning_rate": 4.9953263349935074e-05, "loss": 100.3996, "step": 29380 }, { "epoch": 0.11873931891546843, "grad_norm": 1026.15576171875, "learning_rate": 4.995304976385484e-05, "loss": 124.3401, "step": 29390 }, { "epoch": 0.11877972018083606, "grad_norm": 840.7376098632812, "learning_rate": 4.995283569130207e-05, "loss": 99.3957, "step": 29400 }, { "epoch": 0.1188201214462037, "grad_norm": 1305.7696533203125, "learning_rate": 4.995262113228091e-05, "loss": 87.9966, "step": 29410 }, { "epoch": 0.11886052271157133, "grad_norm": 1312.4971923828125, "learning_rate": 4.9952406086795564e-05, "loss": 114.1831, "step": 29420 }, { "epoch": 0.11890092397693895, "grad_norm": 1050.6826171875, "learning_rate": 4.995219055485021e-05, "loss": 117.2113, "step": 29430 }, { "epoch": 0.11894132524230659, "grad_norm": 733.8092041015625, "learning_rate": 4.995197453644905e-05, "loss": 120.497, "step": 29440 }, { "epoch": 0.11898172650767422, "grad_norm": 980.0513305664062, "learning_rate": 4.9951758031596304e-05, "loss": 97.3482, "step": 29450 }, { "epoch": 0.11902212777304186, "grad_norm": 646.7477416992188, "learning_rate": 4.995154104029619e-05, "loss": 103.4561, "step": 29460 }, { "epoch": 0.11906252903840948, "grad_norm": 2361.02978515625, "learning_rate": 4.9951323562552934e-05, "loss": 104.5779, "step": 29470 }, { "epoch": 0.11910293030377711, "grad_norm": 844.0553588867188, "learning_rate": 4.995110559837078e-05, "loss": 91.3419, "step": 29480 }, { "epoch": 0.11914333156914475, "grad_norm": 2193.67431640625, "learning_rate": 4.995088714775398e-05, "loss": 127.1109, "step": 29490 }, { "epoch": 0.11918373283451238, "grad_norm": 732.5401611328125, "learning_rate": 4.995066821070679e-05, "loss": 120.7417, "step": 29500 }, { "epoch": 0.11922413409988, "grad_norm": 1001.8763427734375, "learning_rate": 4.995044878723348e-05, "loss": 114.4594, "step": 29510 }, { "epoch": 0.11926453536524764, "grad_norm": 928.3828735351562, "learning_rate": 4.995022887733832e-05, "loss": 83.6775, "step": 29520 }, { "epoch": 0.11930493663061527, "grad_norm": 858.733154296875, "learning_rate": 4.99500084810256e-05, "loss": 144.456, "step": 29530 }, { "epoch": 0.11934533789598291, "grad_norm": 2473.096435546875, "learning_rate": 4.994978759829963e-05, "loss": 117.7391, "step": 29540 }, { "epoch": 0.11938573916135053, "grad_norm": 1232.745361328125, "learning_rate": 4.9949566229164704e-05, "loss": 87.8614, "step": 29550 }, { "epoch": 0.11942614042671816, "grad_norm": 730.4893188476562, "learning_rate": 4.994934437362513e-05, "loss": 94.9959, "step": 29560 }, { "epoch": 0.1194665416920858, "grad_norm": 1123.4429931640625, "learning_rate": 4.9949122031685245e-05, "loss": 116.839, "step": 29570 }, { "epoch": 0.11950694295745343, "grad_norm": 723.6360473632812, "learning_rate": 4.9948899203349384e-05, "loss": 86.0155, "step": 29580 }, { "epoch": 0.11954734422282105, "grad_norm": 480.5324401855469, "learning_rate": 4.994867588862189e-05, "loss": 94.0604, "step": 29590 }, { "epoch": 0.1195877454881887, "grad_norm": 1117.021484375, "learning_rate": 4.9948452087507116e-05, "loss": 69.5577, "step": 29600 }, { "epoch": 0.11962814675355632, "grad_norm": 1889.9290771484375, "learning_rate": 4.9948227800009416e-05, "loss": 130.7532, "step": 29610 }, { "epoch": 0.11966854801892396, "grad_norm": 811.6279907226562, "learning_rate": 4.994800302613318e-05, "loss": 93.5169, "step": 29620 }, { "epoch": 0.11970894928429159, "grad_norm": 339.779052734375, "learning_rate": 4.994777776588278e-05, "loss": 112.6328, "step": 29630 }, { "epoch": 0.11974935054965921, "grad_norm": 794.0516967773438, "learning_rate": 4.9947552019262605e-05, "loss": 61.7838, "step": 29640 }, { "epoch": 0.11978975181502685, "grad_norm": 1287.187255859375, "learning_rate": 4.994732578627706e-05, "loss": 156.2145, "step": 29650 }, { "epoch": 0.11983015308039448, "grad_norm": 921.3040161132812, "learning_rate": 4.994709906693056e-05, "loss": 77.835, "step": 29660 }, { "epoch": 0.1198705543457621, "grad_norm": 985.0899047851562, "learning_rate": 4.9946871861227514e-05, "loss": 101.6619, "step": 29670 }, { "epoch": 0.11991095561112974, "grad_norm": 1570.173583984375, "learning_rate": 4.9946644169172355e-05, "loss": 132.0515, "step": 29680 }, { "epoch": 0.11995135687649737, "grad_norm": 1566.288818359375, "learning_rate": 4.9946415990769534e-05, "loss": 109.4148, "step": 29690 }, { "epoch": 0.11999175814186501, "grad_norm": 654.2642822265625, "learning_rate": 4.994618732602349e-05, "loss": 92.2775, "step": 29700 }, { "epoch": 0.12003215940723264, "grad_norm": 5245.0458984375, "learning_rate": 4.994595817493867e-05, "loss": 144.156, "step": 29710 }, { "epoch": 0.12007256067260026, "grad_norm": 4157.51708984375, "learning_rate": 4.9945728537519555e-05, "loss": 124.7138, "step": 29720 }, { "epoch": 0.1201129619379679, "grad_norm": 1033.809814453125, "learning_rate": 4.994549841377063e-05, "loss": 108.5572, "step": 29730 }, { "epoch": 0.12015336320333553, "grad_norm": 681.5181884765625, "learning_rate": 4.9945267803696364e-05, "loss": 99.638, "step": 29740 }, { "epoch": 0.12019376446870315, "grad_norm": 2804.89501953125, "learning_rate": 4.994503670730125e-05, "loss": 99.5536, "step": 29750 }, { "epoch": 0.1202341657340708, "grad_norm": 346.3814392089844, "learning_rate": 4.994480512458981e-05, "loss": 108.1357, "step": 29760 }, { "epoch": 0.12027456699943842, "grad_norm": 1421.3365478515625, "learning_rate": 4.9944573055566556e-05, "loss": 103.8683, "step": 29770 }, { "epoch": 0.12031496826480606, "grad_norm": 1197.5367431640625, "learning_rate": 4.994434050023601e-05, "loss": 107.7595, "step": 29780 }, { "epoch": 0.12035536953017369, "grad_norm": 256.7560729980469, "learning_rate": 4.9944107458602693e-05, "loss": 81.2983, "step": 29790 }, { "epoch": 0.12039577079554131, "grad_norm": 1710.7000732421875, "learning_rate": 4.994387393067117e-05, "loss": 98.1409, "step": 29800 }, { "epoch": 0.12043617206090895, "grad_norm": 583.5015258789062, "learning_rate": 4.994363991644597e-05, "loss": 67.0093, "step": 29810 }, { "epoch": 0.12047657332627658, "grad_norm": 1057.0147705078125, "learning_rate": 4.9943405415931674e-05, "loss": 116.4649, "step": 29820 }, { "epoch": 0.1205169745916442, "grad_norm": 671.4649658203125, "learning_rate": 4.9943170429132855e-05, "loss": 95.8829, "step": 29830 }, { "epoch": 0.12055737585701184, "grad_norm": 1301.7279052734375, "learning_rate": 4.9942934956054076e-05, "loss": 90.3948, "step": 29840 }, { "epoch": 0.12059777712237947, "grad_norm": 1199.511474609375, "learning_rate": 4.994269899669994e-05, "loss": 85.8095, "step": 29850 }, { "epoch": 0.12063817838774711, "grad_norm": 648.86376953125, "learning_rate": 4.9942462551075056e-05, "loss": 77.0479, "step": 29860 }, { "epoch": 0.12067857965311474, "grad_norm": 788.9052124023438, "learning_rate": 4.994222561918401e-05, "loss": 81.1479, "step": 29870 }, { "epoch": 0.12071898091848236, "grad_norm": 931.2744750976562, "learning_rate": 4.994198820103145e-05, "loss": 85.522, "step": 29880 }, { "epoch": 0.12075938218385, "grad_norm": 1237.2655029296875, "learning_rate": 4.994175029662198e-05, "loss": 95.5039, "step": 29890 }, { "epoch": 0.12079978344921763, "grad_norm": 704.9238891601562, "learning_rate": 4.994151190596025e-05, "loss": 85.7873, "step": 29900 }, { "epoch": 0.12084018471458526, "grad_norm": 826.0241088867188, "learning_rate": 4.9941273029050894e-05, "loss": 129.6459, "step": 29910 }, { "epoch": 0.1208805859799529, "grad_norm": 479.6162109375, "learning_rate": 4.994103366589859e-05, "loss": 117.3531, "step": 29920 }, { "epoch": 0.12092098724532052, "grad_norm": 1103.497314453125, "learning_rate": 4.994079381650799e-05, "loss": 183.9052, "step": 29930 }, { "epoch": 0.12096138851068816, "grad_norm": 710.986572265625, "learning_rate": 4.994055348088378e-05, "loss": 105.431, "step": 29940 }, { "epoch": 0.12100178977605579, "grad_norm": 1297.952880859375, "learning_rate": 4.994031265903063e-05, "loss": 128.99, "step": 29950 }, { "epoch": 0.12104219104142341, "grad_norm": 1707.3641357421875, "learning_rate": 4.9940071350953255e-05, "loss": 127.0656, "step": 29960 }, { "epoch": 0.12108259230679105, "grad_norm": 787.633544921875, "learning_rate": 4.993982955665634e-05, "loss": 106.8927, "step": 29970 }, { "epoch": 0.12112299357215868, "grad_norm": 1233.0062255859375, "learning_rate": 4.9939587276144616e-05, "loss": 104.3319, "step": 29980 }, { "epoch": 0.1211633948375263, "grad_norm": 1296.74658203125, "learning_rate": 4.993934450942279e-05, "loss": 77.4372, "step": 29990 }, { "epoch": 0.12120379610289395, "grad_norm": 735.8043212890625, "learning_rate": 4.993910125649561e-05, "loss": 147.1198, "step": 30000 }, { "epoch": 0.12124419736826157, "grad_norm": 364.9338684082031, "learning_rate": 4.993885751736781e-05, "loss": 84.0477, "step": 30010 }, { "epoch": 0.12128459863362921, "grad_norm": 719.6060180664062, "learning_rate": 4.993861329204414e-05, "loss": 115.0903, "step": 30020 }, { "epoch": 0.12132499989899684, "grad_norm": 1294.3851318359375, "learning_rate": 4.993836858052937e-05, "loss": 151.7286, "step": 30030 }, { "epoch": 0.12136540116436446, "grad_norm": 711.005126953125, "learning_rate": 4.993812338282826e-05, "loss": 86.3682, "step": 30040 }, { "epoch": 0.1214058024297321, "grad_norm": 723.4884643554688, "learning_rate": 4.993787769894559e-05, "loss": 120.8179, "step": 30050 }, { "epoch": 0.12144620369509973, "grad_norm": 1060.3338623046875, "learning_rate": 4.993763152888617e-05, "loss": 134.1443, "step": 30060 }, { "epoch": 0.12148660496046736, "grad_norm": 829.1416625976562, "learning_rate": 4.993738487265478e-05, "loss": 119.1408, "step": 30070 }, { "epoch": 0.121527006225835, "grad_norm": 267.49755859375, "learning_rate": 4.993713773025623e-05, "loss": 112.0577, "step": 30080 }, { "epoch": 0.12156740749120262, "grad_norm": 2977.7763671875, "learning_rate": 4.993689010169534e-05, "loss": 115.676, "step": 30090 }, { "epoch": 0.12160780875657026, "grad_norm": 927.7879638671875, "learning_rate": 4.993664198697694e-05, "loss": 80.4086, "step": 30100 }, { "epoch": 0.12164821002193789, "grad_norm": 1482.4476318359375, "learning_rate": 4.993639338610587e-05, "loss": 90.1057, "step": 30110 }, { "epoch": 0.12168861128730551, "grad_norm": 1078.0257568359375, "learning_rate": 4.993614429908697e-05, "loss": 112.5462, "step": 30120 }, { "epoch": 0.12172901255267315, "grad_norm": 1431.45947265625, "learning_rate": 4.99358947259251e-05, "loss": 83.491, "step": 30130 }, { "epoch": 0.12176941381804078, "grad_norm": 792.9324340820312, "learning_rate": 4.9935644666625125e-05, "loss": 82.8931, "step": 30140 }, { "epoch": 0.12180981508340841, "grad_norm": 1509.551025390625, "learning_rate": 4.9935394121191915e-05, "loss": 103.0119, "step": 30150 }, { "epoch": 0.12185021634877605, "grad_norm": 1039.8441162109375, "learning_rate": 4.993514308963036e-05, "loss": 119.8, "step": 30160 }, { "epoch": 0.12189061761414367, "grad_norm": 399.6064758300781, "learning_rate": 4.993489157194536e-05, "loss": 115.0279, "step": 30170 }, { "epoch": 0.12193101887951131, "grad_norm": 794.4744873046875, "learning_rate": 4.993463956814181e-05, "loss": 101.3931, "step": 30180 }, { "epoch": 0.12197142014487894, "grad_norm": 2071.046142578125, "learning_rate": 4.993438707822462e-05, "loss": 114.3679, "step": 30190 }, { "epoch": 0.12201182141024657, "grad_norm": 996.6055297851562, "learning_rate": 4.993413410219871e-05, "loss": 84.8786, "step": 30200 }, { "epoch": 0.1220522226756142, "grad_norm": 1139.685302734375, "learning_rate": 4.993388064006903e-05, "loss": 78.988, "step": 30210 }, { "epoch": 0.12209262394098183, "grad_norm": 622.7205200195312, "learning_rate": 4.993362669184051e-05, "loss": 88.1808, "step": 30220 }, { "epoch": 0.12213302520634946, "grad_norm": 7026.68359375, "learning_rate": 4.9933372257518096e-05, "loss": 126.5632, "step": 30230 }, { "epoch": 0.1221734264717171, "grad_norm": 945.2219848632812, "learning_rate": 4.993311733710676e-05, "loss": 78.7985, "step": 30240 }, { "epoch": 0.12221382773708472, "grad_norm": 826.4041137695312, "learning_rate": 4.9932861930611454e-05, "loss": 72.6881, "step": 30250 }, { "epoch": 0.12225422900245236, "grad_norm": 714.5296630859375, "learning_rate": 4.993260603803718e-05, "loss": 94.0831, "step": 30260 }, { "epoch": 0.12229463026781999, "grad_norm": 692.105224609375, "learning_rate": 4.99323496593889e-05, "loss": 60.1789, "step": 30270 }, { "epoch": 0.12233503153318762, "grad_norm": 534.4913330078125, "learning_rate": 4.993209279467164e-05, "loss": 69.5706, "step": 30280 }, { "epoch": 0.12237543279855526, "grad_norm": 261.96044921875, "learning_rate": 4.99318354438904e-05, "loss": 77.032, "step": 30290 }, { "epoch": 0.12241583406392288, "grad_norm": 1099.048095703125, "learning_rate": 4.9931577607050175e-05, "loss": 92.2519, "step": 30300 }, { "epoch": 0.12245623532929051, "grad_norm": 1755.97705078125, "learning_rate": 4.993131928415602e-05, "loss": 159.9417, "step": 30310 }, { "epoch": 0.12249663659465815, "grad_norm": 1634.8353271484375, "learning_rate": 4.993106047521296e-05, "loss": 91.0221, "step": 30320 }, { "epoch": 0.12253703786002577, "grad_norm": 980.1278686523438, "learning_rate": 4.993080118022604e-05, "loss": 97.5692, "step": 30330 }, { "epoch": 0.12257743912539341, "grad_norm": 2488.35009765625, "learning_rate": 4.993054139920032e-05, "loss": 114.5058, "step": 30340 }, { "epoch": 0.12261784039076104, "grad_norm": 1487.991455078125, "learning_rate": 4.9930281132140846e-05, "loss": 82.5714, "step": 30350 }, { "epoch": 0.12265824165612867, "grad_norm": 513.70654296875, "learning_rate": 4.993002037905272e-05, "loss": 127.0465, "step": 30360 }, { "epoch": 0.1226986429214963, "grad_norm": 2360.14208984375, "learning_rate": 4.9929759139941e-05, "loss": 128.6292, "step": 30370 }, { "epoch": 0.12273904418686393, "grad_norm": 537.433349609375, "learning_rate": 4.99294974148108e-05, "loss": 84.4188, "step": 30380 }, { "epoch": 0.12277944545223156, "grad_norm": 339.3077697753906, "learning_rate": 4.9929235203667214e-05, "loss": 84.647, "step": 30390 }, { "epoch": 0.1228198467175992, "grad_norm": 697.3407592773438, "learning_rate": 4.992897250651535e-05, "loss": 96.315, "step": 30400 }, { "epoch": 0.12286024798296682, "grad_norm": 637.2626342773438, "learning_rate": 4.9928709323360337e-05, "loss": 87.7648, "step": 30410 }, { "epoch": 0.12290064924833445, "grad_norm": 591.4133911132812, "learning_rate": 4.99284456542073e-05, "loss": 57.4863, "step": 30420 }, { "epoch": 0.12294105051370209, "grad_norm": 1242.639404296875, "learning_rate": 4.992818149906138e-05, "loss": 105.4673, "step": 30430 }, { "epoch": 0.12298145177906972, "grad_norm": 946.1543579101562, "learning_rate": 4.992791685792772e-05, "loss": 101.4292, "step": 30440 }, { "epoch": 0.12302185304443736, "grad_norm": 554.2642211914062, "learning_rate": 4.9927651730811495e-05, "loss": 55.5201, "step": 30450 }, { "epoch": 0.12306225430980498, "grad_norm": 1658.3990478515625, "learning_rate": 4.992738611771787e-05, "loss": 90.6332, "step": 30460 }, { "epoch": 0.12310265557517261, "grad_norm": 1110.182373046875, "learning_rate": 4.9927120018652004e-05, "loss": 112.0799, "step": 30470 }, { "epoch": 0.12314305684054025, "grad_norm": 2135.968505859375, "learning_rate": 4.992685343361911e-05, "loss": 124.2809, "step": 30480 }, { "epoch": 0.12318345810590788, "grad_norm": 0.0, "learning_rate": 4.992658636262438e-05, "loss": 130.0038, "step": 30490 }, { "epoch": 0.1232238593712755, "grad_norm": 897.2399291992188, "learning_rate": 4.992631880567301e-05, "loss": 110.8611, "step": 30500 }, { "epoch": 0.12326426063664314, "grad_norm": 1721.460693359375, "learning_rate": 4.9926050762770224e-05, "loss": 110.5094, "step": 30510 }, { "epoch": 0.12330466190201077, "grad_norm": 994.7217407226562, "learning_rate": 4.992578223392124e-05, "loss": 95.7169, "step": 30520 }, { "epoch": 0.12334506316737841, "grad_norm": 1169.87939453125, "learning_rate": 4.9925513219131303e-05, "loss": 82.8276, "step": 30530 }, { "epoch": 0.12338546443274603, "grad_norm": 677.4993286132812, "learning_rate": 4.992524371840566e-05, "loss": 105.6593, "step": 30540 }, { "epoch": 0.12342586569811366, "grad_norm": 885.8995361328125, "learning_rate": 4.992497373174955e-05, "loss": 114.9273, "step": 30550 }, { "epoch": 0.1234662669634813, "grad_norm": 1765.774658203125, "learning_rate": 4.9924703259168244e-05, "loss": 73.1132, "step": 30560 }, { "epoch": 0.12350666822884893, "grad_norm": 1306.00244140625, "learning_rate": 4.992443230066701e-05, "loss": 96.6844, "step": 30570 }, { "epoch": 0.12354706949421655, "grad_norm": 2164.46044921875, "learning_rate": 4.992416085625115e-05, "loss": 101.1072, "step": 30580 }, { "epoch": 0.12358747075958419, "grad_norm": 1472.5419921875, "learning_rate": 4.992388892592594e-05, "loss": 93.9604, "step": 30590 }, { "epoch": 0.12362787202495182, "grad_norm": 376.5589294433594, "learning_rate": 4.9923616509696683e-05, "loss": 89.4311, "step": 30600 }, { "epoch": 0.12366827329031946, "grad_norm": 809.4692993164062, "learning_rate": 4.9923343607568684e-05, "loss": 90.6958, "step": 30610 }, { "epoch": 0.12370867455568708, "grad_norm": 1123.7208251953125, "learning_rate": 4.9923070219547275e-05, "loss": 113.3677, "step": 30620 }, { "epoch": 0.12374907582105471, "grad_norm": 1361.58056640625, "learning_rate": 4.9922796345637776e-05, "loss": 109.5225, "step": 30630 }, { "epoch": 0.12378947708642235, "grad_norm": 1546.5401611328125, "learning_rate": 4.992252198584554e-05, "loss": 156.2372, "step": 30640 }, { "epoch": 0.12382987835178998, "grad_norm": 1252.5113525390625, "learning_rate": 4.99222471401759e-05, "loss": 98.325, "step": 30650 }, { "epoch": 0.1238702796171576, "grad_norm": 587.108642578125, "learning_rate": 4.992197180863422e-05, "loss": 83.3731, "step": 30660 }, { "epoch": 0.12391068088252524, "grad_norm": 546.9779663085938, "learning_rate": 4.992169599122587e-05, "loss": 69.5896, "step": 30670 }, { "epoch": 0.12395108214789287, "grad_norm": 533.7169189453125, "learning_rate": 4.992141968795623e-05, "loss": 59.3518, "step": 30680 }, { "epoch": 0.12399148341326051, "grad_norm": 609.4190673828125, "learning_rate": 4.992114289883068e-05, "loss": 132.897, "step": 30690 }, { "epoch": 0.12403188467862813, "grad_norm": 1489.8643798828125, "learning_rate": 4.9920865623854615e-05, "loss": 84.5661, "step": 30700 }, { "epoch": 0.12407228594399576, "grad_norm": 1392.722900390625, "learning_rate": 4.992058786303345e-05, "loss": 80.1483, "step": 30710 }, { "epoch": 0.1241126872093634, "grad_norm": 1709.162841796875, "learning_rate": 4.9920309616372596e-05, "loss": 78.6077, "step": 30720 }, { "epoch": 0.12415308847473103, "grad_norm": 319.7249450683594, "learning_rate": 4.9920030883877476e-05, "loss": 49.0658, "step": 30730 }, { "epoch": 0.12419348974009865, "grad_norm": 2077.73095703125, "learning_rate": 4.9919751665553525e-05, "loss": 119.2495, "step": 30740 }, { "epoch": 0.1242338910054663, "grad_norm": 1575.6136474609375, "learning_rate": 4.991947196140618e-05, "loss": 65.2858, "step": 30750 }, { "epoch": 0.12427429227083392, "grad_norm": 891.6358032226562, "learning_rate": 4.9919191771440905e-05, "loss": 76.9454, "step": 30760 }, { "epoch": 0.12431469353620156, "grad_norm": 836.199951171875, "learning_rate": 4.991891109566316e-05, "loss": 86.9683, "step": 30770 }, { "epoch": 0.12435509480156919, "grad_norm": 1238.918701171875, "learning_rate": 4.99186299340784e-05, "loss": 95.3734, "step": 30780 }, { "epoch": 0.12439549606693681, "grad_norm": 1337.448974609375, "learning_rate": 4.991834828669213e-05, "loss": 103.7555, "step": 30790 }, { "epoch": 0.12443589733230445, "grad_norm": 2682.815673828125, "learning_rate": 4.9918066153509834e-05, "loss": 157.3261, "step": 30800 }, { "epoch": 0.12447629859767208, "grad_norm": 2427.59033203125, "learning_rate": 4.9917783534537006e-05, "loss": 79.6278, "step": 30810 }, { "epoch": 0.1245166998630397, "grad_norm": 668.7774658203125, "learning_rate": 4.991750042977916e-05, "loss": 77.9551, "step": 30820 }, { "epoch": 0.12455710112840734, "grad_norm": 1772.7213134765625, "learning_rate": 4.991721683924182e-05, "loss": 109.0242, "step": 30830 }, { "epoch": 0.12459750239377497, "grad_norm": 896.2188110351562, "learning_rate": 4.99169327629305e-05, "loss": 103.7983, "step": 30840 }, { "epoch": 0.12463790365914261, "grad_norm": 1777.919677734375, "learning_rate": 4.991664820085074e-05, "loss": 69.7507, "step": 30850 }, { "epoch": 0.12467830492451024, "grad_norm": 414.7098693847656, "learning_rate": 4.9916363153008114e-05, "loss": 137.439, "step": 30860 }, { "epoch": 0.12471870618987786, "grad_norm": 598.8345947265625, "learning_rate": 4.9916077619408155e-05, "loss": 120.2765, "step": 30870 }, { "epoch": 0.1247591074552455, "grad_norm": 1064.3018798828125, "learning_rate": 4.991579160005644e-05, "loss": 84.4768, "step": 30880 }, { "epoch": 0.12479950872061313, "grad_norm": 666.4903564453125, "learning_rate": 4.9915505094958526e-05, "loss": 136.433, "step": 30890 }, { "epoch": 0.12483990998598075, "grad_norm": 1181.445556640625, "learning_rate": 4.991521810412002e-05, "loss": 63.556, "step": 30900 }, { "epoch": 0.1248803112513484, "grad_norm": 605.0240478515625, "learning_rate": 4.991493062754651e-05, "loss": 94.3815, "step": 30910 }, { "epoch": 0.12492071251671602, "grad_norm": 794.223388671875, "learning_rate": 4.99146426652436e-05, "loss": 76.5061, "step": 30920 }, { "epoch": 0.12496111378208366, "grad_norm": 1127.827392578125, "learning_rate": 4.991435421721691e-05, "loss": 80.6668, "step": 30930 }, { "epoch": 0.12500151504745127, "grad_norm": 494.918212890625, "learning_rate": 4.991406528347206e-05, "loss": 137.6075, "step": 30940 }, { "epoch": 0.1250419163128189, "grad_norm": 688.9254760742188, "learning_rate": 4.9913775864014665e-05, "loss": 77.563, "step": 30950 }, { "epoch": 0.12508231757818655, "grad_norm": 1798.6043701171875, "learning_rate": 4.991348595885039e-05, "loss": 172.5805, "step": 30960 }, { "epoch": 0.1251227188435542, "grad_norm": 629.1405639648438, "learning_rate": 4.991319556798488e-05, "loss": 94.1099, "step": 30970 }, { "epoch": 0.1251631201089218, "grad_norm": 831.2258911132812, "learning_rate": 4.99129046914238e-05, "loss": 82.6184, "step": 30980 }, { "epoch": 0.12520352137428944, "grad_norm": 899.6831665039062, "learning_rate": 4.991261332917282e-05, "loss": 89.8317, "step": 30990 }, { "epoch": 0.12524392263965708, "grad_norm": 1243.12255859375, "learning_rate": 4.991232148123761e-05, "loss": 106.8137, "step": 31000 }, { "epoch": 0.1252843239050247, "grad_norm": 708.5358276367188, "learning_rate": 4.9912029147623875e-05, "loss": 71.765, "step": 31010 }, { "epoch": 0.12532472517039234, "grad_norm": 1078.65185546875, "learning_rate": 4.9911736328337296e-05, "loss": 98.1934, "step": 31020 }, { "epoch": 0.12536512643575998, "grad_norm": 863.23193359375, "learning_rate": 4.99114430233836e-05, "loss": 110.7916, "step": 31030 }, { "epoch": 0.1254055277011276, "grad_norm": 948.326416015625, "learning_rate": 4.991114923276849e-05, "loss": 128.7791, "step": 31040 }, { "epoch": 0.12544592896649523, "grad_norm": 1923.48681640625, "learning_rate": 4.9910854956497696e-05, "loss": 81.3787, "step": 31050 }, { "epoch": 0.12548633023186287, "grad_norm": 1134.7197265625, "learning_rate": 4.991056019457697e-05, "loss": 97.8454, "step": 31060 }, { "epoch": 0.12552673149723048, "grad_norm": 1076.4554443359375, "learning_rate": 4.991026494701205e-05, "loss": 93.0418, "step": 31070 }, { "epoch": 0.12556713276259812, "grad_norm": 1976.7174072265625, "learning_rate": 4.9909969213808683e-05, "loss": 87.8301, "step": 31080 }, { "epoch": 0.12560753402796576, "grad_norm": 1298.41162109375, "learning_rate": 4.990967299497264e-05, "loss": 125.1484, "step": 31090 }, { "epoch": 0.12564793529333337, "grad_norm": 532.9257202148438, "learning_rate": 4.990937629050971e-05, "loss": 101.9732, "step": 31100 }, { "epoch": 0.125688336558701, "grad_norm": 1158.4649658203125, "learning_rate": 4.990907910042566e-05, "loss": 88.4633, "step": 31110 }, { "epoch": 0.12572873782406865, "grad_norm": 1732.9954833984375, "learning_rate": 4.990878142472628e-05, "loss": 80.0504, "step": 31120 }, { "epoch": 0.1257691390894363, "grad_norm": 432.7400207519531, "learning_rate": 4.990848326341739e-05, "loss": 92.5247, "step": 31130 }, { "epoch": 0.1258095403548039, "grad_norm": 985.176025390625, "learning_rate": 4.990818461650479e-05, "loss": 100.5391, "step": 31140 }, { "epoch": 0.12584994162017155, "grad_norm": 789.0088500976562, "learning_rate": 4.990788548399432e-05, "loss": 88.8649, "step": 31150 }, { "epoch": 0.12589034288553919, "grad_norm": 1790.52978515625, "learning_rate": 4.990758586589178e-05, "loss": 108.7384, "step": 31160 }, { "epoch": 0.1259307441509068, "grad_norm": 1069.7830810546875, "learning_rate": 4.9907285762203046e-05, "loss": 104.5124, "step": 31170 }, { "epoch": 0.12597114541627444, "grad_norm": 766.6657104492188, "learning_rate": 4.990698517293395e-05, "loss": 59.5578, "step": 31180 }, { "epoch": 0.12601154668164208, "grad_norm": 1233.1490478515625, "learning_rate": 4.990668409809034e-05, "loss": 63.7087, "step": 31190 }, { "epoch": 0.1260519479470097, "grad_norm": 1039.3233642578125, "learning_rate": 4.990638253767812e-05, "loss": 110.4396, "step": 31200 }, { "epoch": 0.12609234921237733, "grad_norm": 1719.74267578125, "learning_rate": 4.9906080491703146e-05, "loss": 84.7901, "step": 31210 }, { "epoch": 0.12613275047774497, "grad_norm": 2833.946533203125, "learning_rate": 4.9905777960171304e-05, "loss": 146.1511, "step": 31220 }, { "epoch": 0.12617315174311258, "grad_norm": 820.8349609375, "learning_rate": 4.99054749430885e-05, "loss": 120.1305, "step": 31230 }, { "epoch": 0.12621355300848022, "grad_norm": 782.0814208984375, "learning_rate": 4.990517144046064e-05, "loss": 68.8798, "step": 31240 }, { "epoch": 0.12625395427384786, "grad_norm": 1526.1461181640625, "learning_rate": 4.990486745229364e-05, "loss": 109.5388, "step": 31250 }, { "epoch": 0.12629435553921547, "grad_norm": 2700.19677734375, "learning_rate": 4.9904562978593426e-05, "loss": 141.1139, "step": 31260 }, { "epoch": 0.12633475680458311, "grad_norm": 578.1785278320312, "learning_rate": 4.990425801936594e-05, "loss": 63.9474, "step": 31270 }, { "epoch": 0.12637515806995075, "grad_norm": 1284.421630859375, "learning_rate": 4.990395257461712e-05, "loss": 64.6959, "step": 31280 }, { "epoch": 0.1264155593353184, "grad_norm": 722.0364379882812, "learning_rate": 4.9903646644352925e-05, "loss": 136.6368, "step": 31290 }, { "epoch": 0.126455960600686, "grad_norm": 639.962646484375, "learning_rate": 4.990334022857932e-05, "loss": 85.277, "step": 31300 }, { "epoch": 0.12649636186605365, "grad_norm": 1369.47265625, "learning_rate": 4.990303332730226e-05, "loss": 95.2965, "step": 31310 }, { "epoch": 0.1265367631314213, "grad_norm": 710.7479858398438, "learning_rate": 4.990272594052776e-05, "loss": 103.4021, "step": 31320 }, { "epoch": 0.1265771643967889, "grad_norm": 967.9599609375, "learning_rate": 4.990241806826179e-05, "loss": 55.237, "step": 31330 }, { "epoch": 0.12661756566215654, "grad_norm": 1026.2791748046875, "learning_rate": 4.990210971051037e-05, "loss": 76.5986, "step": 31340 }, { "epoch": 0.12665796692752418, "grad_norm": 1351.9713134765625, "learning_rate": 4.990180086727949e-05, "loss": 78.9581, "step": 31350 }, { "epoch": 0.1266983681928918, "grad_norm": 1424.3463134765625, "learning_rate": 4.9901491538575185e-05, "loss": 72.768, "step": 31360 }, { "epoch": 0.12673876945825943, "grad_norm": 251.47360229492188, "learning_rate": 4.990118172440348e-05, "loss": 97.3149, "step": 31370 }, { "epoch": 0.12677917072362707, "grad_norm": 1101.8797607421875, "learning_rate": 4.9900871424770424e-05, "loss": 113.3635, "step": 31380 }, { "epoch": 0.12681957198899468, "grad_norm": 3568.4501953125, "learning_rate": 4.9900560639682045e-05, "loss": 123.9855, "step": 31390 }, { "epoch": 0.12685997325436232, "grad_norm": 1042.5748291015625, "learning_rate": 4.9900249369144434e-05, "loss": 74.6795, "step": 31400 }, { "epoch": 0.12690037451972996, "grad_norm": 2803.200439453125, "learning_rate": 4.9899937613163635e-05, "loss": 122.4246, "step": 31410 }, { "epoch": 0.12694077578509758, "grad_norm": 999.986083984375, "learning_rate": 4.9899625371745726e-05, "loss": 103.829, "step": 31420 }, { "epoch": 0.12698117705046522, "grad_norm": 420.03607177734375, "learning_rate": 4.989931264489681e-05, "loss": 109.0738, "step": 31430 }, { "epoch": 0.12702157831583286, "grad_norm": 679.0191040039062, "learning_rate": 4.9898999432622974e-05, "loss": 63.1434, "step": 31440 }, { "epoch": 0.1270619795812005, "grad_norm": 1536.908203125, "learning_rate": 4.989868573493032e-05, "loss": 114.7305, "step": 31450 }, { "epoch": 0.1271023808465681, "grad_norm": 2571.055419921875, "learning_rate": 4.9898371551824974e-05, "loss": 91.3618, "step": 31460 }, { "epoch": 0.12714278211193575, "grad_norm": 1286.792724609375, "learning_rate": 4.989805688331306e-05, "loss": 96.6507, "step": 31470 }, { "epoch": 0.1271831833773034, "grad_norm": 1014.9740600585938, "learning_rate": 4.9897741729400705e-05, "loss": 82.7385, "step": 31480 }, { "epoch": 0.127223584642671, "grad_norm": 3557.023681640625, "learning_rate": 4.989742609009405e-05, "loss": 105.2578, "step": 31490 }, { "epoch": 0.12726398590803864, "grad_norm": 1261.7489013671875, "learning_rate": 4.989710996539926e-05, "loss": 110.2257, "step": 31500 }, { "epoch": 0.12730438717340628, "grad_norm": 620.3264770507812, "learning_rate": 4.9896793355322495e-05, "loss": 130.9313, "step": 31510 }, { "epoch": 0.1273447884387739, "grad_norm": 1063.13623046875, "learning_rate": 4.989647625986993e-05, "loss": 84.6815, "step": 31520 }, { "epoch": 0.12738518970414153, "grad_norm": 1258.741943359375, "learning_rate": 4.989615867904773e-05, "loss": 101.0329, "step": 31530 }, { "epoch": 0.12742559096950917, "grad_norm": 483.7554931640625, "learning_rate": 4.989584061286211e-05, "loss": 115.0386, "step": 31540 }, { "epoch": 0.12746599223487678, "grad_norm": 1390.3809814453125, "learning_rate": 4.989552206131925e-05, "loss": 95.0824, "step": 31550 }, { "epoch": 0.12750639350024442, "grad_norm": 474.9691162109375, "learning_rate": 4.9895203024425385e-05, "loss": 123.0268, "step": 31560 }, { "epoch": 0.12754679476561206, "grad_norm": 953.542236328125, "learning_rate": 4.989488350218671e-05, "loss": 132.4043, "step": 31570 }, { "epoch": 0.12758719603097968, "grad_norm": 602.45751953125, "learning_rate": 4.989456349460947e-05, "loss": 81.3055, "step": 31580 }, { "epoch": 0.12762759729634732, "grad_norm": 1298.2872314453125, "learning_rate": 4.989424300169989e-05, "loss": 85.7651, "step": 31590 }, { "epoch": 0.12766799856171496, "grad_norm": 539.2799072265625, "learning_rate": 4.9893922023464236e-05, "loss": 73.1652, "step": 31600 }, { "epoch": 0.1277083998270826, "grad_norm": 506.7164001464844, "learning_rate": 4.989360055990875e-05, "loss": 85.522, "step": 31610 }, { "epoch": 0.1277488010924502, "grad_norm": 1420.2366943359375, "learning_rate": 4.98932786110397e-05, "loss": 101.5808, "step": 31620 }, { "epoch": 0.12778920235781785, "grad_norm": 931.4535522460938, "learning_rate": 4.989295617686337e-05, "loss": 104.2542, "step": 31630 }, { "epoch": 0.1278296036231855, "grad_norm": 1208.561767578125, "learning_rate": 4.989263325738605e-05, "loss": 118.4887, "step": 31640 }, { "epoch": 0.1278700048885531, "grad_norm": 1164.8492431640625, "learning_rate": 4.989230985261403e-05, "loss": 133.7977, "step": 31650 }, { "epoch": 0.12791040615392074, "grad_norm": 2086.97900390625, "learning_rate": 4.9891985962553606e-05, "loss": 138.2947, "step": 31660 }, { "epoch": 0.12795080741928838, "grad_norm": 896.4519653320312, "learning_rate": 4.98916615872111e-05, "loss": 108.6655, "step": 31670 }, { "epoch": 0.127991208684656, "grad_norm": 921.4371337890625, "learning_rate": 4.9891336726592844e-05, "loss": 72.2636, "step": 31680 }, { "epoch": 0.12803160995002363, "grad_norm": 2175.284423828125, "learning_rate": 4.989101138070516e-05, "loss": 108.8024, "step": 31690 }, { "epoch": 0.12807201121539127, "grad_norm": 456.76068115234375, "learning_rate": 4.989068554955439e-05, "loss": 124.3368, "step": 31700 }, { "epoch": 0.12811241248075889, "grad_norm": 2323.61962890625, "learning_rate": 4.9890359233146897e-05, "loss": 83.3551, "step": 31710 }, { "epoch": 0.12815281374612653, "grad_norm": 630.8578491210938, "learning_rate": 4.989003243148904e-05, "loss": 112.8626, "step": 31720 }, { "epoch": 0.12819321501149417, "grad_norm": 1055.7919921875, "learning_rate": 4.988970514458718e-05, "loss": 85.2178, "step": 31730 }, { "epoch": 0.12823361627686178, "grad_norm": 4026.397705078125, "learning_rate": 4.9889377372447706e-05, "loss": 136.7049, "step": 31740 }, { "epoch": 0.12827401754222942, "grad_norm": 1399.34130859375, "learning_rate": 4.9889049115077005e-05, "loss": 90.4039, "step": 31750 }, { "epoch": 0.12831441880759706, "grad_norm": 718.3490600585938, "learning_rate": 4.988872037248148e-05, "loss": 122.6609, "step": 31760 }, { "epoch": 0.1283548200729647, "grad_norm": 1984.33740234375, "learning_rate": 4.988839114466753e-05, "loss": 118.7678, "step": 31770 }, { "epoch": 0.1283952213383323, "grad_norm": 732.53662109375, "learning_rate": 4.988806143164159e-05, "loss": 59.4425, "step": 31780 }, { "epoch": 0.12843562260369995, "grad_norm": 559.675537109375, "learning_rate": 4.988773123341007e-05, "loss": 67.2548, "step": 31790 }, { "epoch": 0.1284760238690676, "grad_norm": 1089.072998046875, "learning_rate": 4.988740054997943e-05, "loss": 96.6428, "step": 31800 }, { "epoch": 0.1285164251344352, "grad_norm": 2197.211181640625, "learning_rate": 4.9887069381356094e-05, "loss": 116.156, "step": 31810 }, { "epoch": 0.12855682639980284, "grad_norm": 459.337158203125, "learning_rate": 4.988673772754653e-05, "loss": 105.2984, "step": 31820 }, { "epoch": 0.12859722766517048, "grad_norm": 1142.6385498046875, "learning_rate": 4.98864055885572e-05, "loss": 97.5586, "step": 31830 }, { "epoch": 0.1286376289305381, "grad_norm": 952.4442749023438, "learning_rate": 4.988607296439458e-05, "loss": 112.1516, "step": 31840 }, { "epoch": 0.12867803019590573, "grad_norm": 633.4367065429688, "learning_rate": 4.988573985506516e-05, "loss": 82.8759, "step": 31850 }, { "epoch": 0.12871843146127337, "grad_norm": 341.5099792480469, "learning_rate": 4.988540626057543e-05, "loss": 79.2266, "step": 31860 }, { "epoch": 0.128758832726641, "grad_norm": 521.6610717773438, "learning_rate": 4.988507218093189e-05, "loss": 74.551, "step": 31870 }, { "epoch": 0.12879923399200863, "grad_norm": 848.8890991210938, "learning_rate": 4.988473761614105e-05, "loss": 68.6997, "step": 31880 }, { "epoch": 0.12883963525737627, "grad_norm": 1142.2823486328125, "learning_rate": 4.9884402566209445e-05, "loss": 84.3612, "step": 31890 }, { "epoch": 0.12888003652274388, "grad_norm": 1104.7357177734375, "learning_rate": 4.98840670311436e-05, "loss": 114.3409, "step": 31900 }, { "epoch": 0.12892043778811152, "grad_norm": 1430.170654296875, "learning_rate": 4.988373101095006e-05, "loss": 66.6711, "step": 31910 }, { "epoch": 0.12896083905347916, "grad_norm": 962.3909301757812, "learning_rate": 4.9883394505635364e-05, "loss": 103.3977, "step": 31920 }, { "epoch": 0.1290012403188468, "grad_norm": 1590.525634765625, "learning_rate": 4.988305751520609e-05, "loss": 107.3967, "step": 31930 }, { "epoch": 0.1290416415842144, "grad_norm": 373.5509033203125, "learning_rate": 4.988272003966879e-05, "loss": 61.0093, "step": 31940 }, { "epoch": 0.12908204284958205, "grad_norm": 845.4537353515625, "learning_rate": 4.9882382079030064e-05, "loss": 87.1438, "step": 31950 }, { "epoch": 0.1291224441149497, "grad_norm": 945.7670288085938, "learning_rate": 4.988204363329648e-05, "loss": 67.9447, "step": 31960 }, { "epoch": 0.1291628453803173, "grad_norm": 398.878173828125, "learning_rate": 4.988170470247465e-05, "loss": 81.1569, "step": 31970 }, { "epoch": 0.12920324664568494, "grad_norm": 329.99859619140625, "learning_rate": 4.988136528657118e-05, "loss": 59.9264, "step": 31980 }, { "epoch": 0.12924364791105258, "grad_norm": 875.9232177734375, "learning_rate": 4.988102538559268e-05, "loss": 118.8401, "step": 31990 }, { "epoch": 0.1292840491764202, "grad_norm": 1466.2398681640625, "learning_rate": 4.988068499954578e-05, "loss": 106.1032, "step": 32000 }, { "epoch": 0.12932445044178784, "grad_norm": 1423.832763671875, "learning_rate": 4.9880344128437115e-05, "loss": 138.5585, "step": 32010 }, { "epoch": 0.12936485170715548, "grad_norm": 2598.307373046875, "learning_rate": 4.988000277227334e-05, "loss": 115.3402, "step": 32020 }, { "epoch": 0.1294052529725231, "grad_norm": 1134.399169921875, "learning_rate": 4.987966093106109e-05, "loss": 90.1487, "step": 32030 }, { "epoch": 0.12944565423789073, "grad_norm": 765.7633056640625, "learning_rate": 4.987931860480705e-05, "loss": 100.8343, "step": 32040 }, { "epoch": 0.12948605550325837, "grad_norm": 1252.3284912109375, "learning_rate": 4.987897579351788e-05, "loss": 78.8595, "step": 32050 }, { "epoch": 0.12952645676862598, "grad_norm": 4336.3466796875, "learning_rate": 4.987863249720027e-05, "loss": 95.2753, "step": 32060 }, { "epoch": 0.12956685803399362, "grad_norm": 1328.06396484375, "learning_rate": 4.987828871586091e-05, "loss": 101.3385, "step": 32070 }, { "epoch": 0.12960725929936126, "grad_norm": 520.3338012695312, "learning_rate": 4.987794444950651e-05, "loss": 153.1254, "step": 32080 }, { "epoch": 0.1296476605647289, "grad_norm": 694.16015625, "learning_rate": 4.987759969814377e-05, "loss": 93.5572, "step": 32090 }, { "epoch": 0.1296880618300965, "grad_norm": 1401.1295166015625, "learning_rate": 4.987725446177941e-05, "loss": 93.8664, "step": 32100 }, { "epoch": 0.12972846309546415, "grad_norm": 928.5399780273438, "learning_rate": 4.9876908740420175e-05, "loss": 74.3067, "step": 32110 }, { "epoch": 0.1297688643608318, "grad_norm": 1556.6954345703125, "learning_rate": 4.98765625340728e-05, "loss": 85.9339, "step": 32120 }, { "epoch": 0.1298092656261994, "grad_norm": 783.3317260742188, "learning_rate": 4.987621584274402e-05, "loss": 85.9349, "step": 32130 }, { "epoch": 0.12984966689156704, "grad_norm": 423.84014892578125, "learning_rate": 4.9875868666440604e-05, "loss": 49.8796, "step": 32140 }, { "epoch": 0.12989006815693468, "grad_norm": 806.3920288085938, "learning_rate": 4.987552100516933e-05, "loss": 66.9737, "step": 32150 }, { "epoch": 0.1299304694223023, "grad_norm": 974.2014770507812, "learning_rate": 4.987517285893697e-05, "loss": 131.7362, "step": 32160 }, { "epoch": 0.12997087068766994, "grad_norm": 3982.545654296875, "learning_rate": 4.9874824227750305e-05, "loss": 130.7934, "step": 32170 }, { "epoch": 0.13001127195303758, "grad_norm": 936.3251953125, "learning_rate": 4.987447511161612e-05, "loss": 86.1245, "step": 32180 }, { "epoch": 0.1300516732184052, "grad_norm": 1825.6453857421875, "learning_rate": 4.987412551054126e-05, "loss": 95.4473, "step": 32190 }, { "epoch": 0.13009207448377283, "grad_norm": 1493.9964599609375, "learning_rate": 4.987377542453251e-05, "loss": 78.5434, "step": 32200 }, { "epoch": 0.13013247574914047, "grad_norm": 748.4346923828125, "learning_rate": 4.9873424853596695e-05, "loss": 87.2075, "step": 32210 }, { "epoch": 0.13017287701450808, "grad_norm": 1414.2127685546875, "learning_rate": 4.987307379774066e-05, "loss": 95.8589, "step": 32220 }, { "epoch": 0.13021327827987572, "grad_norm": 799.97705078125, "learning_rate": 4.987272225697125e-05, "loss": 94.7114, "step": 32230 }, { "epoch": 0.13025367954524336, "grad_norm": 2368.305419921875, "learning_rate": 4.987237023129531e-05, "loss": 93.2615, "step": 32240 }, { "epoch": 0.130294080810611, "grad_norm": 525.53759765625, "learning_rate": 4.987201772071971e-05, "loss": 100.8188, "step": 32250 }, { "epoch": 0.1303344820759786, "grad_norm": 1076.550048828125, "learning_rate": 4.9871664725251314e-05, "loss": 59.2903, "step": 32260 }, { "epoch": 0.13037488334134625, "grad_norm": 1305.9718017578125, "learning_rate": 4.987131124489701e-05, "loss": 62.7737, "step": 32270 }, { "epoch": 0.1304152846067139, "grad_norm": 1377.45458984375, "learning_rate": 4.98709572796637e-05, "loss": 98.2218, "step": 32280 }, { "epoch": 0.1304556858720815, "grad_norm": 1250.3414306640625, "learning_rate": 4.987060282955826e-05, "loss": 141.3949, "step": 32290 }, { "epoch": 0.13049608713744915, "grad_norm": 362.1710205078125, "learning_rate": 4.987024789458762e-05, "loss": 56.7362, "step": 32300 }, { "epoch": 0.13053648840281679, "grad_norm": 1179.3179931640625, "learning_rate": 4.9869892474758694e-05, "loss": 110.4925, "step": 32310 }, { "epoch": 0.1305768896681844, "grad_norm": 885.8596801757812, "learning_rate": 4.986953657007841e-05, "loss": 103.9362, "step": 32320 }, { "epoch": 0.13061729093355204, "grad_norm": 2053.674072265625, "learning_rate": 4.98691801805537e-05, "loss": 108.4903, "step": 32330 }, { "epoch": 0.13065769219891968, "grad_norm": 628.6939086914062, "learning_rate": 4.986882330619152e-05, "loss": 66.5057, "step": 32340 }, { "epoch": 0.1306980934642873, "grad_norm": 9528.09765625, "learning_rate": 4.986846594699883e-05, "loss": 103.5715, "step": 32350 }, { "epoch": 0.13073849472965493, "grad_norm": 0.0, "learning_rate": 4.9868108102982604e-05, "loss": 121.9924, "step": 32360 }, { "epoch": 0.13077889599502257, "grad_norm": 710.1858520507812, "learning_rate": 4.986774977414979e-05, "loss": 65.1207, "step": 32370 }, { "epoch": 0.13081929726039018, "grad_norm": 474.6985778808594, "learning_rate": 4.98673909605074e-05, "loss": 102.3303, "step": 32380 }, { "epoch": 0.13085969852575782, "grad_norm": 0.0, "learning_rate": 4.986703166206242e-05, "loss": 75.2703, "step": 32390 }, { "epoch": 0.13090009979112546, "grad_norm": 781.9612426757812, "learning_rate": 4.986667187882186e-05, "loss": 115.7585, "step": 32400 }, { "epoch": 0.1309405010564931, "grad_norm": 1456.1153564453125, "learning_rate": 4.986631161079272e-05, "loss": 119.8207, "step": 32410 }, { "epoch": 0.13098090232186071, "grad_norm": 767.140625, "learning_rate": 4.986595085798204e-05, "loss": 79.3004, "step": 32420 }, { "epoch": 0.13102130358722835, "grad_norm": 1090.2476806640625, "learning_rate": 4.9865589620396837e-05, "loss": 104.849, "step": 32430 }, { "epoch": 0.131061704852596, "grad_norm": 618.4625854492188, "learning_rate": 4.986522789804417e-05, "loss": 88.606, "step": 32440 }, { "epoch": 0.1311021061179636, "grad_norm": 1400.4354248046875, "learning_rate": 4.9864865690931086e-05, "loss": 112.4108, "step": 32450 }, { "epoch": 0.13114250738333125, "grad_norm": 1226.9744873046875, "learning_rate": 4.986450299906464e-05, "loss": 110.0555, "step": 32460 }, { "epoch": 0.1311829086486989, "grad_norm": 1217.69970703125, "learning_rate": 4.9864139822451905e-05, "loss": 105.7761, "step": 32470 }, { "epoch": 0.1312233099140665, "grad_norm": 944.0647583007812, "learning_rate": 4.9863776161099964e-05, "loss": 68.9442, "step": 32480 }, { "epoch": 0.13126371117943414, "grad_norm": 1241.845458984375, "learning_rate": 4.986341201501591e-05, "loss": 95.3951, "step": 32490 }, { "epoch": 0.13130411244480178, "grad_norm": 1810.86572265625, "learning_rate": 4.9863047384206835e-05, "loss": 132.1521, "step": 32500 }, { "epoch": 0.1313445137101694, "grad_norm": 1479.045166015625, "learning_rate": 4.986268226867985e-05, "loss": 109.1159, "step": 32510 }, { "epoch": 0.13138491497553703, "grad_norm": 958.012451171875, "learning_rate": 4.986231666844208e-05, "loss": 84.0408, "step": 32520 }, { "epoch": 0.13142531624090467, "grad_norm": 1029.1376953125, "learning_rate": 4.9861950583500636e-05, "loss": 161.0161, "step": 32530 }, { "epoch": 0.13146571750627228, "grad_norm": 2178.518798828125, "learning_rate": 4.986158401386268e-05, "loss": 125.5231, "step": 32540 }, { "epoch": 0.13150611877163992, "grad_norm": 873.2677612304688, "learning_rate": 4.9861216959535335e-05, "loss": 82.9297, "step": 32550 }, { "epoch": 0.13154652003700756, "grad_norm": 3118.9599609375, "learning_rate": 4.9860849420525766e-05, "loss": 133.3087, "step": 32560 }, { "epoch": 0.1315869213023752, "grad_norm": 342.8658752441406, "learning_rate": 4.986048139684114e-05, "loss": 77.4298, "step": 32570 }, { "epoch": 0.13162732256774282, "grad_norm": 1096.45458984375, "learning_rate": 4.986011288848863e-05, "loss": 64.3582, "step": 32580 }, { "epoch": 0.13166772383311046, "grad_norm": 802.4067993164062, "learning_rate": 4.9859743895475416e-05, "loss": 106.3808, "step": 32590 }, { "epoch": 0.1317081250984781, "grad_norm": 1518.53173828125, "learning_rate": 4.98593744178087e-05, "loss": 86.2783, "step": 32600 }, { "epoch": 0.1317485263638457, "grad_norm": 662.360107421875, "learning_rate": 4.985900445549568e-05, "loss": 127.7222, "step": 32610 }, { "epoch": 0.13178892762921335, "grad_norm": 1433.412109375, "learning_rate": 4.985863400854358e-05, "loss": 109.9828, "step": 32620 }, { "epoch": 0.131829328894581, "grad_norm": 524.9939575195312, "learning_rate": 4.98582630769596e-05, "loss": 114.5297, "step": 32630 }, { "epoch": 0.1318697301599486, "grad_norm": 432.99395751953125, "learning_rate": 4.9857891660750986e-05, "loss": 74.3833, "step": 32640 }, { "epoch": 0.13191013142531624, "grad_norm": 1059.305908203125, "learning_rate": 4.9857519759924974e-05, "loss": 103.7896, "step": 32650 }, { "epoch": 0.13195053269068388, "grad_norm": 1521.5408935546875, "learning_rate": 4.985714737448882e-05, "loss": 110.8204, "step": 32660 }, { "epoch": 0.1319909339560515, "grad_norm": 712.7562255859375, "learning_rate": 4.9856774504449776e-05, "loss": 77.0656, "step": 32670 }, { "epoch": 0.13203133522141913, "grad_norm": 738.4374389648438, "learning_rate": 4.9856401149815126e-05, "loss": 76.575, "step": 32680 }, { "epoch": 0.13207173648678677, "grad_norm": 521.0355834960938, "learning_rate": 4.9856027310592134e-05, "loss": 79.7847, "step": 32690 }, { "epoch": 0.13211213775215438, "grad_norm": 738.5291137695312, "learning_rate": 4.985565298678809e-05, "loss": 74.041, "step": 32700 }, { "epoch": 0.13215253901752202, "grad_norm": 1271.053955078125, "learning_rate": 4.985527817841029e-05, "loss": 87.5718, "step": 32710 }, { "epoch": 0.13219294028288966, "grad_norm": 1135.2099609375, "learning_rate": 4.985490288546606e-05, "loss": 121.2673, "step": 32720 }, { "epoch": 0.1322333415482573, "grad_norm": 866.3362426757812, "learning_rate": 4.9854527107962686e-05, "loss": 107.6865, "step": 32730 }, { "epoch": 0.13227374281362492, "grad_norm": 1969.1063232421875, "learning_rate": 4.985415084590752e-05, "loss": 79.8334, "step": 32740 }, { "epoch": 0.13231414407899256, "grad_norm": 1149.1275634765625, "learning_rate": 4.985377409930789e-05, "loss": 109.8118, "step": 32750 }, { "epoch": 0.1323545453443602, "grad_norm": 756.9971313476562, "learning_rate": 4.985339686817113e-05, "loss": 118.7679, "step": 32760 }, { "epoch": 0.1323949466097278, "grad_norm": 795.5819702148438, "learning_rate": 4.9853019152504607e-05, "loss": 91.6352, "step": 32770 }, { "epoch": 0.13243534787509545, "grad_norm": 1227.4793701171875, "learning_rate": 4.9852640952315674e-05, "loss": 60.6576, "step": 32780 }, { "epoch": 0.1324757491404631, "grad_norm": 1278.319091796875, "learning_rate": 4.985226226761172e-05, "loss": 113.7857, "step": 32790 }, { "epoch": 0.1325161504058307, "grad_norm": 610.24560546875, "learning_rate": 4.985188309840012e-05, "loss": 78.6529, "step": 32800 }, { "epoch": 0.13255655167119834, "grad_norm": 1682.79736328125, "learning_rate": 4.9851503444688255e-05, "loss": 107.8541, "step": 32810 }, { "epoch": 0.13259695293656598, "grad_norm": 493.7384033203125, "learning_rate": 4.985112330648354e-05, "loss": 79.7233, "step": 32820 }, { "epoch": 0.1326373542019336, "grad_norm": 790.5695190429688, "learning_rate": 4.985074268379338e-05, "loss": 91.9399, "step": 32830 }, { "epoch": 0.13267775546730123, "grad_norm": 1110.0252685546875, "learning_rate": 4.985036157662521e-05, "loss": 160.5353, "step": 32840 }, { "epoch": 0.13271815673266887, "grad_norm": 1280.8482666015625, "learning_rate": 4.9849979984986426e-05, "loss": 139.881, "step": 32850 }, { "epoch": 0.13275855799803649, "grad_norm": 973.560302734375, "learning_rate": 4.98495979088845e-05, "loss": 124.0516, "step": 32860 }, { "epoch": 0.13279895926340413, "grad_norm": 496.84893798828125, "learning_rate": 4.9849215348326875e-05, "loss": 90.3565, "step": 32870 }, { "epoch": 0.13283936052877177, "grad_norm": 1748.6497802734375, "learning_rate": 4.984883230332099e-05, "loss": 84.9742, "step": 32880 }, { "epoch": 0.1328797617941394, "grad_norm": 1350.8636474609375, "learning_rate": 4.984844877387433e-05, "loss": 115.242, "step": 32890 }, { "epoch": 0.13292016305950702, "grad_norm": 776.8285522460938, "learning_rate": 4.984806475999437e-05, "loss": 77.2372, "step": 32900 }, { "epoch": 0.13296056432487466, "grad_norm": 863.73583984375, "learning_rate": 4.98476802616886e-05, "loss": 74.2184, "step": 32910 }, { "epoch": 0.1330009655902423, "grad_norm": 793.2598876953125, "learning_rate": 4.9847295278964514e-05, "loss": 67.6055, "step": 32920 }, { "epoch": 0.1330413668556099, "grad_norm": 1486.9317626953125, "learning_rate": 4.9846909811829604e-05, "loss": 127.2083, "step": 32930 }, { "epoch": 0.13308176812097755, "grad_norm": 773.4334716796875, "learning_rate": 4.984652386029139e-05, "loss": 116.8202, "step": 32940 }, { "epoch": 0.1331221693863452, "grad_norm": 1226.421630859375, "learning_rate": 4.984613742435742e-05, "loss": 112.299, "step": 32950 }, { "epoch": 0.1331625706517128, "grad_norm": 445.5863342285156, "learning_rate": 4.9845750504035195e-05, "loss": 87.6496, "step": 32960 }, { "epoch": 0.13320297191708044, "grad_norm": 894.6140747070312, "learning_rate": 4.984536309933227e-05, "loss": 95.4575, "step": 32970 }, { "epoch": 0.13324337318244808, "grad_norm": 823.8974609375, "learning_rate": 4.9844975210256217e-05, "loss": 93.921, "step": 32980 }, { "epoch": 0.1332837744478157, "grad_norm": 755.107177734375, "learning_rate": 4.984458683681457e-05, "loss": 84.3962, "step": 32990 }, { "epoch": 0.13332417571318333, "grad_norm": 885.6873779296875, "learning_rate": 4.984419797901491e-05, "loss": 105.6254, "step": 33000 }, { "epoch": 0.13336457697855097, "grad_norm": 411.86053466796875, "learning_rate": 4.984380863686482e-05, "loss": 97.5172, "step": 33010 }, { "epoch": 0.1334049782439186, "grad_norm": 563.2962646484375, "learning_rate": 4.98434188103719e-05, "loss": 80.259, "step": 33020 }, { "epoch": 0.13344537950928623, "grad_norm": 1379.732666015625, "learning_rate": 4.984302849954373e-05, "loss": 73.214, "step": 33030 }, { "epoch": 0.13348578077465387, "grad_norm": 750.8110961914062, "learning_rate": 4.984263770438793e-05, "loss": 106.9937, "step": 33040 }, { "epoch": 0.1335261820400215, "grad_norm": 1013.02392578125, "learning_rate": 4.984224642491212e-05, "loss": 75.5326, "step": 33050 }, { "epoch": 0.13356658330538912, "grad_norm": 1022.1004028320312, "learning_rate": 4.9841854661123936e-05, "loss": 108.553, "step": 33060 }, { "epoch": 0.13360698457075676, "grad_norm": 809.585205078125, "learning_rate": 4.9841462413030995e-05, "loss": 84.0129, "step": 33070 }, { "epoch": 0.1336473858361244, "grad_norm": 842.181884765625, "learning_rate": 4.984106968064095e-05, "loss": 112.834, "step": 33080 }, { "epoch": 0.133687787101492, "grad_norm": 467.4346618652344, "learning_rate": 4.984067646396147e-05, "loss": 100.528, "step": 33090 }, { "epoch": 0.13372818836685965, "grad_norm": 714.903076171875, "learning_rate": 4.984028276300021e-05, "loss": 129.6565, "step": 33100 }, { "epoch": 0.1337685896322273, "grad_norm": 709.3721923828125, "learning_rate": 4.983988857776486e-05, "loss": 101.3018, "step": 33110 }, { "epoch": 0.1338089908975949, "grad_norm": 988.5148315429688, "learning_rate": 4.983949390826308e-05, "loss": 86.796, "step": 33120 }, { "epoch": 0.13384939216296254, "grad_norm": 1472.410400390625, "learning_rate": 4.983909875450258e-05, "loss": 94.508, "step": 33130 }, { "epoch": 0.13388979342833018, "grad_norm": 900.3340454101562, "learning_rate": 4.983870311649107e-05, "loss": 95.947, "step": 33140 }, { "epoch": 0.1339301946936978, "grad_norm": 1466.88134765625, "learning_rate": 4.983830699423625e-05, "loss": 74.9536, "step": 33150 }, { "epoch": 0.13397059595906544, "grad_norm": 576.2639770507812, "learning_rate": 4.9837910387745845e-05, "loss": 115.5023, "step": 33160 }, { "epoch": 0.13401099722443308, "grad_norm": 1318.8853759765625, "learning_rate": 4.9837513297027594e-05, "loss": 84.5853, "step": 33170 }, { "epoch": 0.1340513984898007, "grad_norm": 934.3829345703125, "learning_rate": 4.983711572208924e-05, "loss": 75.3381, "step": 33180 }, { "epoch": 0.13409179975516833, "grad_norm": 1154.966796875, "learning_rate": 4.983671766293851e-05, "loss": 83.0238, "step": 33190 }, { "epoch": 0.13413220102053597, "grad_norm": 1403.0806884765625, "learning_rate": 4.983631911958319e-05, "loss": 82.2895, "step": 33200 }, { "epoch": 0.1341726022859036, "grad_norm": 1025.5306396484375, "learning_rate": 4.983592009203105e-05, "loss": 64.9099, "step": 33210 }, { "epoch": 0.13421300355127122, "grad_norm": 570.6565551757812, "learning_rate": 4.9835520580289854e-05, "loss": 60.6472, "step": 33220 }, { "epoch": 0.13425340481663886, "grad_norm": 241.88272094726562, "learning_rate": 4.98351205843674e-05, "loss": 101.3504, "step": 33230 }, { "epoch": 0.1342938060820065, "grad_norm": 786.84912109375, "learning_rate": 4.9834720104271484e-05, "loss": 80.2996, "step": 33240 }, { "epoch": 0.1343342073473741, "grad_norm": 1382.0052490234375, "learning_rate": 4.983431914000991e-05, "loss": 90.2512, "step": 33250 }, { "epoch": 0.13437460861274175, "grad_norm": 1485.1060791015625, "learning_rate": 4.9833917691590506e-05, "loss": 80.0044, "step": 33260 }, { "epoch": 0.1344150098781094, "grad_norm": 698.7577514648438, "learning_rate": 4.9833515759021085e-05, "loss": 102.9575, "step": 33270 }, { "epoch": 0.134455411143477, "grad_norm": 693.4671020507812, "learning_rate": 4.98331133423095e-05, "loss": 94.7776, "step": 33280 }, { "epoch": 0.13449581240884464, "grad_norm": 664.5126342773438, "learning_rate": 4.983271044146357e-05, "loss": 85.3929, "step": 33290 }, { "epoch": 0.13453621367421228, "grad_norm": 841.61962890625, "learning_rate": 4.983230705649118e-05, "loss": 92.7831, "step": 33300 }, { "epoch": 0.1345766149395799, "grad_norm": 1524.328125, "learning_rate": 4.9831903187400166e-05, "loss": 85.1477, "step": 33310 }, { "epoch": 0.13461701620494754, "grad_norm": 551.5435180664062, "learning_rate": 4.983149883419842e-05, "loss": 128.3945, "step": 33320 }, { "epoch": 0.13465741747031518, "grad_norm": 387.754638671875, "learning_rate": 4.983109399689382e-05, "loss": 75.982, "step": 33330 }, { "epoch": 0.1346978187356828, "grad_norm": 2052.11669921875, "learning_rate": 4.9830688675494265e-05, "loss": 166.9485, "step": 33340 }, { "epoch": 0.13473822000105043, "grad_norm": 656.3280639648438, "learning_rate": 4.9830282870007646e-05, "loss": 98.0935, "step": 33350 }, { "epoch": 0.13477862126641807, "grad_norm": 960.6358032226562, "learning_rate": 4.982987658044188e-05, "loss": 88.8502, "step": 33360 }, { "epoch": 0.1348190225317857, "grad_norm": 303.2107238769531, "learning_rate": 4.982946980680488e-05, "loss": 82.7864, "step": 33370 }, { "epoch": 0.13485942379715332, "grad_norm": 786.0392456054688, "learning_rate": 4.982906254910459e-05, "loss": 75.8299, "step": 33380 }, { "epoch": 0.13489982506252096, "grad_norm": 554.2486572265625, "learning_rate": 4.982865480734894e-05, "loss": 71.6516, "step": 33390 }, { "epoch": 0.1349402263278886, "grad_norm": 901.5972900390625, "learning_rate": 4.982824658154589e-05, "loss": 106.4935, "step": 33400 }, { "epoch": 0.1349806275932562, "grad_norm": 363.12115478515625, "learning_rate": 4.982783787170338e-05, "loss": 76.4278, "step": 33410 }, { "epoch": 0.13502102885862385, "grad_norm": 3618.558349609375, "learning_rate": 4.982742867782939e-05, "loss": 130.8146, "step": 33420 }, { "epoch": 0.1350614301239915, "grad_norm": 810.9340209960938, "learning_rate": 4.982701899993189e-05, "loss": 109.7431, "step": 33430 }, { "epoch": 0.1351018313893591, "grad_norm": 1115.0909423828125, "learning_rate": 4.982660883801889e-05, "loss": 97.727, "step": 33440 }, { "epoch": 0.13514223265472675, "grad_norm": 726.3258056640625, "learning_rate": 4.982619819209836e-05, "loss": 129.2327, "step": 33450 }, { "epoch": 0.13518263392009439, "grad_norm": 0.0, "learning_rate": 4.9825787062178315e-05, "loss": 62.2287, "step": 33460 }, { "epoch": 0.135223035185462, "grad_norm": 2895.0556640625, "learning_rate": 4.982537544826677e-05, "loss": 99.4745, "step": 33470 }, { "epoch": 0.13526343645082964, "grad_norm": 496.2646789550781, "learning_rate": 4.982496335037175e-05, "loss": 76.2467, "step": 33480 }, { "epoch": 0.13530383771619728, "grad_norm": 1723.678955078125, "learning_rate": 4.982455076850129e-05, "loss": 87.5003, "step": 33490 }, { "epoch": 0.1353442389815649, "grad_norm": 622.0546875, "learning_rate": 4.982413770266342e-05, "loss": 60.5188, "step": 33500 }, { "epoch": 0.13538464024693253, "grad_norm": 579.060302734375, "learning_rate": 4.9823724152866226e-05, "loss": 121.2273, "step": 33510 }, { "epoch": 0.13542504151230017, "grad_norm": 638.8704833984375, "learning_rate": 4.982331011911774e-05, "loss": 139.4159, "step": 33520 }, { "epoch": 0.13546544277766778, "grad_norm": 684.1176147460938, "learning_rate": 4.9822895601426034e-05, "loss": 107.1239, "step": 33530 }, { "epoch": 0.13550584404303542, "grad_norm": 626.8060302734375, "learning_rate": 4.982248059979921e-05, "loss": 86.9168, "step": 33540 }, { "epoch": 0.13554624530840306, "grad_norm": 539.0343017578125, "learning_rate": 4.982206511424534e-05, "loss": 69.0915, "step": 33550 }, { "epoch": 0.1355866465737707, "grad_norm": 2398.862548828125, "learning_rate": 4.9821649144772545e-05, "loss": 70.2374, "step": 33560 }, { "epoch": 0.13562704783913831, "grad_norm": 586.1361694335938, "learning_rate": 4.9821232691388906e-05, "loss": 70.2269, "step": 33570 }, { "epoch": 0.13566744910450595, "grad_norm": 850.7914428710938, "learning_rate": 4.982081575410256e-05, "loss": 113.3011, "step": 33580 }, { "epoch": 0.1357078503698736, "grad_norm": 642.4421997070312, "learning_rate": 4.9820398332921634e-05, "loss": 95.5486, "step": 33590 }, { "epoch": 0.1357482516352412, "grad_norm": 706.2947998046875, "learning_rate": 4.981998042785427e-05, "loss": 97.3085, "step": 33600 }, { "epoch": 0.13578865290060885, "grad_norm": 1023.1542358398438, "learning_rate": 4.98195620389086e-05, "loss": 117.8821, "step": 33610 }, { "epoch": 0.1358290541659765, "grad_norm": 480.221923828125, "learning_rate": 4.9819143166092796e-05, "loss": 93.8683, "step": 33620 }, { "epoch": 0.1358694554313441, "grad_norm": 520.1914672851562, "learning_rate": 4.9818723809415016e-05, "loss": 83.5975, "step": 33630 }, { "epoch": 0.13590985669671174, "grad_norm": 2167.59619140625, "learning_rate": 4.981830396888344e-05, "loss": 103.9532, "step": 33640 }, { "epoch": 0.13595025796207938, "grad_norm": 801.8353271484375, "learning_rate": 4.981788364450625e-05, "loss": 146.1724, "step": 33650 }, { "epoch": 0.135990659227447, "grad_norm": 862.556884765625, "learning_rate": 4.981746283629164e-05, "loss": 92.8243, "step": 33660 }, { "epoch": 0.13603106049281463, "grad_norm": 1226.8758544921875, "learning_rate": 4.981704154424781e-05, "loss": 107.6284, "step": 33670 }, { "epoch": 0.13607146175818227, "grad_norm": 1550.146484375, "learning_rate": 4.981661976838299e-05, "loss": 109.7946, "step": 33680 }, { "epoch": 0.13611186302354988, "grad_norm": 619.8020629882812, "learning_rate": 4.981619750870537e-05, "loss": 108.0941, "step": 33690 }, { "epoch": 0.13615226428891752, "grad_norm": 551.2567749023438, "learning_rate": 4.9815774765223226e-05, "loss": 75.5076, "step": 33700 }, { "epoch": 0.13619266555428516, "grad_norm": 1045.827880859375, "learning_rate": 4.9815351537944774e-05, "loss": 64.202, "step": 33710 }, { "epoch": 0.1362330668196528, "grad_norm": 435.85296630859375, "learning_rate": 4.9814927826878256e-05, "loss": 123.6437, "step": 33720 }, { "epoch": 0.13627346808502042, "grad_norm": 1007.7003784179688, "learning_rate": 4.9814503632031954e-05, "loss": 133.8719, "step": 33730 }, { "epoch": 0.13631386935038806, "grad_norm": 1092.2225341796875, "learning_rate": 4.981407895341412e-05, "loss": 91.8184, "step": 33740 }, { "epoch": 0.1363542706157557, "grad_norm": 656.14990234375, "learning_rate": 4.9813653791033057e-05, "loss": 68.1897, "step": 33750 }, { "epoch": 0.1363946718811233, "grad_norm": 1275.6177978515625, "learning_rate": 4.981322814489703e-05, "loss": 91.9896, "step": 33760 }, { "epoch": 0.13643507314649095, "grad_norm": 900.5950927734375, "learning_rate": 4.9812802015014334e-05, "loss": 90.7882, "step": 33770 }, { "epoch": 0.1364754744118586, "grad_norm": 0.0, "learning_rate": 4.981237540139331e-05, "loss": 62.0134, "step": 33780 }, { "epoch": 0.1365158756772262, "grad_norm": 1441.5318603515625, "learning_rate": 4.9811948304042234e-05, "loss": 75.5356, "step": 33790 }, { "epoch": 0.13655627694259384, "grad_norm": 783.706787109375, "learning_rate": 4.9811520722969465e-05, "loss": 109.1127, "step": 33800 }, { "epoch": 0.13659667820796148, "grad_norm": 1493.0281982421875, "learning_rate": 4.981109265818332e-05, "loss": 150.1899, "step": 33810 }, { "epoch": 0.1366370794733291, "grad_norm": 728.7564086914062, "learning_rate": 4.981066410969215e-05, "loss": 64.3308, "step": 33820 }, { "epoch": 0.13667748073869673, "grad_norm": 1312.0137939453125, "learning_rate": 4.981023507750431e-05, "loss": 119.9241, "step": 33830 }, { "epoch": 0.13671788200406437, "grad_norm": 805.4586181640625, "learning_rate": 4.980980556162816e-05, "loss": 107.8721, "step": 33840 }, { "epoch": 0.13675828326943198, "grad_norm": 609.840087890625, "learning_rate": 4.980937556207208e-05, "loss": 89.4216, "step": 33850 }, { "epoch": 0.13679868453479962, "grad_norm": 973.7212524414062, "learning_rate": 4.9808945078844456e-05, "loss": 81.5434, "step": 33860 }, { "epoch": 0.13683908580016726, "grad_norm": 982.0657348632812, "learning_rate": 4.9808514111953674e-05, "loss": 67.6982, "step": 33870 }, { "epoch": 0.1368794870655349, "grad_norm": 524.6974487304688, "learning_rate": 4.980808266140813e-05, "loss": 150.7908, "step": 33880 }, { "epoch": 0.13691988833090252, "grad_norm": 600.6156005859375, "learning_rate": 4.980765072721625e-05, "loss": 107.8641, "step": 33890 }, { "epoch": 0.13696028959627016, "grad_norm": 464.95550537109375, "learning_rate": 4.9807218309386444e-05, "loss": 81.4719, "step": 33900 }, { "epoch": 0.1370006908616378, "grad_norm": 810.7647094726562, "learning_rate": 4.980678540792715e-05, "loss": 91.2513, "step": 33910 }, { "epoch": 0.1370410921270054, "grad_norm": 904.2017211914062, "learning_rate": 4.980635202284679e-05, "loss": 128.9432, "step": 33920 }, { "epoch": 0.13708149339237305, "grad_norm": 751.0636596679688, "learning_rate": 4.980591815415384e-05, "loss": 67.3943, "step": 33930 }, { "epoch": 0.1371218946577407, "grad_norm": 999.1458740234375, "learning_rate": 4.980548380185674e-05, "loss": 122.1365, "step": 33940 }, { "epoch": 0.1371622959231083, "grad_norm": 1589.1033935546875, "learning_rate": 4.980504896596396e-05, "loss": 77.5396, "step": 33950 }, { "epoch": 0.13720269718847594, "grad_norm": 1546.7073974609375, "learning_rate": 4.980461364648398e-05, "loss": 117.3774, "step": 33960 }, { "epoch": 0.13724309845384358, "grad_norm": 920.711669921875, "learning_rate": 4.9804177843425295e-05, "loss": 112.9565, "step": 33970 }, { "epoch": 0.1372834997192112, "grad_norm": 1206.650146484375, "learning_rate": 4.980374155679639e-05, "loss": 111.5966, "step": 33980 }, { "epoch": 0.13732390098457883, "grad_norm": 1469.6834716796875, "learning_rate": 4.980330478660576e-05, "loss": 107.332, "step": 33990 }, { "epoch": 0.13736430224994647, "grad_norm": 1957.3115234375, "learning_rate": 4.980286753286195e-05, "loss": 128.1132, "step": 34000 }, { "epoch": 0.13740470351531409, "grad_norm": 337.71295166015625, "learning_rate": 4.9802429795573455e-05, "loss": 70.1459, "step": 34010 }, { "epoch": 0.13744510478068173, "grad_norm": 714.4182739257812, "learning_rate": 4.980199157474884e-05, "loss": 116.3985, "step": 34020 }, { "epoch": 0.13748550604604937, "grad_norm": 691.6640014648438, "learning_rate": 4.980155287039662e-05, "loss": 54.4459, "step": 34030 }, { "epoch": 0.137525907311417, "grad_norm": 1381.4683837890625, "learning_rate": 4.980111368252535e-05, "loss": 59.3446, "step": 34040 }, { "epoch": 0.13756630857678462, "grad_norm": 1003.1581420898438, "learning_rate": 4.9800674011143614e-05, "loss": 79.539, "step": 34050 }, { "epoch": 0.13760670984215226, "grad_norm": 705.7440795898438, "learning_rate": 4.980023385625996e-05, "loss": 104.7091, "step": 34060 }, { "epoch": 0.1376471111075199, "grad_norm": 655.6250610351562, "learning_rate": 4.979979321788298e-05, "loss": 65.6317, "step": 34070 }, { "epoch": 0.1376875123728875, "grad_norm": 741.8604125976562, "learning_rate": 4.9799352096021266e-05, "loss": 70.4658, "step": 34080 }, { "epoch": 0.13772791363825515, "grad_norm": 2940.106689453125, "learning_rate": 4.979891049068342e-05, "loss": 108.38, "step": 34090 }, { "epoch": 0.1377683149036228, "grad_norm": 924.1039428710938, "learning_rate": 4.979846840187804e-05, "loss": 69.7543, "step": 34100 }, { "epoch": 0.1378087161689904, "grad_norm": 937.28515625, "learning_rate": 4.979802582961375e-05, "loss": 97.4457, "step": 34110 }, { "epoch": 0.13784911743435804, "grad_norm": 1431.635498046875, "learning_rate": 4.979758277389919e-05, "loss": 91.3253, "step": 34120 }, { "epoch": 0.13788951869972568, "grad_norm": 953.5215454101562, "learning_rate": 4.9797139234742975e-05, "loss": 81.4589, "step": 34130 }, { "epoch": 0.1379299199650933, "grad_norm": 4141.5361328125, "learning_rate": 4.9796695212153764e-05, "loss": 154.3838, "step": 34140 }, { "epoch": 0.13797032123046093, "grad_norm": 779.7581176757812, "learning_rate": 4.9796250706140224e-05, "loss": 69.3488, "step": 34150 }, { "epoch": 0.13801072249582857, "grad_norm": 778.5997314453125, "learning_rate": 4.9795805716711e-05, "loss": 135.2858, "step": 34160 }, { "epoch": 0.1380511237611962, "grad_norm": 990.5801391601562, "learning_rate": 4.979536024387479e-05, "loss": 108.3737, "step": 34170 }, { "epoch": 0.13809152502656383, "grad_norm": 715.4221801757812, "learning_rate": 4.979491428764026e-05, "loss": 95.1732, "step": 34180 }, { "epoch": 0.13813192629193147, "grad_norm": 2391.443115234375, "learning_rate": 4.979446784801611e-05, "loss": 151.664, "step": 34190 }, { "epoch": 0.1381723275572991, "grad_norm": 1992.6207275390625, "learning_rate": 4.9794020925011044e-05, "loss": 82.8294, "step": 34200 }, { "epoch": 0.13821272882266672, "grad_norm": 1085.1295166015625, "learning_rate": 4.979357351863377e-05, "loss": 116.4222, "step": 34210 }, { "epoch": 0.13825313008803436, "grad_norm": 607.43603515625, "learning_rate": 4.979312562889302e-05, "loss": 71.7245, "step": 34220 }, { "epoch": 0.138293531353402, "grad_norm": 782.8214111328125, "learning_rate": 4.9792677255797525e-05, "loss": 93.9537, "step": 34230 }, { "epoch": 0.1383339326187696, "grad_norm": 735.2789306640625, "learning_rate": 4.979222839935602e-05, "loss": 71.7033, "step": 34240 }, { "epoch": 0.13837433388413725, "grad_norm": 1233.6981201171875, "learning_rate": 4.979177905957726e-05, "loss": 90.7622, "step": 34250 }, { "epoch": 0.1384147351495049, "grad_norm": 567.0103149414062, "learning_rate": 4.979132923647001e-05, "loss": 135.1103, "step": 34260 }, { "epoch": 0.1384551364148725, "grad_norm": 594.86669921875, "learning_rate": 4.979087893004302e-05, "loss": 82.0568, "step": 34270 }, { "epoch": 0.13849553768024014, "grad_norm": 900.0689086914062, "learning_rate": 4.979042814030509e-05, "loss": 91.8436, "step": 34280 }, { "epoch": 0.13853593894560778, "grad_norm": 0.0, "learning_rate": 4.9789976867265e-05, "loss": 67.3411, "step": 34290 }, { "epoch": 0.1385763402109754, "grad_norm": 1077.3468017578125, "learning_rate": 4.9789525110931545e-05, "loss": 119.6317, "step": 34300 }, { "epoch": 0.13861674147634304, "grad_norm": 2667.85400390625, "learning_rate": 4.978907287131354e-05, "loss": 95.113, "step": 34310 }, { "epoch": 0.13865714274171068, "grad_norm": 1062.1884765625, "learning_rate": 4.978862014841979e-05, "loss": 77.8131, "step": 34320 }, { "epoch": 0.1386975440070783, "grad_norm": 3490.418701171875, "learning_rate": 4.9788166942259135e-05, "loss": 124.19, "step": 34330 }, { "epoch": 0.13873794527244593, "grad_norm": 351.10784912109375, "learning_rate": 4.97877132528404e-05, "loss": 100.0667, "step": 34340 }, { "epoch": 0.13877834653781357, "grad_norm": 901.481201171875, "learning_rate": 4.978725908017243e-05, "loss": 80.7147, "step": 34350 }, { "epoch": 0.1388187478031812, "grad_norm": 575.747802734375, "learning_rate": 4.9786804424264085e-05, "loss": 107.6961, "step": 34360 }, { "epoch": 0.13885914906854882, "grad_norm": 816.9763793945312, "learning_rate": 4.9786349285124235e-05, "loss": 104.7212, "step": 34370 }, { "epoch": 0.13889955033391646, "grad_norm": 672.7227783203125, "learning_rate": 4.978589366276174e-05, "loss": 95.8187, "step": 34380 }, { "epoch": 0.1389399515992841, "grad_norm": 1123.2491455078125, "learning_rate": 4.978543755718549e-05, "loss": 97.5948, "step": 34390 }, { "epoch": 0.1389803528646517, "grad_norm": 741.401611328125, "learning_rate": 4.978498096840436e-05, "loss": 77.3318, "step": 34400 }, { "epoch": 0.13902075413001935, "grad_norm": 2102.46923828125, "learning_rate": 4.978452389642728e-05, "loss": 80.4057, "step": 34410 }, { "epoch": 0.139061155395387, "grad_norm": 1185.681396484375, "learning_rate": 4.978406634126315e-05, "loss": 105.0425, "step": 34420 }, { "epoch": 0.1391015566607546, "grad_norm": 1234.7724609375, "learning_rate": 4.9783608302920873e-05, "loss": 94.0603, "step": 34430 }, { "epoch": 0.13914195792612225, "grad_norm": 1568.1796875, "learning_rate": 4.9783149781409404e-05, "loss": 123.3275, "step": 34440 }, { "epoch": 0.13918235919148988, "grad_norm": 1667.6737060546875, "learning_rate": 4.978269077673767e-05, "loss": 62.5816, "step": 34450 }, { "epoch": 0.1392227604568575, "grad_norm": 880.3914794921875, "learning_rate": 4.9782231288914614e-05, "loss": 80.6274, "step": 34460 }, { "epoch": 0.13926316172222514, "grad_norm": 473.5460510253906, "learning_rate": 4.97817713179492e-05, "loss": 136.4202, "step": 34470 }, { "epoch": 0.13930356298759278, "grad_norm": 1051.316162109375, "learning_rate": 4.9781310863850405e-05, "loss": 79.2705, "step": 34480 }, { "epoch": 0.1393439642529604, "grad_norm": 942.3121948242188, "learning_rate": 4.978084992662719e-05, "loss": 87.1078, "step": 34490 }, { "epoch": 0.13938436551832803, "grad_norm": 663.4690551757812, "learning_rate": 4.978038850628854e-05, "loss": 87.4095, "step": 34500 }, { "epoch": 0.13942476678369567, "grad_norm": 469.06475830078125, "learning_rate": 4.977992660284347e-05, "loss": 92.2786, "step": 34510 }, { "epoch": 0.1394651680490633, "grad_norm": 452.9181213378906, "learning_rate": 4.977946421630098e-05, "loss": 109.0747, "step": 34520 }, { "epoch": 0.13950556931443092, "grad_norm": 3418.622314453125, "learning_rate": 4.977900134667006e-05, "loss": 92.9968, "step": 34530 }, { "epoch": 0.13954597057979856, "grad_norm": 934.139892578125, "learning_rate": 4.977853799395976e-05, "loss": 86.7001, "step": 34540 }, { "epoch": 0.1395863718451662, "grad_norm": 541.4644165039062, "learning_rate": 4.97780741581791e-05, "loss": 125.5777, "step": 34550 }, { "epoch": 0.13962677311053381, "grad_norm": 1703.493896484375, "learning_rate": 4.977760983933714e-05, "loss": 113.32, "step": 34560 }, { "epoch": 0.13966717437590145, "grad_norm": 469.1068115234375, "learning_rate": 4.9777145037442906e-05, "loss": 73.509, "step": 34570 }, { "epoch": 0.1397075756412691, "grad_norm": 445.8856201171875, "learning_rate": 4.9776679752505476e-05, "loss": 72.2943, "step": 34580 }, { "epoch": 0.1397479769066367, "grad_norm": 636.2007446289062, "learning_rate": 4.977621398453393e-05, "loss": 91.6618, "step": 34590 }, { "epoch": 0.13978837817200435, "grad_norm": 727.9812622070312, "learning_rate": 4.977574773353732e-05, "loss": 101.3671, "step": 34600 }, { "epoch": 0.13982877943737199, "grad_norm": 1291.48974609375, "learning_rate": 4.9775280999524766e-05, "loss": 75.3459, "step": 34610 }, { "epoch": 0.1398691807027396, "grad_norm": 975.12109375, "learning_rate": 4.9774813782505346e-05, "loss": 108.9374, "step": 34620 }, { "epoch": 0.13990958196810724, "grad_norm": 1339.5286865234375, "learning_rate": 4.9774346082488176e-05, "loss": 62.3226, "step": 34630 }, { "epoch": 0.13994998323347488, "grad_norm": 1278.2696533203125, "learning_rate": 4.977387789948238e-05, "loss": 111.346, "step": 34640 }, { "epoch": 0.1399903844988425, "grad_norm": 582.800537109375, "learning_rate": 4.977340923349707e-05, "loss": 59.1344, "step": 34650 }, { "epoch": 0.14003078576421013, "grad_norm": 980.9688110351562, "learning_rate": 4.9772940084541405e-05, "loss": 116.2872, "step": 34660 }, { "epoch": 0.14007118702957777, "grad_norm": 600.323486328125, "learning_rate": 4.9772470452624506e-05, "loss": 98.4954, "step": 34670 }, { "epoch": 0.1401115882949454, "grad_norm": 1239.6783447265625, "learning_rate": 4.977200033775555e-05, "loss": 108.7115, "step": 34680 }, { "epoch": 0.14015198956031302, "grad_norm": 1736.9547119140625, "learning_rate": 4.97715297399437e-05, "loss": 134.5554, "step": 34690 }, { "epoch": 0.14019239082568066, "grad_norm": 2366.77490234375, "learning_rate": 4.977105865919812e-05, "loss": 127.1646, "step": 34700 }, { "epoch": 0.1402327920910483, "grad_norm": 1988.361083984375, "learning_rate": 4.9770587095527995e-05, "loss": 107.3173, "step": 34710 }, { "epoch": 0.14027319335641592, "grad_norm": 632.1370239257812, "learning_rate": 4.977011504894252e-05, "loss": 84.2821, "step": 34720 }, { "epoch": 0.14031359462178356, "grad_norm": 1778.7666015625, "learning_rate": 4.9769642519450904e-05, "loss": 83.9897, "step": 34730 }, { "epoch": 0.1403539958871512, "grad_norm": 652.8768310546875, "learning_rate": 4.9769169507062355e-05, "loss": 102.6689, "step": 34740 }, { "epoch": 0.1403943971525188, "grad_norm": 1475.0391845703125, "learning_rate": 4.976869601178609e-05, "loss": 117.3591, "step": 34750 }, { "epoch": 0.14043479841788645, "grad_norm": 1975.065673828125, "learning_rate": 4.976822203363135e-05, "loss": 98.2487, "step": 34760 }, { "epoch": 0.1404751996832541, "grad_norm": 1050.9573974609375, "learning_rate": 4.976774757260737e-05, "loss": 94.4713, "step": 34770 }, { "epoch": 0.1405156009486217, "grad_norm": 1176.8922119140625, "learning_rate": 4.9767272628723396e-05, "loss": 99.8589, "step": 34780 }, { "epoch": 0.14055600221398934, "grad_norm": 1319.9659423828125, "learning_rate": 4.976679720198869e-05, "loss": 100.7659, "step": 34790 }, { "epoch": 0.14059640347935698, "grad_norm": 515.001220703125, "learning_rate": 4.976632129241252e-05, "loss": 77.551, "step": 34800 }, { "epoch": 0.1406368047447246, "grad_norm": 822.0165405273438, "learning_rate": 4.9765844900004176e-05, "loss": 82.4817, "step": 34810 }, { "epoch": 0.14067720601009223, "grad_norm": 1176.460693359375, "learning_rate": 4.976536802477293e-05, "loss": 116.3207, "step": 34820 }, { "epoch": 0.14071760727545987, "grad_norm": 1214.6893310546875, "learning_rate": 4.976489066672808e-05, "loss": 90.3449, "step": 34830 }, { "epoch": 0.1407580085408275, "grad_norm": 610.4821166992188, "learning_rate": 4.9764412825878943e-05, "loss": 104.2971, "step": 34840 }, { "epoch": 0.14079840980619512, "grad_norm": 1629.2425537109375, "learning_rate": 4.976393450223482e-05, "loss": 100.4276, "step": 34850 }, { "epoch": 0.14083881107156276, "grad_norm": 1354.5982666015625, "learning_rate": 4.9763455695805056e-05, "loss": 87.7344, "step": 34860 }, { "epoch": 0.1408792123369304, "grad_norm": 569.3548583984375, "learning_rate": 4.976297640659897e-05, "loss": 72.0556, "step": 34870 }, { "epoch": 0.14091961360229802, "grad_norm": 1153.3983154296875, "learning_rate": 4.97624966346259e-05, "loss": 76.03, "step": 34880 }, { "epoch": 0.14096001486766566, "grad_norm": 550.9409790039062, "learning_rate": 4.9762016379895225e-05, "loss": 106.2172, "step": 34890 }, { "epoch": 0.1410004161330333, "grad_norm": 621.2599487304688, "learning_rate": 4.976153564241628e-05, "loss": 85.7488, "step": 34900 }, { "epoch": 0.1410408173984009, "grad_norm": 1340.081787109375, "learning_rate": 4.976105442219846e-05, "loss": 108.7937, "step": 34910 }, { "epoch": 0.14108121866376855, "grad_norm": 2728.06201171875, "learning_rate": 4.976057271925113e-05, "loss": 80.4239, "step": 34920 }, { "epoch": 0.1411216199291362, "grad_norm": 571.938720703125, "learning_rate": 4.9760090533583686e-05, "loss": 61.1496, "step": 34930 }, { "epoch": 0.1411620211945038, "grad_norm": 421.5497131347656, "learning_rate": 4.9759607865205534e-05, "loss": 151.0666, "step": 34940 }, { "epoch": 0.14120242245987144, "grad_norm": 1543.9168701171875, "learning_rate": 4.975912471412607e-05, "loss": 121.0911, "step": 34950 }, { "epoch": 0.14124282372523908, "grad_norm": 1396.1658935546875, "learning_rate": 4.975864108035474e-05, "loss": 94.3398, "step": 34960 }, { "epoch": 0.1412832249906067, "grad_norm": 785.8390502929688, "learning_rate": 4.975815696390094e-05, "loss": 89.429, "step": 34970 }, { "epoch": 0.14132362625597433, "grad_norm": 719.430908203125, "learning_rate": 4.975767236477413e-05, "loss": 116.3824, "step": 34980 }, { "epoch": 0.14136402752134197, "grad_norm": 662.4757080078125, "learning_rate": 4.975718728298375e-05, "loss": 87.5541, "step": 34990 }, { "epoch": 0.1414044287867096, "grad_norm": 1017.3776245117188, "learning_rate": 4.975670171853926e-05, "loss": 230.2885, "step": 35000 }, { "epoch": 0.14144483005207723, "grad_norm": 391.976806640625, "learning_rate": 4.975621567145012e-05, "loss": 81.4888, "step": 35010 }, { "epoch": 0.14148523131744487, "grad_norm": 1794.4031982421875, "learning_rate": 4.975572914172582e-05, "loss": 102.6515, "step": 35020 }, { "epoch": 0.1415256325828125, "grad_norm": 937.76123046875, "learning_rate": 4.975524212937582e-05, "loss": 108.7728, "step": 35030 }, { "epoch": 0.14156603384818012, "grad_norm": 1814.7794189453125, "learning_rate": 4.975475463440964e-05, "loss": 93.3038, "step": 35040 }, { "epoch": 0.14160643511354776, "grad_norm": 1418.080322265625, "learning_rate": 4.975426665683678e-05, "loss": 92.7187, "step": 35050 }, { "epoch": 0.1416468363789154, "grad_norm": 1647.2652587890625, "learning_rate": 4.9753778196666737e-05, "loss": 99.5951, "step": 35060 }, { "epoch": 0.141687237644283, "grad_norm": 728.3031005859375, "learning_rate": 4.975328925390904e-05, "loss": 78.1284, "step": 35070 }, { "epoch": 0.14172763890965065, "grad_norm": 1265.145263671875, "learning_rate": 4.975279982857324e-05, "loss": 141.2944, "step": 35080 }, { "epoch": 0.1417680401750183, "grad_norm": 1588.732666015625, "learning_rate": 4.975230992066885e-05, "loss": 80.3437, "step": 35090 }, { "epoch": 0.1418084414403859, "grad_norm": 0.0, "learning_rate": 4.975181953020544e-05, "loss": 101.6614, "step": 35100 }, { "epoch": 0.14184884270575354, "grad_norm": 2776.19140625, "learning_rate": 4.9751328657192565e-05, "loss": 95.6715, "step": 35110 }, { "epoch": 0.14188924397112118, "grad_norm": 2080.83837890625, "learning_rate": 4.9750837301639796e-05, "loss": 153.1414, "step": 35120 }, { "epoch": 0.1419296452364888, "grad_norm": 912.7608642578125, "learning_rate": 4.975034546355671e-05, "loss": 106.0554, "step": 35130 }, { "epoch": 0.14197004650185643, "grad_norm": 358.8157043457031, "learning_rate": 4.97498531429529e-05, "loss": 95.7549, "step": 35140 }, { "epoch": 0.14201044776722407, "grad_norm": 387.9512634277344, "learning_rate": 4.974936033983795e-05, "loss": 88.8157, "step": 35150 }, { "epoch": 0.14205084903259171, "grad_norm": 1014.5821533203125, "learning_rate": 4.974886705422149e-05, "loss": 103.6284, "step": 35160 }, { "epoch": 0.14209125029795933, "grad_norm": 1092.1754150390625, "learning_rate": 4.974837328611312e-05, "loss": 107.7396, "step": 35170 }, { "epoch": 0.14213165156332697, "grad_norm": 926.5281372070312, "learning_rate": 4.974787903552247e-05, "loss": 78.8734, "step": 35180 }, { "epoch": 0.1421720528286946, "grad_norm": 546.8348999023438, "learning_rate": 4.974738430245918e-05, "loss": 189.5167, "step": 35190 }, { "epoch": 0.14221245409406222, "grad_norm": 581.217041015625, "learning_rate": 4.9746889086932895e-05, "loss": 82.9471, "step": 35200 }, { "epoch": 0.14225285535942986, "grad_norm": 414.46807861328125, "learning_rate": 4.974639338895326e-05, "loss": 61.2459, "step": 35210 }, { "epoch": 0.1422932566247975, "grad_norm": 1268.71826171875, "learning_rate": 4.9745897208529956e-05, "loss": 102.3453, "step": 35220 }, { "epoch": 0.1423336578901651, "grad_norm": 1139.93115234375, "learning_rate": 4.974540054567264e-05, "loss": 82.09, "step": 35230 }, { "epoch": 0.14237405915553275, "grad_norm": 1343.3614501953125, "learning_rate": 4.9744903400391e-05, "loss": 98.7603, "step": 35240 }, { "epoch": 0.1424144604209004, "grad_norm": 1799.45263671875, "learning_rate": 4.9744405772694725e-05, "loss": 90.476, "step": 35250 }, { "epoch": 0.142454861686268, "grad_norm": 740.581787109375, "learning_rate": 4.9743907662593524e-05, "loss": 80.5426, "step": 35260 }, { "epoch": 0.14249526295163564, "grad_norm": 689.4889526367188, "learning_rate": 4.97434090700971e-05, "loss": 109.3305, "step": 35270 }, { "epoch": 0.14253566421700328, "grad_norm": 1570.7303466796875, "learning_rate": 4.974290999521519e-05, "loss": 115.1295, "step": 35280 }, { "epoch": 0.1425760654823709, "grad_norm": 1887.6724853515625, "learning_rate": 4.97424104379575e-05, "loss": 78.6015, "step": 35290 }, { "epoch": 0.14261646674773854, "grad_norm": 906.8424072265625, "learning_rate": 4.974191039833378e-05, "loss": 62.9002, "step": 35300 }, { "epoch": 0.14265686801310618, "grad_norm": 1138.534423828125, "learning_rate": 4.974140987635378e-05, "loss": 114.5066, "step": 35310 }, { "epoch": 0.14269726927847382, "grad_norm": 5530.13232421875, "learning_rate": 4.974090887202726e-05, "loss": 101.5712, "step": 35320 }, { "epoch": 0.14273767054384143, "grad_norm": 571.6856079101562, "learning_rate": 4.9740407385363983e-05, "loss": 87.484, "step": 35330 }, { "epoch": 0.14277807180920907, "grad_norm": 742.740478515625, "learning_rate": 4.973990541637373e-05, "loss": 116.8295, "step": 35340 }, { "epoch": 0.1428184730745767, "grad_norm": 1002.74072265625, "learning_rate": 4.9739402965066276e-05, "loss": 61.5279, "step": 35350 }, { "epoch": 0.14285887433994432, "grad_norm": 782.5518188476562, "learning_rate": 4.973890003145143e-05, "loss": 104.612, "step": 35360 }, { "epoch": 0.14289927560531196, "grad_norm": 3795.69970703125, "learning_rate": 4.973839661553899e-05, "loss": 90.4234, "step": 35370 }, { "epoch": 0.1429396768706796, "grad_norm": 679.3167724609375, "learning_rate": 4.9737892717338774e-05, "loss": 101.094, "step": 35380 }, { "epoch": 0.1429800781360472, "grad_norm": 1362.4091796875, "learning_rate": 4.97373883368606e-05, "loss": 121.7002, "step": 35390 }, { "epoch": 0.14302047940141485, "grad_norm": 692.154296875, "learning_rate": 4.973688347411431e-05, "loss": 99.3686, "step": 35400 }, { "epoch": 0.1430608806667825, "grad_norm": 1434.7564697265625, "learning_rate": 4.973637812910973e-05, "loss": 98.6788, "step": 35410 }, { "epoch": 0.1431012819321501, "grad_norm": 993.9842529296875, "learning_rate": 4.9735872301856734e-05, "loss": 98.5837, "step": 35420 }, { "epoch": 0.14314168319751774, "grad_norm": 772.5789794921875, "learning_rate": 4.973536599236517e-05, "loss": 114.3075, "step": 35430 }, { "epoch": 0.14318208446288538, "grad_norm": 1945.123046875, "learning_rate": 4.9734859200644905e-05, "loss": 80.5528, "step": 35440 }, { "epoch": 0.143222485728253, "grad_norm": 1043.6229248046875, "learning_rate": 4.973435192670584e-05, "loss": 104.7841, "step": 35450 }, { "epoch": 0.14326288699362064, "grad_norm": 0.0, "learning_rate": 4.973384417055784e-05, "loss": 83.0118, "step": 35460 }, { "epoch": 0.14330328825898828, "grad_norm": 1075.596435546875, "learning_rate": 4.9733335932210814e-05, "loss": 87.0017, "step": 35470 }, { "epoch": 0.14334368952435592, "grad_norm": 1106.34765625, "learning_rate": 4.973282721167467e-05, "loss": 84.2105, "step": 35480 }, { "epoch": 0.14338409078972353, "grad_norm": 680.8936157226562, "learning_rate": 4.973231800895932e-05, "loss": 102.21, "step": 35490 }, { "epoch": 0.14342449205509117, "grad_norm": 971.0728759765625, "learning_rate": 4.9731808324074717e-05, "loss": 89.0123, "step": 35500 }, { "epoch": 0.1434648933204588, "grad_norm": 1773.6971435546875, "learning_rate": 4.973129815703076e-05, "loss": 60.7568, "step": 35510 }, { "epoch": 0.14350529458582642, "grad_norm": 1183.9600830078125, "learning_rate": 4.973078750783742e-05, "loss": 91.8321, "step": 35520 }, { "epoch": 0.14354569585119406, "grad_norm": 540.8861694335938, "learning_rate": 4.973027637650464e-05, "loss": 82.3345, "step": 35530 }, { "epoch": 0.1435860971165617, "grad_norm": 2457.4091796875, "learning_rate": 4.9729764763042394e-05, "loss": 98.1432, "step": 35540 }, { "epoch": 0.1436264983819293, "grad_norm": 778.1422729492188, "learning_rate": 4.9729252667460655e-05, "loss": 85.1171, "step": 35550 }, { "epoch": 0.14366689964729695, "grad_norm": 501.1910095214844, "learning_rate": 4.97287400897694e-05, "loss": 138.8153, "step": 35560 }, { "epoch": 0.1437073009126646, "grad_norm": 1607.3275146484375, "learning_rate": 4.972822702997863e-05, "loss": 80.1325, "step": 35570 }, { "epoch": 0.1437477021780322, "grad_norm": 796.763427734375, "learning_rate": 4.9727713488098335e-05, "loss": 79.0268, "step": 35580 }, { "epoch": 0.14378810344339985, "grad_norm": 661.5986938476562, "learning_rate": 4.972719946413854e-05, "loss": 88.3111, "step": 35590 }, { "epoch": 0.14382850470876749, "grad_norm": 1435.8564453125, "learning_rate": 4.9726684958109266e-05, "loss": 107.5516, "step": 35600 }, { "epoch": 0.1438689059741351, "grad_norm": 679.6939697265625, "learning_rate": 4.972616997002053e-05, "loss": 75.3328, "step": 35610 }, { "epoch": 0.14390930723950274, "grad_norm": 1368.540283203125, "learning_rate": 4.972565449988239e-05, "loss": 83.6638, "step": 35620 }, { "epoch": 0.14394970850487038, "grad_norm": 503.572998046875, "learning_rate": 4.972513854770487e-05, "loss": 82.9908, "step": 35630 }, { "epoch": 0.14399010977023802, "grad_norm": 1151.1427001953125, "learning_rate": 4.972462211349806e-05, "loss": 94.219, "step": 35640 }, { "epoch": 0.14403051103560563, "grad_norm": 2658.9482421875, "learning_rate": 4.972410519727201e-05, "loss": 91.6265, "step": 35650 }, { "epoch": 0.14407091230097327, "grad_norm": 953.5581665039062, "learning_rate": 4.97235877990368e-05, "loss": 96.4948, "step": 35660 }, { "epoch": 0.1441113135663409, "grad_norm": 898.052490234375, "learning_rate": 4.972306991880251e-05, "loss": 144.139, "step": 35670 }, { "epoch": 0.14415171483170852, "grad_norm": 338.9049072265625, "learning_rate": 4.972255155657925e-05, "loss": 81.2305, "step": 35680 }, { "epoch": 0.14419211609707616, "grad_norm": 662.849365234375, "learning_rate": 4.972203271237712e-05, "loss": 82.9598, "step": 35690 }, { "epoch": 0.1442325173624438, "grad_norm": 488.6361999511719, "learning_rate": 4.972151338620623e-05, "loss": 64.3388, "step": 35700 }, { "epoch": 0.14427291862781141, "grad_norm": 1745.017578125, "learning_rate": 4.972099357807671e-05, "loss": 146.5352, "step": 35710 }, { "epoch": 0.14431331989317905, "grad_norm": 717.8941650390625, "learning_rate": 4.9720473287998695e-05, "loss": 82.1567, "step": 35720 }, { "epoch": 0.1443537211585467, "grad_norm": 3314.039794921875, "learning_rate": 4.9719952515982324e-05, "loss": 81.9564, "step": 35730 }, { "epoch": 0.1443941224239143, "grad_norm": 716.9427490234375, "learning_rate": 4.9719431262037755e-05, "loss": 94.6004, "step": 35740 }, { "epoch": 0.14443452368928195, "grad_norm": 591.388427734375, "learning_rate": 4.971890952617515e-05, "loss": 107.062, "step": 35750 }, { "epoch": 0.1444749249546496, "grad_norm": 1554.075927734375, "learning_rate": 4.9718387308404675e-05, "loss": 119.4833, "step": 35760 }, { "epoch": 0.1445153262200172, "grad_norm": 440.0912780761719, "learning_rate": 4.9717864608736506e-05, "loss": 104.854, "step": 35770 }, { "epoch": 0.14455572748538484, "grad_norm": 692.7317504882812, "learning_rate": 4.971734142718085e-05, "loss": 116.6411, "step": 35780 }, { "epoch": 0.14459612875075248, "grad_norm": 520.4461059570312, "learning_rate": 4.971681776374789e-05, "loss": 111.4582, "step": 35790 }, { "epoch": 0.14463653001612012, "grad_norm": 762.310546875, "learning_rate": 4.971629361844785e-05, "loss": 102.7609, "step": 35800 }, { "epoch": 0.14467693128148773, "grad_norm": 852.2344970703125, "learning_rate": 4.971576899129094e-05, "loss": 121.1159, "step": 35810 }, { "epoch": 0.14471733254685537, "grad_norm": 445.6109924316406, "learning_rate": 4.9715243882287386e-05, "loss": 89.8152, "step": 35820 }, { "epoch": 0.144757733812223, "grad_norm": 950.6737060546875, "learning_rate": 4.971471829144743e-05, "loss": 75.9464, "step": 35830 }, { "epoch": 0.14479813507759062, "grad_norm": 726.3805541992188, "learning_rate": 4.9714192218781316e-05, "loss": 81.0347, "step": 35840 }, { "epoch": 0.14483853634295826, "grad_norm": 1368.5889892578125, "learning_rate": 4.97136656642993e-05, "loss": 94.3249, "step": 35850 }, { "epoch": 0.1448789376083259, "grad_norm": 2891.8837890625, "learning_rate": 4.9713138628011654e-05, "loss": 88.5745, "step": 35860 }, { "epoch": 0.14491933887369352, "grad_norm": 883.2448120117188, "learning_rate": 4.971261110992864e-05, "loss": 86.1088, "step": 35870 }, { "epoch": 0.14495974013906116, "grad_norm": 750.9356689453125, "learning_rate": 4.9712083110060556e-05, "loss": 157.1846, "step": 35880 }, { "epoch": 0.1450001414044288, "grad_norm": 1293.0548095703125, "learning_rate": 4.971155462841769e-05, "loss": 97.4993, "step": 35890 }, { "epoch": 0.1450405426697964, "grad_norm": 1317.3045654296875, "learning_rate": 4.971102566501034e-05, "loss": 77.7684, "step": 35900 }, { "epoch": 0.14508094393516405, "grad_norm": 698.4030151367188, "learning_rate": 4.971049621984882e-05, "loss": 68.1086, "step": 35910 }, { "epoch": 0.1451213452005317, "grad_norm": 491.6965637207031, "learning_rate": 4.9709966292943455e-05, "loss": 85.3335, "step": 35920 }, { "epoch": 0.1451617464658993, "grad_norm": 3422.90380859375, "learning_rate": 4.970943588430458e-05, "loss": 122.6019, "step": 35930 }, { "epoch": 0.14520214773126694, "grad_norm": 933.2683715820312, "learning_rate": 4.970890499394253e-05, "loss": 116.233, "step": 35940 }, { "epoch": 0.14524254899663458, "grad_norm": 1370.2513427734375, "learning_rate": 4.9708373621867656e-05, "loss": 68.2386, "step": 35950 }, { "epoch": 0.14528295026200222, "grad_norm": 2012.014892578125, "learning_rate": 4.9707841768090314e-05, "loss": 73.6452, "step": 35960 }, { "epoch": 0.14532335152736983, "grad_norm": 1204.0938720703125, "learning_rate": 4.9707309432620874e-05, "loss": 82.8698, "step": 35970 }, { "epoch": 0.14536375279273747, "grad_norm": 1037.1630859375, "learning_rate": 4.9706776615469716e-05, "loss": 80.2174, "step": 35980 }, { "epoch": 0.1454041540581051, "grad_norm": 566.7116088867188, "learning_rate": 4.970624331664724e-05, "loss": 80.2169, "step": 35990 }, { "epoch": 0.14544455532347272, "grad_norm": 2502.911865234375, "learning_rate": 4.9705709536163824e-05, "loss": 148.008, "step": 36000 }, { "epoch": 0.14548495658884036, "grad_norm": 569.8108520507812, "learning_rate": 4.970517527402988e-05, "loss": 70.3217, "step": 36010 }, { "epoch": 0.145525357854208, "grad_norm": 635.7393188476562, "learning_rate": 4.9704640530255826e-05, "loss": 84.3072, "step": 36020 }, { "epoch": 0.14556575911957562, "grad_norm": 365.2387390136719, "learning_rate": 4.970410530485209e-05, "loss": 60.3312, "step": 36030 }, { "epoch": 0.14560616038494326, "grad_norm": 529.880126953125, "learning_rate": 4.970356959782909e-05, "loss": 125.5716, "step": 36040 }, { "epoch": 0.1456465616503109, "grad_norm": 928.6548461914062, "learning_rate": 4.97030334091973e-05, "loss": 98.0715, "step": 36050 }, { "epoch": 0.1456869629156785, "grad_norm": 937.4443359375, "learning_rate": 4.970249673896714e-05, "loss": 123.7811, "step": 36060 }, { "epoch": 0.14572736418104615, "grad_norm": 664.5932006835938, "learning_rate": 4.970195958714909e-05, "loss": 103.4508, "step": 36070 }, { "epoch": 0.1457677654464138, "grad_norm": 600.3163452148438, "learning_rate": 4.970142195375363e-05, "loss": 72.4148, "step": 36080 }, { "epoch": 0.1458081667117814, "grad_norm": 602.3806762695312, "learning_rate": 4.970088383879123e-05, "loss": 103.1301, "step": 36090 }, { "epoch": 0.14584856797714904, "grad_norm": 737.54443359375, "learning_rate": 4.970034524227238e-05, "loss": 80.4645, "step": 36100 }, { "epoch": 0.14588896924251668, "grad_norm": 1445.3460693359375, "learning_rate": 4.969980616420759e-05, "loss": 75.5717, "step": 36110 }, { "epoch": 0.14592937050788432, "grad_norm": 385.9198913574219, "learning_rate": 4.9699266604607355e-05, "loss": 137.0707, "step": 36120 }, { "epoch": 0.14596977177325193, "grad_norm": 0.0, "learning_rate": 4.96987265634822e-05, "loss": 88.1988, "step": 36130 }, { "epoch": 0.14601017303861957, "grad_norm": 793.6890869140625, "learning_rate": 4.9698186040842654e-05, "loss": 93.8744, "step": 36140 }, { "epoch": 0.1460505743039872, "grad_norm": 1040.031494140625, "learning_rate": 4.969764503669926e-05, "loss": 67.5155, "step": 36150 }, { "epoch": 0.14609097556935483, "grad_norm": 1021.9496459960938, "learning_rate": 4.9697103551062556e-05, "loss": 69.8323, "step": 36160 }, { "epoch": 0.14613137683472247, "grad_norm": 1474.1693115234375, "learning_rate": 4.9696561583943106e-05, "loss": 100.1957, "step": 36170 }, { "epoch": 0.1461717781000901, "grad_norm": 1472.6767578125, "learning_rate": 4.969601913535148e-05, "loss": 91.6, "step": 36180 }, { "epoch": 0.14621217936545772, "grad_norm": 689.4755249023438, "learning_rate": 4.9695476205298235e-05, "loss": 146.9316, "step": 36190 }, { "epoch": 0.14625258063082536, "grad_norm": 1301.0394287109375, "learning_rate": 4.969493279379398e-05, "loss": 75.4633, "step": 36200 }, { "epoch": 0.146292981896193, "grad_norm": 708.2933349609375, "learning_rate": 4.9694388900849284e-05, "loss": 73.6771, "step": 36210 }, { "epoch": 0.1463333831615606, "grad_norm": 0.0, "learning_rate": 4.969384452647477e-05, "loss": 81.5898, "step": 36220 }, { "epoch": 0.14637378442692825, "grad_norm": 1018.5614013671875, "learning_rate": 4.969329967068104e-05, "loss": 98.2319, "step": 36230 }, { "epoch": 0.1464141856922959, "grad_norm": 1839.9981689453125, "learning_rate": 4.969275433347872e-05, "loss": 88.1999, "step": 36240 }, { "epoch": 0.1464545869576635, "grad_norm": 549.9813842773438, "learning_rate": 4.9692208514878444e-05, "loss": 79.6468, "step": 36250 }, { "epoch": 0.14649498822303114, "grad_norm": 615.52099609375, "learning_rate": 4.9691662214890856e-05, "loss": 93.557, "step": 36260 }, { "epoch": 0.14653538948839878, "grad_norm": 0.0, "learning_rate": 4.969111543352659e-05, "loss": 159.444, "step": 36270 }, { "epoch": 0.14657579075376642, "grad_norm": 1021.6466674804688, "learning_rate": 4.969056817079633e-05, "loss": 93.9579, "step": 36280 }, { "epoch": 0.14661619201913403, "grad_norm": 1050.8099365234375, "learning_rate": 4.969002042671072e-05, "loss": 92.0323, "step": 36290 }, { "epoch": 0.14665659328450167, "grad_norm": 933.8421020507812, "learning_rate": 4.968947220128045e-05, "loss": 74.9989, "step": 36300 }, { "epoch": 0.14669699454986931, "grad_norm": 775.214111328125, "learning_rate": 4.968892349451621e-05, "loss": 109.3062, "step": 36310 }, { "epoch": 0.14673739581523693, "grad_norm": 275.0602111816406, "learning_rate": 4.9688374306428696e-05, "loss": 79.9519, "step": 36320 }, { "epoch": 0.14677779708060457, "grad_norm": 1294.7794189453125, "learning_rate": 4.9687824637028625e-05, "loss": 113.0867, "step": 36330 }, { "epoch": 0.1468181983459722, "grad_norm": 745.861572265625, "learning_rate": 4.968727448632669e-05, "loss": 76.9137, "step": 36340 }, { "epoch": 0.14685859961133982, "grad_norm": 776.559326171875, "learning_rate": 4.968672385433364e-05, "loss": 86.1525, "step": 36350 }, { "epoch": 0.14689900087670746, "grad_norm": 1309.9783935546875, "learning_rate": 4.968617274106019e-05, "loss": 79.5693, "step": 36360 }, { "epoch": 0.1469394021420751, "grad_norm": 1547.9395751953125, "learning_rate": 4.968562114651709e-05, "loss": 116.9439, "step": 36370 }, { "epoch": 0.1469798034074427, "grad_norm": 453.9913024902344, "learning_rate": 4.9685069070715106e-05, "loss": 74.6536, "step": 36380 }, { "epoch": 0.14702020467281035, "grad_norm": 346.8396911621094, "learning_rate": 4.968451651366498e-05, "loss": 95.8461, "step": 36390 }, { "epoch": 0.147060605938178, "grad_norm": 537.4244995117188, "learning_rate": 4.968396347537751e-05, "loss": 74.5145, "step": 36400 }, { "epoch": 0.1471010072035456, "grad_norm": 897.6705932617188, "learning_rate": 4.968340995586346e-05, "loss": 109.4366, "step": 36410 }, { "epoch": 0.14714140846891324, "grad_norm": 712.1412963867188, "learning_rate": 4.9682855955133625e-05, "loss": 67.6998, "step": 36420 }, { "epoch": 0.14718180973428088, "grad_norm": 0.0, "learning_rate": 4.96823014731988e-05, "loss": 102.4061, "step": 36430 }, { "epoch": 0.14722221099964852, "grad_norm": 1074.514404296875, "learning_rate": 4.9681746510069805e-05, "loss": 79.5955, "step": 36440 }, { "epoch": 0.14726261226501614, "grad_norm": 1605.676025390625, "learning_rate": 4.9681191065757455e-05, "loss": 89.505, "step": 36450 }, { "epoch": 0.14730301353038378, "grad_norm": 981.2547607421875, "learning_rate": 4.9680635140272575e-05, "loss": 99.4992, "step": 36460 }, { "epoch": 0.14734341479575142, "grad_norm": 937.9696655273438, "learning_rate": 4.9680078733626015e-05, "loss": 75.9712, "step": 36470 }, { "epoch": 0.14738381606111903, "grad_norm": 501.3292236328125, "learning_rate": 4.9679521845828604e-05, "loss": 75.7125, "step": 36480 }, { "epoch": 0.14742421732648667, "grad_norm": 1447.997802734375, "learning_rate": 4.967896447689121e-05, "loss": 83.492, "step": 36490 }, { "epoch": 0.1474646185918543, "grad_norm": 555.2802734375, "learning_rate": 4.96784066268247e-05, "loss": 63.0523, "step": 36500 }, { "epoch": 0.14750501985722192, "grad_norm": 784.3593139648438, "learning_rate": 4.967784829563995e-05, "loss": 93.0994, "step": 36510 }, { "epoch": 0.14754542112258956, "grad_norm": 1216.3717041015625, "learning_rate": 4.967728948334784e-05, "loss": 99.9734, "step": 36520 }, { "epoch": 0.1475858223879572, "grad_norm": 717.1402587890625, "learning_rate": 4.967673018995926e-05, "loss": 80.4648, "step": 36530 }, { "epoch": 0.1476262236533248, "grad_norm": 468.6872253417969, "learning_rate": 4.967617041548513e-05, "loss": 96.3743, "step": 36540 }, { "epoch": 0.14766662491869245, "grad_norm": 709.4956665039062, "learning_rate": 4.967561015993635e-05, "loss": 96.8744, "step": 36550 }, { "epoch": 0.1477070261840601, "grad_norm": 691.0542602539062, "learning_rate": 4.967504942332385e-05, "loss": 86.1126, "step": 36560 }, { "epoch": 0.1477474274494277, "grad_norm": 1028.8739013671875, "learning_rate": 4.967448820565856e-05, "loss": 125.4938, "step": 36570 }, { "epoch": 0.14778782871479534, "grad_norm": 599.9513549804688, "learning_rate": 4.9673926506951404e-05, "loss": 117.7221, "step": 36580 }, { "epoch": 0.14782822998016298, "grad_norm": 609.7288818359375, "learning_rate": 4.967336432721337e-05, "loss": 80.1608, "step": 36590 }, { "epoch": 0.1478686312455306, "grad_norm": 1172.2431640625, "learning_rate": 4.967280166645538e-05, "loss": 97.0124, "step": 36600 }, { "epoch": 0.14790903251089824, "grad_norm": 774.5439453125, "learning_rate": 4.967223852468842e-05, "loss": 106.1079, "step": 36610 }, { "epoch": 0.14794943377626588, "grad_norm": 414.7299499511719, "learning_rate": 4.967167490192347e-05, "loss": 95.8307, "step": 36620 }, { "epoch": 0.14798983504163352, "grad_norm": 1053.8829345703125, "learning_rate": 4.967111079817151e-05, "loss": 102.4389, "step": 36630 }, { "epoch": 0.14803023630700113, "grad_norm": 730.1495971679688, "learning_rate": 4.967054621344356e-05, "loss": 87.2563, "step": 36640 }, { "epoch": 0.14807063757236877, "grad_norm": 2739.23583984375, "learning_rate": 4.96699811477506e-05, "loss": 126.1713, "step": 36650 }, { "epoch": 0.1481110388377364, "grad_norm": 3278.61767578125, "learning_rate": 4.966941560110366e-05, "loss": 115.4281, "step": 36660 }, { "epoch": 0.14815144010310402, "grad_norm": 1437.7108154296875, "learning_rate": 4.966884957351375e-05, "loss": 98.0438, "step": 36670 }, { "epoch": 0.14819184136847166, "grad_norm": 582.1204833984375, "learning_rate": 4.966828306499193e-05, "loss": 85.8503, "step": 36680 }, { "epoch": 0.1482322426338393, "grad_norm": 790.8889770507812, "learning_rate": 4.966771607554923e-05, "loss": 75.0409, "step": 36690 }, { "epoch": 0.1482726438992069, "grad_norm": 370.1599426269531, "learning_rate": 4.96671486051967e-05, "loss": 94.7309, "step": 36700 }, { "epoch": 0.14831304516457455, "grad_norm": 2078.58837890625, "learning_rate": 4.966658065394542e-05, "loss": 133.5383, "step": 36710 }, { "epoch": 0.1483534464299422, "grad_norm": 374.68475341796875, "learning_rate": 4.9666012221806434e-05, "loss": 102.0996, "step": 36720 }, { "epoch": 0.1483938476953098, "grad_norm": 1200.5596923828125, "learning_rate": 4.966544330879085e-05, "loss": 89.5257, "step": 36730 }, { "epoch": 0.14843424896067745, "grad_norm": 430.6545104980469, "learning_rate": 4.9664873914909755e-05, "loss": 73.1253, "step": 36740 }, { "epoch": 0.14847465022604509, "grad_norm": 1570.5799560546875, "learning_rate": 4.966430404017424e-05, "loss": 72.1421, "step": 36750 }, { "epoch": 0.1485150514914127, "grad_norm": 422.2970886230469, "learning_rate": 4.966373368459541e-05, "loss": 147.6407, "step": 36760 }, { "epoch": 0.14855545275678034, "grad_norm": 567.98046875, "learning_rate": 4.966316284818441e-05, "loss": 78.6361, "step": 36770 }, { "epoch": 0.14859585402214798, "grad_norm": 0.0, "learning_rate": 4.966259153095235e-05, "loss": 72.8352, "step": 36780 }, { "epoch": 0.14863625528751562, "grad_norm": 871.2714233398438, "learning_rate": 4.966201973291036e-05, "loss": 104.7993, "step": 36790 }, { "epoch": 0.14867665655288323, "grad_norm": 762.0390625, "learning_rate": 4.966144745406961e-05, "loss": 63.3974, "step": 36800 }, { "epoch": 0.14871705781825087, "grad_norm": 1256.638427734375, "learning_rate": 4.966087469444124e-05, "loss": 82.0591, "step": 36810 }, { "epoch": 0.1487574590836185, "grad_norm": 1347.9171142578125, "learning_rate": 4.966030145403642e-05, "loss": 97.7492, "step": 36820 }, { "epoch": 0.14879786034898612, "grad_norm": 2305.42236328125, "learning_rate": 4.965972773286633e-05, "loss": 98.8773, "step": 36830 }, { "epoch": 0.14883826161435376, "grad_norm": 759.54931640625, "learning_rate": 4.965915353094215e-05, "loss": 104.5514, "step": 36840 }, { "epoch": 0.1488786628797214, "grad_norm": 409.0998840332031, "learning_rate": 4.9658578848275076e-05, "loss": 72.8223, "step": 36850 }, { "epoch": 0.14891906414508901, "grad_norm": 464.468017578125, "learning_rate": 4.965800368487632e-05, "loss": 112.4646, "step": 36860 }, { "epoch": 0.14895946541045665, "grad_norm": 923.630859375, "learning_rate": 4.9657428040757084e-05, "loss": 107.7895, "step": 36870 }, { "epoch": 0.1489998666758243, "grad_norm": 1179.0242919921875, "learning_rate": 4.965685191592859e-05, "loss": 86.0383, "step": 36880 }, { "epoch": 0.1490402679411919, "grad_norm": 636.7333374023438, "learning_rate": 4.9656275310402074e-05, "loss": 114.6134, "step": 36890 }, { "epoch": 0.14908066920655955, "grad_norm": 2465.29296875, "learning_rate": 4.965569822418877e-05, "loss": 58.8899, "step": 36900 }, { "epoch": 0.1491210704719272, "grad_norm": 757.9246215820312, "learning_rate": 4.9655120657299945e-05, "loss": 66.7647, "step": 36910 }, { "epoch": 0.1491614717372948, "grad_norm": 671.7369995117188, "learning_rate": 4.965454260974685e-05, "loss": 87.831, "step": 36920 }, { "epoch": 0.14920187300266244, "grad_norm": 409.745849609375, "learning_rate": 4.9653964081540756e-05, "loss": 107.4229, "step": 36930 }, { "epoch": 0.14924227426803008, "grad_norm": 1466.3577880859375, "learning_rate": 4.965338507269294e-05, "loss": 93.4886, "step": 36940 }, { "epoch": 0.14928267553339772, "grad_norm": 554.5855712890625, "learning_rate": 4.965280558321468e-05, "loss": 97.7594, "step": 36950 }, { "epoch": 0.14932307679876533, "grad_norm": 1820.7589111328125, "learning_rate": 4.9652225613117284e-05, "loss": 122.0696, "step": 36960 }, { "epoch": 0.14936347806413297, "grad_norm": 1617.1182861328125, "learning_rate": 4.965164516241206e-05, "loss": 123.7657, "step": 36970 }, { "epoch": 0.1494038793295006, "grad_norm": 669.3226928710938, "learning_rate": 4.965106423111033e-05, "loss": 103.5812, "step": 36980 }, { "epoch": 0.14944428059486822, "grad_norm": 2139.23876953125, "learning_rate": 4.9650482819223405e-05, "loss": 119.0735, "step": 36990 }, { "epoch": 0.14948468186023586, "grad_norm": 532.9093017578125, "learning_rate": 4.964990092676263e-05, "loss": 82.2448, "step": 37000 }, { "epoch": 0.1495250831256035, "grad_norm": 804.912109375, "learning_rate": 4.964931855373934e-05, "loss": 80.909, "step": 37010 }, { "epoch": 0.14956548439097112, "grad_norm": 847.0060424804688, "learning_rate": 4.9648735700164895e-05, "loss": 92.2945, "step": 37020 }, { "epoch": 0.14960588565633876, "grad_norm": 1183.10302734375, "learning_rate": 4.964815236605066e-05, "loss": 94.6774, "step": 37030 }, { "epoch": 0.1496462869217064, "grad_norm": 885.68896484375, "learning_rate": 4.964756855140801e-05, "loss": 75.244, "step": 37040 }, { "epoch": 0.149686688187074, "grad_norm": 843.5673828125, "learning_rate": 4.964698425624831e-05, "loss": 82.0223, "step": 37050 }, { "epoch": 0.14972708945244165, "grad_norm": 817.964599609375, "learning_rate": 4.964639948058297e-05, "loss": 131.6297, "step": 37060 }, { "epoch": 0.1497674907178093, "grad_norm": 413.62335205078125, "learning_rate": 4.964581422442338e-05, "loss": 93.1363, "step": 37070 }, { "epoch": 0.1498078919831769, "grad_norm": 569.75146484375, "learning_rate": 4.964522848778096e-05, "loss": 70.2936, "step": 37080 }, { "epoch": 0.14984829324854454, "grad_norm": 507.4297180175781, "learning_rate": 4.964464227066712e-05, "loss": 53.4872, "step": 37090 }, { "epoch": 0.14988869451391218, "grad_norm": 1582.9383544921875, "learning_rate": 4.964405557309328e-05, "loss": 128.3349, "step": 37100 }, { "epoch": 0.14992909577927982, "grad_norm": 871.0317993164062, "learning_rate": 4.9643468395070904e-05, "loss": 106.9411, "step": 37110 }, { "epoch": 0.14996949704464743, "grad_norm": 4390.7822265625, "learning_rate": 4.964288073661142e-05, "loss": 98.8651, "step": 37120 }, { "epoch": 0.15000989831001507, "grad_norm": 1010.7752075195312, "learning_rate": 4.9642292597726284e-05, "loss": 134.204, "step": 37130 }, { "epoch": 0.1500502995753827, "grad_norm": 3052.423583984375, "learning_rate": 4.964170397842697e-05, "loss": 129.5846, "step": 37140 }, { "epoch": 0.15009070084075032, "grad_norm": 363.93865966796875, "learning_rate": 4.9641114878724956e-05, "loss": 101.122, "step": 37150 }, { "epoch": 0.15013110210611796, "grad_norm": 963.4545288085938, "learning_rate": 4.964052529863171e-05, "loss": 98.0533, "step": 37160 }, { "epoch": 0.1501715033714856, "grad_norm": 939.9786987304688, "learning_rate": 4.9639935238158744e-05, "loss": 89.1619, "step": 37170 }, { "epoch": 0.15021190463685322, "grad_norm": 1083.3187255859375, "learning_rate": 4.963934469731756e-05, "loss": 77.3688, "step": 37180 }, { "epoch": 0.15025230590222086, "grad_norm": 818.65478515625, "learning_rate": 4.963875367611966e-05, "loss": 96.2921, "step": 37190 }, { "epoch": 0.1502927071675885, "grad_norm": 723.4608154296875, "learning_rate": 4.963816217457657e-05, "loss": 128.7513, "step": 37200 }, { "epoch": 0.1503331084329561, "grad_norm": 737.654541015625, "learning_rate": 4.963757019269983e-05, "loss": 107.9872, "step": 37210 }, { "epoch": 0.15037350969832375, "grad_norm": 771.156005859375, "learning_rate": 4.963697773050097e-05, "loss": 109.9101, "step": 37220 }, { "epoch": 0.1504139109636914, "grad_norm": 748.536376953125, "learning_rate": 4.9636384787991547e-05, "loss": 65.0804, "step": 37230 }, { "epoch": 0.150454312229059, "grad_norm": 430.6962890625, "learning_rate": 4.963579136518312e-05, "loss": 59.6868, "step": 37240 }, { "epoch": 0.15049471349442664, "grad_norm": 703.9127807617188, "learning_rate": 4.963519746208726e-05, "loss": 111.1007, "step": 37250 }, { "epoch": 0.15053511475979428, "grad_norm": 813.0443115234375, "learning_rate": 4.963460307871553e-05, "loss": 102.864, "step": 37260 }, { "epoch": 0.15057551602516192, "grad_norm": 1400.834716796875, "learning_rate": 4.963400821507954e-05, "loss": 116.5066, "step": 37270 }, { "epoch": 0.15061591729052953, "grad_norm": 941.7633666992188, "learning_rate": 4.9633412871190873e-05, "loss": 97.3467, "step": 37280 }, { "epoch": 0.15065631855589717, "grad_norm": 895.1846923828125, "learning_rate": 4.963281704706115e-05, "loss": 88.1651, "step": 37290 }, { "epoch": 0.1506967198212648, "grad_norm": 1722.0013427734375, "learning_rate": 4.9632220742701965e-05, "loss": 64.2686, "step": 37300 }, { "epoch": 0.15073712108663243, "grad_norm": 499.2967224121094, "learning_rate": 4.963162395812496e-05, "loss": 57.7127, "step": 37310 }, { "epoch": 0.15077752235200007, "grad_norm": 642.2814331054688, "learning_rate": 4.9631026693341764e-05, "loss": 47.7118, "step": 37320 }, { "epoch": 0.1508179236173677, "grad_norm": 985.0557250976562, "learning_rate": 4.963042894836403e-05, "loss": 113.7733, "step": 37330 }, { "epoch": 0.15085832488273532, "grad_norm": 558.4752197265625, "learning_rate": 4.9629830723203384e-05, "loss": 55.6255, "step": 37340 }, { "epoch": 0.15089872614810296, "grad_norm": 366.1856689453125, "learning_rate": 4.9629232017871524e-05, "loss": 98.4935, "step": 37350 }, { "epoch": 0.1509391274134706, "grad_norm": 817.6262817382812, "learning_rate": 4.96286328323801e-05, "loss": 69.1073, "step": 37360 }, { "epoch": 0.1509795286788382, "grad_norm": 724.3482055664062, "learning_rate": 4.96280331667408e-05, "loss": 101.978, "step": 37370 }, { "epoch": 0.15101992994420585, "grad_norm": 2478.86962890625, "learning_rate": 4.9627433020965314e-05, "loss": 104.1645, "step": 37380 }, { "epoch": 0.1510603312095735, "grad_norm": 663.1130981445312, "learning_rate": 4.962683239506534e-05, "loss": 80.2843, "step": 37390 }, { "epoch": 0.1511007324749411, "grad_norm": 757.1906127929688, "learning_rate": 4.9626231289052596e-05, "loss": 87.6918, "step": 37400 }, { "epoch": 0.15114113374030874, "grad_norm": 427.3059997558594, "learning_rate": 4.962562970293879e-05, "loss": 79.3416, "step": 37410 }, { "epoch": 0.15118153500567638, "grad_norm": 821.1558837890625, "learning_rate": 4.962502763673565e-05, "loss": 116.5157, "step": 37420 }, { "epoch": 0.15122193627104402, "grad_norm": 1046.214111328125, "learning_rate": 4.962442509045493e-05, "loss": 53.0469, "step": 37430 }, { "epoch": 0.15126233753641163, "grad_norm": 1114.373291015625, "learning_rate": 4.9623822064108364e-05, "loss": 75.2076, "step": 37440 }, { "epoch": 0.15130273880177927, "grad_norm": 1147.0633544921875, "learning_rate": 4.9623218557707694e-05, "loss": 80.5338, "step": 37450 }, { "epoch": 0.15134314006714691, "grad_norm": 1153.3177490234375, "learning_rate": 4.9622614571264715e-05, "loss": 115.0096, "step": 37460 }, { "epoch": 0.15138354133251453, "grad_norm": 560.9694213867188, "learning_rate": 4.962201010479119e-05, "loss": 86.4352, "step": 37470 }, { "epoch": 0.15142394259788217, "grad_norm": 584.9893188476562, "learning_rate": 4.96214051582989e-05, "loss": 63.2892, "step": 37480 }, { "epoch": 0.1514643438632498, "grad_norm": 573.4566650390625, "learning_rate": 4.962079973179963e-05, "loss": 71.3577, "step": 37490 }, { "epoch": 0.15150474512861742, "grad_norm": 696.969482421875, "learning_rate": 4.962019382530521e-05, "loss": 111.7284, "step": 37500 }, { "epoch": 0.15154514639398506, "grad_norm": 674.3920288085938, "learning_rate": 4.961958743882742e-05, "loss": 82.3515, "step": 37510 }, { "epoch": 0.1515855476593527, "grad_norm": 835.2359008789062, "learning_rate": 4.96189805723781e-05, "loss": 81.7163, "step": 37520 }, { "epoch": 0.1516259489247203, "grad_norm": 1307.296142578125, "learning_rate": 4.96183732259691e-05, "loss": 92.0704, "step": 37530 }, { "epoch": 0.15166635019008795, "grad_norm": 808.1261596679688, "learning_rate": 4.961776539961222e-05, "loss": 82.1407, "step": 37540 }, { "epoch": 0.1517067514554556, "grad_norm": 1993.1292724609375, "learning_rate": 4.9617157093319326e-05, "loss": 85.8768, "step": 37550 }, { "epoch": 0.1517471527208232, "grad_norm": 784.2838134765625, "learning_rate": 4.961654830710229e-05, "loss": 92.2036, "step": 37560 }, { "epoch": 0.15178755398619084, "grad_norm": 412.1575622558594, "learning_rate": 4.961593904097297e-05, "loss": 82.7349, "step": 37570 }, { "epoch": 0.15182795525155848, "grad_norm": 726.8958129882812, "learning_rate": 4.961532929494325e-05, "loss": 71.485, "step": 37580 }, { "epoch": 0.15186835651692612, "grad_norm": 1616.0050048828125, "learning_rate": 4.9614719069025e-05, "loss": 65.669, "step": 37590 }, { "epoch": 0.15190875778229374, "grad_norm": 1195.6494140625, "learning_rate": 4.9614108363230135e-05, "loss": 120.262, "step": 37600 }, { "epoch": 0.15194915904766138, "grad_norm": 785.0847778320312, "learning_rate": 4.961349717757056e-05, "loss": 55.0058, "step": 37610 }, { "epoch": 0.15198956031302902, "grad_norm": 367.303466796875, "learning_rate": 4.961288551205818e-05, "loss": 85.8025, "step": 37620 }, { "epoch": 0.15202996157839663, "grad_norm": 1064.3392333984375, "learning_rate": 4.961227336670493e-05, "loss": 120.8695, "step": 37630 }, { "epoch": 0.15207036284376427, "grad_norm": 963.6973876953125, "learning_rate": 4.961166074152274e-05, "loss": 119.0628, "step": 37640 }, { "epoch": 0.1521107641091319, "grad_norm": 1672.767333984375, "learning_rate": 4.961104763652355e-05, "loss": 100.2525, "step": 37650 }, { "epoch": 0.15215116537449952, "grad_norm": 850.4930419921875, "learning_rate": 4.961043405171931e-05, "loss": 63.3998, "step": 37660 }, { "epoch": 0.15219156663986716, "grad_norm": 1047.08935546875, "learning_rate": 4.9609819987122e-05, "loss": 78.7605, "step": 37670 }, { "epoch": 0.1522319679052348, "grad_norm": 689.37744140625, "learning_rate": 4.9609205442743566e-05, "loss": 100.5812, "step": 37680 }, { "epoch": 0.1522723691706024, "grad_norm": 686.3281860351562, "learning_rate": 4.9608590418596016e-05, "loss": 58.7595, "step": 37690 }, { "epoch": 0.15231277043597005, "grad_norm": 992.6712036132812, "learning_rate": 4.9607974914691316e-05, "loss": 117.9191, "step": 37700 }, { "epoch": 0.1523531717013377, "grad_norm": 518.0152587890625, "learning_rate": 4.960735893104148e-05, "loss": 74.092, "step": 37710 }, { "epoch": 0.1523935729667053, "grad_norm": 0.0, "learning_rate": 4.960674246765851e-05, "loss": 74.5977, "step": 37720 }, { "epoch": 0.15243397423207294, "grad_norm": 524.2943115234375, "learning_rate": 4.9606125524554434e-05, "loss": 47.5876, "step": 37730 }, { "epoch": 0.15247437549744058, "grad_norm": 1206.4256591796875, "learning_rate": 4.960550810174126e-05, "loss": 84.7391, "step": 37740 }, { "epoch": 0.15251477676280822, "grad_norm": 762.7168579101562, "learning_rate": 4.960489019923105e-05, "loss": 67.8448, "step": 37750 }, { "epoch": 0.15255517802817584, "grad_norm": 1341.15869140625, "learning_rate": 4.9604271817035834e-05, "loss": 112.8694, "step": 37760 }, { "epoch": 0.15259557929354348, "grad_norm": 603.6417236328125, "learning_rate": 4.960365295516767e-05, "loss": 123.1844, "step": 37770 }, { "epoch": 0.15263598055891112, "grad_norm": 868.97998046875, "learning_rate": 4.9603033613638626e-05, "loss": 75.7884, "step": 37780 }, { "epoch": 0.15267638182427873, "grad_norm": 696.1629638671875, "learning_rate": 4.9602413792460776e-05, "loss": 82.7375, "step": 37790 }, { "epoch": 0.15271678308964637, "grad_norm": 713.6837768554688, "learning_rate": 4.960179349164621e-05, "loss": 60.8608, "step": 37800 }, { "epoch": 0.152757184355014, "grad_norm": 903.1224365234375, "learning_rate": 4.9601172711207005e-05, "loss": 111.0102, "step": 37810 }, { "epoch": 0.15279758562038162, "grad_norm": 1082.6353759765625, "learning_rate": 4.9600551451155274e-05, "loss": 102.1816, "step": 37820 }, { "epoch": 0.15283798688574926, "grad_norm": 1227.940673828125, "learning_rate": 4.959992971150313e-05, "loss": 98.8093, "step": 37830 }, { "epoch": 0.1528783881511169, "grad_norm": 1349.7529296875, "learning_rate": 4.959930749226269e-05, "loss": 99.6163, "step": 37840 }, { "epoch": 0.1529187894164845, "grad_norm": 1902.737548828125, "learning_rate": 4.9598684793446085e-05, "loss": 108.2246, "step": 37850 }, { "epoch": 0.15295919068185215, "grad_norm": 888.9658203125, "learning_rate": 4.959806161506545e-05, "loss": 79.167, "step": 37860 }, { "epoch": 0.1529995919472198, "grad_norm": 984.05322265625, "learning_rate": 4.9597437957132955e-05, "loss": 76.7845, "step": 37870 }, { "epoch": 0.1530399932125874, "grad_norm": 849.2679443359375, "learning_rate": 4.959681381966073e-05, "loss": 122.3138, "step": 37880 }, { "epoch": 0.15308039447795505, "grad_norm": 725.888916015625, "learning_rate": 4.959618920266096e-05, "loss": 73.6324, "step": 37890 }, { "epoch": 0.15312079574332269, "grad_norm": 792.6710205078125, "learning_rate": 4.959556410614582e-05, "loss": 54.1202, "step": 37900 }, { "epoch": 0.15316119700869033, "grad_norm": 583.5054931640625, "learning_rate": 4.959493853012749e-05, "loss": 81.214, "step": 37910 }, { "epoch": 0.15320159827405794, "grad_norm": 793.6455688476562, "learning_rate": 4.9594312474618175e-05, "loss": 70.9016, "step": 37920 }, { "epoch": 0.15324199953942558, "grad_norm": 1003.0465087890625, "learning_rate": 4.959368593963007e-05, "loss": 117.324, "step": 37930 }, { "epoch": 0.15328240080479322, "grad_norm": 450.6336364746094, "learning_rate": 4.9593058925175406e-05, "loss": 95.6128, "step": 37940 }, { "epoch": 0.15332280207016083, "grad_norm": 674.2539672851562, "learning_rate": 4.959243143126639e-05, "loss": 86.3896, "step": 37950 }, { "epoch": 0.15336320333552847, "grad_norm": 396.0630798339844, "learning_rate": 4.959180345791528e-05, "loss": 80.5643, "step": 37960 }, { "epoch": 0.1534036046008961, "grad_norm": 601.1489868164062, "learning_rate": 4.9591175005134286e-05, "loss": 105.7729, "step": 37970 }, { "epoch": 0.15344400586626372, "grad_norm": 884.35693359375, "learning_rate": 4.959054607293567e-05, "loss": 100.5772, "step": 37980 }, { "epoch": 0.15348440713163136, "grad_norm": 670.9600830078125, "learning_rate": 4.95899166613317e-05, "loss": 75.9151, "step": 37990 }, { "epoch": 0.153524808396999, "grad_norm": 1574.9869384765625, "learning_rate": 4.9589286770334654e-05, "loss": 74.2453, "step": 38000 }, { "epoch": 0.15356520966236661, "grad_norm": 1199.295654296875, "learning_rate": 4.958865639995679e-05, "loss": 89.8758, "step": 38010 }, { "epoch": 0.15360561092773425, "grad_norm": 605.9471435546875, "learning_rate": 4.958802555021042e-05, "loss": 96.6403, "step": 38020 }, { "epoch": 0.1536460121931019, "grad_norm": 1894.856201171875, "learning_rate": 4.958739422110783e-05, "loss": 87.7068, "step": 38030 }, { "epoch": 0.1536864134584695, "grad_norm": 1081.8231201171875, "learning_rate": 4.9586762412661333e-05, "loss": 88.5522, "step": 38040 }, { "epoch": 0.15372681472383715, "grad_norm": 452.8377685546875, "learning_rate": 4.958613012488324e-05, "loss": 75.0825, "step": 38050 }, { "epoch": 0.1537672159892048, "grad_norm": 855.8710327148438, "learning_rate": 4.958549735778589e-05, "loss": 106.2082, "step": 38060 }, { "epoch": 0.15380761725457243, "grad_norm": 0.0, "learning_rate": 4.958486411138161e-05, "loss": 50.9362, "step": 38070 }, { "epoch": 0.15384801851994004, "grad_norm": 677.1991577148438, "learning_rate": 4.958423038568274e-05, "loss": 95.3129, "step": 38080 }, { "epoch": 0.15388841978530768, "grad_norm": 470.16778564453125, "learning_rate": 4.958359618070165e-05, "loss": 92.6209, "step": 38090 }, { "epoch": 0.15392882105067532, "grad_norm": 839.3685302734375, "learning_rate": 4.958296149645069e-05, "loss": 96.7531, "step": 38100 }, { "epoch": 0.15396922231604293, "grad_norm": 1347.764892578125, "learning_rate": 4.9582326332942244e-05, "loss": 85.9113, "step": 38110 }, { "epoch": 0.15400962358141057, "grad_norm": 452.386962890625, "learning_rate": 4.958169069018869e-05, "loss": 104.578, "step": 38120 }, { "epoch": 0.1540500248467782, "grad_norm": 1746.528076171875, "learning_rate": 4.958105456820242e-05, "loss": 81.6607, "step": 38130 }, { "epoch": 0.15409042611214582, "grad_norm": 862.51708984375, "learning_rate": 4.958041796699583e-05, "loss": 92.2541, "step": 38140 }, { "epoch": 0.15413082737751346, "grad_norm": 1134.7611083984375, "learning_rate": 4.957978088658134e-05, "loss": 85.8542, "step": 38150 }, { "epoch": 0.1541712286428811, "grad_norm": 737.6195068359375, "learning_rate": 4.957914332697137e-05, "loss": 84.3832, "step": 38160 }, { "epoch": 0.15421162990824872, "grad_norm": 614.6725463867188, "learning_rate": 4.957850528817834e-05, "loss": 83.8153, "step": 38170 }, { "epoch": 0.15425203117361636, "grad_norm": 848.0565795898438, "learning_rate": 4.957786677021471e-05, "loss": 72.8551, "step": 38180 }, { "epoch": 0.154292432438984, "grad_norm": 1637.4676513671875, "learning_rate": 4.9577227773092904e-05, "loss": 69.5406, "step": 38190 }, { "epoch": 0.1543328337043516, "grad_norm": 1447.67431640625, "learning_rate": 4.9576588296825386e-05, "loss": 60.8636, "step": 38200 }, { "epoch": 0.15437323496971925, "grad_norm": 815.7072143554688, "learning_rate": 4.9575948341424634e-05, "loss": 86.5249, "step": 38210 }, { "epoch": 0.1544136362350869, "grad_norm": 455.055419921875, "learning_rate": 4.957530790690311e-05, "loss": 87.8417, "step": 38220 }, { "epoch": 0.15445403750045453, "grad_norm": 1008.4957275390625, "learning_rate": 4.957466699327331e-05, "loss": 111.3395, "step": 38230 }, { "epoch": 0.15449443876582214, "grad_norm": 0.0, "learning_rate": 4.957402560054773e-05, "loss": 127.6305, "step": 38240 }, { "epoch": 0.15453484003118978, "grad_norm": 2538.266357421875, "learning_rate": 4.957338372873886e-05, "loss": 126.8811, "step": 38250 }, { "epoch": 0.15457524129655742, "grad_norm": 765.6140747070312, "learning_rate": 4.957274137785922e-05, "loss": 98.6367, "step": 38260 }, { "epoch": 0.15461564256192503, "grad_norm": 1128.1064453125, "learning_rate": 4.957209854792135e-05, "loss": 115.5567, "step": 38270 }, { "epoch": 0.15465604382729267, "grad_norm": 1656.940673828125, "learning_rate": 4.957145523893776e-05, "loss": 96.0133, "step": 38280 }, { "epoch": 0.1546964450926603, "grad_norm": 776.5398559570312, "learning_rate": 4.9570811450921e-05, "loss": 96.1438, "step": 38290 }, { "epoch": 0.15473684635802792, "grad_norm": 541.0315551757812, "learning_rate": 4.957016718388362e-05, "loss": 94.2679, "step": 38300 }, { "epoch": 0.15477724762339556, "grad_norm": 1425.260498046875, "learning_rate": 4.956952243783818e-05, "loss": 85.9477, "step": 38310 }, { "epoch": 0.1548176488887632, "grad_norm": 495.0509948730469, "learning_rate": 4.956887721279726e-05, "loss": 128.2712, "step": 38320 }, { "epoch": 0.15485805015413082, "grad_norm": 1667.0662841796875, "learning_rate": 4.956823150877342e-05, "loss": 81.26, "step": 38330 }, { "epoch": 0.15489845141949846, "grad_norm": 483.2498474121094, "learning_rate": 4.956758532577926e-05, "loss": 76.7887, "step": 38340 }, { "epoch": 0.1549388526848661, "grad_norm": 940.1998901367188, "learning_rate": 4.9566938663827377e-05, "loss": 62.8369, "step": 38350 }, { "epoch": 0.1549792539502337, "grad_norm": 826.6288452148438, "learning_rate": 4.9566291522930375e-05, "loss": 82.23, "step": 38360 }, { "epoch": 0.15501965521560135, "grad_norm": 596.3945922851562, "learning_rate": 4.956564390310088e-05, "loss": 68.8034, "step": 38370 }, { "epoch": 0.155060056480969, "grad_norm": 843.1058349609375, "learning_rate": 4.95649958043515e-05, "loss": 92.2614, "step": 38380 }, { "epoch": 0.15510045774633663, "grad_norm": 368.0083923339844, "learning_rate": 4.956434722669489e-05, "loss": 80.4238, "step": 38390 }, { "epoch": 0.15514085901170424, "grad_norm": 527.663330078125, "learning_rate": 4.9563698170143666e-05, "loss": 98.6083, "step": 38400 }, { "epoch": 0.15518126027707188, "grad_norm": 847.8034057617188, "learning_rate": 4.9563048634710516e-05, "loss": 154.9312, "step": 38410 }, { "epoch": 0.15522166154243952, "grad_norm": 872.8273315429688, "learning_rate": 4.956239862040808e-05, "loss": 89.4222, "step": 38420 }, { "epoch": 0.15526206280780713, "grad_norm": 2756.822509765625, "learning_rate": 4.956174812724904e-05, "loss": 77.4832, "step": 38430 }, { "epoch": 0.15530246407317477, "grad_norm": 405.55841064453125, "learning_rate": 4.956109715524608e-05, "loss": 104.6995, "step": 38440 }, { "epoch": 0.1553428653385424, "grad_norm": 514.676513671875, "learning_rate": 4.956044570441188e-05, "loss": 112.402, "step": 38450 }, { "epoch": 0.15538326660391003, "grad_norm": 867.9771118164062, "learning_rate": 4.955979377475915e-05, "loss": 72.6036, "step": 38460 }, { "epoch": 0.15542366786927767, "grad_norm": 407.26898193359375, "learning_rate": 4.9559141366300594e-05, "loss": 89.936, "step": 38470 }, { "epoch": 0.1554640691346453, "grad_norm": 906.7838745117188, "learning_rate": 4.955848847904894e-05, "loss": 111.0247, "step": 38480 }, { "epoch": 0.15550447040001292, "grad_norm": 907.695068359375, "learning_rate": 4.955783511301689e-05, "loss": 87.2485, "step": 38490 }, { "epoch": 0.15554487166538056, "grad_norm": 1016.2330932617188, "learning_rate": 4.9557181268217227e-05, "loss": 130.9673, "step": 38500 }, { "epoch": 0.1555852729307482, "grad_norm": 1297.313232421875, "learning_rate": 4.955652694466265e-05, "loss": 104.8052, "step": 38510 }, { "epoch": 0.1556256741961158, "grad_norm": 704.80126953125, "learning_rate": 4.9555872142365945e-05, "loss": 97.9365, "step": 38520 }, { "epoch": 0.15566607546148345, "grad_norm": 1771.6796875, "learning_rate": 4.9555216861339876e-05, "loss": 74.8663, "step": 38530 }, { "epoch": 0.1557064767268511, "grad_norm": 527.6610717773438, "learning_rate": 4.9554561101597206e-05, "loss": 99.9333, "step": 38540 }, { "epoch": 0.15574687799221873, "grad_norm": 380.7573547363281, "learning_rate": 4.955390486315073e-05, "loss": 72.2091, "step": 38550 }, { "epoch": 0.15578727925758634, "grad_norm": 1313.071044921875, "learning_rate": 4.955324814601324e-05, "loss": 84.0544, "step": 38560 }, { "epoch": 0.15582768052295398, "grad_norm": 879.2240600585938, "learning_rate": 4.955259095019753e-05, "loss": 100.0556, "step": 38570 }, { "epoch": 0.15586808178832162, "grad_norm": 1069.8026123046875, "learning_rate": 4.955193327571642e-05, "loss": 60.2459, "step": 38580 }, { "epoch": 0.15590848305368923, "grad_norm": 1302.9310302734375, "learning_rate": 4.955127512258273e-05, "loss": 92.8039, "step": 38590 }, { "epoch": 0.15594888431905687, "grad_norm": 1001.041259765625, "learning_rate": 4.95506164908093e-05, "loss": 110.6155, "step": 38600 }, { "epoch": 0.15598928558442451, "grad_norm": 895.0310668945312, "learning_rate": 4.954995738040895e-05, "loss": 107.8179, "step": 38610 }, { "epoch": 0.15602968684979213, "grad_norm": 823.3916625976562, "learning_rate": 4.954929779139455e-05, "loss": 78.2662, "step": 38620 }, { "epoch": 0.15607008811515977, "grad_norm": 409.09527587890625, "learning_rate": 4.954863772377894e-05, "loss": 123.7662, "step": 38630 }, { "epoch": 0.1561104893805274, "grad_norm": 569.7661743164062, "learning_rate": 4.9547977177575014e-05, "loss": 89.6015, "step": 38640 }, { "epoch": 0.15615089064589502, "grad_norm": 616.7923583984375, "learning_rate": 4.954731615279563e-05, "loss": 94.4896, "step": 38650 }, { "epoch": 0.15619129191126266, "grad_norm": 479.69647216796875, "learning_rate": 4.9546654649453675e-05, "loss": 66.053, "step": 38660 }, { "epoch": 0.1562316931766303, "grad_norm": 2162.659912109375, "learning_rate": 4.954599266756205e-05, "loss": 138.2849, "step": 38670 }, { "epoch": 0.1562720944419979, "grad_norm": 729.6651000976562, "learning_rate": 4.9545330207133664e-05, "loss": 107.9969, "step": 38680 }, { "epoch": 0.15631249570736555, "grad_norm": 1174.257568359375, "learning_rate": 4.9544667268181436e-05, "loss": 94.6718, "step": 38690 }, { "epoch": 0.1563528969727332, "grad_norm": 638.7692260742188, "learning_rate": 4.9544003850718266e-05, "loss": 100.2124, "step": 38700 }, { "epoch": 0.15639329823810083, "grad_norm": 991.5917358398438, "learning_rate": 4.954333995475712e-05, "loss": 58.1896, "step": 38710 }, { "epoch": 0.15643369950346844, "grad_norm": 2954.35302734375, "learning_rate": 4.954267558031092e-05, "loss": 65.0267, "step": 38720 }, { "epoch": 0.15647410076883608, "grad_norm": 1842.9337158203125, "learning_rate": 4.954201072739262e-05, "loss": 92.5574, "step": 38730 }, { "epoch": 0.15651450203420372, "grad_norm": 1125.166748046875, "learning_rate": 4.9541345396015193e-05, "loss": 89.0116, "step": 38740 }, { "epoch": 0.15655490329957134, "grad_norm": 420.5506896972656, "learning_rate": 4.9540679586191605e-05, "loss": 91.9055, "step": 38750 }, { "epoch": 0.15659530456493898, "grad_norm": 1417.18115234375, "learning_rate": 4.9540013297934826e-05, "loss": 72.1004, "step": 38760 }, { "epoch": 0.15663570583030662, "grad_norm": 711.8256225585938, "learning_rate": 4.953934653125786e-05, "loss": 110.1404, "step": 38770 }, { "epoch": 0.15667610709567423, "grad_norm": 936.8480834960938, "learning_rate": 4.9538679286173696e-05, "loss": 63.1972, "step": 38780 }, { "epoch": 0.15671650836104187, "grad_norm": 965.4445190429688, "learning_rate": 4.953801156269534e-05, "loss": 96.635, "step": 38790 }, { "epoch": 0.1567569096264095, "grad_norm": 388.2055969238281, "learning_rate": 4.953734336083583e-05, "loss": 69.8237, "step": 38800 }, { "epoch": 0.15679731089177712, "grad_norm": 1032.505126953125, "learning_rate": 4.953667468060816e-05, "loss": 80.3915, "step": 38810 }, { "epoch": 0.15683771215714476, "grad_norm": 2164.00341796875, "learning_rate": 4.95360055220254e-05, "loss": 117.7873, "step": 38820 }, { "epoch": 0.1568781134225124, "grad_norm": 820.93310546875, "learning_rate": 4.9535335885100575e-05, "loss": 106.5048, "step": 38830 }, { "epoch": 0.15691851468788, "grad_norm": 513.0703735351562, "learning_rate": 4.953466576984675e-05, "loss": 67.942, "step": 38840 }, { "epoch": 0.15695891595324765, "grad_norm": 479.7278137207031, "learning_rate": 4.953399517627698e-05, "loss": 92.5827, "step": 38850 }, { "epoch": 0.1569993172186153, "grad_norm": 3824.49951171875, "learning_rate": 4.953332410440435e-05, "loss": 76.3334, "step": 38860 }, { "epoch": 0.15703971848398293, "grad_norm": 492.0223693847656, "learning_rate": 4.953265255424192e-05, "loss": 90.9314, "step": 38870 }, { "epoch": 0.15708011974935054, "grad_norm": 487.1667175292969, "learning_rate": 4.953198052580281e-05, "loss": 83.6485, "step": 38880 }, { "epoch": 0.15712052101471818, "grad_norm": 821.9052734375, "learning_rate": 4.953130801910011e-05, "loss": 66.1471, "step": 38890 }, { "epoch": 0.15716092228008582, "grad_norm": 501.9080810546875, "learning_rate": 4.953063503414692e-05, "loss": 59.9543, "step": 38900 }, { "epoch": 0.15720132354545344, "grad_norm": 1479.7249755859375, "learning_rate": 4.9529961570956383e-05, "loss": 83.3378, "step": 38910 }, { "epoch": 0.15724172481082108, "grad_norm": 636.5172119140625, "learning_rate": 4.952928762954161e-05, "loss": 94.2658, "step": 38920 }, { "epoch": 0.15728212607618872, "grad_norm": 1404.88134765625, "learning_rate": 4.952861320991575e-05, "loss": 86.6847, "step": 38930 }, { "epoch": 0.15732252734155633, "grad_norm": 1033.673828125, "learning_rate": 4.952793831209195e-05, "loss": 109.079, "step": 38940 }, { "epoch": 0.15736292860692397, "grad_norm": 990.5618896484375, "learning_rate": 4.952726293608335e-05, "loss": 80.7069, "step": 38950 }, { "epoch": 0.1574033298722916, "grad_norm": 675.2631225585938, "learning_rate": 4.9526587081903145e-05, "loss": 81.1813, "step": 38960 }, { "epoch": 0.15744373113765922, "grad_norm": 911.697509765625, "learning_rate": 4.9525910749564494e-05, "loss": 107.4708, "step": 38970 }, { "epoch": 0.15748413240302686, "grad_norm": 506.453369140625, "learning_rate": 4.952523393908059e-05, "loss": 94.5604, "step": 38980 }, { "epoch": 0.1575245336683945, "grad_norm": 1586.1180419921875, "learning_rate": 4.9524556650464616e-05, "loss": 97.9683, "step": 38990 }, { "epoch": 0.1575649349337621, "grad_norm": 525.472900390625, "learning_rate": 4.952387888372979e-05, "loss": 126.3809, "step": 39000 }, { "epoch": 0.15760533619912975, "grad_norm": 2915.437255859375, "learning_rate": 4.952320063888932e-05, "loss": 100.4383, "step": 39010 }, { "epoch": 0.1576457374644974, "grad_norm": 656.8854370117188, "learning_rate": 4.952252191595643e-05, "loss": 41.7866, "step": 39020 }, { "epoch": 0.15768613872986503, "grad_norm": 1504.2880859375, "learning_rate": 4.9521842714944345e-05, "loss": 95.3066, "step": 39030 }, { "epoch": 0.15772653999523265, "grad_norm": 425.61871337890625, "learning_rate": 4.952116303586631e-05, "loss": 89.983, "step": 39040 }, { "epoch": 0.15776694126060029, "grad_norm": 1021.040771484375, "learning_rate": 4.952048287873558e-05, "loss": 103.6552, "step": 39050 }, { "epoch": 0.15780734252596793, "grad_norm": 770.2539672851562, "learning_rate": 4.9519802243565414e-05, "loss": 88.245, "step": 39060 }, { "epoch": 0.15784774379133554, "grad_norm": 509.4902038574219, "learning_rate": 4.951912113036908e-05, "loss": 125.9709, "step": 39070 }, { "epoch": 0.15788814505670318, "grad_norm": 577.9376831054688, "learning_rate": 4.951843953915985e-05, "loss": 56.5803, "step": 39080 }, { "epoch": 0.15792854632207082, "grad_norm": 1800.318115234375, "learning_rate": 4.951775746995102e-05, "loss": 126.1482, "step": 39090 }, { "epoch": 0.15796894758743843, "grad_norm": 644.34521484375, "learning_rate": 4.951707492275589e-05, "loss": 62.9948, "step": 39100 }, { "epoch": 0.15800934885280607, "grad_norm": 2003.1749267578125, "learning_rate": 4.9516391897587764e-05, "loss": 83.1591, "step": 39110 }, { "epoch": 0.1580497501181737, "grad_norm": 663.0642700195312, "learning_rate": 4.951570839445995e-05, "loss": 86.2755, "step": 39120 }, { "epoch": 0.15809015138354132, "grad_norm": 2192.492919921875, "learning_rate": 4.951502441338578e-05, "loss": 69.6609, "step": 39130 }, { "epoch": 0.15813055264890896, "grad_norm": 437.15411376953125, "learning_rate": 4.951433995437859e-05, "loss": 98.7007, "step": 39140 }, { "epoch": 0.1581709539142766, "grad_norm": 643.024658203125, "learning_rate": 4.951365501745172e-05, "loss": 69.265, "step": 39150 }, { "epoch": 0.15821135517964421, "grad_norm": 818.115234375, "learning_rate": 4.951296960261853e-05, "loss": 82.4836, "step": 39160 }, { "epoch": 0.15825175644501185, "grad_norm": 0.0, "learning_rate": 4.9512283709892374e-05, "loss": 64.5836, "step": 39170 }, { "epoch": 0.1582921577103795, "grad_norm": 476.3262634277344, "learning_rate": 4.951159733928663e-05, "loss": 113.6856, "step": 39180 }, { "epoch": 0.15833255897574713, "grad_norm": 617.4471435546875, "learning_rate": 4.9510910490814666e-05, "loss": 53.7248, "step": 39190 }, { "epoch": 0.15837296024111475, "grad_norm": 1832.762939453125, "learning_rate": 4.95102231644899e-05, "loss": 80.7921, "step": 39200 }, { "epoch": 0.1584133615064824, "grad_norm": 736.6915283203125, "learning_rate": 4.95095353603257e-05, "loss": 81.6178, "step": 39210 }, { "epoch": 0.15845376277185003, "grad_norm": 881.178466796875, "learning_rate": 4.9508847078335495e-05, "loss": 78.0822, "step": 39220 }, { "epoch": 0.15849416403721764, "grad_norm": 482.23944091796875, "learning_rate": 4.9508158318532696e-05, "loss": 83.8018, "step": 39230 }, { "epoch": 0.15853456530258528, "grad_norm": 717.8798217773438, "learning_rate": 4.9507469080930734e-05, "loss": 69.0619, "step": 39240 }, { "epoch": 0.15857496656795292, "grad_norm": 502.1875305175781, "learning_rate": 4.9506779365543046e-05, "loss": 53.7278, "step": 39250 }, { "epoch": 0.15861536783332053, "grad_norm": 437.0827331542969, "learning_rate": 4.950608917238308e-05, "loss": 79.8096, "step": 39260 }, { "epoch": 0.15865576909868817, "grad_norm": 1342.6405029296875, "learning_rate": 4.9505398501464284e-05, "loss": 78.4081, "step": 39270 }, { "epoch": 0.1586961703640558, "grad_norm": 849.6737670898438, "learning_rate": 4.9504707352800125e-05, "loss": 79.9384, "step": 39280 }, { "epoch": 0.15873657162942342, "grad_norm": 925.02392578125, "learning_rate": 4.95040157264041e-05, "loss": 59.0614, "step": 39290 }, { "epoch": 0.15877697289479106, "grad_norm": 1974.9837646484375, "learning_rate": 4.9503323622289655e-05, "loss": 91.3175, "step": 39300 }, { "epoch": 0.1588173741601587, "grad_norm": 1192.585693359375, "learning_rate": 4.950263104047031e-05, "loss": 62.4365, "step": 39310 }, { "epoch": 0.15885777542552632, "grad_norm": 1465.087158203125, "learning_rate": 4.9501937980959545e-05, "loss": 75.6416, "step": 39320 }, { "epoch": 0.15889817669089396, "grad_norm": 669.2102661132812, "learning_rate": 4.950124444377089e-05, "loss": 101.0025, "step": 39330 }, { "epoch": 0.1589385779562616, "grad_norm": 1937.0089111328125, "learning_rate": 4.950055042891786e-05, "loss": 82.6265, "step": 39340 }, { "epoch": 0.15897897922162924, "grad_norm": 1510.9599609375, "learning_rate": 4.949985593641399e-05, "loss": 117.1577, "step": 39350 }, { "epoch": 0.15901938048699685, "grad_norm": 1171.20703125, "learning_rate": 4.949916096627282e-05, "loss": 106.9967, "step": 39360 }, { "epoch": 0.1590597817523645, "grad_norm": 1075.4632568359375, "learning_rate": 4.949846551850788e-05, "loss": 110.2698, "step": 39370 }, { "epoch": 0.15910018301773213, "grad_norm": 1426.9444580078125, "learning_rate": 4.949776959313275e-05, "loss": 68.2341, "step": 39380 }, { "epoch": 0.15914058428309974, "grad_norm": 734.63525390625, "learning_rate": 4.9497073190160994e-05, "loss": 97.0607, "step": 39390 }, { "epoch": 0.15918098554846738, "grad_norm": 682.6970825195312, "learning_rate": 4.949637630960617e-05, "loss": 83.067, "step": 39400 }, { "epoch": 0.15922138681383502, "grad_norm": 239.862060546875, "learning_rate": 4.9495678951481896e-05, "loss": 93.8866, "step": 39410 }, { "epoch": 0.15926178807920263, "grad_norm": 1805.31591796875, "learning_rate": 4.949498111580174e-05, "loss": 81.097, "step": 39420 }, { "epoch": 0.15930218934457027, "grad_norm": 1094.3526611328125, "learning_rate": 4.949428280257932e-05, "loss": 122.2046, "step": 39430 }, { "epoch": 0.1593425906099379, "grad_norm": 395.03936767578125, "learning_rate": 4.949358401182824e-05, "loss": 80.3976, "step": 39440 }, { "epoch": 0.15938299187530552, "grad_norm": 896.0783081054688, "learning_rate": 4.949288474356213e-05, "loss": 100.0894, "step": 39450 }, { "epoch": 0.15942339314067316, "grad_norm": 1049.211669921875, "learning_rate": 4.9492184997794624e-05, "loss": 115.2995, "step": 39460 }, { "epoch": 0.1594637944060408, "grad_norm": 1161.3402099609375, "learning_rate": 4.949148477453936e-05, "loss": 81.019, "step": 39470 }, { "epoch": 0.15950419567140842, "grad_norm": 2834.7724609375, "learning_rate": 4.949078407381e-05, "loss": 96.1574, "step": 39480 }, { "epoch": 0.15954459693677606, "grad_norm": 808.9170532226562, "learning_rate": 4.949008289562019e-05, "loss": 79.7376, "step": 39490 }, { "epoch": 0.1595849982021437, "grad_norm": 530.290771484375, "learning_rate": 4.94893812399836e-05, "loss": 69.7185, "step": 39500 }, { "epoch": 0.1596253994675113, "grad_norm": 1201.916015625, "learning_rate": 4.9488679106913924e-05, "loss": 76.6368, "step": 39510 }, { "epoch": 0.15966580073287895, "grad_norm": 1412.6275634765625, "learning_rate": 4.948797649642484e-05, "loss": 64.3421, "step": 39520 }, { "epoch": 0.1597062019982466, "grad_norm": 927.9067993164062, "learning_rate": 4.9487273408530044e-05, "loss": 103.6948, "step": 39530 }, { "epoch": 0.15974660326361423, "grad_norm": 722.8312377929688, "learning_rate": 4.9486569843243244e-05, "loss": 76.603, "step": 39540 }, { "epoch": 0.15978700452898184, "grad_norm": 606.1805419921875, "learning_rate": 4.948586580057816e-05, "loss": 96.2999, "step": 39550 }, { "epoch": 0.15982740579434948, "grad_norm": 787.3512573242188, "learning_rate": 4.948516128054852e-05, "loss": 91.6384, "step": 39560 }, { "epoch": 0.15986780705971712, "grad_norm": 477.5809631347656, "learning_rate": 4.948445628316805e-05, "loss": 84.977, "step": 39570 }, { "epoch": 0.15990820832508473, "grad_norm": 2748.46630859375, "learning_rate": 4.94837508084505e-05, "loss": 113.5827, "step": 39580 }, { "epoch": 0.15994860959045237, "grad_norm": 1089.883544921875, "learning_rate": 4.948304485640963e-05, "loss": 110.2332, "step": 39590 }, { "epoch": 0.15998901085582, "grad_norm": 2496.34326171875, "learning_rate": 4.948233842705919e-05, "loss": 62.8154, "step": 39600 }, { "epoch": 0.16002941212118763, "grad_norm": 1067.39501953125, "learning_rate": 4.948163152041295e-05, "loss": 92.6578, "step": 39610 }, { "epoch": 0.16006981338655527, "grad_norm": 401.0006408691406, "learning_rate": 4.948092413648471e-05, "loss": 94.2235, "step": 39620 }, { "epoch": 0.1601102146519229, "grad_norm": 599.7701416015625, "learning_rate": 4.948021627528825e-05, "loss": 119.0192, "step": 39630 }, { "epoch": 0.16015061591729052, "grad_norm": 5756.70068359375, "learning_rate": 4.9479507936837364e-05, "loss": 117.7867, "step": 39640 }, { "epoch": 0.16019101718265816, "grad_norm": 1125.0499267578125, "learning_rate": 4.947879912114588e-05, "loss": 90.0595, "step": 39650 }, { "epoch": 0.1602314184480258, "grad_norm": 5096.72216796875, "learning_rate": 4.947808982822759e-05, "loss": 109.1604, "step": 39660 }, { "epoch": 0.1602718197133934, "grad_norm": 750.5372314453125, "learning_rate": 4.9477380058096343e-05, "loss": 75.6724, "step": 39670 }, { "epoch": 0.16031222097876105, "grad_norm": 721.302734375, "learning_rate": 4.947666981076597e-05, "loss": 76.214, "step": 39680 }, { "epoch": 0.1603526222441287, "grad_norm": 430.94671630859375, "learning_rate": 4.947595908625032e-05, "loss": 78.1926, "step": 39690 }, { "epoch": 0.16039302350949633, "grad_norm": 871.6290893554688, "learning_rate": 4.947524788456325e-05, "loss": 56.1409, "step": 39700 }, { "epoch": 0.16043342477486394, "grad_norm": 1097.3226318359375, "learning_rate": 4.9474536205718615e-05, "loss": 70.7656, "step": 39710 }, { "epoch": 0.16047382604023158, "grad_norm": 821.6055908203125, "learning_rate": 4.94738240497303e-05, "loss": 116.6263, "step": 39720 }, { "epoch": 0.16051422730559922, "grad_norm": 988.2919311523438, "learning_rate": 4.947311141661218e-05, "loss": 75.0355, "step": 39730 }, { "epoch": 0.16055462857096683, "grad_norm": 1874.0450439453125, "learning_rate": 4.947239830637815e-05, "loss": 93.3599, "step": 39740 }, { "epoch": 0.16059502983633447, "grad_norm": 1665.5904541015625, "learning_rate": 4.947168471904213e-05, "loss": 110.6867, "step": 39750 }, { "epoch": 0.16063543110170211, "grad_norm": 768.0989990234375, "learning_rate": 4.947097065461801e-05, "loss": 74.4268, "step": 39760 }, { "epoch": 0.16067583236706973, "grad_norm": 1051.240234375, "learning_rate": 4.947025611311972e-05, "loss": 114.7893, "step": 39770 }, { "epoch": 0.16071623363243737, "grad_norm": 4345.51953125, "learning_rate": 4.946954109456118e-05, "loss": 108.7796, "step": 39780 }, { "epoch": 0.160756634897805, "grad_norm": 1246.1304931640625, "learning_rate": 4.946882559895635e-05, "loss": 58.0561, "step": 39790 }, { "epoch": 0.16079703616317262, "grad_norm": 3426.0712890625, "learning_rate": 4.946810962631916e-05, "loss": 86.2152, "step": 39800 }, { "epoch": 0.16083743742854026, "grad_norm": 971.04052734375, "learning_rate": 4.9467393176663576e-05, "loss": 80.0207, "step": 39810 }, { "epoch": 0.1608778386939079, "grad_norm": 791.5706787109375, "learning_rate": 4.9466676250003576e-05, "loss": 83.3073, "step": 39820 }, { "epoch": 0.1609182399592755, "grad_norm": 1009.4277954101562, "learning_rate": 4.9465958846353114e-05, "loss": 81.5609, "step": 39830 }, { "epoch": 0.16095864122464315, "grad_norm": 645.2051391601562, "learning_rate": 4.9465240965726195e-05, "loss": 110.7899, "step": 39840 }, { "epoch": 0.1609990424900108, "grad_norm": 2333.77294921875, "learning_rate": 4.9464522608136805e-05, "loss": 67.2439, "step": 39850 }, { "epoch": 0.16103944375537843, "grad_norm": 657.8132934570312, "learning_rate": 4.946380377359895e-05, "loss": 60.2396, "step": 39860 }, { "epoch": 0.16107984502074604, "grad_norm": 1080.5504150390625, "learning_rate": 4.9463084462126655e-05, "loss": 101.5745, "step": 39870 }, { "epoch": 0.16112024628611368, "grad_norm": 550.1509399414062, "learning_rate": 4.946236467373392e-05, "loss": 76.9606, "step": 39880 }, { "epoch": 0.16116064755148132, "grad_norm": 686.2342529296875, "learning_rate": 4.94616444084348e-05, "loss": 87.1771, "step": 39890 }, { "epoch": 0.16120104881684894, "grad_norm": 563.68017578125, "learning_rate": 4.946092366624333e-05, "loss": 90.5833, "step": 39900 }, { "epoch": 0.16124145008221658, "grad_norm": 508.9067687988281, "learning_rate": 4.946020244717355e-05, "loss": 110.8761, "step": 39910 }, { "epoch": 0.16128185134758422, "grad_norm": 447.7995910644531, "learning_rate": 4.945948075123954e-05, "loss": 84.4401, "step": 39920 }, { "epoch": 0.16132225261295183, "grad_norm": 4074.44482421875, "learning_rate": 4.9458758578455354e-05, "loss": 130.2105, "step": 39930 }, { "epoch": 0.16136265387831947, "grad_norm": 434.82470703125, "learning_rate": 4.945803592883509e-05, "loss": 76.4253, "step": 39940 }, { "epoch": 0.1614030551436871, "grad_norm": 761.01123046875, "learning_rate": 4.945731280239281e-05, "loss": 106.6753, "step": 39950 }, { "epoch": 0.16144345640905472, "grad_norm": 1182.587646484375, "learning_rate": 4.9456589199142637e-05, "loss": 101.2267, "step": 39960 }, { "epoch": 0.16148385767442236, "grad_norm": 1377.45947265625, "learning_rate": 4.945586511909865e-05, "loss": 135.843, "step": 39970 }, { "epoch": 0.16152425893979, "grad_norm": 1164.2666015625, "learning_rate": 4.9455140562274995e-05, "loss": 104.891, "step": 39980 }, { "epoch": 0.1615646602051576, "grad_norm": 602.760498046875, "learning_rate": 4.9454415528685785e-05, "loss": 117.2372, "step": 39990 }, { "epoch": 0.16160506147052525, "grad_norm": 1094.737548828125, "learning_rate": 4.9453690018345144e-05, "loss": 85.9165, "step": 40000 }, { "epoch": 0.1616454627358929, "grad_norm": 599.16015625, "learning_rate": 4.9452964031267236e-05, "loss": 69.1269, "step": 40010 }, { "epoch": 0.16168586400126053, "grad_norm": 613.2879028320312, "learning_rate": 4.9452237567466194e-05, "loss": 88.8299, "step": 40020 }, { "epoch": 0.16172626526662814, "grad_norm": 735.5689086914062, "learning_rate": 4.9451510626956196e-05, "loss": 76.922, "step": 40030 }, { "epoch": 0.16176666653199578, "grad_norm": 696.985595703125, "learning_rate": 4.945078320975142e-05, "loss": 79.6425, "step": 40040 }, { "epoch": 0.16180706779736342, "grad_norm": 1030.8795166015625, "learning_rate": 4.9450055315866026e-05, "loss": 104.0844, "step": 40050 }, { "epoch": 0.16184746906273104, "grad_norm": 3068.77294921875, "learning_rate": 4.944932694531422e-05, "loss": 85.7902, "step": 40060 }, { "epoch": 0.16188787032809868, "grad_norm": 483.2486267089844, "learning_rate": 4.94485980981102e-05, "loss": 63.3755, "step": 40070 }, { "epoch": 0.16192827159346632, "grad_norm": 2090.39208984375, "learning_rate": 4.9447868774268166e-05, "loss": 83.095, "step": 40080 }, { "epoch": 0.16196867285883393, "grad_norm": 837.720947265625, "learning_rate": 4.944713897380235e-05, "loss": 68.0856, "step": 40090 }, { "epoch": 0.16200907412420157, "grad_norm": 3449.74267578125, "learning_rate": 4.9446408696726974e-05, "loss": 80.644, "step": 40100 }, { "epoch": 0.1620494753895692, "grad_norm": 532.7826538085938, "learning_rate": 4.944567794305627e-05, "loss": 72.2116, "step": 40110 }, { "epoch": 0.16208987665493682, "grad_norm": 518.95556640625, "learning_rate": 4.9444946712804494e-05, "loss": 95.1311, "step": 40120 }, { "epoch": 0.16213027792030446, "grad_norm": 2341.940673828125, "learning_rate": 4.944421500598589e-05, "loss": 101.21, "step": 40130 }, { "epoch": 0.1621706791856721, "grad_norm": 694.1212158203125, "learning_rate": 4.944348282261474e-05, "loss": 124.4218, "step": 40140 }, { "epoch": 0.1622110804510397, "grad_norm": 900.7708129882812, "learning_rate": 4.9442750162705295e-05, "loss": 89.3225, "step": 40150 }, { "epoch": 0.16225148171640735, "grad_norm": 1264.3707275390625, "learning_rate": 4.9442017026271864e-05, "loss": 116.5199, "step": 40160 }, { "epoch": 0.162291882981775, "grad_norm": 853.1660766601562, "learning_rate": 4.944128341332872e-05, "loss": 101.1801, "step": 40170 }, { "epoch": 0.16233228424714263, "grad_norm": 696.7931518554688, "learning_rate": 4.9440549323890176e-05, "loss": 81.5634, "step": 40180 }, { "epoch": 0.16237268551251025, "grad_norm": 1033.1129150390625, "learning_rate": 4.9439814757970535e-05, "loss": 79.6033, "step": 40190 }, { "epoch": 0.16241308677787789, "grad_norm": 605.3699951171875, "learning_rate": 4.9439079715584135e-05, "loss": 107.7314, "step": 40200 }, { "epoch": 0.16245348804324553, "grad_norm": 3663.65380859375, "learning_rate": 4.943834419674529e-05, "loss": 97.9108, "step": 40210 }, { "epoch": 0.16249388930861314, "grad_norm": 504.4712829589844, "learning_rate": 4.9437608201468336e-05, "loss": 79.6529, "step": 40220 }, { "epoch": 0.16253429057398078, "grad_norm": 759.597412109375, "learning_rate": 4.9436871729767634e-05, "loss": 65.1314, "step": 40230 }, { "epoch": 0.16257469183934842, "grad_norm": 810.0010986328125, "learning_rate": 4.943613478165753e-05, "loss": 76.6986, "step": 40240 }, { "epoch": 0.16261509310471603, "grad_norm": 701.2088623046875, "learning_rate": 4.94353973571524e-05, "loss": 59.842, "step": 40250 }, { "epoch": 0.16265549437008367, "grad_norm": 460.05841064453125, "learning_rate": 4.943465945626662e-05, "loss": 74.4412, "step": 40260 }, { "epoch": 0.1626958956354513, "grad_norm": 1123.1805419921875, "learning_rate": 4.943392107901458e-05, "loss": 141.1575, "step": 40270 }, { "epoch": 0.16273629690081892, "grad_norm": 524.522705078125, "learning_rate": 4.943318222541066e-05, "loss": 117.3683, "step": 40280 }, { "epoch": 0.16277669816618656, "grad_norm": 406.4377136230469, "learning_rate": 4.943244289546928e-05, "loss": 68.4648, "step": 40290 }, { "epoch": 0.1628170994315542, "grad_norm": 752.1702880859375, "learning_rate": 4.943170308920484e-05, "loss": 74.2303, "step": 40300 }, { "epoch": 0.16285750069692181, "grad_norm": 1674.6246337890625, "learning_rate": 4.943096280663178e-05, "loss": 123.305, "step": 40310 }, { "epoch": 0.16289790196228945, "grad_norm": 1532.0059814453125, "learning_rate": 4.9430222047764506e-05, "loss": 82.7036, "step": 40320 }, { "epoch": 0.1629383032276571, "grad_norm": 723.3096923828125, "learning_rate": 4.942948081261749e-05, "loss": 131.268, "step": 40330 }, { "epoch": 0.16297870449302473, "grad_norm": 1614.462646484375, "learning_rate": 4.942873910120516e-05, "loss": 109.0338, "step": 40340 }, { "epoch": 0.16301910575839235, "grad_norm": 720.302734375, "learning_rate": 4.9427996913542e-05, "loss": 63.5732, "step": 40350 }, { "epoch": 0.16305950702376, "grad_norm": 1045.8563232421875, "learning_rate": 4.9427254249642444e-05, "loss": 96.6388, "step": 40360 }, { "epoch": 0.16309990828912763, "grad_norm": 983.675048828125, "learning_rate": 4.9426511109521e-05, "loss": 66.8883, "step": 40370 }, { "epoch": 0.16314030955449524, "grad_norm": 668.4130249023438, "learning_rate": 4.9425767493192144e-05, "loss": 107.8524, "step": 40380 }, { "epoch": 0.16318071081986288, "grad_norm": 542.26318359375, "learning_rate": 4.942502340067038e-05, "loss": 64.179, "step": 40390 }, { "epoch": 0.16322111208523052, "grad_norm": 931.4650268554688, "learning_rate": 4.942427883197021e-05, "loss": 120.4847, "step": 40400 }, { "epoch": 0.16326151335059813, "grad_norm": 606.0546264648438, "learning_rate": 4.942353378710614e-05, "loss": 54.7097, "step": 40410 }, { "epoch": 0.16330191461596577, "grad_norm": 1016.0202026367188, "learning_rate": 4.9422788266092715e-05, "loss": 108.11, "step": 40420 }, { "epoch": 0.1633423158813334, "grad_norm": 1137.9029541015625, "learning_rate": 4.942204226894445e-05, "loss": 105.8649, "step": 40430 }, { "epoch": 0.16338271714670102, "grad_norm": 1075.5977783203125, "learning_rate": 4.94212957956759e-05, "loss": 101.3763, "step": 40440 }, { "epoch": 0.16342311841206866, "grad_norm": 674.2771606445312, "learning_rate": 4.942054884630162e-05, "loss": 81.1654, "step": 40450 }, { "epoch": 0.1634635196774363, "grad_norm": 582.1860961914062, "learning_rate": 4.941980142083617e-05, "loss": 78.6935, "step": 40460 }, { "epoch": 0.16350392094280392, "grad_norm": 688.1861572265625, "learning_rate": 4.9419053519294115e-05, "loss": 79.6108, "step": 40470 }, { "epoch": 0.16354432220817156, "grad_norm": 1063.1949462890625, "learning_rate": 4.941830514169004e-05, "loss": 85.3794, "step": 40480 }, { "epoch": 0.1635847234735392, "grad_norm": 802.294189453125, "learning_rate": 4.941755628803853e-05, "loss": 86.5771, "step": 40490 }, { "epoch": 0.16362512473890684, "grad_norm": 2401.399169921875, "learning_rate": 4.94168069583542e-05, "loss": 123.1953, "step": 40500 }, { "epoch": 0.16366552600427445, "grad_norm": 472.39874267578125, "learning_rate": 4.941605715265164e-05, "loss": 40.4517, "step": 40510 }, { "epoch": 0.1637059272696421, "grad_norm": 502.11492919921875, "learning_rate": 4.941530687094548e-05, "loss": 93.8861, "step": 40520 }, { "epoch": 0.16374632853500973, "grad_norm": 481.69085693359375, "learning_rate": 4.9414556113250344e-05, "loss": 100.2681, "step": 40530 }, { "epoch": 0.16378672980037734, "grad_norm": 718.2989501953125, "learning_rate": 4.941380487958086e-05, "loss": 67.0155, "step": 40540 }, { "epoch": 0.16382713106574498, "grad_norm": 707.5899658203125, "learning_rate": 4.941305316995169e-05, "loss": 80.2658, "step": 40550 }, { "epoch": 0.16386753233111262, "grad_norm": 411.375244140625, "learning_rate": 4.941230098437747e-05, "loss": 86.6765, "step": 40560 }, { "epoch": 0.16390793359648023, "grad_norm": 1048.686279296875, "learning_rate": 4.941154832287288e-05, "loss": 90.3216, "step": 40570 }, { "epoch": 0.16394833486184787, "grad_norm": 951.8493041992188, "learning_rate": 4.941079518545258e-05, "loss": 64.0513, "step": 40580 }, { "epoch": 0.1639887361272155, "grad_norm": 5584.05908203125, "learning_rate": 4.9410041572131266e-05, "loss": 109.0281, "step": 40590 }, { "epoch": 0.16402913739258312, "grad_norm": 1825.2513427734375, "learning_rate": 4.940928748292363e-05, "loss": 83.6246, "step": 40600 }, { "epoch": 0.16406953865795076, "grad_norm": 562.28076171875, "learning_rate": 4.940853291784435e-05, "loss": 72.4572, "step": 40610 }, { "epoch": 0.1641099399233184, "grad_norm": 2325.668212890625, "learning_rate": 4.9407777876908174e-05, "loss": 104.7029, "step": 40620 }, { "epoch": 0.16415034118868602, "grad_norm": 1462.803955078125, "learning_rate": 4.9407022360129796e-05, "loss": 113.1334, "step": 40630 }, { "epoch": 0.16419074245405366, "grad_norm": 539.4075927734375, "learning_rate": 4.9406266367523945e-05, "loss": 87.7285, "step": 40640 }, { "epoch": 0.1642311437194213, "grad_norm": 1116.6656494140625, "learning_rate": 4.940550989910537e-05, "loss": 127.9007, "step": 40650 }, { "epoch": 0.16427154498478894, "grad_norm": 745.6444702148438, "learning_rate": 4.9404752954888824e-05, "loss": 95.8032, "step": 40660 }, { "epoch": 0.16431194625015655, "grad_norm": 1984.2926025390625, "learning_rate": 4.9403995534889044e-05, "loss": 61.1674, "step": 40670 }, { "epoch": 0.1643523475155242, "grad_norm": 553.41748046875, "learning_rate": 4.9403237639120805e-05, "loss": 87.0418, "step": 40680 }, { "epoch": 0.16439274878089183, "grad_norm": 786.6685791015625, "learning_rate": 4.9402479267598887e-05, "loss": 93.9468, "step": 40690 }, { "epoch": 0.16443315004625944, "grad_norm": 826.0059814453125, "learning_rate": 4.940172042033808e-05, "loss": 102.5501, "step": 40700 }, { "epoch": 0.16447355131162708, "grad_norm": 1732.5660400390625, "learning_rate": 4.9400961097353166e-05, "loss": 110.0081, "step": 40710 }, { "epoch": 0.16451395257699472, "grad_norm": 989.8864135742188, "learning_rate": 4.940020129865895e-05, "loss": 63.4497, "step": 40720 }, { "epoch": 0.16455435384236233, "grad_norm": 484.3291931152344, "learning_rate": 4.939944102427025e-05, "loss": 111.996, "step": 40730 }, { "epoch": 0.16459475510772997, "grad_norm": 218.7427520751953, "learning_rate": 4.939868027420189e-05, "loss": 83.2038, "step": 40740 }, { "epoch": 0.1646351563730976, "grad_norm": 3197.0927734375, "learning_rate": 4.939791904846869e-05, "loss": 99.5802, "step": 40750 }, { "epoch": 0.16467555763846523, "grad_norm": 805.2986450195312, "learning_rate": 4.93971573470855e-05, "loss": 85.1965, "step": 40760 }, { "epoch": 0.16471595890383287, "grad_norm": 846.134765625, "learning_rate": 4.939639517006717e-05, "loss": 67.5336, "step": 40770 }, { "epoch": 0.1647563601692005, "grad_norm": 780.6873779296875, "learning_rate": 4.939563251742855e-05, "loss": 85.52, "step": 40780 }, { "epoch": 0.16479676143456812, "grad_norm": 1174.27685546875, "learning_rate": 4.939486938918451e-05, "loss": 97.5447, "step": 40790 }, { "epoch": 0.16483716269993576, "grad_norm": 763.7103271484375, "learning_rate": 4.9394105785349944e-05, "loss": 50.4769, "step": 40800 }, { "epoch": 0.1648775639653034, "grad_norm": 704.8574829101562, "learning_rate": 4.939334170593972e-05, "loss": 107.5855, "step": 40810 }, { "epoch": 0.16491796523067104, "grad_norm": 695.6289672851562, "learning_rate": 4.9392577150968745e-05, "loss": 119.9477, "step": 40820 }, { "epoch": 0.16495836649603865, "grad_norm": 960.3919677734375, "learning_rate": 4.939181212045192e-05, "loss": 75.6716, "step": 40830 }, { "epoch": 0.1649987677614063, "grad_norm": 930.9579467773438, "learning_rate": 4.939104661440415e-05, "loss": 75.375, "step": 40840 }, { "epoch": 0.16503916902677393, "grad_norm": 517.5274658203125, "learning_rate": 4.939028063284038e-05, "loss": 48.7727, "step": 40850 }, { "epoch": 0.16507957029214154, "grad_norm": 559.23974609375, "learning_rate": 4.938951417577552e-05, "loss": 88.6447, "step": 40860 }, { "epoch": 0.16511997155750918, "grad_norm": 1341.4295654296875, "learning_rate": 4.938874724322454e-05, "loss": 58.3951, "step": 40870 }, { "epoch": 0.16516037282287682, "grad_norm": 818.9078369140625, "learning_rate": 4.938797983520237e-05, "loss": 90.6307, "step": 40880 }, { "epoch": 0.16520077408824443, "grad_norm": 719.2857055664062, "learning_rate": 4.938721195172398e-05, "loss": 89.5419, "step": 40890 }, { "epoch": 0.16524117535361207, "grad_norm": 1062.7843017578125, "learning_rate": 4.938644359280433e-05, "loss": 57.1088, "step": 40900 }, { "epoch": 0.16528157661897971, "grad_norm": 713.6543579101562, "learning_rate": 4.938567475845841e-05, "loss": 98.1421, "step": 40910 }, { "epoch": 0.16532197788434733, "grad_norm": 1030.1905517578125, "learning_rate": 4.938490544870121e-05, "loss": 115.5957, "step": 40920 }, { "epoch": 0.16536237914971497, "grad_norm": 4530.09912109375, "learning_rate": 4.938413566354772e-05, "loss": 105.8919, "step": 40930 }, { "epoch": 0.1654027804150826, "grad_norm": 1015.1611328125, "learning_rate": 4.938336540301295e-05, "loss": 130.0979, "step": 40940 }, { "epoch": 0.16544318168045022, "grad_norm": 974.16064453125, "learning_rate": 4.938259466711193e-05, "loss": 61.6297, "step": 40950 }, { "epoch": 0.16548358294581786, "grad_norm": 874.3550415039062, "learning_rate": 4.938182345585966e-05, "loss": 76.4001, "step": 40960 }, { "epoch": 0.1655239842111855, "grad_norm": 2546.218017578125, "learning_rate": 4.938105176927119e-05, "loss": 78.0685, "step": 40970 }, { "epoch": 0.16556438547655314, "grad_norm": 1073.3936767578125, "learning_rate": 4.9380279607361575e-05, "loss": 72.0243, "step": 40980 }, { "epoch": 0.16560478674192075, "grad_norm": 1150.05029296875, "learning_rate": 4.937950697014585e-05, "loss": 76.5943, "step": 40990 }, { "epoch": 0.1656451880072884, "grad_norm": 482.436279296875, "learning_rate": 4.937873385763908e-05, "loss": 75.9616, "step": 41000 }, { "epoch": 0.16568558927265603, "grad_norm": 650.821044921875, "learning_rate": 4.9377960269856346e-05, "loss": 80.1524, "step": 41010 }, { "epoch": 0.16572599053802364, "grad_norm": 486.8204040527344, "learning_rate": 4.937718620681273e-05, "loss": 73.0701, "step": 41020 }, { "epoch": 0.16576639180339128, "grad_norm": 1333.5208740234375, "learning_rate": 4.937641166852332e-05, "loss": 73.1544, "step": 41030 }, { "epoch": 0.16580679306875892, "grad_norm": 528.667236328125, "learning_rate": 4.937563665500321e-05, "loss": 89.639, "step": 41040 }, { "epoch": 0.16584719433412654, "grad_norm": 924.5604858398438, "learning_rate": 4.937486116626752e-05, "loss": 122.8974, "step": 41050 }, { "epoch": 0.16588759559949418, "grad_norm": 1802.9498291015625, "learning_rate": 4.9374085202331354e-05, "loss": 69.5255, "step": 41060 }, { "epoch": 0.16592799686486182, "grad_norm": 519.0206909179688, "learning_rate": 4.937330876320985e-05, "loss": 89.9981, "step": 41070 }, { "epoch": 0.16596839813022943, "grad_norm": 561.7305908203125, "learning_rate": 4.9372531848918145e-05, "loss": 76.0487, "step": 41080 }, { "epoch": 0.16600879939559707, "grad_norm": 740.5826416015625, "learning_rate": 4.9371754459471384e-05, "loss": 115.638, "step": 41090 }, { "epoch": 0.1660492006609647, "grad_norm": 674.3680419921875, "learning_rate": 4.9370976594884723e-05, "loss": 73.8446, "step": 41100 }, { "epoch": 0.16608960192633232, "grad_norm": 539.9810180664062, "learning_rate": 4.937019825517333e-05, "loss": 57.1198, "step": 41110 }, { "epoch": 0.16613000319169996, "grad_norm": 1040.4820556640625, "learning_rate": 4.936941944035237e-05, "loss": 144.8092, "step": 41120 }, { "epoch": 0.1661704044570676, "grad_norm": 849.093994140625, "learning_rate": 4.936864015043703e-05, "loss": 52.7319, "step": 41130 }, { "epoch": 0.16621080572243524, "grad_norm": 641.831787109375, "learning_rate": 4.936786038544251e-05, "loss": 59.0339, "step": 41140 }, { "epoch": 0.16625120698780285, "grad_norm": 554.9229736328125, "learning_rate": 4.9367080145384006e-05, "loss": 95.6639, "step": 41150 }, { "epoch": 0.1662916082531705, "grad_norm": 392.3255920410156, "learning_rate": 4.936629943027672e-05, "loss": 96.7075, "step": 41160 }, { "epoch": 0.16633200951853813, "grad_norm": 448.32647705078125, "learning_rate": 4.936551824013589e-05, "loss": 65.1692, "step": 41170 }, { "epoch": 0.16637241078390574, "grad_norm": 992.157470703125, "learning_rate": 4.9364736574976736e-05, "loss": 116.2756, "step": 41180 }, { "epoch": 0.16641281204927338, "grad_norm": 383.3032531738281, "learning_rate": 4.93639544348145e-05, "loss": 82.3815, "step": 41190 }, { "epoch": 0.16645321331464102, "grad_norm": 3468.877685546875, "learning_rate": 4.9363171819664434e-05, "loss": 124.7114, "step": 41200 }, { "epoch": 0.16649361458000864, "grad_norm": 959.5894165039062, "learning_rate": 4.936238872954178e-05, "loss": 115.3659, "step": 41210 }, { "epoch": 0.16653401584537628, "grad_norm": 654.4769287109375, "learning_rate": 4.936160516446182e-05, "loss": 84.5516, "step": 41220 }, { "epoch": 0.16657441711074392, "grad_norm": 513.7091064453125, "learning_rate": 4.936082112443983e-05, "loss": 72.7509, "step": 41230 }, { "epoch": 0.16661481837611153, "grad_norm": 1236.0504150390625, "learning_rate": 4.936003660949108e-05, "loss": 82.1154, "step": 41240 }, { "epoch": 0.16665521964147917, "grad_norm": 1588.6417236328125, "learning_rate": 4.9359251619630886e-05, "loss": 76.5787, "step": 41250 }, { "epoch": 0.1666956209068468, "grad_norm": 851.9100341796875, "learning_rate": 4.935846615487453e-05, "loss": 73.6734, "step": 41260 }, { "epoch": 0.16673602217221442, "grad_norm": 439.6374816894531, "learning_rate": 4.935768021523734e-05, "loss": 75.6803, "step": 41270 }, { "epoch": 0.16677642343758206, "grad_norm": 2391.953125, "learning_rate": 4.935689380073464e-05, "loss": 167.1796, "step": 41280 }, { "epoch": 0.1668168247029497, "grad_norm": 591.0051879882812, "learning_rate": 4.935610691138175e-05, "loss": 89.2333, "step": 41290 }, { "epoch": 0.16685722596831734, "grad_norm": 895.1334228515625, "learning_rate": 4.9355319547194014e-05, "loss": 110.6889, "step": 41300 }, { "epoch": 0.16689762723368495, "grad_norm": 873.7748413085938, "learning_rate": 4.935453170818679e-05, "loss": 82.6634, "step": 41310 }, { "epoch": 0.1669380284990526, "grad_norm": 582.4258422851562, "learning_rate": 4.935374339437543e-05, "loss": 82.0953, "step": 41320 }, { "epoch": 0.16697842976442023, "grad_norm": 1372.591064453125, "learning_rate": 4.9352954605775305e-05, "loss": 110.3943, "step": 41330 }, { "epoch": 0.16701883102978785, "grad_norm": 1086.363525390625, "learning_rate": 4.935216534240179e-05, "loss": 103.8, "step": 41340 }, { "epoch": 0.16705923229515549, "grad_norm": 1426.1312255859375, "learning_rate": 4.935137560427027e-05, "loss": 99.1618, "step": 41350 }, { "epoch": 0.16709963356052313, "grad_norm": 1669.37744140625, "learning_rate": 4.935058539139615e-05, "loss": 122.2307, "step": 41360 }, { "epoch": 0.16714003482589074, "grad_norm": 766.4739379882812, "learning_rate": 4.934979470379484e-05, "loss": 77.6077, "step": 41370 }, { "epoch": 0.16718043609125838, "grad_norm": 846.854248046875, "learning_rate": 4.934900354148173e-05, "loss": 87.1017, "step": 41380 }, { "epoch": 0.16722083735662602, "grad_norm": 1178.765869140625, "learning_rate": 4.934821190447228e-05, "loss": 118.6451, "step": 41390 }, { "epoch": 0.16726123862199363, "grad_norm": 622.7171020507812, "learning_rate": 4.9347419792781876e-05, "loss": 92.5647, "step": 41400 }, { "epoch": 0.16730163988736127, "grad_norm": 871.9891967773438, "learning_rate": 4.934662720642601e-05, "loss": 78.3886, "step": 41410 }, { "epoch": 0.1673420411527289, "grad_norm": 601.3154296875, "learning_rate": 4.934583414542011e-05, "loss": 69.2431, "step": 41420 }, { "epoch": 0.16738244241809652, "grad_norm": 620.5543823242188, "learning_rate": 4.9345040609779634e-05, "loss": 74.3392, "step": 41430 }, { "epoch": 0.16742284368346416, "grad_norm": 803.2666625976562, "learning_rate": 4.934424659952006e-05, "loss": 90.4268, "step": 41440 }, { "epoch": 0.1674632449488318, "grad_norm": 1084.6322021484375, "learning_rate": 4.934345211465686e-05, "loss": 91.4339, "step": 41450 }, { "epoch": 0.16750364621419944, "grad_norm": 570.4534912109375, "learning_rate": 4.934265715520553e-05, "loss": 67.0539, "step": 41460 }, { "epoch": 0.16754404747956705, "grad_norm": 5001.0390625, "learning_rate": 4.934186172118157e-05, "loss": 115.2946, "step": 41470 }, { "epoch": 0.1675844487449347, "grad_norm": 0.0, "learning_rate": 4.934106581260049e-05, "loss": 71.3725, "step": 41480 }, { "epoch": 0.16762485001030233, "grad_norm": 674.1163940429688, "learning_rate": 4.934026942947779e-05, "loss": 67.2382, "step": 41490 }, { "epoch": 0.16766525127566995, "grad_norm": 1031.8499755859375, "learning_rate": 4.933947257182901e-05, "loss": 81.7795, "step": 41500 }, { "epoch": 0.1677056525410376, "grad_norm": 1388.8055419921875, "learning_rate": 4.933867523966968e-05, "loss": 67.2926, "step": 41510 }, { "epoch": 0.16774605380640523, "grad_norm": 1057.6483154296875, "learning_rate": 4.933787743301534e-05, "loss": 78.5172, "step": 41520 }, { "epoch": 0.16778645507177284, "grad_norm": 1191.138427734375, "learning_rate": 4.933707915188156e-05, "loss": 115.7021, "step": 41530 }, { "epoch": 0.16782685633714048, "grad_norm": 803.36376953125, "learning_rate": 4.933628039628389e-05, "loss": 80.2134, "step": 41540 }, { "epoch": 0.16786725760250812, "grad_norm": 434.1454772949219, "learning_rate": 4.9335481166237904e-05, "loss": 93.0307, "step": 41550 }, { "epoch": 0.16790765886787573, "grad_norm": 1040.9764404296875, "learning_rate": 4.933468146175918e-05, "loss": 108.4676, "step": 41560 }, { "epoch": 0.16794806013324337, "grad_norm": 2650.668701171875, "learning_rate": 4.933388128286331e-05, "loss": 124.0231, "step": 41570 }, { "epoch": 0.167988461398611, "grad_norm": 850.1209716796875, "learning_rate": 4.933308062956591e-05, "loss": 98.8213, "step": 41580 }, { "epoch": 0.16802886266397862, "grad_norm": 1043.2742919921875, "learning_rate": 4.9332279501882564e-05, "loss": 86.2264, "step": 41590 }, { "epoch": 0.16806926392934626, "grad_norm": 1196.578369140625, "learning_rate": 4.93314778998289e-05, "loss": 80.5056, "step": 41600 }, { "epoch": 0.1681096651947139, "grad_norm": 738.3115234375, "learning_rate": 4.933067582342056e-05, "loss": 111.9835, "step": 41610 }, { "epoch": 0.16815006646008154, "grad_norm": 828.2302856445312, "learning_rate": 4.932987327267316e-05, "loss": 87.0005, "step": 41620 }, { "epoch": 0.16819046772544916, "grad_norm": 474.343017578125, "learning_rate": 4.932907024760236e-05, "loss": 93.0332, "step": 41630 }, { "epoch": 0.1682308689908168, "grad_norm": 718.5823364257812, "learning_rate": 4.93282667482238e-05, "loss": 66.0602, "step": 41640 }, { "epoch": 0.16827127025618444, "grad_norm": 491.5641784667969, "learning_rate": 4.9327462774553166e-05, "loss": 93.1942, "step": 41650 }, { "epoch": 0.16831167152155205, "grad_norm": 244.37391662597656, "learning_rate": 4.9326658326606114e-05, "loss": 80.5441, "step": 41660 }, { "epoch": 0.1683520727869197, "grad_norm": 621.1357421875, "learning_rate": 4.9325853404398337e-05, "loss": 107.9278, "step": 41670 }, { "epoch": 0.16839247405228733, "grad_norm": 1189.0419921875, "learning_rate": 4.9325048007945526e-05, "loss": 98.2764, "step": 41680 }, { "epoch": 0.16843287531765494, "grad_norm": 1160.5760498046875, "learning_rate": 4.9324242137263376e-05, "loss": 85.1483, "step": 41690 }, { "epoch": 0.16847327658302258, "grad_norm": 717.5256958007812, "learning_rate": 4.93234357923676e-05, "loss": 68.967, "step": 41700 }, { "epoch": 0.16851367784839022, "grad_norm": 738.8279418945312, "learning_rate": 4.932262897327393e-05, "loss": 94.0461, "step": 41710 }, { "epoch": 0.16855407911375783, "grad_norm": 372.3353576660156, "learning_rate": 4.9321821679998074e-05, "loss": 107.4272, "step": 41720 }, { "epoch": 0.16859448037912547, "grad_norm": 778.2406616210938, "learning_rate": 4.932101391255579e-05, "loss": 127.5508, "step": 41730 }, { "epoch": 0.1686348816444931, "grad_norm": 296.44610595703125, "learning_rate": 4.9320205670962814e-05, "loss": 68.8699, "step": 41740 }, { "epoch": 0.16867528290986072, "grad_norm": 1934.7763671875, "learning_rate": 4.931939695523492e-05, "loss": 96.5082, "step": 41750 }, { "epoch": 0.16871568417522836, "grad_norm": 920.8310546875, "learning_rate": 4.9318587765387845e-05, "loss": 84.8141, "step": 41760 }, { "epoch": 0.168756085440596, "grad_norm": 785.7876586914062, "learning_rate": 4.93177781014374e-05, "loss": 69.8683, "step": 41770 }, { "epoch": 0.16879648670596364, "grad_norm": 747.9173583984375, "learning_rate": 4.9316967963399335e-05, "loss": 87.638, "step": 41780 }, { "epoch": 0.16883688797133126, "grad_norm": 1194.38037109375, "learning_rate": 4.931615735128947e-05, "loss": 66.9209, "step": 41790 }, { "epoch": 0.1688772892366989, "grad_norm": 944.1591796875, "learning_rate": 4.9315346265123594e-05, "loss": 80.2107, "step": 41800 }, { "epoch": 0.16891769050206654, "grad_norm": 1037.114013671875, "learning_rate": 4.9314534704917525e-05, "loss": 82.2505, "step": 41810 }, { "epoch": 0.16895809176743415, "grad_norm": 723.7459716796875, "learning_rate": 4.931372267068708e-05, "loss": 65.5946, "step": 41820 }, { "epoch": 0.1689984930328018, "grad_norm": 800.6956176757812, "learning_rate": 4.93129101624481e-05, "loss": 88.3285, "step": 41830 }, { "epoch": 0.16903889429816943, "grad_norm": 634.937744140625, "learning_rate": 4.9312097180216414e-05, "loss": 85.8466, "step": 41840 }, { "epoch": 0.16907929556353704, "grad_norm": 438.7951965332031, "learning_rate": 4.9311283724007887e-05, "loss": 59.9148, "step": 41850 }, { "epoch": 0.16911969682890468, "grad_norm": 6580.62451171875, "learning_rate": 4.931046979383835e-05, "loss": 127.95, "step": 41860 }, { "epoch": 0.16916009809427232, "grad_norm": 1237.6083984375, "learning_rate": 4.9309655389723705e-05, "loss": 81.8312, "step": 41870 }, { "epoch": 0.16920049935963993, "grad_norm": 645.1431274414062, "learning_rate": 4.9308840511679804e-05, "loss": 83.625, "step": 41880 }, { "epoch": 0.16924090062500757, "grad_norm": 808.107177734375, "learning_rate": 4.930802515972255e-05, "loss": 82.5783, "step": 41890 }, { "epoch": 0.1692813018903752, "grad_norm": 542.672119140625, "learning_rate": 4.930720933386782e-05, "loss": 77.4106, "step": 41900 }, { "epoch": 0.16932170315574283, "grad_norm": 1336.5858154296875, "learning_rate": 4.930639303413154e-05, "loss": 124.7732, "step": 41910 }, { "epoch": 0.16936210442111047, "grad_norm": 591.5770263671875, "learning_rate": 4.9305576260529607e-05, "loss": 65.4329, "step": 41920 }, { "epoch": 0.1694025056864781, "grad_norm": 804.681396484375, "learning_rate": 4.930475901307795e-05, "loss": 76.3269, "step": 41930 }, { "epoch": 0.16944290695184575, "grad_norm": 987.9590454101562, "learning_rate": 4.930394129179251e-05, "loss": 71.606, "step": 41940 }, { "epoch": 0.16948330821721336, "grad_norm": 766.5450439453125, "learning_rate": 4.930312309668922e-05, "loss": 109.5266, "step": 41950 }, { "epoch": 0.169523709482581, "grad_norm": 337.4928894042969, "learning_rate": 4.930230442778403e-05, "loss": 74.3608, "step": 41960 }, { "epoch": 0.16956411074794864, "grad_norm": 3450.510009765625, "learning_rate": 4.930148528509291e-05, "loss": 109.2802, "step": 41970 }, { "epoch": 0.16960451201331625, "grad_norm": 864.0614624023438, "learning_rate": 4.930066566863182e-05, "loss": 78.3614, "step": 41980 }, { "epoch": 0.1696449132786839, "grad_norm": 930.443115234375, "learning_rate": 4.929984557841674e-05, "loss": 126.5678, "step": 41990 }, { "epoch": 0.16968531454405153, "grad_norm": 416.3900146484375, "learning_rate": 4.929902501446366e-05, "loss": 62.9766, "step": 42000 }, { "epoch": 0.16972571580941914, "grad_norm": 533.2973022460938, "learning_rate": 4.929820397678858e-05, "loss": 64.3053, "step": 42010 }, { "epoch": 0.16976611707478678, "grad_norm": 357.4167175292969, "learning_rate": 4.92973824654075e-05, "loss": 43.4001, "step": 42020 }, { "epoch": 0.16980651834015442, "grad_norm": 1577.94384765625, "learning_rate": 4.929656048033644e-05, "loss": 72.9057, "step": 42030 }, { "epoch": 0.16984691960552203, "grad_norm": 4723.44775390625, "learning_rate": 4.929573802159143e-05, "loss": 78.4315, "step": 42040 }, { "epoch": 0.16988732087088967, "grad_norm": 607.1101684570312, "learning_rate": 4.929491508918849e-05, "loss": 83.3789, "step": 42050 }, { "epoch": 0.16992772213625731, "grad_norm": 527.4968872070312, "learning_rate": 4.929409168314368e-05, "loss": 48.889, "step": 42060 }, { "epoch": 0.16996812340162493, "grad_norm": 1297.30029296875, "learning_rate": 4.9293267803473046e-05, "loss": 85.3707, "step": 42070 }, { "epoch": 0.17000852466699257, "grad_norm": 993.5496215820312, "learning_rate": 4.9292443450192645e-05, "loss": 103.5595, "step": 42080 }, { "epoch": 0.1700489259323602, "grad_norm": 948.1973876953125, "learning_rate": 4.929161862331855e-05, "loss": 91.8254, "step": 42090 }, { "epoch": 0.17008932719772785, "grad_norm": 740.2516479492188, "learning_rate": 4.929079332286685e-05, "loss": 65.2068, "step": 42100 }, { "epoch": 0.17012972846309546, "grad_norm": 745.3865356445312, "learning_rate": 4.9289967548853627e-05, "loss": 49.6714, "step": 42110 }, { "epoch": 0.1701701297284631, "grad_norm": 1320.7667236328125, "learning_rate": 4.928914130129498e-05, "loss": 86.5712, "step": 42120 }, { "epoch": 0.17021053099383074, "grad_norm": 1176.5162353515625, "learning_rate": 4.928831458020702e-05, "loss": 113.8014, "step": 42130 }, { "epoch": 0.17025093225919835, "grad_norm": 566.5439453125, "learning_rate": 4.928748738560586e-05, "loss": 72.2683, "step": 42140 }, { "epoch": 0.170291333524566, "grad_norm": 1156.8316650390625, "learning_rate": 4.9286659717507635e-05, "loss": 76.2927, "step": 42150 }, { "epoch": 0.17033173478993363, "grad_norm": 719.6638793945312, "learning_rate": 4.9285831575928465e-05, "loss": 69.5816, "step": 42160 }, { "epoch": 0.17037213605530124, "grad_norm": 1295.44384765625, "learning_rate": 4.9285002960884515e-05, "loss": 84.5863, "step": 42170 }, { "epoch": 0.17041253732066888, "grad_norm": 1051.867431640625, "learning_rate": 4.9284173872391925e-05, "loss": 82.4616, "step": 42180 }, { "epoch": 0.17045293858603652, "grad_norm": 859.74609375, "learning_rate": 4.928334431046686e-05, "loss": 111.8195, "step": 42190 }, { "epoch": 0.17049333985140414, "grad_norm": 1075.9024658203125, "learning_rate": 4.92825142751255e-05, "loss": 85.3877, "step": 42200 }, { "epoch": 0.17053374111677178, "grad_norm": 1071.4013671875, "learning_rate": 4.9281683766384026e-05, "loss": 77.8994, "step": 42210 }, { "epoch": 0.17057414238213942, "grad_norm": 841.3255615234375, "learning_rate": 4.9280852784258624e-05, "loss": 104.7014, "step": 42220 }, { "epoch": 0.17061454364750703, "grad_norm": 1585.5380859375, "learning_rate": 4.928002132876549e-05, "loss": 73.6184, "step": 42230 }, { "epoch": 0.17065494491287467, "grad_norm": 1064.724609375, "learning_rate": 4.9279189399920844e-05, "loss": 80.9862, "step": 42240 }, { "epoch": 0.1706953461782423, "grad_norm": 1124.08642578125, "learning_rate": 4.9278356997740904e-05, "loss": 81.2137, "step": 42250 }, { "epoch": 0.17073574744360995, "grad_norm": 1296.80078125, "learning_rate": 4.9277524122241894e-05, "loss": 76.6494, "step": 42260 }, { "epoch": 0.17077614870897756, "grad_norm": 705.2342529296875, "learning_rate": 4.927669077344005e-05, "loss": 124.0301, "step": 42270 }, { "epoch": 0.1708165499743452, "grad_norm": 699.51123046875, "learning_rate": 4.927585695135162e-05, "loss": 90.7621, "step": 42280 }, { "epoch": 0.17085695123971284, "grad_norm": 1193.2637939453125, "learning_rate": 4.9275022655992864e-05, "loss": 77.3909, "step": 42290 }, { "epoch": 0.17089735250508045, "grad_norm": 1224.87744140625, "learning_rate": 4.927418788738004e-05, "loss": 90.4889, "step": 42300 }, { "epoch": 0.1709377537704481, "grad_norm": 566.4915771484375, "learning_rate": 4.927335264552943e-05, "loss": 76.954, "step": 42310 }, { "epoch": 0.17097815503581573, "grad_norm": 485.48248291015625, "learning_rate": 4.9272516930457314e-05, "loss": 85.5353, "step": 42320 }, { "epoch": 0.17101855630118334, "grad_norm": 510.9328918457031, "learning_rate": 4.927168074217998e-05, "loss": 78.9659, "step": 42330 }, { "epoch": 0.17105895756655098, "grad_norm": 542.828125, "learning_rate": 4.927084408071373e-05, "loss": 77.3036, "step": 42340 }, { "epoch": 0.17109935883191862, "grad_norm": 1816.97265625, "learning_rate": 4.927000694607489e-05, "loss": 89.3204, "step": 42350 }, { "epoch": 0.17113976009728624, "grad_norm": 558.980712890625, "learning_rate": 4.9269169338279766e-05, "loss": 97.5531, "step": 42360 }, { "epoch": 0.17118016136265388, "grad_norm": 2137.310791015625, "learning_rate": 4.9268331257344685e-05, "loss": 111.0955, "step": 42370 }, { "epoch": 0.17122056262802152, "grad_norm": 349.211181640625, "learning_rate": 4.9267492703286e-05, "loss": 116.3871, "step": 42380 }, { "epoch": 0.17126096389338913, "grad_norm": 1252.0584716796875, "learning_rate": 4.926665367612005e-05, "loss": 78.8891, "step": 42390 }, { "epoch": 0.17130136515875677, "grad_norm": 587.7830200195312, "learning_rate": 4.9265814175863186e-05, "loss": 90.0841, "step": 42400 }, { "epoch": 0.1713417664241244, "grad_norm": 1336.141845703125, "learning_rate": 4.926497420253179e-05, "loss": 99.6293, "step": 42410 }, { "epoch": 0.17138216768949205, "grad_norm": 1955.26171875, "learning_rate": 4.9264133756142224e-05, "loss": 74.8807, "step": 42420 }, { "epoch": 0.17142256895485966, "grad_norm": 1671.5946044921875, "learning_rate": 4.926329283671088e-05, "loss": 100.2369, "step": 42430 }, { "epoch": 0.1714629702202273, "grad_norm": 489.6711120605469, "learning_rate": 4.926245144425415e-05, "loss": 92.0346, "step": 42440 }, { "epoch": 0.17150337148559494, "grad_norm": 1527.400634765625, "learning_rate": 4.9261609578788435e-05, "loss": 104.6484, "step": 42450 }, { "epoch": 0.17154377275096255, "grad_norm": 1441.6905517578125, "learning_rate": 4.926076724033016e-05, "loss": 118.5627, "step": 42460 }, { "epoch": 0.1715841740163302, "grad_norm": 1650.8604736328125, "learning_rate": 4.9259924428895734e-05, "loss": 95.2558, "step": 42470 }, { "epoch": 0.17162457528169783, "grad_norm": 1053.914794921875, "learning_rate": 4.925908114450158e-05, "loss": 92.1593, "step": 42480 }, { "epoch": 0.17166497654706545, "grad_norm": 485.0592956542969, "learning_rate": 4.925823738716416e-05, "loss": 48.5927, "step": 42490 }, { "epoch": 0.17170537781243309, "grad_norm": 551.2293701171875, "learning_rate": 4.925739315689991e-05, "loss": 111.4056, "step": 42500 }, { "epoch": 0.17174577907780073, "grad_norm": 462.1259460449219, "learning_rate": 4.92565484537253e-05, "loss": 81.3713, "step": 42510 }, { "epoch": 0.17178618034316834, "grad_norm": 976.673095703125, "learning_rate": 4.925570327765678e-05, "loss": 97.2899, "step": 42520 }, { "epoch": 0.17182658160853598, "grad_norm": 1159.7283935546875, "learning_rate": 4.9254857628710846e-05, "loss": 72.4565, "step": 42530 }, { "epoch": 0.17186698287390362, "grad_norm": 886.2018432617188, "learning_rate": 4.9254011506903963e-05, "loss": 75.7661, "step": 42540 }, { "epoch": 0.17190738413927123, "grad_norm": 716.5491333007812, "learning_rate": 4.925316491225265e-05, "loss": 88.0013, "step": 42550 }, { "epoch": 0.17194778540463887, "grad_norm": 741.2266235351562, "learning_rate": 4.925231784477339e-05, "loss": 106.4336, "step": 42560 }, { "epoch": 0.1719881866700065, "grad_norm": 1328.2393798828125, "learning_rate": 4.9251470304482716e-05, "loss": 76.8043, "step": 42570 }, { "epoch": 0.17202858793537412, "grad_norm": 3505.359375, "learning_rate": 4.925062229139714e-05, "loss": 113.7022, "step": 42580 }, { "epoch": 0.17206898920074176, "grad_norm": 560.7538452148438, "learning_rate": 4.924977380553321e-05, "loss": 78.8128, "step": 42590 }, { "epoch": 0.1721093904661094, "grad_norm": 936.1580810546875, "learning_rate": 4.924892484690743e-05, "loss": 86.6639, "step": 42600 }, { "epoch": 0.17214979173147704, "grad_norm": 484.0994873046875, "learning_rate": 4.924807541553639e-05, "loss": 91.6706, "step": 42610 }, { "epoch": 0.17219019299684465, "grad_norm": 490.91448974609375, "learning_rate": 4.924722551143664e-05, "loss": 52.3336, "step": 42620 }, { "epoch": 0.1722305942622123, "grad_norm": 1924.5557861328125, "learning_rate": 4.924637513462474e-05, "loss": 98.4392, "step": 42630 }, { "epoch": 0.17227099552757993, "grad_norm": 738.7843017578125, "learning_rate": 4.9245524285117274e-05, "loss": 90.864, "step": 42640 }, { "epoch": 0.17231139679294755, "grad_norm": 805.4695434570312, "learning_rate": 4.924467296293083e-05, "loss": 102.7175, "step": 42650 }, { "epoch": 0.1723517980583152, "grad_norm": 710.0062866210938, "learning_rate": 4.924382116808201e-05, "loss": 107.4096, "step": 42660 }, { "epoch": 0.17239219932368283, "grad_norm": 703.4191284179688, "learning_rate": 4.924296890058741e-05, "loss": 82.6557, "step": 42670 }, { "epoch": 0.17243260058905044, "grad_norm": 1078.1011962890625, "learning_rate": 4.924211616046365e-05, "loss": 86.0516, "step": 42680 }, { "epoch": 0.17247300185441808, "grad_norm": 970.2047729492188, "learning_rate": 4.924126294772735e-05, "loss": 69.9036, "step": 42690 }, { "epoch": 0.17251340311978572, "grad_norm": 1400.3450927734375, "learning_rate": 4.924040926239515e-05, "loss": 103.4677, "step": 42700 }, { "epoch": 0.17255380438515333, "grad_norm": 1445.791259765625, "learning_rate": 4.9239555104483695e-05, "loss": 115.1777, "step": 42710 }, { "epoch": 0.17259420565052097, "grad_norm": 570.3566284179688, "learning_rate": 4.923870047400964e-05, "loss": 83.1902, "step": 42720 }, { "epoch": 0.1726346069158886, "grad_norm": 880.5896606445312, "learning_rate": 4.923784537098963e-05, "loss": 106.7852, "step": 42730 }, { "epoch": 0.17267500818125622, "grad_norm": 13018.2724609375, "learning_rate": 4.9236989795440346e-05, "loss": 108.5354, "step": 42740 }, { "epoch": 0.17271540944662386, "grad_norm": 1054.298583984375, "learning_rate": 4.9236133747378475e-05, "loss": 81.4467, "step": 42750 }, { "epoch": 0.1727558107119915, "grad_norm": 1258.8382568359375, "learning_rate": 4.9235277226820695e-05, "loss": 115.5568, "step": 42760 }, { "epoch": 0.17279621197735914, "grad_norm": 955.5006103515625, "learning_rate": 4.923442023378371e-05, "loss": 104.4606, "step": 42770 }, { "epoch": 0.17283661324272676, "grad_norm": 797.49951171875, "learning_rate": 4.9233562768284225e-05, "loss": 75.7812, "step": 42780 }, { "epoch": 0.1728770145080944, "grad_norm": 550.285400390625, "learning_rate": 4.923270483033896e-05, "loss": 90.7212, "step": 42790 }, { "epoch": 0.17291741577346204, "grad_norm": 828.7791137695312, "learning_rate": 4.923184641996463e-05, "loss": 66.7497, "step": 42800 }, { "epoch": 0.17295781703882965, "grad_norm": 862.7196044921875, "learning_rate": 4.923098753717798e-05, "loss": 74.5591, "step": 42810 }, { "epoch": 0.1729982183041973, "grad_norm": 1932.47802734375, "learning_rate": 4.923012818199576e-05, "loss": 113.689, "step": 42820 }, { "epoch": 0.17303861956956493, "grad_norm": 467.0488586425781, "learning_rate": 4.922926835443472e-05, "loss": 80.5391, "step": 42830 }, { "epoch": 0.17307902083493254, "grad_norm": 1409.27734375, "learning_rate": 4.922840805451161e-05, "loss": 141.5631, "step": 42840 }, { "epoch": 0.17311942210030018, "grad_norm": 470.03057861328125, "learning_rate": 4.9227547282243214e-05, "loss": 55.4408, "step": 42850 }, { "epoch": 0.17315982336566782, "grad_norm": 399.5194091796875, "learning_rate": 4.9226686037646314e-05, "loss": 95.3332, "step": 42860 }, { "epoch": 0.17320022463103543, "grad_norm": 1028.5482177734375, "learning_rate": 4.92258243207377e-05, "loss": 84.2015, "step": 42870 }, { "epoch": 0.17324062589640307, "grad_norm": 936.9313354492188, "learning_rate": 4.922496213153416e-05, "loss": 85.3661, "step": 42880 }, { "epoch": 0.1732810271617707, "grad_norm": 614.690673828125, "learning_rate": 4.922409947005251e-05, "loss": 49.2994, "step": 42890 }, { "epoch": 0.17332142842713832, "grad_norm": 845.6972045898438, "learning_rate": 4.922323633630958e-05, "loss": 44.1013, "step": 42900 }, { "epoch": 0.17336182969250596, "grad_norm": 1084.47900390625, "learning_rate": 4.9222372730322176e-05, "loss": 131.938, "step": 42910 }, { "epoch": 0.1734022309578736, "grad_norm": 1015.8482666015625, "learning_rate": 4.922150865210715e-05, "loss": 90.8332, "step": 42920 }, { "epoch": 0.17344263222324124, "grad_norm": 548.1792602539062, "learning_rate": 4.922064410168134e-05, "loss": 94.3187, "step": 42930 }, { "epoch": 0.17348303348860886, "grad_norm": 625.8770751953125, "learning_rate": 4.92197790790616e-05, "loss": 86.6768, "step": 42940 }, { "epoch": 0.1735234347539765, "grad_norm": 0.0, "learning_rate": 4.9218913584264814e-05, "loss": 86.3633, "step": 42950 }, { "epoch": 0.17356383601934414, "grad_norm": 646.9606323242188, "learning_rate": 4.9218047617307824e-05, "loss": 88.7674, "step": 42960 }, { "epoch": 0.17360423728471175, "grad_norm": 249.1079864501953, "learning_rate": 4.9217181178207535e-05, "loss": 61.6128, "step": 42970 }, { "epoch": 0.1736446385500794, "grad_norm": 543.1034545898438, "learning_rate": 4.9216314266980824e-05, "loss": 79.8014, "step": 42980 }, { "epoch": 0.17368503981544703, "grad_norm": 662.4139404296875, "learning_rate": 4.921544688364461e-05, "loss": 95.9196, "step": 42990 }, { "epoch": 0.17372544108081464, "grad_norm": 422.02825927734375, "learning_rate": 4.9214579028215776e-05, "loss": 91.8231, "step": 43000 }, { "epoch": 0.17376584234618228, "grad_norm": 891.07958984375, "learning_rate": 4.921371070071127e-05, "loss": 74.39, "step": 43010 }, { "epoch": 0.17380624361154992, "grad_norm": 1176.2237548828125, "learning_rate": 4.9212841901148e-05, "loss": 90.9235, "step": 43020 }, { "epoch": 0.17384664487691753, "grad_norm": 4489.8310546875, "learning_rate": 4.9211972629542926e-05, "loss": 124.854, "step": 43030 }, { "epoch": 0.17388704614228517, "grad_norm": 639.3281860351562, "learning_rate": 4.9211102885912965e-05, "loss": 76.7055, "step": 43040 }, { "epoch": 0.1739274474076528, "grad_norm": 1555.4083251953125, "learning_rate": 4.9210232670275094e-05, "loss": 110.0548, "step": 43050 }, { "epoch": 0.17396784867302043, "grad_norm": 727.7996826171875, "learning_rate": 4.920936198264627e-05, "loss": 95.9471, "step": 43060 }, { "epoch": 0.17400824993838807, "grad_norm": 1188.0120849609375, "learning_rate": 4.920849082304347e-05, "loss": 113.6858, "step": 43070 }, { "epoch": 0.1740486512037557, "grad_norm": 1449.6580810546875, "learning_rate": 4.920761919148369e-05, "loss": 94.3256, "step": 43080 }, { "epoch": 0.17408905246912335, "grad_norm": 454.728759765625, "learning_rate": 4.9206747087983894e-05, "loss": 78.3794, "step": 43090 }, { "epoch": 0.17412945373449096, "grad_norm": 815.9066162109375, "learning_rate": 4.9205874512561115e-05, "loss": 89.8247, "step": 43100 }, { "epoch": 0.1741698549998586, "grad_norm": 759.9569702148438, "learning_rate": 4.920500146523234e-05, "loss": 63.8019, "step": 43110 }, { "epoch": 0.17421025626522624, "grad_norm": 2164.500732421875, "learning_rate": 4.920412794601461e-05, "loss": 100.4632, "step": 43120 }, { "epoch": 0.17425065753059385, "grad_norm": 662.5512084960938, "learning_rate": 4.920325395492493e-05, "loss": 107.611, "step": 43130 }, { "epoch": 0.1742910587959615, "grad_norm": 422.92645263671875, "learning_rate": 4.920237949198037e-05, "loss": 87.6762, "step": 43140 }, { "epoch": 0.17433146006132913, "grad_norm": 915.8494873046875, "learning_rate": 4.9201504557197955e-05, "loss": 66.672, "step": 43150 }, { "epoch": 0.17437186132669674, "grad_norm": 988.4778442382812, "learning_rate": 4.9200629150594744e-05, "loss": 91.4257, "step": 43160 }, { "epoch": 0.17441226259206438, "grad_norm": 794.6514282226562, "learning_rate": 4.919975327218781e-05, "loss": 87.8521, "step": 43170 }, { "epoch": 0.17445266385743202, "grad_norm": 884.67578125, "learning_rate": 4.919887692199423e-05, "loss": 74.1221, "step": 43180 }, { "epoch": 0.17449306512279963, "grad_norm": 764.0117797851562, "learning_rate": 4.919800010003108e-05, "loss": 84.2991, "step": 43190 }, { "epoch": 0.17453346638816727, "grad_norm": 1031.7420654296875, "learning_rate": 4.919712280631547e-05, "loss": 116.3033, "step": 43200 }, { "epoch": 0.17457386765353491, "grad_norm": 593.7903442382812, "learning_rate": 4.9196245040864486e-05, "loss": 81.53, "step": 43210 }, { "epoch": 0.17461426891890253, "grad_norm": 929.322021484375, "learning_rate": 4.919536680369525e-05, "loss": 87.4219, "step": 43220 }, { "epoch": 0.17465467018427017, "grad_norm": 3907.416015625, "learning_rate": 4.919448809482489e-05, "loss": 81.7991, "step": 43230 }, { "epoch": 0.1746950714496378, "grad_norm": 893.3704223632812, "learning_rate": 4.9193608914270515e-05, "loss": 96.2677, "step": 43240 }, { "epoch": 0.17473547271500545, "grad_norm": 659.1729125976562, "learning_rate": 4.919272926204929e-05, "loss": 98.551, "step": 43250 }, { "epoch": 0.17477587398037306, "grad_norm": 995.1451416015625, "learning_rate": 4.9191849138178334e-05, "loss": 71.9214, "step": 43260 }, { "epoch": 0.1748162752457407, "grad_norm": 1777.868896484375, "learning_rate": 4.919096854267484e-05, "loss": 95.7281, "step": 43270 }, { "epoch": 0.17485667651110834, "grad_norm": 779.3260498046875, "learning_rate": 4.9190087475555955e-05, "loss": 104.3814, "step": 43280 }, { "epoch": 0.17489707777647595, "grad_norm": 939.8629760742188, "learning_rate": 4.9189205936838864e-05, "loss": 71.219, "step": 43290 }, { "epoch": 0.1749374790418436, "grad_norm": 808.994140625, "learning_rate": 4.9188323926540746e-05, "loss": 63.1995, "step": 43300 }, { "epoch": 0.17497788030721123, "grad_norm": 1255.6544189453125, "learning_rate": 4.918744144467881e-05, "loss": 73.3233, "step": 43310 }, { "epoch": 0.17501828157257884, "grad_norm": 1129.1463623046875, "learning_rate": 4.918655849127024e-05, "loss": 80.2645, "step": 43320 }, { "epoch": 0.17505868283794648, "grad_norm": 0.0, "learning_rate": 4.918567506633226e-05, "loss": 107.1662, "step": 43330 }, { "epoch": 0.17509908410331412, "grad_norm": 219.4386444091797, "learning_rate": 4.91847911698821e-05, "loss": 66.5381, "step": 43340 }, { "epoch": 0.17513948536868174, "grad_norm": 432.4940185546875, "learning_rate": 4.918390680193698e-05, "loss": 73.7235, "step": 43350 }, { "epoch": 0.17517988663404938, "grad_norm": 645.31494140625, "learning_rate": 4.918302196251415e-05, "loss": 110.384, "step": 43360 }, { "epoch": 0.17522028789941702, "grad_norm": 3361.419677734375, "learning_rate": 4.918213665163085e-05, "loss": 122.9304, "step": 43370 }, { "epoch": 0.17526068916478463, "grad_norm": 898.6963500976562, "learning_rate": 4.918125086930435e-05, "loss": 79.2204, "step": 43380 }, { "epoch": 0.17530109043015227, "grad_norm": 982.2764892578125, "learning_rate": 4.918036461555192e-05, "loss": 87.4724, "step": 43390 }, { "epoch": 0.1753414916955199, "grad_norm": 423.7979431152344, "learning_rate": 4.9179477890390825e-05, "loss": 96.8271, "step": 43400 }, { "epoch": 0.17538189296088755, "grad_norm": 994.8875732421875, "learning_rate": 4.917859069383836e-05, "loss": 65.6866, "step": 43410 }, { "epoch": 0.17542229422625516, "grad_norm": 488.28973388671875, "learning_rate": 4.9177703025911825e-05, "loss": 67.0737, "step": 43420 }, { "epoch": 0.1754626954916228, "grad_norm": 927.5771484375, "learning_rate": 4.917681488662852e-05, "loss": 96.7948, "step": 43430 }, { "epoch": 0.17550309675699044, "grad_norm": 1078.08837890625, "learning_rate": 4.917592627600577e-05, "loss": 77.3857, "step": 43440 }, { "epoch": 0.17554349802235805, "grad_norm": 1068.2432861328125, "learning_rate": 4.917503719406088e-05, "loss": 82.1481, "step": 43450 }, { "epoch": 0.1755838992877257, "grad_norm": 622.7982177734375, "learning_rate": 4.91741476408112e-05, "loss": 88.3467, "step": 43460 }, { "epoch": 0.17562430055309333, "grad_norm": 529.6117553710938, "learning_rate": 4.917325761627406e-05, "loss": 94.7772, "step": 43470 }, { "epoch": 0.17566470181846094, "grad_norm": 527.8485107421875, "learning_rate": 4.917236712046682e-05, "loss": 49.5305, "step": 43480 }, { "epoch": 0.17570510308382858, "grad_norm": 714.2109375, "learning_rate": 4.917147615340684e-05, "loss": 84.2201, "step": 43490 }, { "epoch": 0.17574550434919622, "grad_norm": 836.039794921875, "learning_rate": 4.917058471511149e-05, "loss": 73.1889, "step": 43500 }, { "epoch": 0.17578590561456384, "grad_norm": 1239.9703369140625, "learning_rate": 4.9169692805598145e-05, "loss": 94.0238, "step": 43510 }, { "epoch": 0.17582630687993148, "grad_norm": 447.8238830566406, "learning_rate": 4.916880042488419e-05, "loss": 56.3747, "step": 43520 }, { "epoch": 0.17586670814529912, "grad_norm": 646.1732788085938, "learning_rate": 4.916790757298704e-05, "loss": 73.1238, "step": 43530 }, { "epoch": 0.17590710941066673, "grad_norm": 2080.33984375, "learning_rate": 4.9167014249924075e-05, "loss": 111.906, "step": 43540 }, { "epoch": 0.17594751067603437, "grad_norm": 500.1817932128906, "learning_rate": 4.9166120455712736e-05, "loss": 55.3391, "step": 43550 }, { "epoch": 0.175987911941402, "grad_norm": 495.2890319824219, "learning_rate": 4.916522619037043e-05, "loss": 111.2715, "step": 43560 }, { "epoch": 0.17602831320676965, "grad_norm": 539.0352783203125, "learning_rate": 4.91643314539146e-05, "loss": 72.2024, "step": 43570 }, { "epoch": 0.17606871447213726, "grad_norm": 1020.1885375976562, "learning_rate": 4.916343624636269e-05, "loss": 133.4901, "step": 43580 }, { "epoch": 0.1761091157375049, "grad_norm": 463.427978515625, "learning_rate": 4.916254056773215e-05, "loss": 79.8422, "step": 43590 }, { "epoch": 0.17614951700287254, "grad_norm": 1095.344970703125, "learning_rate": 4.916164441804044e-05, "loss": 79.1968, "step": 43600 }, { "epoch": 0.17618991826824015, "grad_norm": 1455.2132568359375, "learning_rate": 4.916074779730504e-05, "loss": 79.4097, "step": 43610 }, { "epoch": 0.1762303195336078, "grad_norm": 854.4801025390625, "learning_rate": 4.915985070554341e-05, "loss": 108.5478, "step": 43620 }, { "epoch": 0.17627072079897543, "grad_norm": 1308.75341796875, "learning_rate": 4.915895314277306e-05, "loss": 64.448, "step": 43630 }, { "epoch": 0.17631112206434305, "grad_norm": 755.5027465820312, "learning_rate": 4.915805510901148e-05, "loss": 99.9466, "step": 43640 }, { "epoch": 0.17635152332971069, "grad_norm": 3085.270751953125, "learning_rate": 4.9157156604276175e-05, "loss": 99.849, "step": 43650 }, { "epoch": 0.17639192459507833, "grad_norm": 1174.489013671875, "learning_rate": 4.915625762858467e-05, "loss": 71.8641, "step": 43660 }, { "epoch": 0.17643232586044594, "grad_norm": 1123.778076171875, "learning_rate": 4.9155358181954494e-05, "loss": 92.842, "step": 43670 }, { "epoch": 0.17647272712581358, "grad_norm": 424.877197265625, "learning_rate": 4.915445826440316e-05, "loss": 82.3663, "step": 43680 }, { "epoch": 0.17651312839118122, "grad_norm": 2889.92041015625, "learning_rate": 4.915355787594823e-05, "loss": 81.4271, "step": 43690 }, { "epoch": 0.17655352965654883, "grad_norm": 5192.76611328125, "learning_rate": 4.915265701660726e-05, "loss": 80.7691, "step": 43700 }, { "epoch": 0.17659393092191647, "grad_norm": 995.813720703125, "learning_rate": 4.9151755686397793e-05, "loss": 75.4785, "step": 43710 }, { "epoch": 0.1766343321872841, "grad_norm": 648.2084350585938, "learning_rate": 4.9150853885337426e-05, "loss": 93.2939, "step": 43720 }, { "epoch": 0.17667473345265175, "grad_norm": 601.667724609375, "learning_rate": 4.914995161344373e-05, "loss": 68.4272, "step": 43730 }, { "epoch": 0.17671513471801936, "grad_norm": 553.5189819335938, "learning_rate": 4.9149048870734296e-05, "loss": 75.1263, "step": 43740 }, { "epoch": 0.176755535983387, "grad_norm": 816.7628784179688, "learning_rate": 4.914814565722671e-05, "loss": 100.5337, "step": 43750 }, { "epoch": 0.17679593724875464, "grad_norm": 1510.35302734375, "learning_rate": 4.9147241972938596e-05, "loss": 94.5766, "step": 43760 }, { "epoch": 0.17683633851412225, "grad_norm": 834.1815795898438, "learning_rate": 4.9146337817887575e-05, "loss": 70.5322, "step": 43770 }, { "epoch": 0.1768767397794899, "grad_norm": 1132.84619140625, "learning_rate": 4.914543319209126e-05, "loss": 74.2719, "step": 43780 }, { "epoch": 0.17691714104485753, "grad_norm": 0.0, "learning_rate": 4.91445280955673e-05, "loss": 91.3708, "step": 43790 }, { "epoch": 0.17695754231022515, "grad_norm": 347.47705078125, "learning_rate": 4.914362252833332e-05, "loss": 70.5821, "step": 43800 }, { "epoch": 0.1769979435755928, "grad_norm": 820.30419921875, "learning_rate": 4.9142716490407e-05, "loss": 122.0043, "step": 43810 }, { "epoch": 0.17703834484096043, "grad_norm": 910.3120727539062, "learning_rate": 4.9141809981805995e-05, "loss": 86.2672, "step": 43820 }, { "epoch": 0.17707874610632804, "grad_norm": 536.81982421875, "learning_rate": 4.914090300254798e-05, "loss": 67.7232, "step": 43830 }, { "epoch": 0.17711914737169568, "grad_norm": 799.4282836914062, "learning_rate": 4.913999555265062e-05, "loss": 66.7534, "step": 43840 }, { "epoch": 0.17715954863706332, "grad_norm": 978.2288208007812, "learning_rate": 4.913908763213162e-05, "loss": 97.7384, "step": 43850 }, { "epoch": 0.17719994990243093, "grad_norm": 1155.240478515625, "learning_rate": 4.913817924100869e-05, "loss": 83.567, "step": 43860 }, { "epoch": 0.17724035116779857, "grad_norm": 4742.7177734375, "learning_rate": 4.913727037929952e-05, "loss": 106.2576, "step": 43870 }, { "epoch": 0.1772807524331662, "grad_norm": 725.6898193359375, "learning_rate": 4.913636104702183e-05, "loss": 95.1339, "step": 43880 }, { "epoch": 0.17732115369853385, "grad_norm": 933.677978515625, "learning_rate": 4.913545124419336e-05, "loss": 77.6824, "step": 43890 }, { "epoch": 0.17736155496390146, "grad_norm": 484.01092529296875, "learning_rate": 4.913454097083185e-05, "loss": 95.2952, "step": 43900 }, { "epoch": 0.1774019562292691, "grad_norm": 4791.2919921875, "learning_rate": 4.9133630226955026e-05, "loss": 66.6884, "step": 43910 }, { "epoch": 0.17744235749463674, "grad_norm": 750.3236694335938, "learning_rate": 4.913271901258067e-05, "loss": 71.9357, "step": 43920 }, { "epoch": 0.17748275876000436, "grad_norm": 738.4391479492188, "learning_rate": 4.913180732772652e-05, "loss": 73.5072, "step": 43930 }, { "epoch": 0.177523160025372, "grad_norm": 3437.857666015625, "learning_rate": 4.913089517241037e-05, "loss": 102.427, "step": 43940 }, { "epoch": 0.17756356129073964, "grad_norm": 790.71044921875, "learning_rate": 4.912998254665e-05, "loss": 75.0368, "step": 43950 }, { "epoch": 0.17760396255610725, "grad_norm": 417.4804382324219, "learning_rate": 4.9129069450463186e-05, "loss": 97.0161, "step": 43960 }, { "epoch": 0.1776443638214749, "grad_norm": 988.0493774414062, "learning_rate": 4.912815588386775e-05, "loss": 67.218, "step": 43970 }, { "epoch": 0.17768476508684253, "grad_norm": 867.89404296875, "learning_rate": 4.912724184688149e-05, "loss": 70.8273, "step": 43980 }, { "epoch": 0.17772516635221014, "grad_norm": 678.370849609375, "learning_rate": 4.9126327339522225e-05, "loss": 79.2882, "step": 43990 }, { "epoch": 0.17776556761757778, "grad_norm": 1945.7412109375, "learning_rate": 4.912541236180779e-05, "loss": 102.0449, "step": 44000 }, { "epoch": 0.17780596888294542, "grad_norm": 874.1896362304688, "learning_rate": 4.912449691375602e-05, "loss": 73.3128, "step": 44010 }, { "epoch": 0.17784637014831303, "grad_norm": 922.020751953125, "learning_rate": 4.912358099538476e-05, "loss": 105.76, "step": 44020 }, { "epoch": 0.17788677141368067, "grad_norm": 295.5516662597656, "learning_rate": 4.912266460671187e-05, "loss": 71.2611, "step": 44030 }, { "epoch": 0.1779271726790483, "grad_norm": 524.7007446289062, "learning_rate": 4.912174774775522e-05, "loss": 132.5543, "step": 44040 }, { "epoch": 0.17796757394441595, "grad_norm": 1069.502685546875, "learning_rate": 4.912083041853267e-05, "loss": 49.895, "step": 44050 }, { "epoch": 0.17800797520978356, "grad_norm": 1046.591796875, "learning_rate": 4.911991261906212e-05, "loss": 121.5496, "step": 44060 }, { "epoch": 0.1780483764751512, "grad_norm": 1215.2974853515625, "learning_rate": 4.9118994349361455e-05, "loss": 67.8808, "step": 44070 }, { "epoch": 0.17808877774051884, "grad_norm": 419.9411315917969, "learning_rate": 4.911807560944858e-05, "loss": 94.9882, "step": 44080 }, { "epoch": 0.17812917900588646, "grad_norm": 555.5469360351562, "learning_rate": 4.911715639934139e-05, "loss": 75.9314, "step": 44090 }, { "epoch": 0.1781695802712541, "grad_norm": 961.8922729492188, "learning_rate": 4.911623671905784e-05, "loss": 281.5219, "step": 44100 }, { "epoch": 0.17820998153662174, "grad_norm": 443.88385009765625, "learning_rate": 4.9115316568615824e-05, "loss": 72.1933, "step": 44110 }, { "epoch": 0.17825038280198935, "grad_norm": 670.9072265625, "learning_rate": 4.9114395948033296e-05, "loss": 59.2318, "step": 44120 }, { "epoch": 0.178290784067357, "grad_norm": 951.0693359375, "learning_rate": 4.911347485732821e-05, "loss": 92.5108, "step": 44130 }, { "epoch": 0.17833118533272463, "grad_norm": 742.0170288085938, "learning_rate": 4.911255329651851e-05, "loss": 81.8882, "step": 44140 }, { "epoch": 0.17837158659809224, "grad_norm": 1449.71826171875, "learning_rate": 4.9111631265622184e-05, "loss": 99.7724, "step": 44150 }, { "epoch": 0.17841198786345988, "grad_norm": 884.3943481445312, "learning_rate": 4.911070876465719e-05, "loss": 77.8581, "step": 44160 }, { "epoch": 0.17845238912882752, "grad_norm": 930.3131103515625, "learning_rate": 4.910978579364151e-05, "loss": 75.9937, "step": 44170 }, { "epoch": 0.17849279039419513, "grad_norm": 741.734130859375, "learning_rate": 4.910886235259314e-05, "loss": 49.2752, "step": 44180 }, { "epoch": 0.17853319165956277, "grad_norm": 1187.31982421875, "learning_rate": 4.910793844153009e-05, "loss": 72.5356, "step": 44190 }, { "epoch": 0.1785735929249304, "grad_norm": 1084.1396484375, "learning_rate": 4.910701406047037e-05, "loss": 54.3127, "step": 44200 }, { "epoch": 0.17861399419029805, "grad_norm": 1090.561767578125, "learning_rate": 4.910608920943199e-05, "loss": 89.8081, "step": 44210 }, { "epoch": 0.17865439545566567, "grad_norm": 926.6570434570312, "learning_rate": 4.9105163888433e-05, "loss": 102.4193, "step": 44220 }, { "epoch": 0.1786947967210333, "grad_norm": 823.6111450195312, "learning_rate": 4.910423809749143e-05, "loss": 71.3021, "step": 44230 }, { "epoch": 0.17873519798640095, "grad_norm": 1753.1083984375, "learning_rate": 4.910331183662533e-05, "loss": 73.3729, "step": 44240 }, { "epoch": 0.17877559925176856, "grad_norm": 1302.8763427734375, "learning_rate": 4.910238510585276e-05, "loss": 73.3519, "step": 44250 }, { "epoch": 0.1788160005171362, "grad_norm": 1096.9420166015625, "learning_rate": 4.9101457905191774e-05, "loss": 97.6709, "step": 44260 }, { "epoch": 0.17885640178250384, "grad_norm": 1942.8685302734375, "learning_rate": 4.910053023466046e-05, "loss": 144.2541, "step": 44270 }, { "epoch": 0.17889680304787145, "grad_norm": 784.9008178710938, "learning_rate": 4.90996020942769e-05, "loss": 80.2861, "step": 44280 }, { "epoch": 0.1789372043132391, "grad_norm": 1027.322509765625, "learning_rate": 4.9098673484059195e-05, "loss": 69.3643, "step": 44290 }, { "epoch": 0.17897760557860673, "grad_norm": 1583.6845703125, "learning_rate": 4.9097744404025435e-05, "loss": 92.1036, "step": 44300 }, { "epoch": 0.17901800684397434, "grad_norm": 735.26806640625, "learning_rate": 4.909681485419375e-05, "loss": 60.5722, "step": 44310 }, { "epoch": 0.17905840810934198, "grad_norm": 0.0, "learning_rate": 4.909588483458225e-05, "loss": 65.9718, "step": 44320 }, { "epoch": 0.17909880937470962, "grad_norm": 676.1497802734375, "learning_rate": 4.9094954345209075e-05, "loss": 85.0213, "step": 44330 }, { "epoch": 0.17913921064007723, "grad_norm": 873.891357421875, "learning_rate": 4.909402338609236e-05, "loss": 77.8588, "step": 44340 }, { "epoch": 0.17917961190544487, "grad_norm": 693.5972900390625, "learning_rate": 4.909309195725025e-05, "loss": 124.6474, "step": 44350 }, { "epoch": 0.17922001317081251, "grad_norm": 460.6761779785156, "learning_rate": 4.90921600587009e-05, "loss": 87.2825, "step": 44360 }, { "epoch": 0.17926041443618015, "grad_norm": 1261.747314453125, "learning_rate": 4.90912276904625e-05, "loss": 70.8723, "step": 44370 }, { "epoch": 0.17930081570154777, "grad_norm": 538.2672729492188, "learning_rate": 4.909029485255321e-05, "loss": 67.4372, "step": 44380 }, { "epoch": 0.1793412169669154, "grad_norm": 1068.8714599609375, "learning_rate": 4.9089361544991215e-05, "loss": 68.0589, "step": 44390 }, { "epoch": 0.17938161823228305, "grad_norm": 1119.72314453125, "learning_rate": 4.908842776779472e-05, "loss": 68.3467, "step": 44400 }, { "epoch": 0.17942201949765066, "grad_norm": 568.3760986328125, "learning_rate": 4.908749352098192e-05, "loss": 73.3346, "step": 44410 }, { "epoch": 0.1794624207630183, "grad_norm": 1218.2125244140625, "learning_rate": 4.9086558804571034e-05, "loss": 109.4271, "step": 44420 }, { "epoch": 0.17950282202838594, "grad_norm": 582.5836181640625, "learning_rate": 4.908562361858028e-05, "loss": 72.2208, "step": 44430 }, { "epoch": 0.17954322329375355, "grad_norm": 1486.9588623046875, "learning_rate": 4.9084687963027894e-05, "loss": 87.6006, "step": 44440 }, { "epoch": 0.1795836245591212, "grad_norm": 646.9586791992188, "learning_rate": 4.9083751837932126e-05, "loss": 84.1968, "step": 44450 }, { "epoch": 0.17962402582448883, "grad_norm": 665.9592895507812, "learning_rate": 4.908281524331121e-05, "loss": 61.7497, "step": 44460 }, { "epoch": 0.17966442708985644, "grad_norm": 514.8817749023438, "learning_rate": 4.908187817918341e-05, "loss": 71.9887, "step": 44470 }, { "epoch": 0.17970482835522408, "grad_norm": 883.0592651367188, "learning_rate": 4.9080940645567e-05, "loss": 72.1492, "step": 44480 }, { "epoch": 0.17974522962059172, "grad_norm": 1375.27001953125, "learning_rate": 4.908000264248025e-05, "loss": 96.8598, "step": 44490 }, { "epoch": 0.17978563088595934, "grad_norm": 645.9873046875, "learning_rate": 4.907906416994146e-05, "loss": 64.9413, "step": 44500 }, { "epoch": 0.17982603215132698, "grad_norm": 1111.731201171875, "learning_rate": 4.9078125227968904e-05, "loss": 86.5026, "step": 44510 }, { "epoch": 0.17986643341669462, "grad_norm": 934.5158081054688, "learning_rate": 4.907718581658091e-05, "loss": 110.799, "step": 44520 }, { "epoch": 0.17990683468206226, "grad_norm": 920.4935913085938, "learning_rate": 4.9076245935795786e-05, "loss": 62.2967, "step": 44530 }, { "epoch": 0.17994723594742987, "grad_norm": 892.095947265625, "learning_rate": 4.9075305585631845e-05, "loss": 82.56, "step": 44540 }, { "epoch": 0.1799876372127975, "grad_norm": 1195.0810546875, "learning_rate": 4.907436476610743e-05, "loss": 48.5051, "step": 44550 }, { "epoch": 0.18002803847816515, "grad_norm": 768.6405029296875, "learning_rate": 4.907342347724087e-05, "loss": 67.0793, "step": 44560 }, { "epoch": 0.18006843974353276, "grad_norm": 1605.87060546875, "learning_rate": 4.907248171905055e-05, "loss": 114.835, "step": 44570 }, { "epoch": 0.1801088410089004, "grad_norm": 718.5313720703125, "learning_rate": 4.907153949155479e-05, "loss": 100.6462, "step": 44580 }, { "epoch": 0.18014924227426804, "grad_norm": 430.45037841796875, "learning_rate": 4.907059679477197e-05, "loss": 67.9887, "step": 44590 }, { "epoch": 0.18018964353963565, "grad_norm": 234.9417266845703, "learning_rate": 4.906965362872047e-05, "loss": 77.6404, "step": 44600 }, { "epoch": 0.1802300448050033, "grad_norm": 911.5045776367188, "learning_rate": 4.906870999341869e-05, "loss": 88.723, "step": 44610 }, { "epoch": 0.18027044607037093, "grad_norm": 772.7266845703125, "learning_rate": 4.906776588888502e-05, "loss": 93.7803, "step": 44620 }, { "epoch": 0.18031084733573854, "grad_norm": 1137.0054931640625, "learning_rate": 4.9066821315137856e-05, "loss": 110.4208, "step": 44630 }, { "epoch": 0.18035124860110618, "grad_norm": 898.4769287109375, "learning_rate": 4.906587627219562e-05, "loss": 73.2961, "step": 44640 }, { "epoch": 0.18039164986647382, "grad_norm": 958.9808959960938, "learning_rate": 4.906493076007674e-05, "loss": 57.1335, "step": 44650 }, { "epoch": 0.18043205113184144, "grad_norm": 938.7362670898438, "learning_rate": 4.9063984778799645e-05, "loss": 101.8361, "step": 44660 }, { "epoch": 0.18047245239720908, "grad_norm": 515.5078735351562, "learning_rate": 4.906303832838278e-05, "loss": 54.5177, "step": 44670 }, { "epoch": 0.18051285366257672, "grad_norm": 531.8738403320312, "learning_rate": 4.906209140884459e-05, "loss": 82.9638, "step": 44680 }, { "epoch": 0.18055325492794436, "grad_norm": 1305.5316162109375, "learning_rate": 4.906114402020354e-05, "loss": 95.9825, "step": 44690 }, { "epoch": 0.18059365619331197, "grad_norm": 777.7756958007812, "learning_rate": 4.90601961624781e-05, "loss": 81.2786, "step": 44700 }, { "epoch": 0.1806340574586796, "grad_norm": 1133.5772705078125, "learning_rate": 4.905924783568675e-05, "loss": 85.8932, "step": 44710 }, { "epoch": 0.18067445872404725, "grad_norm": 599.99951171875, "learning_rate": 4.9058299039847975e-05, "loss": 101.0232, "step": 44720 }, { "epoch": 0.18071485998941486, "grad_norm": 675.3793334960938, "learning_rate": 4.9057349774980275e-05, "loss": 56.9, "step": 44730 }, { "epoch": 0.1807552612547825, "grad_norm": 961.1941528320312, "learning_rate": 4.905640004110216e-05, "loss": 84.8394, "step": 44740 }, { "epoch": 0.18079566252015014, "grad_norm": 1229.0406494140625, "learning_rate": 4.905544983823214e-05, "loss": 65.7035, "step": 44750 }, { "epoch": 0.18083606378551775, "grad_norm": 1367.959228515625, "learning_rate": 4.905449916638873e-05, "loss": 79.8659, "step": 44760 }, { "epoch": 0.1808764650508854, "grad_norm": 705.6975708007812, "learning_rate": 4.905354802559049e-05, "loss": 109.2609, "step": 44770 }, { "epoch": 0.18091686631625303, "grad_norm": 738.3800048828125, "learning_rate": 4.905259641585594e-05, "loss": 45.9301, "step": 44780 }, { "epoch": 0.18095726758162065, "grad_norm": 746.9801025390625, "learning_rate": 4.905164433720364e-05, "loss": 65.5784, "step": 44790 }, { "epoch": 0.18099766884698829, "grad_norm": 546.0762329101562, "learning_rate": 4.905069178965215e-05, "loss": 97.4426, "step": 44800 }, { "epoch": 0.18103807011235593, "grad_norm": 714.2156372070312, "learning_rate": 4.9049738773220046e-05, "loss": 83.1105, "step": 44810 }, { "epoch": 0.18107847137772354, "grad_norm": 862.12939453125, "learning_rate": 4.9048785287925895e-05, "loss": 56.6679, "step": 44820 }, { "epoch": 0.18111887264309118, "grad_norm": 233.91058349609375, "learning_rate": 4.9047831333788295e-05, "loss": 82.0974, "step": 44830 }, { "epoch": 0.18115927390845882, "grad_norm": 820.8073120117188, "learning_rate": 4.904687691082585e-05, "loss": 72.0476, "step": 44840 }, { "epoch": 0.18119967517382646, "grad_norm": 1259.7579345703125, "learning_rate": 4.9045922019057155e-05, "loss": 83.561, "step": 44850 }, { "epoch": 0.18124007643919407, "grad_norm": 879.6514282226562, "learning_rate": 4.904496665850084e-05, "loss": 82.4675, "step": 44860 }, { "epoch": 0.1812804777045617, "grad_norm": 684.139892578125, "learning_rate": 4.90440108291755e-05, "loss": 90.4461, "step": 44870 }, { "epoch": 0.18132087896992935, "grad_norm": 2262.56591796875, "learning_rate": 4.904305453109981e-05, "loss": 53.0787, "step": 44880 }, { "epoch": 0.18136128023529696, "grad_norm": 1157.9019775390625, "learning_rate": 4.9042097764292385e-05, "loss": 83.7821, "step": 44890 }, { "epoch": 0.1814016815006646, "grad_norm": 945.2867431640625, "learning_rate": 4.904114052877188e-05, "loss": 81.2998, "step": 44900 }, { "epoch": 0.18144208276603224, "grad_norm": 1119.50048828125, "learning_rate": 4.904018282455697e-05, "loss": 49.5603, "step": 44910 }, { "epoch": 0.18148248403139985, "grad_norm": 1104.3919677734375, "learning_rate": 4.9039224651666325e-05, "loss": 89.8467, "step": 44920 }, { "epoch": 0.1815228852967675, "grad_norm": 852.224365234375, "learning_rate": 4.903826601011861e-05, "loss": 69.1575, "step": 44930 }, { "epoch": 0.18156328656213513, "grad_norm": 473.6451110839844, "learning_rate": 4.903730689993253e-05, "loss": 92.4134, "step": 44940 }, { "epoch": 0.18160368782750275, "grad_norm": 406.23199462890625, "learning_rate": 4.903634732112678e-05, "loss": 42.3387, "step": 44950 }, { "epoch": 0.1816440890928704, "grad_norm": 1381.426025390625, "learning_rate": 4.903538727372005e-05, "loss": 81.0296, "step": 44960 }, { "epoch": 0.18168449035823803, "grad_norm": 1150.0416259765625, "learning_rate": 4.903442675773108e-05, "loss": 67.7141, "step": 44970 }, { "epoch": 0.18172489162360564, "grad_norm": 816.09619140625, "learning_rate": 4.903346577317859e-05, "loss": 85.8159, "step": 44980 }, { "epoch": 0.18176529288897328, "grad_norm": 602.0270385742188, "learning_rate": 4.90325043200813e-05, "loss": 63.4132, "step": 44990 }, { "epoch": 0.18180569415434092, "grad_norm": 815.766357421875, "learning_rate": 4.9031542398457974e-05, "loss": 88.5122, "step": 45000 }, { "epoch": 0.18184609541970856, "grad_norm": 863.7614135742188, "learning_rate": 4.9030580008327353e-05, "loss": 91.4247, "step": 45010 }, { "epoch": 0.18188649668507617, "grad_norm": 2180.32373046875, "learning_rate": 4.902961714970821e-05, "loss": 104.5198, "step": 45020 }, { "epoch": 0.1819268979504438, "grad_norm": 527.2975463867188, "learning_rate": 4.90286538226193e-05, "loss": 89.9132, "step": 45030 }, { "epoch": 0.18196729921581145, "grad_norm": 797.1927490234375, "learning_rate": 4.902769002707942e-05, "loss": 60.7632, "step": 45040 }, { "epoch": 0.18200770048117906, "grad_norm": 1439.7625732421875, "learning_rate": 4.902672576310735e-05, "loss": 94.8452, "step": 45050 }, { "epoch": 0.1820481017465467, "grad_norm": 962.4320678710938, "learning_rate": 4.902576103072189e-05, "loss": 93.4051, "step": 45060 }, { "epoch": 0.18208850301191434, "grad_norm": 509.36468505859375, "learning_rate": 4.902479582994185e-05, "loss": 101.6452, "step": 45070 }, { "epoch": 0.18212890427728196, "grad_norm": 674.908447265625, "learning_rate": 4.902383016078605e-05, "loss": 82.7371, "step": 45080 }, { "epoch": 0.1821693055426496, "grad_norm": 581.795166015625, "learning_rate": 4.902286402327331e-05, "loss": 78.2122, "step": 45090 }, { "epoch": 0.18220970680801724, "grad_norm": 1014.2080078125, "learning_rate": 4.902189741742247e-05, "loss": 93.4292, "step": 45100 }, { "epoch": 0.18225010807338485, "grad_norm": 1033.72412109375, "learning_rate": 4.902093034325237e-05, "loss": 86.2924, "step": 45110 }, { "epoch": 0.1822905093387525, "grad_norm": 553.49755859375, "learning_rate": 4.901996280078186e-05, "loss": 115.3292, "step": 45120 }, { "epoch": 0.18233091060412013, "grad_norm": 867.1639404296875, "learning_rate": 4.901899479002982e-05, "loss": 64.9478, "step": 45130 }, { "epoch": 0.18237131186948774, "grad_norm": 314.60528564453125, "learning_rate": 4.901802631101511e-05, "loss": 65.43, "step": 45140 }, { "epoch": 0.18241171313485538, "grad_norm": 676.8156127929688, "learning_rate": 4.90170573637566e-05, "loss": 71.8611, "step": 45150 }, { "epoch": 0.18245211440022302, "grad_norm": 947.8834228515625, "learning_rate": 4.90160879482732e-05, "loss": 94.2625, "step": 45160 }, { "epoch": 0.18249251566559066, "grad_norm": 628.1900634765625, "learning_rate": 4.901511806458381e-05, "loss": 59.9568, "step": 45170 }, { "epoch": 0.18253291693095827, "grad_norm": 1697.5426025390625, "learning_rate": 4.9014147712707316e-05, "loss": 78.6836, "step": 45180 }, { "epoch": 0.1825733181963259, "grad_norm": 1006.1732177734375, "learning_rate": 4.9013176892662654e-05, "loss": 82.129, "step": 45190 }, { "epoch": 0.18261371946169355, "grad_norm": 1150.0892333984375, "learning_rate": 4.9012205604468744e-05, "loss": 117.7069, "step": 45200 }, { "epoch": 0.18265412072706116, "grad_norm": 707.0164184570312, "learning_rate": 4.9011233848144525e-05, "loss": 82.716, "step": 45210 }, { "epoch": 0.1826945219924288, "grad_norm": 355.14959716796875, "learning_rate": 4.9010261623708944e-05, "loss": 47.5373, "step": 45220 }, { "epoch": 0.18273492325779644, "grad_norm": 784.4992065429688, "learning_rate": 4.9009288931180947e-05, "loss": 82.2301, "step": 45230 }, { "epoch": 0.18277532452316406, "grad_norm": 1023.0245361328125, "learning_rate": 4.90083157705795e-05, "loss": 67.702, "step": 45240 }, { "epoch": 0.1828157257885317, "grad_norm": 801.0816040039062, "learning_rate": 4.900734214192358e-05, "loss": 76.0119, "step": 45250 }, { "epoch": 0.18285612705389934, "grad_norm": 1161.0206298828125, "learning_rate": 4.900636804523217e-05, "loss": 75.7235, "step": 45260 }, { "epoch": 0.18289652831926695, "grad_norm": 1342.0335693359375, "learning_rate": 4.900539348052424e-05, "loss": 82.4319, "step": 45270 }, { "epoch": 0.1829369295846346, "grad_norm": 1176.3917236328125, "learning_rate": 4.9004418447818815e-05, "loss": 97.9844, "step": 45280 }, { "epoch": 0.18297733085000223, "grad_norm": 1022.2837524414062, "learning_rate": 4.9003442947134895e-05, "loss": 78.1378, "step": 45290 }, { "epoch": 0.18301773211536984, "grad_norm": 816.5732421875, "learning_rate": 4.90024669784915e-05, "loss": 70.9764, "step": 45300 }, { "epoch": 0.18305813338073748, "grad_norm": 830.2484741210938, "learning_rate": 4.9001490541907645e-05, "loss": 62.789, "step": 45310 }, { "epoch": 0.18309853464610512, "grad_norm": 1164.3995361328125, "learning_rate": 4.900051363740238e-05, "loss": 63.8086, "step": 45320 }, { "epoch": 0.18313893591147276, "grad_norm": 1487.6907958984375, "learning_rate": 4.899953626499475e-05, "loss": 78.6914, "step": 45330 }, { "epoch": 0.18317933717684037, "grad_norm": 570.93212890625, "learning_rate": 4.89985584247038e-05, "loss": 80.8218, "step": 45340 }, { "epoch": 0.183219738442208, "grad_norm": 822.6448974609375, "learning_rate": 4.8997580116548595e-05, "loss": 105.5186, "step": 45350 }, { "epoch": 0.18326013970757565, "grad_norm": 1853.758544921875, "learning_rate": 4.8996601340548215e-05, "loss": 116.5778, "step": 45360 }, { "epoch": 0.18330054097294327, "grad_norm": 712.744140625, "learning_rate": 4.899562209672174e-05, "loss": 67.3311, "step": 45370 }, { "epoch": 0.1833409422383109, "grad_norm": 752.5093994140625, "learning_rate": 4.899464238508825e-05, "loss": 75.8195, "step": 45380 }, { "epoch": 0.18338134350367855, "grad_norm": 1256.2913818359375, "learning_rate": 4.899366220566686e-05, "loss": 112.7811, "step": 45390 }, { "epoch": 0.18342174476904616, "grad_norm": 1340.5006103515625, "learning_rate": 4.899268155847667e-05, "loss": 103.8136, "step": 45400 }, { "epoch": 0.1834621460344138, "grad_norm": 698.527587890625, "learning_rate": 4.89917004435368e-05, "loss": 85.698, "step": 45410 }, { "epoch": 0.18350254729978144, "grad_norm": 697.1134033203125, "learning_rate": 4.899071886086638e-05, "loss": 95.746, "step": 45420 }, { "epoch": 0.18354294856514905, "grad_norm": 860.381591796875, "learning_rate": 4.898973681048454e-05, "loss": 71.6687, "step": 45430 }, { "epoch": 0.1835833498305167, "grad_norm": 300.5081481933594, "learning_rate": 4.898875429241044e-05, "loss": 57.1445, "step": 45440 }, { "epoch": 0.18362375109588433, "grad_norm": 325.8232727050781, "learning_rate": 4.898777130666322e-05, "loss": 102.5872, "step": 45450 }, { "epoch": 0.18366415236125194, "grad_norm": 714.4207153320312, "learning_rate": 4.898678785326205e-05, "loss": 96.0007, "step": 45460 }, { "epoch": 0.18370455362661958, "grad_norm": 879.5983276367188, "learning_rate": 4.8985803932226094e-05, "loss": 91.5776, "step": 45470 }, { "epoch": 0.18374495489198722, "grad_norm": 1637.9261474609375, "learning_rate": 4.898481954357455e-05, "loss": 108.3861, "step": 45480 }, { "epoch": 0.18378535615735486, "grad_norm": 839.2368774414062, "learning_rate": 4.8983834687326596e-05, "loss": 96.1661, "step": 45490 }, { "epoch": 0.18382575742272247, "grad_norm": 1169.771240234375, "learning_rate": 4.898284936350144e-05, "loss": 102.8537, "step": 45500 }, { "epoch": 0.18386615868809011, "grad_norm": 760.4490966796875, "learning_rate": 4.898186357211829e-05, "loss": 74.1794, "step": 45510 }, { "epoch": 0.18390655995345775, "grad_norm": 784.6276245117188, "learning_rate": 4.898087731319636e-05, "loss": 67.2023, "step": 45520 }, { "epoch": 0.18394696121882537, "grad_norm": 1203.34228515625, "learning_rate": 4.8979890586754875e-05, "loss": 104.9335, "step": 45530 }, { "epoch": 0.183987362484193, "grad_norm": 1361.6192626953125, "learning_rate": 4.897890339281309e-05, "loss": 83.1708, "step": 45540 }, { "epoch": 0.18402776374956065, "grad_norm": 1109.7901611328125, "learning_rate": 4.897791573139023e-05, "loss": 89.0058, "step": 45550 }, { "epoch": 0.18406816501492826, "grad_norm": 945.8731689453125, "learning_rate": 4.897692760250556e-05, "loss": 124.9025, "step": 45560 }, { "epoch": 0.1841085662802959, "grad_norm": 931.4639282226562, "learning_rate": 4.897593900617834e-05, "loss": 87.5932, "step": 45570 }, { "epoch": 0.18414896754566354, "grad_norm": 928.37646484375, "learning_rate": 4.897494994242785e-05, "loss": 121.7126, "step": 45580 }, { "epoch": 0.18418936881103115, "grad_norm": 1160.150634765625, "learning_rate": 4.8973960411273364e-05, "loss": 88.2009, "step": 45590 }, { "epoch": 0.1842297700763988, "grad_norm": 1830.1046142578125, "learning_rate": 4.8972970412734176e-05, "loss": 94.9112, "step": 45600 }, { "epoch": 0.18427017134176643, "grad_norm": 760.2313232421875, "learning_rate": 4.897197994682959e-05, "loss": 66.2683, "step": 45610 }, { "epoch": 0.18431057260713404, "grad_norm": 1265.816650390625, "learning_rate": 4.897098901357891e-05, "loss": 145.3422, "step": 45620 }, { "epoch": 0.18435097387250168, "grad_norm": 1088.4708251953125, "learning_rate": 4.896999761300146e-05, "loss": 124.1203, "step": 45630 }, { "epoch": 0.18439137513786932, "grad_norm": 2023.6080322265625, "learning_rate": 4.896900574511657e-05, "loss": 89.7067, "step": 45640 }, { "epoch": 0.18443177640323694, "grad_norm": 467.109130859375, "learning_rate": 4.896801340994357e-05, "loss": 67.6761, "step": 45650 }, { "epoch": 0.18447217766860458, "grad_norm": 1232.957763671875, "learning_rate": 4.896702060750181e-05, "loss": 87.1448, "step": 45660 }, { "epoch": 0.18451257893397222, "grad_norm": 1274.385009765625, "learning_rate": 4.896602733781065e-05, "loss": 90.0393, "step": 45670 }, { "epoch": 0.18455298019933986, "grad_norm": 913.9437255859375, "learning_rate": 4.8965033600889435e-05, "loss": 58.4704, "step": 45680 }, { "epoch": 0.18459338146470747, "grad_norm": 1077.12744140625, "learning_rate": 4.896403939675756e-05, "loss": 101.7226, "step": 45690 }, { "epoch": 0.1846337827300751, "grad_norm": 648.5958862304688, "learning_rate": 4.89630447254344e-05, "loss": 59.2482, "step": 45700 }, { "epoch": 0.18467418399544275, "grad_norm": 1022.220703125, "learning_rate": 4.896204958693934e-05, "loss": 62.0977, "step": 45710 }, { "epoch": 0.18471458526081036, "grad_norm": 902.6193237304688, "learning_rate": 4.8961053981291795e-05, "loss": 57.4871, "step": 45720 }, { "epoch": 0.184754986526178, "grad_norm": 971.0794067382812, "learning_rate": 4.896005790851116e-05, "loss": 67.6352, "step": 45730 }, { "epoch": 0.18479538779154564, "grad_norm": 696.0235595703125, "learning_rate": 4.8959061368616863e-05, "loss": 60.1176, "step": 45740 }, { "epoch": 0.18483578905691325, "grad_norm": 403.1430969238281, "learning_rate": 4.895806436162833e-05, "loss": 92.4626, "step": 45750 }, { "epoch": 0.1848761903222809, "grad_norm": 524.8573608398438, "learning_rate": 4.8957066887565e-05, "loss": 82.3987, "step": 45760 }, { "epoch": 0.18491659158764853, "grad_norm": 671.1500244140625, "learning_rate": 4.8956068946446314e-05, "loss": 63.0742, "step": 45770 }, { "epoch": 0.18495699285301614, "grad_norm": 1303.492431640625, "learning_rate": 4.8955070538291735e-05, "loss": 80.9564, "step": 45780 }, { "epoch": 0.18499739411838378, "grad_norm": 433.896240234375, "learning_rate": 4.8954071663120715e-05, "loss": 58.4002, "step": 45790 }, { "epoch": 0.18503779538375142, "grad_norm": 504.2688903808594, "learning_rate": 4.8953072320952745e-05, "loss": 73.8806, "step": 45800 }, { "epoch": 0.18507819664911904, "grad_norm": 974.8590087890625, "learning_rate": 4.895207251180729e-05, "loss": 112.7232, "step": 45810 }, { "epoch": 0.18511859791448668, "grad_norm": 1069.8795166015625, "learning_rate": 4.8951072235703855e-05, "loss": 108.1218, "step": 45820 }, { "epoch": 0.18515899917985432, "grad_norm": 385.00994873046875, "learning_rate": 4.895007149266193e-05, "loss": 55.9683, "step": 45830 }, { "epoch": 0.18519940044522196, "grad_norm": 680.8944091796875, "learning_rate": 4.8949070282701034e-05, "loss": 81.2045, "step": 45840 }, { "epoch": 0.18523980171058957, "grad_norm": 885.607177734375, "learning_rate": 4.8948068605840694e-05, "loss": 83.6066, "step": 45850 }, { "epoch": 0.1852802029759572, "grad_norm": 794.8469848632812, "learning_rate": 4.894706646210041e-05, "loss": 97.3249, "step": 45860 }, { "epoch": 0.18532060424132485, "grad_norm": 865.634033203125, "learning_rate": 4.8946063851499746e-05, "loss": 51.3998, "step": 45870 }, { "epoch": 0.18536100550669246, "grad_norm": 882.5504760742188, "learning_rate": 4.894506077405824e-05, "loss": 65.3006, "step": 45880 }, { "epoch": 0.1854014067720601, "grad_norm": 712.5584106445312, "learning_rate": 4.894405722979544e-05, "loss": 60.6922, "step": 45890 }, { "epoch": 0.18544180803742774, "grad_norm": 1048.72216796875, "learning_rate": 4.894305321873092e-05, "loss": 79.0462, "step": 45900 }, { "epoch": 0.18548220930279535, "grad_norm": 757.8132934570312, "learning_rate": 4.894204874088425e-05, "loss": 82.1683, "step": 45910 }, { "epoch": 0.185522610568163, "grad_norm": 946.23828125, "learning_rate": 4.8941043796275015e-05, "loss": 80.1746, "step": 45920 }, { "epoch": 0.18556301183353063, "grad_norm": 1357.65380859375, "learning_rate": 4.8940038384922806e-05, "loss": 77.1498, "step": 45930 }, { "epoch": 0.18560341309889825, "grad_norm": 1069.114990234375, "learning_rate": 4.8939032506847224e-05, "loss": 97.8804, "step": 45940 }, { "epoch": 0.18564381436426589, "grad_norm": 1382.1502685546875, "learning_rate": 4.893802616206787e-05, "loss": 108.694, "step": 45950 }, { "epoch": 0.18568421562963353, "grad_norm": 1493.73486328125, "learning_rate": 4.893701935060439e-05, "loss": 81.302, "step": 45960 }, { "epoch": 0.18572461689500114, "grad_norm": 873.5044555664062, "learning_rate": 4.893601207247638e-05, "loss": 76.8872, "step": 45970 }, { "epoch": 0.18576501816036878, "grad_norm": 2981.951416015625, "learning_rate": 4.893500432770349e-05, "loss": 68.5081, "step": 45980 }, { "epoch": 0.18580541942573642, "grad_norm": 396.176513671875, "learning_rate": 4.893399611630538e-05, "loss": 65.5169, "step": 45990 }, { "epoch": 0.18584582069110406, "grad_norm": 1273.2371826171875, "learning_rate": 4.893298743830168e-05, "loss": 69.0687, "step": 46000 }, { "epoch": 0.18588622195647167, "grad_norm": 2934.45361328125, "learning_rate": 4.8931978293712074e-05, "loss": 67.4254, "step": 46010 }, { "epoch": 0.1859266232218393, "grad_norm": 665.5234985351562, "learning_rate": 4.8930968682556234e-05, "loss": 49.5943, "step": 46020 }, { "epoch": 0.18596702448720695, "grad_norm": 688.6306762695312, "learning_rate": 4.892995860485384e-05, "loss": 50.1611, "step": 46030 }, { "epoch": 0.18600742575257456, "grad_norm": 544.0740356445312, "learning_rate": 4.892894806062458e-05, "loss": 90.3554, "step": 46040 }, { "epoch": 0.1860478270179422, "grad_norm": 817.1072998046875, "learning_rate": 4.892793704988816e-05, "loss": 100.2068, "step": 46050 }, { "epoch": 0.18608822828330984, "grad_norm": 901.2867431640625, "learning_rate": 4.892692557266429e-05, "loss": 82.2532, "step": 46060 }, { "epoch": 0.18612862954867745, "grad_norm": 670.2236328125, "learning_rate": 4.892591362897268e-05, "loss": 113.8737, "step": 46070 }, { "epoch": 0.1861690308140451, "grad_norm": 500.85870361328125, "learning_rate": 4.892490121883306e-05, "loss": 81.973, "step": 46080 }, { "epoch": 0.18620943207941273, "grad_norm": 0.0, "learning_rate": 4.892388834226519e-05, "loss": 63.0598, "step": 46090 }, { "epoch": 0.18624983334478035, "grad_norm": 1111.806396484375, "learning_rate": 4.892287499928879e-05, "loss": 70.0805, "step": 46100 }, { "epoch": 0.186290234610148, "grad_norm": 1321.56884765625, "learning_rate": 4.892186118992362e-05, "loss": 118.3066, "step": 46110 }, { "epoch": 0.18633063587551563, "grad_norm": 916.1724243164062, "learning_rate": 4.892084691418947e-05, "loss": 99.126, "step": 46120 }, { "epoch": 0.18637103714088324, "grad_norm": 1164.2841796875, "learning_rate": 4.891983217210607e-05, "loss": 66.5533, "step": 46130 }, { "epoch": 0.18641143840625088, "grad_norm": 589.622314453125, "learning_rate": 4.891881696369325e-05, "loss": 76.5307, "step": 46140 }, { "epoch": 0.18645183967161852, "grad_norm": 1253.39306640625, "learning_rate": 4.891780128897077e-05, "loss": 72.5351, "step": 46150 }, { "epoch": 0.18649224093698616, "grad_norm": 1563.49658203125, "learning_rate": 4.891678514795843e-05, "loss": 93.3726, "step": 46160 }, { "epoch": 0.18653264220235377, "grad_norm": 3200.355712890625, "learning_rate": 4.891576854067607e-05, "loss": 124.7522, "step": 46170 }, { "epoch": 0.1865730434677214, "grad_norm": 897.306640625, "learning_rate": 4.891475146714347e-05, "loss": 68.6371, "step": 46180 }, { "epoch": 0.18661344473308905, "grad_norm": 992.8071899414062, "learning_rate": 4.891373392738049e-05, "loss": 80.4052, "step": 46190 }, { "epoch": 0.18665384599845666, "grad_norm": 947.232421875, "learning_rate": 4.891271592140695e-05, "loss": 92.7935, "step": 46200 }, { "epoch": 0.1866942472638243, "grad_norm": 1065.53759765625, "learning_rate": 4.891169744924271e-05, "loss": 180.0239, "step": 46210 }, { "epoch": 0.18673464852919194, "grad_norm": 960.1724243164062, "learning_rate": 4.8910678510907606e-05, "loss": 61.9903, "step": 46220 }, { "epoch": 0.18677504979455956, "grad_norm": 1468.2706298828125, "learning_rate": 4.890965910642152e-05, "loss": 79.8836, "step": 46230 }, { "epoch": 0.1868154510599272, "grad_norm": 0.0, "learning_rate": 4.8908639235804324e-05, "loss": 87.0793, "step": 46240 }, { "epoch": 0.18685585232529484, "grad_norm": 0.0, "learning_rate": 4.890761889907589e-05, "loss": 78.9375, "step": 46250 }, { "epoch": 0.18689625359066245, "grad_norm": 1007.9310302734375, "learning_rate": 4.890659809625612e-05, "loss": 102.4231, "step": 46260 }, { "epoch": 0.1869366548560301, "grad_norm": 794.476318359375, "learning_rate": 4.890557682736491e-05, "loss": 84.0354, "step": 46270 }, { "epoch": 0.18697705612139773, "grad_norm": 530.2271118164062, "learning_rate": 4.890455509242218e-05, "loss": 68.9714, "step": 46280 }, { "epoch": 0.18701745738676534, "grad_norm": 1666.1671142578125, "learning_rate": 4.8903532891447836e-05, "loss": 84.9628, "step": 46290 }, { "epoch": 0.18705785865213298, "grad_norm": 787.04833984375, "learning_rate": 4.890251022446181e-05, "loss": 75.6712, "step": 46300 }, { "epoch": 0.18709825991750062, "grad_norm": 2045.2965087890625, "learning_rate": 4.890148709148404e-05, "loss": 98.4277, "step": 46310 }, { "epoch": 0.18713866118286826, "grad_norm": 949.1669311523438, "learning_rate": 4.890046349253448e-05, "loss": 63.0205, "step": 46320 }, { "epoch": 0.18717906244823587, "grad_norm": 578.8233642578125, "learning_rate": 4.8899439427633076e-05, "loss": 47.0746, "step": 46330 }, { "epoch": 0.1872194637136035, "grad_norm": 406.12615966796875, "learning_rate": 4.88984148967998e-05, "loss": 53.2635, "step": 46340 }, { "epoch": 0.18725986497897115, "grad_norm": 1593.3697509765625, "learning_rate": 4.889738990005462e-05, "loss": 93.6755, "step": 46350 }, { "epoch": 0.18730026624433876, "grad_norm": 722.7239990234375, "learning_rate": 4.889636443741752e-05, "loss": 77.3317, "step": 46360 }, { "epoch": 0.1873406675097064, "grad_norm": 2375.264892578125, "learning_rate": 4.88953385089085e-05, "loss": 88.7548, "step": 46370 }, { "epoch": 0.18738106877507404, "grad_norm": 641.643798828125, "learning_rate": 4.8894312114547535e-05, "loss": 95.3137, "step": 46380 }, { "epoch": 0.18742147004044166, "grad_norm": 1006.8427734375, "learning_rate": 4.889328525435467e-05, "loss": 74.9918, "step": 46390 }, { "epoch": 0.1874618713058093, "grad_norm": 638.9917602539062, "learning_rate": 4.889225792834991e-05, "loss": 73.9908, "step": 46400 }, { "epoch": 0.18750227257117694, "grad_norm": 2413.532470703125, "learning_rate": 4.889123013655327e-05, "loss": 129.9344, "step": 46410 }, { "epoch": 0.18754267383654455, "grad_norm": 447.9295349121094, "learning_rate": 4.8890201878984796e-05, "loss": 79.6906, "step": 46420 }, { "epoch": 0.1875830751019122, "grad_norm": 1658.515380859375, "learning_rate": 4.888917315566455e-05, "loss": 74.6684, "step": 46430 }, { "epoch": 0.18762347636727983, "grad_norm": 814.1004028320312, "learning_rate": 4.888814396661256e-05, "loss": 64.3334, "step": 46440 }, { "epoch": 0.18766387763264744, "grad_norm": 723.90673828125, "learning_rate": 4.8887114311848915e-05, "loss": 98.6432, "step": 46450 }, { "epoch": 0.18770427889801508, "grad_norm": 535.9703369140625, "learning_rate": 4.8886084191393677e-05, "loss": 50.3761, "step": 46460 }, { "epoch": 0.18774468016338272, "grad_norm": 452.6556701660156, "learning_rate": 4.888505360526693e-05, "loss": 109.0327, "step": 46470 }, { "epoch": 0.18778508142875036, "grad_norm": 691.0087280273438, "learning_rate": 4.888402255348876e-05, "loss": 101.6102, "step": 46480 }, { "epoch": 0.18782548269411797, "grad_norm": 391.9655456542969, "learning_rate": 4.888299103607928e-05, "loss": 82.483, "step": 46490 }, { "epoch": 0.1878658839594856, "grad_norm": 898.39892578125, "learning_rate": 4.888195905305859e-05, "loss": 58.2394, "step": 46500 }, { "epoch": 0.18790628522485325, "grad_norm": 902.1316528320312, "learning_rate": 4.888092660444682e-05, "loss": 48.3732, "step": 46510 }, { "epoch": 0.18794668649022087, "grad_norm": 873.6351928710938, "learning_rate": 4.887989369026409e-05, "loss": 55.4155, "step": 46520 }, { "epoch": 0.1879870877555885, "grad_norm": 684.0804443359375, "learning_rate": 4.887886031053053e-05, "loss": 80.0627, "step": 46530 }, { "epoch": 0.18802748902095615, "grad_norm": 932.9844360351562, "learning_rate": 4.887782646526631e-05, "loss": 62.5363, "step": 46540 }, { "epoch": 0.18806789028632376, "grad_norm": 676.0230102539062, "learning_rate": 4.8876792154491556e-05, "loss": 67.6069, "step": 46550 }, { "epoch": 0.1881082915516914, "grad_norm": 673.8368530273438, "learning_rate": 4.887575737822645e-05, "loss": 62.4173, "step": 46560 }, { "epoch": 0.18814869281705904, "grad_norm": 1231.91943359375, "learning_rate": 4.8874722136491155e-05, "loss": 58.3462, "step": 46570 }, { "epoch": 0.18818909408242665, "grad_norm": 669.4678344726562, "learning_rate": 4.887368642930588e-05, "loss": 140.696, "step": 46580 }, { "epoch": 0.1882294953477943, "grad_norm": 797.5696411132812, "learning_rate": 4.887265025669078e-05, "loss": 68.3853, "step": 46590 }, { "epoch": 0.18826989661316193, "grad_norm": 722.3099975585938, "learning_rate": 4.887161361866608e-05, "loss": 101.0911, "step": 46600 }, { "epoch": 0.18831029787852954, "grad_norm": 1002.6024169921875, "learning_rate": 4.887057651525198e-05, "loss": 109.1899, "step": 46610 }, { "epoch": 0.18835069914389718, "grad_norm": 950.6636352539062, "learning_rate": 4.8869538946468694e-05, "loss": 59.6831, "step": 46620 }, { "epoch": 0.18839110040926482, "grad_norm": 1075.5361328125, "learning_rate": 4.8868500912336465e-05, "loss": 73.4534, "step": 46630 }, { "epoch": 0.18843150167463246, "grad_norm": 371.59368896484375, "learning_rate": 4.8867462412875526e-05, "loss": 94.6471, "step": 46640 }, { "epoch": 0.18847190294000007, "grad_norm": 1247.0185546875, "learning_rate": 4.886642344810611e-05, "loss": 86.5103, "step": 46650 }, { "epoch": 0.18851230420536771, "grad_norm": 86.08003234863281, "learning_rate": 4.8865384018048494e-05, "loss": 77.3341, "step": 46660 }, { "epoch": 0.18855270547073535, "grad_norm": 517.982421875, "learning_rate": 4.886434412272293e-05, "loss": 58.2183, "step": 46670 }, { "epoch": 0.18859310673610297, "grad_norm": 772.2702026367188, "learning_rate": 4.886330376214968e-05, "loss": 90.4057, "step": 46680 }, { "epoch": 0.1886335080014706, "grad_norm": 827.40234375, "learning_rate": 4.886226293634904e-05, "loss": 62.3181, "step": 46690 }, { "epoch": 0.18867390926683825, "grad_norm": 662.444580078125, "learning_rate": 4.886122164534131e-05, "loss": 72.7424, "step": 46700 }, { "epoch": 0.18871431053220586, "grad_norm": 423.5435791015625, "learning_rate": 4.886017988914676e-05, "loss": 132.9958, "step": 46710 }, { "epoch": 0.1887547117975735, "grad_norm": 773.7642822265625, "learning_rate": 4.8859137667785735e-05, "loss": 112.7814, "step": 46720 }, { "epoch": 0.18879511306294114, "grad_norm": 942.0858154296875, "learning_rate": 4.8858094981278524e-05, "loss": 74.1822, "step": 46730 }, { "epoch": 0.18883551432830875, "grad_norm": 623.146484375, "learning_rate": 4.8857051829645485e-05, "loss": 67.047, "step": 46740 }, { "epoch": 0.1888759155936764, "grad_norm": 2191.332275390625, "learning_rate": 4.8856008212906925e-05, "loss": 74.4607, "step": 46750 }, { "epoch": 0.18891631685904403, "grad_norm": 796.4672241210938, "learning_rate": 4.88549641310832e-05, "loss": 65.4805, "step": 46760 }, { "epoch": 0.18895671812441164, "grad_norm": 897.36181640625, "learning_rate": 4.885391958419468e-05, "loss": 104.9449, "step": 46770 }, { "epoch": 0.18899711938977928, "grad_norm": 4131.201171875, "learning_rate": 4.885287457226172e-05, "loss": 109.5929, "step": 46780 }, { "epoch": 0.18903752065514692, "grad_norm": 1081.7864990234375, "learning_rate": 4.885182909530468e-05, "loss": 101.8282, "step": 46790 }, { "epoch": 0.18907792192051456, "grad_norm": 761.68408203125, "learning_rate": 4.885078315334395e-05, "loss": 59.1006, "step": 46800 }, { "epoch": 0.18911832318588218, "grad_norm": 1586.106689453125, "learning_rate": 4.884973674639993e-05, "loss": 71.1058, "step": 46810 }, { "epoch": 0.18915872445124982, "grad_norm": 1164.787841796875, "learning_rate": 4.884868987449301e-05, "loss": 64.9034, "step": 46820 }, { "epoch": 0.18919912571661746, "grad_norm": 629.8527221679688, "learning_rate": 4.8847642537643604e-05, "loss": 71.8605, "step": 46830 }, { "epoch": 0.18923952698198507, "grad_norm": 824.6557006835938, "learning_rate": 4.884659473587213e-05, "loss": 91.3435, "step": 46840 }, { "epoch": 0.1892799282473527, "grad_norm": 213.20458984375, "learning_rate": 4.884554646919901e-05, "loss": 64.7744, "step": 46850 }, { "epoch": 0.18932032951272035, "grad_norm": 957.3148803710938, "learning_rate": 4.884449773764469e-05, "loss": 68.4271, "step": 46860 }, { "epoch": 0.18936073077808796, "grad_norm": 1541.1634521484375, "learning_rate": 4.884344854122961e-05, "loss": 93.5629, "step": 46870 }, { "epoch": 0.1894011320434556, "grad_norm": 1493.8668212890625, "learning_rate": 4.884239887997423e-05, "loss": 80.8613, "step": 46880 }, { "epoch": 0.18944153330882324, "grad_norm": 678.560546875, "learning_rate": 4.8841348753899e-05, "loss": 87.8491, "step": 46890 }, { "epoch": 0.18948193457419085, "grad_norm": 711.7306518554688, "learning_rate": 4.88402981630244e-05, "loss": 93.6465, "step": 46900 }, { "epoch": 0.1895223358395585, "grad_norm": 554.8831176757812, "learning_rate": 4.883924710737092e-05, "loss": 73.9221, "step": 46910 }, { "epoch": 0.18956273710492613, "grad_norm": 1330.556884765625, "learning_rate": 4.8838195586959046e-05, "loss": 79.4741, "step": 46920 }, { "epoch": 0.18960313837029374, "grad_norm": 598.888427734375, "learning_rate": 4.883714360180927e-05, "loss": 66.7534, "step": 46930 }, { "epoch": 0.18964353963566138, "grad_norm": 614.5370483398438, "learning_rate": 4.883609115194211e-05, "loss": 90.6562, "step": 46940 }, { "epoch": 0.18968394090102902, "grad_norm": 1026.0899658203125, "learning_rate": 4.883503823737808e-05, "loss": 86.146, "step": 46950 }, { "epoch": 0.18972434216639666, "grad_norm": 1076.3065185546875, "learning_rate": 4.8833984858137715e-05, "loss": 74.9594, "step": 46960 }, { "epoch": 0.18976474343176428, "grad_norm": 555.6848754882812, "learning_rate": 4.8832931014241534e-05, "loss": 69.4619, "step": 46970 }, { "epoch": 0.18980514469713192, "grad_norm": 1051.6075439453125, "learning_rate": 4.88318767057101e-05, "loss": 67.6875, "step": 46980 }, { "epoch": 0.18984554596249956, "grad_norm": 1332.5283203125, "learning_rate": 4.883082193256397e-05, "loss": 73.1094, "step": 46990 }, { "epoch": 0.18988594722786717, "grad_norm": 1304.3714599609375, "learning_rate": 4.882976669482367e-05, "loss": 91.2835, "step": 47000 }, { "epoch": 0.1899263484932348, "grad_norm": 1050.1197509765625, "learning_rate": 4.882871099250982e-05, "loss": 96.9666, "step": 47010 }, { "epoch": 0.18996674975860245, "grad_norm": 2204.67626953125, "learning_rate": 4.882765482564298e-05, "loss": 79.6932, "step": 47020 }, { "epoch": 0.19000715102397006, "grad_norm": 308.5193786621094, "learning_rate": 4.882659819424374e-05, "loss": 60.4664, "step": 47030 }, { "epoch": 0.1900475522893377, "grad_norm": 760.51953125, "learning_rate": 4.8825541098332706e-05, "loss": 93.623, "step": 47040 }, { "epoch": 0.19008795355470534, "grad_norm": 1053.7843017578125, "learning_rate": 4.882448353793048e-05, "loss": 72.0362, "step": 47050 }, { "epoch": 0.19012835482007295, "grad_norm": 510.9100341796875, "learning_rate": 4.8823425513057674e-05, "loss": 76.0767, "step": 47060 }, { "epoch": 0.1901687560854406, "grad_norm": 1290.3902587890625, "learning_rate": 4.8822367023734925e-05, "loss": 76.7432, "step": 47070 }, { "epoch": 0.19020915735080823, "grad_norm": 669.0762939453125, "learning_rate": 4.8821308069982867e-05, "loss": 72.9376, "step": 47080 }, { "epoch": 0.19024955861617585, "grad_norm": 708.4724731445312, "learning_rate": 4.8820248651822145e-05, "loss": 74.2905, "step": 47090 }, { "epoch": 0.19028995988154349, "grad_norm": 810.4485473632812, "learning_rate": 4.8819188769273414e-05, "loss": 85.8345, "step": 47100 }, { "epoch": 0.19033036114691113, "grad_norm": 1206.7943115234375, "learning_rate": 4.8818128422357335e-05, "loss": 62.0077, "step": 47110 }, { "epoch": 0.19037076241227877, "grad_norm": 763.1806030273438, "learning_rate": 4.881706761109458e-05, "loss": 80.4839, "step": 47120 }, { "epoch": 0.19041116367764638, "grad_norm": 1197.156005859375, "learning_rate": 4.8816006335505825e-05, "loss": 130.0592, "step": 47130 }, { "epoch": 0.19045156494301402, "grad_norm": 522.46484375, "learning_rate": 4.8814944595611776e-05, "loss": 71.9689, "step": 47140 }, { "epoch": 0.19049196620838166, "grad_norm": 653.2218627929688, "learning_rate": 4.881388239143311e-05, "loss": 88.4613, "step": 47150 }, { "epoch": 0.19053236747374927, "grad_norm": 696.4855346679688, "learning_rate": 4.881281972299055e-05, "loss": 99.2132, "step": 47160 }, { "epoch": 0.1905727687391169, "grad_norm": 779.6743774414062, "learning_rate": 4.8811756590304815e-05, "loss": 54.9467, "step": 47170 }, { "epoch": 0.19061317000448455, "grad_norm": 489.7877197265625, "learning_rate": 4.881069299339662e-05, "loss": 98.4642, "step": 47180 }, { "epoch": 0.19065357126985216, "grad_norm": 1307.5743408203125, "learning_rate": 4.880962893228671e-05, "loss": 77.5308, "step": 47190 }, { "epoch": 0.1906939725352198, "grad_norm": 1113.280517578125, "learning_rate": 4.880856440699582e-05, "loss": 65.3198, "step": 47200 }, { "epoch": 0.19073437380058744, "grad_norm": 859.1810913085938, "learning_rate": 4.880749941754471e-05, "loss": 119.8937, "step": 47210 }, { "epoch": 0.19077477506595505, "grad_norm": 676.0623779296875, "learning_rate": 4.8806433963954154e-05, "loss": 88.6286, "step": 47220 }, { "epoch": 0.1908151763313227, "grad_norm": 922.7902221679688, "learning_rate": 4.880536804624491e-05, "loss": 102.1759, "step": 47230 }, { "epoch": 0.19085557759669033, "grad_norm": 1272.20068359375, "learning_rate": 4.880430166443775e-05, "loss": 107.2287, "step": 47240 }, { "epoch": 0.19089597886205795, "grad_norm": 1099.0933837890625, "learning_rate": 4.880323481855347e-05, "loss": 71.5676, "step": 47250 }, { "epoch": 0.1909363801274256, "grad_norm": 958.9481201171875, "learning_rate": 4.880216750861288e-05, "loss": 51.0081, "step": 47260 }, { "epoch": 0.19097678139279323, "grad_norm": 850.7756958007812, "learning_rate": 4.880109973463678e-05, "loss": 102.9255, "step": 47270 }, { "epoch": 0.19101718265816087, "grad_norm": 522.798095703125, "learning_rate": 4.880003149664599e-05, "loss": 73.2628, "step": 47280 }, { "epoch": 0.19105758392352848, "grad_norm": 1138.0860595703125, "learning_rate": 4.879896279466133e-05, "loss": 93.8309, "step": 47290 }, { "epoch": 0.19109798518889612, "grad_norm": 877.7420654296875, "learning_rate": 4.8797893628703635e-05, "loss": 68.0124, "step": 47300 }, { "epoch": 0.19113838645426376, "grad_norm": 372.2701416015625, "learning_rate": 4.879682399879375e-05, "loss": 83.2249, "step": 47310 }, { "epoch": 0.19117878771963137, "grad_norm": 600.6828002929688, "learning_rate": 4.8795753904952534e-05, "loss": 69.8246, "step": 47320 }, { "epoch": 0.191219188984999, "grad_norm": 1143.0029296875, "learning_rate": 4.879468334720085e-05, "loss": 61.1958, "step": 47330 }, { "epoch": 0.19125959025036665, "grad_norm": 1139.1126708984375, "learning_rate": 4.879361232555956e-05, "loss": 79.5726, "step": 47340 }, { "epoch": 0.19129999151573426, "grad_norm": 1177.59033203125, "learning_rate": 4.879254084004955e-05, "loss": 84.1079, "step": 47350 }, { "epoch": 0.1913403927811019, "grad_norm": 643.5814819335938, "learning_rate": 4.8791468890691696e-05, "loss": 89.2118, "step": 47360 }, { "epoch": 0.19138079404646954, "grad_norm": 273.9512939453125, "learning_rate": 4.879039647750692e-05, "loss": 72.0723, "step": 47370 }, { "epoch": 0.19142119531183716, "grad_norm": 670.9833984375, "learning_rate": 4.8789323600516104e-05, "loss": 79.4604, "step": 47380 }, { "epoch": 0.1914615965772048, "grad_norm": 573.1339721679688, "learning_rate": 4.8788250259740185e-05, "loss": 57.4144, "step": 47390 }, { "epoch": 0.19150199784257244, "grad_norm": 746.1387939453125, "learning_rate": 4.878717645520008e-05, "loss": 71.1703, "step": 47400 }, { "epoch": 0.19154239910794005, "grad_norm": 770.6351318359375, "learning_rate": 4.878610218691673e-05, "loss": 71.4196, "step": 47410 }, { "epoch": 0.1915828003733077, "grad_norm": 419.6976318359375, "learning_rate": 4.878502745491106e-05, "loss": 55.445, "step": 47420 }, { "epoch": 0.19162320163867533, "grad_norm": 4431.18115234375, "learning_rate": 4.8783952259204036e-05, "loss": 75.606, "step": 47430 }, { "epoch": 0.19166360290404297, "grad_norm": 534.2245483398438, "learning_rate": 4.878287659981662e-05, "loss": 74.2982, "step": 47440 }, { "epoch": 0.19170400416941058, "grad_norm": 1988.7900390625, "learning_rate": 4.878180047676978e-05, "loss": 100.465, "step": 47450 }, { "epoch": 0.19174440543477822, "grad_norm": 605.2179565429688, "learning_rate": 4.87807238900845e-05, "loss": 54.1781, "step": 47460 }, { "epoch": 0.19178480670014586, "grad_norm": 1887.808837890625, "learning_rate": 4.8779646839781765e-05, "loss": 103.9908, "step": 47470 }, { "epoch": 0.19182520796551347, "grad_norm": 1397.8228759765625, "learning_rate": 4.877856932588257e-05, "loss": 112.4378, "step": 47480 }, { "epoch": 0.1918656092308811, "grad_norm": 1550.006103515625, "learning_rate": 4.877749134840792e-05, "loss": 104.1645, "step": 47490 }, { "epoch": 0.19190601049624875, "grad_norm": 555.1295166015625, "learning_rate": 4.877641290737884e-05, "loss": 61.3664, "step": 47500 }, { "epoch": 0.19194641176161636, "grad_norm": 1102.827392578125, "learning_rate": 4.877533400281635e-05, "loss": 119.7094, "step": 47510 }, { "epoch": 0.191986813026984, "grad_norm": 890.612548828125, "learning_rate": 4.877425463474148e-05, "loss": 75.4402, "step": 47520 }, { "epoch": 0.19202721429235164, "grad_norm": 586.08056640625, "learning_rate": 4.877317480317528e-05, "loss": 87.8783, "step": 47530 }, { "epoch": 0.19206761555771926, "grad_norm": 574.8908081054688, "learning_rate": 4.8772094508138796e-05, "loss": 88.9533, "step": 47540 }, { "epoch": 0.1921080168230869, "grad_norm": 761.5361938476562, "learning_rate": 4.877101374965308e-05, "loss": 62.7146, "step": 47550 }, { "epoch": 0.19214841808845454, "grad_norm": 709.9255981445312, "learning_rate": 4.8769932527739225e-05, "loss": 78.0876, "step": 47560 }, { "epoch": 0.19218881935382215, "grad_norm": 1276.592529296875, "learning_rate": 4.87688508424183e-05, "loss": 74.2328, "step": 47570 }, { "epoch": 0.1922292206191898, "grad_norm": 1980.7711181640625, "learning_rate": 4.876776869371139e-05, "loss": 84.7106, "step": 47580 }, { "epoch": 0.19226962188455743, "grad_norm": 1390.198486328125, "learning_rate": 4.876668608163959e-05, "loss": 89.8972, "step": 47590 }, { "epoch": 0.19231002314992507, "grad_norm": 413.9287109375, "learning_rate": 4.8765603006224006e-05, "loss": 95.8701, "step": 47600 }, { "epoch": 0.19235042441529268, "grad_norm": 1712.273193359375, "learning_rate": 4.876451946748576e-05, "loss": 123.683, "step": 47610 }, { "epoch": 0.19239082568066032, "grad_norm": 1209.84423828125, "learning_rate": 4.8763435465445964e-05, "loss": 102.8881, "step": 47620 }, { "epoch": 0.19243122694602796, "grad_norm": 919.6061401367188, "learning_rate": 4.8762351000125766e-05, "loss": 83.7829, "step": 47630 }, { "epoch": 0.19247162821139557, "grad_norm": 938.0872192382812, "learning_rate": 4.87612660715463e-05, "loss": 43.0572, "step": 47640 }, { "epoch": 0.1925120294767632, "grad_norm": 1763.4127197265625, "learning_rate": 4.876018067972872e-05, "loss": 91.6115, "step": 47650 }, { "epoch": 0.19255243074213085, "grad_norm": 2353.800537109375, "learning_rate": 4.8759094824694184e-05, "loss": 79.1209, "step": 47660 }, { "epoch": 0.19259283200749847, "grad_norm": 1034.469482421875, "learning_rate": 4.875800850646387e-05, "loss": 63.04, "step": 47670 }, { "epoch": 0.1926332332728661, "grad_norm": 497.1041564941406, "learning_rate": 4.8756921725058934e-05, "loss": 78.9805, "step": 47680 }, { "epoch": 0.19267363453823375, "grad_norm": 410.94970703125, "learning_rate": 4.875583448050059e-05, "loss": 77.002, "step": 47690 }, { "epoch": 0.19271403580360136, "grad_norm": 1801.6767578125, "learning_rate": 4.875474677281002e-05, "loss": 80.726, "step": 47700 }, { "epoch": 0.192754437068969, "grad_norm": 946.7315673828125, "learning_rate": 4.8753658602008425e-05, "loss": 64.131, "step": 47710 }, { "epoch": 0.19279483833433664, "grad_norm": 1104.990478515625, "learning_rate": 4.875256996811703e-05, "loss": 63.1742, "step": 47720 }, { "epoch": 0.19283523959970425, "grad_norm": 889.265869140625, "learning_rate": 4.875148087115706e-05, "loss": 121.2112, "step": 47730 }, { "epoch": 0.1928756408650719, "grad_norm": 502.1475830078125, "learning_rate": 4.875039131114975e-05, "loss": 81.0771, "step": 47740 }, { "epoch": 0.19291604213043953, "grad_norm": 2143.898193359375, "learning_rate": 4.874930128811631e-05, "loss": 87.0424, "step": 47750 }, { "epoch": 0.19295644339580717, "grad_norm": 792.10693359375, "learning_rate": 4.874821080207803e-05, "loss": 73.0605, "step": 47760 }, { "epoch": 0.19299684466117478, "grad_norm": 483.91546630859375, "learning_rate": 4.8747119853056156e-05, "loss": 88.888, "step": 47770 }, { "epoch": 0.19303724592654242, "grad_norm": 1531.7535400390625, "learning_rate": 4.8746028441071943e-05, "loss": 59.7669, "step": 47780 }, { "epoch": 0.19307764719191006, "grad_norm": 677.5689086914062, "learning_rate": 4.874493656614669e-05, "loss": 74.3335, "step": 47790 }, { "epoch": 0.19311804845727767, "grad_norm": 515.4765625, "learning_rate": 4.874384422830167e-05, "loss": 56.0753, "step": 47800 }, { "epoch": 0.19315844972264531, "grad_norm": 763.7691040039062, "learning_rate": 4.8742751427558186e-05, "loss": 77.0294, "step": 47810 }, { "epoch": 0.19319885098801295, "grad_norm": 1666.2574462890625, "learning_rate": 4.874165816393754e-05, "loss": 46.0998, "step": 47820 }, { "epoch": 0.19323925225338057, "grad_norm": 753.572021484375, "learning_rate": 4.874056443746104e-05, "loss": 55.663, "step": 47830 }, { "epoch": 0.1932796535187482, "grad_norm": 664.1289672851562, "learning_rate": 4.873947024815002e-05, "loss": 145.2848, "step": 47840 }, { "epoch": 0.19332005478411585, "grad_norm": 910.7630615234375, "learning_rate": 4.87383755960258e-05, "loss": 100.3147, "step": 47850 }, { "epoch": 0.19336045604948346, "grad_norm": 1756.9093017578125, "learning_rate": 4.8737280481109724e-05, "loss": 103.6948, "step": 47860 }, { "epoch": 0.1934008573148511, "grad_norm": 587.9384155273438, "learning_rate": 4.8736184903423155e-05, "loss": 90.1571, "step": 47870 }, { "epoch": 0.19344125858021874, "grad_norm": 852.3531494140625, "learning_rate": 4.873508886298743e-05, "loss": 72.187, "step": 47880 }, { "epoch": 0.19348165984558635, "grad_norm": 844.1437377929688, "learning_rate": 4.8733992359823936e-05, "loss": 76.1646, "step": 47890 }, { "epoch": 0.193522061110954, "grad_norm": 1602.298095703125, "learning_rate": 4.8732895393954036e-05, "loss": 84.8224, "step": 47900 }, { "epoch": 0.19356246237632163, "grad_norm": 870.3892211914062, "learning_rate": 4.8731797965399125e-05, "loss": 115.4123, "step": 47910 }, { "epoch": 0.19360286364168927, "grad_norm": 1025.730224609375, "learning_rate": 4.873070007418059e-05, "loss": 74.2097, "step": 47920 }, { "epoch": 0.19364326490705688, "grad_norm": 1289.6861572265625, "learning_rate": 4.8729601720319845e-05, "loss": 112.8718, "step": 47930 }, { "epoch": 0.19368366617242452, "grad_norm": 592.2373657226562, "learning_rate": 4.8728502903838295e-05, "loss": 63.9621, "step": 47940 }, { "epoch": 0.19372406743779216, "grad_norm": 5277.1982421875, "learning_rate": 4.8727403624757365e-05, "loss": 100.8859, "step": 47950 }, { "epoch": 0.19376446870315978, "grad_norm": 816.9944458007812, "learning_rate": 4.872630388309849e-05, "loss": 82.8986, "step": 47960 }, { "epoch": 0.19380486996852742, "grad_norm": 181.56996154785156, "learning_rate": 4.8725203678883104e-05, "loss": 65.7631, "step": 47970 }, { "epoch": 0.19384527123389506, "grad_norm": 1124.836181640625, "learning_rate": 4.872410301213265e-05, "loss": 81.7168, "step": 47980 }, { "epoch": 0.19388567249926267, "grad_norm": 1639.8275146484375, "learning_rate": 4.8723001882868604e-05, "loss": 103.2546, "step": 47990 }, { "epoch": 0.1939260737646303, "grad_norm": 633.078857421875, "learning_rate": 4.8721900291112415e-05, "loss": 98.4941, "step": 48000 }, { "epoch": 0.19396647502999795, "grad_norm": 766.0423583984375, "learning_rate": 4.872079823688557e-05, "loss": 51.5216, "step": 48010 }, { "epoch": 0.19400687629536556, "grad_norm": 755.6162719726562, "learning_rate": 4.871969572020955e-05, "loss": 76.6551, "step": 48020 }, { "epoch": 0.1940472775607332, "grad_norm": 8129.7333984375, "learning_rate": 4.871859274110585e-05, "loss": 132.5075, "step": 48030 }, { "epoch": 0.19408767882610084, "grad_norm": 1120.7578125, "learning_rate": 4.871748929959598e-05, "loss": 87.0844, "step": 48040 }, { "epoch": 0.19412808009146845, "grad_norm": 651.3961181640625, "learning_rate": 4.8716385395701435e-05, "loss": 61.7545, "step": 48050 }, { "epoch": 0.1941684813568361, "grad_norm": 646.9969482421875, "learning_rate": 4.871528102944376e-05, "loss": 100.9905, "step": 48060 }, { "epoch": 0.19420888262220373, "grad_norm": 478.8792419433594, "learning_rate": 4.8714176200844464e-05, "loss": 70.1561, "step": 48070 }, { "epoch": 0.19424928388757137, "grad_norm": 617.067626953125, "learning_rate": 4.8713070909925094e-05, "loss": 87.2428, "step": 48080 }, { "epoch": 0.19428968515293898, "grad_norm": 773.7765502929688, "learning_rate": 4.8711965156707195e-05, "loss": 78.5379, "step": 48090 }, { "epoch": 0.19433008641830662, "grad_norm": 1457.695068359375, "learning_rate": 4.871085894121233e-05, "loss": 106.1761, "step": 48100 }, { "epoch": 0.19437048768367426, "grad_norm": 1464.89013671875, "learning_rate": 4.8709752263462064e-05, "loss": 59.2071, "step": 48110 }, { "epoch": 0.19441088894904188, "grad_norm": 1820.7398681640625, "learning_rate": 4.870864512347797e-05, "loss": 71.2924, "step": 48120 }, { "epoch": 0.19445129021440952, "grad_norm": 596.8287353515625, "learning_rate": 4.8707537521281635e-05, "loss": 60.9779, "step": 48130 }, { "epoch": 0.19449169147977716, "grad_norm": 1118.587890625, "learning_rate": 4.870642945689465e-05, "loss": 97.0064, "step": 48140 }, { "epoch": 0.19453209274514477, "grad_norm": 1756.2308349609375, "learning_rate": 4.8705320930338615e-05, "loss": 63.2261, "step": 48150 }, { "epoch": 0.1945724940105124, "grad_norm": 846.0660400390625, "learning_rate": 4.870421194163515e-05, "loss": 118.8603, "step": 48160 }, { "epoch": 0.19461289527588005, "grad_norm": 1581.7791748046875, "learning_rate": 4.8703102490805865e-05, "loss": 122.0613, "step": 48170 }, { "epoch": 0.19465329654124766, "grad_norm": 1016.5093383789062, "learning_rate": 4.87019925778724e-05, "loss": 60.2842, "step": 48180 }, { "epoch": 0.1946936978066153, "grad_norm": 717.7647094726562, "learning_rate": 4.870088220285638e-05, "loss": 79.8649, "step": 48190 }, { "epoch": 0.19473409907198294, "grad_norm": 1193.3416748046875, "learning_rate": 4.8699771365779453e-05, "loss": 60.5539, "step": 48200 }, { "epoch": 0.19477450033735055, "grad_norm": 2215.272216796875, "learning_rate": 4.8698660066663294e-05, "loss": 87.0759, "step": 48210 }, { "epoch": 0.1948149016027182, "grad_norm": 1735.73828125, "learning_rate": 4.869754830552956e-05, "loss": 96.5737, "step": 48220 }, { "epoch": 0.19485530286808583, "grad_norm": 375.5717468261719, "learning_rate": 4.869643608239991e-05, "loss": 88.8928, "step": 48230 }, { "epoch": 0.19489570413345347, "grad_norm": 612.527587890625, "learning_rate": 4.8695323397296044e-05, "loss": 66.2606, "step": 48240 }, { "epoch": 0.19493610539882109, "grad_norm": 1690.7991943359375, "learning_rate": 4.869421025023965e-05, "loss": 92.1369, "step": 48250 }, { "epoch": 0.19497650666418873, "grad_norm": 1483.668212890625, "learning_rate": 4.8693096641252424e-05, "loss": 65.0718, "step": 48260 }, { "epoch": 0.19501690792955637, "grad_norm": 601.1848754882812, "learning_rate": 4.8691982570356084e-05, "loss": 49.7445, "step": 48270 }, { "epoch": 0.19505730919492398, "grad_norm": 1077.0008544921875, "learning_rate": 4.8690868037572346e-05, "loss": 71.7525, "step": 48280 }, { "epoch": 0.19509771046029162, "grad_norm": 1292.0035400390625, "learning_rate": 4.8689753042922935e-05, "loss": 75.1074, "step": 48290 }, { "epoch": 0.19513811172565926, "grad_norm": 1112.646484375, "learning_rate": 4.8688637586429595e-05, "loss": 82.0207, "step": 48300 }, { "epoch": 0.19517851299102687, "grad_norm": 1545.366943359375, "learning_rate": 4.8687521668114064e-05, "loss": 102.4248, "step": 48310 }, { "epoch": 0.1952189142563945, "grad_norm": 814.1812744140625, "learning_rate": 4.8686405287998116e-05, "loss": 111.8968, "step": 48320 }, { "epoch": 0.19525931552176215, "grad_norm": 632.5703735351562, "learning_rate": 4.8685288446103495e-05, "loss": 78.0891, "step": 48330 }, { "epoch": 0.19529971678712976, "grad_norm": 2291.05859375, "learning_rate": 4.8684171142451986e-05, "loss": 63.0176, "step": 48340 }, { "epoch": 0.1953401180524974, "grad_norm": 954.6275024414062, "learning_rate": 4.8683053377065356e-05, "loss": 113.7088, "step": 48350 }, { "epoch": 0.19538051931786504, "grad_norm": 619.459228515625, "learning_rate": 4.8681935149965416e-05, "loss": 66.4437, "step": 48360 }, { "epoch": 0.19542092058323265, "grad_norm": 981.1046142578125, "learning_rate": 4.868081646117395e-05, "loss": 81.0435, "step": 48370 }, { "epoch": 0.1954613218486003, "grad_norm": 714.146728515625, "learning_rate": 4.867969731071279e-05, "loss": 101.9001, "step": 48380 }, { "epoch": 0.19550172311396793, "grad_norm": 1931.5303955078125, "learning_rate": 4.8678577698603734e-05, "loss": 95.807, "step": 48390 }, { "epoch": 0.19554212437933557, "grad_norm": 3275.3095703125, "learning_rate": 4.867745762486861e-05, "loss": 73.3455, "step": 48400 }, { "epoch": 0.1955825256447032, "grad_norm": 1230.974609375, "learning_rate": 4.867633708952926e-05, "loss": 77.9758, "step": 48410 }, { "epoch": 0.19562292691007083, "grad_norm": 849.453125, "learning_rate": 4.867521609260754e-05, "loss": 73.4305, "step": 48420 }, { "epoch": 0.19566332817543847, "grad_norm": 903.135009765625, "learning_rate": 4.867409463412528e-05, "loss": 56.7117, "step": 48430 }, { "epoch": 0.19570372944080608, "grad_norm": 1139.8895263671875, "learning_rate": 4.8672972714104357e-05, "loss": 67.9619, "step": 48440 }, { "epoch": 0.19574413070617372, "grad_norm": 908.2789916992188, "learning_rate": 4.867185033256665e-05, "loss": 62.4522, "step": 48450 }, { "epoch": 0.19578453197154136, "grad_norm": 1399.5394287109375, "learning_rate": 4.8670727489534034e-05, "loss": 113.5989, "step": 48460 }, { "epoch": 0.19582493323690897, "grad_norm": 2880.655029296875, "learning_rate": 4.8669604185028394e-05, "loss": 107.0346, "step": 48470 }, { "epoch": 0.1958653345022766, "grad_norm": 689.0923461914062, "learning_rate": 4.866848041907164e-05, "loss": 89.6595, "step": 48480 }, { "epoch": 0.19590573576764425, "grad_norm": 2163.55029296875, "learning_rate": 4.866735619168568e-05, "loss": 87.9743, "step": 48490 }, { "epoch": 0.19594613703301186, "grad_norm": 816.2272338867188, "learning_rate": 4.8666231502892415e-05, "loss": 102.1195, "step": 48500 }, { "epoch": 0.1959865382983795, "grad_norm": 3147.00830078125, "learning_rate": 4.866510635271379e-05, "loss": 98.8874, "step": 48510 }, { "epoch": 0.19602693956374714, "grad_norm": 693.9053955078125, "learning_rate": 4.8663980741171724e-05, "loss": 76.9899, "step": 48520 }, { "epoch": 0.19606734082911476, "grad_norm": 833.5950317382812, "learning_rate": 4.866285466828817e-05, "loss": 56.6829, "step": 48530 }, { "epoch": 0.1961077420944824, "grad_norm": 945.19873046875, "learning_rate": 4.86617281340851e-05, "loss": 82.8859, "step": 48540 }, { "epoch": 0.19614814335985004, "grad_norm": 607.2847290039062, "learning_rate": 4.866060113858444e-05, "loss": 71.8742, "step": 48550 }, { "epoch": 0.19618854462521768, "grad_norm": 787.0588989257812, "learning_rate": 4.865947368180818e-05, "loss": 81.9591, "step": 48560 }, { "epoch": 0.1962289458905853, "grad_norm": 406.9506530761719, "learning_rate": 4.865834576377831e-05, "loss": 42.3592, "step": 48570 }, { "epoch": 0.19626934715595293, "grad_norm": 525.7552490234375, "learning_rate": 4.86572173845168e-05, "loss": 109.6562, "step": 48580 }, { "epoch": 0.19630974842132057, "grad_norm": 1523.6407470703125, "learning_rate": 4.865608854404566e-05, "loss": 133.584, "step": 48590 }, { "epoch": 0.19635014968668818, "grad_norm": 620.2904052734375, "learning_rate": 4.8654959242386896e-05, "loss": 72.112, "step": 48600 }, { "epoch": 0.19639055095205582, "grad_norm": 926.3845825195312, "learning_rate": 4.865382947956253e-05, "loss": 106.2173, "step": 48610 }, { "epoch": 0.19643095221742346, "grad_norm": 1164.56982421875, "learning_rate": 4.865269925559457e-05, "loss": 75.1725, "step": 48620 }, { "epoch": 0.19647135348279107, "grad_norm": 946.3316650390625, "learning_rate": 4.865156857050507e-05, "loss": 91.8206, "step": 48630 }, { "epoch": 0.1965117547481587, "grad_norm": 1000.8588256835938, "learning_rate": 4.865043742431605e-05, "loss": 68.0882, "step": 48640 }, { "epoch": 0.19655215601352635, "grad_norm": 1161.449951171875, "learning_rate": 4.8649305817049596e-05, "loss": 70.0598, "step": 48650 }, { "epoch": 0.19659255727889396, "grad_norm": 1116.1414794921875, "learning_rate": 4.864817374872773e-05, "loss": 100.4134, "step": 48660 }, { "epoch": 0.1966329585442616, "grad_norm": 1149.848388671875, "learning_rate": 4.864704121937256e-05, "loss": 59.9185, "step": 48670 }, { "epoch": 0.19667335980962924, "grad_norm": 932.8038940429688, "learning_rate": 4.8645908229006135e-05, "loss": 49.7766, "step": 48680 }, { "epoch": 0.19671376107499686, "grad_norm": 592.9822387695312, "learning_rate": 4.864477477765056e-05, "loss": 46.122, "step": 48690 }, { "epoch": 0.1967541623403645, "grad_norm": 484.1387023925781, "learning_rate": 4.864364086532792e-05, "loss": 60.0866, "step": 48700 }, { "epoch": 0.19679456360573214, "grad_norm": 1089.63818359375, "learning_rate": 4.8642506492060335e-05, "loss": 102.3337, "step": 48710 }, { "epoch": 0.19683496487109975, "grad_norm": 1087.8228759765625, "learning_rate": 4.8641371657869916e-05, "loss": 83.8455, "step": 48720 }, { "epoch": 0.1968753661364674, "grad_norm": 465.7780456542969, "learning_rate": 4.864023636277878e-05, "loss": 58.4958, "step": 48730 }, { "epoch": 0.19691576740183503, "grad_norm": 1844.8414306640625, "learning_rate": 4.863910060680907e-05, "loss": 84.4384, "step": 48740 }, { "epoch": 0.19695616866720267, "grad_norm": 1077.40478515625, "learning_rate": 4.8637964389982926e-05, "loss": 134.2446, "step": 48750 }, { "epoch": 0.19699656993257028, "grad_norm": 755.9237060546875, "learning_rate": 4.863682771232248e-05, "loss": 101.4888, "step": 48760 }, { "epoch": 0.19703697119793792, "grad_norm": 947.1353149414062, "learning_rate": 4.8635690573849926e-05, "loss": 99.0996, "step": 48770 }, { "epoch": 0.19707737246330556, "grad_norm": 758.8486938476562, "learning_rate": 4.8634552974587414e-05, "loss": 65.2204, "step": 48780 }, { "epoch": 0.19711777372867317, "grad_norm": 2005.01025390625, "learning_rate": 4.863341491455712e-05, "loss": 119.6658, "step": 48790 }, { "epoch": 0.1971581749940408, "grad_norm": 980.075439453125, "learning_rate": 4.863227639378124e-05, "loss": 92.1868, "step": 48800 }, { "epoch": 0.19719857625940845, "grad_norm": 511.9769592285156, "learning_rate": 4.8631137412281954e-05, "loss": 54.6814, "step": 48810 }, { "epoch": 0.19723897752477607, "grad_norm": 993.7817993164062, "learning_rate": 4.862999797008149e-05, "loss": 90.118, "step": 48820 }, { "epoch": 0.1972793787901437, "grad_norm": 687.139404296875, "learning_rate": 4.8628858067202045e-05, "loss": 98.846, "step": 48830 }, { "epoch": 0.19731978005551135, "grad_norm": 1727.8497314453125, "learning_rate": 4.862771770366584e-05, "loss": 91.3292, "step": 48840 }, { "epoch": 0.19736018132087896, "grad_norm": 527.6170043945312, "learning_rate": 4.862657687949512e-05, "loss": 84.25, "step": 48850 }, { "epoch": 0.1974005825862466, "grad_norm": 657.8152465820312, "learning_rate": 4.862543559471212e-05, "loss": 49.7685, "step": 48860 }, { "epoch": 0.19744098385161424, "grad_norm": 537.2709350585938, "learning_rate": 4.8624293849339095e-05, "loss": 61.4911, "step": 48870 }, { "epoch": 0.19748138511698185, "grad_norm": 598.13671875, "learning_rate": 4.862315164339829e-05, "loss": 82.7801, "step": 48880 }, { "epoch": 0.1975217863823495, "grad_norm": 480.8949279785156, "learning_rate": 4.862200897691199e-05, "loss": 58.1646, "step": 48890 }, { "epoch": 0.19756218764771713, "grad_norm": 868.3237915039062, "learning_rate": 4.8620865849902456e-05, "loss": 73.8393, "step": 48900 }, { "epoch": 0.19760258891308477, "grad_norm": 451.89483642578125, "learning_rate": 4.861972226239199e-05, "loss": 63.7517, "step": 48910 }, { "epoch": 0.19764299017845238, "grad_norm": 598.8991088867188, "learning_rate": 4.861857821440287e-05, "loss": 76.1932, "step": 48920 }, { "epoch": 0.19768339144382002, "grad_norm": 717.087646484375, "learning_rate": 4.861743370595741e-05, "loss": 68.1542, "step": 48930 }, { "epoch": 0.19772379270918766, "grad_norm": 1464.67333984375, "learning_rate": 4.861628873707792e-05, "loss": 86.0041, "step": 48940 }, { "epoch": 0.19776419397455527, "grad_norm": 442.174560546875, "learning_rate": 4.861514330778673e-05, "loss": 81.9746, "step": 48950 }, { "epoch": 0.19780459523992291, "grad_norm": 1277.0028076171875, "learning_rate": 4.861399741810615e-05, "loss": 87.9091, "step": 48960 }, { "epoch": 0.19784499650529055, "grad_norm": 485.0671081542969, "learning_rate": 4.8612851068058544e-05, "loss": 57.811, "step": 48970 }, { "epoch": 0.19788539777065817, "grad_norm": 1858.374267578125, "learning_rate": 4.861170425766625e-05, "loss": 95.383, "step": 48980 }, { "epoch": 0.1979257990360258, "grad_norm": 944.0939331054688, "learning_rate": 4.861055698695162e-05, "loss": 104.2803, "step": 48990 }, { "epoch": 0.19796620030139345, "grad_norm": 970.7718505859375, "learning_rate": 4.860940925593703e-05, "loss": 100.1417, "step": 49000 }, { "epoch": 0.19800660156676106, "grad_norm": 1792.8084716796875, "learning_rate": 4.860826106464484e-05, "loss": 86.6734, "step": 49010 }, { "epoch": 0.1980470028321287, "grad_norm": 1215.9122314453125, "learning_rate": 4.8607112413097464e-05, "loss": 88.0753, "step": 49020 }, { "epoch": 0.19808740409749634, "grad_norm": 484.51873779296875, "learning_rate": 4.860596330131727e-05, "loss": 78.6937, "step": 49030 }, { "epoch": 0.19812780536286395, "grad_norm": 945.8279418945312, "learning_rate": 4.860481372932667e-05, "loss": 69.6853, "step": 49040 }, { "epoch": 0.1981682066282316, "grad_norm": 507.302978515625, "learning_rate": 4.860366369714807e-05, "loss": 91.7122, "step": 49050 }, { "epoch": 0.19820860789359923, "grad_norm": 921.6329956054688, "learning_rate": 4.8602513204803896e-05, "loss": 103.2615, "step": 49060 }, { "epoch": 0.19824900915896687, "grad_norm": 596.2329711914062, "learning_rate": 4.8601362252316574e-05, "loss": 103.5869, "step": 49070 }, { "epoch": 0.19828941042433448, "grad_norm": 1672.80615234375, "learning_rate": 4.860021083970855e-05, "loss": 83.6912, "step": 49080 }, { "epoch": 0.19832981168970212, "grad_norm": 271.271728515625, "learning_rate": 4.8599058967002254e-05, "loss": 74.6612, "step": 49090 }, { "epoch": 0.19837021295506976, "grad_norm": 497.50048828125, "learning_rate": 4.859790663422016e-05, "loss": 69.9331, "step": 49100 }, { "epoch": 0.19841061422043738, "grad_norm": 573.849365234375, "learning_rate": 4.8596753841384735e-05, "loss": 54.5267, "step": 49110 }, { "epoch": 0.19845101548580502, "grad_norm": 1481.6771240234375, "learning_rate": 4.859560058851844e-05, "loss": 63.2541, "step": 49120 }, { "epoch": 0.19849141675117266, "grad_norm": 3614.8828125, "learning_rate": 4.859444687564376e-05, "loss": 98.0111, "step": 49130 }, { "epoch": 0.19853181801654027, "grad_norm": 1466.5594482421875, "learning_rate": 4.859329270278319e-05, "loss": 67.2758, "step": 49140 }, { "epoch": 0.1985722192819079, "grad_norm": 611.2523803710938, "learning_rate": 4.859213806995924e-05, "loss": 99.23, "step": 49150 }, { "epoch": 0.19861262054727555, "grad_norm": 791.8298950195312, "learning_rate": 4.85909829771944e-05, "loss": 92.561, "step": 49160 }, { "epoch": 0.19865302181264316, "grad_norm": 1123.1046142578125, "learning_rate": 4.8589827424511216e-05, "loss": 90.6359, "step": 49170 }, { "epoch": 0.1986934230780108, "grad_norm": 0.0, "learning_rate": 4.858867141193219e-05, "loss": 75.7769, "step": 49180 }, { "epoch": 0.19873382434337844, "grad_norm": 654.7210693359375, "learning_rate": 4.858751493947987e-05, "loss": 39.3021, "step": 49190 }, { "epoch": 0.19877422560874605, "grad_norm": 1822.922607421875, "learning_rate": 4.858635800717681e-05, "loss": 112.5853, "step": 49200 }, { "epoch": 0.1988146268741137, "grad_norm": 422.10791015625, "learning_rate": 4.8585200615045555e-05, "loss": 66.1211, "step": 49210 }, { "epoch": 0.19885502813948133, "grad_norm": 1020.6856689453125, "learning_rate": 4.8584042763108675e-05, "loss": 70.5579, "step": 49220 }, { "epoch": 0.19889542940484897, "grad_norm": 628.3194580078125, "learning_rate": 4.858288445138873e-05, "loss": 110.4609, "step": 49230 }, { "epoch": 0.19893583067021658, "grad_norm": 1215.436767578125, "learning_rate": 4.8581725679908317e-05, "loss": 145.442, "step": 49240 }, { "epoch": 0.19897623193558422, "grad_norm": 255.2740478515625, "learning_rate": 4.858056644869002e-05, "loss": 74.1351, "step": 49250 }, { "epoch": 0.19901663320095186, "grad_norm": 686.521484375, "learning_rate": 4.8579406757756455e-05, "loss": 57.5146, "step": 49260 }, { "epoch": 0.19905703446631948, "grad_norm": 452.3636169433594, "learning_rate": 4.85782466071302e-05, "loss": 68.5748, "step": 49270 }, { "epoch": 0.19909743573168712, "grad_norm": 2242.72412109375, "learning_rate": 4.857708599683389e-05, "loss": 86.5858, "step": 49280 }, { "epoch": 0.19913783699705476, "grad_norm": 625.6463623046875, "learning_rate": 4.8575924926890145e-05, "loss": 50.7092, "step": 49290 }, { "epoch": 0.19917823826242237, "grad_norm": 558.0465698242188, "learning_rate": 4.8574763397321614e-05, "loss": 53.2504, "step": 49300 }, { "epoch": 0.19921863952779, "grad_norm": 932.68212890625, "learning_rate": 4.857360140815093e-05, "loss": 65.5452, "step": 49310 }, { "epoch": 0.19925904079315765, "grad_norm": 965.7637939453125, "learning_rate": 4.857243895940076e-05, "loss": 68.6537, "step": 49320 }, { "epoch": 0.19929944205852526, "grad_norm": 676.4684448242188, "learning_rate": 4.857127605109374e-05, "loss": 80.4764, "step": 49330 }, { "epoch": 0.1993398433238929, "grad_norm": 499.67083740234375, "learning_rate": 4.8570112683252565e-05, "loss": 96.8114, "step": 49340 }, { "epoch": 0.19938024458926054, "grad_norm": 1086.6888427734375, "learning_rate": 4.856894885589991e-05, "loss": 98.3765, "step": 49350 }, { "epoch": 0.19942064585462815, "grad_norm": 626.739990234375, "learning_rate": 4.856778456905846e-05, "loss": 64.8752, "step": 49360 }, { "epoch": 0.1994610471199958, "grad_norm": 464.48565673828125, "learning_rate": 4.856661982275093e-05, "loss": 52.9684, "step": 49370 }, { "epoch": 0.19950144838536343, "grad_norm": 536.5781860351562, "learning_rate": 4.8565454617e-05, "loss": 71.4869, "step": 49380 }, { "epoch": 0.19954184965073107, "grad_norm": 3326.29833984375, "learning_rate": 4.85642889518284e-05, "loss": 165.3745, "step": 49390 }, { "epoch": 0.19958225091609869, "grad_norm": 2225.459716796875, "learning_rate": 4.856312282725886e-05, "loss": 92.2177, "step": 49400 }, { "epoch": 0.19962265218146633, "grad_norm": 611.5123901367188, "learning_rate": 4.85619562433141e-05, "loss": 53.4561, "step": 49410 }, { "epoch": 0.19966305344683397, "grad_norm": 763.9044189453125, "learning_rate": 4.8560789200016884e-05, "loss": 65.2781, "step": 49420 }, { "epoch": 0.19970345471220158, "grad_norm": 1733.5440673828125, "learning_rate": 4.8559621697389946e-05, "loss": 149.0616, "step": 49430 }, { "epoch": 0.19974385597756922, "grad_norm": 411.6968688964844, "learning_rate": 4.855845373545605e-05, "loss": 71.7242, "step": 49440 }, { "epoch": 0.19978425724293686, "grad_norm": 258.44677734375, "learning_rate": 4.855728531423798e-05, "loss": 71.4265, "step": 49450 }, { "epoch": 0.19982465850830447, "grad_norm": 1239.368896484375, "learning_rate": 4.85561164337585e-05, "loss": 75.7033, "step": 49460 }, { "epoch": 0.1998650597736721, "grad_norm": 1232.6944580078125, "learning_rate": 4.85549470940404e-05, "loss": 54.9064, "step": 49470 }, { "epoch": 0.19990546103903975, "grad_norm": 698.7216796875, "learning_rate": 4.855377729510648e-05, "loss": 64.2161, "step": 49480 }, { "epoch": 0.19994586230440736, "grad_norm": 590.6932983398438, "learning_rate": 4.8552607036979553e-05, "loss": 59.4658, "step": 49490 }, { "epoch": 0.199986263569775, "grad_norm": 1102.7227783203125, "learning_rate": 4.855143631968242e-05, "loss": 64.1912, "step": 49500 }, { "epoch": 0.20002666483514264, "grad_norm": 2396.97119140625, "learning_rate": 4.855026514323792e-05, "loss": 101.3902, "step": 49510 }, { "epoch": 0.20006706610051025, "grad_norm": 622.559326171875, "learning_rate": 4.8549093507668865e-05, "loss": 91.4989, "step": 49520 }, { "epoch": 0.2001074673658779, "grad_norm": 1117.0147705078125, "learning_rate": 4.854792141299811e-05, "loss": 52.4889, "step": 49530 }, { "epoch": 0.20014786863124553, "grad_norm": 726.4915771484375, "learning_rate": 4.85467488592485e-05, "loss": 69.4123, "step": 49540 }, { "epoch": 0.20018826989661317, "grad_norm": 563.3633422851562, "learning_rate": 4.85455758464429e-05, "loss": 79.5041, "step": 49550 }, { "epoch": 0.2002286711619808, "grad_norm": 1249.004150390625, "learning_rate": 4.854440237460418e-05, "loss": 113.2019, "step": 49560 }, { "epoch": 0.20026907242734843, "grad_norm": 1637.2099609375, "learning_rate": 4.854322844375522e-05, "loss": 94.2531, "step": 49570 }, { "epoch": 0.20030947369271607, "grad_norm": 916.0922241210938, "learning_rate": 4.85420540539189e-05, "loss": 74.7405, "step": 49580 }, { "epoch": 0.20034987495808368, "grad_norm": 660.0616455078125, "learning_rate": 4.8540879205118106e-05, "loss": 63.3157, "step": 49590 }, { "epoch": 0.20039027622345132, "grad_norm": 1970.4896240234375, "learning_rate": 4.8539703897375755e-05, "loss": 93.7624, "step": 49600 }, { "epoch": 0.20043067748881896, "grad_norm": 765.5314331054688, "learning_rate": 4.853852813071476e-05, "loss": 83.952, "step": 49610 }, { "epoch": 0.20047107875418657, "grad_norm": 883.2976684570312, "learning_rate": 4.853735190515804e-05, "loss": 100.3142, "step": 49620 }, { "epoch": 0.2005114800195542, "grad_norm": 1285.146240234375, "learning_rate": 4.853617522072853e-05, "loss": 84.2881, "step": 49630 }, { "epoch": 0.20055188128492185, "grad_norm": 561.2415771484375, "learning_rate": 4.853499807744916e-05, "loss": 67.512, "step": 49640 }, { "epoch": 0.20059228255028946, "grad_norm": 759.5241088867188, "learning_rate": 4.85338204753429e-05, "loss": 70.0843, "step": 49650 }, { "epoch": 0.2006326838156571, "grad_norm": 1011.6978759765625, "learning_rate": 4.8532642414432674e-05, "loss": 91.1174, "step": 49660 }, { "epoch": 0.20067308508102474, "grad_norm": 710.1160888671875, "learning_rate": 4.853146389474148e-05, "loss": 78.4036, "step": 49670 }, { "epoch": 0.20071348634639236, "grad_norm": 1832.636474609375, "learning_rate": 4.853028491629228e-05, "loss": 69.5313, "step": 49680 }, { "epoch": 0.20075388761176, "grad_norm": 630.8060913085938, "learning_rate": 4.852910547910806e-05, "loss": 72.0492, "step": 49690 }, { "epoch": 0.20079428887712764, "grad_norm": 891.0122680664062, "learning_rate": 4.852792558321182e-05, "loss": 88.1902, "step": 49700 }, { "epoch": 0.20083469014249528, "grad_norm": 493.8849182128906, "learning_rate": 4.852674522862656e-05, "loss": 82.2126, "step": 49710 }, { "epoch": 0.2008750914078629, "grad_norm": 916.6065063476562, "learning_rate": 4.852556441537528e-05, "loss": 73.7906, "step": 49720 }, { "epoch": 0.20091549267323053, "grad_norm": 1729.7864990234375, "learning_rate": 4.852438314348101e-05, "loss": 98.6484, "step": 49730 }, { "epoch": 0.20095589393859817, "grad_norm": 971.2913818359375, "learning_rate": 4.852320141296679e-05, "loss": 74.1662, "step": 49740 }, { "epoch": 0.20099629520396578, "grad_norm": 673.4738159179688, "learning_rate": 4.852201922385564e-05, "loss": 87.5311, "step": 49750 }, { "epoch": 0.20103669646933342, "grad_norm": 700.3085327148438, "learning_rate": 4.852083657617061e-05, "loss": 119.7827, "step": 49760 }, { "epoch": 0.20107709773470106, "grad_norm": 574.7796630859375, "learning_rate": 4.851965346993478e-05, "loss": 118.024, "step": 49770 }, { "epoch": 0.20111749900006867, "grad_norm": 1227.26904296875, "learning_rate": 4.851846990517118e-05, "loss": 96.7121, "step": 49780 }, { "epoch": 0.2011579002654363, "grad_norm": 1115.0523681640625, "learning_rate": 4.8517285881902904e-05, "loss": 69.7559, "step": 49790 }, { "epoch": 0.20119830153080395, "grad_norm": 820.30126953125, "learning_rate": 4.851610140015304e-05, "loss": 94.3278, "step": 49800 }, { "epoch": 0.20123870279617156, "grad_norm": 3716.187744140625, "learning_rate": 4.8514916459944666e-05, "loss": 136.7065, "step": 49810 }, { "epoch": 0.2012791040615392, "grad_norm": 1158.3758544921875, "learning_rate": 4.8513731061300887e-05, "loss": 88.3688, "step": 49820 }, { "epoch": 0.20131950532690684, "grad_norm": 2444.747314453125, "learning_rate": 4.851254520424482e-05, "loss": 58.1214, "step": 49830 }, { "epoch": 0.20135990659227446, "grad_norm": 594.2299194335938, "learning_rate": 4.851135888879958e-05, "loss": 50.3362, "step": 49840 }, { "epoch": 0.2014003078576421, "grad_norm": 422.1976318359375, "learning_rate": 4.851017211498829e-05, "loss": 114.6325, "step": 49850 }, { "epoch": 0.20144070912300974, "grad_norm": 348.53741455078125, "learning_rate": 4.85089848828341e-05, "loss": 81.1028, "step": 49860 }, { "epoch": 0.20148111038837738, "grad_norm": 629.5589599609375, "learning_rate": 4.8507797192360134e-05, "loss": 47.3838, "step": 49870 }, { "epoch": 0.201521511653745, "grad_norm": 719.5811157226562, "learning_rate": 4.850660904358956e-05, "loss": 57.1744, "step": 49880 }, { "epoch": 0.20156191291911263, "grad_norm": 497.17913818359375, "learning_rate": 4.850542043654555e-05, "loss": 75.8821, "step": 49890 }, { "epoch": 0.20160231418448027, "grad_norm": 569.685546875, "learning_rate": 4.8504231371251255e-05, "loss": 59.8405, "step": 49900 }, { "epoch": 0.20164271544984788, "grad_norm": 503.3626708984375, "learning_rate": 4.850304184772988e-05, "loss": 64.7331, "step": 49910 }, { "epoch": 0.20168311671521552, "grad_norm": 444.8017883300781, "learning_rate": 4.85018518660046e-05, "loss": 132.0521, "step": 49920 }, { "epoch": 0.20172351798058316, "grad_norm": 557.6558227539062, "learning_rate": 4.850066142609862e-05, "loss": 66.8273, "step": 49930 }, { "epoch": 0.20176391924595077, "grad_norm": 625.9427490234375, "learning_rate": 4.849947052803514e-05, "loss": 97.3501, "step": 49940 }, { "epoch": 0.2018043205113184, "grad_norm": 413.3929748535156, "learning_rate": 4.849827917183739e-05, "loss": 104.1396, "step": 49950 }, { "epoch": 0.20184472177668605, "grad_norm": 1487.6632080078125, "learning_rate": 4.849708735752859e-05, "loss": 84.6355, "step": 49960 }, { "epoch": 0.20188512304205367, "grad_norm": 399.767578125, "learning_rate": 4.849589508513197e-05, "loss": 71.0594, "step": 49970 }, { "epoch": 0.2019255243074213, "grad_norm": 739.0697631835938, "learning_rate": 4.849470235467078e-05, "loss": 91.7866, "step": 49980 }, { "epoch": 0.20196592557278895, "grad_norm": 962.0905151367188, "learning_rate": 4.849350916616827e-05, "loss": 142.8859, "step": 49990 }, { "epoch": 0.20200632683815656, "grad_norm": 556.9208374023438, "learning_rate": 4.849231551964771e-05, "loss": 69.2205, "step": 50000 }, { "epoch": 0.2020467281035242, "grad_norm": 994.56298828125, "learning_rate": 4.849112141513236e-05, "loss": 98.4823, "step": 50010 }, { "epoch": 0.20208712936889184, "grad_norm": 446.2982482910156, "learning_rate": 4.8489926852645505e-05, "loss": 91.0375, "step": 50020 }, { "epoch": 0.20212753063425948, "grad_norm": 2271.33349609375, "learning_rate": 4.848873183221043e-05, "loss": 89.0778, "step": 50030 }, { "epoch": 0.2021679318996271, "grad_norm": 1082.55810546875, "learning_rate": 4.8487536353850444e-05, "loss": 109.4579, "step": 50040 }, { "epoch": 0.20220833316499473, "grad_norm": 2170.5400390625, "learning_rate": 4.8486340417588835e-05, "loss": 84.2964, "step": 50050 }, { "epoch": 0.20224873443036237, "grad_norm": 454.11248779296875, "learning_rate": 4.8485144023448936e-05, "loss": 62.0007, "step": 50060 }, { "epoch": 0.20228913569572998, "grad_norm": 494.5736083984375, "learning_rate": 4.848394717145406e-05, "loss": 87.3821, "step": 50070 }, { "epoch": 0.20232953696109762, "grad_norm": 1059.5152587890625, "learning_rate": 4.848274986162754e-05, "loss": 68.3891, "step": 50080 }, { "epoch": 0.20236993822646526, "grad_norm": 778.6331176757812, "learning_rate": 4.848155209399272e-05, "loss": 72.0611, "step": 50090 }, { "epoch": 0.20241033949183287, "grad_norm": 819.3021240234375, "learning_rate": 4.848035386857296e-05, "loss": 67.9465, "step": 50100 }, { "epoch": 0.20245074075720051, "grad_norm": 1114.4420166015625, "learning_rate": 4.847915518539161e-05, "loss": 75.596, "step": 50110 }, { "epoch": 0.20249114202256815, "grad_norm": 1062.3887939453125, "learning_rate": 4.847795604447204e-05, "loss": 102.3436, "step": 50120 }, { "epoch": 0.20253154328793577, "grad_norm": 1471.7073974609375, "learning_rate": 4.847675644583764e-05, "loss": 129.1764, "step": 50130 }, { "epoch": 0.2025719445533034, "grad_norm": 1061.678955078125, "learning_rate": 4.847555638951177e-05, "loss": 84.9815, "step": 50140 }, { "epoch": 0.20261234581867105, "grad_norm": 1374.27001953125, "learning_rate": 4.8474355875517854e-05, "loss": 88.13, "step": 50150 }, { "epoch": 0.20265274708403866, "grad_norm": 1075.933837890625, "learning_rate": 4.8473154903879276e-05, "loss": 94.9892, "step": 50160 }, { "epoch": 0.2026931483494063, "grad_norm": 314.6831359863281, "learning_rate": 4.8471953474619466e-05, "loss": 83.2022, "step": 50170 }, { "epoch": 0.20273354961477394, "grad_norm": 1138.607421875, "learning_rate": 4.847075158776183e-05, "loss": 78.0873, "step": 50180 }, { "epoch": 0.20277395088014158, "grad_norm": 561.6530151367188, "learning_rate": 4.846954924332981e-05, "loss": 57.0055, "step": 50190 }, { "epoch": 0.2028143521455092, "grad_norm": 1306.6680908203125, "learning_rate": 4.846834644134686e-05, "loss": 69.6428, "step": 50200 }, { "epoch": 0.20285475341087683, "grad_norm": 418.66046142578125, "learning_rate": 4.846714318183639e-05, "loss": 87.8257, "step": 50210 }, { "epoch": 0.20289515467624447, "grad_norm": 1192.7034912109375, "learning_rate": 4.84659394648219e-05, "loss": 93.4679, "step": 50220 }, { "epoch": 0.20293555594161208, "grad_norm": 1370.776611328125, "learning_rate": 4.846473529032684e-05, "loss": 112.6145, "step": 50230 }, { "epoch": 0.20297595720697972, "grad_norm": 555.2098388671875, "learning_rate": 4.846353065837467e-05, "loss": 104.6596, "step": 50240 }, { "epoch": 0.20301635847234736, "grad_norm": 714.0413208007812, "learning_rate": 4.84623255689889e-05, "loss": 63.1508, "step": 50250 }, { "epoch": 0.20305675973771498, "grad_norm": 779.4690551757812, "learning_rate": 4.846112002219301e-05, "loss": 77.2726, "step": 50260 }, { "epoch": 0.20309716100308262, "grad_norm": 857.3240966796875, "learning_rate": 4.845991401801051e-05, "loss": 86.2318, "step": 50270 }, { "epoch": 0.20313756226845026, "grad_norm": 700.0541381835938, "learning_rate": 4.845870755646491e-05, "loss": 72.9242, "step": 50280 }, { "epoch": 0.20317796353381787, "grad_norm": 357.010009765625, "learning_rate": 4.8457500637579726e-05, "loss": 53.2458, "step": 50290 }, { "epoch": 0.2032183647991855, "grad_norm": 1076.287841796875, "learning_rate": 4.845629326137849e-05, "loss": 60.6979, "step": 50300 }, { "epoch": 0.20325876606455315, "grad_norm": 1700.1820068359375, "learning_rate": 4.845508542788474e-05, "loss": 92.155, "step": 50310 }, { "epoch": 0.20329916732992076, "grad_norm": 800.1776123046875, "learning_rate": 4.845387713712203e-05, "loss": 89.9958, "step": 50320 }, { "epoch": 0.2033395685952884, "grad_norm": 953.5119018554688, "learning_rate": 4.8452668389113895e-05, "loss": 103.765, "step": 50330 }, { "epoch": 0.20337996986065604, "grad_norm": 558.611083984375, "learning_rate": 4.845145918388393e-05, "loss": 48.235, "step": 50340 }, { "epoch": 0.20342037112602368, "grad_norm": 106.23624420166016, "learning_rate": 4.8450249521455695e-05, "loss": 87.897, "step": 50350 }, { "epoch": 0.2034607723913913, "grad_norm": 1008.41748046875, "learning_rate": 4.844903940185276e-05, "loss": 55.0348, "step": 50360 }, { "epoch": 0.20350117365675893, "grad_norm": 1108.453369140625, "learning_rate": 4.844782882509874e-05, "loss": 79.4438, "step": 50370 }, { "epoch": 0.20354157492212657, "grad_norm": 985.6517944335938, "learning_rate": 4.844661779121723e-05, "loss": 61.7789, "step": 50380 }, { "epoch": 0.20358197618749418, "grad_norm": 863.3707885742188, "learning_rate": 4.844540630023182e-05, "loss": 75.1732, "step": 50390 }, { "epoch": 0.20362237745286182, "grad_norm": 843.4656372070312, "learning_rate": 4.844419435216615e-05, "loss": 61.1795, "step": 50400 }, { "epoch": 0.20366277871822946, "grad_norm": 739.8035888671875, "learning_rate": 4.844298194704384e-05, "loss": 89.2932, "step": 50410 }, { "epoch": 0.20370317998359708, "grad_norm": 455.288818359375, "learning_rate": 4.8441769084888534e-05, "loss": 58.3124, "step": 50420 }, { "epoch": 0.20374358124896472, "grad_norm": 1115.42333984375, "learning_rate": 4.844055576572387e-05, "loss": 70.2969, "step": 50430 }, { "epoch": 0.20378398251433236, "grad_norm": 1393.1253662109375, "learning_rate": 4.84393419895735e-05, "loss": 89.79, "step": 50440 }, { "epoch": 0.20382438377969997, "grad_norm": 1216.634033203125, "learning_rate": 4.84381277564611e-05, "loss": 72.9308, "step": 50450 }, { "epoch": 0.2038647850450676, "grad_norm": 548.3377075195312, "learning_rate": 4.8436913066410316e-05, "loss": 73.1693, "step": 50460 }, { "epoch": 0.20390518631043525, "grad_norm": 1117.44091796875, "learning_rate": 4.843569791944486e-05, "loss": 121.1625, "step": 50470 }, { "epoch": 0.20394558757580286, "grad_norm": 1097.303955078125, "learning_rate": 4.843448231558839e-05, "loss": 103.4902, "step": 50480 }, { "epoch": 0.2039859888411705, "grad_norm": 797.4445190429688, "learning_rate": 4.843326625486464e-05, "loss": 78.5437, "step": 50490 }, { "epoch": 0.20402639010653814, "grad_norm": 876.6442260742188, "learning_rate": 4.843204973729729e-05, "loss": 72.4397, "step": 50500 }, { "epoch": 0.20406679137190578, "grad_norm": 696.5914306640625, "learning_rate": 4.843083276291007e-05, "loss": 75.2865, "step": 50510 }, { "epoch": 0.2041071926372734, "grad_norm": 202.0533447265625, "learning_rate": 4.84296153317267e-05, "loss": 42.6595, "step": 50520 }, { "epoch": 0.20414759390264103, "grad_norm": 711.9306030273438, "learning_rate": 4.8428397443770926e-05, "loss": 76.4253, "step": 50530 }, { "epoch": 0.20418799516800867, "grad_norm": 1101.085205078125, "learning_rate": 4.842717909906647e-05, "loss": 91.274, "step": 50540 }, { "epoch": 0.20422839643337629, "grad_norm": 934.9185180664062, "learning_rate": 4.84259602976371e-05, "loss": 99.1536, "step": 50550 }, { "epoch": 0.20426879769874393, "grad_norm": 1069.0614013671875, "learning_rate": 4.8424741039506575e-05, "loss": 76.847, "step": 50560 }, { "epoch": 0.20430919896411157, "grad_norm": 5626.541015625, "learning_rate": 4.842352132469867e-05, "loss": 85.2173, "step": 50570 }, { "epoch": 0.20434960022947918, "grad_norm": 467.4293212890625, "learning_rate": 4.8422301153237145e-05, "loss": 57.4749, "step": 50580 }, { "epoch": 0.20439000149484682, "grad_norm": 721.87353515625, "learning_rate": 4.842108052514581e-05, "loss": 67.7757, "step": 50590 }, { "epoch": 0.20443040276021446, "grad_norm": 1129.6400146484375, "learning_rate": 4.841985944044845e-05, "loss": 69.0111, "step": 50600 }, { "epoch": 0.20447080402558207, "grad_norm": 566.9580688476562, "learning_rate": 4.8418637899168874e-05, "loss": 96.5319, "step": 50610 }, { "epoch": 0.2045112052909497, "grad_norm": 1207.4417724609375, "learning_rate": 4.8417415901330886e-05, "loss": 86.0825, "step": 50620 }, { "epoch": 0.20455160655631735, "grad_norm": 912.6641235351562, "learning_rate": 4.841619344695833e-05, "loss": 75.0874, "step": 50630 }, { "epoch": 0.20459200782168496, "grad_norm": 619.9535522460938, "learning_rate": 4.8414970536075024e-05, "loss": 82.5721, "step": 50640 }, { "epoch": 0.2046324090870526, "grad_norm": 522.5993041992188, "learning_rate": 4.841374716870481e-05, "loss": 95.127, "step": 50650 }, { "epoch": 0.20467281035242024, "grad_norm": 368.2784729003906, "learning_rate": 4.841252334487154e-05, "loss": 61.5005, "step": 50660 }, { "epoch": 0.20471321161778788, "grad_norm": 1060.747802734375, "learning_rate": 4.841129906459908e-05, "loss": 69.1981, "step": 50670 }, { "epoch": 0.2047536128831555, "grad_norm": 1138.5040283203125, "learning_rate": 4.841007432791129e-05, "loss": 56.1066, "step": 50680 }, { "epoch": 0.20479401414852313, "grad_norm": 1501.6522216796875, "learning_rate": 4.840884913483204e-05, "loss": 108.5628, "step": 50690 }, { "epoch": 0.20483441541389077, "grad_norm": 1450.2125244140625, "learning_rate": 4.8407623485385234e-05, "loss": 89.9987, "step": 50700 }, { "epoch": 0.2048748166792584, "grad_norm": 503.9439697265625, "learning_rate": 4.840639737959476e-05, "loss": 69.8462, "step": 50710 }, { "epoch": 0.20491521794462603, "grad_norm": 702.1473388671875, "learning_rate": 4.8405170817484515e-05, "loss": 102.1393, "step": 50720 }, { "epoch": 0.20495561920999367, "grad_norm": 557.0831909179688, "learning_rate": 4.840394379907841e-05, "loss": 70.9738, "step": 50730 }, { "epoch": 0.20499602047536128, "grad_norm": 905.9644775390625, "learning_rate": 4.840271632440038e-05, "loss": 63.7002, "step": 50740 }, { "epoch": 0.20503642174072892, "grad_norm": 1401.463623046875, "learning_rate": 4.840148839347434e-05, "loss": 82.3691, "step": 50750 }, { "epoch": 0.20507682300609656, "grad_norm": 457.6974182128906, "learning_rate": 4.8400260006324235e-05, "loss": 64.2692, "step": 50760 }, { "epoch": 0.20511722427146417, "grad_norm": 1235.328857421875, "learning_rate": 4.839903116297401e-05, "loss": 76.285, "step": 50770 }, { "epoch": 0.2051576255368318, "grad_norm": 1098.241943359375, "learning_rate": 4.8397801863447635e-05, "loss": 98.041, "step": 50780 }, { "epoch": 0.20519802680219945, "grad_norm": 1030.691650390625, "learning_rate": 4.8396572107769066e-05, "loss": 81.4219, "step": 50790 }, { "epoch": 0.20523842806756706, "grad_norm": 546.6036987304688, "learning_rate": 4.839534189596228e-05, "loss": 62.711, "step": 50800 }, { "epoch": 0.2052788293329347, "grad_norm": 595.7460327148438, "learning_rate": 4.839411122805125e-05, "loss": 78.6366, "step": 50810 }, { "epoch": 0.20531923059830234, "grad_norm": 435.6410217285156, "learning_rate": 4.839288010405998e-05, "loss": 54.1102, "step": 50820 }, { "epoch": 0.20535963186366998, "grad_norm": 756.9534301757812, "learning_rate": 4.839164852401247e-05, "loss": 55.358, "step": 50830 }, { "epoch": 0.2054000331290376, "grad_norm": 1443.138671875, "learning_rate": 4.8390416487932733e-05, "loss": 76.8449, "step": 50840 }, { "epoch": 0.20544043439440524, "grad_norm": 666.9951171875, "learning_rate": 4.8389183995844785e-05, "loss": 98.6886, "step": 50850 }, { "epoch": 0.20548083565977288, "grad_norm": 720.92919921875, "learning_rate": 4.838795104777265e-05, "loss": 101.151, "step": 50860 }, { "epoch": 0.2055212369251405, "grad_norm": 929.8797607421875, "learning_rate": 4.8386717643740366e-05, "loss": 87.1545, "step": 50870 }, { "epoch": 0.20556163819050813, "grad_norm": 1356.6619873046875, "learning_rate": 4.8385483783771986e-05, "loss": 92.7927, "step": 50880 }, { "epoch": 0.20560203945587577, "grad_norm": 2006.3673095703125, "learning_rate": 4.838424946789156e-05, "loss": 75.9321, "step": 50890 }, { "epoch": 0.20564244072124338, "grad_norm": 741.2835693359375, "learning_rate": 4.8383014696123144e-05, "loss": 74.4236, "step": 50900 }, { "epoch": 0.20568284198661102, "grad_norm": 719.8021850585938, "learning_rate": 4.838177946849083e-05, "loss": 100.4028, "step": 50910 }, { "epoch": 0.20572324325197866, "grad_norm": 410.4776306152344, "learning_rate": 4.8380543785018677e-05, "loss": 78.5173, "step": 50920 }, { "epoch": 0.20576364451734627, "grad_norm": 563.4188842773438, "learning_rate": 4.8379307645730795e-05, "loss": 116.8776, "step": 50930 }, { "epoch": 0.2058040457827139, "grad_norm": 882.4771728515625, "learning_rate": 4.837807105065127e-05, "loss": 63.1256, "step": 50940 }, { "epoch": 0.20584444704808155, "grad_norm": 576.0557861328125, "learning_rate": 4.837683399980421e-05, "loss": 100.8891, "step": 50950 }, { "epoch": 0.20588484831344916, "grad_norm": 2097.596923828125, "learning_rate": 4.837559649321374e-05, "loss": 107.397, "step": 50960 }, { "epoch": 0.2059252495788168, "grad_norm": 1427.4742431640625, "learning_rate": 4.837435853090398e-05, "loss": 68.9581, "step": 50970 }, { "epoch": 0.20596565084418444, "grad_norm": 1769.10107421875, "learning_rate": 4.837312011289907e-05, "loss": 77.3765, "step": 50980 }, { "epoch": 0.20600605210955208, "grad_norm": 745.1818237304688, "learning_rate": 4.837188123922314e-05, "loss": 58.5138, "step": 50990 }, { "epoch": 0.2060464533749197, "grad_norm": 1252.1639404296875, "learning_rate": 4.837064190990036e-05, "loss": 87.0167, "step": 51000 }, { "epoch": 0.20608685464028734, "grad_norm": 702.9400024414062, "learning_rate": 4.836940212495489e-05, "loss": 100.8639, "step": 51010 }, { "epoch": 0.20612725590565498, "grad_norm": 1918.0904541015625, "learning_rate": 4.836816188441089e-05, "loss": 67.2167, "step": 51020 }, { "epoch": 0.2061676571710226, "grad_norm": 1665.779296875, "learning_rate": 4.8366921188292534e-05, "loss": 70.5055, "step": 51030 }, { "epoch": 0.20620805843639023, "grad_norm": 1276.0400390625, "learning_rate": 4.8365680036624026e-05, "loss": 69.3662, "step": 51040 }, { "epoch": 0.20624845970175787, "grad_norm": 1376.8558349609375, "learning_rate": 4.836443842942956e-05, "loss": 88.9127, "step": 51050 }, { "epoch": 0.20628886096712548, "grad_norm": 1290.6573486328125, "learning_rate": 4.836319636673334e-05, "loss": 114.1035, "step": 51060 }, { "epoch": 0.20632926223249312, "grad_norm": 789.5421142578125, "learning_rate": 4.836195384855957e-05, "loss": 67.4084, "step": 51070 }, { "epoch": 0.20636966349786076, "grad_norm": 843.2109985351562, "learning_rate": 4.8360710874932485e-05, "loss": 77.7251, "step": 51080 }, { "epoch": 0.20641006476322837, "grad_norm": 873.6146850585938, "learning_rate": 4.8359467445876314e-05, "loss": 111.2144, "step": 51090 }, { "epoch": 0.206450466028596, "grad_norm": 1589.0201416015625, "learning_rate": 4.8358223561415304e-05, "loss": 95.8929, "step": 51100 }, { "epoch": 0.20649086729396365, "grad_norm": 914.6998291015625, "learning_rate": 4.8356979221573696e-05, "loss": 58.4896, "step": 51110 }, { "epoch": 0.20653126855933127, "grad_norm": 566.5862426757812, "learning_rate": 4.8355734426375753e-05, "loss": 82.7033, "step": 51120 }, { "epoch": 0.2065716698246989, "grad_norm": 1035.8038330078125, "learning_rate": 4.835448917584574e-05, "loss": 93.2203, "step": 51130 }, { "epoch": 0.20661207109006655, "grad_norm": 645.23388671875, "learning_rate": 4.8353243470007944e-05, "loss": 84.6093, "step": 51140 }, { "epoch": 0.20665247235543419, "grad_norm": 919.4197998046875, "learning_rate": 4.835199730888664e-05, "loss": 70.3317, "step": 51150 }, { "epoch": 0.2066928736208018, "grad_norm": 647.1675415039062, "learning_rate": 4.835075069250613e-05, "loss": 70.5222, "step": 51160 }, { "epoch": 0.20673327488616944, "grad_norm": 689.3214111328125, "learning_rate": 4.8349503620890705e-05, "loss": 73.7844, "step": 51170 }, { "epoch": 0.20677367615153708, "grad_norm": 974.9696044921875, "learning_rate": 4.8348256094064695e-05, "loss": 113.3237, "step": 51180 }, { "epoch": 0.2068140774169047, "grad_norm": 595.2847290039062, "learning_rate": 4.834700811205241e-05, "loss": 65.6414, "step": 51190 }, { "epoch": 0.20685447868227233, "grad_norm": 1128.4539794921875, "learning_rate": 4.834575967487817e-05, "loss": 105.4969, "step": 51200 }, { "epoch": 0.20689487994763997, "grad_norm": 445.4708557128906, "learning_rate": 4.834451078256634e-05, "loss": 62.121, "step": 51210 }, { "epoch": 0.20693528121300758, "grad_norm": 308.1874694824219, "learning_rate": 4.8343261435141244e-05, "loss": 74.7087, "step": 51220 }, { "epoch": 0.20697568247837522, "grad_norm": 579.1897583007812, "learning_rate": 4.8342011632627254e-05, "loss": 115.5755, "step": 51230 }, { "epoch": 0.20701608374374286, "grad_norm": 1018.190185546875, "learning_rate": 4.834076137504873e-05, "loss": 77.1754, "step": 51240 }, { "epoch": 0.20705648500911047, "grad_norm": 560.5921020507812, "learning_rate": 4.8339510662430046e-05, "loss": 89.5999, "step": 51250 }, { "epoch": 0.20709688627447811, "grad_norm": 554.189453125, "learning_rate": 4.833825949479558e-05, "loss": 60.0793, "step": 51260 }, { "epoch": 0.20713728753984575, "grad_norm": 666.8161010742188, "learning_rate": 4.8337007872169735e-05, "loss": 79.2109, "step": 51270 }, { "epoch": 0.20717768880521337, "grad_norm": 758.923828125, "learning_rate": 4.833575579457691e-05, "loss": 71.3804, "step": 51280 }, { "epoch": 0.207218090070581, "grad_norm": 768.92236328125, "learning_rate": 4.8334503262041505e-05, "loss": 54.9832, "step": 51290 }, { "epoch": 0.20725849133594865, "grad_norm": 453.7713317871094, "learning_rate": 4.833325027458795e-05, "loss": 86.5132, "step": 51300 }, { "epoch": 0.2072988926013163, "grad_norm": 688.4905395507812, "learning_rate": 4.8331996832240675e-05, "loss": 62.2691, "step": 51310 }, { "epoch": 0.2073392938666839, "grad_norm": 941.0972900390625, "learning_rate": 4.83307429350241e-05, "loss": 68.9129, "step": 51320 }, { "epoch": 0.20737969513205154, "grad_norm": 553.0337524414062, "learning_rate": 4.832948858296268e-05, "loss": 70.7225, "step": 51330 }, { "epoch": 0.20742009639741918, "grad_norm": 736.6832885742188, "learning_rate": 4.832823377608087e-05, "loss": 90.5628, "step": 51340 }, { "epoch": 0.2074604976627868, "grad_norm": 1483.525390625, "learning_rate": 4.832697851440313e-05, "loss": 60.9628, "step": 51350 }, { "epoch": 0.20750089892815443, "grad_norm": 1290.9232177734375, "learning_rate": 4.8325722797953945e-05, "loss": 56.6402, "step": 51360 }, { "epoch": 0.20754130019352207, "grad_norm": 1492.0635986328125, "learning_rate": 4.8324466626757775e-05, "loss": 88.9837, "step": 51370 }, { "epoch": 0.20758170145888968, "grad_norm": 1464.2392578125, "learning_rate": 4.8323210000839124e-05, "loss": 82.0623, "step": 51380 }, { "epoch": 0.20762210272425732, "grad_norm": 771.36767578125, "learning_rate": 4.832195292022249e-05, "loss": 99.1351, "step": 51390 }, { "epoch": 0.20766250398962496, "grad_norm": 513.316650390625, "learning_rate": 4.832069538493237e-05, "loss": 54.0263, "step": 51400 }, { "epoch": 0.20770290525499258, "grad_norm": 813.9430541992188, "learning_rate": 4.831943739499328e-05, "loss": 79.7442, "step": 51410 }, { "epoch": 0.20774330652036022, "grad_norm": 1018.91650390625, "learning_rate": 4.831817895042977e-05, "loss": 103.3693, "step": 51420 }, { "epoch": 0.20778370778572786, "grad_norm": 1169.8072509765625, "learning_rate": 4.8316920051266343e-05, "loss": 68.9814, "step": 51430 }, { "epoch": 0.20782410905109547, "grad_norm": 739.5846557617188, "learning_rate": 4.8315660697527566e-05, "loss": 47.6177, "step": 51440 }, { "epoch": 0.2078645103164631, "grad_norm": 996.7125854492188, "learning_rate": 4.831440088923797e-05, "loss": 72.8957, "step": 51450 }, { "epoch": 0.20790491158183075, "grad_norm": 765.3619384765625, "learning_rate": 4.8313140626422125e-05, "loss": 56.8616, "step": 51460 }, { "epoch": 0.2079453128471984, "grad_norm": 2611.9541015625, "learning_rate": 4.831187990910461e-05, "loss": 117.1232, "step": 51470 }, { "epoch": 0.207985714112566, "grad_norm": 1099.97314453125, "learning_rate": 4.831061873730999e-05, "loss": 65.8406, "step": 51480 }, { "epoch": 0.20802611537793364, "grad_norm": 672.7211303710938, "learning_rate": 4.8309357111062856e-05, "loss": 101.9847, "step": 51490 }, { "epoch": 0.20806651664330128, "grad_norm": 559.6053466796875, "learning_rate": 4.830809503038781e-05, "loss": 60.1119, "step": 51500 }, { "epoch": 0.2081069179086689, "grad_norm": 611.0339965820312, "learning_rate": 4.8306832495309445e-05, "loss": 77.4869, "step": 51510 }, { "epoch": 0.20814731917403653, "grad_norm": 955.992919921875, "learning_rate": 4.830556950585238e-05, "loss": 94.2499, "step": 51520 }, { "epoch": 0.20818772043940417, "grad_norm": 1458.7508544921875, "learning_rate": 4.830430606204125e-05, "loss": 107.4706, "step": 51530 }, { "epoch": 0.20822812170477178, "grad_norm": 550.4443359375, "learning_rate": 4.830304216390066e-05, "loss": 58.3049, "step": 51540 }, { "epoch": 0.20826852297013942, "grad_norm": 758.5188598632812, "learning_rate": 4.8301777811455276e-05, "loss": 82.5834, "step": 51550 }, { "epoch": 0.20830892423550706, "grad_norm": 715.7566528320312, "learning_rate": 4.8300513004729735e-05, "loss": 47.3867, "step": 51560 }, { "epoch": 0.20834932550087468, "grad_norm": 517.6349487304688, "learning_rate": 4.82992477437487e-05, "loss": 60.1919, "step": 51570 }, { "epoch": 0.20838972676624232, "grad_norm": 1060.4306640625, "learning_rate": 4.8297982028536826e-05, "loss": 106.7914, "step": 51580 }, { "epoch": 0.20843012803160996, "grad_norm": 603.7183227539062, "learning_rate": 4.82967158591188e-05, "loss": 59.8083, "step": 51590 }, { "epoch": 0.20847052929697757, "grad_norm": 1019.84326171875, "learning_rate": 4.829544923551931e-05, "loss": 63.1892, "step": 51600 }, { "epoch": 0.2085109305623452, "grad_norm": 648.5969848632812, "learning_rate": 4.8294182157763044e-05, "loss": 95.7722, "step": 51610 }, { "epoch": 0.20855133182771285, "grad_norm": 2398.117431640625, "learning_rate": 4.82929146258747e-05, "loss": 65.186, "step": 51620 }, { "epoch": 0.20859173309308046, "grad_norm": 342.774658203125, "learning_rate": 4.8291646639878995e-05, "loss": 81.3063, "step": 51630 }, { "epoch": 0.2086321343584481, "grad_norm": 1235.5203857421875, "learning_rate": 4.829037819980065e-05, "loss": 44.4151, "step": 51640 }, { "epoch": 0.20867253562381574, "grad_norm": 470.635498046875, "learning_rate": 4.828910930566439e-05, "loss": 62.5535, "step": 51650 }, { "epoch": 0.20871293688918338, "grad_norm": 830.7872924804688, "learning_rate": 4.828783995749495e-05, "loss": 80.7936, "step": 51660 }, { "epoch": 0.208753338154551, "grad_norm": 1270.7835693359375, "learning_rate": 4.828657015531709e-05, "loss": 89.139, "step": 51670 }, { "epoch": 0.20879373941991863, "grad_norm": 726.8765258789062, "learning_rate": 4.828529989915555e-05, "loss": 53.5606, "step": 51680 }, { "epoch": 0.20883414068528627, "grad_norm": 2210.846435546875, "learning_rate": 4.8284029189035094e-05, "loss": 97.6787, "step": 51690 }, { "epoch": 0.20887454195065389, "grad_norm": 0.0, "learning_rate": 4.828275802498051e-05, "loss": 71.8659, "step": 51700 }, { "epoch": 0.20891494321602153, "grad_norm": 935.220458984375, "learning_rate": 4.828148640701657e-05, "loss": 65.6444, "step": 51710 }, { "epoch": 0.20895534448138917, "grad_norm": 287.4408264160156, "learning_rate": 4.828021433516806e-05, "loss": 61.6178, "step": 51720 }, { "epoch": 0.20899574574675678, "grad_norm": 477.3240966796875, "learning_rate": 4.82789418094598e-05, "loss": 93.1937, "step": 51730 }, { "epoch": 0.20903614701212442, "grad_norm": 516.9016723632812, "learning_rate": 4.827766882991657e-05, "loss": 77.4756, "step": 51740 }, { "epoch": 0.20907654827749206, "grad_norm": 1626.0362548828125, "learning_rate": 4.827639539656321e-05, "loss": 100.3807, "step": 51750 }, { "epoch": 0.20911694954285967, "grad_norm": 530.7556762695312, "learning_rate": 4.827512150942454e-05, "loss": 96.0756, "step": 51760 }, { "epoch": 0.2091573508082273, "grad_norm": 865.0823974609375, "learning_rate": 4.827384716852539e-05, "loss": 50.7269, "step": 51770 }, { "epoch": 0.20919775207359495, "grad_norm": 1538.5042724609375, "learning_rate": 4.82725723738906e-05, "loss": 147.1135, "step": 51780 }, { "epoch": 0.20923815333896256, "grad_norm": 574.5728149414062, "learning_rate": 4.827129712554504e-05, "loss": 60.3232, "step": 51790 }, { "epoch": 0.2092785546043302, "grad_norm": 674.2813110351562, "learning_rate": 4.8270021423513554e-05, "loss": 58.9629, "step": 51800 }, { "epoch": 0.20931895586969784, "grad_norm": 937.812255859375, "learning_rate": 4.826874526782103e-05, "loss": 83.8374, "step": 51810 }, { "epoch": 0.20935935713506548, "grad_norm": 713.9767456054688, "learning_rate": 4.8267468658492335e-05, "loss": 63.4093, "step": 51820 }, { "epoch": 0.2093997584004331, "grad_norm": 1410.48583984375, "learning_rate": 4.826619159555236e-05, "loss": 77.9979, "step": 51830 }, { "epoch": 0.20944015966580073, "grad_norm": 845.0786743164062, "learning_rate": 4.826491407902599e-05, "loss": 86.6751, "step": 51840 }, { "epoch": 0.20948056093116837, "grad_norm": 2135.817626953125, "learning_rate": 4.8263636108938156e-05, "loss": 96.0103, "step": 51850 }, { "epoch": 0.209520962196536, "grad_norm": 1159.19775390625, "learning_rate": 4.8262357685313754e-05, "loss": 135.6379, "step": 51860 }, { "epoch": 0.20956136346190363, "grad_norm": 962.7410278320312, "learning_rate": 4.826107880817771e-05, "loss": 65.2435, "step": 51870 }, { "epoch": 0.20960176472727127, "grad_norm": 958.7470703125, "learning_rate": 4.8259799477554965e-05, "loss": 99.5246, "step": 51880 }, { "epoch": 0.20964216599263888, "grad_norm": 675.7080078125, "learning_rate": 4.825851969347045e-05, "loss": 61.0314, "step": 51890 }, { "epoch": 0.20968256725800652, "grad_norm": 577.6461791992188, "learning_rate": 4.8257239455949124e-05, "loss": 58.5346, "step": 51900 }, { "epoch": 0.20972296852337416, "grad_norm": 1043.0440673828125, "learning_rate": 4.825595876501593e-05, "loss": 90.7892, "step": 51910 }, { "epoch": 0.20976336978874177, "grad_norm": 597.583984375, "learning_rate": 4.825467762069585e-05, "loss": 89.8225, "step": 51920 }, { "epoch": 0.2098037710541094, "grad_norm": 588.63427734375, "learning_rate": 4.825339602301387e-05, "loss": 76.6868, "step": 51930 }, { "epoch": 0.20984417231947705, "grad_norm": 1137.5469970703125, "learning_rate": 4.825211397199495e-05, "loss": 77.4429, "step": 51940 }, { "epoch": 0.20988457358484466, "grad_norm": 1342.8388671875, "learning_rate": 4.82508314676641e-05, "loss": 87.7724, "step": 51950 }, { "epoch": 0.2099249748502123, "grad_norm": 549.8995971679688, "learning_rate": 4.824954851004633e-05, "loss": 62.8191, "step": 51960 }, { "epoch": 0.20996537611557994, "grad_norm": 347.0243835449219, "learning_rate": 4.8248265099166634e-05, "loss": 68.7288, "step": 51970 }, { "epoch": 0.21000577738094758, "grad_norm": 938.3527221679688, "learning_rate": 4.824698123505004e-05, "loss": 57.8288, "step": 51980 }, { "epoch": 0.2100461786463152, "grad_norm": 869.6561279296875, "learning_rate": 4.824569691772158e-05, "loss": 82.7138, "step": 51990 }, { "epoch": 0.21008657991168284, "grad_norm": 940.8391723632812, "learning_rate": 4.8244412147206284e-05, "loss": 87.2308, "step": 52000 }, { "epoch": 0.21012698117705048, "grad_norm": 1123.21826171875, "learning_rate": 4.8243126923529214e-05, "loss": 62.064, "step": 52010 }, { "epoch": 0.2101673824424181, "grad_norm": 919.0745849609375, "learning_rate": 4.824184124671542e-05, "loss": 80.0747, "step": 52020 }, { "epoch": 0.21020778370778573, "grad_norm": 1110.951904296875, "learning_rate": 4.8240555116789964e-05, "loss": 58.8613, "step": 52030 }, { "epoch": 0.21024818497315337, "grad_norm": 1179.7130126953125, "learning_rate": 4.823926853377791e-05, "loss": 85.1923, "step": 52040 }, { "epoch": 0.21028858623852098, "grad_norm": 451.0613708496094, "learning_rate": 4.8237981497704365e-05, "loss": 63.9672, "step": 52050 }, { "epoch": 0.21032898750388862, "grad_norm": 1503.4912109375, "learning_rate": 4.8236694008594405e-05, "loss": 85.0293, "step": 52060 }, { "epoch": 0.21036938876925626, "grad_norm": 691.0103149414062, "learning_rate": 4.823540606647313e-05, "loss": 83.7888, "step": 52070 }, { "epoch": 0.21040979003462387, "grad_norm": 393.8356628417969, "learning_rate": 4.823411767136565e-05, "loss": 71.635, "step": 52080 }, { "epoch": 0.2104501912999915, "grad_norm": 828.97119140625, "learning_rate": 4.8232828823297085e-05, "loss": 88.108, "step": 52090 }, { "epoch": 0.21049059256535915, "grad_norm": 1106.6741943359375, "learning_rate": 4.8231539522292564e-05, "loss": 95.024, "step": 52100 }, { "epoch": 0.21053099383072676, "grad_norm": 996.2150268554688, "learning_rate": 4.823024976837721e-05, "loss": 63.8951, "step": 52110 }, { "epoch": 0.2105713950960944, "grad_norm": 1018.7054443359375, "learning_rate": 4.822895956157619e-05, "loss": 83.2238, "step": 52120 }, { "epoch": 0.21061179636146204, "grad_norm": 648.1104736328125, "learning_rate": 4.8227668901914636e-05, "loss": 123.0206, "step": 52130 }, { "epoch": 0.21065219762682968, "grad_norm": 781.1571655273438, "learning_rate": 4.822637778941772e-05, "loss": 43.9732, "step": 52140 }, { "epoch": 0.2106925988921973, "grad_norm": 539.1663208007812, "learning_rate": 4.8225086224110615e-05, "loss": 77.1991, "step": 52150 }, { "epoch": 0.21073300015756494, "grad_norm": 654.8760986328125, "learning_rate": 4.822379420601849e-05, "loss": 95.2022, "step": 52160 }, { "epoch": 0.21077340142293258, "grad_norm": 666.3115234375, "learning_rate": 4.822250173516655e-05, "loss": 101.8235, "step": 52170 }, { "epoch": 0.2108138026883002, "grad_norm": 8625.4052734375, "learning_rate": 4.822120881157998e-05, "loss": 73.1473, "step": 52180 }, { "epoch": 0.21085420395366783, "grad_norm": 1041.82470703125, "learning_rate": 4.821991543528398e-05, "loss": 101.8729, "step": 52190 }, { "epoch": 0.21089460521903547, "grad_norm": 0.0, "learning_rate": 4.821862160630378e-05, "loss": 49.5271, "step": 52200 }, { "epoch": 0.21093500648440308, "grad_norm": 347.1128234863281, "learning_rate": 4.8217327324664595e-05, "loss": 109.6646, "step": 52210 }, { "epoch": 0.21097540774977072, "grad_norm": 786.9452514648438, "learning_rate": 4.821603259039167e-05, "loss": 69.5838, "step": 52220 }, { "epoch": 0.21101580901513836, "grad_norm": 308.2260437011719, "learning_rate": 4.821473740351023e-05, "loss": 41.9356, "step": 52230 }, { "epoch": 0.21105621028050597, "grad_norm": 730.7586669921875, "learning_rate": 4.821344176404554e-05, "loss": 64.5068, "step": 52240 }, { "epoch": 0.2110966115458736, "grad_norm": 724.8195190429688, "learning_rate": 4.8212145672022844e-05, "loss": 75.1824, "step": 52250 }, { "epoch": 0.21113701281124125, "grad_norm": 1213.788818359375, "learning_rate": 4.821084912746742e-05, "loss": 99.653, "step": 52260 }, { "epoch": 0.21117741407660887, "grad_norm": 886.8688354492188, "learning_rate": 4.820955213040454e-05, "loss": 85.2024, "step": 52270 }, { "epoch": 0.2112178153419765, "grad_norm": 746.9656372070312, "learning_rate": 4.8208254680859494e-05, "loss": 62.4096, "step": 52280 }, { "epoch": 0.21125821660734415, "grad_norm": 695.8228759765625, "learning_rate": 4.820695677885757e-05, "loss": 123.5433, "step": 52290 }, { "epoch": 0.21129861787271179, "grad_norm": 642.0867309570312, "learning_rate": 4.820565842442408e-05, "loss": 82.3376, "step": 52300 }, { "epoch": 0.2113390191380794, "grad_norm": 448.39202880859375, "learning_rate": 4.8204359617584336e-05, "loss": 58.77, "step": 52310 }, { "epoch": 0.21137942040344704, "grad_norm": 906.605712890625, "learning_rate": 4.820306035836365e-05, "loss": 79.2194, "step": 52320 }, { "epoch": 0.21141982166881468, "grad_norm": 478.28643798828125, "learning_rate": 4.8201760646787366e-05, "loss": 56.0789, "step": 52330 }, { "epoch": 0.2114602229341823, "grad_norm": 1141.816650390625, "learning_rate": 4.82004604828808e-05, "loss": 110.6768, "step": 52340 }, { "epoch": 0.21150062419954993, "grad_norm": 280.15692138671875, "learning_rate": 4.819915986666932e-05, "loss": 93.5777, "step": 52350 }, { "epoch": 0.21154102546491757, "grad_norm": 1224.0416259765625, "learning_rate": 4.819785879817827e-05, "loss": 104.5719, "step": 52360 }, { "epoch": 0.21158142673028518, "grad_norm": 960.8833618164062, "learning_rate": 4.819655727743302e-05, "loss": 88.1145, "step": 52370 }, { "epoch": 0.21162182799565282, "grad_norm": 717.3423461914062, "learning_rate": 4.8195255304458945e-05, "loss": 138.9197, "step": 52380 }, { "epoch": 0.21166222926102046, "grad_norm": 902.6900634765625, "learning_rate": 4.819395287928143e-05, "loss": 72.1466, "step": 52390 }, { "epoch": 0.21170263052638807, "grad_norm": 988.1195068359375, "learning_rate": 4.8192650001925855e-05, "loss": 63.8846, "step": 52400 }, { "epoch": 0.21174303179175571, "grad_norm": 1469.6943359375, "learning_rate": 4.8191346672417633e-05, "loss": 68.6236, "step": 52410 }, { "epoch": 0.21178343305712335, "grad_norm": 529.160888671875, "learning_rate": 4.819004289078217e-05, "loss": 71.5383, "step": 52420 }, { "epoch": 0.21182383432249097, "grad_norm": 1089.6658935546875, "learning_rate": 4.818873865704487e-05, "loss": 101.7536, "step": 52430 }, { "epoch": 0.2118642355878586, "grad_norm": 1817.579833984375, "learning_rate": 4.818743397123119e-05, "loss": 111.1048, "step": 52440 }, { "epoch": 0.21190463685322625, "grad_norm": 1221.6204833984375, "learning_rate": 4.818612883336654e-05, "loss": 59.6956, "step": 52450 }, { "epoch": 0.2119450381185939, "grad_norm": 572.4197998046875, "learning_rate": 4.8184823243476364e-05, "loss": 69.0064, "step": 52460 }, { "epoch": 0.2119854393839615, "grad_norm": 617.7356567382812, "learning_rate": 4.818351720158613e-05, "loss": 50.4712, "step": 52470 }, { "epoch": 0.21202584064932914, "grad_norm": 530.05224609375, "learning_rate": 4.8182210707721284e-05, "loss": 63.9792, "step": 52480 }, { "epoch": 0.21206624191469678, "grad_norm": 2283.126953125, "learning_rate": 4.8180903761907315e-05, "loss": 76.8574, "step": 52490 }, { "epoch": 0.2121066431800644, "grad_norm": 721.9110717773438, "learning_rate": 4.817959636416969e-05, "loss": 61.0024, "step": 52500 }, { "epoch": 0.21214704444543203, "grad_norm": 993.3989868164062, "learning_rate": 4.81782885145339e-05, "loss": 97.0186, "step": 52510 }, { "epoch": 0.21218744571079967, "grad_norm": 897.0438232421875, "learning_rate": 4.8176980213025434e-05, "loss": 83.1266, "step": 52520 }, { "epoch": 0.21222784697616728, "grad_norm": 677.8610229492188, "learning_rate": 4.817567145966982e-05, "loss": 46.0433, "step": 52530 }, { "epoch": 0.21226824824153492, "grad_norm": 893.5686645507812, "learning_rate": 4.817436225449255e-05, "loss": 66.1131, "step": 52540 }, { "epoch": 0.21230864950690256, "grad_norm": 879.1597290039062, "learning_rate": 4.817305259751916e-05, "loss": 56.4979, "step": 52550 }, { "epoch": 0.21234905077227018, "grad_norm": 1273.606689453125, "learning_rate": 4.817174248877518e-05, "loss": 68.6022, "step": 52560 }, { "epoch": 0.21238945203763782, "grad_norm": 958.6312255859375, "learning_rate": 4.8170431928286155e-05, "loss": 73.1606, "step": 52570 }, { "epoch": 0.21242985330300546, "grad_norm": 2562.6357421875, "learning_rate": 4.816912091607762e-05, "loss": 97.7864, "step": 52580 }, { "epoch": 0.21247025456837307, "grad_norm": 305.3066711425781, "learning_rate": 4.816780945217515e-05, "loss": 77.4546, "step": 52590 }, { "epoch": 0.2125106558337407, "grad_norm": 721.4779663085938, "learning_rate": 4.81664975366043e-05, "loss": 87.7981, "step": 52600 }, { "epoch": 0.21255105709910835, "grad_norm": 1225.9923095703125, "learning_rate": 4.816518516939067e-05, "loss": 82.7573, "step": 52610 }, { "epoch": 0.212591458364476, "grad_norm": 729.4354248046875, "learning_rate": 4.8163872350559816e-05, "loss": 66.5677, "step": 52620 }, { "epoch": 0.2126318596298436, "grad_norm": 1146.287109375, "learning_rate": 4.8162559080137346e-05, "loss": 108.5152, "step": 52630 }, { "epoch": 0.21267226089521124, "grad_norm": 1008.0455322265625, "learning_rate": 4.8161245358148866e-05, "loss": 91.5322, "step": 52640 }, { "epoch": 0.21271266216057888, "grad_norm": 193.93905639648438, "learning_rate": 4.815993118461998e-05, "loss": 40.234, "step": 52650 }, { "epoch": 0.2127530634259465, "grad_norm": 1779.3536376953125, "learning_rate": 4.815861655957632e-05, "loss": 82.0843, "step": 52660 }, { "epoch": 0.21279346469131413, "grad_norm": 761.0687255859375, "learning_rate": 4.81573014830435e-05, "loss": 98.4412, "step": 52670 }, { "epoch": 0.21283386595668177, "grad_norm": 639.0739135742188, "learning_rate": 4.815598595504717e-05, "loss": 103.2698, "step": 52680 }, { "epoch": 0.21287426722204938, "grad_norm": 1339.623291015625, "learning_rate": 4.8154669975612966e-05, "loss": 97.1118, "step": 52690 }, { "epoch": 0.21291466848741702, "grad_norm": 1258.3170166015625, "learning_rate": 4.8153353544766553e-05, "loss": 73.1002, "step": 52700 }, { "epoch": 0.21295506975278466, "grad_norm": 783.255859375, "learning_rate": 4.815203666253359e-05, "loss": 60.4868, "step": 52710 }, { "epoch": 0.21299547101815228, "grad_norm": 621.6766357421875, "learning_rate": 4.8150719328939755e-05, "loss": 53.9103, "step": 52720 }, { "epoch": 0.21303587228351992, "grad_norm": 899.034912109375, "learning_rate": 4.814940154401073e-05, "loss": 90.3724, "step": 52730 }, { "epoch": 0.21307627354888756, "grad_norm": 1244.728515625, "learning_rate": 4.81480833077722e-05, "loss": 57.8157, "step": 52740 }, { "epoch": 0.21311667481425517, "grad_norm": 809.1812744140625, "learning_rate": 4.814676462024988e-05, "loss": 110.7006, "step": 52750 }, { "epoch": 0.2131570760796228, "grad_norm": 3029.4775390625, "learning_rate": 4.814544548146945e-05, "loss": 63.5398, "step": 52760 }, { "epoch": 0.21319747734499045, "grad_norm": 392.19464111328125, "learning_rate": 4.814412589145665e-05, "loss": 84.5926, "step": 52770 }, { "epoch": 0.2132378786103581, "grad_norm": 883.9444580078125, "learning_rate": 4.814280585023721e-05, "loss": 73.6628, "step": 52780 }, { "epoch": 0.2132782798757257, "grad_norm": 1597.8800048828125, "learning_rate": 4.814148535783684e-05, "loss": 137.1192, "step": 52790 }, { "epoch": 0.21331868114109334, "grad_norm": 390.61407470703125, "learning_rate": 4.8140164414281306e-05, "loss": 58.5544, "step": 52800 }, { "epoch": 0.21335908240646098, "grad_norm": 1327.6656494140625, "learning_rate": 4.813884301959635e-05, "loss": 62.8842, "step": 52810 }, { "epoch": 0.2133994836718286, "grad_norm": 1165.1925048828125, "learning_rate": 4.813752117380774e-05, "loss": 73.2491, "step": 52820 }, { "epoch": 0.21343988493719623, "grad_norm": 974.8809204101562, "learning_rate": 4.813619887694124e-05, "loss": 86.8438, "step": 52830 }, { "epoch": 0.21348028620256387, "grad_norm": 658.647216796875, "learning_rate": 4.813487612902264e-05, "loss": 75.7994, "step": 52840 }, { "epoch": 0.21352068746793149, "grad_norm": 619.7402954101562, "learning_rate": 4.8133552930077716e-05, "loss": 47.0964, "step": 52850 }, { "epoch": 0.21356108873329913, "grad_norm": 865.9132690429688, "learning_rate": 4.813222928013226e-05, "loss": 47.7462, "step": 52860 }, { "epoch": 0.21360148999866677, "grad_norm": 983.5340576171875, "learning_rate": 4.813090517921209e-05, "loss": 48.6633, "step": 52870 }, { "epoch": 0.21364189126403438, "grad_norm": 607.2757568359375, "learning_rate": 4.812958062734302e-05, "loss": 93.2731, "step": 52880 }, { "epoch": 0.21368229252940202, "grad_norm": 732.9096069335938, "learning_rate": 4.812825562455086e-05, "loss": 83.7247, "step": 52890 }, { "epoch": 0.21372269379476966, "grad_norm": 493.807373046875, "learning_rate": 4.812693017086145e-05, "loss": 71.3714, "step": 52900 }, { "epoch": 0.21376309506013727, "grad_norm": 880.7445678710938, "learning_rate": 4.8125604266300636e-05, "loss": 69.891, "step": 52910 }, { "epoch": 0.2138034963255049, "grad_norm": 1000.0642700195312, "learning_rate": 4.812427791089426e-05, "loss": 106.0305, "step": 52920 }, { "epoch": 0.21384389759087255, "grad_norm": 649.3963012695312, "learning_rate": 4.812295110466817e-05, "loss": 71.4997, "step": 52930 }, { "epoch": 0.2138842988562402, "grad_norm": 539.8707885742188, "learning_rate": 4.812162384764826e-05, "loss": 81.3335, "step": 52940 }, { "epoch": 0.2139247001216078, "grad_norm": 822.1153564453125, "learning_rate": 4.8120296139860376e-05, "loss": 67.337, "step": 52950 }, { "epoch": 0.21396510138697544, "grad_norm": 2130.726806640625, "learning_rate": 4.811896798133042e-05, "loss": 111.1284, "step": 52960 }, { "epoch": 0.21400550265234308, "grad_norm": 467.0498046875, "learning_rate": 4.811763937208428e-05, "loss": 84.292, "step": 52970 }, { "epoch": 0.2140459039177107, "grad_norm": 1501.177978515625, "learning_rate": 4.811631031214786e-05, "loss": 63.4714, "step": 52980 }, { "epoch": 0.21408630518307833, "grad_norm": 628.075439453125, "learning_rate": 4.811498080154707e-05, "loss": 113.7706, "step": 52990 }, { "epoch": 0.21412670644844597, "grad_norm": 792.900146484375, "learning_rate": 4.8113650840307834e-05, "loss": 55.3442, "step": 53000 }, { "epoch": 0.2141671077138136, "grad_norm": 989.729736328125, "learning_rate": 4.811232042845607e-05, "loss": 88.5752, "step": 53010 }, { "epoch": 0.21420750897918123, "grad_norm": 1206.408447265625, "learning_rate": 4.8110989566017716e-05, "loss": 50.2796, "step": 53020 }, { "epoch": 0.21424791024454887, "grad_norm": 854.8396606445312, "learning_rate": 4.810965825301873e-05, "loss": 85.1104, "step": 53030 }, { "epoch": 0.21428831150991648, "grad_norm": 841.41357421875, "learning_rate": 4.810832648948505e-05, "loss": 64.3325, "step": 53040 }, { "epoch": 0.21432871277528412, "grad_norm": 1686.3604736328125, "learning_rate": 4.810699427544265e-05, "loss": 73.6608, "step": 53050 }, { "epoch": 0.21436911404065176, "grad_norm": 1173.083740234375, "learning_rate": 4.810566161091751e-05, "loss": 80.8808, "step": 53060 }, { "epoch": 0.21440951530601937, "grad_norm": 1014.2672119140625, "learning_rate": 4.810432849593559e-05, "loss": 109.5157, "step": 53070 }, { "epoch": 0.214449916571387, "grad_norm": 848.516357421875, "learning_rate": 4.810299493052289e-05, "loss": 86.9813, "step": 53080 }, { "epoch": 0.21449031783675465, "grad_norm": 2046.2830810546875, "learning_rate": 4.810166091470542e-05, "loss": 71.7423, "step": 53090 }, { "epoch": 0.2145307191021223, "grad_norm": 856.9232177734375, "learning_rate": 4.810032644850917e-05, "loss": 50.4441, "step": 53100 }, { "epoch": 0.2145711203674899, "grad_norm": 868.9803466796875, "learning_rate": 4.809899153196017e-05, "loss": 115.6349, "step": 53110 }, { "epoch": 0.21461152163285754, "grad_norm": 0.0, "learning_rate": 4.809765616508443e-05, "loss": 59.3985, "step": 53120 }, { "epoch": 0.21465192289822518, "grad_norm": 2540.765625, "learning_rate": 4.8096320347908e-05, "loss": 75.7832, "step": 53130 }, { "epoch": 0.2146923241635928, "grad_norm": 927.6019897460938, "learning_rate": 4.8094984080456904e-05, "loss": 65.1414, "step": 53140 }, { "epoch": 0.21473272542896044, "grad_norm": 863.6876831054688, "learning_rate": 4.8093647362757206e-05, "loss": 163.4252, "step": 53150 }, { "epoch": 0.21477312669432808, "grad_norm": 542.0109252929688, "learning_rate": 4.809231019483497e-05, "loss": 81.4207, "step": 53160 }, { "epoch": 0.2148135279596957, "grad_norm": 738.8974609375, "learning_rate": 4.809097257671625e-05, "loss": 71.7718, "step": 53170 }, { "epoch": 0.21485392922506333, "grad_norm": 1300.409423828125, "learning_rate": 4.808963450842713e-05, "loss": 63.6445, "step": 53180 }, { "epoch": 0.21489433049043097, "grad_norm": 710.06103515625, "learning_rate": 4.80882959899937e-05, "loss": 75.5958, "step": 53190 }, { "epoch": 0.21493473175579858, "grad_norm": 1211.570556640625, "learning_rate": 4.808695702144206e-05, "loss": 96.9039, "step": 53200 }, { "epoch": 0.21497513302116622, "grad_norm": 4932.537109375, "learning_rate": 4.808561760279831e-05, "loss": 62.6395, "step": 53210 }, { "epoch": 0.21501553428653386, "grad_norm": 1449.601806640625, "learning_rate": 4.8084277734088544e-05, "loss": 76.1311, "step": 53220 }, { "epoch": 0.21505593555190147, "grad_norm": 1062.4901123046875, "learning_rate": 4.808293741533891e-05, "loss": 94.2731, "step": 53230 }, { "epoch": 0.2150963368172691, "grad_norm": 766.841552734375, "learning_rate": 4.808159664657552e-05, "loss": 59.1123, "step": 53240 }, { "epoch": 0.21513673808263675, "grad_norm": 503.95635986328125, "learning_rate": 4.808025542782453e-05, "loss": 71.4319, "step": 53250 }, { "epoch": 0.2151771393480044, "grad_norm": 2950.0439453125, "learning_rate": 4.8078913759112066e-05, "loss": 90.5065, "step": 53260 }, { "epoch": 0.215217540613372, "grad_norm": 1027.7640380859375, "learning_rate": 4.80775716404643e-05, "loss": 56.2461, "step": 53270 }, { "epoch": 0.21525794187873964, "grad_norm": 507.6009521484375, "learning_rate": 4.8076229071907397e-05, "loss": 66.3295, "step": 53280 }, { "epoch": 0.21529834314410728, "grad_norm": 665.6806640625, "learning_rate": 4.807488605346753e-05, "loss": 80.3044, "step": 53290 }, { "epoch": 0.2153387444094749, "grad_norm": 989.35791015625, "learning_rate": 4.8073542585170877e-05, "loss": 68.0304, "step": 53300 }, { "epoch": 0.21537914567484254, "grad_norm": 6701.48046875, "learning_rate": 4.8072198667043635e-05, "loss": 130.3542, "step": 53310 }, { "epoch": 0.21541954694021018, "grad_norm": 626.8556518554688, "learning_rate": 4.8070854299111994e-05, "loss": 69.6508, "step": 53320 }, { "epoch": 0.2154599482055778, "grad_norm": 889.5938720703125, "learning_rate": 4.806950948140217e-05, "loss": 94.4536, "step": 53330 }, { "epoch": 0.21550034947094543, "grad_norm": 470.2935485839844, "learning_rate": 4.8068164213940393e-05, "loss": 65.35, "step": 53340 }, { "epoch": 0.21554075073631307, "grad_norm": 1064.505859375, "learning_rate": 4.8066818496752875e-05, "loss": 88.5015, "step": 53350 }, { "epoch": 0.21558115200168068, "grad_norm": 673.2662963867188, "learning_rate": 4.8065472329865854e-05, "loss": 63.5901, "step": 53360 }, { "epoch": 0.21562155326704832, "grad_norm": 880.4266967773438, "learning_rate": 4.806412571330557e-05, "loss": 63.0849, "step": 53370 }, { "epoch": 0.21566195453241596, "grad_norm": 761.7822875976562, "learning_rate": 4.8062778647098284e-05, "loss": 78.9925, "step": 53380 }, { "epoch": 0.21570235579778357, "grad_norm": 974.259521484375, "learning_rate": 4.806143113127025e-05, "loss": 93.895, "step": 53390 }, { "epoch": 0.2157427570631512, "grad_norm": 555.7957153320312, "learning_rate": 4.8060083165847754e-05, "loss": 74.5153, "step": 53400 }, { "epoch": 0.21578315832851885, "grad_norm": 925.432373046875, "learning_rate": 4.805873475085706e-05, "loss": 119.6515, "step": 53410 }, { "epoch": 0.2158235595938865, "grad_norm": 817.2948608398438, "learning_rate": 4.805738588632446e-05, "loss": 90.0423, "step": 53420 }, { "epoch": 0.2158639608592541, "grad_norm": 921.9326782226562, "learning_rate": 4.805603657227625e-05, "loss": 106.7092, "step": 53430 }, { "epoch": 0.21590436212462175, "grad_norm": 1191.692626953125, "learning_rate": 4.805468680873874e-05, "loss": 57.5349, "step": 53440 }, { "epoch": 0.21594476338998939, "grad_norm": 327.62908935546875, "learning_rate": 4.8053336595738236e-05, "loss": 84.3568, "step": 53450 }, { "epoch": 0.215985164655357, "grad_norm": 752.6683349609375, "learning_rate": 4.805198593330107e-05, "loss": 63.1536, "step": 53460 }, { "epoch": 0.21602556592072464, "grad_norm": 711.6024780273438, "learning_rate": 4.8050634821453565e-05, "loss": 62.8601, "step": 53470 }, { "epoch": 0.21606596718609228, "grad_norm": 466.1304931640625, "learning_rate": 4.8049283260222075e-05, "loss": 77.9144, "step": 53480 }, { "epoch": 0.2161063684514599, "grad_norm": 1117.0677490234375, "learning_rate": 4.804793124963294e-05, "loss": 76.5366, "step": 53490 }, { "epoch": 0.21614676971682753, "grad_norm": 637.1599731445312, "learning_rate": 4.8046578789712515e-05, "loss": 67.9231, "step": 53500 }, { "epoch": 0.21618717098219517, "grad_norm": 568.0106811523438, "learning_rate": 4.804522588048718e-05, "loss": 75.5774, "step": 53510 }, { "epoch": 0.21622757224756278, "grad_norm": 432.543212890625, "learning_rate": 4.8043872521983294e-05, "loss": 59.4265, "step": 53520 }, { "epoch": 0.21626797351293042, "grad_norm": 454.713134765625, "learning_rate": 4.804251871422725e-05, "loss": 96.0723, "step": 53530 }, { "epoch": 0.21630837477829806, "grad_norm": 1082.2203369140625, "learning_rate": 4.804116445724543e-05, "loss": 79.7681, "step": 53540 }, { "epoch": 0.21634877604366567, "grad_norm": 621.678466796875, "learning_rate": 4.803980975106427e-05, "loss": 85.7473, "step": 53550 }, { "epoch": 0.21638917730903331, "grad_norm": 890.781005859375, "learning_rate": 4.803845459571014e-05, "loss": 86.9005, "step": 53560 }, { "epoch": 0.21642957857440095, "grad_norm": 649.1585083007812, "learning_rate": 4.8037098991209484e-05, "loss": 61.2427, "step": 53570 }, { "epoch": 0.2164699798397686, "grad_norm": 850.4962768554688, "learning_rate": 4.8035742937588724e-05, "loss": 59.2555, "step": 53580 }, { "epoch": 0.2165103811051362, "grad_norm": 787.1389770507812, "learning_rate": 4.803438643487429e-05, "loss": 82.7354, "step": 53590 }, { "epoch": 0.21655078237050385, "grad_norm": 743.615234375, "learning_rate": 4.803302948309264e-05, "loss": 72.4777, "step": 53600 }, { "epoch": 0.2165911836358715, "grad_norm": 1553.5562744140625, "learning_rate": 4.8031672082270216e-05, "loss": 85.1181, "step": 53610 }, { "epoch": 0.2166315849012391, "grad_norm": 525.2081298828125, "learning_rate": 4.803031423243349e-05, "loss": 65.5073, "step": 53620 }, { "epoch": 0.21667198616660674, "grad_norm": 718.9132080078125, "learning_rate": 4.802895593360893e-05, "loss": 169.8341, "step": 53630 }, { "epoch": 0.21671238743197438, "grad_norm": 663.7149658203125, "learning_rate": 4.8027597185823016e-05, "loss": 54.949, "step": 53640 }, { "epoch": 0.216752788697342, "grad_norm": 558.2113037109375, "learning_rate": 4.802623798910224e-05, "loss": 72.456, "step": 53650 }, { "epoch": 0.21679318996270963, "grad_norm": 3290.45068359375, "learning_rate": 4.802487834347311e-05, "loss": 80.3636, "step": 53660 }, { "epoch": 0.21683359122807727, "grad_norm": 601.9519653320312, "learning_rate": 4.802351824896211e-05, "loss": 82.0123, "step": 53670 }, { "epoch": 0.21687399249344488, "grad_norm": 0.0, "learning_rate": 4.802215770559577e-05, "loss": 58.6228, "step": 53680 }, { "epoch": 0.21691439375881252, "grad_norm": 610.6953125, "learning_rate": 4.802079671340062e-05, "loss": 60.981, "step": 53690 }, { "epoch": 0.21695479502418016, "grad_norm": 774.2489624023438, "learning_rate": 4.801943527240318e-05, "loss": 94.8428, "step": 53700 }, { "epoch": 0.21699519628954778, "grad_norm": 1123.4874267578125, "learning_rate": 4.801807338263e-05, "loss": 76.9702, "step": 53710 }, { "epoch": 0.21703559755491542, "grad_norm": 1223.6903076171875, "learning_rate": 4.801671104410763e-05, "loss": 101.9046, "step": 53720 }, { "epoch": 0.21707599882028306, "grad_norm": 1136.135009765625, "learning_rate": 4.801534825686263e-05, "loss": 98.1998, "step": 53730 }, { "epoch": 0.2171164000856507, "grad_norm": 1007.6103515625, "learning_rate": 4.801398502092156e-05, "loss": 79.4994, "step": 53740 }, { "epoch": 0.2171568013510183, "grad_norm": 1269.972900390625, "learning_rate": 4.8012621336311016e-05, "loss": 63.9924, "step": 53750 }, { "epoch": 0.21719720261638595, "grad_norm": 643.7221069335938, "learning_rate": 4.8011257203057556e-05, "loss": 55.9485, "step": 53760 }, { "epoch": 0.2172376038817536, "grad_norm": 1297.72412109375, "learning_rate": 4.80098926211878e-05, "loss": 92.5376, "step": 53770 }, { "epoch": 0.2172780051471212, "grad_norm": 1075.8739013671875, "learning_rate": 4.800852759072833e-05, "loss": 59.2101, "step": 53780 }, { "epoch": 0.21731840641248884, "grad_norm": 1902.2833251953125, "learning_rate": 4.800716211170578e-05, "loss": 95.2259, "step": 53790 }, { "epoch": 0.21735880767785648, "grad_norm": 1468.7894287109375, "learning_rate": 4.800579618414676e-05, "loss": 94.4642, "step": 53800 }, { "epoch": 0.2173992089432241, "grad_norm": 640.5420532226562, "learning_rate": 4.80044298080779e-05, "loss": 86.4085, "step": 53810 }, { "epoch": 0.21743961020859173, "grad_norm": 872.8704833984375, "learning_rate": 4.800306298352583e-05, "loss": 64.3737, "step": 53820 }, { "epoch": 0.21748001147395937, "grad_norm": 586.101806640625, "learning_rate": 4.800169571051721e-05, "loss": 89.3042, "step": 53830 }, { "epoch": 0.21752041273932698, "grad_norm": 966.0181884765625, "learning_rate": 4.800032798907869e-05, "loss": 90.1948, "step": 53840 }, { "epoch": 0.21756081400469462, "grad_norm": 795.524658203125, "learning_rate": 4.799895981923693e-05, "loss": 110.5656, "step": 53850 }, { "epoch": 0.21760121527006226, "grad_norm": 496.3986511230469, "learning_rate": 4.799759120101861e-05, "loss": 51.413, "step": 53860 }, { "epoch": 0.21764161653542988, "grad_norm": 596.7861328125, "learning_rate": 4.799622213445041e-05, "loss": 44.7856, "step": 53870 }, { "epoch": 0.21768201780079752, "grad_norm": 1061.037353515625, "learning_rate": 4.7994852619559016e-05, "loss": 93.6382, "step": 53880 }, { "epoch": 0.21772241906616516, "grad_norm": 386.0953063964844, "learning_rate": 4.7993482656371135e-05, "loss": 70.8924, "step": 53890 }, { "epoch": 0.2177628203315328, "grad_norm": 1275.1336669921875, "learning_rate": 4.799211224491348e-05, "loss": 77.7109, "step": 53900 }, { "epoch": 0.2178032215969004, "grad_norm": 534.1307373046875, "learning_rate": 4.799074138521274e-05, "loss": 47.0972, "step": 53910 }, { "epoch": 0.21784362286226805, "grad_norm": 915.685791015625, "learning_rate": 4.798937007729568e-05, "loss": 67.5173, "step": 53920 }, { "epoch": 0.2178840241276357, "grad_norm": 965.9025268554688, "learning_rate": 4.7987998321189e-05, "loss": 60.5081, "step": 53930 }, { "epoch": 0.2179244253930033, "grad_norm": 2893.699462890625, "learning_rate": 4.798662611691947e-05, "loss": 109.9214, "step": 53940 }, { "epoch": 0.21796482665837094, "grad_norm": 468.1808776855469, "learning_rate": 4.7985253464513825e-05, "loss": 56.8471, "step": 53950 }, { "epoch": 0.21800522792373858, "grad_norm": 547.9169311523438, "learning_rate": 4.798388036399883e-05, "loss": 57.3491, "step": 53960 }, { "epoch": 0.2180456291891062, "grad_norm": 482.74652099609375, "learning_rate": 4.7982506815401254e-05, "loss": 69.7557, "step": 53970 }, { "epoch": 0.21808603045447383, "grad_norm": 1082.1453857421875, "learning_rate": 4.7981132818747876e-05, "loss": 96.0472, "step": 53980 }, { "epoch": 0.21812643171984147, "grad_norm": 1212.4945068359375, "learning_rate": 4.797975837406547e-05, "loss": 63.2927, "step": 53990 }, { "epoch": 0.21816683298520909, "grad_norm": 921.6978149414062, "learning_rate": 4.797838348138086e-05, "loss": 57.7244, "step": 54000 }, { "epoch": 0.21820723425057673, "grad_norm": 873.8997802734375, "learning_rate": 4.797700814072083e-05, "loss": 66.1426, "step": 54010 }, { "epoch": 0.21824763551594437, "grad_norm": 760.1358032226562, "learning_rate": 4.7975632352112195e-05, "loss": 77.4176, "step": 54020 }, { "epoch": 0.21828803678131198, "grad_norm": 1324.942626953125, "learning_rate": 4.7974256115581785e-05, "loss": 69.078, "step": 54030 }, { "epoch": 0.21832843804667962, "grad_norm": 1516.5565185546875, "learning_rate": 4.797287943115641e-05, "loss": 64.1911, "step": 54040 }, { "epoch": 0.21836883931204726, "grad_norm": 1137.977294921875, "learning_rate": 4.7971502298862936e-05, "loss": 102.8754, "step": 54050 }, { "epoch": 0.2184092405774149, "grad_norm": 813.7706909179688, "learning_rate": 4.7970124718728193e-05, "loss": 82.1068, "step": 54060 }, { "epoch": 0.2184496418427825, "grad_norm": 635.2130737304688, "learning_rate": 4.7968746690779044e-05, "loss": 58.6373, "step": 54070 }, { "epoch": 0.21849004310815015, "grad_norm": 642.5101928710938, "learning_rate": 4.796736821504235e-05, "loss": 74.7753, "step": 54080 }, { "epoch": 0.2185304443735178, "grad_norm": 849.4306030273438, "learning_rate": 4.7965989291545e-05, "loss": 91.7941, "step": 54090 }, { "epoch": 0.2185708456388854, "grad_norm": 740.3421630859375, "learning_rate": 4.796460992031385e-05, "loss": 72.5141, "step": 54100 }, { "epoch": 0.21861124690425304, "grad_norm": 606.4213256835938, "learning_rate": 4.7963230101375814e-05, "loss": 90.6276, "step": 54110 }, { "epoch": 0.21865164816962068, "grad_norm": 534.52197265625, "learning_rate": 4.7961849834757786e-05, "loss": 141.8116, "step": 54120 }, { "epoch": 0.2186920494349883, "grad_norm": 2459.29443359375, "learning_rate": 4.7960469120486674e-05, "loss": 104.6196, "step": 54130 }, { "epoch": 0.21873245070035593, "grad_norm": 804.312744140625, "learning_rate": 4.7959087958589386e-05, "loss": 71.0338, "step": 54140 }, { "epoch": 0.21877285196572357, "grad_norm": 884.2997436523438, "learning_rate": 4.7957706349092865e-05, "loss": 80.0751, "step": 54150 }, { "epoch": 0.2188132532310912, "grad_norm": 1428.2335205078125, "learning_rate": 4.795632429202405e-05, "loss": 73.554, "step": 54160 }, { "epoch": 0.21885365449645883, "grad_norm": 555.4033203125, "learning_rate": 4.795494178740986e-05, "loss": 52.6659, "step": 54170 }, { "epoch": 0.21889405576182647, "grad_norm": 479.893310546875, "learning_rate": 4.795355883527727e-05, "loss": 81.4304, "step": 54180 }, { "epoch": 0.21893445702719408, "grad_norm": 738.9581909179688, "learning_rate": 4.7952175435653226e-05, "loss": 83.4163, "step": 54190 }, { "epoch": 0.21897485829256172, "grad_norm": 599.0054321289062, "learning_rate": 4.79507915885647e-05, "loss": 62.3013, "step": 54200 }, { "epoch": 0.21901525955792936, "grad_norm": 672.7980346679688, "learning_rate": 4.794940729403869e-05, "loss": 73.0649, "step": 54210 }, { "epoch": 0.219055660823297, "grad_norm": 1021.8695068359375, "learning_rate": 4.794802255210217e-05, "loss": 74.2551, "step": 54220 }, { "epoch": 0.2190960620886646, "grad_norm": 2235.082763671875, "learning_rate": 4.794663736278212e-05, "loss": 100.521, "step": 54230 }, { "epoch": 0.21913646335403225, "grad_norm": 454.6265563964844, "learning_rate": 4.794525172610558e-05, "loss": 64.2982, "step": 54240 }, { "epoch": 0.2191768646193999, "grad_norm": 2061.634765625, "learning_rate": 4.794386564209953e-05, "loss": 80.8011, "step": 54250 }, { "epoch": 0.2192172658847675, "grad_norm": 1272.87451171875, "learning_rate": 4.7942479110791015e-05, "loss": 75.4505, "step": 54260 }, { "epoch": 0.21925766715013514, "grad_norm": 732.8099975585938, "learning_rate": 4.7941092132207056e-05, "loss": 76.4685, "step": 54270 }, { "epoch": 0.21929806841550278, "grad_norm": 1131.0069580078125, "learning_rate": 4.793970470637469e-05, "loss": 95.1893, "step": 54280 }, { "epoch": 0.2193384696808704, "grad_norm": 707.0789794921875, "learning_rate": 4.793831683332098e-05, "loss": 79.5281, "step": 54290 }, { "epoch": 0.21937887094623804, "grad_norm": 1276.7493896484375, "learning_rate": 4.7936928513072964e-05, "loss": 63.4697, "step": 54300 }, { "epoch": 0.21941927221160568, "grad_norm": 169.8773193359375, "learning_rate": 4.793553974565773e-05, "loss": 65.7283, "step": 54310 }, { "epoch": 0.2194596734769733, "grad_norm": 554.336181640625, "learning_rate": 4.793415053110233e-05, "loss": 56.5391, "step": 54320 }, { "epoch": 0.21950007474234093, "grad_norm": 848.8544921875, "learning_rate": 4.7932760869433865e-05, "loss": 66.7205, "step": 54330 }, { "epoch": 0.21954047600770857, "grad_norm": 1150.58349609375, "learning_rate": 4.793137076067942e-05, "loss": 70.9918, "step": 54340 }, { "epoch": 0.21958087727307618, "grad_norm": 1347.6571044921875, "learning_rate": 4.792998020486609e-05, "loss": 75.1297, "step": 54350 }, { "epoch": 0.21962127853844382, "grad_norm": 796.1422119140625, "learning_rate": 4.792858920202099e-05, "loss": 91.2722, "step": 54360 }, { "epoch": 0.21966167980381146, "grad_norm": 978.01025390625, "learning_rate": 4.792719775217124e-05, "loss": 79.3633, "step": 54370 }, { "epoch": 0.2197020810691791, "grad_norm": 676.4722900390625, "learning_rate": 4.7925805855343975e-05, "loss": 76.3162, "step": 54380 }, { "epoch": 0.2197424823345467, "grad_norm": 820.4541625976562, "learning_rate": 4.7924413511566315e-05, "loss": 43.4046, "step": 54390 }, { "epoch": 0.21978288359991435, "grad_norm": 598.8165893554688, "learning_rate": 4.7923020720865414e-05, "loss": 57.7346, "step": 54400 }, { "epoch": 0.219823284865282, "grad_norm": 2260.6591796875, "learning_rate": 4.792162748326841e-05, "loss": 97.9996, "step": 54410 }, { "epoch": 0.2198636861306496, "grad_norm": 1380.8861083984375, "learning_rate": 4.792023379880249e-05, "loss": 88.7846, "step": 54420 }, { "epoch": 0.21990408739601724, "grad_norm": 1240.204833984375, "learning_rate": 4.791883966749482e-05, "loss": 78.3506, "step": 54430 }, { "epoch": 0.21994448866138488, "grad_norm": 436.11798095703125, "learning_rate": 4.791744508937256e-05, "loss": 80.6678, "step": 54440 }, { "epoch": 0.2199848899267525, "grad_norm": 1142.2906494140625, "learning_rate": 4.791605006446291e-05, "loss": 63.2783, "step": 54450 }, { "epoch": 0.22002529119212014, "grad_norm": 815.8292846679688, "learning_rate": 4.7914654592793065e-05, "loss": 77.567, "step": 54460 }, { "epoch": 0.22006569245748778, "grad_norm": 920.00439453125, "learning_rate": 4.791325867439024e-05, "loss": 57.8406, "step": 54470 }, { "epoch": 0.2201060937228554, "grad_norm": 752.0264282226562, "learning_rate": 4.791186230928163e-05, "loss": 94.0675, "step": 54480 }, { "epoch": 0.22014649498822303, "grad_norm": 1893.0584716796875, "learning_rate": 4.7910465497494474e-05, "loss": 136.9929, "step": 54490 }, { "epoch": 0.22018689625359067, "grad_norm": 634.4692993164062, "learning_rate": 4.790906823905599e-05, "loss": 69.6565, "step": 54500 }, { "epoch": 0.22022729751895828, "grad_norm": 395.0136413574219, "learning_rate": 4.790767053399343e-05, "loss": 33.5299, "step": 54510 }, { "epoch": 0.22026769878432592, "grad_norm": 1267.1798095703125, "learning_rate": 4.790627238233405e-05, "loss": 52.7564, "step": 54520 }, { "epoch": 0.22030810004969356, "grad_norm": 1663.51611328125, "learning_rate": 4.790487378410509e-05, "loss": 97.8893, "step": 54530 }, { "epoch": 0.2203485013150612, "grad_norm": 279.7654113769531, "learning_rate": 4.790347473933382e-05, "loss": 49.2227, "step": 54540 }, { "epoch": 0.2203889025804288, "grad_norm": 1237.12255859375, "learning_rate": 4.7902075248047515e-05, "loss": 93.2389, "step": 54550 }, { "epoch": 0.22042930384579645, "grad_norm": 1792.2801513671875, "learning_rate": 4.7900675310273466e-05, "loss": 65.3986, "step": 54560 }, { "epoch": 0.2204697051111641, "grad_norm": 923.925048828125, "learning_rate": 4.7899274926038976e-05, "loss": 60.5506, "step": 54570 }, { "epoch": 0.2205101063765317, "grad_norm": 624.0264282226562, "learning_rate": 4.789787409537131e-05, "loss": 46.6642, "step": 54580 }, { "epoch": 0.22055050764189935, "grad_norm": 732.738037109375, "learning_rate": 4.789647281829781e-05, "loss": 118.6461, "step": 54590 }, { "epoch": 0.22059090890726699, "grad_norm": 654.3661499023438, "learning_rate": 4.789507109484579e-05, "loss": 108.6401, "step": 54600 }, { "epoch": 0.2206313101726346, "grad_norm": 628.9659423828125, "learning_rate": 4.7893668925042565e-05, "loss": 56.0631, "step": 54610 }, { "epoch": 0.22067171143800224, "grad_norm": 1165.976806640625, "learning_rate": 4.789226630891548e-05, "loss": 67.4176, "step": 54620 }, { "epoch": 0.22071211270336988, "grad_norm": 2275.9443359375, "learning_rate": 4.789086324649187e-05, "loss": 81.9733, "step": 54630 }, { "epoch": 0.2207525139687375, "grad_norm": 1239.550537109375, "learning_rate": 4.78894597377991e-05, "loss": 81.8693, "step": 54640 }, { "epoch": 0.22079291523410513, "grad_norm": 474.0306701660156, "learning_rate": 4.788805578286454e-05, "loss": 70.7014, "step": 54650 }, { "epoch": 0.22083331649947277, "grad_norm": 1004.7076416015625, "learning_rate": 4.788665138171553e-05, "loss": 79.816, "step": 54660 }, { "epoch": 0.22087371776484038, "grad_norm": 536.8333129882812, "learning_rate": 4.788524653437948e-05, "loss": 107.741, "step": 54670 }, { "epoch": 0.22091411903020802, "grad_norm": 1520.8243408203125, "learning_rate": 4.7883841240883766e-05, "loss": 67.9091, "step": 54680 }, { "epoch": 0.22095452029557566, "grad_norm": 978.5899047851562, "learning_rate": 4.7882435501255785e-05, "loss": 61.7931, "step": 54690 }, { "epoch": 0.22099492156094327, "grad_norm": 2291.6357421875, "learning_rate": 4.788102931552294e-05, "loss": 66.0947, "step": 54700 }, { "epoch": 0.22103532282631091, "grad_norm": 1223.5504150390625, "learning_rate": 4.787962268371266e-05, "loss": 96.6182, "step": 54710 }, { "epoch": 0.22107572409167855, "grad_norm": 753.4441528320312, "learning_rate": 4.7878215605852336e-05, "loss": 78.4969, "step": 54720 }, { "epoch": 0.2211161253570462, "grad_norm": 2007.7423095703125, "learning_rate": 4.7876808081969436e-05, "loss": 88.9049, "step": 54730 }, { "epoch": 0.2211565266224138, "grad_norm": 520.70361328125, "learning_rate": 4.787540011209138e-05, "loss": 55.8599, "step": 54740 }, { "epoch": 0.22119692788778145, "grad_norm": 509.8031005859375, "learning_rate": 4.7873991696245624e-05, "loss": 68.4668, "step": 54750 }, { "epoch": 0.2212373291531491, "grad_norm": 1531.3848876953125, "learning_rate": 4.787258283445962e-05, "loss": 109.6678, "step": 54760 }, { "epoch": 0.2212777304185167, "grad_norm": 1034.49755859375, "learning_rate": 4.7871173526760835e-05, "loss": 82.1401, "step": 54770 }, { "epoch": 0.22131813168388434, "grad_norm": 1044.2027587890625, "learning_rate": 4.7869763773176756e-05, "loss": 55.1931, "step": 54780 }, { "epoch": 0.22135853294925198, "grad_norm": 937.3899536132812, "learning_rate": 4.786835357373486e-05, "loss": 86.6882, "step": 54790 }, { "epoch": 0.2213989342146196, "grad_norm": 816.9603881835938, "learning_rate": 4.7866942928462625e-05, "loss": 68.9065, "step": 54800 }, { "epoch": 0.22143933547998723, "grad_norm": 1069.583984375, "learning_rate": 4.7865531837387576e-05, "loss": 79.5692, "step": 54810 }, { "epoch": 0.22147973674535487, "grad_norm": 859.8712768554688, "learning_rate": 4.7864120300537206e-05, "loss": 52.4455, "step": 54820 }, { "epoch": 0.22152013801072248, "grad_norm": 861.6782836914062, "learning_rate": 4.786270831793904e-05, "loss": 67.6935, "step": 54830 }, { "epoch": 0.22156053927609012, "grad_norm": 382.9268798828125, "learning_rate": 4.786129588962061e-05, "loss": 85.1282, "step": 54840 }, { "epoch": 0.22160094054145776, "grad_norm": 1050.475830078125, "learning_rate": 4.785988301560944e-05, "loss": 103.8309, "step": 54850 }, { "epoch": 0.22164134180682538, "grad_norm": 691.6054077148438, "learning_rate": 4.785846969593308e-05, "loss": 68.233, "step": 54860 }, { "epoch": 0.22168174307219302, "grad_norm": 2130.216064453125, "learning_rate": 4.785705593061909e-05, "loss": 68.9335, "step": 54870 }, { "epoch": 0.22172214433756066, "grad_norm": 804.7174072265625, "learning_rate": 4.7855641719695023e-05, "loss": 60.727, "step": 54880 }, { "epoch": 0.2217625456029283, "grad_norm": 634.82080078125, "learning_rate": 4.785422706318846e-05, "loss": 88.999, "step": 54890 }, { "epoch": 0.2218029468682959, "grad_norm": 597.0457763671875, "learning_rate": 4.785281196112698e-05, "loss": 64.0293, "step": 54900 }, { "epoch": 0.22184334813366355, "grad_norm": 1046.802001953125, "learning_rate": 4.785139641353815e-05, "loss": 70.1819, "step": 54910 }, { "epoch": 0.2218837493990312, "grad_norm": 753.67236328125, "learning_rate": 4.7849980420449594e-05, "loss": 66.1747, "step": 54920 }, { "epoch": 0.2219241506643988, "grad_norm": 1037.9305419921875, "learning_rate": 4.7848563981888893e-05, "loss": 108.7891, "step": 54930 }, { "epoch": 0.22196455192976644, "grad_norm": 1170.2239990234375, "learning_rate": 4.784714709788368e-05, "loss": 102.6976, "step": 54940 }, { "epoch": 0.22200495319513408, "grad_norm": 894.5665283203125, "learning_rate": 4.7845729768461576e-05, "loss": 54.2852, "step": 54950 }, { "epoch": 0.2220453544605017, "grad_norm": 584.92431640625, "learning_rate": 4.7844311993650205e-05, "loss": 75.9472, "step": 54960 }, { "epoch": 0.22208575572586933, "grad_norm": 1176.026123046875, "learning_rate": 4.784289377347721e-05, "loss": 88.7713, "step": 54970 }, { "epoch": 0.22212615699123697, "grad_norm": 834.7158203125, "learning_rate": 4.7841475107970244e-05, "loss": 86.6488, "step": 54980 }, { "epoch": 0.22216655825660458, "grad_norm": 1956.4654541015625, "learning_rate": 4.784005599715696e-05, "loss": 53.2461, "step": 54990 }, { "epoch": 0.22220695952197222, "grad_norm": 1810.177734375, "learning_rate": 4.783863644106502e-05, "loss": 141.2733, "step": 55000 }, { "epoch": 0.22224736078733986, "grad_norm": 641.3799438476562, "learning_rate": 4.783721643972211e-05, "loss": 62.0612, "step": 55010 }, { "epoch": 0.22228776205270748, "grad_norm": 1118.1512451171875, "learning_rate": 4.783579599315591e-05, "loss": 80.4928, "step": 55020 }, { "epoch": 0.22232816331807512, "grad_norm": 1295.068115234375, "learning_rate": 4.783437510139411e-05, "loss": 75.4781, "step": 55030 }, { "epoch": 0.22236856458344276, "grad_norm": 489.80078125, "learning_rate": 4.7832953764464405e-05, "loss": 67.9461, "step": 55040 }, { "epoch": 0.2224089658488104, "grad_norm": 432.6492614746094, "learning_rate": 4.783153198239452e-05, "loss": 66.1955, "step": 55050 }, { "epoch": 0.222449367114178, "grad_norm": 0.0, "learning_rate": 4.783010975521216e-05, "loss": 61.2556, "step": 55060 }, { "epoch": 0.22248976837954565, "grad_norm": 656.0430908203125, "learning_rate": 4.7828687082945054e-05, "loss": 50.6344, "step": 55070 }, { "epoch": 0.2225301696449133, "grad_norm": 799.9983520507812, "learning_rate": 4.782726396562094e-05, "loss": 63.7389, "step": 55080 }, { "epoch": 0.2225705709102809, "grad_norm": 848.791748046875, "learning_rate": 4.782584040326757e-05, "loss": 83.2984, "step": 55090 }, { "epoch": 0.22261097217564854, "grad_norm": 749.4911499023438, "learning_rate": 4.7824416395912686e-05, "loss": 84.4783, "step": 55100 }, { "epoch": 0.22265137344101618, "grad_norm": 546.4697265625, "learning_rate": 4.782299194358405e-05, "loss": 80.4849, "step": 55110 }, { "epoch": 0.2226917747063838, "grad_norm": 587.6947021484375, "learning_rate": 4.782156704630944e-05, "loss": 79.875, "step": 55120 }, { "epoch": 0.22273217597175143, "grad_norm": 1178.3653564453125, "learning_rate": 4.782014170411663e-05, "loss": 77.4586, "step": 55130 }, { "epoch": 0.22277257723711907, "grad_norm": 641.4794921875, "learning_rate": 4.781871591703341e-05, "loss": 70.9194, "step": 55140 }, { "epoch": 0.22281297850248669, "grad_norm": 450.2964172363281, "learning_rate": 4.7817289685087577e-05, "loss": 92.5157, "step": 55150 }, { "epoch": 0.22285337976785433, "grad_norm": 1155.9171142578125, "learning_rate": 4.781586300830693e-05, "loss": 125.689, "step": 55160 }, { "epoch": 0.22289378103322197, "grad_norm": 684.2454833984375, "learning_rate": 4.781443588671929e-05, "loss": 61.9113, "step": 55170 }, { "epoch": 0.22293418229858958, "grad_norm": 0.0, "learning_rate": 4.781300832035247e-05, "loss": 66.496, "step": 55180 }, { "epoch": 0.22297458356395722, "grad_norm": 546.8045043945312, "learning_rate": 4.7811580309234314e-05, "loss": 74.1194, "step": 55190 }, { "epoch": 0.22301498482932486, "grad_norm": 559.2355346679688, "learning_rate": 4.781015185339266e-05, "loss": 63.7682, "step": 55200 }, { "epoch": 0.2230553860946925, "grad_norm": 848.5513916015625, "learning_rate": 4.7808722952855344e-05, "loss": 59.4205, "step": 55210 }, { "epoch": 0.2230957873600601, "grad_norm": 513.669677734375, "learning_rate": 4.780729360765024e-05, "loss": 69.9661, "step": 55220 }, { "epoch": 0.22313618862542775, "grad_norm": 1898.05419921875, "learning_rate": 4.7805863817805196e-05, "loss": 60.6413, "step": 55230 }, { "epoch": 0.2231765898907954, "grad_norm": 304.1728820800781, "learning_rate": 4.78044335833481e-05, "loss": 65.169, "step": 55240 }, { "epoch": 0.223216991156163, "grad_norm": 1311.47265625, "learning_rate": 4.780300290430682e-05, "loss": 75.9994, "step": 55250 }, { "epoch": 0.22325739242153064, "grad_norm": 867.0023193359375, "learning_rate": 4.780157178070928e-05, "loss": 70.7438, "step": 55260 }, { "epoch": 0.22329779368689828, "grad_norm": 749.3482055664062, "learning_rate": 4.780014021258334e-05, "loss": 87.707, "step": 55270 }, { "epoch": 0.2233381949522659, "grad_norm": 717.660888671875, "learning_rate": 4.779870819995694e-05, "loss": 90.9197, "step": 55280 }, { "epoch": 0.22337859621763353, "grad_norm": 677.76708984375, "learning_rate": 4.779727574285798e-05, "loss": 69.9488, "step": 55290 }, { "epoch": 0.22341899748300117, "grad_norm": 781.3521728515625, "learning_rate": 4.77958428413144e-05, "loss": 83.5576, "step": 55300 }, { "epoch": 0.2234593987483688, "grad_norm": 678.9473266601562, "learning_rate": 4.779440949535412e-05, "loss": 74.7966, "step": 55310 }, { "epoch": 0.22349980001373643, "grad_norm": 1800.738037109375, "learning_rate": 4.779297570500509e-05, "loss": 97.8668, "step": 55320 }, { "epoch": 0.22354020127910407, "grad_norm": 642.32568359375, "learning_rate": 4.779154147029527e-05, "loss": 38.1043, "step": 55330 }, { "epoch": 0.22358060254447168, "grad_norm": 955.9810180664062, "learning_rate": 4.7790106791252614e-05, "loss": 99.5178, "step": 55340 }, { "epoch": 0.22362100380983932, "grad_norm": 2772.03564453125, "learning_rate": 4.7788671667905096e-05, "loss": 103.198, "step": 55350 }, { "epoch": 0.22366140507520696, "grad_norm": 896.0523071289062, "learning_rate": 4.7787236100280685e-05, "loss": 104.9025, "step": 55360 }, { "epoch": 0.2237018063405746, "grad_norm": 657.1527709960938, "learning_rate": 4.7785800088407376e-05, "loss": 64.4315, "step": 55370 }, { "epoch": 0.2237422076059422, "grad_norm": 1311.0645751953125, "learning_rate": 4.7784363632313166e-05, "loss": 82.5705, "step": 55380 }, { "epoch": 0.22378260887130985, "grad_norm": 636.6400146484375, "learning_rate": 4.778292673202606e-05, "loss": 69.7723, "step": 55390 }, { "epoch": 0.2238230101366775, "grad_norm": 768.77392578125, "learning_rate": 4.778148938757406e-05, "loss": 88.6552, "step": 55400 }, { "epoch": 0.2238634114020451, "grad_norm": 494.320068359375, "learning_rate": 4.7780051598985196e-05, "loss": 78.5235, "step": 55410 }, { "epoch": 0.22390381266741274, "grad_norm": 525.7529907226562, "learning_rate": 4.7778613366287505e-05, "loss": 106.9901, "step": 55420 }, { "epoch": 0.22394421393278038, "grad_norm": 574.625732421875, "learning_rate": 4.7777174689509006e-05, "loss": 81.0395, "step": 55430 }, { "epoch": 0.223984615198148, "grad_norm": 599.45947265625, "learning_rate": 4.7775735568677775e-05, "loss": 68.9037, "step": 55440 }, { "epoch": 0.22402501646351564, "grad_norm": 653.6656494140625, "learning_rate": 4.777429600382185e-05, "loss": 79.6398, "step": 55450 }, { "epoch": 0.22406541772888328, "grad_norm": 372.3857116699219, "learning_rate": 4.777285599496929e-05, "loss": 47.1804, "step": 55460 }, { "epoch": 0.2241058189942509, "grad_norm": 444.17919921875, "learning_rate": 4.777141554214819e-05, "loss": 79.5894, "step": 55470 }, { "epoch": 0.22414622025961853, "grad_norm": 952.4342041015625, "learning_rate": 4.776997464538662e-05, "loss": 79.2442, "step": 55480 }, { "epoch": 0.22418662152498617, "grad_norm": 1239.525146484375, "learning_rate": 4.776853330471266e-05, "loss": 70.9746, "step": 55490 }, { "epoch": 0.22422702279035378, "grad_norm": 5761.12841796875, "learning_rate": 4.776709152015443e-05, "loss": 110.6402, "step": 55500 }, { "epoch": 0.22426742405572142, "grad_norm": 5974.0244140625, "learning_rate": 4.776564929174003e-05, "loss": 88.7858, "step": 55510 }, { "epoch": 0.22430782532108906, "grad_norm": 702.3343505859375, "learning_rate": 4.776420661949758e-05, "loss": 80.6855, "step": 55520 }, { "epoch": 0.2243482265864567, "grad_norm": 736.3425903320312, "learning_rate": 4.776276350345519e-05, "loss": 74.7216, "step": 55530 }, { "epoch": 0.2243886278518243, "grad_norm": 412.60272216796875, "learning_rate": 4.776131994364102e-05, "loss": 80.698, "step": 55540 }, { "epoch": 0.22442902911719195, "grad_norm": 723.0787353515625, "learning_rate": 4.775987594008319e-05, "loss": 106.2656, "step": 55550 }, { "epoch": 0.2244694303825596, "grad_norm": 598.0833129882812, "learning_rate": 4.775843149280986e-05, "loss": 60.9806, "step": 55560 }, { "epoch": 0.2245098316479272, "grad_norm": 1235.12060546875, "learning_rate": 4.775698660184919e-05, "loss": 80.7508, "step": 55570 }, { "epoch": 0.22455023291329484, "grad_norm": 874.450927734375, "learning_rate": 4.775554126722935e-05, "loss": 42.9898, "step": 55580 }, { "epoch": 0.22459063417866248, "grad_norm": 589.5454711914062, "learning_rate": 4.775409548897853e-05, "loss": 94.5234, "step": 55590 }, { "epoch": 0.2246310354440301, "grad_norm": 918.5469360351562, "learning_rate": 4.775264926712489e-05, "loss": 48.5788, "step": 55600 }, { "epoch": 0.22467143670939774, "grad_norm": 1047.6563720703125, "learning_rate": 4.775120260169665e-05, "loss": 88.8342, "step": 55610 }, { "epoch": 0.22471183797476538, "grad_norm": 932.34912109375, "learning_rate": 4.774975549272199e-05, "loss": 48.4354, "step": 55620 }, { "epoch": 0.224752239240133, "grad_norm": 614.5720825195312, "learning_rate": 4.774830794022915e-05, "loss": 93.6599, "step": 55630 }, { "epoch": 0.22479264050550063, "grad_norm": 503.9685363769531, "learning_rate": 4.7746859944246325e-05, "loss": 85.0896, "step": 55640 }, { "epoch": 0.22483304177086827, "grad_norm": 662.5595703125, "learning_rate": 4.774541150480175e-05, "loss": 103.3519, "step": 55650 }, { "epoch": 0.22487344303623588, "grad_norm": 982.870849609375, "learning_rate": 4.7743962621923674e-05, "loss": 82.5962, "step": 55660 }, { "epoch": 0.22491384430160352, "grad_norm": 1002.5994873046875, "learning_rate": 4.774251329564034e-05, "loss": 64.0428, "step": 55670 }, { "epoch": 0.22495424556697116, "grad_norm": 360.1722412109375, "learning_rate": 4.7741063525980004e-05, "loss": 75.0625, "step": 55680 }, { "epoch": 0.2249946468323388, "grad_norm": 1272.3038330078125, "learning_rate": 4.773961331297092e-05, "loss": 114.2109, "step": 55690 }, { "epoch": 0.2250350480977064, "grad_norm": 6723.4404296875, "learning_rate": 4.773816265664136e-05, "loss": 103.1604, "step": 55700 }, { "epoch": 0.22507544936307405, "grad_norm": 2476.1787109375, "learning_rate": 4.7736711557019617e-05, "loss": 101.4048, "step": 55710 }, { "epoch": 0.2251158506284417, "grad_norm": 1094.4051513671875, "learning_rate": 4.7735260014133986e-05, "loss": 80.1582, "step": 55720 }, { "epoch": 0.2251562518938093, "grad_norm": 990.44677734375, "learning_rate": 4.773380802801275e-05, "loss": 80.2596, "step": 55730 }, { "epoch": 0.22519665315917695, "grad_norm": 700.4454956054688, "learning_rate": 4.773235559868422e-05, "loss": 78.9232, "step": 55740 }, { "epoch": 0.22523705442454459, "grad_norm": 1065.6510009765625, "learning_rate": 4.773090272617672e-05, "loss": 85.6272, "step": 55750 }, { "epoch": 0.2252774556899122, "grad_norm": 810.5206909179688, "learning_rate": 4.772944941051856e-05, "loss": 83.4951, "step": 55760 }, { "epoch": 0.22531785695527984, "grad_norm": 508.2962341308594, "learning_rate": 4.772799565173809e-05, "loss": 63.031, "step": 55770 }, { "epoch": 0.22535825822064748, "grad_norm": 371.3773498535156, "learning_rate": 4.772654144986364e-05, "loss": 96.568, "step": 55780 }, { "epoch": 0.2253986594860151, "grad_norm": 492.9206237792969, "learning_rate": 4.772508680492356e-05, "loss": 76.7317, "step": 55790 }, { "epoch": 0.22543906075138273, "grad_norm": 368.9469909667969, "learning_rate": 4.772363171694622e-05, "loss": 62.3136, "step": 55800 }, { "epoch": 0.22547946201675037, "grad_norm": 445.32501220703125, "learning_rate": 4.7722176185959974e-05, "loss": 47.5343, "step": 55810 }, { "epoch": 0.22551986328211798, "grad_norm": 385.32098388671875, "learning_rate": 4.772072021199321e-05, "loss": 86.4635, "step": 55820 }, { "epoch": 0.22556026454748562, "grad_norm": 828.4990844726562, "learning_rate": 4.771926379507431e-05, "loss": 79.7598, "step": 55830 }, { "epoch": 0.22560066581285326, "grad_norm": 881.7483520507812, "learning_rate": 4.7717806935231665e-05, "loss": 54.1686, "step": 55840 }, { "epoch": 0.2256410670782209, "grad_norm": 882.0201416015625, "learning_rate": 4.7716349632493674e-05, "loss": 61.8202, "step": 55850 }, { "epoch": 0.22568146834358851, "grad_norm": 1207.105712890625, "learning_rate": 4.7714891886888756e-05, "loss": 78.0542, "step": 55860 }, { "epoch": 0.22572186960895615, "grad_norm": 1286.038330078125, "learning_rate": 4.771343369844532e-05, "loss": 84.5764, "step": 55870 }, { "epoch": 0.2257622708743238, "grad_norm": 518.7008056640625, "learning_rate": 4.771197506719181e-05, "loss": 68.4155, "step": 55880 }, { "epoch": 0.2258026721396914, "grad_norm": 1427.2728271484375, "learning_rate": 4.7710515993156645e-05, "loss": 89.9567, "step": 55890 }, { "epoch": 0.22584307340505905, "grad_norm": 964.0196533203125, "learning_rate": 4.770905647636828e-05, "loss": 68.5106, "step": 55900 }, { "epoch": 0.2258834746704267, "grad_norm": 452.55487060546875, "learning_rate": 4.770759651685517e-05, "loss": 56.08, "step": 55910 }, { "epoch": 0.2259238759357943, "grad_norm": 1233.5595703125, "learning_rate": 4.770613611464577e-05, "loss": 74.0022, "step": 55920 }, { "epoch": 0.22596427720116194, "grad_norm": 1455.181640625, "learning_rate": 4.7704675269768565e-05, "loss": 91.9318, "step": 55930 }, { "epoch": 0.22600467846652958, "grad_norm": 735.1796264648438, "learning_rate": 4.7703213982252016e-05, "loss": 85.3958, "step": 55940 }, { "epoch": 0.2260450797318972, "grad_norm": 670.2793579101562, "learning_rate": 4.770175225212463e-05, "loss": 60.0693, "step": 55950 }, { "epoch": 0.22608548099726483, "grad_norm": 820.1773071289062, "learning_rate": 4.7700290079414896e-05, "loss": 75.0173, "step": 55960 }, { "epoch": 0.22612588226263247, "grad_norm": 640.2984619140625, "learning_rate": 4.769882746415132e-05, "loss": 58.8079, "step": 55970 }, { "epoch": 0.22616628352800008, "grad_norm": 1904.8134765625, "learning_rate": 4.769736440636241e-05, "loss": 59.5396, "step": 55980 }, { "epoch": 0.22620668479336772, "grad_norm": 764.6589965820312, "learning_rate": 4.76959009060767e-05, "loss": 47.2034, "step": 55990 }, { "epoch": 0.22624708605873536, "grad_norm": 312.55859375, "learning_rate": 4.769443696332272e-05, "loss": 62.7167, "step": 56000 }, { "epoch": 0.226287487324103, "grad_norm": 560.5413818359375, "learning_rate": 4.7692972578129005e-05, "loss": 79.5518, "step": 56010 }, { "epoch": 0.22632788858947062, "grad_norm": 664.6395874023438, "learning_rate": 4.769150775052411e-05, "loss": 63.6301, "step": 56020 }, { "epoch": 0.22636828985483826, "grad_norm": 1466.82666015625, "learning_rate": 4.769004248053658e-05, "loss": 63.513, "step": 56030 }, { "epoch": 0.2264086911202059, "grad_norm": 994.0587768554688, "learning_rate": 4.7688576768194994e-05, "loss": 59.1224, "step": 56040 }, { "epoch": 0.2264490923855735, "grad_norm": 700.6870727539062, "learning_rate": 4.7687110613527926e-05, "loss": 85.2521, "step": 56050 }, { "epoch": 0.22648949365094115, "grad_norm": 1606.5682373046875, "learning_rate": 4.7685644016563956e-05, "loss": 97.3078, "step": 56060 }, { "epoch": 0.2265298949163088, "grad_norm": 559.5364379882812, "learning_rate": 4.7684176977331674e-05, "loss": 75.0282, "step": 56070 }, { "epoch": 0.2265702961816764, "grad_norm": 750.7931518554688, "learning_rate": 4.768270949585968e-05, "loss": 89.0941, "step": 56080 }, { "epoch": 0.22661069744704404, "grad_norm": 843.3683471679688, "learning_rate": 4.7681241572176596e-05, "loss": 76.073, "step": 56090 }, { "epoch": 0.22665109871241168, "grad_norm": 759.1117553710938, "learning_rate": 4.767977320631103e-05, "loss": 72.5775, "step": 56100 }, { "epoch": 0.2266914999777793, "grad_norm": 687.8123168945312, "learning_rate": 4.76783043982916e-05, "loss": 53.4036, "step": 56110 }, { "epoch": 0.22673190124314693, "grad_norm": 996.044921875, "learning_rate": 4.767683514814696e-05, "loss": 56.8859, "step": 56120 }, { "epoch": 0.22677230250851457, "grad_norm": 821.6911010742188, "learning_rate": 4.767536545590574e-05, "loss": 66.1622, "step": 56130 }, { "epoch": 0.22681270377388218, "grad_norm": 1311.89892578125, "learning_rate": 4.767389532159659e-05, "loss": 69.1287, "step": 56140 }, { "epoch": 0.22685310503924982, "grad_norm": 734.8349609375, "learning_rate": 4.7672424745248176e-05, "loss": 54.9413, "step": 56150 }, { "epoch": 0.22689350630461746, "grad_norm": 854.7682495117188, "learning_rate": 4.767095372688918e-05, "loss": 98.0047, "step": 56160 }, { "epoch": 0.2269339075699851, "grad_norm": 969.171630859375, "learning_rate": 4.7669482266548264e-05, "loss": 98.2503, "step": 56170 }, { "epoch": 0.22697430883535272, "grad_norm": 1126.5001220703125, "learning_rate": 4.7668010364254124e-05, "loss": 63.528, "step": 56180 }, { "epoch": 0.22701471010072036, "grad_norm": 1452.1922607421875, "learning_rate": 4.7666538020035445e-05, "loss": 68.9039, "step": 56190 }, { "epoch": 0.227055111366088, "grad_norm": 1715.3773193359375, "learning_rate": 4.7665065233920945e-05, "loss": 109.966, "step": 56200 }, { "epoch": 0.2270955126314556, "grad_norm": 631.9252319335938, "learning_rate": 4.766359200593933e-05, "loss": 49.2157, "step": 56210 }, { "epoch": 0.22713591389682325, "grad_norm": 1373.4630126953125, "learning_rate": 4.766211833611931e-05, "loss": 74.802, "step": 56220 }, { "epoch": 0.2271763151621909, "grad_norm": 2058.58740234375, "learning_rate": 4.766064422448964e-05, "loss": 75.4964, "step": 56230 }, { "epoch": 0.2272167164275585, "grad_norm": 2785.7197265625, "learning_rate": 4.765916967107903e-05, "loss": 77.155, "step": 56240 }, { "epoch": 0.22725711769292614, "grad_norm": 593.6171264648438, "learning_rate": 4.765769467591625e-05, "loss": 60.3693, "step": 56250 }, { "epoch": 0.22729751895829378, "grad_norm": 1533.2698974609375, "learning_rate": 4.7656219239030046e-05, "loss": 95.2811, "step": 56260 }, { "epoch": 0.2273379202236614, "grad_norm": 806.5111083984375, "learning_rate": 4.7654743360449186e-05, "loss": 65.6008, "step": 56270 }, { "epoch": 0.22737832148902903, "grad_norm": 1836.31787109375, "learning_rate": 4.7653267040202436e-05, "loss": 80.5326, "step": 56280 }, { "epoch": 0.22741872275439667, "grad_norm": 733.4277954101562, "learning_rate": 4.765179027831858e-05, "loss": 81.6788, "step": 56290 }, { "epoch": 0.22745912401976429, "grad_norm": 1102.4857177734375, "learning_rate": 4.7650313074826425e-05, "loss": 63.4363, "step": 56300 }, { "epoch": 0.22749952528513193, "grad_norm": 1184.0565185546875, "learning_rate": 4.764883542975475e-05, "loss": 113.9402, "step": 56310 }, { "epoch": 0.22753992655049957, "grad_norm": 901.2548828125, "learning_rate": 4.764735734313236e-05, "loss": 61.9935, "step": 56320 }, { "epoch": 0.2275803278158672, "grad_norm": 750.1239624023438, "learning_rate": 4.7645878814988075e-05, "loss": 82.1202, "step": 56330 }, { "epoch": 0.22762072908123482, "grad_norm": 1717.6053466796875, "learning_rate": 4.764439984535074e-05, "loss": 54.0498, "step": 56340 }, { "epoch": 0.22766113034660246, "grad_norm": 755.3112182617188, "learning_rate": 4.764292043424916e-05, "loss": 46.0645, "step": 56350 }, { "epoch": 0.2277015316119701, "grad_norm": 819.234375, "learning_rate": 4.764144058171219e-05, "loss": 68.3072, "step": 56360 }, { "epoch": 0.2277419328773377, "grad_norm": 1272.74462890625, "learning_rate": 4.763996028776868e-05, "loss": 81.0632, "step": 56370 }, { "epoch": 0.22778233414270535, "grad_norm": 970.59765625, "learning_rate": 4.763847955244749e-05, "loss": 45.2182, "step": 56380 }, { "epoch": 0.227822735408073, "grad_norm": 895.1033325195312, "learning_rate": 4.7636998375777486e-05, "loss": 78.1244, "step": 56390 }, { "epoch": 0.2278631366734406, "grad_norm": 1142.0018310546875, "learning_rate": 4.763551675778755e-05, "loss": 93.2416, "step": 56400 }, { "epoch": 0.22790353793880824, "grad_norm": 694.2943725585938, "learning_rate": 4.7634034698506545e-05, "loss": 66.8523, "step": 56410 }, { "epoch": 0.22794393920417588, "grad_norm": 649.4854736328125, "learning_rate": 4.76325521979634e-05, "loss": 94.2294, "step": 56420 }, { "epoch": 0.2279843404695435, "grad_norm": 1363.1820068359375, "learning_rate": 4.7631069256186986e-05, "loss": 68.8548, "step": 56430 }, { "epoch": 0.22802474173491113, "grad_norm": 766.2236328125, "learning_rate": 4.7629585873206226e-05, "loss": 98.7785, "step": 56440 }, { "epoch": 0.22806514300027877, "grad_norm": 1092.222900390625, "learning_rate": 4.7628102049050036e-05, "loss": 68.331, "step": 56450 }, { "epoch": 0.2281055442656464, "grad_norm": 745.677978515625, "learning_rate": 4.7626617783747364e-05, "loss": 86.9224, "step": 56460 }, { "epoch": 0.22814594553101403, "grad_norm": 848.4603881835938, "learning_rate": 4.762513307732711e-05, "loss": 97.7969, "step": 56470 }, { "epoch": 0.22818634679638167, "grad_norm": 924.7987670898438, "learning_rate": 4.762364792981825e-05, "loss": 68.205, "step": 56480 }, { "epoch": 0.2282267480617493, "grad_norm": 442.6836242675781, "learning_rate": 4.762216234124972e-05, "loss": 125.6712, "step": 56490 }, { "epoch": 0.22826714932711692, "grad_norm": 716.2772827148438, "learning_rate": 4.762067631165049e-05, "loss": 126.1072, "step": 56500 }, { "epoch": 0.22830755059248456, "grad_norm": 956.81005859375, "learning_rate": 4.761918984104953e-05, "loss": 51.0658, "step": 56510 }, { "epoch": 0.2283479518578522, "grad_norm": 379.62493896484375, "learning_rate": 4.761770292947582e-05, "loss": 97.7945, "step": 56520 }, { "epoch": 0.2283883531232198, "grad_norm": 1116.5791015625, "learning_rate": 4.761621557695834e-05, "loss": 77.2991, "step": 56530 }, { "epoch": 0.22842875438858745, "grad_norm": 544.430419921875, "learning_rate": 4.76147277835261e-05, "loss": 94.4459, "step": 56540 }, { "epoch": 0.2284691556539551, "grad_norm": 827.6028442382812, "learning_rate": 4.7613239549208106e-05, "loss": 53.5641, "step": 56550 }, { "epoch": 0.2285095569193227, "grad_norm": 1755.43505859375, "learning_rate": 4.7611750874033356e-05, "loss": 97.6019, "step": 56560 }, { "epoch": 0.22854995818469034, "grad_norm": 456.3388671875, "learning_rate": 4.7610261758030886e-05, "loss": 65.4111, "step": 56570 }, { "epoch": 0.22859035945005798, "grad_norm": 710.4873046875, "learning_rate": 4.760877220122971e-05, "loss": 70.0858, "step": 56580 }, { "epoch": 0.2286307607154256, "grad_norm": 717.0205688476562, "learning_rate": 4.76072822036589e-05, "loss": 80.4853, "step": 56590 }, { "epoch": 0.22867116198079324, "grad_norm": 813.551513671875, "learning_rate": 4.760579176534747e-05, "loss": 72.0974, "step": 56600 }, { "epoch": 0.22871156324616088, "grad_norm": 1653.39111328125, "learning_rate": 4.7604300886324496e-05, "loss": 37.6728, "step": 56610 }, { "epoch": 0.2287519645115285, "grad_norm": 1388.64501953125, "learning_rate": 4.760280956661903e-05, "loss": 102.1589, "step": 56620 }, { "epoch": 0.22879236577689613, "grad_norm": 1519.29931640625, "learning_rate": 4.760131780626017e-05, "loss": 107.9805, "step": 56630 }, { "epoch": 0.22883276704226377, "grad_norm": 967.7742919921875, "learning_rate": 4.759982560527698e-05, "loss": 86.072, "step": 56640 }, { "epoch": 0.2288731683076314, "grad_norm": 1597.58642578125, "learning_rate": 4.7598332963698545e-05, "loss": 95.1602, "step": 56650 }, { "epoch": 0.22891356957299902, "grad_norm": 616.4041137695312, "learning_rate": 4.7596839881553976e-05, "loss": 83.347, "step": 56660 }, { "epoch": 0.22895397083836666, "grad_norm": 728.1845703125, "learning_rate": 4.75953463588724e-05, "loss": 58.7279, "step": 56670 }, { "epoch": 0.2289943721037343, "grad_norm": 1369.0509033203125, "learning_rate": 4.759385239568289e-05, "loss": 104.2516, "step": 56680 }, { "epoch": 0.2290347733691019, "grad_norm": 445.80267333984375, "learning_rate": 4.75923579920146e-05, "loss": 69.9564, "step": 56690 }, { "epoch": 0.22907517463446955, "grad_norm": 1323.3516845703125, "learning_rate": 4.7590863147896666e-05, "loss": 57.2469, "step": 56700 }, { "epoch": 0.2291155758998372, "grad_norm": 1134.4810791015625, "learning_rate": 4.7589367863358225e-05, "loss": 74.4222, "step": 56710 }, { "epoch": 0.2291559771652048, "grad_norm": 1246.8629150390625, "learning_rate": 4.758787213842842e-05, "loss": 97.5686, "step": 56720 }, { "epoch": 0.22919637843057244, "grad_norm": 1165.90478515625, "learning_rate": 4.758637597313642e-05, "loss": 84.3428, "step": 56730 }, { "epoch": 0.22923677969594008, "grad_norm": 683.56689453125, "learning_rate": 4.7584879367511395e-05, "loss": 61.9244, "step": 56740 }, { "epoch": 0.2292771809613077, "grad_norm": 2286.635498046875, "learning_rate": 4.758338232158252e-05, "loss": 90.8525, "step": 56750 }, { "epoch": 0.22931758222667534, "grad_norm": 1139.4610595703125, "learning_rate": 4.758188483537898e-05, "loss": 93.7091, "step": 56760 }, { "epoch": 0.22935798349204298, "grad_norm": 517.6661376953125, "learning_rate": 4.758038690892997e-05, "loss": 62.2874, "step": 56770 }, { "epoch": 0.2293983847574106, "grad_norm": 657.8548583984375, "learning_rate": 4.7578888542264686e-05, "loss": 90.596, "step": 56780 }, { "epoch": 0.22943878602277823, "grad_norm": 1066.094482421875, "learning_rate": 4.757738973541236e-05, "loss": 54.5622, "step": 56790 }, { "epoch": 0.22947918728814587, "grad_norm": 1410.8668212890625, "learning_rate": 4.7575890488402185e-05, "loss": 44.4098, "step": 56800 }, { "epoch": 0.2295195885535135, "grad_norm": 1515.7193603515625, "learning_rate": 4.75743908012634e-05, "loss": 89.9203, "step": 56810 }, { "epoch": 0.22955998981888112, "grad_norm": 1256.6802978515625, "learning_rate": 4.757289067402525e-05, "loss": 65.3287, "step": 56820 }, { "epoch": 0.22960039108424876, "grad_norm": 520.7095947265625, "learning_rate": 4.757139010671697e-05, "loss": 67.0209, "step": 56830 }, { "epoch": 0.2296407923496164, "grad_norm": 1852.7618408203125, "learning_rate": 4.7569889099367824e-05, "loss": 90.3647, "step": 56840 }, { "epoch": 0.229681193614984, "grad_norm": 888.56787109375, "learning_rate": 4.7568387652007075e-05, "loss": 97.766, "step": 56850 }, { "epoch": 0.22972159488035165, "grad_norm": 515.2661743164062, "learning_rate": 4.756688576466398e-05, "loss": 52.9156, "step": 56860 }, { "epoch": 0.2297619961457193, "grad_norm": 779.8905639648438, "learning_rate": 4.756538343736784e-05, "loss": 85.2727, "step": 56870 }, { "epoch": 0.2298023974110869, "grad_norm": 808.3165893554688, "learning_rate": 4.756388067014792e-05, "loss": 89.8556, "step": 56880 }, { "epoch": 0.22984279867645455, "grad_norm": 1065.549072265625, "learning_rate": 4.7562377463033536e-05, "loss": 81.1974, "step": 56890 }, { "epoch": 0.22988319994182219, "grad_norm": 1067.1673583984375, "learning_rate": 4.7560873816053984e-05, "loss": 61.5579, "step": 56900 }, { "epoch": 0.2299236012071898, "grad_norm": 768.210205078125, "learning_rate": 4.755936972923859e-05, "loss": 75.4602, "step": 56910 }, { "epoch": 0.22996400247255744, "grad_norm": 333.94232177734375, "learning_rate": 4.7557865202616656e-05, "loss": 65.8191, "step": 56920 }, { "epoch": 0.23000440373792508, "grad_norm": 1091.112548828125, "learning_rate": 4.7556360236217534e-05, "loss": 112.1934, "step": 56930 }, { "epoch": 0.2300448050032927, "grad_norm": 1078.4996337890625, "learning_rate": 4.755485483007056e-05, "loss": 80.1494, "step": 56940 }, { "epoch": 0.23008520626866033, "grad_norm": 663.4862670898438, "learning_rate": 4.755334898420507e-05, "loss": 81.2759, "step": 56950 }, { "epoch": 0.23012560753402797, "grad_norm": 1670.8721923828125, "learning_rate": 4.7551842698650436e-05, "loss": 98.7399, "step": 56960 }, { "epoch": 0.2301660087993956, "grad_norm": 707.2112426757812, "learning_rate": 4.755033597343602e-05, "loss": 84.5847, "step": 56970 }, { "epoch": 0.23020641006476322, "grad_norm": 913.306884765625, "learning_rate": 4.7548828808591195e-05, "loss": 78.6086, "step": 56980 }, { "epoch": 0.23024681133013086, "grad_norm": 390.49578857421875, "learning_rate": 4.754732120414534e-05, "loss": 81.2955, "step": 56990 }, { "epoch": 0.2302872125954985, "grad_norm": 593.7713623046875, "learning_rate": 4.754581316012785e-05, "loss": 59.7465, "step": 57000 }, { "epoch": 0.23032761386086611, "grad_norm": 704.7691650390625, "learning_rate": 4.754430467656812e-05, "loss": 61.9324, "step": 57010 }, { "epoch": 0.23036801512623375, "grad_norm": 770.904296875, "learning_rate": 4.7542795753495574e-05, "loss": 68.9715, "step": 57020 }, { "epoch": 0.2304084163916014, "grad_norm": 630.1139526367188, "learning_rate": 4.754128639093961e-05, "loss": 82.8383, "step": 57030 }, { "epoch": 0.230448817656969, "grad_norm": 1761.17626953125, "learning_rate": 4.753977658892967e-05, "loss": 59.8425, "step": 57040 }, { "epoch": 0.23048921892233665, "grad_norm": 1344.968017578125, "learning_rate": 4.753826634749518e-05, "loss": 89.4451, "step": 57050 }, { "epoch": 0.2305296201877043, "grad_norm": 448.2859802246094, "learning_rate": 4.753675566666558e-05, "loss": 79.9184, "step": 57060 }, { "epoch": 0.2305700214530719, "grad_norm": 999.4277954101562, "learning_rate": 4.7535244546470325e-05, "loss": 112.2555, "step": 57070 }, { "epoch": 0.23061042271843954, "grad_norm": 1350.092041015625, "learning_rate": 4.753373298693888e-05, "loss": 84.6386, "step": 57080 }, { "epoch": 0.23065082398380718, "grad_norm": 318.74359130859375, "learning_rate": 4.753222098810071e-05, "loss": 48.7075, "step": 57090 }, { "epoch": 0.2306912252491748, "grad_norm": 1049.048095703125, "learning_rate": 4.7530708549985287e-05, "loss": 55.4899, "step": 57100 }, { "epoch": 0.23073162651454243, "grad_norm": 2994.60107421875, "learning_rate": 4.75291956726221e-05, "loss": 82.0188, "step": 57110 }, { "epoch": 0.23077202777991007, "grad_norm": 1358.6580810546875, "learning_rate": 4.752768235604065e-05, "loss": 84.3785, "step": 57120 }, { "epoch": 0.2308124290452777, "grad_norm": 309.6406555175781, "learning_rate": 4.7526168600270435e-05, "loss": 73.9946, "step": 57130 }, { "epoch": 0.23085283031064532, "grad_norm": 1002.0283203125, "learning_rate": 4.752465440534096e-05, "loss": 104.0093, "step": 57140 }, { "epoch": 0.23089323157601296, "grad_norm": 697.1533813476562, "learning_rate": 4.752313977128175e-05, "loss": 45.6771, "step": 57150 }, { "epoch": 0.2309336328413806, "grad_norm": 1255.6212158203125, "learning_rate": 4.752162469812234e-05, "loss": 77.5493, "step": 57160 }, { "epoch": 0.23097403410674822, "grad_norm": 5676.9970703125, "learning_rate": 4.752010918589226e-05, "loss": 64.4288, "step": 57170 }, { "epoch": 0.23101443537211586, "grad_norm": 564.4259033203125, "learning_rate": 4.7518593234621056e-05, "loss": 98.8771, "step": 57180 }, { "epoch": 0.2310548366374835, "grad_norm": 1186.0013427734375, "learning_rate": 4.7517076844338285e-05, "loss": 67.5268, "step": 57190 }, { "epoch": 0.2310952379028511, "grad_norm": 1269.7178955078125, "learning_rate": 4.7515560015073514e-05, "loss": 89.8433, "step": 57200 }, { "epoch": 0.23113563916821875, "grad_norm": 0.0, "learning_rate": 4.75140427468563e-05, "loss": 60.9059, "step": 57210 }, { "epoch": 0.2311760404335864, "grad_norm": 1260.9530029296875, "learning_rate": 4.751252503971624e-05, "loss": 68.3741, "step": 57220 }, { "epoch": 0.231216441698954, "grad_norm": 1468.294677734375, "learning_rate": 4.75110068936829e-05, "loss": 86.1376, "step": 57230 }, { "epoch": 0.23125684296432164, "grad_norm": 631.3265991210938, "learning_rate": 4.7509488308785905e-05, "loss": 92.6364, "step": 57240 }, { "epoch": 0.23129724422968928, "grad_norm": 391.2497863769531, "learning_rate": 4.7507969285054845e-05, "loss": 57.7668, "step": 57250 }, { "epoch": 0.2313376454950569, "grad_norm": 1009.0247192382812, "learning_rate": 4.750644982251933e-05, "loss": 60.969, "step": 57260 }, { "epoch": 0.23137804676042453, "grad_norm": 787.4730834960938, "learning_rate": 4.7504929921208984e-05, "loss": 105.2391, "step": 57270 }, { "epoch": 0.23141844802579217, "grad_norm": 649.8621215820312, "learning_rate": 4.750340958115346e-05, "loss": 62.9108, "step": 57280 }, { "epoch": 0.2314588492911598, "grad_norm": 1550.597900390625, "learning_rate": 4.7501888802382365e-05, "loss": 96.6346, "step": 57290 }, { "epoch": 0.23149925055652743, "grad_norm": 1971.5574951171875, "learning_rate": 4.750036758492537e-05, "loss": 85.7584, "step": 57300 }, { "epoch": 0.23153965182189506, "grad_norm": 386.6150817871094, "learning_rate": 4.749884592881212e-05, "loss": 109.9105, "step": 57310 }, { "epoch": 0.2315800530872627, "grad_norm": 1582.1597900390625, "learning_rate": 4.749732383407229e-05, "loss": 112.0731, "step": 57320 }, { "epoch": 0.23162045435263032, "grad_norm": 0.0, "learning_rate": 4.7495801300735554e-05, "loss": 52.8345, "step": 57330 }, { "epoch": 0.23166085561799796, "grad_norm": 489.4527893066406, "learning_rate": 4.7494278328831584e-05, "loss": 69.7233, "step": 57340 }, { "epoch": 0.2317012568833656, "grad_norm": 483.29364013671875, "learning_rate": 4.7492754918390074e-05, "loss": 89.4249, "step": 57350 }, { "epoch": 0.2317416581487332, "grad_norm": 2289.152099609375, "learning_rate": 4.749123106944073e-05, "loss": 75.3741, "step": 57360 }, { "epoch": 0.23178205941410085, "grad_norm": 730.8201904296875, "learning_rate": 4.748970678201326e-05, "loss": 76.5618, "step": 57370 }, { "epoch": 0.2318224606794685, "grad_norm": 604.216064453125, "learning_rate": 4.7488182056137374e-05, "loss": 64.0703, "step": 57380 }, { "epoch": 0.2318628619448361, "grad_norm": 850.9320068359375, "learning_rate": 4.74866568918428e-05, "loss": 82.7347, "step": 57390 }, { "epoch": 0.23190326321020374, "grad_norm": 948.7066650390625, "learning_rate": 4.7485131289159276e-05, "loss": 76.1124, "step": 57400 }, { "epoch": 0.23194366447557138, "grad_norm": 985.8638916015625, "learning_rate": 4.7483605248116544e-05, "loss": 90.0347, "step": 57410 }, { "epoch": 0.231984065740939, "grad_norm": 322.2492370605469, "learning_rate": 4.7482078768744345e-05, "loss": 97.9077, "step": 57420 }, { "epoch": 0.23202446700630663, "grad_norm": 1223.1875, "learning_rate": 4.7480551851072454e-05, "loss": 53.8952, "step": 57430 }, { "epoch": 0.23206486827167427, "grad_norm": 455.7870788574219, "learning_rate": 4.747902449513063e-05, "loss": 78.5205, "step": 57440 }, { "epoch": 0.23210526953704191, "grad_norm": 1016.0787963867188, "learning_rate": 4.747749670094864e-05, "loss": 86.3612, "step": 57450 }, { "epoch": 0.23214567080240953, "grad_norm": 1260.7244873046875, "learning_rate": 4.7475968468556295e-05, "loss": 84.6346, "step": 57460 }, { "epoch": 0.23218607206777717, "grad_norm": 437.6191711425781, "learning_rate": 4.7474439797983364e-05, "loss": 107.3411, "step": 57470 }, { "epoch": 0.2322264733331448, "grad_norm": 516.1616821289062, "learning_rate": 4.7472910689259655e-05, "loss": 71.8174, "step": 57480 }, { "epoch": 0.23226687459851242, "grad_norm": 662.67822265625, "learning_rate": 4.747138114241499e-05, "loss": 73.287, "step": 57490 }, { "epoch": 0.23230727586388006, "grad_norm": 1739.5025634765625, "learning_rate": 4.7469851157479177e-05, "loss": 92.4851, "step": 57500 }, { "epoch": 0.2323476771292477, "grad_norm": 682.4597778320312, "learning_rate": 4.746832073448205e-05, "loss": 88.4044, "step": 57510 }, { "epoch": 0.2323880783946153, "grad_norm": 797.5489501953125, "learning_rate": 4.7466789873453444e-05, "loss": 86.4066, "step": 57520 }, { "epoch": 0.23242847965998295, "grad_norm": 423.3626708984375, "learning_rate": 4.74652585744232e-05, "loss": 62.8509, "step": 57530 }, { "epoch": 0.2324688809253506, "grad_norm": 712.4830322265625, "learning_rate": 4.746372683742117e-05, "loss": 42.4057, "step": 57540 }, { "epoch": 0.2325092821907182, "grad_norm": 799.061767578125, "learning_rate": 4.746219466247722e-05, "loss": 52.0032, "step": 57550 }, { "epoch": 0.23254968345608584, "grad_norm": 945.8649291992188, "learning_rate": 4.746066204962123e-05, "loss": 88.8774, "step": 57560 }, { "epoch": 0.23259008472145348, "grad_norm": 803.7813110351562, "learning_rate": 4.745912899888306e-05, "loss": 93.842, "step": 57570 }, { "epoch": 0.2326304859868211, "grad_norm": 696.2904663085938, "learning_rate": 4.745759551029261e-05, "loss": 101.2722, "step": 57580 }, { "epoch": 0.23267088725218874, "grad_norm": 857.1948852539062, "learning_rate": 4.745606158387978e-05, "loss": 61.6568, "step": 57590 }, { "epoch": 0.23271128851755638, "grad_norm": 548.427734375, "learning_rate": 4.745452721967446e-05, "loss": 96.5383, "step": 57600 }, { "epoch": 0.23275168978292402, "grad_norm": 1423.155517578125, "learning_rate": 4.745299241770658e-05, "loss": 85.5254, "step": 57610 }, { "epoch": 0.23279209104829163, "grad_norm": 496.381591796875, "learning_rate": 4.745145717800605e-05, "loss": 67.1594, "step": 57620 }, { "epoch": 0.23283249231365927, "grad_norm": 629.1244506835938, "learning_rate": 4.74499215006028e-05, "loss": 64.9573, "step": 57630 }, { "epoch": 0.2328728935790269, "grad_norm": 903.4159545898438, "learning_rate": 4.744838538552677e-05, "loss": 76.1402, "step": 57640 }, { "epoch": 0.23291329484439452, "grad_norm": 504.8460693359375, "learning_rate": 4.744684883280792e-05, "loss": 82.0837, "step": 57650 }, { "epoch": 0.23295369610976216, "grad_norm": 923.25732421875, "learning_rate": 4.744531184247619e-05, "loss": 61.5699, "step": 57660 }, { "epoch": 0.2329940973751298, "grad_norm": 869.8147583007812, "learning_rate": 4.744377441456155e-05, "loss": 68.7058, "step": 57670 }, { "epoch": 0.2330344986404974, "grad_norm": 924.66845703125, "learning_rate": 4.744223654909397e-05, "loss": 109.6333, "step": 57680 }, { "epoch": 0.23307489990586505, "grad_norm": 1206.5703125, "learning_rate": 4.744069824610344e-05, "loss": 95.6685, "step": 57690 }, { "epoch": 0.2331153011712327, "grad_norm": 777.4462280273438, "learning_rate": 4.743915950561994e-05, "loss": 65.7932, "step": 57700 }, { "epoch": 0.2331557024366003, "grad_norm": 741.0814819335938, "learning_rate": 4.743762032767348e-05, "loss": 51.2344, "step": 57710 }, { "epoch": 0.23319610370196794, "grad_norm": 898.6174926757812, "learning_rate": 4.743608071229405e-05, "loss": 97.9571, "step": 57720 }, { "epoch": 0.23323650496733558, "grad_norm": 949.9400024414062, "learning_rate": 4.743454065951168e-05, "loss": 88.1819, "step": 57730 }, { "epoch": 0.2332769062327032, "grad_norm": 6401.75244140625, "learning_rate": 4.743300016935639e-05, "loss": 86.2424, "step": 57740 }, { "epoch": 0.23331730749807084, "grad_norm": 1315.59130859375, "learning_rate": 4.743145924185821e-05, "loss": 78.7651, "step": 57750 }, { "epoch": 0.23335770876343848, "grad_norm": 968.3670043945312, "learning_rate": 4.742991787704719e-05, "loss": 53.8725, "step": 57760 }, { "epoch": 0.2333981100288061, "grad_norm": 281.5350036621094, "learning_rate": 4.7428376074953365e-05, "loss": 88.1397, "step": 57770 }, { "epoch": 0.23343851129417373, "grad_norm": 1783.01318359375, "learning_rate": 4.7426833835606806e-05, "loss": 111.0655, "step": 57780 }, { "epoch": 0.23347891255954137, "grad_norm": 971.6173095703125, "learning_rate": 4.7425291159037575e-05, "loss": 71.0971, "step": 57790 }, { "epoch": 0.233519313824909, "grad_norm": 966.53662109375, "learning_rate": 4.742374804527575e-05, "loss": 85.1464, "step": 57800 }, { "epoch": 0.23355971509027662, "grad_norm": 1798.930419921875, "learning_rate": 4.742220449435141e-05, "loss": 84.1092, "step": 57810 }, { "epoch": 0.23360011635564426, "grad_norm": 1772.7415771484375, "learning_rate": 4.742066050629465e-05, "loss": 50.9059, "step": 57820 }, { "epoch": 0.2336405176210119, "grad_norm": 942.0059814453125, "learning_rate": 4.741911608113557e-05, "loss": 77.9877, "step": 57830 }, { "epoch": 0.2336809188863795, "grad_norm": 472.93890380859375, "learning_rate": 4.741757121890428e-05, "loss": 67.7341, "step": 57840 }, { "epoch": 0.23372132015174715, "grad_norm": 731.5870971679688, "learning_rate": 4.7416025919630904e-05, "loss": 96.6892, "step": 57850 }, { "epoch": 0.2337617214171148, "grad_norm": 973.33935546875, "learning_rate": 4.741448018334555e-05, "loss": 106.4843, "step": 57860 }, { "epoch": 0.2338021226824824, "grad_norm": 1150.518310546875, "learning_rate": 4.741293401007837e-05, "loss": 57.7009, "step": 57870 }, { "epoch": 0.23384252394785005, "grad_norm": 879.5131225585938, "learning_rate": 4.741138739985951e-05, "loss": 75.2222, "step": 57880 }, { "epoch": 0.23388292521321769, "grad_norm": 1559.7349853515625, "learning_rate": 4.7409840352719106e-05, "loss": 95.9505, "step": 57890 }, { "epoch": 0.2339233264785853, "grad_norm": 1342.956298828125, "learning_rate": 4.740829286868733e-05, "loss": 74.7077, "step": 57900 }, { "epoch": 0.23396372774395294, "grad_norm": 1618.971923828125, "learning_rate": 4.740674494779435e-05, "loss": 72.2942, "step": 57910 }, { "epoch": 0.23400412900932058, "grad_norm": 598.3280029296875, "learning_rate": 4.740519659007033e-05, "loss": 71.8578, "step": 57920 }, { "epoch": 0.2340445302746882, "grad_norm": 1528.6131591796875, "learning_rate": 4.7403647795545484e-05, "loss": 109.1537, "step": 57930 }, { "epoch": 0.23408493154005583, "grad_norm": 1394.4090576171875, "learning_rate": 4.7402098564249974e-05, "loss": 58.2658, "step": 57940 }, { "epoch": 0.23412533280542347, "grad_norm": 415.70263671875, "learning_rate": 4.7400548896214024e-05, "loss": 67.7115, "step": 57950 }, { "epoch": 0.2341657340707911, "grad_norm": 928.4489135742188, "learning_rate": 4.739899879146785e-05, "loss": 54.671, "step": 57960 }, { "epoch": 0.23420613533615872, "grad_norm": 995.4201049804688, "learning_rate": 4.739744825004165e-05, "loss": 85.7247, "step": 57970 }, { "epoch": 0.23424653660152636, "grad_norm": 950.1658935546875, "learning_rate": 4.739589727196568e-05, "loss": 53.7052, "step": 57980 }, { "epoch": 0.234286937866894, "grad_norm": 431.6142883300781, "learning_rate": 4.739434585727015e-05, "loss": 56.5389, "step": 57990 }, { "epoch": 0.23432733913226161, "grad_norm": 1080.2391357421875, "learning_rate": 4.7392794005985326e-05, "loss": 82.0306, "step": 58000 }, { "epoch": 0.23436774039762925, "grad_norm": 1104.7730712890625, "learning_rate": 4.739124171814145e-05, "loss": 64.9178, "step": 58010 }, { "epoch": 0.2344081416629969, "grad_norm": 1969.99755859375, "learning_rate": 4.7389688993768786e-05, "loss": 97.3219, "step": 58020 }, { "epoch": 0.2344485429283645, "grad_norm": 792.9730224609375, "learning_rate": 4.738813583289762e-05, "loss": 55.4297, "step": 58030 }, { "epoch": 0.23448894419373215, "grad_norm": 1705.9310302734375, "learning_rate": 4.7386582235558205e-05, "loss": 105.6232, "step": 58040 }, { "epoch": 0.2345293454590998, "grad_norm": 630.877197265625, "learning_rate": 4.738502820178085e-05, "loss": 72.6224, "step": 58050 }, { "epoch": 0.2345697467244674, "grad_norm": 656.330322265625, "learning_rate": 4.738347373159585e-05, "loss": 106.0807, "step": 58060 }, { "epoch": 0.23461014798983504, "grad_norm": 917.6051635742188, "learning_rate": 4.73819188250335e-05, "loss": 82.6257, "step": 58070 }, { "epoch": 0.23465054925520268, "grad_norm": 574.9983520507812, "learning_rate": 4.738036348212412e-05, "loss": 86.4343, "step": 58080 }, { "epoch": 0.2346909505205703, "grad_norm": 593.380859375, "learning_rate": 4.737880770289803e-05, "loss": 84.0278, "step": 58090 }, { "epoch": 0.23473135178593793, "grad_norm": 342.4287109375, "learning_rate": 4.737725148738557e-05, "loss": 48.0205, "step": 58100 }, { "epoch": 0.23477175305130557, "grad_norm": 520.4551391601562, "learning_rate": 4.737569483561707e-05, "loss": 48.0131, "step": 58110 }, { "epoch": 0.2348121543166732, "grad_norm": 431.37115478515625, "learning_rate": 4.737413774762287e-05, "loss": 49.1139, "step": 58120 }, { "epoch": 0.23485255558204082, "grad_norm": 1225.8043212890625, "learning_rate": 4.737258022343335e-05, "loss": 55.6529, "step": 58130 }, { "epoch": 0.23489295684740846, "grad_norm": 475.2739562988281, "learning_rate": 4.737102226307884e-05, "loss": 72.1621, "step": 58140 }, { "epoch": 0.2349333581127761, "grad_norm": 5526.28271484375, "learning_rate": 4.736946386658976e-05, "loss": 81.7198, "step": 58150 }, { "epoch": 0.23497375937814372, "grad_norm": 836.2157592773438, "learning_rate": 4.7367905033996445e-05, "loss": 64.377, "step": 58160 }, { "epoch": 0.23501416064351136, "grad_norm": 880.4158935546875, "learning_rate": 4.736634576532931e-05, "loss": 92.249, "step": 58170 }, { "epoch": 0.235054561908879, "grad_norm": 582.840576171875, "learning_rate": 4.736478606061875e-05, "loss": 52.3027, "step": 58180 }, { "epoch": 0.2350949631742466, "grad_norm": 587.2545776367188, "learning_rate": 4.7363225919895185e-05, "loss": 57.8036, "step": 58190 }, { "epoch": 0.23513536443961425, "grad_norm": 975.4487915039062, "learning_rate": 4.7361665343189e-05, "loss": 74.3185, "step": 58200 }, { "epoch": 0.2351757657049819, "grad_norm": 1377.298583984375, "learning_rate": 4.736010433053064e-05, "loss": 80.2555, "step": 58210 }, { "epoch": 0.2352161669703495, "grad_norm": 806.4180297851562, "learning_rate": 4.735854288195054e-05, "loss": 63.3192, "step": 58220 }, { "epoch": 0.23525656823571714, "grad_norm": 1805.80810546875, "learning_rate": 4.735698099747913e-05, "loss": 73.7252, "step": 58230 }, { "epoch": 0.23529696950108478, "grad_norm": 1094.813232421875, "learning_rate": 4.735541867714687e-05, "loss": 114.6661, "step": 58240 }, { "epoch": 0.2353373707664524, "grad_norm": 3966.76123046875, "learning_rate": 4.73538559209842e-05, "loss": 168.1956, "step": 58250 }, { "epoch": 0.23537777203182003, "grad_norm": 496.0440673828125, "learning_rate": 4.735229272902162e-05, "loss": 90.3166, "step": 58260 }, { "epoch": 0.23541817329718767, "grad_norm": 639.0549926757812, "learning_rate": 4.735072910128957e-05, "loss": 101.5113, "step": 58270 }, { "epoch": 0.2354585745625553, "grad_norm": 643.226318359375, "learning_rate": 4.734916503781856e-05, "loss": 46.3206, "step": 58280 }, { "epoch": 0.23549897582792292, "grad_norm": 1588.71142578125, "learning_rate": 4.7347600538639067e-05, "loss": 82.4462, "step": 58290 }, { "epoch": 0.23553937709329056, "grad_norm": 306.2060852050781, "learning_rate": 4.73460356037816e-05, "loss": 86.551, "step": 58300 }, { "epoch": 0.2355797783586582, "grad_norm": 838.1617431640625, "learning_rate": 4.734447023327666e-05, "loss": 62.018, "step": 58310 }, { "epoch": 0.23562017962402582, "grad_norm": 839.5139770507812, "learning_rate": 4.7342904427154766e-05, "loss": 68.918, "step": 58320 }, { "epoch": 0.23566058088939346, "grad_norm": 2884.371826171875, "learning_rate": 4.734133818544645e-05, "loss": 93.363, "step": 58330 }, { "epoch": 0.2357009821547611, "grad_norm": 1337.994873046875, "learning_rate": 4.733977150818225e-05, "loss": 80.4189, "step": 58340 }, { "epoch": 0.2357413834201287, "grad_norm": 916.3220825195312, "learning_rate": 4.7338204395392694e-05, "loss": 64.318, "step": 58350 }, { "epoch": 0.23578178468549635, "grad_norm": 4077.7041015625, "learning_rate": 4.733663684710835e-05, "loss": 106.2886, "step": 58360 }, { "epoch": 0.235822185950864, "grad_norm": 471.49566650390625, "learning_rate": 4.7335068863359764e-05, "loss": 76.7145, "step": 58370 }, { "epoch": 0.2358625872162316, "grad_norm": 821.7672119140625, "learning_rate": 4.733350044417752e-05, "loss": 59.775, "step": 58380 }, { "epoch": 0.23590298848159924, "grad_norm": 628.004638671875, "learning_rate": 4.733193158959218e-05, "loss": 65.8933, "step": 58390 }, { "epoch": 0.23594338974696688, "grad_norm": 487.54107666015625, "learning_rate": 4.733036229963435e-05, "loss": 78.1683, "step": 58400 }, { "epoch": 0.2359837910123345, "grad_norm": 967.7349243164062, "learning_rate": 4.732879257433459e-05, "loss": 87.9057, "step": 58410 }, { "epoch": 0.23602419227770213, "grad_norm": 834.74755859375, "learning_rate": 4.7327222413723536e-05, "loss": 74.7745, "step": 58420 }, { "epoch": 0.23606459354306977, "grad_norm": 1843.8294677734375, "learning_rate": 4.7325651817831784e-05, "loss": 69.399, "step": 58430 }, { "epoch": 0.2361049948084374, "grad_norm": 900.3565063476562, "learning_rate": 4.732408078668995e-05, "loss": 70.5233, "step": 58440 }, { "epoch": 0.23614539607380503, "grad_norm": 420.2508544921875, "learning_rate": 4.7322509320328675e-05, "loss": 67.6304, "step": 58450 }, { "epoch": 0.23618579733917267, "grad_norm": 1126.9072265625, "learning_rate": 4.732093741877859e-05, "loss": 95.9313, "step": 58460 }, { "epoch": 0.2362261986045403, "grad_norm": 496.6501770019531, "learning_rate": 4.731936508207033e-05, "loss": 65.5002, "step": 58470 }, { "epoch": 0.23626659986990792, "grad_norm": 511.6619567871094, "learning_rate": 4.731779231023456e-05, "loss": 68.3283, "step": 58480 }, { "epoch": 0.23630700113527556, "grad_norm": 728.8594970703125, "learning_rate": 4.731621910330194e-05, "loss": 71.2413, "step": 58490 }, { "epoch": 0.2363474024006432, "grad_norm": 721.872802734375, "learning_rate": 4.731464546130314e-05, "loss": 64.1135, "step": 58500 }, { "epoch": 0.2363878036660108, "grad_norm": 557.9456787109375, "learning_rate": 4.7313071384268836e-05, "loss": 71.9643, "step": 58510 }, { "epoch": 0.23642820493137845, "grad_norm": 952.9915771484375, "learning_rate": 4.731149687222972e-05, "loss": 72.1631, "step": 58520 }, { "epoch": 0.2364686061967461, "grad_norm": 406.0745544433594, "learning_rate": 4.7309921925216484e-05, "loss": 87.8844, "step": 58530 }, { "epoch": 0.2365090074621137, "grad_norm": 6206.943359375, "learning_rate": 4.730834654325984e-05, "loss": 117.5255, "step": 58540 }, { "epoch": 0.23654940872748134, "grad_norm": 0.0, "learning_rate": 4.7306770726390496e-05, "loss": 52.3261, "step": 58550 }, { "epoch": 0.23658980999284898, "grad_norm": 1959.0308837890625, "learning_rate": 4.730519447463916e-05, "loss": 87.7869, "step": 58560 }, { "epoch": 0.2366302112582166, "grad_norm": 1353.8363037109375, "learning_rate": 4.730361778803658e-05, "loss": 92.4961, "step": 58570 }, { "epoch": 0.23667061252358423, "grad_norm": 1127.2203369140625, "learning_rate": 4.730204066661349e-05, "loss": 101.3056, "step": 58580 }, { "epoch": 0.23671101378895187, "grad_norm": 1524.7484130859375, "learning_rate": 4.730046311040064e-05, "loss": 144.1944, "step": 58590 }, { "epoch": 0.23675141505431951, "grad_norm": 831.2205200195312, "learning_rate": 4.7298885119428773e-05, "loss": 70.5365, "step": 58600 }, { "epoch": 0.23679181631968713, "grad_norm": 913.74853515625, "learning_rate": 4.729730669372866e-05, "loss": 68.5036, "step": 58610 }, { "epoch": 0.23683221758505477, "grad_norm": 2814.275146484375, "learning_rate": 4.729572783333108e-05, "loss": 90.047, "step": 58620 }, { "epoch": 0.2368726188504224, "grad_norm": 1022.5120849609375, "learning_rate": 4.72941485382668e-05, "loss": 84.8219, "step": 58630 }, { "epoch": 0.23691302011579002, "grad_norm": 665.8732299804688, "learning_rate": 4.729256880856662e-05, "loss": 63.3056, "step": 58640 }, { "epoch": 0.23695342138115766, "grad_norm": 2222.1650390625, "learning_rate": 4.7290988644261336e-05, "loss": 77.9888, "step": 58650 }, { "epoch": 0.2369938226465253, "grad_norm": 780.3485717773438, "learning_rate": 4.728940804538176e-05, "loss": 58.1334, "step": 58660 }, { "epoch": 0.2370342239118929, "grad_norm": 568.1907958984375, "learning_rate": 4.728782701195869e-05, "loss": 53.3181, "step": 58670 }, { "epoch": 0.23707462517726055, "grad_norm": 662.433349609375, "learning_rate": 4.728624554402295e-05, "loss": 67.1239, "step": 58680 }, { "epoch": 0.2371150264426282, "grad_norm": 742.7056274414062, "learning_rate": 4.7284663641605384e-05, "loss": 56.0102, "step": 58690 }, { "epoch": 0.2371554277079958, "grad_norm": 1382.75439453125, "learning_rate": 4.728308130473683e-05, "loss": 52.9646, "step": 58700 }, { "epoch": 0.23719582897336344, "grad_norm": 1125.7822265625, "learning_rate": 4.7281498533448136e-05, "loss": 69.9835, "step": 58710 }, { "epoch": 0.23723623023873108, "grad_norm": 446.69677734375, "learning_rate": 4.7279915327770155e-05, "loss": 54.0501, "step": 58720 }, { "epoch": 0.2372766315040987, "grad_norm": 2016.18310546875, "learning_rate": 4.7278331687733754e-05, "loss": 111.3491, "step": 58730 }, { "epoch": 0.23731703276946634, "grad_norm": 1517.979736328125, "learning_rate": 4.727674761336981e-05, "loss": 95.3152, "step": 58740 }, { "epoch": 0.23735743403483398, "grad_norm": 965.1806030273438, "learning_rate": 4.72751631047092e-05, "loss": 77.6278, "step": 58750 }, { "epoch": 0.23739783530020162, "grad_norm": 697.4866333007812, "learning_rate": 4.727357816178282e-05, "loss": 86.2706, "step": 58760 }, { "epoch": 0.23743823656556923, "grad_norm": 521.2189331054688, "learning_rate": 4.727199278462156e-05, "loss": 59.1101, "step": 58770 }, { "epoch": 0.23747863783093687, "grad_norm": 456.9306945800781, "learning_rate": 4.727040697325634e-05, "loss": 57.2227, "step": 58780 }, { "epoch": 0.2375190390963045, "grad_norm": 1488.9251708984375, "learning_rate": 4.726882072771807e-05, "loss": 83.9987, "step": 58790 }, { "epoch": 0.23755944036167212, "grad_norm": 816.0877075195312, "learning_rate": 4.7267234048037664e-05, "loss": 75.2146, "step": 58800 }, { "epoch": 0.23759984162703976, "grad_norm": 1049.84375, "learning_rate": 4.726564693424608e-05, "loss": 85.2812, "step": 58810 }, { "epoch": 0.2376402428924074, "grad_norm": 660.714599609375, "learning_rate": 4.7264059386374236e-05, "loss": 50.8552, "step": 58820 }, { "epoch": 0.237680644157775, "grad_norm": 835.5247192382812, "learning_rate": 4.72624714044531e-05, "loss": 85.9735, "step": 58830 }, { "epoch": 0.23772104542314265, "grad_norm": 934.1461181640625, "learning_rate": 4.7260882988513624e-05, "loss": 82.1574, "step": 58840 }, { "epoch": 0.2377614466885103, "grad_norm": 253.60675048828125, "learning_rate": 4.725929413858677e-05, "loss": 86.5755, "step": 58850 }, { "epoch": 0.2378018479538779, "grad_norm": 476.08544921875, "learning_rate": 4.725770485470351e-05, "loss": 79.0762, "step": 58860 }, { "epoch": 0.23784224921924554, "grad_norm": 1652.21484375, "learning_rate": 4.725611513689485e-05, "loss": 76.6932, "step": 58870 }, { "epoch": 0.23788265048461318, "grad_norm": 504.9270324707031, "learning_rate": 4.725452498519175e-05, "loss": 76.0411, "step": 58880 }, { "epoch": 0.2379230517499808, "grad_norm": 1309.692626953125, "learning_rate": 4.7252934399625234e-05, "loss": 52.6607, "step": 58890 }, { "epoch": 0.23796345301534844, "grad_norm": 1309.8441162109375, "learning_rate": 4.725134338022631e-05, "loss": 88.8201, "step": 58900 }, { "epoch": 0.23800385428071608, "grad_norm": 1632.8326416015625, "learning_rate": 4.7249751927025996e-05, "loss": 88.7722, "step": 58910 }, { "epoch": 0.23804425554608372, "grad_norm": 535.5170288085938, "learning_rate": 4.7248160040055304e-05, "loss": 47.7584, "step": 58920 }, { "epoch": 0.23808465681145133, "grad_norm": 708.9509887695312, "learning_rate": 4.724656771934528e-05, "loss": 83.726, "step": 58930 }, { "epoch": 0.23812505807681897, "grad_norm": 579.57275390625, "learning_rate": 4.7244974964926965e-05, "loss": 62.5475, "step": 58940 }, { "epoch": 0.2381654593421866, "grad_norm": 651.062744140625, "learning_rate": 4.724338177683141e-05, "loss": 63.3703, "step": 58950 }, { "epoch": 0.23820586060755422, "grad_norm": 1036.9290771484375, "learning_rate": 4.724178815508967e-05, "loss": 59.7281, "step": 58960 }, { "epoch": 0.23824626187292186, "grad_norm": 406.64044189453125, "learning_rate": 4.724019409973283e-05, "loss": 75.1738, "step": 58970 }, { "epoch": 0.2382866631382895, "grad_norm": 533.7338256835938, "learning_rate": 4.723859961079195e-05, "loss": 76.1549, "step": 58980 }, { "epoch": 0.2383270644036571, "grad_norm": 595.7689819335938, "learning_rate": 4.7237004688298125e-05, "loss": 134.3011, "step": 58990 }, { "epoch": 0.23836746566902475, "grad_norm": 1294.3282470703125, "learning_rate": 4.723540933228244e-05, "loss": 100.5167, "step": 59000 }, { "epoch": 0.2384078669343924, "grad_norm": 612.8972778320312, "learning_rate": 4.7233813542776006e-05, "loss": 75.6335, "step": 59010 }, { "epoch": 0.23844826819976, "grad_norm": 1156.35498046875, "learning_rate": 4.723221731980993e-05, "loss": 70.4552, "step": 59020 }, { "epoch": 0.23848866946512765, "grad_norm": 1819.1259765625, "learning_rate": 4.723062066341533e-05, "loss": 64.2368, "step": 59030 }, { "epoch": 0.23852907073049529, "grad_norm": 765.05615234375, "learning_rate": 4.722902357362333e-05, "loss": 80.1383, "step": 59040 }, { "epoch": 0.2385694719958629, "grad_norm": 623.2485961914062, "learning_rate": 4.7227426050465084e-05, "loss": 73.9087, "step": 59050 }, { "epoch": 0.23860987326123054, "grad_norm": 719.6350708007812, "learning_rate": 4.722582809397171e-05, "loss": 59.0293, "step": 59060 }, { "epoch": 0.23865027452659818, "grad_norm": 410.1199645996094, "learning_rate": 4.722422970417438e-05, "loss": 58.2241, "step": 59070 }, { "epoch": 0.23869067579196582, "grad_norm": 598.96240234375, "learning_rate": 4.722263088110426e-05, "loss": 79.149, "step": 59080 }, { "epoch": 0.23873107705733343, "grad_norm": 504.0932922363281, "learning_rate": 4.72210316247925e-05, "loss": 67.0265, "step": 59090 }, { "epoch": 0.23877147832270107, "grad_norm": 1157.12060546875, "learning_rate": 4.721943193527029e-05, "loss": 89.9269, "step": 59100 }, { "epoch": 0.2388118795880687, "grad_norm": 1023.6410522460938, "learning_rate": 4.7217831812568815e-05, "loss": 59.8878, "step": 59110 }, { "epoch": 0.23885228085343632, "grad_norm": 299.01300048828125, "learning_rate": 4.721623125671927e-05, "loss": 106.7344, "step": 59120 }, { "epoch": 0.23889268211880396, "grad_norm": 490.2772521972656, "learning_rate": 4.7214630267752856e-05, "loss": 84.1761, "step": 59130 }, { "epoch": 0.2389330833841716, "grad_norm": 689.8060913085938, "learning_rate": 4.721302884570079e-05, "loss": 61.8113, "step": 59140 }, { "epoch": 0.23897348464953921, "grad_norm": 573.6192016601562, "learning_rate": 4.7211426990594296e-05, "loss": 84.5191, "step": 59150 }, { "epoch": 0.23901388591490685, "grad_norm": 554.744384765625, "learning_rate": 4.720982470246459e-05, "loss": 68.6441, "step": 59160 }, { "epoch": 0.2390542871802745, "grad_norm": 279.83795166015625, "learning_rate": 4.720822198134293e-05, "loss": 70.0726, "step": 59170 }, { "epoch": 0.2390946884456421, "grad_norm": 1090.882568359375, "learning_rate": 4.7206618827260534e-05, "loss": 41.3233, "step": 59180 }, { "epoch": 0.23913508971100975, "grad_norm": 2077.9443359375, "learning_rate": 4.720501524024867e-05, "loss": 81.099, "step": 59190 }, { "epoch": 0.2391754909763774, "grad_norm": 537.9066772460938, "learning_rate": 4.720341122033862e-05, "loss": 71.1957, "step": 59200 }, { "epoch": 0.239215892241745, "grad_norm": 1677.8251953125, "learning_rate": 4.720180676756162e-05, "loss": 70.0094, "step": 59210 }, { "epoch": 0.23925629350711264, "grad_norm": 618.0469970703125, "learning_rate": 4.720020188194897e-05, "loss": 60.1108, "step": 59220 }, { "epoch": 0.23929669477248028, "grad_norm": 843.9923095703125, "learning_rate": 4.719859656353196e-05, "loss": 54.7157, "step": 59230 }, { "epoch": 0.23933709603784792, "grad_norm": 1101.4031982421875, "learning_rate": 4.719699081234188e-05, "loss": 101.9431, "step": 59240 }, { "epoch": 0.23937749730321553, "grad_norm": 541.9828491210938, "learning_rate": 4.719538462841003e-05, "loss": 85.9788, "step": 59250 }, { "epoch": 0.23941789856858317, "grad_norm": 470.3389587402344, "learning_rate": 4.719377801176774e-05, "loss": 73.3981, "step": 59260 }, { "epoch": 0.2394582998339508, "grad_norm": 867.8907470703125, "learning_rate": 4.719217096244631e-05, "loss": 56.2378, "step": 59270 }, { "epoch": 0.23949870109931842, "grad_norm": 1353.7008056640625, "learning_rate": 4.7190563480477095e-05, "loss": 57.5405, "step": 59280 }, { "epoch": 0.23953910236468606, "grad_norm": 968.8933715820312, "learning_rate": 4.718895556589141e-05, "loss": 55.8982, "step": 59290 }, { "epoch": 0.2395795036300537, "grad_norm": 1031.3507080078125, "learning_rate": 4.718734721872062e-05, "loss": 72.3141, "step": 59300 }, { "epoch": 0.23961990489542132, "grad_norm": 531.7722778320312, "learning_rate": 4.718573843899607e-05, "loss": 79.3897, "step": 59310 }, { "epoch": 0.23966030616078896, "grad_norm": 1232.718994140625, "learning_rate": 4.718412922674913e-05, "loss": 79.8532, "step": 59320 }, { "epoch": 0.2397007074261566, "grad_norm": 3018.275390625, "learning_rate": 4.718251958201117e-05, "loss": 88.7309, "step": 59330 }, { "epoch": 0.2397411086915242, "grad_norm": 1827.9061279296875, "learning_rate": 4.718090950481356e-05, "loss": 101.574, "step": 59340 }, { "epoch": 0.23978150995689185, "grad_norm": 715.8681640625, "learning_rate": 4.71792989951877e-05, "loss": 72.9536, "step": 59350 }, { "epoch": 0.2398219112222595, "grad_norm": 1006.0107421875, "learning_rate": 4.717768805316501e-05, "loss": 67.8689, "step": 59360 }, { "epoch": 0.2398623124876271, "grad_norm": 1562.623779296875, "learning_rate": 4.717607667877685e-05, "loss": 94.7435, "step": 59370 }, { "epoch": 0.23990271375299474, "grad_norm": 822.7528686523438, "learning_rate": 4.717446487205466e-05, "loss": 71.5322, "step": 59380 }, { "epoch": 0.23994311501836238, "grad_norm": 733.3274536132812, "learning_rate": 4.717285263302987e-05, "loss": 63.2148, "step": 59390 }, { "epoch": 0.23998351628373002, "grad_norm": 1009.9617919921875, "learning_rate": 4.71712399617339e-05, "loss": 92.269, "step": 59400 }, { "epoch": 0.24002391754909763, "grad_norm": 768.0454711914062, "learning_rate": 4.716962685819819e-05, "loss": 46.8281, "step": 59410 }, { "epoch": 0.24006431881446527, "grad_norm": 392.7835693359375, "learning_rate": 4.716801332245419e-05, "loss": 57.8384, "step": 59420 }, { "epoch": 0.2401047200798329, "grad_norm": 1200.4879150390625, "learning_rate": 4.7166399354533365e-05, "loss": 63.8987, "step": 59430 }, { "epoch": 0.24014512134520052, "grad_norm": 540.96142578125, "learning_rate": 4.7164784954467166e-05, "loss": 86.5465, "step": 59440 }, { "epoch": 0.24018552261056816, "grad_norm": 679.489501953125, "learning_rate": 4.716317012228707e-05, "loss": 52.6366, "step": 59450 }, { "epoch": 0.2402259238759358, "grad_norm": 1019.4818115234375, "learning_rate": 4.716155485802457e-05, "loss": 55.8305, "step": 59460 }, { "epoch": 0.24026632514130342, "grad_norm": 330.5364990234375, "learning_rate": 4.715993916171114e-05, "loss": 42.9955, "step": 59470 }, { "epoch": 0.24030672640667106, "grad_norm": 1147.410888671875, "learning_rate": 4.715832303337829e-05, "loss": 93.0537, "step": 59480 }, { "epoch": 0.2403471276720387, "grad_norm": 1502.9691162109375, "learning_rate": 4.715670647305753e-05, "loss": 66.4446, "step": 59490 }, { "epoch": 0.2403875289374063, "grad_norm": 1007.5733032226562, "learning_rate": 4.715508948078037e-05, "loss": 68.1062, "step": 59500 }, { "epoch": 0.24042793020277395, "grad_norm": 1082.7093505859375, "learning_rate": 4.715347205657833e-05, "loss": 85.7707, "step": 59510 }, { "epoch": 0.2404683314681416, "grad_norm": 591.158447265625, "learning_rate": 4.715185420048295e-05, "loss": 53.6973, "step": 59520 }, { "epoch": 0.2405087327335092, "grad_norm": 357.9864196777344, "learning_rate": 4.715023591252576e-05, "loss": 70.244, "step": 59530 }, { "epoch": 0.24054913399887684, "grad_norm": 975.7410888671875, "learning_rate": 4.714861719273833e-05, "loss": 84.0754, "step": 59540 }, { "epoch": 0.24058953526424448, "grad_norm": 986.4381103515625, "learning_rate": 4.7146998041152204e-05, "loss": 64.928, "step": 59550 }, { "epoch": 0.24062993652961212, "grad_norm": 996.6936645507812, "learning_rate": 4.714537845779894e-05, "loss": 47.7855, "step": 59560 }, { "epoch": 0.24067033779497973, "grad_norm": 864.1629638671875, "learning_rate": 4.7143758442710124e-05, "loss": 65.1874, "step": 59570 }, { "epoch": 0.24071073906034737, "grad_norm": 291.1817321777344, "learning_rate": 4.7142137995917336e-05, "loss": 57.6154, "step": 59580 }, { "epoch": 0.240751140325715, "grad_norm": 353.8094177246094, "learning_rate": 4.714051711745217e-05, "loss": 62.8998, "step": 59590 }, { "epoch": 0.24079154159108263, "grad_norm": 667.8562622070312, "learning_rate": 4.713889580734623e-05, "loss": 61.3621, "step": 59600 }, { "epoch": 0.24083194285645027, "grad_norm": 600.4000854492188, "learning_rate": 4.713727406563111e-05, "loss": 101.2895, "step": 59610 }, { "epoch": 0.2408723441218179, "grad_norm": 1322.2716064453125, "learning_rate": 4.713565189233844e-05, "loss": 73.4781, "step": 59620 }, { "epoch": 0.24091274538718552, "grad_norm": 378.68841552734375, "learning_rate": 4.7134029287499834e-05, "loss": 68.5723, "step": 59630 }, { "epoch": 0.24095314665255316, "grad_norm": 1208.138916015625, "learning_rate": 4.7132406251146935e-05, "loss": 66.9278, "step": 59640 }, { "epoch": 0.2409935479179208, "grad_norm": 402.3450927734375, "learning_rate": 4.713078278331138e-05, "loss": 84.2316, "step": 59650 }, { "epoch": 0.2410339491832884, "grad_norm": 785.4451293945312, "learning_rate": 4.712915888402483e-05, "loss": 82.4133, "step": 59660 }, { "epoch": 0.24107435044865605, "grad_norm": 485.10504150390625, "learning_rate": 4.7127534553318925e-05, "loss": 62.9998, "step": 59670 }, { "epoch": 0.2411147517140237, "grad_norm": 791.6461791992188, "learning_rate": 4.712590979122534e-05, "loss": 59.8595, "step": 59680 }, { "epoch": 0.2411551529793913, "grad_norm": 417.98040771484375, "learning_rate": 4.712428459777576e-05, "loss": 71.6051, "step": 59690 }, { "epoch": 0.24119555424475894, "grad_norm": 866.4073486328125, "learning_rate": 4.712265897300186e-05, "loss": 50.0575, "step": 59700 }, { "epoch": 0.24123595551012658, "grad_norm": 1018.1526489257812, "learning_rate": 4.712103291693533e-05, "loss": 88.681, "step": 59710 }, { "epoch": 0.24127635677549422, "grad_norm": 2852.052978515625, "learning_rate": 4.7119406429607885e-05, "loss": 85.578, "step": 59720 }, { "epoch": 0.24131675804086183, "grad_norm": 682.5413208007812, "learning_rate": 4.711777951105121e-05, "loss": 73.2862, "step": 59730 }, { "epoch": 0.24135715930622947, "grad_norm": 1790.8092041015625, "learning_rate": 4.7116152161297045e-05, "loss": 67.5014, "step": 59740 }, { "epoch": 0.24139756057159711, "grad_norm": 671.518310546875, "learning_rate": 4.71145243803771e-05, "loss": 67.0482, "step": 59750 }, { "epoch": 0.24143796183696473, "grad_norm": 248.55958557128906, "learning_rate": 4.711289616832312e-05, "loss": 52.1543, "step": 59760 }, { "epoch": 0.24147836310233237, "grad_norm": 676.2057495117188, "learning_rate": 4.7111267525166845e-05, "loss": 89.2593, "step": 59770 }, { "epoch": 0.2415187643677, "grad_norm": 846.4784545898438, "learning_rate": 4.710963845094003e-05, "loss": 94.6082, "step": 59780 }, { "epoch": 0.24155916563306762, "grad_norm": 735.9619750976562, "learning_rate": 4.710800894567443e-05, "loss": 85.0373, "step": 59790 }, { "epoch": 0.24159956689843526, "grad_norm": 0.0, "learning_rate": 4.710637900940181e-05, "loss": 56.9569, "step": 59800 }, { "epoch": 0.2416399681638029, "grad_norm": 1803.2908935546875, "learning_rate": 4.7104748642153954e-05, "loss": 76.5792, "step": 59810 }, { "epoch": 0.2416803694291705, "grad_norm": 886.1052856445312, "learning_rate": 4.710311784396264e-05, "loss": 44.95, "step": 59820 }, { "epoch": 0.24172077069453815, "grad_norm": 2984.022705078125, "learning_rate": 4.710148661485966e-05, "loss": 104.791, "step": 59830 }, { "epoch": 0.2417611719599058, "grad_norm": 1253.9434814453125, "learning_rate": 4.709985495487682e-05, "loss": 71.7768, "step": 59840 }, { "epoch": 0.2418015732252734, "grad_norm": 785.1651000976562, "learning_rate": 4.7098222864045945e-05, "loss": 81.442, "step": 59850 }, { "epoch": 0.24184197449064104, "grad_norm": 1157.2420654296875, "learning_rate": 4.709659034239883e-05, "loss": 91.4549, "step": 59860 }, { "epoch": 0.24188237575600868, "grad_norm": 2114.29833984375, "learning_rate": 4.7094957389967306e-05, "loss": 82.0255, "step": 59870 }, { "epoch": 0.24192277702137632, "grad_norm": 1235.954345703125, "learning_rate": 4.7093324006783214e-05, "loss": 95.1471, "step": 59880 }, { "epoch": 0.24196317828674394, "grad_norm": 990.7355346679688, "learning_rate": 4.709169019287839e-05, "loss": 83.1672, "step": 59890 }, { "epoch": 0.24200357955211158, "grad_norm": 790.0713500976562, "learning_rate": 4.7090055948284706e-05, "loss": 75.6788, "step": 59900 }, { "epoch": 0.24204398081747922, "grad_norm": 613.9937133789062, "learning_rate": 4.7088421273034e-05, "loss": 74.3221, "step": 59910 }, { "epoch": 0.24208438208284683, "grad_norm": 1069.763427734375, "learning_rate": 4.708678616715815e-05, "loss": 94.9927, "step": 59920 }, { "epoch": 0.24212478334821447, "grad_norm": 1685.7093505859375, "learning_rate": 4.7085150630689034e-05, "loss": 79.8483, "step": 59930 }, { "epoch": 0.2421651846135821, "grad_norm": 1826.313720703125, "learning_rate": 4.7083514663658536e-05, "loss": 110.4784, "step": 59940 }, { "epoch": 0.24220558587894972, "grad_norm": 2508.780029296875, "learning_rate": 4.7081878266098545e-05, "loss": 95.3383, "step": 59950 }, { "epoch": 0.24224598714431736, "grad_norm": 659.6832275390625, "learning_rate": 4.708024143804097e-05, "loss": 79.6695, "step": 59960 }, { "epoch": 0.242286388409685, "grad_norm": 570.7511596679688, "learning_rate": 4.707860417951773e-05, "loss": 68.3188, "step": 59970 }, { "epoch": 0.2423267896750526, "grad_norm": 1179.564208984375, "learning_rate": 4.707696649056073e-05, "loss": 109.7361, "step": 59980 }, { "epoch": 0.24236719094042025, "grad_norm": 632.1577758789062, "learning_rate": 4.70753283712019e-05, "loss": 86.0636, "step": 59990 }, { "epoch": 0.2424075922057879, "grad_norm": 975.7955322265625, "learning_rate": 4.707368982147318e-05, "loss": 87.0066, "step": 60000 }, { "epoch": 0.2424479934711555, "grad_norm": 3105.386962890625, "learning_rate": 4.707205084140651e-05, "loss": 77.5594, "step": 60010 }, { "epoch": 0.24248839473652314, "grad_norm": 834.739013671875, "learning_rate": 4.707041143103384e-05, "loss": 72.7049, "step": 60020 }, { "epoch": 0.24252879600189078, "grad_norm": 481.8167419433594, "learning_rate": 4.706877159038715e-05, "loss": 91.9634, "step": 60030 }, { "epoch": 0.24256919726725842, "grad_norm": 651.197021484375, "learning_rate": 4.706713131949839e-05, "loss": 57.6539, "step": 60040 }, { "epoch": 0.24260959853262604, "grad_norm": 821.2056884765625, "learning_rate": 4.706549061839954e-05, "loss": 67.2985, "step": 60050 }, { "epoch": 0.24264999979799368, "grad_norm": 445.24578857421875, "learning_rate": 4.70638494871226e-05, "loss": 70.1479, "step": 60060 }, { "epoch": 0.24269040106336132, "grad_norm": 902.0319213867188, "learning_rate": 4.7062207925699544e-05, "loss": 85.1113, "step": 60070 }, { "epoch": 0.24273080232872893, "grad_norm": 845.628662109375, "learning_rate": 4.7060565934162394e-05, "loss": 86.7892, "step": 60080 }, { "epoch": 0.24277120359409657, "grad_norm": 936.5902709960938, "learning_rate": 4.7058923512543154e-05, "loss": 84.2543, "step": 60090 }, { "epoch": 0.2428116048594642, "grad_norm": 770.0076904296875, "learning_rate": 4.7057280660873835e-05, "loss": 77.7724, "step": 60100 }, { "epoch": 0.24285200612483182, "grad_norm": 1131.8125, "learning_rate": 4.705563737918648e-05, "loss": 69.4776, "step": 60110 }, { "epoch": 0.24289240739019946, "grad_norm": 940.4691772460938, "learning_rate": 4.705399366751312e-05, "loss": 88.3803, "step": 60120 }, { "epoch": 0.2429328086555671, "grad_norm": 763.4826049804688, "learning_rate": 4.705234952588579e-05, "loss": 79.9678, "step": 60130 }, { "epoch": 0.2429732099209347, "grad_norm": 712.3192749023438, "learning_rate": 4.705070495433657e-05, "loss": 71.1181, "step": 60140 }, { "epoch": 0.24301361118630235, "grad_norm": 1741.696044921875, "learning_rate": 4.704905995289749e-05, "loss": 70.7245, "step": 60150 }, { "epoch": 0.24305401245167, "grad_norm": 588.7029418945312, "learning_rate": 4.7047414521600644e-05, "loss": 57.2675, "step": 60160 }, { "epoch": 0.2430944137170376, "grad_norm": 2938.370361328125, "learning_rate": 4.704576866047808e-05, "loss": 61.0899, "step": 60170 }, { "epoch": 0.24313481498240525, "grad_norm": 936.1781005859375, "learning_rate": 4.704412236956193e-05, "loss": 62.6005, "step": 60180 }, { "epoch": 0.24317521624777289, "grad_norm": 804.415771484375, "learning_rate": 4.7042475648884254e-05, "loss": 79.235, "step": 60190 }, { "epoch": 0.24321561751314053, "grad_norm": 729.1420288085938, "learning_rate": 4.704082849847718e-05, "loss": 92.9888, "step": 60200 }, { "epoch": 0.24325601877850814, "grad_norm": 604.2557983398438, "learning_rate": 4.703918091837279e-05, "loss": 55.6201, "step": 60210 }, { "epoch": 0.24329642004387578, "grad_norm": 580.6284790039062, "learning_rate": 4.703753290860323e-05, "loss": 45.9183, "step": 60220 }, { "epoch": 0.24333682130924342, "grad_norm": 361.0988464355469, "learning_rate": 4.703588446920062e-05, "loss": 96.7986, "step": 60230 }, { "epoch": 0.24337722257461103, "grad_norm": 5288.78271484375, "learning_rate": 4.70342356001971e-05, "loss": 62.3638, "step": 60240 }, { "epoch": 0.24341762383997867, "grad_norm": 981.030517578125, "learning_rate": 4.70325863016248e-05, "loss": 72.3735, "step": 60250 }, { "epoch": 0.2434580251053463, "grad_norm": 1044.311279296875, "learning_rate": 4.703093657351591e-05, "loss": 101.13, "step": 60260 }, { "epoch": 0.24349842637071392, "grad_norm": 695.3463134765625, "learning_rate": 4.702928641590255e-05, "loss": 61.2876, "step": 60270 }, { "epoch": 0.24353882763608156, "grad_norm": 825.5418701171875, "learning_rate": 4.702763582881692e-05, "loss": 77.4805, "step": 60280 }, { "epoch": 0.2435792289014492, "grad_norm": 568.7557983398438, "learning_rate": 4.702598481229118e-05, "loss": 52.6858, "step": 60290 }, { "epoch": 0.24361963016681681, "grad_norm": 1038.3533935546875, "learning_rate": 4.702433336635753e-05, "loss": 72.8244, "step": 60300 }, { "epoch": 0.24366003143218445, "grad_norm": 534.8966674804688, "learning_rate": 4.702268149104816e-05, "loss": 54.4095, "step": 60310 }, { "epoch": 0.2437004326975521, "grad_norm": 683.1052856445312, "learning_rate": 4.702102918639528e-05, "loss": 55.7349, "step": 60320 }, { "epoch": 0.2437408339629197, "grad_norm": 483.41552734375, "learning_rate": 4.70193764524311e-05, "loss": 74.4342, "step": 60330 }, { "epoch": 0.24378123522828735, "grad_norm": 688.1597900390625, "learning_rate": 4.701772328918784e-05, "loss": 56.6066, "step": 60340 }, { "epoch": 0.243821636493655, "grad_norm": 874.982177734375, "learning_rate": 4.701606969669773e-05, "loss": 62.7406, "step": 60350 }, { "epoch": 0.24386203775902263, "grad_norm": 1608.3681640625, "learning_rate": 4.7014415674993e-05, "loss": 87.2219, "step": 60360 }, { "epoch": 0.24390243902439024, "grad_norm": 852.6019287109375, "learning_rate": 4.701276122410591e-05, "loss": 65.5959, "step": 60370 }, { "epoch": 0.24394284028975788, "grad_norm": 796.290771484375, "learning_rate": 4.70111063440687e-05, "loss": 64.5564, "step": 60380 }, { "epoch": 0.24398324155512552, "grad_norm": 1192.7855224609375, "learning_rate": 4.7009451034913645e-05, "loss": 56.2537, "step": 60390 }, { "epoch": 0.24402364282049313, "grad_norm": 691.683837890625, "learning_rate": 4.7007795296673006e-05, "loss": 58.2113, "step": 60400 }, { "epoch": 0.24406404408586077, "grad_norm": 586.52685546875, "learning_rate": 4.700613912937907e-05, "loss": 47.3722, "step": 60410 }, { "epoch": 0.2441044453512284, "grad_norm": 1319.3663330078125, "learning_rate": 4.700448253306412e-05, "loss": 75.1673, "step": 60420 }, { "epoch": 0.24414484661659602, "grad_norm": 1594.365234375, "learning_rate": 4.7002825507760465e-05, "loss": 120.8496, "step": 60430 }, { "epoch": 0.24418524788196366, "grad_norm": 789.883544921875, "learning_rate": 4.700116805350039e-05, "loss": 65.3508, "step": 60440 }, { "epoch": 0.2442256491473313, "grad_norm": 627.7055053710938, "learning_rate": 4.699951017031621e-05, "loss": 80.6547, "step": 60450 }, { "epoch": 0.24426605041269892, "grad_norm": 564.2537841796875, "learning_rate": 4.699785185824026e-05, "loss": 73.5011, "step": 60460 }, { "epoch": 0.24430645167806656, "grad_norm": 589.3305053710938, "learning_rate": 4.6996193117304864e-05, "loss": 50.1976, "step": 60470 }, { "epoch": 0.2443468529434342, "grad_norm": 2720.202392578125, "learning_rate": 4.699453394754236e-05, "loss": 65.688, "step": 60480 }, { "epoch": 0.2443872542088018, "grad_norm": 926.1817016601562, "learning_rate": 4.6992874348985093e-05, "loss": 99.6495, "step": 60490 }, { "epoch": 0.24442765547416945, "grad_norm": 845.654541015625, "learning_rate": 4.6991214321665414e-05, "loss": 63.7764, "step": 60500 }, { "epoch": 0.2444680567395371, "grad_norm": 458.4206237792969, "learning_rate": 4.698955386561569e-05, "loss": 53.1651, "step": 60510 }, { "epoch": 0.24450845800490473, "grad_norm": 1341.775146484375, "learning_rate": 4.6987892980868296e-05, "loss": 85.8457, "step": 60520 }, { "epoch": 0.24454885927027234, "grad_norm": 1214.4681396484375, "learning_rate": 4.6986231667455605e-05, "loss": 59.1112, "step": 60530 }, { "epoch": 0.24458926053563998, "grad_norm": 791.0181884765625, "learning_rate": 4.6984569925410016e-05, "loss": 113.4298, "step": 60540 }, { "epoch": 0.24462966180100762, "grad_norm": 1435.430908203125, "learning_rate": 4.6982907754763906e-05, "loss": 106.5237, "step": 60550 }, { "epoch": 0.24467006306637523, "grad_norm": 671.5120239257812, "learning_rate": 4.69812451555497e-05, "loss": 75.014, "step": 60560 }, { "epoch": 0.24471046433174287, "grad_norm": 419.3539733886719, "learning_rate": 4.697958212779981e-05, "loss": 40.2964, "step": 60570 }, { "epoch": 0.2447508655971105, "grad_norm": 1304.94189453125, "learning_rate": 4.697791867154663e-05, "loss": 52.5812, "step": 60580 }, { "epoch": 0.24479126686247812, "grad_norm": 577.18505859375, "learning_rate": 4.697625478682263e-05, "loss": 82.3342, "step": 60590 }, { "epoch": 0.24483166812784576, "grad_norm": 455.48480224609375, "learning_rate": 4.6974590473660216e-05, "loss": 55.9024, "step": 60600 }, { "epoch": 0.2448720693932134, "grad_norm": 592.9940185546875, "learning_rate": 4.697292573209185e-05, "loss": 65.5786, "step": 60610 }, { "epoch": 0.24491247065858102, "grad_norm": 368.27581787109375, "learning_rate": 4.697126056214999e-05, "loss": 34.5167, "step": 60620 }, { "epoch": 0.24495287192394866, "grad_norm": 400.2873840332031, "learning_rate": 4.6969594963867084e-05, "loss": 104.06, "step": 60630 }, { "epoch": 0.2449932731893163, "grad_norm": 744.5037231445312, "learning_rate": 4.696792893727562e-05, "loss": 87.9777, "step": 60640 }, { "epoch": 0.2450336744546839, "grad_norm": 654.1832885742188, "learning_rate": 4.696626248240807e-05, "loss": 54.0729, "step": 60650 }, { "epoch": 0.24507407572005155, "grad_norm": 339.2250671386719, "learning_rate": 4.6964595599296926e-05, "loss": 40.1769, "step": 60660 }, { "epoch": 0.2451144769854192, "grad_norm": 632.5075073242188, "learning_rate": 4.696292828797468e-05, "loss": 79.759, "step": 60670 }, { "epoch": 0.24515487825078683, "grad_norm": 884.93310546875, "learning_rate": 4.696126054847385e-05, "loss": 73.9001, "step": 60680 }, { "epoch": 0.24519527951615444, "grad_norm": 1115.2449951171875, "learning_rate": 4.695959238082692e-05, "loss": 70.6572, "step": 60690 }, { "epoch": 0.24523568078152208, "grad_norm": 1520.8741455078125, "learning_rate": 4.6957923785066445e-05, "loss": 90.2716, "step": 60700 }, { "epoch": 0.24527608204688972, "grad_norm": 1127.73876953125, "learning_rate": 4.6956254761224936e-05, "loss": 70.0457, "step": 60710 }, { "epoch": 0.24531648331225733, "grad_norm": 1073.5108642578125, "learning_rate": 4.695458530933494e-05, "loss": 100.9174, "step": 60720 }, { "epoch": 0.24535688457762497, "grad_norm": 773.4358520507812, "learning_rate": 4.6952915429429e-05, "loss": 82.9172, "step": 60730 }, { "epoch": 0.2453972858429926, "grad_norm": 1714.5184326171875, "learning_rate": 4.6951245121539675e-05, "loss": 74.2328, "step": 60740 }, { "epoch": 0.24543768710836023, "grad_norm": 1113.5059814453125, "learning_rate": 4.694957438569951e-05, "loss": 68.1501, "step": 60750 }, { "epoch": 0.24547808837372787, "grad_norm": 1123.2255859375, "learning_rate": 4.694790322194111e-05, "loss": 57.4253, "step": 60760 }, { "epoch": 0.2455184896390955, "grad_norm": 1276.2410888671875, "learning_rate": 4.6946231630297036e-05, "loss": 110.3862, "step": 60770 }, { "epoch": 0.24555889090446312, "grad_norm": 695.6715087890625, "learning_rate": 4.694455961079987e-05, "loss": 70.8457, "step": 60780 }, { "epoch": 0.24559929216983076, "grad_norm": 1895.229736328125, "learning_rate": 4.694288716348221e-05, "loss": 62.3386, "step": 60790 }, { "epoch": 0.2456396934351984, "grad_norm": 894.3408813476562, "learning_rate": 4.694121428837668e-05, "loss": 73.6542, "step": 60800 }, { "epoch": 0.245680094700566, "grad_norm": 773.7852172851562, "learning_rate": 4.693954098551587e-05, "loss": 57.4668, "step": 60810 }, { "epoch": 0.24572049596593365, "grad_norm": 873.3363037109375, "learning_rate": 4.693786725493242e-05, "loss": 88.7946, "step": 60820 }, { "epoch": 0.2457608972313013, "grad_norm": 787.7183227539062, "learning_rate": 4.6936193096658955e-05, "loss": 95.2631, "step": 60830 }, { "epoch": 0.2458012984966689, "grad_norm": 1632.471923828125, "learning_rate": 4.693451851072811e-05, "loss": 76.3312, "step": 60840 }, { "epoch": 0.24584169976203654, "grad_norm": 624.8790283203125, "learning_rate": 4.693284349717254e-05, "loss": 60.229, "step": 60850 }, { "epoch": 0.24588210102740418, "grad_norm": 776.8783569335938, "learning_rate": 4.693116805602489e-05, "loss": 67.0748, "step": 60860 }, { "epoch": 0.24592250229277182, "grad_norm": 357.4325866699219, "learning_rate": 4.692949218731782e-05, "loss": 75.164, "step": 60870 }, { "epoch": 0.24596290355813943, "grad_norm": 934.4722290039062, "learning_rate": 4.692781589108402e-05, "loss": 104.851, "step": 60880 }, { "epoch": 0.24600330482350707, "grad_norm": 788.913330078125, "learning_rate": 4.692613916735615e-05, "loss": 76.9564, "step": 60890 }, { "epoch": 0.24604370608887471, "grad_norm": 1184.293701171875, "learning_rate": 4.692446201616692e-05, "loss": 104.7225, "step": 60900 }, { "epoch": 0.24608410735424233, "grad_norm": 768.3632202148438, "learning_rate": 4.692278443754901e-05, "loss": 96.9056, "step": 60910 }, { "epoch": 0.24612450861960997, "grad_norm": 1289.7335205078125, "learning_rate": 4.6921106431535135e-05, "loss": 51.5294, "step": 60920 }, { "epoch": 0.2461649098849776, "grad_norm": 537.193359375, "learning_rate": 4.6919427998158e-05, "loss": 87.4695, "step": 60930 }, { "epoch": 0.24620531115034522, "grad_norm": 3655.863525390625, "learning_rate": 4.691774913745033e-05, "loss": 92.1189, "step": 60940 }, { "epoch": 0.24624571241571286, "grad_norm": 465.6102600097656, "learning_rate": 4.691606984944486e-05, "loss": 79.8114, "step": 60950 }, { "epoch": 0.2462861136810805, "grad_norm": 531.8803100585938, "learning_rate": 4.691439013417433e-05, "loss": 62.0595, "step": 60960 }, { "epoch": 0.2463265149464481, "grad_norm": 1052.3104248046875, "learning_rate": 4.691270999167147e-05, "loss": 67.6164, "step": 60970 }, { "epoch": 0.24636691621181575, "grad_norm": 624.75390625, "learning_rate": 4.691102942196906e-05, "loss": 67.1735, "step": 60980 }, { "epoch": 0.2464073174771834, "grad_norm": 626.8027954101562, "learning_rate": 4.6909348425099835e-05, "loss": 64.8553, "step": 60990 }, { "epoch": 0.246447718742551, "grad_norm": 250.1268768310547, "learning_rate": 4.690766700109659e-05, "loss": 63.7221, "step": 61000 }, { "epoch": 0.24648812000791864, "grad_norm": 978.392333984375, "learning_rate": 4.6905985149992107e-05, "loss": 138.7521, "step": 61010 }, { "epoch": 0.24652852127328628, "grad_norm": 770.6021728515625, "learning_rate": 4.690430287181915e-05, "loss": 104.2015, "step": 61020 }, { "epoch": 0.24656892253865392, "grad_norm": 1836.369873046875, "learning_rate": 4.690262016661054e-05, "loss": 64.2337, "step": 61030 }, { "epoch": 0.24660932380402154, "grad_norm": 1369.9937744140625, "learning_rate": 4.690093703439907e-05, "loss": 114.4457, "step": 61040 }, { "epoch": 0.24664972506938918, "grad_norm": 756.3161010742188, "learning_rate": 4.689925347521757e-05, "loss": 49.0843, "step": 61050 }, { "epoch": 0.24669012633475682, "grad_norm": 446.1201171875, "learning_rate": 4.689756948909884e-05, "loss": 59.4647, "step": 61060 }, { "epoch": 0.24673052760012443, "grad_norm": 1100.18994140625, "learning_rate": 4.689588507607572e-05, "loss": 55.1963, "step": 61070 }, { "epoch": 0.24677092886549207, "grad_norm": 863.7999877929688, "learning_rate": 4.689420023618104e-05, "loss": 67.0081, "step": 61080 }, { "epoch": 0.2468113301308597, "grad_norm": 696.3552856445312, "learning_rate": 4.6892514969447664e-05, "loss": 80.1907, "step": 61090 }, { "epoch": 0.24685173139622732, "grad_norm": 695.3787231445312, "learning_rate": 4.6890829275908434e-05, "loss": 70.8243, "step": 61100 }, { "epoch": 0.24689213266159496, "grad_norm": 893.7193603515625, "learning_rate": 4.6889143155596214e-05, "loss": 99.1561, "step": 61110 }, { "epoch": 0.2469325339269626, "grad_norm": 1360.45263671875, "learning_rate": 4.688745660854388e-05, "loss": 103.6161, "step": 61120 }, { "epoch": 0.2469729351923302, "grad_norm": 726.0472412109375, "learning_rate": 4.688576963478432e-05, "loss": 77.2796, "step": 61130 }, { "epoch": 0.24701333645769785, "grad_norm": 120.09856414794922, "learning_rate": 4.68840822343504e-05, "loss": 77.299, "step": 61140 }, { "epoch": 0.2470537377230655, "grad_norm": 1099.0858154296875, "learning_rate": 4.6882394407275044e-05, "loss": 103.8713, "step": 61150 }, { "epoch": 0.2470941389884331, "grad_norm": 704.1102294921875, "learning_rate": 4.688070615359114e-05, "loss": 100.1313, "step": 61160 }, { "epoch": 0.24713454025380074, "grad_norm": 429.2733459472656, "learning_rate": 4.6879017473331595e-05, "loss": 105.0034, "step": 61170 }, { "epoch": 0.24717494151916838, "grad_norm": 715.7286376953125, "learning_rate": 4.6877328366529346e-05, "loss": 57.8755, "step": 61180 }, { "epoch": 0.24721534278453602, "grad_norm": 708.1912841796875, "learning_rate": 4.687563883321732e-05, "loss": 67.7243, "step": 61190 }, { "epoch": 0.24725574404990364, "grad_norm": 885.6137084960938, "learning_rate": 4.687394887342845e-05, "loss": 63.6241, "step": 61200 }, { "epoch": 0.24729614531527128, "grad_norm": 549.2717895507812, "learning_rate": 4.687225848719568e-05, "loss": 66.4841, "step": 61210 }, { "epoch": 0.24733654658063892, "grad_norm": 390.741455078125, "learning_rate": 4.687056767455198e-05, "loss": 59.3939, "step": 61220 }, { "epoch": 0.24737694784600653, "grad_norm": 770.025390625, "learning_rate": 4.6868876435530296e-05, "loss": 78.1586, "step": 61230 }, { "epoch": 0.24741734911137417, "grad_norm": 565.1214599609375, "learning_rate": 4.686718477016361e-05, "loss": 68.8117, "step": 61240 }, { "epoch": 0.2474577503767418, "grad_norm": 944.383544921875, "learning_rate": 4.6865492678484895e-05, "loss": 72.7522, "step": 61250 }, { "epoch": 0.24749815164210942, "grad_norm": 2328.01513671875, "learning_rate": 4.6863800160527147e-05, "loss": 86.5814, "step": 61260 }, { "epoch": 0.24753855290747706, "grad_norm": 648.8901977539062, "learning_rate": 4.686210721632336e-05, "loss": 85.4312, "step": 61270 }, { "epoch": 0.2475789541728447, "grad_norm": 1613.290283203125, "learning_rate": 4.6860413845906534e-05, "loss": 80.4138, "step": 61280 }, { "epoch": 0.2476193554382123, "grad_norm": 515.4049682617188, "learning_rate": 4.685872004930969e-05, "loss": 48.0804, "step": 61290 }, { "epoch": 0.24765975670357995, "grad_norm": 258.1792297363281, "learning_rate": 4.685702582656584e-05, "loss": 48.0407, "step": 61300 }, { "epoch": 0.2477001579689476, "grad_norm": 1516.9779052734375, "learning_rate": 4.685533117770803e-05, "loss": 76.3524, "step": 61310 }, { "epoch": 0.2477405592343152, "grad_norm": 1197.6373291015625, "learning_rate": 4.6853636102769274e-05, "loss": 95.2485, "step": 61320 }, { "epoch": 0.24778096049968285, "grad_norm": 433.1604309082031, "learning_rate": 4.6851940601782635e-05, "loss": 83.6786, "step": 61330 }, { "epoch": 0.24782136176505049, "grad_norm": 622.607666015625, "learning_rate": 4.685024467478116e-05, "loss": 75.2844, "step": 61340 }, { "epoch": 0.24786176303041813, "grad_norm": 710.9769287109375, "learning_rate": 4.684854832179792e-05, "loss": 106.4003, "step": 61350 }, { "epoch": 0.24790216429578574, "grad_norm": 1580.35009765625, "learning_rate": 4.684685154286599e-05, "loss": 58.8193, "step": 61360 }, { "epoch": 0.24794256556115338, "grad_norm": 917.3915405273438, "learning_rate": 4.684515433801843e-05, "loss": 78.9511, "step": 61370 }, { "epoch": 0.24798296682652102, "grad_norm": 1568.43408203125, "learning_rate": 4.684345670728834e-05, "loss": 82.2572, "step": 61380 }, { "epoch": 0.24802336809188863, "grad_norm": 768.3124389648438, "learning_rate": 4.6841758650708824e-05, "loss": 83.1386, "step": 61390 }, { "epoch": 0.24806376935725627, "grad_norm": 972.3724975585938, "learning_rate": 4.684006016831297e-05, "loss": 111.0157, "step": 61400 }, { "epoch": 0.2481041706226239, "grad_norm": 710.788330078125, "learning_rate": 4.68383612601339e-05, "loss": 70.9548, "step": 61410 }, { "epoch": 0.24814457188799152, "grad_norm": 1372.3570556640625, "learning_rate": 4.6836661926204736e-05, "loss": 57.5385, "step": 61420 }, { "epoch": 0.24818497315335916, "grad_norm": 2235.90380859375, "learning_rate": 4.6834962166558605e-05, "loss": 86.6557, "step": 61430 }, { "epoch": 0.2482253744187268, "grad_norm": 1524.7860107421875, "learning_rate": 4.6833261981228646e-05, "loss": 86.687, "step": 61440 }, { "epoch": 0.24826577568409441, "grad_norm": 514.3629760742188, "learning_rate": 4.683156137024801e-05, "loss": 73.8575, "step": 61450 }, { "epoch": 0.24830617694946205, "grad_norm": 830.9341430664062, "learning_rate": 4.6829860333649836e-05, "loss": 63.1829, "step": 61460 }, { "epoch": 0.2483465782148297, "grad_norm": 0.0, "learning_rate": 4.68281588714673e-05, "loss": 49.3014, "step": 61470 }, { "epoch": 0.2483869794801973, "grad_norm": 1122.3907470703125, "learning_rate": 4.682645698373357e-05, "loss": 56.4632, "step": 61480 }, { "epoch": 0.24842738074556495, "grad_norm": 838.9703369140625, "learning_rate": 4.682475467048182e-05, "loss": 51.4873, "step": 61490 }, { "epoch": 0.2484677820109326, "grad_norm": 966.208251953125, "learning_rate": 4.682305193174524e-05, "loss": 92.0763, "step": 61500 }, { "epoch": 0.24850818327630023, "grad_norm": 623.809326171875, "learning_rate": 4.682134876755704e-05, "loss": 87.1039, "step": 61510 }, { "epoch": 0.24854858454166784, "grad_norm": 769.28759765625, "learning_rate": 4.68196451779504e-05, "loss": 98.8148, "step": 61520 }, { "epoch": 0.24858898580703548, "grad_norm": 437.263671875, "learning_rate": 4.6817941162958544e-05, "loss": 78.6662, "step": 61530 }, { "epoch": 0.24862938707240312, "grad_norm": 566.79638671875, "learning_rate": 4.681623672261469e-05, "loss": 67.6416, "step": 61540 }, { "epoch": 0.24866978833777073, "grad_norm": 378.28082275390625, "learning_rate": 4.6814531856952084e-05, "loss": 51.0772, "step": 61550 }, { "epoch": 0.24871018960313837, "grad_norm": 2680.483154296875, "learning_rate": 4.6812826566003934e-05, "loss": 102.0762, "step": 61560 }, { "epoch": 0.248750590868506, "grad_norm": 787.8114624023438, "learning_rate": 4.68111208498035e-05, "loss": 40.7656, "step": 61570 }, { "epoch": 0.24879099213387362, "grad_norm": 701.6543579101562, "learning_rate": 4.6809414708384046e-05, "loss": 71.9813, "step": 61580 }, { "epoch": 0.24883139339924126, "grad_norm": 643.1692504882812, "learning_rate": 4.680770814177882e-05, "loss": 84.7958, "step": 61590 }, { "epoch": 0.2488717946646089, "grad_norm": 857.9775390625, "learning_rate": 4.68060011500211e-05, "loss": 78.1685, "step": 61600 }, { "epoch": 0.24891219592997652, "grad_norm": 897.5280151367188, "learning_rate": 4.680429373314415e-05, "loss": 57.9258, "step": 61610 }, { "epoch": 0.24895259719534416, "grad_norm": 756.8377685546875, "learning_rate": 4.680258589118128e-05, "loss": 90.2208, "step": 61620 }, { "epoch": 0.2489929984607118, "grad_norm": 650.5374145507812, "learning_rate": 4.680087762416576e-05, "loss": 81.8518, "step": 61630 }, { "epoch": 0.2490333997260794, "grad_norm": 675.606201171875, "learning_rate": 4.6799168932130915e-05, "loss": 72.3285, "step": 61640 }, { "epoch": 0.24907380099144705, "grad_norm": 932.6596069335938, "learning_rate": 4.679745981511005e-05, "loss": 51.2554, "step": 61650 }, { "epoch": 0.2491142022568147, "grad_norm": 891.9270629882812, "learning_rate": 4.679575027313649e-05, "loss": 54.2277, "step": 61660 }, { "epoch": 0.24915460352218233, "grad_norm": 927.309814453125, "learning_rate": 4.6794040306243545e-05, "loss": 68.7457, "step": 61670 }, { "epoch": 0.24919500478754994, "grad_norm": 2662.16845703125, "learning_rate": 4.679232991446456e-05, "loss": 63.2462, "step": 61680 }, { "epoch": 0.24923540605291758, "grad_norm": 604.02978515625, "learning_rate": 4.67906190978329e-05, "loss": 37.8021, "step": 61690 }, { "epoch": 0.24927580731828522, "grad_norm": 291.47430419921875, "learning_rate": 4.6788907856381895e-05, "loss": 131.591, "step": 61700 }, { "epoch": 0.24931620858365283, "grad_norm": 640.17529296875, "learning_rate": 4.678719619014491e-05, "loss": 86.4894, "step": 61710 }, { "epoch": 0.24935660984902047, "grad_norm": 745.0997924804688, "learning_rate": 4.678548409915532e-05, "loss": 62.8037, "step": 61720 }, { "epoch": 0.2493970111143881, "grad_norm": 576.8112182617188, "learning_rate": 4.67837715834465e-05, "loss": 96.3769, "step": 61730 }, { "epoch": 0.24943741237975572, "grad_norm": 687.8386840820312, "learning_rate": 4.678205864305184e-05, "loss": 78.1074, "step": 61740 }, { "epoch": 0.24947781364512336, "grad_norm": 542.1369018554688, "learning_rate": 4.678034527800474e-05, "loss": 70.0832, "step": 61750 }, { "epoch": 0.249518214910491, "grad_norm": 945.9921875, "learning_rate": 4.677863148833859e-05, "loss": 87.9454, "step": 61760 }, { "epoch": 0.24955861617585862, "grad_norm": 443.54583740234375, "learning_rate": 4.6776917274086806e-05, "loss": 63.2674, "step": 61770 }, { "epoch": 0.24959901744122626, "grad_norm": 485.80865478515625, "learning_rate": 4.67752026352828e-05, "loss": 86.9183, "step": 61780 }, { "epoch": 0.2496394187065939, "grad_norm": 514.6878051757812, "learning_rate": 4.677348757196002e-05, "loss": 95.4372, "step": 61790 }, { "epoch": 0.2496798199719615, "grad_norm": 1996.046142578125, "learning_rate": 4.6771772084151885e-05, "loss": 93.6957, "step": 61800 }, { "epoch": 0.24972022123732915, "grad_norm": 893.337646484375, "learning_rate": 4.6770056171891846e-05, "loss": 75.4777, "step": 61810 }, { "epoch": 0.2497606225026968, "grad_norm": 999.063232421875, "learning_rate": 4.676833983521335e-05, "loss": 133.2376, "step": 61820 }, { "epoch": 0.24980102376806443, "grad_norm": 1203.1588134765625, "learning_rate": 4.676662307414987e-05, "loss": 92.8217, "step": 61830 }, { "epoch": 0.24984142503343204, "grad_norm": 1015.3056030273438, "learning_rate": 4.676490588873486e-05, "loss": 71.3694, "step": 61840 }, { "epoch": 0.24988182629879968, "grad_norm": 504.0675354003906, "learning_rate": 4.6763188279001804e-05, "loss": 37.9027, "step": 61850 }, { "epoch": 0.24992222756416732, "grad_norm": 1208.43310546875, "learning_rate": 4.6761470244984196e-05, "loss": 79.1104, "step": 61860 }, { "epoch": 0.24996262882953493, "grad_norm": 1380.7022705078125, "learning_rate": 4.675975178671551e-05, "loss": 59.1448, "step": 61870 }, { "epoch": 0.25000303009490255, "grad_norm": 348.9476623535156, "learning_rate": 4.675803290422927e-05, "loss": 85.426, "step": 61880 }, { "epoch": 0.2500434313602702, "grad_norm": 709.4434204101562, "learning_rate": 4.6756313597558977e-05, "loss": 52.583, "step": 61890 }, { "epoch": 0.2500838326256378, "grad_norm": 490.9687805175781, "learning_rate": 4.675459386673815e-05, "loss": 84.7173, "step": 61900 }, { "epoch": 0.25012423389100547, "grad_norm": 935.8824462890625, "learning_rate": 4.6752873711800306e-05, "loss": 61.2777, "step": 61910 }, { "epoch": 0.2501646351563731, "grad_norm": 1041.913818359375, "learning_rate": 4.6751153132779e-05, "loss": 90.0399, "step": 61920 }, { "epoch": 0.25020503642174075, "grad_norm": 516.0934448242188, "learning_rate": 4.674943212970776e-05, "loss": 53.4762, "step": 61930 }, { "epoch": 0.2502454376871084, "grad_norm": 1382.7197265625, "learning_rate": 4.674771070262014e-05, "loss": 128.2385, "step": 61940 }, { "epoch": 0.25028583895247597, "grad_norm": 759.5361938476562, "learning_rate": 4.67459888515497e-05, "loss": 56.0513, "step": 61950 }, { "epoch": 0.2503262402178436, "grad_norm": 1789.28466796875, "learning_rate": 4.674426657653003e-05, "loss": 59.7396, "step": 61960 }, { "epoch": 0.25036664148321125, "grad_norm": 456.0893859863281, "learning_rate": 4.6742543877594675e-05, "loss": 42.1051, "step": 61970 }, { "epoch": 0.2504070427485789, "grad_norm": 1111.8485107421875, "learning_rate": 4.6740820754777235e-05, "loss": 62.1872, "step": 61980 }, { "epoch": 0.25044744401394653, "grad_norm": 927.1618041992188, "learning_rate": 4.6739097208111306e-05, "loss": 67.374, "step": 61990 }, { "epoch": 0.25048784527931417, "grad_norm": 898.595458984375, "learning_rate": 4.6737373237630476e-05, "loss": 97.9774, "step": 62000 }, { "epoch": 0.25052824654468175, "grad_norm": 0.0, "learning_rate": 4.6735648843368376e-05, "loss": 87.7824, "step": 62010 }, { "epoch": 0.2505686478100494, "grad_norm": 850.822265625, "learning_rate": 4.6733924025358597e-05, "loss": 58.0294, "step": 62020 }, { "epoch": 0.25060904907541703, "grad_norm": 1727.8907470703125, "learning_rate": 4.673219878363479e-05, "loss": 71.4364, "step": 62030 }, { "epoch": 0.2506494503407847, "grad_norm": 878.0247802734375, "learning_rate": 4.6730473118230575e-05, "loss": 90.9596, "step": 62040 }, { "epoch": 0.2506898516061523, "grad_norm": 1600.9791259765625, "learning_rate": 4.67287470291796e-05, "loss": 64.3531, "step": 62050 }, { "epoch": 0.25073025287151995, "grad_norm": 617.0396118164062, "learning_rate": 4.672702051651552e-05, "loss": 60.0307, "step": 62060 }, { "epoch": 0.2507706541368876, "grad_norm": 583.3132934570312, "learning_rate": 4.672529358027198e-05, "loss": 82.0429, "step": 62070 }, { "epoch": 0.2508110554022552, "grad_norm": 579.264892578125, "learning_rate": 4.6723566220482664e-05, "loss": 71.0199, "step": 62080 }, { "epoch": 0.2508514566676228, "grad_norm": 723.0690307617188, "learning_rate": 4.672183843718123e-05, "loss": 72.1286, "step": 62090 }, { "epoch": 0.25089185793299046, "grad_norm": 543.1493530273438, "learning_rate": 4.672011023040138e-05, "loss": 64.0717, "step": 62100 }, { "epoch": 0.2509322591983581, "grad_norm": 724.9541625976562, "learning_rate": 4.671838160017681e-05, "loss": 63.9457, "step": 62110 }, { "epoch": 0.25097266046372574, "grad_norm": 565.6871948242188, "learning_rate": 4.6716652546541194e-05, "loss": 52.1139, "step": 62120 }, { "epoch": 0.2510130617290934, "grad_norm": 1354.7672119140625, "learning_rate": 4.671492306952826e-05, "loss": 76.9495, "step": 62130 }, { "epoch": 0.25105346299446096, "grad_norm": 636.530029296875, "learning_rate": 4.6713193169171724e-05, "loss": 55.6664, "step": 62140 }, { "epoch": 0.2510938642598286, "grad_norm": 1726.2562255859375, "learning_rate": 4.6711462845505304e-05, "loss": 81.8684, "step": 62150 }, { "epoch": 0.25113426552519624, "grad_norm": 742.2718505859375, "learning_rate": 4.6709732098562745e-05, "loss": 68.1843, "step": 62160 }, { "epoch": 0.2511746667905639, "grad_norm": 725.1959228515625, "learning_rate": 4.670800092837777e-05, "loss": 62.3066, "step": 62170 }, { "epoch": 0.2512150680559315, "grad_norm": 1269.651123046875, "learning_rate": 4.670626933498415e-05, "loss": 75.0705, "step": 62180 }, { "epoch": 0.25125546932129916, "grad_norm": 1048.4793701171875, "learning_rate": 4.670453731841563e-05, "loss": 67.2275, "step": 62190 }, { "epoch": 0.25129587058666675, "grad_norm": 1658.6903076171875, "learning_rate": 4.670280487870598e-05, "loss": 87.6034, "step": 62200 }, { "epoch": 0.2513362718520344, "grad_norm": 706.1924438476562, "learning_rate": 4.670107201588898e-05, "loss": 66.6542, "step": 62210 }, { "epoch": 0.251376673117402, "grad_norm": 1182.62939453125, "learning_rate": 4.669933872999841e-05, "loss": 61.4396, "step": 62220 }, { "epoch": 0.25141707438276967, "grad_norm": 973.4239501953125, "learning_rate": 4.669760502106805e-05, "loss": 49.4658, "step": 62230 }, { "epoch": 0.2514574756481373, "grad_norm": 954.9755859375, "learning_rate": 4.6695870889131724e-05, "loss": 58.9961, "step": 62240 }, { "epoch": 0.25149787691350495, "grad_norm": 983.7535400390625, "learning_rate": 4.669413633422322e-05, "loss": 67.7966, "step": 62250 }, { "epoch": 0.2515382781788726, "grad_norm": 934.3507690429688, "learning_rate": 4.669240135637635e-05, "loss": 64.6385, "step": 62260 }, { "epoch": 0.25157867944424017, "grad_norm": 535.4703369140625, "learning_rate": 4.669066595562496e-05, "loss": 99.6696, "step": 62270 }, { "epoch": 0.2516190807096078, "grad_norm": 887.987060546875, "learning_rate": 4.668893013200286e-05, "loss": 66.7841, "step": 62280 }, { "epoch": 0.25165948197497545, "grad_norm": 1214.275146484375, "learning_rate": 4.66871938855439e-05, "loss": 75.537, "step": 62290 }, { "epoch": 0.2516998832403431, "grad_norm": 572.87158203125, "learning_rate": 4.6685457216281936e-05, "loss": 67.1349, "step": 62300 }, { "epoch": 0.25174028450571073, "grad_norm": 507.9439697265625, "learning_rate": 4.668372012425082e-05, "loss": 79.3798, "step": 62310 }, { "epoch": 0.25178068577107837, "grad_norm": 265.6028747558594, "learning_rate": 4.6681982609484416e-05, "loss": 57.9518, "step": 62320 }, { "epoch": 0.25182108703644596, "grad_norm": 2266.7421875, "learning_rate": 4.6680244672016595e-05, "loss": 74.0739, "step": 62330 }, { "epoch": 0.2518614883018136, "grad_norm": 727.9891967773438, "learning_rate": 4.6678506311881245e-05, "loss": 107.9434, "step": 62340 }, { "epoch": 0.25190188956718124, "grad_norm": 769.7603759765625, "learning_rate": 4.667676752911225e-05, "loss": 62.9798, "step": 62350 }, { "epoch": 0.2519422908325489, "grad_norm": 367.056396484375, "learning_rate": 4.667502832374352e-05, "loss": 76.6132, "step": 62360 }, { "epoch": 0.2519826920979165, "grad_norm": 1385.6729736328125, "learning_rate": 4.667328869580895e-05, "loss": 56.0849, "step": 62370 }, { "epoch": 0.25202309336328416, "grad_norm": 509.8416442871094, "learning_rate": 4.6671548645342456e-05, "loss": 59.8157, "step": 62380 }, { "epoch": 0.2520634946286518, "grad_norm": 1469.747314453125, "learning_rate": 4.666980817237797e-05, "loss": 89.1393, "step": 62390 }, { "epoch": 0.2521038958940194, "grad_norm": 1213.3016357421875, "learning_rate": 4.6668067276949414e-05, "loss": 59.1252, "step": 62400 }, { "epoch": 0.252144297159387, "grad_norm": 990.9741821289062, "learning_rate": 4.666632595909072e-05, "loss": 68.1975, "step": 62410 }, { "epoch": 0.25218469842475466, "grad_norm": 916.1857299804688, "learning_rate": 4.666458421883586e-05, "loss": 66.8495, "step": 62420 }, { "epoch": 0.2522250996901223, "grad_norm": 381.66485595703125, "learning_rate": 4.666284205621877e-05, "loss": 91.8923, "step": 62430 }, { "epoch": 0.25226550095548994, "grad_norm": 894.6453247070312, "learning_rate": 4.666109947127343e-05, "loss": 76.0347, "step": 62440 }, { "epoch": 0.2523059022208576, "grad_norm": 455.5608215332031, "learning_rate": 4.6659356464033795e-05, "loss": 54.6976, "step": 62450 }, { "epoch": 0.25234630348622517, "grad_norm": 581.934814453125, "learning_rate": 4.6657613034533866e-05, "loss": 60.0454, "step": 62460 }, { "epoch": 0.2523867047515928, "grad_norm": 911.0252685546875, "learning_rate": 4.665586918280761e-05, "loss": 102.2392, "step": 62470 }, { "epoch": 0.25242710601696045, "grad_norm": 1507.02685546875, "learning_rate": 4.665412490888904e-05, "loss": 90.9116, "step": 62480 }, { "epoch": 0.2524675072823281, "grad_norm": 2237.4755859375, "learning_rate": 4.6652380212812155e-05, "loss": 93.2901, "step": 62490 }, { "epoch": 0.2525079085476957, "grad_norm": 816.4346923828125, "learning_rate": 4.665063509461097e-05, "loss": 80.2981, "step": 62500 }, { "epoch": 0.25254830981306337, "grad_norm": 769.7588500976562, "learning_rate": 4.6648889554319506e-05, "loss": 55.1148, "step": 62510 }, { "epoch": 0.25258871107843095, "grad_norm": 401.32989501953125, "learning_rate": 4.66471435919718e-05, "loss": 67.0569, "step": 62520 }, { "epoch": 0.2526291123437986, "grad_norm": 888.2326049804688, "learning_rate": 4.6645397207601884e-05, "loss": 78.4589, "step": 62530 }, { "epoch": 0.25266951360916623, "grad_norm": 310.5882568359375, "learning_rate": 4.66436504012438e-05, "loss": 98.2279, "step": 62540 }, { "epoch": 0.25270991487453387, "grad_norm": 984.7472534179688, "learning_rate": 4.664190317293161e-05, "loss": 97.2269, "step": 62550 }, { "epoch": 0.2527503161399015, "grad_norm": 793.189697265625, "learning_rate": 4.6640155522699374e-05, "loss": 127.4911, "step": 62560 }, { "epoch": 0.25279071740526915, "grad_norm": 2629.53466796875, "learning_rate": 4.6638407450581165e-05, "loss": 95.0568, "step": 62570 }, { "epoch": 0.2528311186706368, "grad_norm": 380.42779541015625, "learning_rate": 4.663665895661107e-05, "loss": 81.795, "step": 62580 }, { "epoch": 0.2528715199360044, "grad_norm": 690.9551391601562, "learning_rate": 4.663491004082316e-05, "loss": 50.0963, "step": 62590 }, { "epoch": 0.252911921201372, "grad_norm": 1449.210693359375, "learning_rate": 4.6633160703251554e-05, "loss": 65.8343, "step": 62600 }, { "epoch": 0.25295232246673965, "grad_norm": 1021.6344604492188, "learning_rate": 4.6631410943930334e-05, "loss": 107.6034, "step": 62610 }, { "epoch": 0.2529927237321073, "grad_norm": 869.527099609375, "learning_rate": 4.662966076289362e-05, "loss": 102.0092, "step": 62620 }, { "epoch": 0.25303312499747493, "grad_norm": 584.1009521484375, "learning_rate": 4.662791016017554e-05, "loss": 69.7825, "step": 62630 }, { "epoch": 0.2530735262628426, "grad_norm": 802.0068359375, "learning_rate": 4.6626159135810205e-05, "loss": 72.057, "step": 62640 }, { "epoch": 0.25311392752821016, "grad_norm": 2002.407958984375, "learning_rate": 4.662440768983177e-05, "loss": 76.6827, "step": 62650 }, { "epoch": 0.2531543287935778, "grad_norm": 573.6917724609375, "learning_rate": 4.662265582227438e-05, "loss": 116.2082, "step": 62660 }, { "epoch": 0.25319473005894544, "grad_norm": 1771.079345703125, "learning_rate": 4.662090353317217e-05, "loss": 81.9311, "step": 62670 }, { "epoch": 0.2532351313243131, "grad_norm": 1918.1868896484375, "learning_rate": 4.661915082255932e-05, "loss": 109.1544, "step": 62680 }, { "epoch": 0.2532755325896807, "grad_norm": 1021.0386962890625, "learning_rate": 4.661739769047e-05, "loss": 71.0243, "step": 62690 }, { "epoch": 0.25331593385504836, "grad_norm": 1025.5999755859375, "learning_rate": 4.6615644136938375e-05, "loss": 69.5765, "step": 62700 }, { "epoch": 0.253356335120416, "grad_norm": 441.26617431640625, "learning_rate": 4.661389016199864e-05, "loss": 48.4432, "step": 62710 }, { "epoch": 0.2533967363857836, "grad_norm": 2817.1181640625, "learning_rate": 4.6612135765685e-05, "loss": 43.8955, "step": 62720 }, { "epoch": 0.2534371376511512, "grad_norm": 539.3783569335938, "learning_rate": 4.6610380948031627e-05, "loss": 63.5467, "step": 62730 }, { "epoch": 0.25347753891651886, "grad_norm": 735.4268188476562, "learning_rate": 4.660862570907277e-05, "loss": 107.1801, "step": 62740 }, { "epoch": 0.2535179401818865, "grad_norm": 1085.6962890625, "learning_rate": 4.6606870048842624e-05, "loss": 88.6567, "step": 62750 }, { "epoch": 0.25355834144725414, "grad_norm": 488.97369384765625, "learning_rate": 4.660511396737541e-05, "loss": 74.2245, "step": 62760 }, { "epoch": 0.2535987427126218, "grad_norm": 3399.495361328125, "learning_rate": 4.660335746470539e-05, "loss": 188.06, "step": 62770 }, { "epoch": 0.25363914397798937, "grad_norm": 515.9884033203125, "learning_rate": 4.6601600540866794e-05, "loss": 61.2756, "step": 62780 }, { "epoch": 0.253679545243357, "grad_norm": 328.14276123046875, "learning_rate": 4.659984319589387e-05, "loss": 66.6262, "step": 62790 }, { "epoch": 0.25371994650872465, "grad_norm": 1215.5234375, "learning_rate": 4.659808542982088e-05, "loss": 107.4222, "step": 62800 }, { "epoch": 0.2537603477740923, "grad_norm": 1078.8668212890625, "learning_rate": 4.65963272426821e-05, "loss": 101.8976, "step": 62810 }, { "epoch": 0.2538007490394599, "grad_norm": 620.238037109375, "learning_rate": 4.659456863451181e-05, "loss": 49.1838, "step": 62820 }, { "epoch": 0.25384115030482757, "grad_norm": 357.8902587890625, "learning_rate": 4.6592809605344276e-05, "loss": 76.9179, "step": 62830 }, { "epoch": 0.25388155157019515, "grad_norm": 763.6502075195312, "learning_rate": 4.65910501552138e-05, "loss": 68.4214, "step": 62840 }, { "epoch": 0.2539219528355628, "grad_norm": 650.4541015625, "learning_rate": 4.658929028415468e-05, "loss": 67.5361, "step": 62850 }, { "epoch": 0.25396235410093043, "grad_norm": 1105.059814453125, "learning_rate": 4.658752999220125e-05, "loss": 107.3979, "step": 62860 }, { "epoch": 0.25400275536629807, "grad_norm": 1753.42822265625, "learning_rate": 4.65857692793878e-05, "loss": 59.3314, "step": 62870 }, { "epoch": 0.2540431566316657, "grad_norm": 610.6260375976562, "learning_rate": 4.6584008145748656e-05, "loss": 75.3817, "step": 62880 }, { "epoch": 0.25408355789703335, "grad_norm": 815.94482421875, "learning_rate": 4.6582246591318175e-05, "loss": 49.2156, "step": 62890 }, { "epoch": 0.254123959162401, "grad_norm": 802.24072265625, "learning_rate": 4.658048461613068e-05, "loss": 84.062, "step": 62900 }, { "epoch": 0.2541643604277686, "grad_norm": 517.5693969726562, "learning_rate": 4.6578722220220525e-05, "loss": 101.054, "step": 62910 }, { "epoch": 0.2542047616931362, "grad_norm": 948.2103271484375, "learning_rate": 4.657695940362207e-05, "loss": 78.7945, "step": 62920 }, { "epoch": 0.25424516295850386, "grad_norm": 751.4588623046875, "learning_rate": 4.657519616636968e-05, "loss": 68.9439, "step": 62930 }, { "epoch": 0.2542855642238715, "grad_norm": 209.2197723388672, "learning_rate": 4.6573432508497735e-05, "loss": 68.4884, "step": 62940 }, { "epoch": 0.25432596548923914, "grad_norm": 793.4098510742188, "learning_rate": 4.6571668430040625e-05, "loss": 97.053, "step": 62950 }, { "epoch": 0.2543663667546068, "grad_norm": 606.1543579101562, "learning_rate": 4.6569903931032735e-05, "loss": 69.4006, "step": 62960 }, { "epoch": 0.25440676801997436, "grad_norm": 757.6336059570312, "learning_rate": 4.656813901150845e-05, "loss": 79.5474, "step": 62970 }, { "epoch": 0.254447169285342, "grad_norm": 866.605712890625, "learning_rate": 4.6566373671502196e-05, "loss": 56.2643, "step": 62980 }, { "epoch": 0.25448757055070964, "grad_norm": 121.89437866210938, "learning_rate": 4.656460791104839e-05, "loss": 48.1909, "step": 62990 }, { "epoch": 0.2545279718160773, "grad_norm": 1390.025390625, "learning_rate": 4.656284173018144e-05, "loss": 107.6447, "step": 63000 }, { "epoch": 0.2545683730814449, "grad_norm": 1153.6959228515625, "learning_rate": 4.656107512893579e-05, "loss": 75.1273, "step": 63010 }, { "epoch": 0.25460877434681256, "grad_norm": 441.3359069824219, "learning_rate": 4.655930810734589e-05, "loss": 46.4667, "step": 63020 }, { "epoch": 0.2546491756121802, "grad_norm": 914.875732421875, "learning_rate": 4.655754066544617e-05, "loss": 58.5283, "step": 63030 }, { "epoch": 0.2546895768775478, "grad_norm": 1101.917236328125, "learning_rate": 4.65557728032711e-05, "loss": 84.4799, "step": 63040 }, { "epoch": 0.2547299781429154, "grad_norm": 985.5469360351562, "learning_rate": 4.655400452085514e-05, "loss": 63.2999, "step": 63050 }, { "epoch": 0.25477037940828307, "grad_norm": 596.2723999023438, "learning_rate": 4.6552235818232764e-05, "loss": 72.8403, "step": 63060 }, { "epoch": 0.2548107806736507, "grad_norm": 654.357666015625, "learning_rate": 4.655046669543845e-05, "loss": 57.4079, "step": 63070 }, { "epoch": 0.25485118193901835, "grad_norm": 634.196044921875, "learning_rate": 4.6548697152506705e-05, "loss": 80.194, "step": 63080 }, { "epoch": 0.254891583204386, "grad_norm": 701.0132446289062, "learning_rate": 4.6546927189472014e-05, "loss": 46.6557, "step": 63090 }, { "epoch": 0.25493198446975357, "grad_norm": 1064.72412109375, "learning_rate": 4.654515680636888e-05, "loss": 73.7024, "step": 63100 }, { "epoch": 0.2549723857351212, "grad_norm": 967.7996826171875, "learning_rate": 4.654338600323182e-05, "loss": 62.9957, "step": 63110 }, { "epoch": 0.25501278700048885, "grad_norm": 533.4601440429688, "learning_rate": 4.654161478009536e-05, "loss": 63.6593, "step": 63120 }, { "epoch": 0.2550531882658565, "grad_norm": 588.319580078125, "learning_rate": 4.6539843136994036e-05, "loss": 60.8479, "step": 63130 }, { "epoch": 0.25509358953122413, "grad_norm": 1351.3424072265625, "learning_rate": 4.653807107396237e-05, "loss": 87.2913, "step": 63140 }, { "epoch": 0.25513399079659177, "grad_norm": 780.9151000976562, "learning_rate": 4.653629859103492e-05, "loss": 64.7784, "step": 63150 }, { "epoch": 0.25517439206195935, "grad_norm": 618.2050170898438, "learning_rate": 4.653452568824625e-05, "loss": 44.7988, "step": 63160 }, { "epoch": 0.255214793327327, "grad_norm": 295.4721984863281, "learning_rate": 4.653275236563091e-05, "loss": 83.7096, "step": 63170 }, { "epoch": 0.25525519459269463, "grad_norm": 567.467041015625, "learning_rate": 4.653097862322348e-05, "loss": 47.7101, "step": 63180 }, { "epoch": 0.2552955958580623, "grad_norm": 1230.712646484375, "learning_rate": 4.652920446105853e-05, "loss": 82.6227, "step": 63190 }, { "epoch": 0.2553359971234299, "grad_norm": 1003.6646118164062, "learning_rate": 4.652742987917066e-05, "loss": 82.4499, "step": 63200 }, { "epoch": 0.25537639838879755, "grad_norm": 488.0013122558594, "learning_rate": 4.652565487759446e-05, "loss": 69.6767, "step": 63210 }, { "epoch": 0.2554167996541652, "grad_norm": 423.2750244140625, "learning_rate": 4.652387945636454e-05, "loss": 99.7746, "step": 63220 }, { "epoch": 0.2554572009195328, "grad_norm": 531.3796997070312, "learning_rate": 4.65221036155155e-05, "loss": 65.6895, "step": 63230 }, { "epoch": 0.2554976021849004, "grad_norm": 664.51513671875, "learning_rate": 4.652032735508198e-05, "loss": 76.6833, "step": 63240 }, { "epoch": 0.25553800345026806, "grad_norm": 1186.4224853515625, "learning_rate": 4.65185506750986e-05, "loss": 75.191, "step": 63250 }, { "epoch": 0.2555784047156357, "grad_norm": 1025.0635986328125, "learning_rate": 4.651677357559998e-05, "loss": 79.5044, "step": 63260 }, { "epoch": 0.25561880598100334, "grad_norm": 1971.0733642578125, "learning_rate": 4.65149960566208e-05, "loss": 78.158, "step": 63270 }, { "epoch": 0.255659207246371, "grad_norm": 601.2109375, "learning_rate": 4.651321811819568e-05, "loss": 109.8638, "step": 63280 }, { "epoch": 0.25569960851173856, "grad_norm": 926.903076171875, "learning_rate": 4.65114397603593e-05, "loss": 67.0008, "step": 63290 }, { "epoch": 0.2557400097771062, "grad_norm": 543.5036010742188, "learning_rate": 4.6509660983146334e-05, "loss": 90.5155, "step": 63300 }, { "epoch": 0.25578041104247384, "grad_norm": 1474.077392578125, "learning_rate": 4.650788178659146e-05, "loss": 76.4021, "step": 63310 }, { "epoch": 0.2558208123078415, "grad_norm": 1020.8681030273438, "learning_rate": 4.650610217072934e-05, "loss": 78.0826, "step": 63320 }, { "epoch": 0.2558612135732091, "grad_norm": 963.8372192382812, "learning_rate": 4.650432213559469e-05, "loss": 63.825, "step": 63330 }, { "epoch": 0.25590161483857676, "grad_norm": 1577.4927978515625, "learning_rate": 4.650254168122222e-05, "loss": 76.4572, "step": 63340 }, { "epoch": 0.2559420161039444, "grad_norm": 985.75439453125, "learning_rate": 4.650076080764662e-05, "loss": 45.7849, "step": 63350 }, { "epoch": 0.255982417369312, "grad_norm": 651.5304565429688, "learning_rate": 4.649897951490262e-05, "loss": 86.52, "step": 63360 }, { "epoch": 0.2560228186346796, "grad_norm": 1333.999267578125, "learning_rate": 4.649719780302495e-05, "loss": 59.2886, "step": 63370 }, { "epoch": 0.25606321990004727, "grad_norm": 926.7659912109375, "learning_rate": 4.649541567204834e-05, "loss": 91.8786, "step": 63380 }, { "epoch": 0.2561036211654149, "grad_norm": 720.6255493164062, "learning_rate": 4.649363312200753e-05, "loss": 76.3271, "step": 63390 }, { "epoch": 0.25614402243078255, "grad_norm": 0.0, "learning_rate": 4.649185015293728e-05, "loss": 68.2173, "step": 63400 }, { "epoch": 0.2561844236961502, "grad_norm": 905.1610717773438, "learning_rate": 4.649006676487234e-05, "loss": 72.3964, "step": 63410 }, { "epoch": 0.25622482496151777, "grad_norm": 1136.3037109375, "learning_rate": 4.6488282957847494e-05, "loss": 102.3837, "step": 63420 }, { "epoch": 0.2562652262268854, "grad_norm": 1587.19921875, "learning_rate": 4.648649873189751e-05, "loss": 70.8869, "step": 63430 }, { "epoch": 0.25630562749225305, "grad_norm": 683.1766967773438, "learning_rate": 4.648471408705717e-05, "loss": 78.3768, "step": 63440 }, { "epoch": 0.2563460287576207, "grad_norm": 719.8806762695312, "learning_rate": 4.648292902336126e-05, "loss": 81.2503, "step": 63450 }, { "epoch": 0.25638643002298833, "grad_norm": 1979.6317138671875, "learning_rate": 4.648114354084459e-05, "loss": 129.6083, "step": 63460 }, { "epoch": 0.25642683128835597, "grad_norm": 565.4490356445312, "learning_rate": 4.647935763954198e-05, "loss": 78.8221, "step": 63470 }, { "epoch": 0.25646723255372356, "grad_norm": 446.4573669433594, "learning_rate": 4.647757131948822e-05, "loss": 103.6806, "step": 63480 }, { "epoch": 0.2565076338190912, "grad_norm": 1135.3199462890625, "learning_rate": 4.6475784580718155e-05, "loss": 97.5097, "step": 63490 }, { "epoch": 0.25654803508445884, "grad_norm": 725.8003540039062, "learning_rate": 4.6473997423266614e-05, "loss": 65.7066, "step": 63500 }, { "epoch": 0.2565884363498265, "grad_norm": 844.005859375, "learning_rate": 4.6472209847168435e-05, "loss": 75.9603, "step": 63510 }, { "epoch": 0.2566288376151941, "grad_norm": 555.097412109375, "learning_rate": 4.647042185245847e-05, "loss": 68.8893, "step": 63520 }, { "epoch": 0.25666923888056176, "grad_norm": 434.0204772949219, "learning_rate": 4.646863343917158e-05, "loss": 82.4939, "step": 63530 }, { "epoch": 0.2567096401459294, "grad_norm": 918.038818359375, "learning_rate": 4.646684460734263e-05, "loss": 61.4218, "step": 63540 }, { "epoch": 0.256750041411297, "grad_norm": 617.525390625, "learning_rate": 4.646505535700649e-05, "loss": 55.3601, "step": 63550 }, { "epoch": 0.2567904426766646, "grad_norm": 864.6348266601562, "learning_rate": 4.6463265688198044e-05, "loss": 76.1844, "step": 63560 }, { "epoch": 0.25683084394203226, "grad_norm": 2091.900390625, "learning_rate": 4.6461475600952184e-05, "loss": 76.1381, "step": 63570 }, { "epoch": 0.2568712452073999, "grad_norm": 1115.5269775390625, "learning_rate": 4.645968509530381e-05, "loss": 69.8563, "step": 63580 }, { "epoch": 0.25691164647276754, "grad_norm": 808.4495849609375, "learning_rate": 4.645789417128783e-05, "loss": 65.0377, "step": 63590 }, { "epoch": 0.2569520477381352, "grad_norm": 867.1176147460938, "learning_rate": 4.645610282893915e-05, "loss": 78.228, "step": 63600 }, { "epoch": 0.25699244900350277, "grad_norm": 452.4241943359375, "learning_rate": 4.64543110682927e-05, "loss": 80.4785, "step": 63610 }, { "epoch": 0.2570328502688704, "grad_norm": 626.6029663085938, "learning_rate": 4.6452518889383414e-05, "loss": 68.1861, "step": 63620 }, { "epoch": 0.25707325153423805, "grad_norm": 561.9072265625, "learning_rate": 4.645072629224622e-05, "loss": 68.4941, "step": 63630 }, { "epoch": 0.2571136527996057, "grad_norm": 495.2870788574219, "learning_rate": 4.6448933276916076e-05, "loss": 78.5616, "step": 63640 }, { "epoch": 0.2571540540649733, "grad_norm": 757.0482788085938, "learning_rate": 4.644713984342793e-05, "loss": 61.9383, "step": 63650 }, { "epoch": 0.25719445533034097, "grad_norm": 666.8660278320312, "learning_rate": 4.644534599181677e-05, "loss": 60.4049, "step": 63660 }, { "epoch": 0.2572348565957086, "grad_norm": 487.714111328125, "learning_rate": 4.644355172211753e-05, "loss": 65.7735, "step": 63670 }, { "epoch": 0.2572752578610762, "grad_norm": 403.6894836425781, "learning_rate": 4.644175703436522e-05, "loss": 74.5089, "step": 63680 }, { "epoch": 0.25731565912644383, "grad_norm": 806.9922485351562, "learning_rate": 4.643996192859481e-05, "loss": 47.5238, "step": 63690 }, { "epoch": 0.25735606039181147, "grad_norm": 1357.157470703125, "learning_rate": 4.643816640484131e-05, "loss": 86.1968, "step": 63700 }, { "epoch": 0.2573964616571791, "grad_norm": 1357.3590087890625, "learning_rate": 4.643637046313972e-05, "loss": 82.4154, "step": 63710 }, { "epoch": 0.25743686292254675, "grad_norm": 1421.4974365234375, "learning_rate": 4.6434574103525044e-05, "loss": 91.1663, "step": 63720 }, { "epoch": 0.2574772641879144, "grad_norm": 1042.117431640625, "learning_rate": 4.6432777326032316e-05, "loss": 86.4203, "step": 63730 }, { "epoch": 0.257517665453282, "grad_norm": 1068.4251708984375, "learning_rate": 4.6430980130696555e-05, "loss": 79.4296, "step": 63740 }, { "epoch": 0.2575580667186496, "grad_norm": 709.8405151367188, "learning_rate": 4.642918251755281e-05, "loss": 59.4794, "step": 63750 }, { "epoch": 0.25759846798401725, "grad_norm": 752.4713745117188, "learning_rate": 4.6427384486636113e-05, "loss": 80.5621, "step": 63760 }, { "epoch": 0.2576388692493849, "grad_norm": 500.284423828125, "learning_rate": 4.6425586037981526e-05, "loss": 70.1939, "step": 63770 }, { "epoch": 0.25767927051475253, "grad_norm": 1230.3875732421875, "learning_rate": 4.6423787171624114e-05, "loss": 93.3144, "step": 63780 }, { "epoch": 0.2577196717801202, "grad_norm": 375.65838623046875, "learning_rate": 4.642198788759894e-05, "loss": 83.3093, "step": 63790 }, { "epoch": 0.25776007304548776, "grad_norm": 1165.2003173828125, "learning_rate": 4.642018818594107e-05, "loss": 67.3937, "step": 63800 }, { "epoch": 0.2578004743108554, "grad_norm": 1046.146728515625, "learning_rate": 4.641838806668562e-05, "loss": 72.0059, "step": 63810 }, { "epoch": 0.25784087557622304, "grad_norm": 695.942138671875, "learning_rate": 4.6416587529867664e-05, "loss": 54.245, "step": 63820 }, { "epoch": 0.2578812768415907, "grad_norm": 1145.5577392578125, "learning_rate": 4.6414786575522306e-05, "loss": 88.9208, "step": 63830 }, { "epoch": 0.2579216781069583, "grad_norm": 453.61181640625, "learning_rate": 4.6412985203684654e-05, "loss": 66.1485, "step": 63840 }, { "epoch": 0.25796207937232596, "grad_norm": 765.479248046875, "learning_rate": 4.6411183414389837e-05, "loss": 74.9149, "step": 63850 }, { "epoch": 0.2580024806376936, "grad_norm": 794.6134643554688, "learning_rate": 4.6409381207672974e-05, "loss": 58.8936, "step": 63860 }, { "epoch": 0.2580428819030612, "grad_norm": 582.8801879882812, "learning_rate": 4.64075785835692e-05, "loss": 95.4306, "step": 63870 }, { "epoch": 0.2580832831684288, "grad_norm": 1164.59619140625, "learning_rate": 4.640577554211366e-05, "loss": 64.1848, "step": 63880 }, { "epoch": 0.25812368443379646, "grad_norm": 1310.0181884765625, "learning_rate": 4.640397208334151e-05, "loss": 63.126, "step": 63890 }, { "epoch": 0.2581640856991641, "grad_norm": 571.4081420898438, "learning_rate": 4.64021682072879e-05, "loss": 42.7484, "step": 63900 }, { "epoch": 0.25820448696453174, "grad_norm": 726.766845703125, "learning_rate": 4.640036391398801e-05, "loss": 43.3208, "step": 63910 }, { "epoch": 0.2582448882298994, "grad_norm": 754.6783447265625, "learning_rate": 4.639855920347701e-05, "loss": 110.9717, "step": 63920 }, { "epoch": 0.25828528949526697, "grad_norm": 1137.88818359375, "learning_rate": 4.639675407579007e-05, "loss": 70.8244, "step": 63930 }, { "epoch": 0.2583256907606346, "grad_norm": 508.732421875, "learning_rate": 4.6394948530962396e-05, "loss": 55.6226, "step": 63940 }, { "epoch": 0.25836609202600225, "grad_norm": 405.8502197265625, "learning_rate": 4.639314256902919e-05, "loss": 51.9442, "step": 63950 }, { "epoch": 0.2584064932913699, "grad_norm": 941.8868408203125, "learning_rate": 4.6391336190025644e-05, "loss": 97.1299, "step": 63960 }, { "epoch": 0.2584468945567375, "grad_norm": 368.6508483886719, "learning_rate": 4.6389529393987e-05, "loss": 52.9645, "step": 63970 }, { "epoch": 0.25848729582210517, "grad_norm": 846.4256591796875, "learning_rate": 4.638772218094847e-05, "loss": 76.5249, "step": 63980 }, { "epoch": 0.25852769708747275, "grad_norm": 552.8169555664062, "learning_rate": 4.638591455094527e-05, "loss": 58.1772, "step": 63990 }, { "epoch": 0.2585680983528404, "grad_norm": 1205.1046142578125, "learning_rate": 4.638410650401267e-05, "loss": 59.0313, "step": 64000 }, { "epoch": 0.25860849961820803, "grad_norm": 979.0299682617188, "learning_rate": 4.63822980401859e-05, "loss": 75.0273, "step": 64010 }, { "epoch": 0.25864890088357567, "grad_norm": 1404.583740234375, "learning_rate": 4.638048915950022e-05, "loss": 77.1647, "step": 64020 }, { "epoch": 0.2586893021489433, "grad_norm": 1869.6007080078125, "learning_rate": 4.637867986199089e-05, "loss": 57.802, "step": 64030 }, { "epoch": 0.25872970341431095, "grad_norm": 502.42138671875, "learning_rate": 4.6376870147693196e-05, "loss": 62.1255, "step": 64040 }, { "epoch": 0.2587701046796786, "grad_norm": 700.3970947265625, "learning_rate": 4.6375060016642415e-05, "loss": 72.0223, "step": 64050 }, { "epoch": 0.2588105059450462, "grad_norm": 1100.4659423828125, "learning_rate": 4.6373249468873833e-05, "loss": 84.0306, "step": 64060 }, { "epoch": 0.2588509072104138, "grad_norm": 818.85595703125, "learning_rate": 4.637143850442275e-05, "loss": 82.6929, "step": 64070 }, { "epoch": 0.25889130847578146, "grad_norm": 373.8831787109375, "learning_rate": 4.6369627123324465e-05, "loss": 65.1804, "step": 64080 }, { "epoch": 0.2589317097411491, "grad_norm": 932.3283081054688, "learning_rate": 4.6367815325614306e-05, "loss": 72.5139, "step": 64090 }, { "epoch": 0.25897211100651674, "grad_norm": 1563.8875732421875, "learning_rate": 4.636600311132758e-05, "loss": 100.7779, "step": 64100 }, { "epoch": 0.2590125122718844, "grad_norm": 1865.01220703125, "learning_rate": 4.6364190480499624e-05, "loss": 105.2925, "step": 64110 }, { "epoch": 0.25905291353725196, "grad_norm": 947.768798828125, "learning_rate": 4.636237743316578e-05, "loss": 58.8598, "step": 64120 }, { "epoch": 0.2590933148026196, "grad_norm": 663.779052734375, "learning_rate": 4.636056396936138e-05, "loss": 97.7154, "step": 64130 }, { "epoch": 0.25913371606798724, "grad_norm": 814.3782958984375, "learning_rate": 4.6358750089121795e-05, "loss": 72.49, "step": 64140 }, { "epoch": 0.2591741173333549, "grad_norm": 634.1887817382812, "learning_rate": 4.635693579248238e-05, "loss": 64.081, "step": 64150 }, { "epoch": 0.2592145185987225, "grad_norm": 1665.0477294921875, "learning_rate": 4.635512107947851e-05, "loss": 60.826, "step": 64160 }, { "epoch": 0.25925491986409016, "grad_norm": 587.009765625, "learning_rate": 4.635330595014555e-05, "loss": 62.3891, "step": 64170 }, { "epoch": 0.2592953211294578, "grad_norm": 1239.5069580078125, "learning_rate": 4.635149040451891e-05, "loss": 83.9093, "step": 64180 }, { "epoch": 0.2593357223948254, "grad_norm": 743.6141357421875, "learning_rate": 4.634967444263397e-05, "loss": 111.2367, "step": 64190 }, { "epoch": 0.259376123660193, "grad_norm": 848.8331909179688, "learning_rate": 4.6347858064526125e-05, "loss": 56.9608, "step": 64200 }, { "epoch": 0.25941652492556067, "grad_norm": 628.3904418945312, "learning_rate": 4.6346041270230804e-05, "loss": 72.7587, "step": 64210 }, { "epoch": 0.2594569261909283, "grad_norm": 849.9680786132812, "learning_rate": 4.634422405978342e-05, "loss": 47.3868, "step": 64220 }, { "epoch": 0.25949732745629595, "grad_norm": 474.90264892578125, "learning_rate": 4.6342406433219394e-05, "loss": 102.6614, "step": 64230 }, { "epoch": 0.2595377287216636, "grad_norm": 2507.705322265625, "learning_rate": 4.634058839057417e-05, "loss": 70.4811, "step": 64240 }, { "epoch": 0.25957812998703117, "grad_norm": 1063.095458984375, "learning_rate": 4.6338769931883185e-05, "loss": 78.3297, "step": 64250 }, { "epoch": 0.2596185312523988, "grad_norm": 622.4473266601562, "learning_rate": 4.63369510571819e-05, "loss": 77.0825, "step": 64260 }, { "epoch": 0.25965893251776645, "grad_norm": 674.91455078125, "learning_rate": 4.633513176650577e-05, "loss": 60.1044, "step": 64270 }, { "epoch": 0.2596993337831341, "grad_norm": 857.1925659179688, "learning_rate": 4.6333312059890256e-05, "loss": 75.2282, "step": 64280 }, { "epoch": 0.25973973504850173, "grad_norm": 1186.2684326171875, "learning_rate": 4.633149193737084e-05, "loss": 90.6651, "step": 64290 }, { "epoch": 0.25978013631386937, "grad_norm": 503.4192810058594, "learning_rate": 4.632967139898301e-05, "loss": 52.2867, "step": 64300 }, { "epoch": 0.25982053757923695, "grad_norm": 0.0, "learning_rate": 4.632785044476225e-05, "loss": 54.2169, "step": 64310 }, { "epoch": 0.2598609388446046, "grad_norm": 808.451904296875, "learning_rate": 4.6326029074744074e-05, "loss": 89.7694, "step": 64320 }, { "epoch": 0.25990134010997223, "grad_norm": 3421.571533203125, "learning_rate": 4.6324207288963974e-05, "loss": 94.0056, "step": 64330 }, { "epoch": 0.2599417413753399, "grad_norm": 300.9707336425781, "learning_rate": 4.632238508745748e-05, "loss": 72.748, "step": 64340 }, { "epoch": 0.2599821426407075, "grad_norm": 1028.27197265625, "learning_rate": 4.632056247026011e-05, "loss": 66.6853, "step": 64350 }, { "epoch": 0.26002254390607515, "grad_norm": 730.0326538085938, "learning_rate": 4.63187394374074e-05, "loss": 90.4567, "step": 64360 }, { "epoch": 0.2600629451714428, "grad_norm": 859.37890625, "learning_rate": 4.6316915988934884e-05, "loss": 76.2773, "step": 64370 }, { "epoch": 0.2601033464368104, "grad_norm": 956.5285034179688, "learning_rate": 4.631509212487811e-05, "loss": 70.2754, "step": 64380 }, { "epoch": 0.260143747702178, "grad_norm": 627.6688232421875, "learning_rate": 4.6313267845272656e-05, "loss": 61.4442, "step": 64390 }, { "epoch": 0.26018414896754566, "grad_norm": 716.159423828125, "learning_rate": 4.631144315015407e-05, "loss": 72.7093, "step": 64400 }, { "epoch": 0.2602245502329133, "grad_norm": 447.9820861816406, "learning_rate": 4.630961803955792e-05, "loss": 66.9453, "step": 64410 }, { "epoch": 0.26026495149828094, "grad_norm": 588.1676025390625, "learning_rate": 4.63077925135198e-05, "loss": 44.5305, "step": 64420 }, { "epoch": 0.2603053527636486, "grad_norm": 594.3401489257812, "learning_rate": 4.6305966572075286e-05, "loss": 60.6098, "step": 64430 }, { "epoch": 0.26034575402901616, "grad_norm": 1224.4241943359375, "learning_rate": 4.630414021525999e-05, "loss": 95.3115, "step": 64440 }, { "epoch": 0.2603861552943838, "grad_norm": 1307.3175048828125, "learning_rate": 4.6302313443109526e-05, "loss": 59.6623, "step": 64450 }, { "epoch": 0.26042655655975144, "grad_norm": 667.8453979492188, "learning_rate": 4.6300486255659484e-05, "loss": 68.9579, "step": 64460 }, { "epoch": 0.2604669578251191, "grad_norm": 648.3145141601562, "learning_rate": 4.6298658652945494e-05, "loss": 72.2152, "step": 64470 }, { "epoch": 0.2605073590904867, "grad_norm": 1166.9095458984375, "learning_rate": 4.629683063500319e-05, "loss": 56.1489, "step": 64480 }, { "epoch": 0.26054776035585436, "grad_norm": 856.700927734375, "learning_rate": 4.629500220186821e-05, "loss": 92.3313, "step": 64490 }, { "epoch": 0.260588161621222, "grad_norm": 1207.9443359375, "learning_rate": 4.629317335357619e-05, "loss": 62.1099, "step": 64500 }, { "epoch": 0.2606285628865896, "grad_norm": 618.7341918945312, "learning_rate": 4.6291344090162804e-05, "loss": 89.1145, "step": 64510 }, { "epoch": 0.2606689641519572, "grad_norm": 1234.6435546875, "learning_rate": 4.62895144116637e-05, "loss": 81.7492, "step": 64520 }, { "epoch": 0.26070936541732487, "grad_norm": 1199.2274169921875, "learning_rate": 4.628768431811455e-05, "loss": 93.2701, "step": 64530 }, { "epoch": 0.2607497666826925, "grad_norm": 787.503662109375, "learning_rate": 4.6285853809551036e-05, "loss": 67.4107, "step": 64540 }, { "epoch": 0.26079016794806015, "grad_norm": 1185.4876708984375, "learning_rate": 4.6284022886008836e-05, "loss": 137.017, "step": 64550 }, { "epoch": 0.2608305692134278, "grad_norm": 1550.175048828125, "learning_rate": 4.628219154752367e-05, "loss": 105.3924, "step": 64560 }, { "epoch": 0.26087097047879537, "grad_norm": 474.1405334472656, "learning_rate": 4.628035979413121e-05, "loss": 63.6596, "step": 64570 }, { "epoch": 0.260911371744163, "grad_norm": 631.3251953125, "learning_rate": 4.627852762586718e-05, "loss": 88.0774, "step": 64580 }, { "epoch": 0.26095177300953065, "grad_norm": 932.6629638671875, "learning_rate": 4.627669504276731e-05, "loss": 69.9022, "step": 64590 }, { "epoch": 0.2609921742748983, "grad_norm": 462.3409423828125, "learning_rate": 4.6274862044867304e-05, "loss": 105.5615, "step": 64600 }, { "epoch": 0.26103257554026593, "grad_norm": 421.7193908691406, "learning_rate": 4.627302863220291e-05, "loss": 83.5629, "step": 64610 }, { "epoch": 0.26107297680563357, "grad_norm": 849.374267578125, "learning_rate": 4.627119480480987e-05, "loss": 108.5556, "step": 64620 }, { "epoch": 0.26111337807100116, "grad_norm": 851.120849609375, "learning_rate": 4.626936056272394e-05, "loss": 76.4113, "step": 64630 }, { "epoch": 0.2611537793363688, "grad_norm": 587.6618041992188, "learning_rate": 4.626752590598088e-05, "loss": 97.3091, "step": 64640 }, { "epoch": 0.26119418060173644, "grad_norm": 1624.475341796875, "learning_rate": 4.6265690834616446e-05, "loss": 66.4535, "step": 64650 }, { "epoch": 0.2612345818671041, "grad_norm": 194.85707092285156, "learning_rate": 4.626385534866642e-05, "loss": 51.6759, "step": 64660 }, { "epoch": 0.2612749831324717, "grad_norm": 1180.03369140625, "learning_rate": 4.626201944816659e-05, "loss": 51.8923, "step": 64670 }, { "epoch": 0.26131538439783936, "grad_norm": 951.8189697265625, "learning_rate": 4.626018313315275e-05, "loss": 76.8008, "step": 64680 }, { "epoch": 0.261355785663207, "grad_norm": 894.181396484375, "learning_rate": 4.625834640366068e-05, "loss": 93.1069, "step": 64690 }, { "epoch": 0.2613961869285746, "grad_norm": 575.2242431640625, "learning_rate": 4.625650925972622e-05, "loss": 85.9263, "step": 64700 }, { "epoch": 0.2614365881939422, "grad_norm": 925.6211547851562, "learning_rate": 4.6254671701385154e-05, "loss": 69.3511, "step": 64710 }, { "epoch": 0.26147698945930986, "grad_norm": 588.945068359375, "learning_rate": 4.625283372867333e-05, "loss": 36.42, "step": 64720 }, { "epoch": 0.2615173907246775, "grad_norm": 1043.0775146484375, "learning_rate": 4.625099534162656e-05, "loss": 81.0917, "step": 64730 }, { "epoch": 0.26155779199004514, "grad_norm": 1658.7032470703125, "learning_rate": 4.62491565402807e-05, "loss": 62.2384, "step": 64740 }, { "epoch": 0.2615981932554128, "grad_norm": 1211.529052734375, "learning_rate": 4.6247317324671605e-05, "loss": 74.7915, "step": 64750 }, { "epoch": 0.26163859452078037, "grad_norm": 1145.8216552734375, "learning_rate": 4.6245477694835106e-05, "loss": 52.6177, "step": 64760 }, { "epoch": 0.261678995786148, "grad_norm": 758.6449584960938, "learning_rate": 4.6243637650807086e-05, "loss": 85.0178, "step": 64770 }, { "epoch": 0.26171939705151565, "grad_norm": 427.8953857421875, "learning_rate": 4.624179719262342e-05, "loss": 76.1282, "step": 64780 }, { "epoch": 0.2617597983168833, "grad_norm": 750.1713256835938, "learning_rate": 4.623995632031997e-05, "loss": 74.4636, "step": 64790 }, { "epoch": 0.2618001995822509, "grad_norm": 421.75958251953125, "learning_rate": 4.6238115033932636e-05, "loss": 56.7678, "step": 64800 }, { "epoch": 0.26184060084761857, "grad_norm": 1215.94482421875, "learning_rate": 4.623627333349732e-05, "loss": 72.2328, "step": 64810 }, { "epoch": 0.2618810021129862, "grad_norm": 0.0, "learning_rate": 4.623443121904992e-05, "loss": 68.4072, "step": 64820 }, { "epoch": 0.2619214033783538, "grad_norm": 739.1293334960938, "learning_rate": 4.623258869062636e-05, "loss": 58.8318, "step": 64830 }, { "epoch": 0.26196180464372143, "grad_norm": 760.8939819335938, "learning_rate": 4.623074574826254e-05, "loss": 85.5972, "step": 64840 }, { "epoch": 0.26200220590908907, "grad_norm": 902.6166381835938, "learning_rate": 4.622890239199441e-05, "loss": 77.9647, "step": 64850 }, { "epoch": 0.2620426071744567, "grad_norm": 504.2374572753906, "learning_rate": 4.622705862185789e-05, "loss": 61.3637, "step": 64860 }, { "epoch": 0.26208300843982435, "grad_norm": 550.212890625, "learning_rate": 4.622521443788894e-05, "loss": 58.025, "step": 64870 }, { "epoch": 0.262123409705192, "grad_norm": 1491.5367431640625, "learning_rate": 4.622336984012351e-05, "loss": 119.6012, "step": 64880 }, { "epoch": 0.2621638109705596, "grad_norm": 1074.4013671875, "learning_rate": 4.622152482859755e-05, "loss": 53.4368, "step": 64890 }, { "epoch": 0.2622042122359272, "grad_norm": 1384.9111328125, "learning_rate": 4.621967940334705e-05, "loss": 73.9773, "step": 64900 }, { "epoch": 0.26224461350129485, "grad_norm": 594.1865234375, "learning_rate": 4.621783356440796e-05, "loss": 59.9528, "step": 64910 }, { "epoch": 0.2622850147666625, "grad_norm": 1251.0433349609375, "learning_rate": 4.621598731181629e-05, "loss": 69.5674, "step": 64920 }, { "epoch": 0.26232541603203013, "grad_norm": 799.28564453125, "learning_rate": 4.621414064560803e-05, "loss": 90.669, "step": 64930 }, { "epoch": 0.2623658172973978, "grad_norm": 766.1452026367188, "learning_rate": 4.6212293565819166e-05, "loss": 65.2333, "step": 64940 }, { "epoch": 0.26240621856276536, "grad_norm": 2778.8935546875, "learning_rate": 4.6210446072485725e-05, "loss": 89.6359, "step": 64950 }, { "epoch": 0.262446619828133, "grad_norm": 1777.0819091796875, "learning_rate": 4.6208598165643715e-05, "loss": 106.0751, "step": 64960 }, { "epoch": 0.26248702109350064, "grad_norm": 607.8473510742188, "learning_rate": 4.6206749845329164e-05, "loss": 65.859, "step": 64970 }, { "epoch": 0.2625274223588683, "grad_norm": 640.949951171875, "learning_rate": 4.62049011115781e-05, "loss": 93.8094, "step": 64980 }, { "epoch": 0.2625678236242359, "grad_norm": 449.35650634765625, "learning_rate": 4.620305196442659e-05, "loss": 52.2666, "step": 64990 }, { "epoch": 0.26260822488960356, "grad_norm": 1173.4410400390625, "learning_rate": 4.620120240391065e-05, "loss": 93.0513, "step": 65000 }, { "epoch": 0.2626486261549712, "grad_norm": 489.27716064453125, "learning_rate": 4.619935243006636e-05, "loss": 64.3064, "step": 65010 }, { "epoch": 0.2626890274203388, "grad_norm": 265.78076171875, "learning_rate": 4.619750204292978e-05, "loss": 80.1383, "step": 65020 }, { "epoch": 0.2627294286857064, "grad_norm": 581.3447265625, "learning_rate": 4.619565124253698e-05, "loss": 41.3353, "step": 65030 }, { "epoch": 0.26276982995107406, "grad_norm": 3157.60595703125, "learning_rate": 4.619380002892406e-05, "loss": 86.7737, "step": 65040 }, { "epoch": 0.2628102312164417, "grad_norm": 679.6066284179688, "learning_rate": 4.619194840212708e-05, "loss": 73.4557, "step": 65050 }, { "epoch": 0.26285063248180934, "grad_norm": 352.4366760253906, "learning_rate": 4.6190096362182167e-05, "loss": 84.3029, "step": 65060 }, { "epoch": 0.262891033747177, "grad_norm": 1345.6533203125, "learning_rate": 4.618824390912541e-05, "loss": 76.0789, "step": 65070 }, { "epoch": 0.26293143501254457, "grad_norm": 606.6847534179688, "learning_rate": 4.618639104299294e-05, "loss": 84.0097, "step": 65080 }, { "epoch": 0.2629718362779122, "grad_norm": 777.6514892578125, "learning_rate": 4.618453776382086e-05, "loss": 54.7092, "step": 65090 }, { "epoch": 0.26301223754327985, "grad_norm": 686.7308959960938, "learning_rate": 4.61826840716453e-05, "loss": 61.8434, "step": 65100 }, { "epoch": 0.2630526388086475, "grad_norm": 538.2279052734375, "learning_rate": 4.618082996650243e-05, "loss": 49.7606, "step": 65110 }, { "epoch": 0.2630930400740151, "grad_norm": 537.077392578125, "learning_rate": 4.617897544842836e-05, "loss": 66.0555, "step": 65120 }, { "epoch": 0.26313344133938277, "grad_norm": 676.6751098632812, "learning_rate": 4.617712051745927e-05, "loss": 64.1107, "step": 65130 }, { "epoch": 0.2631738426047504, "grad_norm": 587.270263671875, "learning_rate": 4.61752651736313e-05, "loss": 63.3736, "step": 65140 }, { "epoch": 0.263214243870118, "grad_norm": 1263.456787109375, "learning_rate": 4.617340941698064e-05, "loss": 59.6258, "step": 65150 }, { "epoch": 0.26325464513548563, "grad_norm": 427.99737548828125, "learning_rate": 4.617155324754346e-05, "loss": 52.6464, "step": 65160 }, { "epoch": 0.26329504640085327, "grad_norm": 0.0, "learning_rate": 4.616969666535596e-05, "loss": 73.0998, "step": 65170 }, { "epoch": 0.2633354476662209, "grad_norm": 617.71337890625, "learning_rate": 4.6167839670454315e-05, "loss": 44.5023, "step": 65180 }, { "epoch": 0.26337584893158855, "grad_norm": 517.5345458984375, "learning_rate": 4.616598226287474e-05, "loss": 95.0356, "step": 65190 }, { "epoch": 0.2634162501969562, "grad_norm": 1026.675537109375, "learning_rate": 4.616412444265345e-05, "loss": 67.167, "step": 65200 }, { "epoch": 0.2634566514623238, "grad_norm": 1128.9427490234375, "learning_rate": 4.616226620982665e-05, "loss": 89.6982, "step": 65210 }, { "epoch": 0.2634970527276914, "grad_norm": 809.5255737304688, "learning_rate": 4.6160407564430574e-05, "loss": 65.6736, "step": 65220 }, { "epoch": 0.26353745399305906, "grad_norm": 1231.8623046875, "learning_rate": 4.6158548506501464e-05, "loss": 86.9045, "step": 65230 }, { "epoch": 0.2635778552584267, "grad_norm": 1171.7540283203125, "learning_rate": 4.6156689036075555e-05, "loss": 77.3384, "step": 65240 }, { "epoch": 0.26361825652379434, "grad_norm": 619.1737060546875, "learning_rate": 4.615482915318911e-05, "loss": 65.3949, "step": 65250 }, { "epoch": 0.263658657789162, "grad_norm": 518.4265747070312, "learning_rate": 4.6152968857878366e-05, "loss": 55.1953, "step": 65260 }, { "epoch": 0.26369905905452956, "grad_norm": 1298.9517822265625, "learning_rate": 4.615110815017961e-05, "loss": 56.1695, "step": 65270 }, { "epoch": 0.2637394603198972, "grad_norm": 452.3489074707031, "learning_rate": 4.614924703012911e-05, "loss": 58.1403, "step": 65280 }, { "epoch": 0.26377986158526484, "grad_norm": 809.9728393554688, "learning_rate": 4.614738549776315e-05, "loss": 49.3975, "step": 65290 }, { "epoch": 0.2638202628506325, "grad_norm": 703.8427124023438, "learning_rate": 4.614552355311802e-05, "loss": 54.3999, "step": 65300 }, { "epoch": 0.2638606641160001, "grad_norm": 518.8963623046875, "learning_rate": 4.6143661196230026e-05, "loss": 59.6565, "step": 65310 }, { "epoch": 0.26390106538136776, "grad_norm": 780.0380249023438, "learning_rate": 4.614179842713547e-05, "loss": 85.9944, "step": 65320 }, { "epoch": 0.2639414666467354, "grad_norm": 855.755126953125, "learning_rate": 4.613993524587067e-05, "loss": 57.039, "step": 65330 }, { "epoch": 0.263981867912103, "grad_norm": 722.5667114257812, "learning_rate": 4.613807165247195e-05, "loss": 77.862, "step": 65340 }, { "epoch": 0.2640222691774706, "grad_norm": 833.8828125, "learning_rate": 4.613620764697564e-05, "loss": 70.7339, "step": 65350 }, { "epoch": 0.26406267044283827, "grad_norm": 827.6605224609375, "learning_rate": 4.6134343229418075e-05, "loss": 91.2179, "step": 65360 }, { "epoch": 0.2641030717082059, "grad_norm": 398.13287353515625, "learning_rate": 4.613247839983561e-05, "loss": 85.9632, "step": 65370 }, { "epoch": 0.26414347297357355, "grad_norm": 661.8615112304688, "learning_rate": 4.613061315826461e-05, "loss": 89.6971, "step": 65380 }, { "epoch": 0.2641838742389412, "grad_norm": 425.14483642578125, "learning_rate": 4.612874750474142e-05, "loss": 46.4946, "step": 65390 }, { "epoch": 0.26422427550430877, "grad_norm": 709.1620483398438, "learning_rate": 4.612688143930242e-05, "loss": 97.9462, "step": 65400 }, { "epoch": 0.2642646767696764, "grad_norm": 888.79248046875, "learning_rate": 4.612501496198398e-05, "loss": 94.5246, "step": 65410 }, { "epoch": 0.26430507803504405, "grad_norm": 629.85791015625, "learning_rate": 4.612314807282251e-05, "loss": 47.7306, "step": 65420 }, { "epoch": 0.2643454793004117, "grad_norm": 76.55754089355469, "learning_rate": 4.612128077185439e-05, "loss": 79.3505, "step": 65430 }, { "epoch": 0.26438588056577933, "grad_norm": 1617.7518310546875, "learning_rate": 4.611941305911602e-05, "loss": 100.8537, "step": 65440 }, { "epoch": 0.26442628183114697, "grad_norm": 653.6577758789062, "learning_rate": 4.611754493464383e-05, "loss": 61.9429, "step": 65450 }, { "epoch": 0.2644666830965146, "grad_norm": 1721.381103515625, "learning_rate": 4.611567639847422e-05, "loss": 72.449, "step": 65460 }, { "epoch": 0.2645070843618822, "grad_norm": 2042.4002685546875, "learning_rate": 4.611380745064363e-05, "loss": 66.475, "step": 65470 }, { "epoch": 0.26454748562724983, "grad_norm": 603.5230712890625, "learning_rate": 4.61119380911885e-05, "loss": 81.908, "step": 65480 }, { "epoch": 0.2645878868926175, "grad_norm": 507.7789001464844, "learning_rate": 4.611006832014526e-05, "loss": 57.1852, "step": 65490 }, { "epoch": 0.2646282881579851, "grad_norm": 646.4906005859375, "learning_rate": 4.610819813755038e-05, "loss": 48.6094, "step": 65500 }, { "epoch": 0.26466868942335275, "grad_norm": 783.2797241210938, "learning_rate": 4.61063275434403e-05, "loss": 67.8832, "step": 65510 }, { "epoch": 0.2647090906887204, "grad_norm": 698.7806396484375, "learning_rate": 4.610445653785151e-05, "loss": 53.7803, "step": 65520 }, { "epoch": 0.264749491954088, "grad_norm": 563.870361328125, "learning_rate": 4.610258512082046e-05, "loss": 54.008, "step": 65530 }, { "epoch": 0.2647898932194556, "grad_norm": 734.525146484375, "learning_rate": 4.610071329238366e-05, "loss": 59.3586, "step": 65540 }, { "epoch": 0.26483029448482326, "grad_norm": 568.80419921875, "learning_rate": 4.6098841052577583e-05, "loss": 52.5479, "step": 65550 }, { "epoch": 0.2648706957501909, "grad_norm": 473.8629150390625, "learning_rate": 4.6096968401438745e-05, "loss": 88.4755, "step": 65560 }, { "epoch": 0.26491109701555854, "grad_norm": 804.3765258789062, "learning_rate": 4.609509533900364e-05, "loss": 82.4179, "step": 65570 }, { "epoch": 0.2649514982809262, "grad_norm": 556.0540161132812, "learning_rate": 4.6093221865308786e-05, "loss": 108.8011, "step": 65580 }, { "epoch": 0.26499189954629376, "grad_norm": 761.5044555664062, "learning_rate": 4.609134798039073e-05, "loss": 61.9983, "step": 65590 }, { "epoch": 0.2650323008116614, "grad_norm": 1213.9462890625, "learning_rate": 4.6089473684285974e-05, "loss": 93.0749, "step": 65600 }, { "epoch": 0.26507270207702904, "grad_norm": 691.1778564453125, "learning_rate": 4.608759897703107e-05, "loss": 55.7611, "step": 65610 }, { "epoch": 0.2651131033423967, "grad_norm": 1190.127685546875, "learning_rate": 4.608572385866257e-05, "loss": 56.0981, "step": 65620 }, { "epoch": 0.2651535046077643, "grad_norm": 1123.38916015625, "learning_rate": 4.6083848329217026e-05, "loss": 88.4509, "step": 65630 }, { "epoch": 0.26519390587313196, "grad_norm": 358.7076110839844, "learning_rate": 4.608197238873101e-05, "loss": 70.7137, "step": 65640 }, { "epoch": 0.2652343071384996, "grad_norm": 1214.915283203125, "learning_rate": 4.6080096037241086e-05, "loss": 89.7655, "step": 65650 }, { "epoch": 0.2652747084038672, "grad_norm": 890.3026123046875, "learning_rate": 4.607821927478383e-05, "loss": 86.5065, "step": 65660 }, { "epoch": 0.2653151096692348, "grad_norm": 835.5069580078125, "learning_rate": 4.607634210139584e-05, "loss": 84.1165, "step": 65670 }, { "epoch": 0.26535551093460247, "grad_norm": 350.78350830078125, "learning_rate": 4.607446451711372e-05, "loss": 67.6091, "step": 65680 }, { "epoch": 0.2653959121999701, "grad_norm": 1582.5438232421875, "learning_rate": 4.607258652197406e-05, "loss": 86.531, "step": 65690 }, { "epoch": 0.26543631346533775, "grad_norm": 1221.990234375, "learning_rate": 4.6070708116013476e-05, "loss": 70.8549, "step": 65700 }, { "epoch": 0.2654767147307054, "grad_norm": 931.9036865234375, "learning_rate": 4.606882929926858e-05, "loss": 47.7265, "step": 65710 }, { "epoch": 0.26551711599607297, "grad_norm": 787.3499755859375, "learning_rate": 4.6066950071776015e-05, "loss": 82.7044, "step": 65720 }, { "epoch": 0.2655575172614406, "grad_norm": 3718.5693359375, "learning_rate": 4.606507043357242e-05, "loss": 120.9925, "step": 65730 }, { "epoch": 0.26559791852680825, "grad_norm": 836.9452514648438, "learning_rate": 4.606319038469443e-05, "loss": 59.5213, "step": 65740 }, { "epoch": 0.2656383197921759, "grad_norm": 694.1288452148438, "learning_rate": 4.606130992517869e-05, "loss": 70.1149, "step": 65750 }, { "epoch": 0.26567872105754353, "grad_norm": 1005.6154174804688, "learning_rate": 4.605942905506188e-05, "loss": 49.102, "step": 65760 }, { "epoch": 0.26571912232291117, "grad_norm": 684.3242797851562, "learning_rate": 4.605754777438065e-05, "loss": 79.9185, "step": 65770 }, { "epoch": 0.2657595235882788, "grad_norm": 1068.396240234375, "learning_rate": 4.605566608317169e-05, "loss": 109.3487, "step": 65780 }, { "epoch": 0.2657999248536464, "grad_norm": 2943.5751953125, "learning_rate": 4.6053783981471675e-05, "loss": 78.831, "step": 65790 }, { "epoch": 0.26584032611901404, "grad_norm": 386.4954833984375, "learning_rate": 4.605190146931731e-05, "loss": 91.7346, "step": 65800 }, { "epoch": 0.2658807273843817, "grad_norm": 1254.3067626953125, "learning_rate": 4.605001854674529e-05, "loss": 88.2726, "step": 65810 }, { "epoch": 0.2659211286497493, "grad_norm": 553.072509765625, "learning_rate": 4.604813521379231e-05, "loss": 71.1216, "step": 65820 }, { "epoch": 0.26596152991511696, "grad_norm": 780.3787841796875, "learning_rate": 4.60462514704951e-05, "loss": 71.7296, "step": 65830 }, { "epoch": 0.2660019311804846, "grad_norm": 1093.623046875, "learning_rate": 4.6044367316890386e-05, "loss": 53.4292, "step": 65840 }, { "epoch": 0.2660423324458522, "grad_norm": 986.8058471679688, "learning_rate": 4.6042482753014895e-05, "loss": 70.1599, "step": 65850 }, { "epoch": 0.2660827337112198, "grad_norm": 614.8383178710938, "learning_rate": 4.604059777890537e-05, "loss": 68.2766, "step": 65860 }, { "epoch": 0.26612313497658746, "grad_norm": 608.2391357421875, "learning_rate": 4.603871239459856e-05, "loss": 51.4506, "step": 65870 }, { "epoch": 0.2661635362419551, "grad_norm": 1213.140869140625, "learning_rate": 4.6036826600131216e-05, "loss": 66.3176, "step": 65880 }, { "epoch": 0.26620393750732274, "grad_norm": 1492.8607177734375, "learning_rate": 4.603494039554011e-05, "loss": 83.3855, "step": 65890 }, { "epoch": 0.2662443387726904, "grad_norm": 2795.365478515625, "learning_rate": 4.603305378086201e-05, "loss": 86.1449, "step": 65900 }, { "epoch": 0.26628474003805797, "grad_norm": 429.2622985839844, "learning_rate": 4.60311667561337e-05, "loss": 93.9114, "step": 65910 }, { "epoch": 0.2663251413034256, "grad_norm": 1739.33544921875, "learning_rate": 4.602927932139197e-05, "loss": 72.7662, "step": 65920 }, { "epoch": 0.26636554256879325, "grad_norm": 850.324951171875, "learning_rate": 4.6027391476673606e-05, "loss": 87.2089, "step": 65930 }, { "epoch": 0.2664059438341609, "grad_norm": 248.5250244140625, "learning_rate": 4.602550322201542e-05, "loss": 73.9269, "step": 65940 }, { "epoch": 0.2664463450995285, "grad_norm": 414.97100830078125, "learning_rate": 4.602361455745423e-05, "loss": 38.258, "step": 65950 }, { "epoch": 0.26648674636489617, "grad_norm": 821.9193725585938, "learning_rate": 4.602172548302684e-05, "loss": 51.6796, "step": 65960 }, { "epoch": 0.2665271476302638, "grad_norm": 972.9696044921875, "learning_rate": 4.60198359987701e-05, "loss": 75.2604, "step": 65970 }, { "epoch": 0.2665675488956314, "grad_norm": 589.0814819335938, "learning_rate": 4.6017946104720836e-05, "loss": 86.6934, "step": 65980 }, { "epoch": 0.26660795016099903, "grad_norm": 812.1279296875, "learning_rate": 4.6016055800915884e-05, "loss": 77.9313, "step": 65990 }, { "epoch": 0.26664835142636667, "grad_norm": 456.3207702636719, "learning_rate": 4.601416508739211e-05, "loss": 49.5027, "step": 66000 }, { "epoch": 0.2666887526917343, "grad_norm": 1623.6546630859375, "learning_rate": 4.6012273964186365e-05, "loss": 64.7836, "step": 66010 }, { "epoch": 0.26672915395710195, "grad_norm": 779.8026733398438, "learning_rate": 4.601038243133552e-05, "loss": 108.0493, "step": 66020 }, { "epoch": 0.2667695552224696, "grad_norm": 685.0618286132812, "learning_rate": 4.600849048887646e-05, "loss": 79.6456, "step": 66030 }, { "epoch": 0.2668099564878372, "grad_norm": 1468.7335205078125, "learning_rate": 4.6006598136846056e-05, "loss": 63.8021, "step": 66040 }, { "epoch": 0.2668503577532048, "grad_norm": 932.735107421875, "learning_rate": 4.600470537528121e-05, "loss": 51.6305, "step": 66050 }, { "epoch": 0.26689075901857245, "grad_norm": 815.7344970703125, "learning_rate": 4.6002812204218816e-05, "loss": 66.5829, "step": 66060 }, { "epoch": 0.2669311602839401, "grad_norm": 606.532958984375, "learning_rate": 4.600091862369579e-05, "loss": 77.9135, "step": 66070 }, { "epoch": 0.26697156154930773, "grad_norm": 760.3806762695312, "learning_rate": 4.599902463374903e-05, "loss": 64.1426, "step": 66080 }, { "epoch": 0.2670119628146754, "grad_norm": 1549.1610107421875, "learning_rate": 4.599713023441549e-05, "loss": 67.7165, "step": 66090 }, { "epoch": 0.267052364080043, "grad_norm": 500.93182373046875, "learning_rate": 4.599523542573207e-05, "loss": 65.0257, "step": 66100 }, { "epoch": 0.2670927653454106, "grad_norm": 301.22589111328125, "learning_rate": 4.599334020773574e-05, "loss": 69.3244, "step": 66110 }, { "epoch": 0.26713316661077824, "grad_norm": 374.9650573730469, "learning_rate": 4.599144458046343e-05, "loss": 57.7532, "step": 66120 }, { "epoch": 0.2671735678761459, "grad_norm": 521.96337890625, "learning_rate": 4.59895485439521e-05, "loss": 80.4386, "step": 66130 }, { "epoch": 0.2672139691415135, "grad_norm": 1903.363525390625, "learning_rate": 4.5987652098238714e-05, "loss": 68.9257, "step": 66140 }, { "epoch": 0.26725437040688116, "grad_norm": 561.215087890625, "learning_rate": 4.598575524336025e-05, "loss": 56.7605, "step": 66150 }, { "epoch": 0.2672947716722488, "grad_norm": 502.40240478515625, "learning_rate": 4.598385797935368e-05, "loss": 61.1385, "step": 66160 }, { "epoch": 0.2673351729376164, "grad_norm": 948.8161010742188, "learning_rate": 4.5981960306255996e-05, "loss": 71.2674, "step": 66170 }, { "epoch": 0.267375574202984, "grad_norm": 683.00244140625, "learning_rate": 4.598006222410419e-05, "loss": 56.7152, "step": 66180 }, { "epoch": 0.26741597546835166, "grad_norm": 916.79833984375, "learning_rate": 4.597816373293528e-05, "loss": 75.4204, "step": 66190 }, { "epoch": 0.2674563767337193, "grad_norm": 406.5462341308594, "learning_rate": 4.597626483278625e-05, "loss": 63.3937, "step": 66200 }, { "epoch": 0.26749677799908694, "grad_norm": 953.0973510742188, "learning_rate": 4.5974365523694155e-05, "loss": 55.9855, "step": 66210 }, { "epoch": 0.2675371792644546, "grad_norm": 664.46923828125, "learning_rate": 4.5972465805695996e-05, "loss": 100.492, "step": 66220 }, { "epoch": 0.26757758052982217, "grad_norm": 810.0478515625, "learning_rate": 4.597056567882883e-05, "loss": 68.5419, "step": 66230 }, { "epoch": 0.2676179817951898, "grad_norm": 821.2228393554688, "learning_rate": 4.596866514312967e-05, "loss": 71.8012, "step": 66240 }, { "epoch": 0.26765838306055745, "grad_norm": 1174.8955078125, "learning_rate": 4.5966764198635606e-05, "loss": 92.9574, "step": 66250 }, { "epoch": 0.2676987843259251, "grad_norm": 1086.9854736328125, "learning_rate": 4.596486284538367e-05, "loss": 57.3918, "step": 66260 }, { "epoch": 0.2677391855912927, "grad_norm": 690.481689453125, "learning_rate": 4.5962961083410946e-05, "loss": 71.6719, "step": 66270 }, { "epoch": 0.26777958685666037, "grad_norm": 1423.809814453125, "learning_rate": 4.596105891275449e-05, "loss": 66.3168, "step": 66280 }, { "epoch": 0.267819988122028, "grad_norm": 1662.720947265625, "learning_rate": 4.595915633345141e-05, "loss": 94.1194, "step": 66290 }, { "epoch": 0.2678603893873956, "grad_norm": 687.7420654296875, "learning_rate": 4.595725334553879e-05, "loss": 125.3564, "step": 66300 }, { "epoch": 0.26790079065276323, "grad_norm": 580.8500366210938, "learning_rate": 4.595534994905372e-05, "loss": 45.3163, "step": 66310 }, { "epoch": 0.26794119191813087, "grad_norm": 0.0, "learning_rate": 4.5953446144033316e-05, "loss": 59.1748, "step": 66320 }, { "epoch": 0.2679815931834985, "grad_norm": 292.1687927246094, "learning_rate": 4.595154193051469e-05, "loss": 62.6477, "step": 66330 }, { "epoch": 0.26802199444886615, "grad_norm": 379.4153747558594, "learning_rate": 4.594963730853497e-05, "loss": 90.7049, "step": 66340 }, { "epoch": 0.2680623957142338, "grad_norm": 2087.564208984375, "learning_rate": 4.5947732278131286e-05, "loss": 75.824, "step": 66350 }, { "epoch": 0.2681027969796014, "grad_norm": 1189.827880859375, "learning_rate": 4.594582683934078e-05, "loss": 92.8753, "step": 66360 }, { "epoch": 0.268143198244969, "grad_norm": 524.31005859375, "learning_rate": 4.5943920992200585e-05, "loss": 57.8241, "step": 66370 }, { "epoch": 0.26818359951033666, "grad_norm": 1099.4833984375, "learning_rate": 4.5942014736747875e-05, "loss": 65.1616, "step": 66380 }, { "epoch": 0.2682240007757043, "grad_norm": 1009.6073608398438, "learning_rate": 4.59401080730198e-05, "loss": 68.1652, "step": 66390 }, { "epoch": 0.26826440204107194, "grad_norm": 4793.3828125, "learning_rate": 4.593820100105355e-05, "loss": 111.5321, "step": 66400 }, { "epoch": 0.2683048033064396, "grad_norm": 940.0222778320312, "learning_rate": 4.5936293520886275e-05, "loss": 83.8232, "step": 66410 }, { "epoch": 0.2683452045718072, "grad_norm": 1047.490478515625, "learning_rate": 4.59343856325552e-05, "loss": 58.4591, "step": 66420 }, { "epoch": 0.2683856058371748, "grad_norm": 874.4302978515625, "learning_rate": 4.593247733609748e-05, "loss": 104.9535, "step": 66430 }, { "epoch": 0.26842600710254244, "grad_norm": 742.1166381835938, "learning_rate": 4.593056863155034e-05, "loss": 84.062, "step": 66440 }, { "epoch": 0.2684664083679101, "grad_norm": 313.023681640625, "learning_rate": 4.5928659518951e-05, "loss": 44.992, "step": 66450 }, { "epoch": 0.2685068096332777, "grad_norm": 1163.264892578125, "learning_rate": 4.592674999833666e-05, "loss": 71.2603, "step": 66460 }, { "epoch": 0.26854721089864536, "grad_norm": 1168.4986572265625, "learning_rate": 4.592484006974456e-05, "loss": 46.842, "step": 66470 }, { "epoch": 0.268587612164013, "grad_norm": 2281.073974609375, "learning_rate": 4.5922929733211926e-05, "loss": 54.4491, "step": 66480 }, { "epoch": 0.2686280134293806, "grad_norm": 656.7672729492188, "learning_rate": 4.5921018988776e-05, "loss": 88.2362, "step": 66490 }, { "epoch": 0.2686684146947482, "grad_norm": 787.104736328125, "learning_rate": 4.591910783647404e-05, "loss": 102.6616, "step": 66500 }, { "epoch": 0.26870881596011587, "grad_norm": 602.447265625, "learning_rate": 4.591719627634331e-05, "loss": 70.4162, "step": 66510 }, { "epoch": 0.2687492172254835, "grad_norm": 607.5802612304688, "learning_rate": 4.591528430842107e-05, "loss": 67.1125, "step": 66520 }, { "epoch": 0.26878961849085115, "grad_norm": 1654.757080078125, "learning_rate": 4.5913371932744584e-05, "loss": 82.985, "step": 66530 }, { "epoch": 0.2688300197562188, "grad_norm": 711.9852905273438, "learning_rate": 4.591145914935116e-05, "loss": 66.6757, "step": 66540 }, { "epoch": 0.26887042102158637, "grad_norm": 956.3167114257812, "learning_rate": 4.590954595827806e-05, "loss": 78.8319, "step": 66550 }, { "epoch": 0.268910822286954, "grad_norm": 1775.7945556640625, "learning_rate": 4.59076323595626e-05, "loss": 88.6002, "step": 66560 }, { "epoch": 0.26895122355232165, "grad_norm": 776.41015625, "learning_rate": 4.5905718353242086e-05, "loss": 70.2173, "step": 66570 }, { "epoch": 0.2689916248176893, "grad_norm": 934.8427124023438, "learning_rate": 4.590380393935383e-05, "loss": 71.2888, "step": 66580 }, { "epoch": 0.26903202608305693, "grad_norm": 1015.8541259765625, "learning_rate": 4.5901889117935153e-05, "loss": 94.4522, "step": 66590 }, { "epoch": 0.26907242734842457, "grad_norm": 0.0, "learning_rate": 4.589997388902338e-05, "loss": 66.415, "step": 66600 }, { "epoch": 0.2691128286137922, "grad_norm": 1408.8917236328125, "learning_rate": 4.589805825265587e-05, "loss": 83.8427, "step": 66610 }, { "epoch": 0.2691532298791598, "grad_norm": 654.0909423828125, "learning_rate": 4.5896142208869954e-05, "loss": 59.3255, "step": 66620 }, { "epoch": 0.26919363114452743, "grad_norm": 644.368896484375, "learning_rate": 4.589422575770298e-05, "loss": 51.3406, "step": 66630 }, { "epoch": 0.2692340324098951, "grad_norm": 2795.22998046875, "learning_rate": 4.589230889919232e-05, "loss": 64.9199, "step": 66640 }, { "epoch": 0.2692744336752627, "grad_norm": 940.733642578125, "learning_rate": 4.589039163337534e-05, "loss": 102.8163, "step": 66650 }, { "epoch": 0.26931483494063035, "grad_norm": 847.7637939453125, "learning_rate": 4.588847396028942e-05, "loss": 77.151, "step": 66660 }, { "epoch": 0.269355236205998, "grad_norm": 710.346435546875, "learning_rate": 4.588655587997195e-05, "loss": 57.7109, "step": 66670 }, { "epoch": 0.2693956374713656, "grad_norm": 607.9664306640625, "learning_rate": 4.5884637392460314e-05, "loss": 71.083, "step": 66680 }, { "epoch": 0.2694360387367332, "grad_norm": 615.1112060546875, "learning_rate": 4.588271849779192e-05, "loss": 63.7689, "step": 66690 }, { "epoch": 0.26947644000210086, "grad_norm": 664.7433471679688, "learning_rate": 4.588079919600419e-05, "loss": 80.6283, "step": 66700 }, { "epoch": 0.2695168412674685, "grad_norm": 407.6193542480469, "learning_rate": 4.587887948713452e-05, "loss": 49.1637, "step": 66710 }, { "epoch": 0.26955724253283614, "grad_norm": 785.1792602539062, "learning_rate": 4.5876959371220344e-05, "loss": 64.5263, "step": 66720 }, { "epoch": 0.2695976437982038, "grad_norm": 565.7954711914062, "learning_rate": 4.587503884829909e-05, "loss": 86.9049, "step": 66730 }, { "epoch": 0.2696380450635714, "grad_norm": 1209.4056396484375, "learning_rate": 4.587311791840822e-05, "loss": 103.0452, "step": 66740 }, { "epoch": 0.269678446328939, "grad_norm": 384.4944152832031, "learning_rate": 4.5871196581585166e-05, "loss": 46.5085, "step": 66750 }, { "epoch": 0.26971884759430664, "grad_norm": 818.3886108398438, "learning_rate": 4.5869274837867394e-05, "loss": 100.6729, "step": 66760 }, { "epoch": 0.2697592488596743, "grad_norm": 964.607421875, "learning_rate": 4.5867352687292355e-05, "loss": 64.0361, "step": 66770 }, { "epoch": 0.2697996501250419, "grad_norm": 641.611328125, "learning_rate": 4.5865430129897536e-05, "loss": 44.5035, "step": 66780 }, { "epoch": 0.26984005139040956, "grad_norm": 822.2906494140625, "learning_rate": 4.5863507165720415e-05, "loss": 63.5622, "step": 66790 }, { "epoch": 0.2698804526557772, "grad_norm": 370.7065124511719, "learning_rate": 4.586158379479848e-05, "loss": 57.749, "step": 66800 }, { "epoch": 0.2699208539211448, "grad_norm": 565.5477905273438, "learning_rate": 4.5859660017169224e-05, "loss": 80.5675, "step": 66810 }, { "epoch": 0.2699612551865124, "grad_norm": 869.1512451171875, "learning_rate": 4.5857735832870166e-05, "loss": 76.6965, "step": 66820 }, { "epoch": 0.27000165645188007, "grad_norm": 390.1742858886719, "learning_rate": 4.5855811241938806e-05, "loss": 87.9975, "step": 66830 }, { "epoch": 0.2700420577172477, "grad_norm": 815.9105834960938, "learning_rate": 4.585388624441267e-05, "loss": 60.8064, "step": 66840 }, { "epoch": 0.27008245898261535, "grad_norm": 777.6511840820312, "learning_rate": 4.585196084032928e-05, "loss": 50.49, "step": 66850 }, { "epoch": 0.270122860247983, "grad_norm": 975.7098999023438, "learning_rate": 4.585003502972618e-05, "loss": 82.6168, "step": 66860 }, { "epoch": 0.27016326151335057, "grad_norm": 773.058349609375, "learning_rate": 4.584810881264092e-05, "loss": 58.9796, "step": 66870 }, { "epoch": 0.2702036627787182, "grad_norm": 342.78265380859375, "learning_rate": 4.5846182189111035e-05, "loss": 77.3388, "step": 66880 }, { "epoch": 0.27024406404408585, "grad_norm": 759.47314453125, "learning_rate": 4.584425515917411e-05, "loss": 53.6399, "step": 66890 }, { "epoch": 0.2702844653094535, "grad_norm": 392.1265869140625, "learning_rate": 4.584232772286768e-05, "loss": 48.1632, "step": 66900 }, { "epoch": 0.27032486657482113, "grad_norm": 704.0643920898438, "learning_rate": 4.5840399880229354e-05, "loss": 73.1397, "step": 66910 }, { "epoch": 0.27036526784018877, "grad_norm": 828.9876098632812, "learning_rate": 4.58384716312967e-05, "loss": 60.1899, "step": 66920 }, { "epoch": 0.2704056691055564, "grad_norm": 529.6195678710938, "learning_rate": 4.583654297610731e-05, "loss": 59.9641, "step": 66930 }, { "epoch": 0.270446070370924, "grad_norm": 733.8816528320312, "learning_rate": 4.583461391469879e-05, "loss": 90.0057, "step": 66940 }, { "epoch": 0.27048647163629164, "grad_norm": 1088.7060546875, "learning_rate": 4.583268444710875e-05, "loss": 103.6154, "step": 66950 }, { "epoch": 0.2705268729016593, "grad_norm": 952.4298095703125, "learning_rate": 4.583075457337479e-05, "loss": 78.6486, "step": 66960 }, { "epoch": 0.2705672741670269, "grad_norm": 490.1788024902344, "learning_rate": 4.5828824293534555e-05, "loss": 64.2271, "step": 66970 }, { "epoch": 0.27060767543239456, "grad_norm": 1667.7532958984375, "learning_rate": 4.5826893607625665e-05, "loss": 64.5124, "step": 66980 }, { "epoch": 0.2706480766977622, "grad_norm": 823.8807373046875, "learning_rate": 4.582496251568576e-05, "loss": 71.0763, "step": 66990 }, { "epoch": 0.2706884779631298, "grad_norm": 0.0, "learning_rate": 4.5823031017752485e-05, "loss": 65.76, "step": 67000 }, { "epoch": 0.2707288792284974, "grad_norm": 962.5149536132812, "learning_rate": 4.5821099113863506e-05, "loss": 87.7109, "step": 67010 }, { "epoch": 0.27076928049386506, "grad_norm": 1748.1005859375, "learning_rate": 4.581916680405648e-05, "loss": 62.5524, "step": 67020 }, { "epoch": 0.2708096817592327, "grad_norm": 1395.0408935546875, "learning_rate": 4.581723408836908e-05, "loss": 70.1472, "step": 67030 }, { "epoch": 0.27085008302460034, "grad_norm": 1065.451904296875, "learning_rate": 4.581530096683898e-05, "loss": 65.4421, "step": 67040 }, { "epoch": 0.270890484289968, "grad_norm": 1557.5567626953125, "learning_rate": 4.5813367439503875e-05, "loss": 50.4025, "step": 67050 }, { "epoch": 0.27093088555533557, "grad_norm": 990.9954223632812, "learning_rate": 4.5811433506401456e-05, "loss": 60.8415, "step": 67060 }, { "epoch": 0.2709712868207032, "grad_norm": 535.752685546875, "learning_rate": 4.580949916756942e-05, "loss": 56.8241, "step": 67070 }, { "epoch": 0.27101168808607085, "grad_norm": 589.4478149414062, "learning_rate": 4.580756442304549e-05, "loss": 43.5019, "step": 67080 }, { "epoch": 0.2710520893514385, "grad_norm": 471.8979187011719, "learning_rate": 4.580562927286738e-05, "loss": 83.2732, "step": 67090 }, { "epoch": 0.2710924906168061, "grad_norm": 1266.64794921875, "learning_rate": 4.5803693717072815e-05, "loss": 97.8621, "step": 67100 }, { "epoch": 0.27113289188217377, "grad_norm": 939.3637084960938, "learning_rate": 4.5801757755699534e-05, "loss": 73.3667, "step": 67110 }, { "epoch": 0.2711732931475414, "grad_norm": 516.1859741210938, "learning_rate": 4.579982138878527e-05, "loss": 64.6031, "step": 67120 }, { "epoch": 0.271213694412909, "grad_norm": 688.78125, "learning_rate": 4.579788461636778e-05, "loss": 86.9311, "step": 67130 }, { "epoch": 0.27125409567827663, "grad_norm": 726.7952270507812, "learning_rate": 4.579594743848482e-05, "loss": 97.9171, "step": 67140 }, { "epoch": 0.27129449694364427, "grad_norm": 1185.85498046875, "learning_rate": 4.579400985517416e-05, "loss": 92.5374, "step": 67150 }, { "epoch": 0.2713348982090119, "grad_norm": 928.0086059570312, "learning_rate": 4.579207186647357e-05, "loss": 46.1306, "step": 67160 }, { "epoch": 0.27137529947437955, "grad_norm": 5591.84228515625, "learning_rate": 4.579013347242085e-05, "loss": 74.2452, "step": 67170 }, { "epoch": 0.2714157007397472, "grad_norm": 886.691650390625, "learning_rate": 4.5788194673053756e-05, "loss": 55.0841, "step": 67180 }, { "epoch": 0.2714561020051148, "grad_norm": 1341.62109375, "learning_rate": 4.578625546841011e-05, "loss": 62.2266, "step": 67190 }, { "epoch": 0.2714965032704824, "grad_norm": 839.7449951171875, "learning_rate": 4.5784315858527715e-05, "loss": 73.6595, "step": 67200 }, { "epoch": 0.27153690453585005, "grad_norm": 570.7716674804688, "learning_rate": 4.578237584344438e-05, "loss": 68.3076, "step": 67210 }, { "epoch": 0.2715773058012177, "grad_norm": 673.6726684570312, "learning_rate": 4.578043542319793e-05, "loss": 59.9907, "step": 67220 }, { "epoch": 0.27161770706658533, "grad_norm": 966.4290161132812, "learning_rate": 4.577849459782619e-05, "loss": 56.2375, "step": 67230 }, { "epoch": 0.271658108331953, "grad_norm": 516.3497924804688, "learning_rate": 4.5776553367367e-05, "loss": 86.4351, "step": 67240 }, { "epoch": 0.2716985095973206, "grad_norm": 2003.4329833984375, "learning_rate": 4.577461173185821e-05, "loss": 76.1077, "step": 67250 }, { "epoch": 0.2717389108626882, "grad_norm": 829.5421752929688, "learning_rate": 4.5772669691337665e-05, "loss": 67.1411, "step": 67260 }, { "epoch": 0.27177931212805584, "grad_norm": 783.5784912109375, "learning_rate": 4.577072724584323e-05, "loss": 72.7227, "step": 67270 }, { "epoch": 0.2718197133934235, "grad_norm": 875.1643676757812, "learning_rate": 4.576878439541278e-05, "loss": 80.1554, "step": 67280 }, { "epoch": 0.2718601146587911, "grad_norm": 432.68487548828125, "learning_rate": 4.576684114008418e-05, "loss": 44.3341, "step": 67290 }, { "epoch": 0.27190051592415876, "grad_norm": 427.4791259765625, "learning_rate": 4.5764897479895317e-05, "loss": 74.892, "step": 67300 }, { "epoch": 0.2719409171895264, "grad_norm": 1177.6551513671875, "learning_rate": 4.576295341488409e-05, "loss": 68.8277, "step": 67310 }, { "epoch": 0.271981318454894, "grad_norm": 739.357421875, "learning_rate": 4.57610089450884e-05, "loss": 83.5637, "step": 67320 }, { "epoch": 0.2720217197202616, "grad_norm": 464.8116760253906, "learning_rate": 4.575906407054615e-05, "loss": 69.9499, "step": 67330 }, { "epoch": 0.27206212098562926, "grad_norm": 439.9012145996094, "learning_rate": 4.5757118791295264e-05, "loss": 54.5363, "step": 67340 }, { "epoch": 0.2721025222509969, "grad_norm": 415.9515380859375, "learning_rate": 4.575517310737365e-05, "loss": 73.2503, "step": 67350 }, { "epoch": 0.27214292351636454, "grad_norm": 784.4149780273438, "learning_rate": 4.575322701881926e-05, "loss": 64.8702, "step": 67360 }, { "epoch": 0.2721833247817322, "grad_norm": 389.4613342285156, "learning_rate": 4.575128052567002e-05, "loss": 76.2548, "step": 67370 }, { "epoch": 0.27222372604709977, "grad_norm": 942.818603515625, "learning_rate": 4.5749333627963884e-05, "loss": 76.8624, "step": 67380 }, { "epoch": 0.2722641273124674, "grad_norm": 895.9558715820312, "learning_rate": 4.574738632573881e-05, "loss": 69.907, "step": 67390 }, { "epoch": 0.27230452857783505, "grad_norm": 585.9174194335938, "learning_rate": 4.574543861903274e-05, "loss": 88.2039, "step": 67400 }, { "epoch": 0.2723449298432027, "grad_norm": 556.5941162109375, "learning_rate": 4.5743490507883685e-05, "loss": 128.176, "step": 67410 }, { "epoch": 0.2723853311085703, "grad_norm": 730.61865234375, "learning_rate": 4.574154199232959e-05, "loss": 69.3492, "step": 67420 }, { "epoch": 0.27242573237393797, "grad_norm": 887.3568725585938, "learning_rate": 4.573959307240847e-05, "loss": 69.9397, "step": 67430 }, { "epoch": 0.2724661336393056, "grad_norm": 549.9888916015625, "learning_rate": 4.5737643748158295e-05, "loss": 76.4168, "step": 67440 }, { "epoch": 0.2725065349046732, "grad_norm": 1629.8140869140625, "learning_rate": 4.573569401961708e-05, "loss": 59.1505, "step": 67450 }, { "epoch": 0.27254693617004083, "grad_norm": 982.314453125, "learning_rate": 4.573374388682283e-05, "loss": 70.0269, "step": 67460 }, { "epoch": 0.27258733743540847, "grad_norm": 499.3196716308594, "learning_rate": 4.573179334981358e-05, "loss": 41.5002, "step": 67470 }, { "epoch": 0.2726277387007761, "grad_norm": 764.8738403320312, "learning_rate": 4.5729842408627334e-05, "loss": 65.5099, "step": 67480 }, { "epoch": 0.27266813996614375, "grad_norm": 813.9560546875, "learning_rate": 4.572789106330214e-05, "loss": 67.2205, "step": 67490 }, { "epoch": 0.2727085412315114, "grad_norm": 813.4544677734375, "learning_rate": 4.572593931387604e-05, "loss": 58.4749, "step": 67500 }, { "epoch": 0.272748942496879, "grad_norm": 1321.0914306640625, "learning_rate": 4.572398716038709e-05, "loss": 76.9033, "step": 67510 }, { "epoch": 0.2727893437622466, "grad_norm": 809.7509155273438, "learning_rate": 4.572203460287333e-05, "loss": 44.7534, "step": 67520 }, { "epoch": 0.27282974502761426, "grad_norm": 502.35369873046875, "learning_rate": 4.5720081641372844e-05, "loss": 75.6196, "step": 67530 }, { "epoch": 0.2728701462929819, "grad_norm": 285.2467956542969, "learning_rate": 4.57181282759237e-05, "loss": 49.662, "step": 67540 }, { "epoch": 0.27291054755834954, "grad_norm": 1159.55419921875, "learning_rate": 4.571617450656397e-05, "loss": 68.6516, "step": 67550 }, { "epoch": 0.2729509488237172, "grad_norm": 883.5384521484375, "learning_rate": 4.5714220333331756e-05, "loss": 48.4618, "step": 67560 }, { "epoch": 0.2729913500890848, "grad_norm": 714.6936645507812, "learning_rate": 4.571226575626516e-05, "loss": 70.2103, "step": 67570 }, { "epoch": 0.2730317513544524, "grad_norm": 803.3701782226562, "learning_rate": 4.5710310775402274e-05, "loss": 65.889, "step": 67580 }, { "epoch": 0.27307215261982004, "grad_norm": 463.9601745605469, "learning_rate": 4.570835539078121e-05, "loss": 65.3356, "step": 67590 }, { "epoch": 0.2731125538851877, "grad_norm": 5190.55419921875, "learning_rate": 4.5706399602440106e-05, "loss": 79.4295, "step": 67600 }, { "epoch": 0.2731529551505553, "grad_norm": 1560.6190185546875, "learning_rate": 4.5704443410417075e-05, "loss": 60.054, "step": 67610 }, { "epoch": 0.27319335641592296, "grad_norm": 566.1893310546875, "learning_rate": 4.5702486814750265e-05, "loss": 65.9659, "step": 67620 }, { "epoch": 0.2732337576812906, "grad_norm": 497.7099914550781, "learning_rate": 4.570052981547782e-05, "loss": 60.542, "step": 67630 }, { "epoch": 0.2732741589466582, "grad_norm": 595.028076171875, "learning_rate": 4.569857241263788e-05, "loss": 113.0741, "step": 67640 }, { "epoch": 0.2733145602120258, "grad_norm": 562.2115478515625, "learning_rate": 4.569661460626862e-05, "loss": 73.3425, "step": 67650 }, { "epoch": 0.27335496147739347, "grad_norm": 3808.25244140625, "learning_rate": 4.5694656396408195e-05, "loss": 101.9904, "step": 67660 }, { "epoch": 0.2733953627427611, "grad_norm": 1519.3927001953125, "learning_rate": 4.56926977830948e-05, "loss": 89.2418, "step": 67670 }, { "epoch": 0.27343576400812875, "grad_norm": 1025.804931640625, "learning_rate": 4.56907387663666e-05, "loss": 69.6141, "step": 67680 }, { "epoch": 0.2734761652734964, "grad_norm": 852.5979614257812, "learning_rate": 4.56887793462618e-05, "loss": 88.1962, "step": 67690 }, { "epoch": 0.27351656653886397, "grad_norm": 589.6854858398438, "learning_rate": 4.5686819522818594e-05, "loss": 73.4965, "step": 67700 }, { "epoch": 0.2735569678042316, "grad_norm": 1321.8143310546875, "learning_rate": 4.56848592960752e-05, "loss": 79.9866, "step": 67710 }, { "epoch": 0.27359736906959925, "grad_norm": 1166.6270751953125, "learning_rate": 4.568289866606981e-05, "loss": 55.3361, "step": 67720 }, { "epoch": 0.2736377703349669, "grad_norm": 440.9592590332031, "learning_rate": 4.568093763284067e-05, "loss": 41.0626, "step": 67730 }, { "epoch": 0.27367817160033453, "grad_norm": 598.6520385742188, "learning_rate": 4.567897619642601e-05, "loss": 80.7803, "step": 67740 }, { "epoch": 0.27371857286570217, "grad_norm": 897.6016235351562, "learning_rate": 4.567701435686404e-05, "loss": 94.469, "step": 67750 }, { "epoch": 0.2737589741310698, "grad_norm": 926.674560546875, "learning_rate": 4.567505211419305e-05, "loss": 68.1472, "step": 67760 }, { "epoch": 0.2737993753964374, "grad_norm": 497.29339599609375, "learning_rate": 4.567308946845127e-05, "loss": 47.2474, "step": 67770 }, { "epoch": 0.27383977666180503, "grad_norm": 891.9127807617188, "learning_rate": 4.567112641967697e-05, "loss": 88.2951, "step": 67780 }, { "epoch": 0.2738801779271727, "grad_norm": 655.0907592773438, "learning_rate": 4.566916296790842e-05, "loss": 61.0948, "step": 67790 }, { "epoch": 0.2739205791925403, "grad_norm": 577.0123901367188, "learning_rate": 4.566719911318389e-05, "loss": 89.3447, "step": 67800 }, { "epoch": 0.27396098045790795, "grad_norm": 598.5209350585938, "learning_rate": 4.5665234855541675e-05, "loss": 42.0639, "step": 67810 }, { "epoch": 0.2740013817232756, "grad_norm": 578.243408203125, "learning_rate": 4.566327019502007e-05, "loss": 50.7425, "step": 67820 }, { "epoch": 0.2740417829886432, "grad_norm": 849.9849853515625, "learning_rate": 4.566130513165737e-05, "loss": 58.8878, "step": 67830 }, { "epoch": 0.2740821842540108, "grad_norm": 585.877685546875, "learning_rate": 4.565933966549189e-05, "loss": 64.4063, "step": 67840 }, { "epoch": 0.27412258551937846, "grad_norm": 600.2329711914062, "learning_rate": 4.565737379656195e-05, "loss": 62.7049, "step": 67850 }, { "epoch": 0.2741629867847461, "grad_norm": 1287.9954833984375, "learning_rate": 4.5655407524905866e-05, "loss": 55.8217, "step": 67860 }, { "epoch": 0.27420338805011374, "grad_norm": 640.1063842773438, "learning_rate": 4.5653440850561986e-05, "loss": 107.2745, "step": 67870 }, { "epoch": 0.2742437893154814, "grad_norm": 1059.8924560546875, "learning_rate": 4.565147377356864e-05, "loss": 64.9931, "step": 67880 }, { "epoch": 0.274284190580849, "grad_norm": 1559.3463134765625, "learning_rate": 4.564950629396418e-05, "loss": 117.1461, "step": 67890 }, { "epoch": 0.2743245918462166, "grad_norm": 367.62322998046875, "learning_rate": 4.564753841178697e-05, "loss": 45.144, "step": 67900 }, { "epoch": 0.27436499311158424, "grad_norm": 2093.43505859375, "learning_rate": 4.564557012707536e-05, "loss": 118.0313, "step": 67910 }, { "epoch": 0.2744053943769519, "grad_norm": 1412.019287109375, "learning_rate": 4.5643601439867734e-05, "loss": 85.6378, "step": 67920 }, { "epoch": 0.2744457956423195, "grad_norm": 1107.6322021484375, "learning_rate": 4.564163235020247e-05, "loss": 77.5563, "step": 67930 }, { "epoch": 0.27448619690768716, "grad_norm": 938.7551879882812, "learning_rate": 4.563966285811796e-05, "loss": 53.3907, "step": 67940 }, { "epoch": 0.2745265981730548, "grad_norm": 898.0419921875, "learning_rate": 4.5637692963652596e-05, "loss": 56.9066, "step": 67950 }, { "epoch": 0.2745669994384224, "grad_norm": 654.0993041992188, "learning_rate": 4.5635722666844775e-05, "loss": 71.2898, "step": 67960 }, { "epoch": 0.27460740070379, "grad_norm": 762.6791381835938, "learning_rate": 4.563375196773293e-05, "loss": 111.2552, "step": 67970 }, { "epoch": 0.27464780196915767, "grad_norm": 683.8074951171875, "learning_rate": 4.5631780866355454e-05, "loss": 79.4953, "step": 67980 }, { "epoch": 0.2746882032345253, "grad_norm": 0.0, "learning_rate": 4.56298093627508e-05, "loss": 79.2197, "step": 67990 }, { "epoch": 0.27472860449989295, "grad_norm": 525.245361328125, "learning_rate": 4.562783745695738e-05, "loss": 53.7112, "step": 68000 }, { "epoch": 0.2747690057652606, "grad_norm": 588.7833251953125, "learning_rate": 4.562586514901366e-05, "loss": 50.6445, "step": 68010 }, { "epoch": 0.27480940703062817, "grad_norm": 515.099853515625, "learning_rate": 4.5623892438958074e-05, "loss": 48.6461, "step": 68020 }, { "epoch": 0.2748498082959958, "grad_norm": 835.2371826171875, "learning_rate": 4.562191932682908e-05, "loss": 49.5645, "step": 68030 }, { "epoch": 0.27489020956136345, "grad_norm": 1118.2117919921875, "learning_rate": 4.561994581266516e-05, "loss": 97.7809, "step": 68040 }, { "epoch": 0.2749306108267311, "grad_norm": 514.0653076171875, "learning_rate": 4.561797189650478e-05, "loss": 76.3595, "step": 68050 }, { "epoch": 0.27497101209209873, "grad_norm": 461.3988342285156, "learning_rate": 4.561599757838643e-05, "loss": 66.2146, "step": 68060 }, { "epoch": 0.27501141335746637, "grad_norm": 710.6083374023438, "learning_rate": 4.561402285834858e-05, "loss": 65.2828, "step": 68070 }, { "epoch": 0.275051814622834, "grad_norm": 1474.451416015625, "learning_rate": 4.561204773642974e-05, "loss": 54.9727, "step": 68080 }, { "epoch": 0.2750922158882016, "grad_norm": 606.7708129882812, "learning_rate": 4.5610072212668434e-05, "loss": 57.3314, "step": 68090 }, { "epoch": 0.27513261715356924, "grad_norm": 706.001953125, "learning_rate": 4.560809628710315e-05, "loss": 72.5477, "step": 68100 }, { "epoch": 0.2751730184189369, "grad_norm": 805.1146850585938, "learning_rate": 4.560611995977242e-05, "loss": 62.7448, "step": 68110 }, { "epoch": 0.2752134196843045, "grad_norm": 502.4281311035156, "learning_rate": 4.560414323071477e-05, "loss": 40.9372, "step": 68120 }, { "epoch": 0.27525382094967216, "grad_norm": 468.7350158691406, "learning_rate": 4.560216609996874e-05, "loss": 71.8095, "step": 68130 }, { "epoch": 0.2752942222150398, "grad_norm": 557.2088623046875, "learning_rate": 4.5600188567572876e-05, "loss": 64.4124, "step": 68140 }, { "epoch": 0.2753346234804074, "grad_norm": 1210.1270751953125, "learning_rate": 4.559821063356574e-05, "loss": 85.0767, "step": 68150 }, { "epoch": 0.275375024745775, "grad_norm": 1038.1043701171875, "learning_rate": 4.559623229798587e-05, "loss": 71.8373, "step": 68160 }, { "epoch": 0.27541542601114266, "grad_norm": 929.40576171875, "learning_rate": 4.5594253560871854e-05, "loss": 72.4507, "step": 68170 }, { "epoch": 0.2754558272765103, "grad_norm": 957.2470092773438, "learning_rate": 4.559227442226226e-05, "loss": 97.403, "step": 68180 }, { "epoch": 0.27549622854187794, "grad_norm": 706.5764770507812, "learning_rate": 4.559029488219567e-05, "loss": 64.6209, "step": 68190 }, { "epoch": 0.2755366298072456, "grad_norm": 434.8182373046875, "learning_rate": 4.558831494071069e-05, "loss": 87.0814, "step": 68200 }, { "epoch": 0.2755770310726132, "grad_norm": 760.851318359375, "learning_rate": 4.5586334597845904e-05, "loss": 80.2775, "step": 68210 }, { "epoch": 0.2756174323379808, "grad_norm": 256.12982177734375, "learning_rate": 4.558435385363993e-05, "loss": 53.6615, "step": 68220 }, { "epoch": 0.27565783360334845, "grad_norm": 421.9570617675781, "learning_rate": 4.5582372708131385e-05, "loss": 75.5364, "step": 68230 }, { "epoch": 0.2756982348687161, "grad_norm": 812.7416381835938, "learning_rate": 4.558039116135887e-05, "loss": 65.1539, "step": 68240 }, { "epoch": 0.2757386361340837, "grad_norm": 639.4966430664062, "learning_rate": 4.557840921336105e-05, "loss": 54.6641, "step": 68250 }, { "epoch": 0.27577903739945137, "grad_norm": 370.4179992675781, "learning_rate": 4.557642686417654e-05, "loss": 98.5805, "step": 68260 }, { "epoch": 0.275819438664819, "grad_norm": 516.9441528320312, "learning_rate": 4.5574444113844e-05, "loss": 76.0973, "step": 68270 }, { "epoch": 0.2758598399301866, "grad_norm": 434.20660400390625, "learning_rate": 4.5572460962402075e-05, "loss": 59.694, "step": 68280 }, { "epoch": 0.27590024119555423, "grad_norm": 1096.5814208984375, "learning_rate": 4.557047740988944e-05, "loss": 83.0501, "step": 68290 }, { "epoch": 0.27594064246092187, "grad_norm": 679.775634765625, "learning_rate": 4.556849345634475e-05, "loss": 71.1283, "step": 68300 }, { "epoch": 0.2759810437262895, "grad_norm": 1577.58251953125, "learning_rate": 4.5566509101806695e-05, "loss": 99.9247, "step": 68310 }, { "epoch": 0.27602144499165715, "grad_norm": 1189.281982421875, "learning_rate": 4.556452434631395e-05, "loss": 80.3163, "step": 68320 }, { "epoch": 0.2760618462570248, "grad_norm": 809.91015625, "learning_rate": 4.5562539189905223e-05, "loss": 104.2898, "step": 68330 }, { "epoch": 0.2761022475223924, "grad_norm": 1438.5238037109375, "learning_rate": 4.5560553632619205e-05, "loss": 99.692, "step": 68340 }, { "epoch": 0.27614264878776, "grad_norm": 330.85479736328125, "learning_rate": 4.555856767449461e-05, "loss": 67.1174, "step": 68350 }, { "epoch": 0.27618305005312765, "grad_norm": 803.5668334960938, "learning_rate": 4.555658131557015e-05, "loss": 99.5837, "step": 68360 }, { "epoch": 0.2762234513184953, "grad_norm": 674.5006713867188, "learning_rate": 4.555459455588456e-05, "loss": 65.4563, "step": 68370 }, { "epoch": 0.27626385258386293, "grad_norm": 773.7620849609375, "learning_rate": 4.555260739547657e-05, "loss": 43.1016, "step": 68380 }, { "epoch": 0.2763042538492306, "grad_norm": 639.3969116210938, "learning_rate": 4.55506198343849e-05, "loss": 78.3155, "step": 68390 }, { "epoch": 0.2763446551145982, "grad_norm": 373.80694580078125, "learning_rate": 4.5548631872648326e-05, "loss": 83.402, "step": 68400 }, { "epoch": 0.2763850563799658, "grad_norm": 420.725830078125, "learning_rate": 4.55466435103056e-05, "loss": 61.3818, "step": 68410 }, { "epoch": 0.27642545764533344, "grad_norm": 877.37890625, "learning_rate": 4.554465474739548e-05, "loss": 59.281, "step": 68420 }, { "epoch": 0.2764658589107011, "grad_norm": 295.0977478027344, "learning_rate": 4.5542665583956736e-05, "loss": 36.8095, "step": 68430 }, { "epoch": 0.2765062601760687, "grad_norm": 288.376953125, "learning_rate": 4.5540676020028145e-05, "loss": 68.784, "step": 68440 }, { "epoch": 0.27654666144143636, "grad_norm": 1751.3602294921875, "learning_rate": 4.5538686055648506e-05, "loss": 93.4555, "step": 68450 }, { "epoch": 0.276587062706804, "grad_norm": 857.876953125, "learning_rate": 4.5536695690856606e-05, "loss": 53.5522, "step": 68460 }, { "epoch": 0.2766274639721716, "grad_norm": 1057.3465576171875, "learning_rate": 4.553470492569125e-05, "loss": 58.7135, "step": 68470 }, { "epoch": 0.2766678652375392, "grad_norm": 602.7147216796875, "learning_rate": 4.553271376019125e-05, "loss": 73.2039, "step": 68480 }, { "epoch": 0.27670826650290686, "grad_norm": 799.6278686523438, "learning_rate": 4.5530722194395425e-05, "loss": 66.5906, "step": 68490 }, { "epoch": 0.2767486677682745, "grad_norm": 539.0422973632812, "learning_rate": 4.5528730228342605e-05, "loss": 79.3498, "step": 68500 }, { "epoch": 0.27678906903364214, "grad_norm": 838.4744873046875, "learning_rate": 4.552673786207161e-05, "loss": 46.1416, "step": 68510 }, { "epoch": 0.2768294702990098, "grad_norm": 1674.3841552734375, "learning_rate": 4.55247450956213e-05, "loss": 72.0151, "step": 68520 }, { "epoch": 0.2768698715643774, "grad_norm": 5940.70263671875, "learning_rate": 4.552275192903052e-05, "loss": 75.9517, "step": 68530 }, { "epoch": 0.276910272829745, "grad_norm": 1089.8138427734375, "learning_rate": 4.552075836233812e-05, "loss": 97.0178, "step": 68540 }, { "epoch": 0.27695067409511265, "grad_norm": 608.082275390625, "learning_rate": 4.551876439558298e-05, "loss": 85.0739, "step": 68550 }, { "epoch": 0.2769910753604803, "grad_norm": 1392.0042724609375, "learning_rate": 4.5516770028803954e-05, "loss": 60.7242, "step": 68560 }, { "epoch": 0.27703147662584793, "grad_norm": 758.804931640625, "learning_rate": 4.5514775262039934e-05, "loss": 61.6169, "step": 68570 }, { "epoch": 0.27707187789121557, "grad_norm": 3674.3056640625, "learning_rate": 4.551278009532981e-05, "loss": 77.6101, "step": 68580 }, { "epoch": 0.2771122791565832, "grad_norm": 1095.73876953125, "learning_rate": 4.551078452871248e-05, "loss": 54.9264, "step": 68590 }, { "epoch": 0.2771526804219508, "grad_norm": 1469.385009765625, "learning_rate": 4.550878856222685e-05, "loss": 70.6401, "step": 68600 }, { "epoch": 0.27719308168731843, "grad_norm": 866.8749389648438, "learning_rate": 4.5506792195911817e-05, "loss": 50.9549, "step": 68610 }, { "epoch": 0.27723348295268607, "grad_norm": 735.7921142578125, "learning_rate": 4.550479542980632e-05, "loss": 85.6192, "step": 68620 }, { "epoch": 0.2772738842180537, "grad_norm": 400.9674377441406, "learning_rate": 4.550279826394928e-05, "loss": 97.7626, "step": 68630 }, { "epoch": 0.27731428548342135, "grad_norm": 700.01318359375, "learning_rate": 4.5500800698379624e-05, "loss": 66.2745, "step": 68640 }, { "epoch": 0.277354686748789, "grad_norm": 1081.5074462890625, "learning_rate": 4.549880273313631e-05, "loss": 73.2586, "step": 68650 }, { "epoch": 0.2773950880141566, "grad_norm": 770.979736328125, "learning_rate": 4.5496804368258286e-05, "loss": 56.8278, "step": 68660 }, { "epoch": 0.2774354892795242, "grad_norm": 733.8268432617188, "learning_rate": 4.549480560378451e-05, "loss": 57.6247, "step": 68670 }, { "epoch": 0.27747589054489186, "grad_norm": 744.9702758789062, "learning_rate": 4.5492806439753935e-05, "loss": 59.9518, "step": 68680 }, { "epoch": 0.2775162918102595, "grad_norm": 701.0172729492188, "learning_rate": 4.549080687620555e-05, "loss": 86.6771, "step": 68690 }, { "epoch": 0.27755669307562714, "grad_norm": 387.11041259765625, "learning_rate": 4.548880691317835e-05, "loss": 79.3791, "step": 68700 }, { "epoch": 0.2775970943409948, "grad_norm": 1299.3056640625, "learning_rate": 4.54868065507113e-05, "loss": 64.44, "step": 68710 }, { "epoch": 0.2776374956063624, "grad_norm": 795.0128173828125, "learning_rate": 4.548480578884341e-05, "loss": 60.6718, "step": 68720 }, { "epoch": 0.27767789687173, "grad_norm": 332.22100830078125, "learning_rate": 4.5482804627613686e-05, "loss": 61.0125, "step": 68730 }, { "epoch": 0.27771829813709764, "grad_norm": 525.1163940429688, "learning_rate": 4.548080306706114e-05, "loss": 74.0871, "step": 68740 }, { "epoch": 0.2777586994024653, "grad_norm": 1393.9156494140625, "learning_rate": 4.54788011072248e-05, "loss": 72.4176, "step": 68750 }, { "epoch": 0.2777991006678329, "grad_norm": 950.6893920898438, "learning_rate": 4.547679874814368e-05, "loss": 62.7376, "step": 68760 }, { "epoch": 0.27783950193320056, "grad_norm": 1314.624267578125, "learning_rate": 4.547479598985683e-05, "loss": 95.1351, "step": 68770 }, { "epoch": 0.2778799031985682, "grad_norm": 360.00762939453125, "learning_rate": 4.547279283240329e-05, "loss": 52.8572, "step": 68780 }, { "epoch": 0.2779203044639358, "grad_norm": 789.6798706054688, "learning_rate": 4.547078927582212e-05, "loss": 79.6554, "step": 68790 }, { "epoch": 0.2779607057293034, "grad_norm": 973.8438110351562, "learning_rate": 4.5468785320152365e-05, "loss": 66.0115, "step": 68800 }, { "epoch": 0.27800110699467107, "grad_norm": 351.57525634765625, "learning_rate": 4.546678096543311e-05, "loss": 62.568, "step": 68810 }, { "epoch": 0.2780415082600387, "grad_norm": 1510.792724609375, "learning_rate": 4.546477621170342e-05, "loss": 82.8039, "step": 68820 }, { "epoch": 0.27808190952540635, "grad_norm": 1050.9688720703125, "learning_rate": 4.546277105900237e-05, "loss": 71.6794, "step": 68830 }, { "epoch": 0.278122310790774, "grad_norm": 1624.618408203125, "learning_rate": 4.5460765507369084e-05, "loss": 64.593, "step": 68840 }, { "epoch": 0.2781627120561416, "grad_norm": 1015.446533203125, "learning_rate": 4.5458759556842624e-05, "loss": 73.7418, "step": 68850 }, { "epoch": 0.2782031133215092, "grad_norm": 1055.7169189453125, "learning_rate": 4.545675320746212e-05, "loss": 89.4522, "step": 68860 }, { "epoch": 0.27824351458687685, "grad_norm": 939.6693725585938, "learning_rate": 4.545474645926668e-05, "loss": 55.4124, "step": 68870 }, { "epoch": 0.2782839158522445, "grad_norm": 524.6759033203125, "learning_rate": 4.5452739312295436e-05, "loss": 67.2065, "step": 68880 }, { "epoch": 0.27832431711761213, "grad_norm": 636.6887817382812, "learning_rate": 4.54507317665875e-05, "loss": 54.9611, "step": 68890 }, { "epoch": 0.27836471838297977, "grad_norm": 1205.8494873046875, "learning_rate": 4.544872382218202e-05, "loss": 55.5338, "step": 68900 }, { "epoch": 0.2784051196483474, "grad_norm": 400.2951965332031, "learning_rate": 4.544671547911814e-05, "loss": 72.6024, "step": 68910 }, { "epoch": 0.278445520913715, "grad_norm": 671.6885986328125, "learning_rate": 4.5444706737435014e-05, "loss": 88.0325, "step": 68920 }, { "epoch": 0.27848592217908263, "grad_norm": 905.302490234375, "learning_rate": 4.544269759717181e-05, "loss": 90.4285, "step": 68930 }, { "epoch": 0.2785263234444503, "grad_norm": 713.8814086914062, "learning_rate": 4.5440688058367686e-05, "loss": 70.4518, "step": 68940 }, { "epoch": 0.2785667247098179, "grad_norm": 986.9356689453125, "learning_rate": 4.543867812106183e-05, "loss": 61.4856, "step": 68950 }, { "epoch": 0.27860712597518555, "grad_norm": 852.2457885742188, "learning_rate": 4.543666778529342e-05, "loss": 67.6646, "step": 68960 }, { "epoch": 0.2786475272405532, "grad_norm": 1037.552490234375, "learning_rate": 4.543465705110165e-05, "loss": 50.1012, "step": 68970 }, { "epoch": 0.2786879285059208, "grad_norm": 327.4298095703125, "learning_rate": 4.543264591852572e-05, "loss": 61.3465, "step": 68980 }, { "epoch": 0.2787283297712884, "grad_norm": 866.1886596679688, "learning_rate": 4.543063438760483e-05, "loss": 48.84, "step": 68990 }, { "epoch": 0.27876873103665606, "grad_norm": 708.6840209960938, "learning_rate": 4.542862245837821e-05, "loss": 66.3793, "step": 69000 }, { "epoch": 0.2788091323020237, "grad_norm": 1597.6319580078125, "learning_rate": 4.5426610130885087e-05, "loss": 80.3279, "step": 69010 }, { "epoch": 0.27884953356739134, "grad_norm": 906.7168579101562, "learning_rate": 4.542459740516467e-05, "loss": 73.8522, "step": 69020 }, { "epoch": 0.278889934832759, "grad_norm": 890.2028198242188, "learning_rate": 4.542258428125622e-05, "loss": 90.2994, "step": 69030 }, { "epoch": 0.2789303360981266, "grad_norm": 612.5167846679688, "learning_rate": 4.542057075919897e-05, "loss": 87.7907, "step": 69040 }, { "epoch": 0.2789707373634942, "grad_norm": 983.6898193359375, "learning_rate": 4.541855683903219e-05, "loss": 65.7596, "step": 69050 }, { "epoch": 0.27901113862886184, "grad_norm": 366.17645263671875, "learning_rate": 4.541654252079513e-05, "loss": 63.9307, "step": 69060 }, { "epoch": 0.2790515398942295, "grad_norm": 489.0401306152344, "learning_rate": 4.541452780452705e-05, "loss": 41.8797, "step": 69070 }, { "epoch": 0.2790919411595971, "grad_norm": 983.3743896484375, "learning_rate": 4.5412512690267246e-05, "loss": 111.5006, "step": 69080 }, { "epoch": 0.27913234242496476, "grad_norm": 1284.0035400390625, "learning_rate": 4.5410497178055e-05, "loss": 58.7547, "step": 69090 }, { "epoch": 0.2791727436903324, "grad_norm": 731.1244506835938, "learning_rate": 4.5408481267929605e-05, "loss": 81.0387, "step": 69100 }, { "epoch": 0.2792131449557, "grad_norm": 717.0902099609375, "learning_rate": 4.540646495993036e-05, "loss": 130.641, "step": 69110 }, { "epoch": 0.27925354622106763, "grad_norm": 844.9999389648438, "learning_rate": 4.540444825409657e-05, "loss": 67.0038, "step": 69120 }, { "epoch": 0.27929394748643527, "grad_norm": 399.6801452636719, "learning_rate": 4.540243115046756e-05, "loss": 55.7817, "step": 69130 }, { "epoch": 0.2793343487518029, "grad_norm": 438.31207275390625, "learning_rate": 4.540041364908265e-05, "loss": 63.9536, "step": 69140 }, { "epoch": 0.27937475001717055, "grad_norm": 1663.7740478515625, "learning_rate": 4.539839574998117e-05, "loss": 80.7982, "step": 69150 }, { "epoch": 0.2794151512825382, "grad_norm": 1818.9991455078125, "learning_rate": 4.5396377453202466e-05, "loss": 113.5125, "step": 69160 }, { "epoch": 0.27945555254790583, "grad_norm": 736.188720703125, "learning_rate": 4.539435875878588e-05, "loss": 81.4038, "step": 69170 }, { "epoch": 0.2794959538132734, "grad_norm": 879.2432250976562, "learning_rate": 4.539233966677078e-05, "loss": 87.2281, "step": 69180 }, { "epoch": 0.27953635507864105, "grad_norm": 560.682373046875, "learning_rate": 4.539032017719651e-05, "loss": 82.0745, "step": 69190 }, { "epoch": 0.2795767563440087, "grad_norm": 943.4149780273438, "learning_rate": 4.5388300290102456e-05, "loss": 117.8961, "step": 69200 }, { "epoch": 0.27961715760937633, "grad_norm": 854.2255249023438, "learning_rate": 4.538628000552799e-05, "loss": 46.8455, "step": 69210 }, { "epoch": 0.27965755887474397, "grad_norm": 768.5675659179688, "learning_rate": 4.5384259323512504e-05, "loss": 77.2018, "step": 69220 }, { "epoch": 0.2796979601401116, "grad_norm": 790.1107177734375, "learning_rate": 4.538223824409538e-05, "loss": 62.6797, "step": 69230 }, { "epoch": 0.2797383614054792, "grad_norm": 0.0, "learning_rate": 4.538021676731603e-05, "loss": 50.2399, "step": 69240 }, { "epoch": 0.27977876267084684, "grad_norm": 2715.3037109375, "learning_rate": 4.537819489321386e-05, "loss": 107.9521, "step": 69250 }, { "epoch": 0.2798191639362145, "grad_norm": 2060.965576171875, "learning_rate": 4.537617262182829e-05, "loss": 65.6806, "step": 69260 }, { "epoch": 0.2798595652015821, "grad_norm": 1369.305419921875, "learning_rate": 4.5374149953198746e-05, "loss": 92.2145, "step": 69270 }, { "epoch": 0.27989996646694976, "grad_norm": 830.0508422851562, "learning_rate": 4.5372126887364655e-05, "loss": 106.5613, "step": 69280 }, { "epoch": 0.2799403677323174, "grad_norm": 942.8829345703125, "learning_rate": 4.5370103424365474e-05, "loss": 76.8458, "step": 69290 }, { "epoch": 0.279980768997685, "grad_norm": 842.5079345703125, "learning_rate": 4.536807956424063e-05, "loss": 53.2971, "step": 69300 }, { "epoch": 0.2800211702630526, "grad_norm": 1059.9853515625, "learning_rate": 4.5366055307029585e-05, "loss": 80.9336, "step": 69310 }, { "epoch": 0.28006157152842026, "grad_norm": 326.0080261230469, "learning_rate": 4.536403065277182e-05, "loss": 78.7748, "step": 69320 }, { "epoch": 0.2801019727937879, "grad_norm": 275.553955078125, "learning_rate": 4.536200560150678e-05, "loss": 69.185, "step": 69330 }, { "epoch": 0.28014237405915554, "grad_norm": 686.8462524414062, "learning_rate": 4.5359980153273964e-05, "loss": 68.1509, "step": 69340 }, { "epoch": 0.2801827753245232, "grad_norm": 630.9837646484375, "learning_rate": 4.535795430811285e-05, "loss": 107.0847, "step": 69350 }, { "epoch": 0.2802231765898908, "grad_norm": 2537.67138671875, "learning_rate": 4.535592806606294e-05, "loss": 64.2041, "step": 69360 }, { "epoch": 0.2802635778552584, "grad_norm": 871.0073852539062, "learning_rate": 4.5353901427163725e-05, "loss": 56.6685, "step": 69370 }, { "epoch": 0.28030397912062605, "grad_norm": 805.7468872070312, "learning_rate": 4.535187439145473e-05, "loss": 56.4696, "step": 69380 }, { "epoch": 0.2803443803859937, "grad_norm": 638.688720703125, "learning_rate": 4.534984695897546e-05, "loss": 61.3559, "step": 69390 }, { "epoch": 0.2803847816513613, "grad_norm": 916.9631958007812, "learning_rate": 4.534781912976546e-05, "loss": 81.307, "step": 69400 }, { "epoch": 0.28042518291672897, "grad_norm": 695.0220947265625, "learning_rate": 4.534579090386424e-05, "loss": 69.4657, "step": 69410 }, { "epoch": 0.2804655841820966, "grad_norm": 2208.393310546875, "learning_rate": 4.5343762281311345e-05, "loss": 78.4579, "step": 69420 }, { "epoch": 0.2805059854474642, "grad_norm": 903.3010864257812, "learning_rate": 4.534173326214634e-05, "loss": 85.2893, "step": 69430 }, { "epoch": 0.28054638671283183, "grad_norm": 1086.91455078125, "learning_rate": 4.533970384640877e-05, "loss": 56.4757, "step": 69440 }, { "epoch": 0.28058678797819947, "grad_norm": 841.8659057617188, "learning_rate": 4.53376740341382e-05, "loss": 71.8159, "step": 69450 }, { "epoch": 0.2806271892435671, "grad_norm": 1415.8238525390625, "learning_rate": 4.533564382537421e-05, "loss": 62.5434, "step": 69460 }, { "epoch": 0.28066759050893475, "grad_norm": 974.2418212890625, "learning_rate": 4.533361322015637e-05, "loss": 69.978, "step": 69470 }, { "epoch": 0.2807079917743024, "grad_norm": 2103.853515625, "learning_rate": 4.533158221852427e-05, "loss": 81.6764, "step": 69480 }, { "epoch": 0.28074839303967003, "grad_norm": 644.2200317382812, "learning_rate": 4.532955082051751e-05, "loss": 49.908, "step": 69490 }, { "epoch": 0.2807887943050376, "grad_norm": 603.8206176757812, "learning_rate": 4.532751902617569e-05, "loss": 112.3736, "step": 69500 }, { "epoch": 0.28082919557040525, "grad_norm": 700.8045654296875, "learning_rate": 4.532548683553842e-05, "loss": 65.1945, "step": 69510 }, { "epoch": 0.2808695968357729, "grad_norm": 565.0137329101562, "learning_rate": 4.5323454248645324e-05, "loss": 89.2239, "step": 69520 }, { "epoch": 0.28090999810114053, "grad_norm": 550.55859375, "learning_rate": 4.532142126553603e-05, "loss": 62.9486, "step": 69530 }, { "epoch": 0.2809503993665082, "grad_norm": 819.1644287109375, "learning_rate": 4.5319387886250156e-05, "loss": 79.9527, "step": 69540 }, { "epoch": 0.2809908006318758, "grad_norm": 1997.134033203125, "learning_rate": 4.531735411082735e-05, "loss": 73.3801, "step": 69550 }, { "epoch": 0.2810312018972434, "grad_norm": 233.41488647460938, "learning_rate": 4.531531993930727e-05, "loss": 79.5541, "step": 69560 }, { "epoch": 0.28107160316261104, "grad_norm": 981.9132080078125, "learning_rate": 4.5313285371729575e-05, "loss": 66.0245, "step": 69570 }, { "epoch": 0.2811120044279787, "grad_norm": 600.988037109375, "learning_rate": 4.531125040813392e-05, "loss": 71.3912, "step": 69580 }, { "epoch": 0.2811524056933463, "grad_norm": 760.9716796875, "learning_rate": 4.530921504855997e-05, "loss": 52.4341, "step": 69590 }, { "epoch": 0.28119280695871396, "grad_norm": 1831.3338623046875, "learning_rate": 4.530717929304743e-05, "loss": 109.2623, "step": 69600 }, { "epoch": 0.2812332082240816, "grad_norm": 838.1921997070312, "learning_rate": 4.5305143141635976e-05, "loss": 68.4752, "step": 69610 }, { "epoch": 0.2812736094894492, "grad_norm": 446.0285949707031, "learning_rate": 4.5303106594365296e-05, "loss": 53.7918, "step": 69620 }, { "epoch": 0.2813140107548168, "grad_norm": 1072.6981201171875, "learning_rate": 4.530106965127511e-05, "loss": 71.5331, "step": 69630 }, { "epoch": 0.28135441202018446, "grad_norm": 841.3654174804688, "learning_rate": 4.529903231240511e-05, "loss": 74.8359, "step": 69640 }, { "epoch": 0.2813948132855521, "grad_norm": 685.6234741210938, "learning_rate": 4.529699457779503e-05, "loss": 77.722, "step": 69650 }, { "epoch": 0.28143521455091974, "grad_norm": 1382.5294189453125, "learning_rate": 4.5294956447484584e-05, "loss": 72.9175, "step": 69660 }, { "epoch": 0.2814756158162874, "grad_norm": 651.1674194335938, "learning_rate": 4.529291792151351e-05, "loss": 73.6339, "step": 69670 }, { "epoch": 0.281516017081655, "grad_norm": 540.8804931640625, "learning_rate": 4.529087899992156e-05, "loss": 36.5959, "step": 69680 }, { "epoch": 0.2815564183470226, "grad_norm": 648.30615234375, "learning_rate": 4.528883968274848e-05, "loss": 96.0729, "step": 69690 }, { "epoch": 0.28159681961239025, "grad_norm": 1536.320556640625, "learning_rate": 4.528679997003403e-05, "loss": 62.9985, "step": 69700 }, { "epoch": 0.2816372208777579, "grad_norm": 458.5219421386719, "learning_rate": 4.528475986181796e-05, "loss": 69.9463, "step": 69710 }, { "epoch": 0.28167762214312553, "grad_norm": 342.8268127441406, "learning_rate": 4.5282719358140056e-05, "loss": 87.2414, "step": 69720 }, { "epoch": 0.28171802340849317, "grad_norm": 564.786376953125, "learning_rate": 4.5280678459040095e-05, "loss": 61.6622, "step": 69730 }, { "epoch": 0.2817584246738608, "grad_norm": 665.1903686523438, "learning_rate": 4.5278637164557866e-05, "loss": 116.5534, "step": 69740 }, { "epoch": 0.2817988259392284, "grad_norm": 736.6189575195312, "learning_rate": 4.527659547473317e-05, "loss": 67.6608, "step": 69750 }, { "epoch": 0.28183922720459603, "grad_norm": 660.351806640625, "learning_rate": 4.52745533896058e-05, "loss": 84.5275, "step": 69760 }, { "epoch": 0.2818796284699637, "grad_norm": 641.83642578125, "learning_rate": 4.527251090921558e-05, "loss": 64.5504, "step": 69770 }, { "epoch": 0.2819200297353313, "grad_norm": 966.1611328125, "learning_rate": 4.527046803360232e-05, "loss": 66.6436, "step": 69780 }, { "epoch": 0.28196043100069895, "grad_norm": 1488.1563720703125, "learning_rate": 4.526842476280585e-05, "loss": 72.4152, "step": 69790 }, { "epoch": 0.2820008322660666, "grad_norm": 809.4866333007812, "learning_rate": 4.5266381096866e-05, "loss": 56.8854, "step": 69800 }, { "epoch": 0.28204123353143423, "grad_norm": 945.8799438476562, "learning_rate": 4.526433703582262e-05, "loss": 85.1573, "step": 69810 }, { "epoch": 0.2820816347968018, "grad_norm": 500.84307861328125, "learning_rate": 4.5262292579715556e-05, "loss": 37.6372, "step": 69820 }, { "epoch": 0.28212203606216946, "grad_norm": 909.1983642578125, "learning_rate": 4.526024772858467e-05, "loss": 69.4535, "step": 69830 }, { "epoch": 0.2821624373275371, "grad_norm": 1079.6373291015625, "learning_rate": 4.525820248246982e-05, "loss": 111.5219, "step": 69840 }, { "epoch": 0.28220283859290474, "grad_norm": 727.4838256835938, "learning_rate": 4.5256156841410886e-05, "loss": 66.2702, "step": 69850 }, { "epoch": 0.2822432398582724, "grad_norm": 1162.87353515625, "learning_rate": 4.525411080544775e-05, "loss": 62.2597, "step": 69860 }, { "epoch": 0.28228364112364, "grad_norm": 2795.06640625, "learning_rate": 4.5252064374620285e-05, "loss": 99.6256, "step": 69870 }, { "epoch": 0.2823240423890076, "grad_norm": 672.3999633789062, "learning_rate": 4.5250017548968404e-05, "loss": 104.1018, "step": 69880 }, { "epoch": 0.28236444365437524, "grad_norm": 1359.24609375, "learning_rate": 4.524797032853201e-05, "loss": 90.7683, "step": 69890 }, { "epoch": 0.2824048449197429, "grad_norm": 744.1480102539062, "learning_rate": 4.5245922713350996e-05, "loss": 73.5604, "step": 69900 }, { "epoch": 0.2824452461851105, "grad_norm": 629.0797729492188, "learning_rate": 4.524387470346531e-05, "loss": 79.3253, "step": 69910 }, { "epoch": 0.28248564745047816, "grad_norm": 846.8230590820312, "learning_rate": 4.524182629891486e-05, "loss": 52.2285, "step": 69920 }, { "epoch": 0.2825260487158458, "grad_norm": 916.1362915039062, "learning_rate": 4.523977749973958e-05, "loss": 47.4978, "step": 69930 }, { "epoch": 0.2825664499812134, "grad_norm": 878.1392822265625, "learning_rate": 4.523772830597942e-05, "loss": 69.1207, "step": 69940 }, { "epoch": 0.282606851246581, "grad_norm": 1516.745361328125, "learning_rate": 4.523567871767434e-05, "loss": 70.5018, "step": 69950 }, { "epoch": 0.28264725251194867, "grad_norm": 1540.39501953125, "learning_rate": 4.523362873486427e-05, "loss": 61.0742, "step": 69960 }, { "epoch": 0.2826876537773163, "grad_norm": 588.6622314453125, "learning_rate": 4.52315783575892e-05, "loss": 105.1381, "step": 69970 }, { "epoch": 0.28272805504268395, "grad_norm": 792.640380859375, "learning_rate": 4.522952758588909e-05, "loss": 74.2773, "step": 69980 }, { "epoch": 0.2827684563080516, "grad_norm": 381.4082336425781, "learning_rate": 4.5227476419803916e-05, "loss": 42.4223, "step": 69990 }, { "epoch": 0.2828088575734192, "grad_norm": 2376.44775390625, "learning_rate": 4.522542485937369e-05, "loss": 73.0111, "step": 70000 }, { "epoch": 0.2828492588387868, "grad_norm": 478.2873229980469, "learning_rate": 4.5223372904638386e-05, "loss": 89.4105, "step": 70010 }, { "epoch": 0.28288966010415445, "grad_norm": 1394.88623046875, "learning_rate": 4.5221320555638016e-05, "loss": 57.0316, "step": 70020 }, { "epoch": 0.2829300613695221, "grad_norm": 1728.24267578125, "learning_rate": 4.521926781241259e-05, "loss": 60.0436, "step": 70030 }, { "epoch": 0.28297046263488973, "grad_norm": 313.620361328125, "learning_rate": 4.521721467500213e-05, "loss": 78.582, "step": 70040 }, { "epoch": 0.28301086390025737, "grad_norm": 876.0878295898438, "learning_rate": 4.521516114344666e-05, "loss": 96.7629, "step": 70050 }, { "epoch": 0.283051265165625, "grad_norm": 715.8422241210938, "learning_rate": 4.521310721778622e-05, "loss": 68.7341, "step": 70060 }, { "epoch": 0.2830916664309926, "grad_norm": 984.6596069335938, "learning_rate": 4.5211052898060855e-05, "loss": 85.3589, "step": 70070 }, { "epoch": 0.28313206769636023, "grad_norm": 440.2537536621094, "learning_rate": 4.5208998184310596e-05, "loss": 66.0765, "step": 70080 }, { "epoch": 0.2831724689617279, "grad_norm": 958.81982421875, "learning_rate": 4.520694307657551e-05, "loss": 62.4521, "step": 70090 }, { "epoch": 0.2832128702270955, "grad_norm": 592.9948120117188, "learning_rate": 4.5204887574895684e-05, "loss": 76.5842, "step": 70100 }, { "epoch": 0.28325327149246315, "grad_norm": 810.19287109375, "learning_rate": 4.520283167931115e-05, "loss": 67.7459, "step": 70110 }, { "epoch": 0.2832936727578308, "grad_norm": 486.7115173339844, "learning_rate": 4.5200775389862026e-05, "loss": 57.3887, "step": 70120 }, { "epoch": 0.2833340740231984, "grad_norm": 1011.84033203125, "learning_rate": 4.519871870658838e-05, "loss": 77.9214, "step": 70130 }, { "epoch": 0.283374475288566, "grad_norm": 1170.3331298828125, "learning_rate": 4.519666162953032e-05, "loss": 81.3161, "step": 70140 }, { "epoch": 0.28341487655393366, "grad_norm": 885.1085815429688, "learning_rate": 4.519460415872794e-05, "loss": 73.1431, "step": 70150 }, { "epoch": 0.2834552778193013, "grad_norm": 746.1101684570312, "learning_rate": 4.519254629422136e-05, "loss": 75.5062, "step": 70160 }, { "epoch": 0.28349567908466894, "grad_norm": 805.8640747070312, "learning_rate": 4.5190488036050685e-05, "loss": 62.1489, "step": 70170 }, { "epoch": 0.2835360803500366, "grad_norm": 1288.6954345703125, "learning_rate": 4.518842938425605e-05, "loss": 70.3223, "step": 70180 }, { "epoch": 0.2835764816154042, "grad_norm": 820.5307006835938, "learning_rate": 4.51863703388776e-05, "loss": 49.1447, "step": 70190 }, { "epoch": 0.2836168828807718, "grad_norm": 279.9125671386719, "learning_rate": 4.5184310899955465e-05, "loss": 41.5823, "step": 70200 }, { "epoch": 0.28365728414613944, "grad_norm": 2208.78564453125, "learning_rate": 4.518225106752979e-05, "loss": 63.2972, "step": 70210 }, { "epoch": 0.2836976854115071, "grad_norm": 609.0946655273438, "learning_rate": 4.5180190841640747e-05, "loss": 59.1739, "step": 70220 }, { "epoch": 0.2837380866768747, "grad_norm": 761.7667236328125, "learning_rate": 4.517813022232849e-05, "loss": 76.7521, "step": 70230 }, { "epoch": 0.28377848794224236, "grad_norm": 623.1316528320312, "learning_rate": 4.51760692096332e-05, "loss": 54.5164, "step": 70240 }, { "epoch": 0.28381888920761, "grad_norm": 875.2012329101562, "learning_rate": 4.5174007803595055e-05, "loss": 69.7034, "step": 70250 }, { "epoch": 0.2838592904729776, "grad_norm": 294.3750915527344, "learning_rate": 4.517194600425423e-05, "loss": 75.7382, "step": 70260 }, { "epoch": 0.28389969173834523, "grad_norm": 793.992919921875, "learning_rate": 4.516988381165095e-05, "loss": 57.0891, "step": 70270 }, { "epoch": 0.28394009300371287, "grad_norm": 265.2918395996094, "learning_rate": 4.516782122582538e-05, "loss": 53.4736, "step": 70280 }, { "epoch": 0.2839804942690805, "grad_norm": 637.1311645507812, "learning_rate": 4.516575824681777e-05, "loss": 89.6522, "step": 70290 }, { "epoch": 0.28402089553444815, "grad_norm": 2700.84521484375, "learning_rate": 4.516369487466832e-05, "loss": 78.3818, "step": 70300 }, { "epoch": 0.2840612967998158, "grad_norm": 775.3411865234375, "learning_rate": 4.5161631109417246e-05, "loss": 75.4549, "step": 70310 }, { "epoch": 0.28410169806518343, "grad_norm": 616.4786376953125, "learning_rate": 4.5159566951104796e-05, "loss": 78.6188, "step": 70320 }, { "epoch": 0.284142099330551, "grad_norm": 265.2325744628906, "learning_rate": 4.515750239977122e-05, "loss": 58.4777, "step": 70330 }, { "epoch": 0.28418250059591865, "grad_norm": 356.9336242675781, "learning_rate": 4.5155437455456744e-05, "loss": 88.5666, "step": 70340 }, { "epoch": 0.2842229018612863, "grad_norm": 1054.8193359375, "learning_rate": 4.515337211820165e-05, "loss": 74.0532, "step": 70350 }, { "epoch": 0.28426330312665393, "grad_norm": 1073.7655029296875, "learning_rate": 4.5151306388046175e-05, "loss": 57.4097, "step": 70360 }, { "epoch": 0.2843037043920216, "grad_norm": 1117.8515625, "learning_rate": 4.5149240265030627e-05, "loss": 91.991, "step": 70370 }, { "epoch": 0.2843441056573892, "grad_norm": 554.7203979492188, "learning_rate": 4.5147173749195255e-05, "loss": 51.3412, "step": 70380 }, { "epoch": 0.2843845069227568, "grad_norm": 669.3203125, "learning_rate": 4.514510684058036e-05, "loss": 90.7202, "step": 70390 }, { "epoch": 0.28442490818812444, "grad_norm": 861.2157592773438, "learning_rate": 4.5143039539226234e-05, "loss": 70.1871, "step": 70400 }, { "epoch": 0.2844653094534921, "grad_norm": 345.44781494140625, "learning_rate": 4.514097184517318e-05, "loss": 49.2981, "step": 70410 }, { "epoch": 0.2845057107188597, "grad_norm": 1595.496337890625, "learning_rate": 4.5138903758461515e-05, "loss": 47.9407, "step": 70420 }, { "epoch": 0.28454611198422736, "grad_norm": 560.5866088867188, "learning_rate": 4.5136835279131556e-05, "loss": 67.7868, "step": 70430 }, { "epoch": 0.284586513249595, "grad_norm": 215.32069396972656, "learning_rate": 4.513476640722362e-05, "loss": 59.5168, "step": 70440 }, { "epoch": 0.2846269145149626, "grad_norm": 858.737548828125, "learning_rate": 4.513269714277805e-05, "loss": 53.7752, "step": 70450 }, { "epoch": 0.2846673157803302, "grad_norm": 575.0614624023438, "learning_rate": 4.5130627485835186e-05, "loss": 95.9444, "step": 70460 }, { "epoch": 0.28470771704569786, "grad_norm": 2945.451416015625, "learning_rate": 4.512855743643537e-05, "loss": 69.3364, "step": 70470 }, { "epoch": 0.2847481183110655, "grad_norm": 578.4557495117188, "learning_rate": 4.512648699461897e-05, "loss": 53.4026, "step": 70480 }, { "epoch": 0.28478851957643314, "grad_norm": 362.3124084472656, "learning_rate": 4.512441616042634e-05, "loss": 86.2023, "step": 70490 }, { "epoch": 0.2848289208418008, "grad_norm": 521.9320068359375, "learning_rate": 4.512234493389785e-05, "loss": 69.5704, "step": 70500 }, { "epoch": 0.2848693221071684, "grad_norm": 606.285888671875, "learning_rate": 4.5120273315073897e-05, "loss": 67.9938, "step": 70510 }, { "epoch": 0.284909723372536, "grad_norm": 977.4638671875, "learning_rate": 4.511820130399485e-05, "loss": 90.854, "step": 70520 }, { "epoch": 0.28495012463790365, "grad_norm": 402.9257507324219, "learning_rate": 4.5116128900701114e-05, "loss": 79.0524, "step": 70530 }, { "epoch": 0.2849905259032713, "grad_norm": 1074.8834228515625, "learning_rate": 4.511405610523309e-05, "loss": 80.932, "step": 70540 }, { "epoch": 0.2850309271686389, "grad_norm": 1405.5771484375, "learning_rate": 4.5111982917631194e-05, "loss": 103.0241, "step": 70550 }, { "epoch": 0.28507132843400657, "grad_norm": 1106.503662109375, "learning_rate": 4.510990933793583e-05, "loss": 70.0878, "step": 70560 }, { "epoch": 0.2851117296993742, "grad_norm": 788.6580810546875, "learning_rate": 4.5107835366187425e-05, "loss": 32.8911, "step": 70570 }, { "epoch": 0.2851521309647418, "grad_norm": 1015.0027465820312, "learning_rate": 4.5105761002426415e-05, "loss": 55.8387, "step": 70580 }, { "epoch": 0.28519253223010943, "grad_norm": 537.4610595703125, "learning_rate": 4.510368624669325e-05, "loss": 58.8079, "step": 70590 }, { "epoch": 0.28523293349547707, "grad_norm": 1022.7601318359375, "learning_rate": 4.510161109902837e-05, "loss": 66.04, "step": 70600 }, { "epoch": 0.2852733347608447, "grad_norm": 785.5269165039062, "learning_rate": 4.5099535559472234e-05, "loss": 87.9221, "step": 70610 }, { "epoch": 0.28531373602621235, "grad_norm": 765.5206298828125, "learning_rate": 4.50974596280653e-05, "loss": 72.3749, "step": 70620 }, { "epoch": 0.28535413729158, "grad_norm": 1824.7486572265625, "learning_rate": 4.509538330484805e-05, "loss": 70.8314, "step": 70630 }, { "epoch": 0.28539453855694763, "grad_norm": 784.1112670898438, "learning_rate": 4.509330658986095e-05, "loss": 64.7922, "step": 70640 }, { "epoch": 0.2854349398223152, "grad_norm": 621.5699462890625, "learning_rate": 4.5091229483144495e-05, "loss": 94.8581, "step": 70650 }, { "epoch": 0.28547534108768285, "grad_norm": 724.716796875, "learning_rate": 4.508915198473919e-05, "loss": 54.7563, "step": 70660 }, { "epoch": 0.2855157423530505, "grad_norm": 435.5616149902344, "learning_rate": 4.50870740946855e-05, "loss": 69.268, "step": 70670 }, { "epoch": 0.28555614361841813, "grad_norm": 1086.2598876953125, "learning_rate": 4.508499581302398e-05, "loss": 56.1892, "step": 70680 }, { "epoch": 0.2855965448837858, "grad_norm": 1214.41015625, "learning_rate": 4.5082917139795125e-05, "loss": 75.7297, "step": 70690 }, { "epoch": 0.2856369461491534, "grad_norm": 665.9967041015625, "learning_rate": 4.508083807503945e-05, "loss": 56.6401, "step": 70700 }, { "epoch": 0.285677347414521, "grad_norm": 1049.6688232421875, "learning_rate": 4.50787586187975e-05, "loss": 70.0839, "step": 70710 }, { "epoch": 0.28571774867988864, "grad_norm": 680.3385009765625, "learning_rate": 4.507667877110982e-05, "loss": 65.669, "step": 70720 }, { "epoch": 0.2857581499452563, "grad_norm": 351.40545654296875, "learning_rate": 4.507459853201695e-05, "loss": 52.2754, "step": 70730 }, { "epoch": 0.2857985512106239, "grad_norm": 699.9556274414062, "learning_rate": 4.507251790155944e-05, "loss": 66.7143, "step": 70740 }, { "epoch": 0.28583895247599156, "grad_norm": 483.2270202636719, "learning_rate": 4.5070436879777865e-05, "loss": 72.3885, "step": 70750 }, { "epoch": 0.2858793537413592, "grad_norm": 13059.109375, "learning_rate": 4.506835546671278e-05, "loss": 100.1066, "step": 70760 }, { "epoch": 0.2859197550067268, "grad_norm": 1270.271240234375, "learning_rate": 4.506627366240479e-05, "loss": 76.3044, "step": 70770 }, { "epoch": 0.2859601562720944, "grad_norm": 373.1097106933594, "learning_rate": 4.506419146689446e-05, "loss": 138.1525, "step": 70780 }, { "epoch": 0.28600055753746206, "grad_norm": 627.6198120117188, "learning_rate": 4.506210888022239e-05, "loss": 80.5968, "step": 70790 }, { "epoch": 0.2860409588028297, "grad_norm": 396.57012939453125, "learning_rate": 4.5060025902429174e-05, "loss": 53.7545, "step": 70800 }, { "epoch": 0.28608136006819734, "grad_norm": 534.5454711914062, "learning_rate": 4.505794253355542e-05, "loss": 87.9787, "step": 70810 }, { "epoch": 0.286121761333565, "grad_norm": 1753.4989013671875, "learning_rate": 4.505585877364175e-05, "loss": 80.0071, "step": 70820 }, { "epoch": 0.2861621625989326, "grad_norm": 940.0836791992188, "learning_rate": 4.505377462272879e-05, "loss": 64.5939, "step": 70830 }, { "epoch": 0.2862025638643002, "grad_norm": 524.5255737304688, "learning_rate": 4.5051690080857176e-05, "loss": 54.3899, "step": 70840 }, { "epoch": 0.28624296512966785, "grad_norm": 982.0265502929688, "learning_rate": 4.504960514806753e-05, "loss": 64.2541, "step": 70850 }, { "epoch": 0.2862833663950355, "grad_norm": 1201.5341796875, "learning_rate": 4.504751982440052e-05, "loss": 88.8859, "step": 70860 }, { "epoch": 0.28632376766040313, "grad_norm": 624.1221313476562, "learning_rate": 4.5045434109896786e-05, "loss": 59.5705, "step": 70870 }, { "epoch": 0.28636416892577077, "grad_norm": 422.28778076171875, "learning_rate": 4.504334800459699e-05, "loss": 44.9323, "step": 70880 }, { "epoch": 0.2864045701911384, "grad_norm": 665.96923828125, "learning_rate": 4.504126150854181e-05, "loss": 60.9275, "step": 70890 }, { "epoch": 0.286444971456506, "grad_norm": 1277.694580078125, "learning_rate": 4.503917462177192e-05, "loss": 77.8568, "step": 70900 }, { "epoch": 0.28648537272187363, "grad_norm": 1883.61962890625, "learning_rate": 4.5037087344328e-05, "loss": 62.4995, "step": 70910 }, { "epoch": 0.2865257739872413, "grad_norm": 885.0050659179688, "learning_rate": 4.5034999676250745e-05, "loss": 62.4988, "step": 70920 }, { "epoch": 0.2865661752526089, "grad_norm": 1359.623779296875, "learning_rate": 4.503291161758087e-05, "loss": 71.7273, "step": 70930 }, { "epoch": 0.28660657651797655, "grad_norm": 497.2101745605469, "learning_rate": 4.5030823168359046e-05, "loss": 76.8819, "step": 70940 }, { "epoch": 0.2866469777833442, "grad_norm": 891.3270874023438, "learning_rate": 4.502873432862603e-05, "loss": 69.6958, "step": 70950 }, { "epoch": 0.28668737904871183, "grad_norm": 441.0372314453125, "learning_rate": 4.5026645098422515e-05, "loss": 102.5139, "step": 70960 }, { "epoch": 0.2867277803140794, "grad_norm": 993.6549072265625, "learning_rate": 4.5024555477789255e-05, "loss": 79.794, "step": 70970 }, { "epoch": 0.28676818157944706, "grad_norm": 3066.687255859375, "learning_rate": 4.5022465466766974e-05, "loss": 105.9858, "step": 70980 }, { "epoch": 0.2868085828448147, "grad_norm": 1127.01025390625, "learning_rate": 4.502037506539642e-05, "loss": 69.0879, "step": 70990 }, { "epoch": 0.28684898411018234, "grad_norm": 1199.5037841796875, "learning_rate": 4.5018284273718336e-05, "loss": 58.2869, "step": 71000 }, { "epoch": 0.28688938537555, "grad_norm": 706.9274291992188, "learning_rate": 4.5016193091773504e-05, "loss": 71.1858, "step": 71010 }, { "epoch": 0.2869297866409176, "grad_norm": 758.85498046875, "learning_rate": 4.501410151960268e-05, "loss": 75.0063, "step": 71020 }, { "epoch": 0.2869701879062852, "grad_norm": 938.1792602539062, "learning_rate": 4.5012009557246645e-05, "loss": 56.8306, "step": 71030 }, { "epoch": 0.28701058917165284, "grad_norm": 539.9905395507812, "learning_rate": 4.5009917204746184e-05, "loss": 49.6551, "step": 71040 }, { "epoch": 0.2870509904370205, "grad_norm": 1365.8111572265625, "learning_rate": 4.5007824462142076e-05, "loss": 67.7948, "step": 71050 }, { "epoch": 0.2870913917023881, "grad_norm": 1149.0391845703125, "learning_rate": 4.500573132947514e-05, "loss": 62.5273, "step": 71060 }, { "epoch": 0.28713179296775576, "grad_norm": 833.6195678710938, "learning_rate": 4.500363780678617e-05, "loss": 46.2563, "step": 71070 }, { "epoch": 0.2871721942331234, "grad_norm": 2005.719482421875, "learning_rate": 4.5001543894115975e-05, "loss": 65.34, "step": 71080 }, { "epoch": 0.287212595498491, "grad_norm": 499.4972839355469, "learning_rate": 4.4999449591505396e-05, "loss": 46.8325, "step": 71090 }, { "epoch": 0.2872529967638586, "grad_norm": 1662.39013671875, "learning_rate": 4.499735489899524e-05, "loss": 75.1804, "step": 71100 }, { "epoch": 0.28729339802922627, "grad_norm": 297.105712890625, "learning_rate": 4.4995259816626356e-05, "loss": 59.1684, "step": 71110 }, { "epoch": 0.2873337992945939, "grad_norm": 506.93670654296875, "learning_rate": 4.499316434443959e-05, "loss": 41.7648, "step": 71120 }, { "epoch": 0.28737420055996155, "grad_norm": 666.0968627929688, "learning_rate": 4.49910684824758e-05, "loss": 110.1385, "step": 71130 }, { "epoch": 0.2874146018253292, "grad_norm": 786.0115966796875, "learning_rate": 4.498897223077582e-05, "loss": 72.202, "step": 71140 }, { "epoch": 0.2874550030906968, "grad_norm": 7043.43310546875, "learning_rate": 4.498687558938055e-05, "loss": 108.4283, "step": 71150 }, { "epoch": 0.2874954043560644, "grad_norm": 857.16748046875, "learning_rate": 4.4984778558330844e-05, "loss": 96.0511, "step": 71160 }, { "epoch": 0.28753580562143205, "grad_norm": 637.8214111328125, "learning_rate": 4.4982681137667594e-05, "loss": 44.1582, "step": 71170 }, { "epoch": 0.2875762068867997, "grad_norm": 904.2376098632812, "learning_rate": 4.498058332743168e-05, "loss": 98.6093, "step": 71180 }, { "epoch": 0.28761660815216733, "grad_norm": 1919.4237060546875, "learning_rate": 4.4978485127664015e-05, "loss": 103.2059, "step": 71190 }, { "epoch": 0.28765700941753497, "grad_norm": 810.6397094726562, "learning_rate": 4.4976386538405495e-05, "loss": 67.5545, "step": 71200 }, { "epoch": 0.2876974106829026, "grad_norm": 1610.69384765625, "learning_rate": 4.4974287559697035e-05, "loss": 60.4293, "step": 71210 }, { "epoch": 0.2877378119482702, "grad_norm": 1313.538818359375, "learning_rate": 4.497218819157956e-05, "loss": 57.7007, "step": 71220 }, { "epoch": 0.28777821321363783, "grad_norm": 872.7113037109375, "learning_rate": 4.497008843409399e-05, "loss": 51.1471, "step": 71230 }, { "epoch": 0.2878186144790055, "grad_norm": 398.4093017578125, "learning_rate": 4.496798828728126e-05, "loss": 73.8484, "step": 71240 }, { "epoch": 0.2878590157443731, "grad_norm": 3197.44970703125, "learning_rate": 4.496588775118232e-05, "loss": 97.3321, "step": 71250 }, { "epoch": 0.28789941700974075, "grad_norm": 422.7282409667969, "learning_rate": 4.496378682583813e-05, "loss": 79.5314, "step": 71260 }, { "epoch": 0.2879398182751084, "grad_norm": 409.5416564941406, "learning_rate": 4.4961685511289625e-05, "loss": 93.6449, "step": 71270 }, { "epoch": 0.28798021954047603, "grad_norm": 714.4796752929688, "learning_rate": 4.495958380757779e-05, "loss": 105.323, "step": 71280 }, { "epoch": 0.2880206208058436, "grad_norm": 1594.0919189453125, "learning_rate": 4.4957481714743585e-05, "loss": 68.0684, "step": 71290 }, { "epoch": 0.28806102207121126, "grad_norm": 460.36334228515625, "learning_rate": 4.495537923282801e-05, "loss": 64.8978, "step": 71300 }, { "epoch": 0.2881014233365789, "grad_norm": 1116.5792236328125, "learning_rate": 4.4953276361872034e-05, "loss": 61.4736, "step": 71310 }, { "epoch": 0.28814182460194654, "grad_norm": 463.9264221191406, "learning_rate": 4.4951173101916675e-05, "loss": 59.6178, "step": 71320 }, { "epoch": 0.2881822258673142, "grad_norm": 958.1243896484375, "learning_rate": 4.494906945300291e-05, "loss": 76.7924, "step": 71330 }, { "epoch": 0.2882226271326818, "grad_norm": 479.6456604003906, "learning_rate": 4.4946965415171775e-05, "loss": 57.5769, "step": 71340 }, { "epoch": 0.2882630283980494, "grad_norm": 595.579345703125, "learning_rate": 4.4944860988464276e-05, "loss": 72.5989, "step": 71350 }, { "epoch": 0.28830342966341704, "grad_norm": 821.9955444335938, "learning_rate": 4.494275617292144e-05, "loss": 74.0442, "step": 71360 }, { "epoch": 0.2883438309287847, "grad_norm": 890.1593627929688, "learning_rate": 4.494065096858432e-05, "loss": 67.1569, "step": 71370 }, { "epoch": 0.2883842321941523, "grad_norm": 935.3182983398438, "learning_rate": 4.4938545375493934e-05, "loss": 57.2469, "step": 71380 }, { "epoch": 0.28842463345951996, "grad_norm": 930.1670532226562, "learning_rate": 4.493643939369134e-05, "loss": 62.9051, "step": 71390 }, { "epoch": 0.2884650347248876, "grad_norm": 923.1588745117188, "learning_rate": 4.493433302321759e-05, "loss": 72.7095, "step": 71400 }, { "epoch": 0.2885054359902552, "grad_norm": 1056.5206298828125, "learning_rate": 4.4932226264113764e-05, "loss": 89.8381, "step": 71410 }, { "epoch": 0.28854583725562283, "grad_norm": 556.2738647460938, "learning_rate": 4.493011911642092e-05, "loss": 64.1478, "step": 71420 }, { "epoch": 0.28858623852099047, "grad_norm": 614.498046875, "learning_rate": 4.4928011580180155e-05, "loss": 71.601, "step": 71430 }, { "epoch": 0.2886266397863581, "grad_norm": 1232.3363037109375, "learning_rate": 4.492590365543253e-05, "loss": 59.9572, "step": 71440 }, { "epoch": 0.28866704105172575, "grad_norm": 1182.1708984375, "learning_rate": 4.492379534221916e-05, "loss": 87.4886, "step": 71450 }, { "epoch": 0.2887074423170934, "grad_norm": 703.7147827148438, "learning_rate": 4.492168664058114e-05, "loss": 48.6093, "step": 71460 }, { "epoch": 0.28874784358246103, "grad_norm": 1096.2550048828125, "learning_rate": 4.491957755055959e-05, "loss": 55.8847, "step": 71470 }, { "epoch": 0.2887882448478286, "grad_norm": 824.9928588867188, "learning_rate": 4.491746807219561e-05, "loss": 69.6475, "step": 71480 }, { "epoch": 0.28882864611319625, "grad_norm": 799.056640625, "learning_rate": 4.491535820553034e-05, "loss": 74.3196, "step": 71490 }, { "epoch": 0.2888690473785639, "grad_norm": 1376.468017578125, "learning_rate": 4.491324795060491e-05, "loss": 56.1541, "step": 71500 }, { "epoch": 0.28890944864393153, "grad_norm": 792.1198120117188, "learning_rate": 4.491113730746046e-05, "loss": 80.2355, "step": 71510 }, { "epoch": 0.2889498499092992, "grad_norm": 1302.2103271484375, "learning_rate": 4.490902627613813e-05, "loss": 70.5781, "step": 71520 }, { "epoch": 0.2889902511746668, "grad_norm": 1214.075927734375, "learning_rate": 4.4906914856679094e-05, "loss": 84.3187, "step": 71530 }, { "epoch": 0.2890306524400344, "grad_norm": 401.3418273925781, "learning_rate": 4.49048030491245e-05, "loss": 53.576, "step": 71540 }, { "epoch": 0.28907105370540204, "grad_norm": 885.4319458007812, "learning_rate": 4.490269085351552e-05, "loss": 46.2776, "step": 71550 }, { "epoch": 0.2891114549707697, "grad_norm": 410.9359436035156, "learning_rate": 4.4900578269893335e-05, "loss": 75.6896, "step": 71560 }, { "epoch": 0.2891518562361373, "grad_norm": 858.1013793945312, "learning_rate": 4.4898465298299134e-05, "loss": 78.359, "step": 71570 }, { "epoch": 0.28919225750150496, "grad_norm": 1423.806640625, "learning_rate": 4.489635193877411e-05, "loss": 62.488, "step": 71580 }, { "epoch": 0.2892326587668726, "grad_norm": 962.8115234375, "learning_rate": 4.489423819135945e-05, "loss": 68.6732, "step": 71590 }, { "epoch": 0.28927306003224024, "grad_norm": 737.3809814453125, "learning_rate": 4.4892124056096386e-05, "loss": 85.0963, "step": 71600 }, { "epoch": 0.2893134612976078, "grad_norm": 699.3038330078125, "learning_rate": 4.489000953302612e-05, "loss": 39.0792, "step": 71610 }, { "epoch": 0.28935386256297546, "grad_norm": 526.4671020507812, "learning_rate": 4.488789462218987e-05, "loss": 73.1894, "step": 71620 }, { "epoch": 0.2893942638283431, "grad_norm": 869.59130859375, "learning_rate": 4.4885779323628886e-05, "loss": 80.5256, "step": 71630 }, { "epoch": 0.28943466509371074, "grad_norm": 2488.22216796875, "learning_rate": 4.4883663637384396e-05, "loss": 96.78, "step": 71640 }, { "epoch": 0.2894750663590784, "grad_norm": 322.6717529296875, "learning_rate": 4.488154756349764e-05, "loss": 55.7131, "step": 71650 }, { "epoch": 0.289515467624446, "grad_norm": 762.85498046875, "learning_rate": 4.4879431102009886e-05, "loss": 83.2775, "step": 71660 }, { "epoch": 0.2895558688898136, "grad_norm": 312.4606018066406, "learning_rate": 4.487731425296238e-05, "loss": 73.5523, "step": 71670 }, { "epoch": 0.28959627015518125, "grad_norm": 774.15478515625, "learning_rate": 4.487519701639641e-05, "loss": 58.399, "step": 71680 }, { "epoch": 0.2896366714205489, "grad_norm": 939.1637573242188, "learning_rate": 4.487307939235323e-05, "loss": 103.4214, "step": 71690 }, { "epoch": 0.2896770726859165, "grad_norm": 378.4061584472656, "learning_rate": 4.487096138087414e-05, "loss": 54.6242, "step": 71700 }, { "epoch": 0.28971747395128417, "grad_norm": 841.2785034179688, "learning_rate": 4.4868842982000425e-05, "loss": 92.4388, "step": 71710 }, { "epoch": 0.2897578752166518, "grad_norm": 1951.40673828125, "learning_rate": 4.486672419577339e-05, "loss": 99.056, "step": 71720 }, { "epoch": 0.2897982764820194, "grad_norm": 870.7053833007812, "learning_rate": 4.486460502223434e-05, "loss": 82.6444, "step": 71730 }, { "epoch": 0.28983867774738703, "grad_norm": 1036.0379638671875, "learning_rate": 4.4862485461424585e-05, "loss": 84.8484, "step": 71740 }, { "epoch": 0.28987907901275467, "grad_norm": 1319.0260009765625, "learning_rate": 4.4860365513385456e-05, "loss": 65.2683, "step": 71750 }, { "epoch": 0.2899194802781223, "grad_norm": 623.9202270507812, "learning_rate": 4.4858245178158276e-05, "loss": 51.1636, "step": 71760 }, { "epoch": 0.28995988154348995, "grad_norm": 1197.3074951171875, "learning_rate": 4.4856124455784375e-05, "loss": 57.0653, "step": 71770 }, { "epoch": 0.2900002828088576, "grad_norm": 951.6260986328125, "learning_rate": 4.485400334630511e-05, "loss": 79.302, "step": 71780 }, { "epoch": 0.29004068407422523, "grad_norm": 714.1635131835938, "learning_rate": 4.485188184976182e-05, "loss": 64.435, "step": 71790 }, { "epoch": 0.2900810853395928, "grad_norm": 1034.5103759765625, "learning_rate": 4.484975996619589e-05, "loss": 69.878, "step": 71800 }, { "epoch": 0.29012148660496045, "grad_norm": 2745.012939453125, "learning_rate": 4.484763769564866e-05, "loss": 46.8337, "step": 71810 }, { "epoch": 0.2901618878703281, "grad_norm": 905.9317626953125, "learning_rate": 4.4845515038161515e-05, "loss": 72.7523, "step": 71820 }, { "epoch": 0.29020228913569573, "grad_norm": 596.106689453125, "learning_rate": 4.484339199377583e-05, "loss": 56.2039, "step": 71830 }, { "epoch": 0.2902426904010634, "grad_norm": 770.546875, "learning_rate": 4.484126856253301e-05, "loss": 63.8412, "step": 71840 }, { "epoch": 0.290283091666431, "grad_norm": 1505.4610595703125, "learning_rate": 4.483914474447445e-05, "loss": 84.4822, "step": 71850 }, { "epoch": 0.2903234929317986, "grad_norm": 925.32421875, "learning_rate": 4.483702053964154e-05, "loss": 58.1121, "step": 71860 }, { "epoch": 0.29036389419716624, "grad_norm": 754.6409912109375, "learning_rate": 4.4834895948075704e-05, "loss": 94.093, "step": 71870 }, { "epoch": 0.2904042954625339, "grad_norm": 296.52349853515625, "learning_rate": 4.483277096981836e-05, "loss": 70.3271, "step": 71880 }, { "epoch": 0.2904446967279015, "grad_norm": 387.88568115234375, "learning_rate": 4.483064560491094e-05, "loss": 46.6679, "step": 71890 }, { "epoch": 0.29048509799326916, "grad_norm": 628.3577270507812, "learning_rate": 4.482851985339487e-05, "loss": 73.2846, "step": 71900 }, { "epoch": 0.2905254992586368, "grad_norm": 429.7192077636719, "learning_rate": 4.4826393715311595e-05, "loss": 54.1423, "step": 71910 }, { "epoch": 0.29056590052400444, "grad_norm": 933.8616943359375, "learning_rate": 4.482426719070258e-05, "loss": 82.952, "step": 71920 }, { "epoch": 0.290606301789372, "grad_norm": 1550.8494873046875, "learning_rate": 4.482214027960925e-05, "loss": 60.8338, "step": 71930 }, { "epoch": 0.29064670305473966, "grad_norm": 725.9008178710938, "learning_rate": 4.48200129820731e-05, "loss": 58.8863, "step": 71940 }, { "epoch": 0.2906871043201073, "grad_norm": 426.5110168457031, "learning_rate": 4.481788529813559e-05, "loss": 84.5091, "step": 71950 }, { "epoch": 0.29072750558547494, "grad_norm": 735.1624145507812, "learning_rate": 4.481575722783821e-05, "loss": 71.0047, "step": 71960 }, { "epoch": 0.2907679068508426, "grad_norm": 1465.6466064453125, "learning_rate": 4.481362877122243e-05, "loss": 93.0289, "step": 71970 }, { "epoch": 0.2908083081162102, "grad_norm": 448.85919189453125, "learning_rate": 4.481149992832977e-05, "loss": 49.5786, "step": 71980 }, { "epoch": 0.2908487093815778, "grad_norm": 1719.1485595703125, "learning_rate": 4.4809370699201706e-05, "loss": 99.739, "step": 71990 }, { "epoch": 0.29088911064694545, "grad_norm": 2338.972900390625, "learning_rate": 4.480724108387977e-05, "loss": 94.2316, "step": 72000 } ], "logging_steps": 10, "max_steps": 250000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }